Skip to content

Core API

The core layer provides the fingerprinting engine, similarity scoring, healing logic, and SQLite storage.


ElementFingerprint

Rich identity snapshot of a DOM element.

Attributes:

Name Type Description
tag str

HTML tag name, lowercased (e.g. "button", "input", "div").

text str

Visible text content, stripped and lowercased.

attributes frozenset[tuple[str, str]]

Frozen set of (name, value) pairs for all HTML attributes.

dom_path tuple[str, ...]

Tuple of ancestor tag names from root to this element.

siblings tuple[str, ...]

Tuple of tag names of immediate sibling elements.

bbox BoundingBox | None

Bounding box at fingerprint time, or None if not available.

locator str

The original locator string used to find this element.

test_id str

Identifier for the test that created this fingerprint.

Source code in breadcrumb/core/fingerprint.py
@dataclass(frozen=True)
class ElementFingerprint:
    """Immutable identity snapshot of a DOM element.

    The dataclass is frozen, so fields cannot be reassigned after creation.
    Constructing an instance directly stores the values exactly as given;
    ``from_dict`` additionally normalizes ``tag`` and ``text`` and coerces
    container types.

    Attributes:
        tag: HTML tag name, lowercased (e.g. "button", "input", "div").
        text: Visible text content, stripped and lowercased.
        attributes: Frozen set of (name, value) pairs for all HTML attributes.
        dom_path: Tuple of ancestor tag names from root to this element.
        siblings: Tuple of tag names of immediate sibling elements.
        bbox: Bounding box at fingerprint time, or None if not available.
        locator: The original locator string used to find this element.
        test_id: Identifier for the test that created this fingerprint.
    """

    tag: str
    text: str
    attributes: frozenset[tuple[str, str]]
    dom_path: tuple[str, ...]
    siblings: tuple[str, ...]
    bbox: BoundingBox | None = None
    locator: str = ""
    test_id: str = ""

    @staticmethod
    def _normalize_text(text: str | None) -> str:
        """Lowercase *text* and collapse every whitespace run to one space.

        Leading and trailing whitespace is removed. ``None`` and the empty
        string both yield ``""``.
        """
        # split() without arguments already discards leading/trailing
        # whitespace, so no separate strip() is needed.
        return " ".join(text.lower().split()) if text else ""

    @staticmethod
    def _normalize_tag(tag: str | None) -> str:
        """Return *tag* stripped and lowercased; ``None``/empty yields ``""``."""
        return tag.strip().lower() if tag else ""

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ElementFingerprint:
        """Create a fingerprint from a plain dictionary.

        Useful for reconstructing fingerprints from storage or test fixtures.
        Handles type coercion (lists -> tuples/frozensets) automatically.

        Args:
            data: Dictionary with fingerprint fields. Every key is optional;
                a missing key falls back to an empty default. Accepted
                shapes:

                * ``attributes``: a dict, a list/tuple of pairs, or a
                  set/frozenset of pairs. Any other value yields an empty
                  frozenset.
                * ``dom_path`` / ``siblings``: any iterable of tag names.
                * ``bbox``: a ``BoundingBox``, a dict with ``x``, ``y``,
                  ``width`` and ``height`` (each defaulting to 0), or None.
                  Any other value is treated as "no bounding box".

        Returns:
            A new ElementFingerprint instance with ``tag`` and ``text``
            normalized.
        """
        bbox_data = data.get("bbox")
        bbox: BoundingBox | None = None
        if bbox_data is not None:
            if isinstance(bbox_data, BoundingBox):
                bbox = bbox_data
            elif isinstance(bbox_data, dict):
                bbox = BoundingBox(
                    x=float(bbox_data.get("x", 0)),
                    y=float(bbox_data.get("y", 0)),
                    width=float(bbox_data.get("width", 0)),
                    height=float(bbox_data.get("height", 0)),
                )

        raw_attrs = data.get("attributes")
        if isinstance(raw_attrs, dict):
            attrs = frozenset(raw_attrs.items())
        elif isinstance(raw_attrs, (set, frozenset)):
            # A plain set used to fall through to the empty default and
            # silently lose every attribute; freeze it instead.
            attrs = frozenset(raw_attrs)
        elif isinstance(raw_attrs, (list, tuple)):
            # JSON round-trips pairs as lists; turn them back into tuples so
            # they are hashable.
            attrs = frozenset(tuple(pair) for pair in raw_attrs)
        else:
            attrs = frozenset()

        raw_dom = data.get("dom_path", ())
        dom_path = raw_dom if isinstance(raw_dom, tuple) else tuple(raw_dom)

        raw_siblings = data.get("siblings", ())
        siblings = raw_siblings if isinstance(raw_siblings, tuple) else tuple(raw_siblings)

        return cls(
            tag=cls._normalize_tag(data.get("tag", "")),
            text=cls._normalize_text(data.get("text", "")),
            attributes=attrs,
            dom_path=dom_path,
            siblings=siblings,
            bbox=bbox,
            locator=data.get("locator", ""),
            test_id=data.get("test_id", ""),
        )

    def to_dict(self) -> dict[str, Any]:
        """Serialize the fingerprint to a plain dictionary for storage.

        Attribute pairs are emitted as a sorted list of two-item lists so the
        output does not depend on set iteration order.

        Returns:
            Dictionary representation suitable for JSON serialization
            and SQLite storage. The ``bbox`` key is always present and is
            None when no bounding box was recorded.
        """
        box = self.bbox
        return {
            "tag": self.tag,
            "text": self.text,
            "attributes": sorted(list(pair) for pair in self.attributes),
            "dom_path": list(self.dom_path),
            "siblings": list(self.siblings),
            "locator": self.locator,
            "test_id": self.test_id,
            "bbox": None
            if box is None
            else {
                "x": box.x,
                "y": box.y,
                "width": box.width,
                "height": box.height,
            },
        }
from_dict(data: dict[str, Any]) -> ElementFingerprint

Create a fingerprint from a plain dictionary.

Useful for reconstructing fingerprints from storage or test fixtures. Handles type coercion (lists -> tuples/frozensets) automatically.

Parameters:

Name Type Description Default
data dict[str, Any]

Dictionary with fingerprint fields. It should contain at least 'tag'; any field that is missing (including 'tag' itself) falls back to an empty default.

required

Returns:

Type Description
ElementFingerprint

A new ElementFingerprint instance.

Source code in breadcrumb/core/fingerprint.py
@classmethod
def from_dict(cls, data: dict[str, Any]) -> ElementFingerprint:
    """Create a fingerprint from a plain dictionary.

    Useful for reconstructing fingerprints from storage or test fixtures.
    Handles type coercion (lists -> tuples/frozensets) automatically.

    Args:
        data: Dictionary with fingerprint fields. Every key is optional;
            a missing key falls back to an empty default. Accepted
            shapes:

            * ``attributes``: a dict, a list/tuple of pairs, or a
              set/frozenset of pairs. Any other value yields an empty
              frozenset.
            * ``dom_path`` / ``siblings``: any iterable of tag names.
            * ``bbox``: a ``BoundingBox``, a dict with ``x``, ``y``,
              ``width`` and ``height`` (each defaulting to 0), or None.
              Any other value is treated as "no bounding box".

    Returns:
        A new ElementFingerprint instance with ``tag`` and ``text``
        normalized.
    """
    bbox_data = data.get("bbox")
    bbox: BoundingBox | None = None
    if bbox_data is not None:
        if isinstance(bbox_data, BoundingBox):
            bbox = bbox_data
        elif isinstance(bbox_data, dict):
            bbox = BoundingBox(
                x=float(bbox_data.get("x", 0)),
                y=float(bbox_data.get("y", 0)),
                width=float(bbox_data.get("width", 0)),
                height=float(bbox_data.get("height", 0)),
            )

    raw_attrs = data.get("attributes")
    if isinstance(raw_attrs, dict):
        attrs = frozenset(raw_attrs.items())
    elif isinstance(raw_attrs, (set, frozenset)):
        # A plain set used to fall through to the empty default and
        # silently lose every attribute; freeze it instead.
        attrs = frozenset(raw_attrs)
    elif isinstance(raw_attrs, (list, tuple)):
        # JSON round-trips pairs as lists; turn them back into tuples so
        # they are hashable.
        attrs = frozenset(tuple(pair) for pair in raw_attrs)
    else:
        attrs = frozenset()

    raw_dom = data.get("dom_path", ())
    dom_path = raw_dom if isinstance(raw_dom, tuple) else tuple(raw_dom)

    raw_siblings = data.get("siblings", ())
    siblings = raw_siblings if isinstance(raw_siblings, tuple) else tuple(raw_siblings)

    return cls(
        tag=cls._normalize_tag(data.get("tag", "")),
        text=cls._normalize_text(data.get("text", "")),
        attributes=attrs,
        dom_path=dom_path,
        siblings=siblings,
        bbox=bbox,
        locator=data.get("locator", ""),
        test_id=data.get("test_id", ""),
    )
to_dict() -> dict[str, Any]

Serialize the fingerprint to a plain dictionary for storage.

Returns:

Type Description
dict[str, Any]

Dictionary representation suitable for JSON serialization and SQLite storage.

Source code in breadcrumb/core/fingerprint.py
def to_dict(self) -> dict[str, Any]:
    """Serialize the fingerprint to a plain dictionary for storage.

    Attribute pairs are emitted as a sorted list of two-item lists so the
    output does not depend on set iteration order.

    Returns:
        Dictionary representation suitable for JSON serialization
        and SQLite storage. The ``bbox`` key is always present and is
        None when no bounding box was recorded.
    """
    box = self.bbox
    return {
        "tag": self.tag,
        "text": self.text,
        "attributes": sorted(list(pair) for pair in self.attributes),
        "dom_path": list(self.dom_path),
        "siblings": list(self.siblings),
        "locator": self.locator,
        "test_id": self.test_id,
        "bbox": None
        if box is None
        else {
            "x": box.x,
            "y": box.y,
            "width": box.width,
            "height": box.height,
        },
    }

BoundingBox

Axis-aligned bounding box of an element's visual position.

Source code in breadcrumb/core/fingerprint.py
@dataclass(frozen=True)
class BoundingBox:
    """Axis-aligned rectangle describing where an element is drawn.

    Attributes:
        x: Horizontal coordinate of the box origin.
        y: Vertical coordinate of the box origin.
        width: Horizontal extent of the box.
        height: Vertical extent of the box.
    """

    x: float
    y: float
    width: float
    height: float

    @property
    def center(self) -> tuple[float, float]:
        """Return the ``(x, y)`` midpoint of the box."""
        mid_x = self.x + self.width / 2
        mid_y = self.y + self.height / 2
        return (mid_x, mid_y)
center: tuple[float, float]

Return the center point of the bounding box.


FingerprintStore

SQLite-backed storage for fingerprints and healing events.

Usage::

store = FingerprintStore()          # Uses .breadcrumb.db in cwd
store = FingerprintStore("path/to/db.sqlite")
store.save_fingerprint(fingerprint)
fp = store.load_fingerprint("test_login", "#login-btn")

The database is created automatically on first access.

Source code in breadcrumb/core/storage.py
class FingerprintStore:
    """SQLite-backed storage for fingerprints and healing events.

    Usage::

        store = FingerprintStore()          # Uses .breadcrumb.db in cwd
        store = FingerprintStore("path/to/db.sqlite")
        store.save_fingerprint(fingerprint)
        fp = store.load_fingerprint("test_login", "#login-btn")

    Constructing a store opens the connection and runs the table-creation
    script, so the database is usable immediately. One connection is kept
    and reused until ``close`` is called; the next operation after that
    opens a new one.
    """

    def __init__(self, db_path: str | Path = DEFAULT_DB_PATH) -> None:
        """Open (or create) the database at *db_path* and ensure the schema."""
        self._db_path = Path(db_path)
        self._conn: sqlite3.Connection | None = None
        self._ensure_db()

    @property
    def db_path(self) -> Path:
        """Return the path to the database file."""
        return self._db_path

    def _get_conn(self) -> sqlite3.Connection:
        """Return the cached connection, opening and configuring it if needed."""
        if self._conn is not None:
            return self._conn
        conn = self._conn = sqlite3.connect(str(self._db_path))
        # sqlite3.Row lets callers read columns by name as well as by index.
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        return conn

    def _ensure_db(self) -> None:
        """Run the table-creation script and record the schema version once."""
        conn = self._get_conn()
        conn.executescript(_CREATE_TABLES_SQL)

        version_row = conn.execute(
            "SELECT value FROM schema_meta WHERE key = 'schema_version'",
        ).fetchone()
        if version_row is not None:
            # A version is already recorded; leave it untouched.
            return
        conn.execute(
            "INSERT INTO schema_meta (key, value) VALUES ('schema_version', ?)",
            (str(SCHEMA_VERSION),),
        )
        conn.commit()

    def save_fingerprint(self, fingerprint: ElementFingerprint) -> None:
        """Insert or replace the fingerprint stored for its (test_id, locator).

        Called on passing test runs to keep the fingerprint database current.
        The fingerprint is stored as JSON together with the current time.

        Raises:
            ValueError: If test_id or locator is empty.
        """
        if not fingerprint.test_id:
            msg = "Fingerprint must have a test_id to be stored."
            raise ValueError(msg)
        if not fingerprint.locator:
            msg = "Fingerprint must have a locator to be stored."
            raise ValueError(msg)

        conn = self._get_conn()
        upsert_sql = (
            "INSERT INTO fingerprints (test_id, locator, fingerprint_json, updated_at) "
            "VALUES (?, ?, ?, ?) "
            "ON CONFLICT (test_id, locator) "
            "DO UPDATE SET fingerprint_json = excluded.fingerprint_json, "
            "             updated_at = excluded.updated_at"
        )
        values = (
            fingerprint.test_id,
            fingerprint.locator,
            json.dumps(fingerprint.to_dict()),
            time.time(),
        )
        conn.execute(upsert_sql, values)
        conn.commit()

    def load_fingerprint(self, test_id: str, locator: str) -> ElementFingerprint | None:
        """Return the fingerprint stored for (test_id, locator), or None."""
        sql = "SELECT fingerprint_json FROM fingerprints WHERE test_id = ? AND locator = ?"
        row = self._get_conn().execute(sql, (test_id, locator)).fetchone()
        if row is None:
            return None

        payload: dict[str, Any] = json.loads(row[0])  # type: ignore[index]
        return ElementFingerprint.from_dict(payload)

    def record_healing(self, event: HealingEvent) -> None:
        """Append a healing event row; existing rows are never modified."""
        conn = self._get_conn()
        insert_sql = (
            "INSERT INTO healing_events "
            "(test_id, locator, confidence, original_json, healed_json, timestamp) "
            "VALUES (?, ?, ?, ?, ?, ?)"
        )
        values = (
            event.test_id,
            event.locator,
            event.confidence,
            json.dumps(event.original_fingerprint),
            json.dumps(event.healed_fingerprint),
            event.timestamp,
        )
        conn.execute(insert_sql, values)
        conn.commit()

    def get_healing_events(
        self,
        test_id: str | None = None,
        locator: str | None = None,
    ) -> list[HealingEvent]:
        """Query healing events, optionally filtered by test and/or locator.

        A filter is applied only when its argument is not None; when both
        are given they are combined with AND.

        Returns list ordered by timestamp descending.
        """
        conn = self._get_conn()
        candidates = [("test_id = ?", test_id), ("locator = ?", locator)]
        active = [(clause, value) for clause, value in candidates if value is not None]

        query = "SELECT * FROM healing_events"
        if active:
            query += " WHERE " + " AND ".join(clause for clause, _ in active)
        query += " ORDER BY timestamp DESC"
        params: list[str] = [value for _, value in active]

        return [
            HealingEvent(
                test_id=row["test_id"],  # type: ignore[index]
                locator=row["locator"],  # type: ignore[index]
                confidence=row["confidence"],  # type: ignore[index]
                original_fingerprint=json.loads(row["original_json"]),  # type: ignore[index]
                healed_fingerprint=json.loads(row["healed_json"]),  # type: ignore[index]
                timestamp=row["timestamp"],  # type: ignore[index]
            )
            for row in conn.execute(query, params).fetchall()
        ]

    def get_all_fingerprints(self) -> list[ElementFingerprint]:
        """Load every stored fingerprint."""
        stored = self._get_conn().execute("SELECT fingerprint_json FROM fingerprints").fetchall()
        return [
            ElementFingerprint.from_dict(json.loads(payload))
            for (payload,) in stored
        ]

    def delete_fingerprint(self, test_id: str, locator: str) -> bool:
        """Delete a stored fingerprint. Returns True if found and deleted."""
        conn = self._get_conn()
        removed = conn.execute(
            "DELETE FROM fingerprints WHERE test_id = ? AND locator = ?",
            (test_id, locator),
        ).rowcount
        conn.commit()
        return removed > 0

    def clear(self) -> None:
        """Delete all fingerprints and healing events atomically.

        Both deletes run in one transaction: if either statement fails, the
        transaction is rolled back so the store is never left half-cleared.
        """
        conn = self._get_conn()
        # The connection context manager commits on success and rolls back
        # when the block raises.
        with conn:
            conn.execute("DELETE FROM fingerprints")
            conn.execute("DELETE FROM healing_events")

    def stats(self) -> dict[str, int]:
        """Return counts of fingerprints and healing events."""
        conn = self._get_conn()
        count_queries = {
            "fingerprints": "SELECT COUNT(*) FROM fingerprints",
            "healing_events": "SELECT COUNT(*) FROM healing_events",
        }
        counts: dict[str, int] = {}
        for name, sql in count_queries.items():
            row = conn.execute(sql).fetchone()
            counts[name] = row[0] if row is not None else 0  # type: ignore[index]
        return counts

    def close(self) -> None:
        """Close the database connection, if one is open."""
        if self._conn is None:
            return
        self._conn.close()
        self._conn = None
db_path: Path

Return the path to the database file.

save_fingerprint(fingerprint: ElementFingerprint) -> None

Save or update a fingerprint for a (test_id, locator) pair.

Called on passing test runs to keep the fingerprint database current.

Raises:

Type Description
ValueError

If test_id or locator is empty.

Source code in breadcrumb/core/storage.py
def save_fingerprint(self, fingerprint: ElementFingerprint) -> None:
    """Insert or replace the fingerprint stored for its (test_id, locator).

    Called on passing test runs to keep the fingerprint database current.
    The fingerprint is stored as JSON together with the current time.

    Raises:
        ValueError: If test_id or locator is empty.
    """
    if not fingerprint.test_id:
        msg = "Fingerprint must have a test_id to be stored."
        raise ValueError(msg)
    if not fingerprint.locator:
        msg = "Fingerprint must have a locator to be stored."
        raise ValueError(msg)

    conn = self._get_conn()
    upsert_sql = (
        "INSERT INTO fingerprints (test_id, locator, fingerprint_json, updated_at) "
        "VALUES (?, ?, ?, ?) "
        "ON CONFLICT (test_id, locator) "
        "DO UPDATE SET fingerprint_json = excluded.fingerprint_json, "
        "             updated_at = excluded.updated_at"
    )
    values = (
        fingerprint.test_id,
        fingerprint.locator,
        json.dumps(fingerprint.to_dict()),
        time.time(),
    )
    conn.execute(upsert_sql, values)
    conn.commit()
load_fingerprint(test_id: str, locator: str) -> ElementFingerprint | None

Load a stored fingerprint for a (test_id, locator) pair.

Returns None if not found.

Source code in breadcrumb/core/storage.py
def load_fingerprint(self, test_id: str, locator: str) -> ElementFingerprint | None:
    """Return the fingerprint stored for (test_id, locator), or None."""
    sql = "SELECT fingerprint_json FROM fingerprints WHERE test_id = ? AND locator = ?"
    row = self._get_conn().execute(sql, (test_id, locator)).fetchone()
    if row is None:
        return None

    payload: dict[str, Any] = json.loads(row[0])  # type: ignore[index]
    return ElementFingerprint.from_dict(payload)
record_healing(event: HealingEvent) -> None

Record a healing event. Append-only for reporting.

Source code in breadcrumb/core/storage.py
def record_healing(self, event: HealingEvent) -> None:
    """Append a healing event row; existing rows are never modified."""
    conn = self._get_conn()
    insert_sql = (
        "INSERT INTO healing_events "
        "(test_id, locator, confidence, original_json, healed_json, timestamp) "
        "VALUES (?, ?, ?, ?, ?, ?)"
    )
    values = (
        event.test_id,
        event.locator,
        event.confidence,
        json.dumps(event.original_fingerprint),
        json.dumps(event.healed_fingerprint),
        event.timestamp,
    )
    conn.execute(insert_sql, values)
    conn.commit()
get_healing_events(test_id: str | None = None, locator: str | None = None) -> list[HealingEvent]

Query healing events, optionally filtered by test and/or locator.

Returns list ordered by timestamp descending.

Source code in breadcrumb/core/storage.py
def get_healing_events(
    self,
    test_id: str | None = None,
    locator: str | None = None,
) -> list[HealingEvent]:
    """Query healing events, optionally filtered by test and/or locator.

    A filter is applied only when its argument is not None; when both
    are given they are combined with AND.

    Returns list ordered by timestamp descending.
    """
    conn = self._get_conn()
    candidates = [("test_id = ?", test_id), ("locator = ?", locator)]
    active = [(clause, value) for clause, value in candidates if value is not None]

    query = "SELECT * FROM healing_events"
    if active:
        query += " WHERE " + " AND ".join(clause for clause, _ in active)
    query += " ORDER BY timestamp DESC"
    params: list[str] = [value for _, value in active]

    return [
        HealingEvent(
            test_id=row["test_id"],  # type: ignore[index]
            locator=row["locator"],  # type: ignore[index]
            confidence=row["confidence"],  # type: ignore[index]
            original_fingerprint=json.loads(row["original_json"]),  # type: ignore[index]
            healed_fingerprint=json.loads(row["healed_json"]),  # type: ignore[index]
            timestamp=row["timestamp"],  # type: ignore[index]
        )
        for row in conn.execute(query, params).fetchall()
    ]
get_all_fingerprints() -> list[ElementFingerprint]

Load all stored fingerprints.

Source code in breadcrumb/core/storage.py
def get_all_fingerprints(self) -> list[ElementFingerprint]:
    """Load every stored fingerprint."""
    stored = self._get_conn().execute("SELECT fingerprint_json FROM fingerprints").fetchall()
    return [
        ElementFingerprint.from_dict(json.loads(payload))
        for (payload,) in stored
    ]
delete_fingerprint(test_id: str, locator: str) -> bool

Delete a stored fingerprint. Returns True if found and deleted.

Source code in breadcrumb/core/storage.py
def delete_fingerprint(self, test_id: str, locator: str) -> bool:
    """Delete a stored fingerprint. Returns True if found and deleted."""
    conn = self._get_conn()
    removed = conn.execute(
        "DELETE FROM fingerprints WHERE test_id = ? AND locator = ?",
        (test_id, locator),
    ).rowcount
    conn.commit()
    return removed > 0
clear() -> None

Delete all fingerprints and healing events.

Source code in breadcrumb/core/storage.py
def clear(self) -> None:
    """Delete all fingerprints and healing events atomically.

    Both deletes run in one transaction: if either statement fails, the
    transaction is rolled back so the store is never left half-cleared.
    """
    conn = self._get_conn()
    # The connection context manager commits on success and rolls back
    # when the block raises.
    with conn:
        conn.execute("DELETE FROM fingerprints")
        conn.execute("DELETE FROM healing_events")
stats() -> dict[str, int]

Return counts of fingerprints and healing events.

Source code in breadcrumb/core/storage.py
def stats(self) -> dict[str, int]:
    """Return counts of fingerprints and healing events."""
    conn = self._get_conn()
    count_queries = {
        "fingerprints": "SELECT COUNT(*) FROM fingerprints",
        "healing_events": "SELECT COUNT(*) FROM healing_events",
    }
    counts: dict[str, int] = {}
    for name, sql in count_queries.items():
        row = conn.execute(sql).fetchone()
        counts[name] = row[0] if row is not None else 0  # type: ignore[index]
    return counts
close() -> None

Close the database connection.

Source code in breadcrumb/core/storage.py
def close(self) -> None:
    """Close the database connection, if one is open."""
    if self._conn is None:
        return
    self._conn.close()
    self._conn = None

HealingEvent

Record of a single healing occurrence.

Source code in breadcrumb/core/storage.py
@dataclass
class HealingEvent:
    """Record of a single healing occurrence.

    Attributes:
        test_id: Identifier of the test the event belongs to.
        locator: Locator string the event was recorded for.
        confidence: Confidence value attached to the healing (presumably
            the similarity score of the accepted match; TODO confirm
            against the healing logic).
        original_fingerprint: Dictionary form of the fingerprint before
            healing. It must be JSON-serializable to be stored.
        healed_fingerprint: Dictionary form of the fingerprint after
            healing. It must be JSON-serializable to be stored.
        timestamp: Numeric time of the event (presumably Unix epoch
            seconds; verify against the code that creates events).
    """

    test_id: str
    locator: str
    confidence: float
    original_fingerprint: dict[str, Any]
    healed_fingerprint: dict[str, Any]
    timestamp: float