Source code for id_translation.mapping.score_functions

"""Functions which return a likeness score.

See Also:
    The :class:`~.HeuristicScore` class.
"""

from collections.abc import Iterable as _Iterable

from . import exceptions
from .types import CandidateType, ContextType, ValueType

# NOTE(review): module-level flag, off by default. It is not read by any function visible in this
# module; presumably it toggles verbose output for code elsewhere -- confirm against its users.
VERBOSE: bool = False



[docs]
def modified_hamming(
    name: str,
    candidates: _Iterable[str],
    context: ContextType | None,  # noqa: ARG001
    *,
    add_length_ratio_term: bool = True,
    positional_penalty: float = 0.001,
) -> _Iterable[float]:
    """Compute hamming distance modified by length ratio, from the back. Score range is ``[0, 1]``.

    Args:
        name: A name that should be mapped one of the sources in `candidates`.
        candidates: Candidate sources.
        context: Should be ``None``. Always ignored, exists for compatibility.
        add_length_ratio_term: If ``True``, score is divided by ``abs(len(name) - len(candidate))``.
        positional_penalty: A penalty applied to prefer earlier `candidates`, according to the formulare
            ``penalty = index(candidate) * positional_penalty)``.

    Examples:
        >>> from id_translation.mapping.score_functions import modified_hamming
        >>> list(modified_hamming("aa", ["aa", "a", "ab", "aa"], context=None))
        [1.0, 0.499, 0.498, 0.997]
        >>> list(
        ...     modified_hamming(
        ...         "aa", ["aa", "a", "ab", "aa"], context=None, positional_penalty=0
        ...     )
        ... )
        [1.0, 0.5, 0.5, 1.0]
        >>> list(modified_hamming("face", ["face", "FAce", "race", "place"], context=None))
        [1.0, 0.499, 0.748, 0.372]
    """

    def _apply(candidate: str) -> float:
        sz = min(len(candidate), len(name))
        same = sum([name[i] == candidate[i] for i in range(-sz, 0)])

        ratio = (1 / (1 + abs(len(candidate) - len(name)))) if add_length_ratio_term else 1
        normalized_hamming = same / sz

        return ratio * normalized_hamming

    yield from (s - i * positional_penalty for i, s in enumerate(map(_apply, candidates)))




[docs]
def equality(
    value: ValueType,
    candidates: _Iterable[CandidateType],
    context: ContextType | None,  # noqa: ARG001
) -> _Iterable[float]:
    """Return 1.0 if ``k == c_i``, 0.0 otherwise.

    Examples:
        >>> from id_translation.mapping.score_functions import equality
        >>> list(equality("a", "aAb", context=None))
        [1.0, 0.0, 0.0]
    """
    yield from map(float, (value == c for c in candidates))




[docs]
def disabled(
    value: ValueType,
    candidates: _Iterable[CandidateType],
    context: ContextType | None,
    strict: bool = True,
) -> _Iterable[float]:
    """Special value to indicate that scoring logic has been disabled.

    This is a workaround to allow users to indicate that the scoring logic is disabled, and that overrides should be
    used instead. The ``disabled``-function has no special meaning to the mapper, and will be called as any other
    scoring function.

    Returns:
        If `strict` is ``False``, negative infinity for all `candidates`, serving as a catch-all removal filter.

    Raises:
        ScoringDisabledError: If `strict` is ``True``.

    See Also:
        The :ref:`override-only-mapping` documentation.
    """
    if strict:
        raise exceptions.ScoringDisabledError(value, candidates, context)

    return [float("-inf")] * sum(1 for _ in candidates)