from collections.abc import Iterator, Mapping, Sequence
from copy import copy
from typing import TYPE_CHECKING, Any, Generic, Self
from rics.collections.dicts import InheritedKeysDict, reverse_dict
from rics.misc import tname
from ..transform.types import Transformer, Transformers
from ..types import HasSources, IdType, NameToSource, NameType, SourceType
from ._format import Format
from ._format_applier import FormatApplier
from ._magic_dict import MagicDict
from .types import FormatType, PlaceholderTranslations, SourcePlaceholderTranslations
if TYPE_CHECKING:
import pandas
[docs]
class TranslationMap(
    Generic[NameType, SourceType, IdType],
    HasSources[SourceType],
    Mapping[NameType | SourceType, MagicDict[IdType]],
):
    """Storage class for fetched translations.

    Instances act as a read-only mapping whose keys are the known names and sources; item access delegates to
    :meth:`apply` using the formats configured on the instance.

    Args:
        source_translations: Fetched translations ``{source: PlaceholderTranslations}``.
        fmt: A translation format. Must be given to use as a mapping.
        default_fmt: Alternative format specification to use instead of `fmt` for fallback translation.
        name_to_source: Mappings ``{name: source}``, but may be overridden by the user.
        default_fmt_placeholders: Per-source default placeholder values.
        enable_uuid_heuristics: Improves matching when :py:class:`~uuid.UUID`-like IDs are in use.
        transformers: A dict ``{source: transformer}`` of initialized :class:`.Transformer` instances.
    """

    def __init__(
        self,
        source_translations: SourcePlaceholderTranslations[SourceType],
        *,
        fmt: FormatType = Format.DEFAULT,
        default_fmt: FormatType = Format.DEFAULT_FAILED,
        name_to_source: NameToSource[NameType, SourceType] | None = None,
        default_fmt_placeholders: InheritedKeysDict[SourceType, str, Any] | None = None,
        enable_uuid_heuristics: bool = True,
        transformers: Transformers[SourceType, IdType] | None = None,
    ) -> None:
        self.fmt = Format.parse(fmt)
        self.default_fmt = Format.parse(default_fmt)
        self.default_fmt_placeholders = default_fmt_placeholders or InheritedKeysDict.make({})

        transformers = transformers or {}
        self._format_appliers: dict[SourceType, FormatApplier[NameType, SourceType, IdType]] = {
            source: FormatApplier(translations, transformer=transformers.get(source))
            for source, translations in source_translations.items()
        }

        # The name_to_source setter derives the key set from _format_appliers, so it must run after the
        # appliers have been created.
        self._names_and_sources: set[NameType | SourceType] = set()
        self.name_to_source = name_to_source or {}

        self._reverse_mode: bool = False
        self.enable_uuid_heuristics = enable_uuid_heuristics

    def to_dicts(self) -> dict[SourceType, dict[str, Sequence[Any]]]:
        """Get the underlying data used for translations as dicts.

        This is equivalent to using :meth:`to_pandas`, then calling ``DataFrame.to_dict(orient='list')`` on each
        frame.

        Returns:
            A dict ``{source: {placeholder: [values...]}}``.
        """
        return {applier.source: applier.to_dict() for applier in self._format_appliers.values()}

    def _extract_translations(self) -> SourcePlaceholderTranslations[SourceType]:
        """Return the raw translations object held by each format applier, keyed by source."""
        return {source: applier._translations for source, applier in self._format_appliers.items()}

    def to_pandas(self) -> dict[SourceType, "pandas.DataFrame"]:
        """Get the underlying data used for translations as :class:`pandas.DataFrame`.

        Returns:
            A dict ``{source: DataFrame}``.
        """
        return {applier.source: applier.to_pandas() for applier in self._format_appliers.values()}

    def to_translations(self, fmt: FormatType | None = None) -> dict[SourceType, MagicDict[IdType]]:
        """Create translations for all sources.

        Returned values are of type :class:`.MagicDict`. To convert to regular built-in dicts, run

        .. code-block::

           translations = translation_map.to_translations()
           as_regular_dicts = {
               source: dict(magic)
               for source, magic in translations.items()
           }

        on the returned dict-of-magic-dicts.

        Args:
            fmt: :class:`.Format` to use. If ``None``, fall back to init format.

        Returns:
            A dict of translations ``{source: MagicDict}``.
        """
        # Keys are sources by construction. Skip name resolution so that a name which happens to equal a
        # source (but maps elsewhere) cannot redirect the lookup to a different source.
        return {source: self._apply_impl(source, fmt, None, resolve_name=False) for source in self.sources}

    @classmethod
    def from_pandas(
        cls,
        frames: dict[SourceType, "pandas.DataFrame"],
        fmt: FormatType = Format.DEFAULT,
        *,
        default_fmt: FormatType = Format.DEFAULT_FAILED,
    ) -> Self:
        """Create a new instance from a :class:`pandas.DataFrame` dict.

        Args:
            frames: A dict ``{source: DataFrame}``.
            fmt: A translation format. Must be given to use as a mapping.
            default_fmt: Alternative format specification to use instead of `fmt` for fallback translation.

        Returns:
            A new ``TranslationMap``.
        """
        source_translations = {
            source: PlaceholderTranslations.from_dataframe(source, frame) for source, frame in frames.items()
        }
        return cls(source_translations, fmt=fmt, default_fmt=default_fmt)

    def apply(
        self,
        name_or_source: NameType | SourceType,
        fmt: FormatType | None = None,
        *,
        default_fmt: FormatType | None = None,
    ) -> MagicDict[IdType]:
        """Create translations for a given name or source.

        The argument is first looked up among the known names; if it is not a name, it is used as a source as-is.

        Args:
            name_or_source: A name or source to translate.
            fmt: :class:`.Format` to use. If ``None``, fall back to init format.
            default_fmt: Alternative format for default translation. If ``None``, fall back to the
                :attr:`default_fmt` of this instance.

        Returns:
            Translations for `name_or_source` as a dict ``{id: translation}``. The mapping is reversed if
            :attr:`reverse_mode` is set.

        Raises:
            ValueError: If ``fmt=None`` and no format is set on the instance.
            KeyError: If `name_or_source` does not resolve to a known source.

        Notes:
            This method is called by ``__getitem__``.
        """
        return self._apply_impl(name_or_source, fmt, default_fmt, resolve_name=True)

    def _apply_impl(
        self,
        name_or_source: NameType | SourceType,
        fmt: FormatType | None,
        default_fmt: FormatType | None,
        *,
        resolve_name: bool,
    ) -> MagicDict[IdType]:
        """Shared implementation of :meth:`apply` and :meth:`to_translations`.

        Args:
            name_or_source: A name or source to translate.
            fmt: Format to use, or ``None`` to use the instance format.
            default_fmt: Fallback format to use, or ``None`` to use the instance default format.
            resolve_name: If ``True``, map `name_or_source` through the name-to-source mapping before looking up
                the source. If ``False``, treat `name_or_source` as a source.

        Returns:
            Translations ``{id: translation}``, reversed if :attr:`reverse_mode` is set.
        """
        fmt = self._fmt if fmt is None else fmt
        if fmt is None:
            raise ValueError("No format specified and None given at initialization.")  # pragma: no cover
        fmt = Format.parse(fmt)
        default_fmt = self._default_fmt if default_fmt is None else Format.parse(default_fmt)

        if resolve_name:
            # Use the private mapping; the public property creates a new dict on every access.
            source = self._name_to_source.get(name_or_source, name_or_source)  # type: ignore
        else:
            source = name_or_source
        applier = self._format_appliers[source]

        try:
            translations: MagicDict[IdType] = applier.apply(
                fmt,
                default_fmt=default_fmt,
                default_fmt_placeholders=self.default_fmt_placeholders.get(source),
                enable_uuid_heuristics=self.enable_uuid_heuristics,
            )
        except Exception as e:
            # Attach lookup context to whatever the applier raised, then let it propagate unchanged.
            e.add_note(f"{name_or_source=}")
            e.add_note(f"{source=}")
            raise

        if not self.reverse_mode:
            return translations

        return MagicDict(
            reverse_dict(translations, duplicate_key_action="raise"),  # type: ignore
            default_value=default_fmt.fstring(positional=True),
            enable_uuid_heuristics=False,
        )

    @property
    def names(self) -> list[NameType]:
        """Return names that can be translated."""
        return list(self._name_to_source)

    @property
    def sources(self) -> list[SourceType]:
        """Return the sources for which translation data is held."""
        return list(self._format_appliers)

    @property
    def placeholders(self) -> dict[SourceType, list[str]]:
        """Return placeholders per source, as reported by the format applier of each source."""
        return {applier.source: applier.placeholders for applier in self._format_appliers.values()}

    @property
    def len_per_source(self) -> dict[SourceType, int]:
        """Number of IDs per :attr:`source <sources>`."""
        return {applier.source: len(applier) for applier in self._format_appliers.values()}

    @property
    def name_to_source(self) -> NameToSource[NameType, SourceType]:
        """Return name-to-source mapping.

        A new dict is returned on each access; modifying it does not affect this ``TranslationMap``. Assign a new
        mapping to change which names are known.
        """
        return dict(self._name_to_source)

    @name_to_source.setter
    def name_to_source(self, value: NameToSource[NameType, SourceType]) -> None:
        # Store a private copy: the key set below is computed once, so sharing the caller's dict would let later
        # mutations make iteration/len disagree with item lookup.
        mapping: NameToSource[NameType, SourceType] = dict(value)
        self._name_to_source: NameToSource[NameType, SourceType] = mapping
        self._names_and_sources = set(mapping).union(self._format_appliers)

    @property
    def fmt(self) -> Format:
        """Return the translation format."""
        return self._fmt

    @fmt.setter
    def fmt(self, value: FormatType) -> None:
        self._fmt = Format.parse(value)

    @property
    def default_fmt(self) -> Format:
        """Return the format specification to use instead of :attr:`fmt` for fallback translation."""
        return self._default_fmt

    @default_fmt.setter
    def default_fmt(self, value: FormatType) -> None:
        self._default_fmt = Format.parse(value)

    @property
    def default_fmt_placeholders(self) -> InheritedKeysDict[SourceType, str, Any]:
        """Return the per-source default placeholder values handed to the format applier by :meth:`apply`."""
        return self._default_fmt_placeholders

    @default_fmt_placeholders.setter
    def default_fmt_placeholders(self, value: InheritedKeysDict[SourceType, str, Any]) -> None:
        self._default_fmt_placeholders = InheritedKeysDict.make(value) if value else InheritedKeysDict()

    @property
    def reverse_mode(self) -> bool:
        """Return reversed mode status flag.

        If set, the mappings returned by :meth:`apply` (and therefore also ``__getitem__``) are reversed.

        Returns:
            Reversal status flag.
        """
        return self._reverse_mode

    @reverse_mode.setter
    def reverse_mode(self, value: bool) -> None:
        self._reverse_mode = value

    @property
    def enable_uuid_heuristics(self) -> bool:
        """Return automatic UUID mitigation status."""
        return self._enable_uuid_heuristics

    @enable_uuid_heuristics.setter
    def enable_uuid_heuristics(self, value: bool) -> None:
        self._enable_uuid_heuristics = value

    @property
    def transformers(self) -> dict[SourceType, Transformer[IdType]]:
        """Get a dict ``{source: transformer}`` of :class:`.Transformer` instances used by this ``TranslationMap``.

        Sources without a transformer are omitted.
        """
        return {
            source: applier.transformer
            for source, applier in self._format_appliers.items()
            if applier.transformer is not None
        }

    def copy(self) -> Self:
        """Make a shallow copy of this ``TranslationMap`` using :func:`copy.copy`."""
        return copy(self)

    def __getitem__(self, name_or_source: NameType | SourceType) -> MagicDict[IdType]:
        return self.apply(name_or_source)

    def __contains__(self, name_or_source: object) -> bool:
        # Mirrors the name/source resolution of apply() without formatting any translations. The inherited
        # Mapping.__contains__ would call __getitem__ and build a full MagicDict just to test membership.
        source = self._name_to_source.get(name_or_source, name_or_source)  # type: ignore
        return source in self._format_appliers

    def __len__(self) -> int:
        return len(self._names_and_sources)

    def __iter__(self) -> Iterator[NameType | SourceType]:
        return iter(self._names_and_sources)

    def __bool__(self) -> bool:
        # Truthiness reflects whether any source has translation data, not the number of mapping keys.
        return bool(self._format_appliers)

    def __repr__(self) -> str:
        len_per_source = self.len_per_source
        # List the largest sources first.
        order = sorted(len_per_source, key=len_per_source.__getitem__, reverse=True)
        sources = ", ".join(f"'{source}': {len_per_source[source]} IDs" for source in order)
        return f"{tname(self)}({sources})"