import logging
from collections.abc import Callable, Generator, Iterable
from contextlib import contextmanager
from os import getenv
from pathlib import Path
from typing import TYPE_CHECKING, Any, Generic, TypeAlias
from rics.collections.dicts import InheritedKeysDict
from rics.env.read import read_bool
from rics.types import AnyPath
from id_translation.exceptions import ConfigurationError
from id_translation.fetching import AbstractFetcher, CacheAccess, Fetcher, MultiFetcher
from id_translation.mapping import Mapper
from id_translation.transform.types import Transformer, Transformers
from id_translation.types import IdType, NameType, SourceType
from . import factories as cf
from ._load_toml import load_toml_file
from .meta import ConfigMetadata, Metaconf
if TYPE_CHECKING:
from id_translation import Translator
SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS = "ID_TRANSLATION_SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS"
class TranslatorFactory(Generic[NameType, SourceType, IdType]):
    """Create a :class:`.Translator` from TOML inputs.

    Args:
        file: Path to the main TOML configuration file.
        fetchers: Paths to auxiliary fetcher configuration files.
        clazz: Translator type to create. Defaults to :class:`.Translator`.
        suppress_optional_fetcher_init_errors: If ``True``, fetchers configured with ``optional=true`` that fail
            to initialize are logged and discarded instead of raising. If ``None``, the value is read from the
            ``ID_TRANSLATION_SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS`` environment variable.
    """

    FetcherFactory: TypeAlias = Callable[[str, dict[str, Any]], AbstractFetcher[Any, Any]]
    """Signature for :attr:`FETCHER_FACTORY`."""
    # Wrapped in ``staticmethod`` because this factory is looked up on instances (``self.FETCHER_FACTORY``);
    # the remaining factories are only accessed through ``cls``.
    FETCHER_FACTORY: FetcherFactory = staticmethod(cf.default_fetcher_factory)
    """A callable ``(clazz, config) -> AbstractFetcher``.

    Overwrite attribute with your own :attr:`.FetcherFactory` implementation to customize.

    Args:
        clazz: Type of :class:`.AbstractFetcher` to create.
        config: Keyword arguments for the fetcher class.

    Returns:
        An :class:`.AbstractFetcher` instance.

    Raises:
        exceptions.ConfigurationError: If `config` is invalid.
        TypeError: If `clazz` is not an :class:`.AbstractFetcher` subtype.

    See Also:
        :ref:`translator-config-fetching`
    """

    MapperFactory: TypeAlias = Callable[[dict[str, Any], bool], Mapper[Any, Any, Any] | None]
    """Signature for :attr:`MAPPER_FACTORY`."""
    MAPPER_FACTORY: MapperFactory = cf.default_mapper_factory
    """A callable ``(config, for_fetcher) -> Mapper | None``.

    Overwrite attribute with your own :attr:`.MapperFactory` implementation to customize.
    If ``None`` is returned, a suitable default is used instead.

    Args:
        config: Keyword arguments for the :class:`.Mapper`.
        for_fetcher: Flag indicating that the :class:`.Mapper` returned will be used by an
            :class:`.AbstractFetcher` instance.

    Returns:
        A :class:`.Mapper` instance or ``None``.

    Raises:
        ConfigurationError: If `config` is invalid.

    See Also:
        :ref:`translator-config-mapping`
    """

    TransformerFactory: TypeAlias = Callable[[str, dict[str, Any]], Transformer[Any]]
    """Signature for :attr:`TRANSFORMER_FACTORY`."""
    TRANSFORMER_FACTORY: TransformerFactory = cf.default_transformer_factory
    """A callable ``(clazz, config) -> Transformer``.

    Overwrite attribute with your own :attr:`.TransformerFactory` implementation to customize.

    Args:
        clazz: Type of :class:`.Transformer` to create.
        config: Keyword arguments for the transformer class.

    Returns:
        A :class:`.Transformer` instance.

    Raises:
        ConfigurationError: If `config` is invalid.

    See Also:
        :ref:`translator-config-transform`
    """

    CacheAccessFactory: TypeAlias = Callable[[str, dict[str, Any]], CacheAccess[Any, Any]]
    """Signature for :attr:`CACHE_ACCESS_FACTORY`."""
    CACHE_ACCESS_FACTORY: CacheAccessFactory = cf.default_cache_access_factory
    """A callable ``(clazz, config) -> CacheAccess``.

    Overwrite attribute with your own :attr:`.CacheAccessFactory` implementation to customize.

    Args:
        clazz: Type of :class:`.CacheAccess` to create.
        config: Keyword arguments for the cache class.

    Returns:
        A :class:`.CacheAccess` instance.

    Raises:
        ConfigurationError: If `config` is invalid.
    """

    TOP_LEVEL_KEYS = ("translator", "mapping", "fetching", "unknown_ids", "transform")
    """Top-level keys allowed in the main configuration file."""

    def __init__(
        self,
        file: AnyPath,
        fetchers: Iterable[AnyPath],
        clazz: type["Translator[NameType, SourceType, IdType]"] | None = None,
        suppress_optional_fetcher_init_errors: bool | None = None,
    ) -> None:
        # Imported here rather than at module level; the module only imports it under TYPE_CHECKING.
        from id_translation import Translator

        self.file = str(file)
        self.extra_fetchers = list(map(str, fetchers))
        self.clazz: type[Translator[NameType, SourceType, IdType]] = clazz or Translator[NameType, SourceType, IdType]

        # The meta configuration is looked up next to the main configuration file.
        metaconf_path = Path(self.file).with_name("metaconf.toml")
        self._metaconf = Metaconf.from_path_or_default(metaconf_path)

        self.logger = logging.getLogger(__package__).getChild(type(self).__name__)

        if suppress_optional_fetcher_init_errors is None:
            # No explicit choice made by the caller; defer to the environment.
            suppress_optional_fetcher_init_errors = read_bool(SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS)
        self.suppress_optional_fetcher_init_errors = suppress_optional_fetcher_init_errors

    @property
    def metaconf(self) -> Metaconf:
        """Returns the meta configuration instance used by this factory."""
        return self._metaconf

    def create(self) -> "Translator[NameType, SourceType, IdType]":
        """Create :class:`.Translator` instance.

        Returns:
            A translator of type :attr:`clazz`, built from the main file and any auxiliary fetcher files.

        Raises:
            ConfigurationError: If any of the configuration files are invalid.
        """
        config_metadata = ConfigMetadata.from_toml_paths(self.file, self.extra_fetchers, self.clazz)

        # NOTE(review): the nesting of the ``with`` blocks below was reconstructed from a source listing that
        # had lost its indentation -- confirm against the original file.
        with _rethrow_with_file(self.file):
            config: dict[str, Any] = self.load_toml_file(self.file)

        # Fetcher handling attaches file information itself, once per fetcher file.
        fetcher, fetcher_transformers = self._handle_fetching(
            config.pop("fetching", {}), self.extra_fetchers, _identifier_from_config_metadata(config_metadata)
        )

        with _rethrow_with_file(self.file):
            _check_allowed_keys(self.TOP_LEVEL_KEYS, actual=config, toml_path="<root>")

            translator_config = config.pop("translator", {})
            mapper = self._make_mapper("translator", translator_config)
            _make_default_translations(translator_config, config.pop("unknown_ids", {}))

            translator_transformers = self._handler_transformers(config.pop("transform", {}))
            if keys := set(fetcher_transformers).intersection(translator_transformers):
                # Sorted so that the message is deterministic.
                msg = f"Transformers for {len(keys)} sources also defined on the fetcher level: {sorted(keys)}."
                raise ValueError(msg)
            translator_config["transformers"] = translator_transformers | fetcher_transformers

            ans = self.clazz(
                fetcher,
                mapper=mapper,
                **translator_config,
            )

        ans._config_metadata = config_metadata
        return ans

    def load_toml_file(self, path: str) -> dict[str, Any]:
        """Read a TOML file from `path` with the current :attr:`.Metaconf.env` settings.

        Args:
            path: Path to file.

        Returns:
            A dict parsed from `path`.

        See Also:
            :func:`.load_toml_file`
        """
        env = self.metaconf.env
        # Inside a method body this name resolves to the module-level function, not to this method.
        return load_toml_file(
            path,
            allow_interpolation=env.allow_interpolation,
            allow_nested=env.allow_nested,
            allow_blank=env.allow_blank,
        )

    def _handle_fetching(
        self,
        config: dict[str, Any],
        extra_fetchers: list[str],
        default_identifiers: list[list[str]],
    ) -> tuple[Fetcher[SourceType, IdType], Transformers[SourceType, IdType]]:
        """Create the fetcher(s) and collect transformers defined in auxiliary fetcher files.

        Args:
            config: The ``[fetching]`` section of the main file. May be empty.
            extra_fetchers: Paths to auxiliary fetcher configuration files.
            default_identifiers: Default fetcher identifiers; index 0 belongs to the main file and index ``i``
                to ``extra_fetchers[i - 1]``.

        Returns:
            A tuple ``(fetcher, transformers)``. The fetcher is a :class:`.MultiFetcher` when more than one
            fetcher was created.

        Raises:
            ConfigurationError: If a file is invalid or if no fetcher could be created.
        """
        multi_fetcher_kwargs = config.pop("MultiFetcher", {})

        fetchers: list[Fetcher[SourceType, IdType]] = []
        transformers: Transformers[SourceType, IdType] = {}

        # NOTE(review): the extent of the ``with`` blocks below was reconstructed -- confirm against the
        # original file.
        if config:
            with _rethrow_with_file(self.file, show_init_errors_hint=True):
                fetcher = self._make_fetcher(default_identifiers[0], **config)
            if isinstance(fetcher, Exception):
                self._log_optional_fetcher_init_error(fetcher, str(self.file))
            else:
                fetchers.append(fetcher)  # Add primary fetcher

        for i, fetcher_file in enumerate(extra_fetchers, start=1):
            with _rethrow_with_file(fetcher_file, show_init_errors_hint=True):
                fetcher_config = self.load_toml_file(fetcher_file)
                _check_allowed_keys(["fetching", "transform"], actual=fetcher_config, toml_path="<root>")

                if "fetching" not in fetcher_config:
                    # Explicit error instead of an opaque KeyError('fetching').
                    raise ConfigurationError("Auxiliary fetcher configuration has no [fetching]-section.")

                fetcher = self._make_fetcher(default_identifiers[i], **fetcher_config["fetching"])
                if isinstance(fetcher, Exception):
                    # Optional fetcher that failed to initialize; its transformers are skipped as well.
                    self._log_optional_fetcher_init_error(fetcher, fetcher_file)
                    continue

                new_transformers = self._handler_transformers(fetcher_config.get("transform", {}))
                if keys := set(new_transformers).intersection(transformers):
                    # Sorted so that the message is deterministic.
                    msg = (
                        f"Transformers for {len(keys)} sources were already defined"
                        f" in another fetcher file: {sorted(keys)}."
                    )
                    raise ValueError(msg)

                transformers.update(new_transformers)
                fetchers.append(fetcher)

        if not fetchers:
            raise ConfigurationError(
                f"At least one [fetching]-section is required. Add it to '{self.file}',"
                " or as an auxiliary configuration.",
            )

        retval: Fetcher[SourceType, IdType]
        if len(fetchers) == 1:
            if multi_fetcher_kwargs and self.logger.isEnabledFor(logging.DEBUG):
                self.logger.debug(
                    f"MultiFetcher arguments {multi_fetcher_kwargs} are ignored; only one fetcher defined."
                )
            retval = fetchers[0]
        else:
            retval = MultiFetcher(*fetchers, **multi_fetcher_kwargs)

        return retval, transformers

    def _log_optional_fetcher_init_error(self, exception: BaseException, fetcher_file: str) -> None:
        """Log that an optional fetcher was discarded because it failed to initialize.

        Args:
            exception: The initialization error.
            fetcher_file: Path of the file in which the fetcher is defined.
        """
        value = getenv(SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS)
        if value is None:
            # Variable not set, so suppression presumably came from the constructor argument. Report the
            # effective setting rather than a misleading ``<VARIABLE>=None``.
            reason = f"suppress_optional_fetcher_init_errors={self.suppress_optional_fetcher_init_errors}"
        else:
            reason = f"{SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS}={value}"

        url = "https://id-translation.readthedocs.io/en/stable/documentation/translator-config.html#optional-fetchers"
        # Called outside of an ``except`` block, so the exception is passed explicitly through ``exc_info``.
        self.logger.exception(
            f"Discarded optional fetcher in file '{fetcher_file}': {exception!r}."
            f"\nHint: Discarded since `optional=true` and `{reason}`."
            f"\nHint: See {url} for help.",
            exc_info=exception,
            extra={"fetcher_file": str(fetcher_file), "reason": str(exception)},
        )

    @classmethod
    def _make_mapper(cls, parent_section: str, config: dict[str, Any]) -> Mapper[Any, Any, Any] | None:
        """Create a mapper from the ``mapping`` subsection of `config`, if there is one.

        The ``mapping`` key is removed from `config`.

        Args:
            parent_section: Name of the section which `config` was read from.
            config: Section that may contain a ``mapping`` subsection.

        Returns:
            The result of :attr:`MAPPER_FACTORY`, or ``None`` if there is no ``mapping`` subsection.
        """
        if "mapping" not in config:
            return None  # pragma: no cover

        config = config.pop("mapping")
        for_fetcher = parent_section.startswith("fetching")
        if for_fetcher:
            # Explicit configuration takes precedence over the fetcher defaults.
            config = {**AbstractFetcher.default_mapper_kwargs(), **config}
        return cls.MAPPER_FACTORY(config, for_fetcher)

    @classmethod
    def _make_cache_access(cls, config: dict[str, Any]) -> CacheAccess[Any, Any]:
        """Create a cache access object; the ``type`` key of `config` selects the class."""
        return cls.CACHE_ACCESS_FACTORY(config.pop("type"), config)

    def _make_fetcher(
        self, identifiers: list[str], /, **config: Any
    ) -> AbstractFetcher[SourceType, IdType] | Exception:
        """Create a fetcher from the contents of a ``[fetching]`` section.

        Args:
            identifiers: Default identifiers, used unless the fetcher configuration specifies its own.
                Positional-only so that it cannot collide with a key in `config`.
            **config: Contents of the ``[fetching]`` section. After the optional ``mapping`` and ``cache``
                keys are removed, exactly one key (the fetcher class) must remain.

        Returns:
            The fetcher. If initialization of an optional fetcher fails while
            :attr:`suppress_optional_fetcher_init_errors` is set, the exception is returned instead of raised.

        Raises:
            ConfigurationError: If there is not exactly one fetcher implementation in `config`.
        """
        mapper = self._make_mapper("fetching", config)  # Returns None if there is no mapping section.
        cache_access = self._make_cache_access(config.pop("cache")) if "cache" in config else None

        if not config:  # pragma: no cover
            raise ConfigurationError("Fetcher implementation section missing.")
        if len(config) > 1:  # pragma: no cover
            raise ConfigurationError(f"Multiple fetcher implementations specified in the same file: {sorted(config)}")

        clazz, kwargs = next(iter(config.items()))
        kwargs.setdefault("identifiers", identifiers)
        kwargs["mapper"] = mapper
        kwargs["cache_access"] = cache_access

        # Only if ID_TRANSLATION_SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS=true (or the equivalent constructor
        # argument). Values of `optional` that are not of type bool are never treated as optional here.
        is_optional = kwargs.get("optional") is True and bool(self.suppress_optional_fetcher_init_errors)

        try:
            return self.FETCHER_FACTORY(clazz, kwargs)
        except Exception as e:
            if not is_optional:
                raise
            return e  # The caller logs the error and discards the fetcher.

    @classmethod
    def _handler_transformers(cls, per_source: dict[SourceType, dict[str, Any]]) -> Transformers[SourceType, IdType]:
        """Create one transformer per source.

        Args:
            per_source: A dict ``{source: {transformer_class: kwargs}}``.

        Returns:
            A dict ``{source: transformer}``.

        Raises:
            ConfigurationError: If a source does not have exactly one transformer class.
        """
        transformers = {}
        for source, config in per_source.items():
            if len(config) != 1:
                raise ConfigurationError(
                    "Transformation config must be specified as [transform.<source>.<transformer-class>] sections."
                )

            clazz, kwargs = next(iter(config.items()))
            transformers[source] = cls.TRANSFORMER_FACTORY(clazz, kwargs)
        return transformers
def _make_default_translations(
    out: dict[str, Any],
    config: dict[str, Any],
) -> None:
    """Convert the ``[unknown_ids]`` section into translator keyword arguments.

    Args:
        out: Dict to update. Receives ``default_fmt`` and/or ``default_fmt_placeholders`` when the
            corresponding keys are present in `config`.
        config: The ``[unknown_ids]`` section. Keys that are handled are removed.

    Raises:
        ValueError: If `config` contains keys other than ``fmt`` and ``overrides``.
    """
    # The section is read from the root of the file, so report it as such if validation fails.
    _check_allowed_keys(["fmt", "overrides"], actual=config, toml_path="unknown_ids")

    if "fmt" in config:
        out["default_fmt"] = config.pop("fmt")

    if "overrides" in config:
        # Values that are dicts are treated as specific overrides; the rest as shared defaults.
        shared, specific = _split_overrides(config.pop("overrides"))
        out["default_fmt_placeholders"] = InheritedKeysDict(specific, default=shared)
def _check_allowed_keys(allowed: Iterable[str], *, actual: Iterable[str], toml_path: str) -> None:
    """Verify that a TOML section contains only permitted keys.

    Args:
        allowed: Keys that are permitted in the section.
        actual: Keys found in the section. A dict may be passed, in which case its keys are used.
        toml_path: Name of the section; used in the error message.

    Raises:
        ValueError: If `actual` contains keys which are not in `allowed`.
    """
    permitted = frozenset(allowed)
    forbidden = sorted({key for key in actual if key not in permitted})
    if not forbidden:
        return
    raise ValueError(f"Forbidden keys {forbidden} in [{toml_path}]-section.")
def _split_overrides(overrides: Any) -> Any:
    """Partition `overrides` into a tuple ``(shared, specific)``.

    Entries whose value is a dict end up in ``specific``; all other entries end up in ``shared``.
    """
    shared = {}
    specific = {}
    for key, value in overrides.items():
        bucket = specific if isinstance(value, dict) else shared
        bucket[key] = value
    return shared, specific
def _identifier_from_config_metadata(config_metadata: ConfigMetadata) -> list[list[str]]:
    """Derive default fetcher identifiers; the main file comes first, then the extra fetcher files."""
    entries = (config_metadata.main, *config_metadata.extra_fetchers)
    # Use the config filename and sha hash as the default keys
    return [[entry[0].name, entry[1]] for entry in entries]
@contextmanager
def _rethrow_with_file(
    file: str,
    *,
    show_init_errors_hint: bool = False,
) -> Generator[None, None, None]:
    """Attach file information to exceptions raised in the managed block.

    Hints are added as notes on the exception that was caught. A ``ConfigurationError`` is then re-raised
    as-is; any other ``Exception`` is wrapped in a new ``ConfigurationError`` with the original as its cause.

    Args:
        file: Path of the file being processed.
        show_init_errors_hint: If ``True``, also add a hint about the environment variable that suppresses
            initialization errors for optional fetchers.
    """
    try:
        yield
    except Exception as e:
        hints = [f"In file: {Path(file).resolve()}"]
        if show_init_errors_hint:
            hints.append(f"Setting {SUPPRESS_OPTIONAL_FETCHER_INIT_ERRORS}=true may help temporarily.")
        for hint in hints:
            e.add_note(f"Hint: {hint}")

        if not isinstance(e, ConfigurationError):
            msg = f"{type(e).__name__}: {e}\n raised when parsing file: {Path(file).resolve()}"
            raise ConfigurationError(msg) from e
        raise