from importlib import util as import_util
import os
import pandas as pd
from pathlib import Path
from components import parsing
from components.tools import utils
[docs]
class EnrichmentAdmin:
    """Manage enrichment handlers and orchestrate enrichment runs.

    Handler modules are the ``*.py`` files (names starting with ``__`` are
    skipped) found in the directory configured under
    ``['Data paths']['Enrichers']`` of the parameters file. Each module must
    expose a ``handler`` callable; the object it returns is expected to offer
    ``get_available()``, ``get_default_panel()``, ``nice_name`` and
    ``enrich(enrichment_input, enrichment_options)``.

    :param parameters_file: Path to parameters TOML used to locate handler modules.
    """

    def __init__(self, parameters_file: str) -> None:
        parameters: dict = utils.read_toml(Path(parameters_file))
        # module name -> {'handler', 'available', 'name', 'defaults'}
        self._enrichment_handlers: dict = {}
        # enrichment name -> name of the module that provides it
        self._enrichments: dict = {}
        # default enrichment name -> name of the module that provides it
        self._defaults: dict = {}
        self._disabled: list = []
        self._handler_basedir: str = os.path.join(*parameters['Data paths']['Enrichers'])

        # Every handler module is executed and instantiated exactly once; the
        # same instances back both the metadata below and ``enrich_all``.
        for module_name, handler in self.import_handlers().items():
            try:
                entry: dict = {
                    'handler': handler,
                    'available': handler.get_available(),
                    'name': handler.nice_name,
                    'defaults': handler.get_default_panel(),
                }
            except Exception as e:
                print(f"Warning: Failed to load enrichment module {module_name}: {e}")
                # Continue with other modules even if one fails
                continue
            self._enrichment_handlers[module_name] = entry
            for enrichment in entry['available']:
                self._enrichments[enrichment] = module_name
            for enrichment in entry['defaults']:
                # Looked up at the point of use so the key is only required
                # when at least one handler actually proposes defaults.
                banned = parameters['file loading']['Do not show in enrichment default']
                # Ban strings are matched against the lower-cased name.
                if not any(ban_str in enrichment.lower() for ban_str in banned):
                    self._defaults[enrichment] = module_name

    @staticmethod
    def _instantiate_handler(filepath: str):
        """Execute the module at ``filepath`` and return ``module.handler()``."""
        spec = import_util.spec_from_file_location('module.name', filepath)
        api_module = import_util.module_from_spec(spec)
        spec.loader.exec_module(api_module)
        return api_module.handler()

    def _build_enrichment_input(self, data_table: pd.DataFrame, id_column: str, id_list: list, split_by_column: str) -> list:
        """Build the ``[[label, identifiers], ...]`` list passed to handlers."""
        if split_by_column:
            return [
                [group, list(data_table[data_table[split_by_column] == group][id_column].values)]
                for group in data_table[split_by_column].unique()
            ]
        if id_list:
            return [['All', id_list]]
        return [['All', list(data_table[id_column].values)]]

    def get_available(self) -> list:
        """List all available enrichment names across handlers.

        :returns: Sorted list of enrichment names.
        """
        return sorted(self._enrichments)

    def get_default(self) -> list:
        """List default enrichment names suggested by handlers.

        :returns: Sorted list of default enrichment names.
        """
        return sorted(self._defaults)

    def get_disabled(self) -> list:
        """List enrichments disabled by configuration.

        :returns: Sorted list of disabled enrichment names.
        """
        return sorted(self._disabled)

    def import_handlers(self) -> dict:
        """Import all enrichment handler modules from configured directory.

        Modules that fail to import or instantiate are reported with a
        warning and skipped. The result is also stored on the instance for
        use by :meth:`enrich_all`.

        :returns: Dict mapping module name -> handler instance.
        """
        handlers: dict = {}
        # Sorted so that load order (and therefore which module wins when two
        # handlers offer the same enrichment name) does not depend on the
        # arbitrary order returned by the filesystem.
        for module_filename in sorted(os.listdir(self._handler_basedir)):
            if not module_filename.endswith('.py') or module_filename.startswith('__'):
                continue
            module_name: str = module_filename.rsplit('.', maxsplit=1)[0]
            filepath: str = os.path.join(self._handler_basedir, module_filename)
            try:
                handlers[module_name] = self._instantiate_handler(filepath)
            except Exception as e:
                print(f"Warning: Failed to load enrichment handler {module_name}: {e}")
                # Continue with other modules even if one fails
                continue
        self._imported_handlers: dict = handlers
        return handlers

    def enrich_all(self, data_table: pd.DataFrame, enrichment_strings: list, id_column: str = None, id_list: list = None, split_by_column: str = None, split_name: str = None) -> tuple:
        """Run all requested enrichments via their handlers.

        Requested enrichments are grouped per handler module and each handler
        is called once with its enrichment names joined by ``';'``. A handler
        that raises is reported and skipped; it contributes nothing to the
        returned lists.

        :param data_table: Input table with identifiers and optional split column.
        :param enrichment_strings: List of enrichment names to run.
        :param id_column: Column containing identifiers to enrich.
        :param id_list: Explicit list of identifiers if not using ``id_column``.
        :param split_by_column: Optional column to split input by groups/baits.
        :param split_name: Label for the split dimension. Currently not used by
            this method; accepted for interface compatibility.
        :returns: Tuple of (result_names, return_dataframes, information).
        :raises AssertionError: If neither ``id_column`` nor ``id_list`` is
            provided, or if ``id_list`` is combined with ``split_by_column``.
        :raises KeyError: If an entry of ``enrichment_strings`` is not a known
            enrichment name.
        """
        # Raised explicitly (rather than via ``assert``) so the validation
        # still runs when Python is started with -O.
        if id_column is None and id_list is None:
            raise AssertionError('Supply either id_column or id_list')
        if split_by_column and id_list is not None:
            raise AssertionError('Can not supply id_list with split_by_column!')

        # handler module name -> enrichment names requested from it
        enrichments_to_do: dict = {}
        for e_str in enrichment_strings:
            enrichments_to_do.setdefault(self._enrichments[e_str], []).append(e_str)

        enrichment_results: list = []
        enrichment_names: list = []
        done_info: list = []
        for api, enrichmentlist in enrichments_to_do.items():
            try:
                # Built inside the try so that a bad column name is reported
                # per handler instead of aborting the whole run.
                enrichment_input: list = self._build_enrichment_input(
                    data_table, id_column, id_list, split_by_column)
                handler = self._imported_handlers[api]
                result_names, return_dataframes, done_information = handler.enrich(
                    enrichment_input, ';'.join(enrichmentlist))
            except Exception as e:
                #TODO move to logging module
                print(f'Error in enrichment {api}: {e}')
                continue
            enrichment_results.extend(return_dataframes)
            enrichment_names.extend(result_names)
            done_info.extend(done_information)
        return (enrichment_names, enrichment_results, done_info)