# Source code for app.components.EnrichmentAdmin

from importlib import util as import_util
import os
import pandas as pd
from pathlib import Path
from components import parsing
from components.tools import utils


class EnrichmentAdmin:
    """Manage enrichment handlers and orchestrate enrichment runs.

    Handler modules are the ``*.py`` files (dunder-prefixed files excluded)
    found in the directory named by ``parameters['Data paths']['Enrichers']``.
    Each module must expose a ``handler`` callable whose return value provides
    ``get_available()``, ``get_default_panel()``, ``nice_name`` and
    ``enrich(enrichment_input, enrichment_options)``.

    :param parameters_file: Path to parameters TOML used to locate handler modules.
    """

    def __init__(self, parameters_file: str) -> None:
        parameters: dict = utils.read_toml(Path(parameters_file))
        self._enrichment_handlers: dict = {}
        self._enrichments: dict = {}
        self._defaults: dict = {}
        # Nothing in this class adds to this list; get_disabled() reports it as-is.
        self._disabled: list = []
        self._handler_basedir: str = os.path.join(*parameters['Data paths']['Enrichers'])

        for module_name, filepath in self._iter_handler_files():
            try:
                handler = self._load_handler(filepath)
                entry: dict = {
                    'handler': handler,
                    'available': handler.get_available(),
                    'name': handler.nice_name,
                    'defaults': handler.get_default_panel(),
                }
            except Exception as e:
                print(f"Warning: Failed to load enrichment module {module_name}: {e}")
                # Continue with other modules even if one fails
                continue
            self._enrichment_handlers[module_name] = entry

            # Map every enrichment name to the module that provides it; a
            # module processed later overrides an earlier one for the same name.
            for enrichment in entry['available']:
                self._enrichments[enrichment] = module_name

            for enrichment in entry['defaults']:
                banned = parameters['file loading']['Do not show in enrichment default']
                # Defaults containing any configured ban substring are hidden.
                if not any(ban_str in enrichment.lower() for ban_str in banned):
                    self._defaults[enrichment] = module_name

        self.import_handlers()

    def _iter_handler_files(self):
        """Yield ``(module_name, filepath)`` for each handler file in the handler directory."""
        for filename in os.listdir(self._handler_basedir):
            if not filename.endswith('.py') or filename.startswith('__'):
                continue
            module_name: str = filename.rsplit('.', maxsplit=1)[0]
            yield module_name, os.path.join(self._handler_basedir, filename)

    @staticmethod
    def _load_handler(filepath: str):
        """Execute the module at ``filepath`` and return a new ``handler()`` instance from it."""
        spec = import_util.spec_from_file_location('module.name', filepath)
        api_module = import_util.module_from_spec(spec)
        spec.loader.exec_module(api_module)
        return api_module.handler()

    def get_available(self) -> list:
        """List all available enrichment names across handlers.

        :returns: Sorted list of enrichment names.
        """
        return sorted(self._enrichments)

    def get_default(self) -> list:
        """List default enrichment names suggested by handlers.

        :returns: Sorted list of default enrichment names.
        """
        return sorted(self._defaults)

    def get_disabled(self) -> list:
        """List enrichments recorded as disabled.

        :returns: Sorted list of disabled enrichment names.
        """
        return sorted(self._disabled)

    def import_handlers(self) -> dict:
        """Import all enrichment handler modules from configured directory.

        Modules that fail to load are reported with a warning and skipped.
        The result is also stored on ``self._imported_handlers``, which
        :meth:`enrich_all` uses to look up handlers.

        :returns: Dict mapping module name -> handler instance.
        """
        ret_dict: dict = {}
        for module_name, filepath in self._iter_handler_files():
            try:
                ret_dict[module_name] = self._load_handler(filepath)
            except Exception as e:
                print(f"Warning: Failed to load enrichment handler {module_name}: {e}")
                # Continue with other modules even if one fails
                continue
        self._imported_handlers: dict = ret_dict
        return ret_dict

    @staticmethod
    def _build_enrichment_input(data_table: pd.DataFrame, id_column: str = None, id_list: list = None, split_by_column: str = None) -> list:
        """Build the ``[[label, identifiers], ...]`` structure passed to ``handler.enrich``."""
        if split_by_column:
            groups: list = []
            for group in data_table[split_by_column].unique():
                subset = data_table[data_table[split_by_column] == group]
                groups.append([group, list(subset[id_column].values)])
            return groups
        if id_list:
            return [['All', id_list]]
        # Identifiers come from the full table when no split is requested.
        return [['All', list(data_table[id_column].values)]]

    def enrich_all(self, data_table: pd.DataFrame, enrichment_strings: list, id_column: str = None, id_list: list = None, split_by_column: str = None, split_name: str = None) -> tuple:
        """Run all requested enrichments via their handlers.

        Requested enrichments are grouped by the handler module providing
        them, and each handler is called once with its names joined by ``;``.
        A handler that raises is reported on stdout and skipped, so its
        results are simply absent from the returned lists.

        :param data_table: Input table with identifiers and optional split column.
        :param enrichment_strings: List of enrichment names to run.
        :param id_column: Column containing identifiers to enrich.
        :param id_list: Explicit list of identifiers if not using ``id_column``.
        :param split_by_column: Optional column to split input by groups/baits.
        :param split_name: Label for the split dimension. Accepted for
            interface compatibility; this method does not use it.
        :returns: Tuple of (result_names, return_dataframes, information).
        :raises AssertionError: If neither ``id_column`` nor ``id_list`` is
            provided, or if ``id_list`` is combined with ``split_by_column``.
        :raises KeyError: If a requested enrichment name is not available.
        """
        # Raised explicitly (not via ``assert``) so validation survives ``python -O``.
        if id_column is None and id_list is None:
            raise AssertionError('Supply either id_column or id_list')
        if split_by_column and id_list is not None:
            # Splitting reads identifiers per group from the table itself.
            raise AssertionError('Can not supply id_list with split_by_column!')

        enrichments_to_do: dict = {}
        for e_str in enrichment_strings:
            enrichments_to_do.setdefault(self._enrichments[e_str], []).append(e_str)

        enrichment_results: list = []
        enrichment_names: list = []
        done_info: list = []
        for api, enrichmentlist in enrichments_to_do.items():
            try:
                enrichment_options: str = ';'.join(enrichmentlist)
                # Rebuilt per handler so each one receives its own fresh lists.
                enrichment_input: list = self._build_enrichment_input(
                    data_table, id_column, id_list, split_by_column)
                handler = self._imported_handlers[api]
                result_names, return_dataframes, done_information = handler.enrich(
                    enrichment_input, enrichment_options)
            except Exception as e:
                # TODO move to logging module
                print(f'Error in enrichment {api}: {e}')
                continue
            enrichment_results.extend(return_dataframes)
            enrichment_names.extend(result_names)
            done_info.extend(done_information)
        return (enrichment_names, enrichment_results, done_info)