# Source code for app.pipeline_module.batch_figure_builder_from_divs

#!/usr/bin/env python3
"""
Batch Figure Builder using saved Dash divs

This module loads the saved div pickle files generated by the batch pipeline
and uses them directly with infra.save_figures, just like the GUI does.

This approach is much more reliable than trying to reconstruct figures from JSON.
"""

import os
import sys
import pickle
import json
import logging
import copy
from typing import Dict, List, Any, Optional, Tuple
from pathlib import Path

# Add current directory to path for imports
sys.path.insert(0, os.path.dirname(__file__))

from pipeline_module.pipeline_batch import dash_to_wire
from components import infra
from components.figures import tic_graph
from components import parsing

logger = logging.getLogger(__name__)


def load_div_pickle(pickle_path: str) -> Dict[str, Any]:
    """Load a dict of saved div components from a pickle file.

    This function is best-effort: a missing, unreadable or malformed file is
    logged and reported as an empty dict rather than raised.

    .. warning::
       ``pickle.load`` can execute arbitrary code while deserialising. Only
       point this at pickle files written by a trusted source.

    :param pickle_path: Path to the pickle file.
    :returns: Dict of div components, or an empty dict if the file is
        missing, cannot be unpickled, or does not contain a dict.
    """
    try:
        # Open directly rather than testing os.path.exists() first: the file
        # could disappear between the check and the open.
        with open(pickle_path, 'rb') as f:
            # SECURITY: unpickling untrusted data is unsafe (see docstring).
            divs = pickle.load(f)
    except FileNotFoundError:
        logger.warning(f"Div pickle file not found: {pickle_path}")
        return {}
    except Exception as e:
        # Unpickling can raise nearly anything (UnpicklingError, EOFError,
        # ImportError/AttributeError for missing classes, ...), so the broad
        # catch is deliberate here.
        logger.error(f"Failed to load divs from {pickle_path}: {e}")
        return {}

    # Callers look divs up by key; enforce the declared return type instead
    # of handing back an arbitrary unpickled object.
    if not isinstance(divs, dict):
        logger.error(
            f"Failed to load divs from {pickle_path}: "
            f"expected a dict, got {type(divs).__name__}"
        )
        return {}

    logger.info(f"Loaded {len(divs)} divs from {pickle_path}")
    return divs
def _append_ordered_divs(analysis_divs: List[Any], saved_divs: Dict[str, Any],
                         order: List[str], label: str,
                         missing_level: int) -> None:
    """Append the wire form of each div named in ``order`` to ``analysis_divs``.

    :param analysis_divs: Output list, extended in place.
    :param saved_divs: Dict of saved div components keyed by figure name.
    :param order: Keys to emit, in output order.
    :param label: Name used in log messages (e.g. ``'QC'``).
    :param missing_level: ``logging`` level used when a key is absent.
    """
    for div_key in order:
        if div_key in saved_divs:
            # Convert to wire format - infra.save_figures expects dict format
            analysis_divs.append(dash_to_wire(saved_divs[div_key]))
            logger.info(f"Added {label} div: {div_key}")
        else:
            logger.log(missing_level, f"{label[0].upper()}{label[1:]} div not found: {div_key}")


def _append_tic_divs(analysis_divs: List[Any], qc_divs: Dict[str, Any],
                     batch_output_dir: str, params: dict) -> None:
    """Append one TIC div per datatype found in ``03_qc_artifacts.json``.

    Skips the TIC section (with a warning) when the artifacts file, the
    ``'tic'`` entry in it, or the saved ``'tic'`` template div is unavailable.

    :param analysis_divs: Output list, extended in place.
    :param qc_divs: Saved QC divs; ``qc_divs['tic']`` is used as the template.
    :param batch_output_dir: Directory containing ``03_qc_artifacts.json``.
    :param params: Parsed parameters dict (for TIC rendering defaults).
    """
    artifacts_path = os.path.join(batch_output_dir, "03_qc_artifacts.json")
    try:
        with open(artifacts_path, "r") as f:
            qc_data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and decoding failures.
        logger.warning(f"Could not read QC artifacts, skipping TIC divs: {e}")
        return

    if not isinstance(qc_data, dict) or 'tic' not in qc_data:
        logger.warning("TIC data not found in QC artifacts")
        return
    if 'tic' not in qc_divs:
        logger.warning("TIC div not found")
        return

    for datatype in qc_data['tic']:
        # Janky, but we want all chromatograms here: clone the saved TIC div
        # once per datatype, then retitle it and swap in a fresh figure.
        tic_div = copy.deepcopy(qc_divs['tic'])
        tic_div.children[0].children = tic_div.children[0].children.replace('TIC', datatype)
        tic_div.children[1].figure = tic_graph.tic_figure(
            defaults=params["Figure defaults"]["full-height"],
            traces=qc_data['tic'],
            datatype=datatype,
        )
        analysis_divs.append(dash_to_wire(tic_div))
        logger.info(f"Added TIC div: {datatype}")


def build_analysis_divs_from_saved_divs(batch_output_dir: str, workflow: str, params: dict) -> List[Any]:
    """Build ``analysis_divs`` list from saved div pickle files.

    The GUI export expects a flat list of individual div components. Divs are
    emitted in this order: QC divs, one TIC div per datatype, then the
    workflow-specific divs. Missing divs are logged and skipped.

    :param batch_output_dir: Directory containing the batch output files.
    :param workflow: Workflow type ('proteomics' or 'interactomics'),
        matched case-insensitively.
    :param params: Parsed parameters dict (for TIC rendering defaults).
    :returns: List of analysis div components ready for infra.save_figures.
    """
    analysis_divs: List[Any] = []

    # Load QC divs (common to both workflows)
    qc_divs = load_div_pickle(os.path.join(batch_output_dir, "03_qc_divs.pickle"))

    # Order for QC figures (matches GUI order)
    qc_order = [
        'counts', 'coverage', 'common_proteins', 'reproducibility', 'missing',
        'sum', 'mean', 'distribution', 'commonality',
    ]
    _append_ordered_divs(analysis_divs, qc_divs, qc_order, "QC", logging.WARNING)

    # TIC plots follow the QC figures.
    _append_tic_divs(analysis_divs, qc_divs, batch_output_dir, params)

    # Workflow-specific divs: (pickle file name, figure order).
    # NOTE(review): 'pertubation' is kept as spelled; presumably it matches
    # the key written by the batch pipeline - confirm before renaming.
    workflow_specs = {
        "proteomics": (
            "04_proteomics_divs.pickle",
            ['na_filter', 'normalization', 'missing_values_in_other_samples',
             'imputation', 'pca', 'cv', 'clustermap', 'pertubation', 'volcano'],
        ),
        "interactomics": (
            "05_interactomics_divs.pickle",
            ['saint', 'known', 'common_proteins', 'network', 'pca',
             'enrichment', 'msmic'],
        ),
    }
    workflow_key = workflow.lower()
    if workflow_key in workflow_specs:
        pickle_name, order = workflow_specs[workflow_key]
        workflow_divs = load_div_pickle(os.path.join(batch_output_dir, pickle_name))
        _append_ordered_divs(analysis_divs, workflow_divs, order, workflow_key, logging.INFO)
    else:
        logger.warning(f"Unknown workflow '{workflow}'; no workflow-specific divs added")

    logger.info(f"Built {len(analysis_divs)} analysis divs for {workflow} workflow")
    return analysis_divs
def get_commonality_pdf_data(batch_output_dir: str) -> Optional[str]:
    """Get commonality PDF data if available.

    Reads ``03_qc_artifacts.json`` from ``batch_output_dir`` and returns the
    value stored under its ``'commonality_pdf'`` key. This function is
    best-effort: a missing, unreadable or malformed artifacts file is logged
    and reported as ``None`` rather than raised.

    :param batch_output_dir: Directory containing batch output.
    :returns: PDF data string, or None if not available.
    """
    artifacts_path = os.path.join(batch_output_dir, "03_qc_artifacts.json")

    try:
        # Open directly rather than testing os.path.exists() first: the file
        # could disappear between the check and the open.
        with open(artifacts_path, 'r', encoding='utf-8') as f:
            artifacts = json.load(f)
    except FileNotFoundError:
        logger.warning(f"QC artifacts file not found: {artifacts_path}")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and text decoding failures.
        logger.error(f"Failed to load QC artifacts: {e}")
        return None

    if not isinstance(artifacts, dict):
        logger.error(
            f"Failed to load QC artifacts: expected a JSON object in {artifacts_path}"
        )
        return None

    commonality_pdf = artifacts.get('commonality_pdf')
    if not commonality_pdf:
        # Absent key and empty value are both treated as "no PDF".
        logger.info("No commonality PDF data found")
        return None

    logger.info("Found commonality PDF data")
    return commonality_pdf
def save_batch_figures_using_saved_divs(batch_output_dir: str, export_dir: str, workflow: str, parameters: dict, output_formats: Optional[List[str]] = None) -> Dict[str, Any]:
    """Save batch figures using saved div pickle files and GUI infrastructure.

    Failures while building the divs, creating the export directory or
    exporting the figures are logged and reported through the returned
    summary dict (``"success": False``) instead of being raised.

    :param batch_output_dir: Directory containing batch output and div pickle files.
    :param export_dir: Directory for figure export.
    :param workflow: Workflow type ('proteomics' or 'interactomics').
    :param parameters: Parsed parameters dict for figure defaults.
    :param output_formats: Output format list, default ['html', 'pdf', 'png'].
    :returns: Summary dict with export details and counts. On failure it
        holds ``success`` (False), ``error`` and ``figures_generated`` (0).
    """
    if output_formats is None:
        output_formats = ['html', 'pdf', 'png']

    logger.info(f"Building analysis divs from saved divs for {workflow} workflow...")

    # Build analysis_divs from saved pickle files. This is the top-level
    # boundary of the export, so any failure is converted into the failure
    # summary that callers already handle.
    try:
        analysis_divs = build_analysis_divs_from_saved_divs(batch_output_dir, workflow, parameters)
    except Exception as e:
        logger.exception(f"Failed to build analysis divs: {e}")
        return {
            "success": False,
            "error": str(e),
            "figures_generated": 0,
        }

    if not analysis_divs:
        logger.error("No analysis divs were built - figure export failed")
        return {
            "success": False,
            "error": "No analysis divs found",
            "figures_generated": 0,
        }

    # Get commonality PDF data
    commonality_pdf_data = get_commonality_pdf_data(batch_output_dir)

    try:
        # Directory creation is inside the try so that e.g. a permission
        # error is reported through the summary dict as well.
        os.makedirs(export_dir, exist_ok=True)

        logger.info(f"Saving figures to: {export_dir}")
        logger.info(f"Output formats: {output_formats}")
        logger.info(f"Analysis divs: {len(analysis_divs)}")
        logger.info(f"Commonality PDF: {'Yes' if commonality_pdf_data else 'No'}")

        # Use GUI's save_figures function
        figures_result = infra.save_figures(
            analysis_divs=analysis_divs,
            export_dir=export_dir,
            output_formats=output_formats,
            commonality_pdf_data=commonality_pdf_data,
            workflow=workflow,
        )
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"Figure export failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "figures_generated": 0,
        }

    logger.info("Figure export completed successfully using GUI infrastructure")
    return {
        "success": True,
        "figures_generated": len(analysis_divs),
        "output_formats": output_formats,
        "export_directory": export_dir,
        "commonality_pdf": commonality_pdf_data is not None,
        "workflow": workflow,
        "gui_result": figures_result,
    }
def main() -> int:
    """Command-line entry point for figure generation using saved divs.

    Parses the command line, loads the parameters file and exports the
    figures. The success message is printed to stdout; the failure message
    is printed to stderr.

    :returns: Process exit status: 0 on success, 1 if figure generation failed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Generate figures from saved div pickle files")
    parser.add_argument("batch_output_dir", help="Directory containing batch output and div pickle files")
    parser.add_argument("export_dir", help="Directory for figure export")
    parser.add_argument("workflow", choices=["proteomics", "interactomics"], help="Workflow type")
    # NOTE: nargs="*" is greedy, so --formats placed before the positional
    # arguments can swallow them; pass it last on the command line.
    parser.add_argument("--formats", nargs="*", default=["html", "pdf", "png"], help="Output formats")
    parser.add_argument("parameters", help="Parameters")

    args = parser.parse_args()

    # Setup logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    parameters = parsing.parse_parameters(Path(args.parameters))

    # Save figures
    result = save_batch_figures_using_saved_divs(
        args.batch_output_dir,
        args.export_dir,
        args.workflow,
        parameters,
        args.formats,
    )

    if result["success"]:
        print(f"Successfully generated {result['figures_generated']} figures")
        print(f"Export directory: {result['export_directory']}")
        return 0

    # Report failure on stderr and through the exit status so shell scripts
    # and batch schedulers can detect it.
    print(f"Figure generation failed: {result['error']}", file=sys.stderr)
    return 1


if __name__ == "__main__":
    sys.exit(main())