#!/usr/bin/env python3
"""
Batch Figure Builder using saved Dash divs
This module loads the saved div pickle files generated by the batch pipeline
and uses them directly with infra.save_figures, just like the GUI does.
This approach is much more reliable than trying to reconstruct figures from JSON.
"""
import os
import sys
import pickle
import json
import logging
import copy
from typing import Dict, List, Any, Optional, Tuple
from pathlib import Path
# Add current directory to path for imports
sys.path.insert(0, os.path.dirname(__file__))
from pipeline_module.pipeline_batch import dash_to_wire
from components import infra
from components.figures import tic_graph
from components import parsing
logger = logging.getLogger(__name__)
[docs]
def load_div_pickle(pickle_path: str) -> Dict[str, Any]:
    """Load a div pickle file.

    The pickle is expected to contain a dict of div components. A missing
    file, a read/unpickling failure, or a payload that is not a dict all
    yield an empty dict, so callers can always use ``in`` and key lookups
    on the result.

    :param pickle_path: Path to the pickle file.
    :returns: Dict of div components or empty dict if not found or on error.
    """
    try:
        with open(pickle_path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code. Only point this
            # at pickle files produced by a trusted batch pipeline run.
            divs = pickle.load(f)
    except FileNotFoundError:
        # Opening directly (instead of checking os.path.exists first) avoids
        # a race between the existence check and the read.
        logger.warning(f"Div pickle file not found: {pickle_path}")
        return {}
    except Exception as e:
        # Deliberately broad: unpickling can raise nearly any exception type
        # and this loader is best-effort.
        logger.error(f"Failed to load divs from {pickle_path}: {e}")
        return {}

    if not isinstance(divs, dict):
        # Honour the declared return type; a non-dict payload would otherwise
        # fail later, far away from the actual cause.
        logger.error(
            f"Failed to load divs from {pickle_path}: "
            f"expected a dict, got {type(divs).__name__}"
        )
        return {}

    logger.info(f"Loaded {len(divs)} divs from {pickle_path}")
    return divs
[docs]
def _append_divs_in_order(analysis_divs, divs, order, label, log_missing):
    """Append the divs listed in ``order`` to ``analysis_divs`` in wire format.

    :param analysis_divs: List receiving the converted divs (mutated in place).
    :param divs: Dict mapping div keys to div components.
    :param order: Div keys, in the order they should be appended.
    :param label: Group name used in the log messages (e.g. ``'QC'``).
    :param log_missing: Logger method called when a key is absent from ``divs``.
    :returns: None.
    """
    for div_key in order:
        if div_key not in divs:
            # Only the first character is upper-cased so 'QC' stays 'QC'.
            log_missing(f"{label[:1].upper()}{label[1:]} div not found: {div_key}")
            continue
        # Convert to wire format - infra.save_figures expects dict format
        analysis_divs.append(dash_to_wire(divs[div_key]))
        logger.info(f"Added {label} div: {div_key}")


def _load_qc_artifacts(batch_output_dir):
    """Read ``03_qc_artifacts.json`` from the batch output directory.

    :param batch_output_dir: Directory containing the batch output files.
    :returns: Parsed artifacts dict, or an empty dict if the file is missing,
        unreadable, not valid JSON, or does not hold a JSON object.
    """
    artifacts_path = os.path.join(batch_output_dir, "03_qc_artifacts.json")
    try:
        with open(artifacts_path, "r") as f:
            qc_data = json.load(f)
    except FileNotFoundError:
        logger.warning(f"QC artifacts file not found: {artifacts_path}")
        return {}
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a subclass of ValueError.
        logger.error(f"Failed to load QC artifacts: {e}")
        return {}
    if not isinstance(qc_data, dict):
        logger.error(
            f"Failed to load QC artifacts: expected a JSON object, "
            f"got {type(qc_data).__name__}"
        )
        return {}
    return qc_data


def build_analysis_divs_from_saved_divs(batch_output_dir: str, workflow: str, params: dict) -> List[Any]:
    """Build ``analysis_divs`` list from saved div pickle files.

    The GUI export expects a flat list of individual div components. The
    list is assembled in three steps: the QC divs in a fixed order, one TIC
    div per datatype found in the QC artifacts, and finally the
    workflow-specific divs in a fixed order. Missing files or missing divs
    are logged and skipped rather than raising.

    :param batch_output_dir: Directory containing the batch output files.
    :param workflow: Workflow type ('proteomics' or 'interactomics').
    :param params: Parsed parameters dict (for TIC rendering defaults).
    :returns: List of analysis div components ready for infra.save_figures.
    """
    analysis_divs: List[Any] = []

    # Load QC divs (common to both workflows)
    qc_divs = load_div_pickle(os.path.join(batch_output_dir, "03_qc_divs.pickle"))
    # Define the order for QC figures (matches GUI order)
    qc_order = [
        'counts', 'coverage', 'common_proteins', 'reproducibility',
        'missing', 'sum', 'mean', 'distribution', 'commonality'
    ]
    _append_divs_in_order(analysis_divs, qc_divs, qc_order, "QC", logger.warning)

    # TIC plots: one div per datatype, each built from a copy of the saved
    # 'tic' div so that all chromatograms end up in the export.
    qc_data = _load_qc_artifacts(batch_output_dir)
    if 'tic' not in qc_data:
        logger.warning("TIC div not found: no 'tic' entry in QC artifacts")
    elif 'tic' not in qc_divs:
        logger.warning("TIC div not found: no 'tic' div in saved QC divs")
    else:
        for datatype in qc_data['tic']:
            # Deep copy so each datatype gets its own title and figure.
            tic_div = copy.deepcopy(qc_divs['tic'])
            # First child carries the title text, second child the figure.
            tic_div.children[0].children = tic_div.children[0].children.replace('TIC', datatype)
            tic_div.children[1].figure = tic_graph.tic_figure(
                defaults=params["Figure defaults"]["full-height"],
                traces=qc_data['tic'],
                datatype=datatype,
            )
            analysis_divs.append(dash_to_wire(tic_div))
            logger.info(f"Added TIC div: {datatype}")

    # Workflow-specific divs: pickle file name and figure order per workflow.
    # NOTE(review): 'pertubation' (sic) is kept as-is; presumably it matches
    # the key written by the batch pipeline - verify before renaming.
    workflow_specs = {
        "proteomics": (
            "04_proteomics_divs.pickle",
            [
                'na_filter', 'normalization', 'missing_values_in_other_samples', 'imputation', 'pca',
                'cv', 'clustermap', 'pertubation', 'volcano'
            ],
        ),
        "interactomics": (
            "05_interactomics_divs.pickle",
            [
                'saint', 'known', 'common_proteins', 'network', 'pca', 'enrichment', 'msmic'
            ],
        ),
    }
    workflow_key = workflow.lower()
    if workflow_key in workflow_specs:
        pickle_name, div_order = workflow_specs[workflow_key]
        workflow_divs = load_div_pickle(os.path.join(batch_output_dir, pickle_name))
        # Absent workflow divs are logged at info level, unlike QC divs.
        _append_divs_in_order(analysis_divs, workflow_divs, div_order, workflow_key, logger.info)
    else:
        logger.warning(f"Unknown workflow {workflow!r}; no workflow-specific divs added")

    logger.info(f"Built {len(analysis_divs)} analysis divs for {workflow} workflow")
    return analysis_divs
[docs]
def get_commonality_pdf_data(batch_output_dir: str) -> Optional[str]:
    """Fetch the commonality PDF payload stored in the QC artifacts file.

    Reads ``03_qc_artifacts.json`` from the batch output directory and
    returns the value stored under ``'commonality_pdf'``. A missing file,
    any error while reading it, or an absent/empty value all yield ``None``.

    :param batch_output_dir: Directory containing batch output.
    :returns: PDF data string, or None if not available.
    """
    artifacts_path = os.path.join(batch_output_dir, "03_qc_artifacts.json")

    # Guard: nothing to read.
    if not os.path.exists(artifacts_path):
        logger.warning(f"QC artifacts file not found: {artifacts_path}")
        return None

    try:
        with open(artifacts_path, 'r') as handle:
            pdf_data = json.load(handle).get('commonality_pdf')
    except Exception as e:
        logger.error(f"Failed to load QC artifacts: {e}")
        return None

    # An empty or falsy value is treated the same as a missing key.
    if not pdf_data:
        logger.info("No commonality PDF data found")
        return None

    logger.info("Found commonality PDF data")
    return pdf_data
[docs]
def main():
    """Command-line entry point for figure generation using saved divs.

    Parses the command line, configures logging, loads the parameters file
    and delegates to ``save_batch_figures_using_saved_divs``. On success a
    short summary is printed; on failure the error is written to stderr and
    the process exits with status 1 so calling scripts can detect it.

    :returns: None.
    :raises SystemExit: With status 1 when figure generation reports failure
        (and from argparse on invalid arguments).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Generate figures from saved div pickle files")
    parser.add_argument("batch_output_dir", help="Directory containing batch output and div pickle files")
    parser.add_argument("export_dir", help="Directory for figure export")
    parser.add_argument("workflow", choices=["proteomics", "interactomics"], help="Workflow type")
    # NOTE: nargs="*" is greedy, so a positional placed directly after the
    # --formats values is consumed as a format. Pass all positionals
    # (including PARAMETERS) before --formats on the command line.
    parser.add_argument("--formats", nargs="*", default=["html", "pdf", "png"],
                        help="Output formats")
    parser.add_argument("parameters", help="Parameters")
    args = parser.parse_args()

    # Setup logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    parameters = parsing.parse_parameters(Path(args.parameters))

    # Save figures
    # NOTE(review): save_batch_figures_using_saved_divs is neither defined nor
    # imported in the part of this file visible during review - confirm it is
    # in scope at runtime.
    result = save_batch_figures_using_saved_divs(
        args.batch_output_dir,
        args.export_dir,
        args.workflow,
        parameters,
        args.formats,
    )

    if result["success"]:
        print(f"Successfully generated {result['figures_generated']} figures")
        print(f"Export directory: {result['export_directory']}")
    else:
        # .get keeps a missing 'error' key from masking the failure itself.
        print(f"Figure generation failed: {result.get('error', 'unknown error')}", file=sys.stderr)
        # Non-zero exit status lets batch scripts notice the failure.
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()