Source code for app.components.figures.cvplot

"""
Coefficient of variation (CV) plot per sample group.

Computes per-group mean, std and CV, and renders a violin plot with mean
annotations for each group.
"""
import plotly.graph_objects as go
from dash.dcc import Graph
import pandas as pd

def make_graph(
    raw_data: pd.DataFrame,
    sample_groups: dict,
    replicate_colors: dict,
    defaults: dict,
    id_name: str,
    dlname: str,
) -> tuple:
    """Create a CV violin plot across sample groups.

    For every sample group the row-wise mean, standard deviation and
    coefficient of variation (``std / mean * 100``) are computed over the
    group's columns. Rows whose standard deviation is NaN (for example rows
    with fewer than two non-NA values in the group) are dropped from that
    group's statistics.

    Only finite CV values are drawn and used for the y-axis range and the
    mean annotation; NaN or infinite CVs (e.g. from a row mean of zero) are
    left untouched in the returned statistics. A group without any finite CV
    still gets an (empty) trace but no mean annotation, and if no group has
    a finite CV the y-axis falls back to the range ``[0, 10]``.

    :param raw_data: DataFrame of values; columns referenced by ``sample_groups``.
    :param sample_groups: Mapping group -> list of column names.
    :param replicate_colors: Mapping with ``'sample groups'`` color strings per group.
    :param defaults: Dictionary with ``config`` (including
        ``toImageButtonOptions``), ``height`` and ``width``; optionally
        ``min_width_per`` together with ``side_width``.
    :param id_name: Component ID for the ``Graph``.
    :param dlname: Name for the downloaded figure file.
    :returns: Tuple ``(Graph, out_data)`` where out_data contains the keys
        ``group_means``, ``group_cvs`` and ``group_stds``, each mapping
        group -> {row index -> value}.
    """
    group_cvs = {}
    group_means = {}
    group_stds = {}

    # Per-group statistics, computed row-wise over the group's columns.
    for sg, group_cols in sample_groups.items():
        group_values = raw_data[group_cols]
        means = group_values.mean(axis=1)
        stds = group_values.std(axis=1)
        # Keep only rows for which a standard deviation could be calculated.
        has_std = stds.notna()
        means = means[has_std]
        stds = stds[has_std]
        group_cvs[sg] = (stds / means) * 100
        group_means[sg] = means
        group_stds[sg] = stds

    # NaN/inf CVs cannot be drawn and would break the axis-range arithmetic,
    # so the figure is built from the finite values only.
    infinities = [float('inf'), float('-inf')]
    finite_cvs = {
        sg: cvs.replace(infinities, float('nan')).dropna()
        for sg, cvs in group_cvs.items()
    }

    # Largest finite CV across all groups; the leading 0.0 keeps the axis
    # sensible when every group is empty or all CVs are negative.
    max_cv = max(
        [0.0] + [float(cvs.max()) for cvs in finite_cvs.values() if not cvs.empty]
    )
    y_max = ((int(max_cv) // 10) + 1) * 10  # Next multiple of 10 above max_cv

    fig = go.Figure()
    annotations = []
    for sg in sample_groups:
        group_finite = finite_cvs[sg]
        fig.add_trace(go.Violin(
            y=group_finite.tolist(),
            name=sg,
            box_visible=True,
            meanline_visible=True,
            # Swap a trailing ', 1)' alpha for ', 0.4)' to get a translucent
            # fill; color strings without that suffix are used unchanged.
            fillcolor=replicate_colors['sample groups'][sg].replace(', 1)', ', 0.4)'),
            line_color='black',
            line=dict(width=1),
            points=False,  # Do not draw individual points
        ))

        # Without any finite CV there is no mean to annotate.
        if group_finite.empty:
            continue
        mean_val = group_finite.mean()
        annotations.append(dict(
            x=sg,
            y=mean_val * 1.2,
            text=f"Mean: {mean_val:.1f}%",
            showarrow=False,
            yshift=10,
            font=dict(color='black'),
        ))

    # Widen the figure when a minimum per-group width is configured and the
    # default width would be too narrow for the number of groups.
    width: int = defaults['width']
    if 'min_width_per' in defaults and defaults['min_width_per'] > 0:
        target_width = (
            defaults['side_width']
            + defaults['min_width_per'] * 4 * len(sample_groups)
        )
        if width < target_width:
            width = target_width

    fig.update_layout(
        autosize=False,
        height=defaults['height'],
        width=width,
        yaxis=dict(
            title='%CV',
            tickmode='linear',
            tick0=0,
            dtick=10,  # Tick every 10 %CV
            range=[0, y_max],
        ),
        showlegend=True,
        violingap=0.2,
        violinmode='overlay',
        annotations=annotations,
    )

    # The returned statistics are the unfiltered per-group series.
    out_data = {
        'group_means': {sg: means.to_dict() for sg, means in group_means.items()},
        'group_cvs': {sg: cvs.to_dict() for sg, cvs in group_cvs.items()},
        'group_stds': {sg: stds.to_dict() for sg, stds in group_stds.items()},
    }

    # Copy both dictionary levels so the shared defaults are not mutated when
    # the download filename is set.
    config = defaults['config'].copy()
    config['toImageButtonOptions'] = config['toImageButtonOptions'].copy()
    config['toImageButtonOptions']['filename'] = dlname

    return (Graph(config=config, figure=fig, id=id_name), out_data)