"""
Coefficient of variation (CV) plot per sample group.
Computes per-group mean, std and CV, and renders a violin plot with mean
annotations for each group.
"""
import plotly.graph_objects as go
from dash.dcc import Graph
import pandas as pd
[docs]
def make_graph(raw_data: pd.DataFrame, sample_groups: dict, replicate_colors: dict, defaults: dict, id_name: str, dlname: str) -> tuple:
    """Create a CV violin plot across sample groups.

    For every sample group the row-wise mean, standard deviation and
    coefficient of variation (``std / mean * 100``) are computed over the
    group's columns. Rows whose standard deviation is NaN (for example only
    one non-NA value in the group) are dropped from that group's statistics.

    CV values that are NaN or infinite (a row mean of zero) cannot be drawn,
    so they are excluded from the violins, the mean annotations and the
    y-axis range. They are still present in the returned ``out_data``.

    :param raw_data: DataFrame of values; columns referenced by ``sample_groups``.
    :param sample_groups: Mapping group -> list of column names.
    :param replicate_colors: Mapping with ``'sample groups'`` color strings per group.
    :param defaults: Dictionary of plot settings. Reads ``config``, ``height``
        and ``width``; when ``min_width_per`` is present and positive,
        ``side_width`` is read as well to compute a minimum figure width.
    :param id_name: Component ID for the ``Graph``.
    :param dlname: Name for the downloaded figure file.
    :returns: Tuple ``(Graph, out_data)`` where ``out_data`` holds the keys
        ``'group_means'``, ``'group_cvs'`` and ``'group_stds'``, each a
        mapping group -> ``{row index: value}``.
    """
    inf = float('inf')
    nan = float('nan')

    group_cvs = {}
    group_means = {}
    group_stds = {}
    # Finite-only view of each group's CVs; this is what actually gets drawn.
    plot_cvs = {}

    # Calculate the statistics separately for each sample group.
    for sg, group_cols in sample_groups.items():
        group_data = raw_data[group_cols]
        means = group_data.mean(axis=1)
        stds = group_data.std(axis=1)
        # Drop rows where the std could not be calculated.
        has_std = stds.notna()
        means = means[has_std]
        stds = stds[has_std]
        cv_percent = (stds / means) * 100
        group_cvs[sg] = cv_percent
        group_means[sg] = means
        group_stds[sg] = stds
        # A zero mean yields inf (or NaN for 0/0); neither can be plotted
        # nor used to size the axis.
        plot_cvs[sg] = cv_percent.replace([inf, -inf], nan).dropna()

    # Largest plottable CV over all groups. Falls back to 0 when there is
    # nothing to plot, and is clamped at 0 so the axis range never inverts.
    max_cv = max(
        (cvs.max() for cvs in plot_cvs.values() if not cvs.empty),
        default=0.0,
    )
    max_cv = max(max_cv, 0.0)
    # Next multiple of 10 strictly above the maximum, so an exact multiple
    # still gets one tick of headroom for the annotations.
    y_max = ((int(max_cv) // 10) + 1) * 10

    fig = go.Figure()
    annotations = []
    for sg in sample_groups:
        values = plot_cvs[sg]
        fig.add_trace(go.Violin(
            y=list(values),
            name=sg,
            box_visible=True,
            meanline_visible=True,
            # Lower the fill alpha to 0.4. Assumes the color string ends in
            # ', 1)' (e.g. an rgba() value with alpha 1) -- TODO confirm;
            # any other format is passed through unchanged.
            fillcolor=replicate_colors['sample groups'][sg].replace(', 1)', ', 0.4)'),
            line_color='black',
            line=dict(width=1),
            points=False,  # Do not draw individual points
        ))
        if values.empty:
            # No plottable values: there is no mean to annotate.
            continue
        mean_val = values.mean()
        annotations.append(dict(
            x=sg,
            y=mean_val*1.2,
            text=f"Mean: {mean_val:.1f}%",
            showarrow=False,
            yshift=10,
            font=dict(color='black')
        ))

    # Widen the figure when the configured width is too small for the
    # number of groups.
    width: int = defaults['width']
    if 'min_width_per' in defaults and defaults['min_width_per'] > 0:
        target_width = defaults['side_width'] + defaults['min_width_per']*4*len(sample_groups)
        if width < target_width:
            width = target_width

    fig.update_layout(
        autosize=False,
        height=defaults['height'],
        width=width,
        yaxis=dict(
            title='%CV',
            tickmode='linear',
            tick0=0,
            dtick=10,  # Tick every 10 percentage points
            range=[0, y_max]
        ),
        showlegend=True,
        violingap=0.2,
        violinmode='overlay',
        annotations=annotations
    )

    out_data = {
        'group_means': {sg: means.to_dict() for sg, means in group_means.items()},
        'group_cvs': {sg: cvs.to_dict() for sg, cvs in group_cvs.items()},
        'group_stds': {sg: stds.to_dict() for sg, stds in group_stds.items()}
    }

    # Copy both levels so the caller's defaults are not mutated when the
    # download filename is set. A missing 'toImageButtonOptions' entry is
    # treated as empty instead of raising KeyError.
    config = defaults['config'].copy()
    image_options = (config.get('toImageButtonOptions') or {}).copy()
    image_options['filename'] = dlname
    config['toImageButtonOptions'] = image_options
    return (Graph(config=config, figure=fig, id=id_name), out_data)