Plotly Interactive Plots
Overview
Plotly is a Python library for producing interactive, web-ready figures backed by HTML and JavaScript. It exposes two complementary APIs: plotly.express (px) provides a high-level, DataFrame-oriented interface for generating common chart types in one line, while plotly.graph_objects (go) offers fine-grained control over every trace, axis, and layout property. Figures are fully interactive by default — supporting hover tooltips, zoom, pan, and click events — and can be embedded in web pages, Jupyter notebooks, or built into web applications using the Dash framework.
When to Use
- You need hover tooltips that display gene names, p-values, or sample metadata without cluttering the static figure.
- You are building a multi-panel interactive dashboard for dose-response curves, patient cohorts, or multi-condition comparisons.
- You want to share figures as self-contained HTML files that non-programmers can explore in a browser.
- You need 3D scatter or surface plots for structural biology, conformational landscapes, or PCA of high-dimensional data.
- You are creating heatmaps of gene expression or correlation matrices where users need to zoom into specific gene clusters.
- You require animation frames to show time-series or treatment-response trajectories.
- Use seaborn instead when you need automatic statistical aggregation (confidence intervals, regression fits) with minimal code.
- Use matplotlib when you need fine-grained control over every axis element for print-ready publication figures at exact journal specifications.
Prerequisites
- Python packages: plotly, kaleido (static image export), pandas, numpy
- Data requirements: pandas DataFrames or NumPy arrays; long-form (tidy) data works best with px
- Environment: Jupyter Lab/Notebook (inline rendering), or save as HTML for browser display
pip install plotly kaleido pandas numpy
For Jupyter Lab inline rendering (if not automatic):
pip install "jupyterlab>=3" ipywidgets
Quick Start
import plotly.express as px
import pandas as pd
# Volcano-style scatter: one point per gene, gene symbol shown on hover
volcano_columns = {
    "log2FC": [-3.1, 0.2, 1.8, 2.5, -0.5, 4.1],
    "neg_log10_padj": [8.2, 0.4, 2.1, 6.8, 0.1, 9.3],
    "gene": ["BRCA1", "MYC", "TP53", "EGFR", "CDKN1A", "KRAS"],
    "significance": ["sig", "ns", "ns", "sig", "ns", "sig"],
}
df = pd.DataFrame(volcano_columns)

# Effect size on x, adjusted-p evidence on y; points are colored by their
# significance label and each tooltip is headed by the gene symbol.
fig = px.scatter(
    df,
    x="log2FC",
    y="neg_log10_padj",
    color="significance",
    hover_name="gene",
    title="Volcano Plot — Treatment vs Control",
)
fig.show()
Core API
Module 1: px Scatter and Line — Relational Plots
px.scatter() and px.line() map DataFrame columns to visual encodings (color, symbol, size) and automatically populate hover tooltips from hover_data.
import plotly.express as px
import pandas as pd
import numpy as np
# Dose-response scatter: color by drug, symbol by cell line
np.random.seed(42)
doses_uM = [0.01, 0.1, 1, 10, 100]
mean_viability = [100, 90, 70, 40, 10]  # mean viability (%) at each dose
n_groups = 4  # 2 drugs x 2 cell lines, 5 doses each
df = pd.DataFrame({
    "dose_uM": np.tile(doses_uM, n_groups),
    # Gaussian noise (sd = 5) around the mean curve, clipped to [0, 110].
    "viability": np.clip(
        np.random.normal(mean_viability * n_groups, 5), 0, 110),
    # Cross drug with cell line: every drug is measured in both cell lines,
    # so color (drug) and symbol (cell line) encode independent factors
    # instead of duplicating each other.
    "drug": ["DrugA"] * 10 + ["DrugB"] * 10,
    "cell_line": (["HCT116"] * 5 + ["MCF7"] * 5) * 2,
    # Replicate index cycles 1-5 within each group of five doses.
    "replicate": np.tile([1, 2, 3, 4, 5], n_groups),
})
fig = px.scatter(
    df, x="dose_uM", y="viability",
    color="drug", symbol="cell_line",
    log_x=True,  # doses span four orders of magnitude
    hover_data={"replicate": True, "dose_uM": ":.2f"},
    labels={"viability": "Cell Viability (%)", "dose_uM": "Dose (µM)"},
    title="Dose-Response by Drug and Cell Line",
)
fig.show()
# One trace per (drug, cell line) combination, so this reports 4.
print(f"Figure has {len(fig.data)} traces")
# Time-course gene expression line plot
timepoints_h = list(range(0, 25, 4))  # 0, 4, ..., 24 h (7 time points)
# One expression series per gene, aligned with timepoints_h.
expression_by_gene = {
    "MYC": [1.0, 1.8, 3.2, 4.5, 3.8, 2.1, 1.2],
    "EGFR": [1.0, 2.5, 5.1, 6.8, 5.5, 3.2, 1.8],
    "GAPDH": [1.0, 1.1, 1.0, 1.2, 1.1, 1.0, 0.9],
}

# Flatten the per-gene series into long (tidy) form: one row per
# (gene, time point) measurement.
hours = []
expression = []
gene_labels = []
for gene_name, series in expression_by_gene.items():
    hours.extend(timepoints_h)
    expression.extend(series)
    gene_labels.extend([gene_name] * len(series))

time_df = pd.DataFrame({
    "hour": hours,
    "expression": expression,
    "gene": gene_labels,
})

axis_labels = {
    "expression": "Relative Expression (log2)",
    "hour": "Time (h)",
}
fig = px.line(
    time_df,
    x="hour",
    y="expression",
    color="gene",
    markers=True,
    labels=axis_labels,
    title="Time-Course Gene Expression",
)
# Thicker lines and larger markers than the defaults, applied to all traces.
fig.update_traces(line={"width": 2.5}, marker={"size": 8})
fig.show()
Module 2: px Statistical Plots — Distributions and Categories
px.box(), px.violin(), px.histogram(), and px.strip() produce publication-ready distribution summaries with built-in grouping.
import plotly.express as px
import pandas as pd
import numpy as np
# Violin + strip overlay: expression by cell type
np.random.seed(7)
n = 60  # simulated cells per cell type
# (cell type, mean, standard deviation) of the simulated expression values
type_params = [
    ("T cell", 4.2, 0.8),
    ("B cell", 6.5, 1.2),
    ("NK cell", 2.8, 0.6),
]
patient_ids = [f"P{i:02d}" for i in range(1, 11)]

# Draw the groups in list order so the seeded random stream is consumed in
# a fixed sequence.
expression_parts = []
cell_types = []
for type_name, mean, sd in type_params:
    expression_parts.append(np.random.normal(mean, sd, n))
    cell_types.extend([type_name] * n)

cell_data = pd.DataFrame({
    "expression": np.concatenate(expression_parts),
    "cell_type": cell_types,
    # 10 patient IDs repeated 18 times -> 180 rows, matching 3 * n.
    "patient_id": np.tile(patient_ids, 18),
})

fig = px.violin(
    cell_data,
    x="cell_type",
    y="expression",
    color="cell_type",
    box=True,
    points="all",
    hover_data=["patient_id"],
    labels={"expression": "CD3E Expression (log2 CPM)"},
    title="CD3E Expression Across Cell Types",
)
fig.update_traces(jitter=0.3, pointpos=-1.5)
fig.show()

cells_per_type = cell_data.groupby("cell_type").size().to_dict()
print(f"Cells per type: {cells_per_type}")
# Histogram with rug: distribution of fold changes
# (category, mean, standard deviation, gene count) for each simulated class
fc_classes = [
    ("background", 0.1, 0.8, 500),  # not DE genes
    ("up", 2.5, 0.4, 50),           # upregulated
    ("down", -2.3, 0.4, 40),        # downregulated
]

# Classes are sampled in list order; no reseed happens here, so the draws
# continue from whatever state the NumPy global generator is already in.
log2fc_parts = []
categories = []
for label, mean, sd, gene_count in fc_classes:
    log2fc_parts.append(np.random.normal(mean, sd, gene_count))
    categories.extend([label] * gene_count)

fc_df = pd.DataFrame({
    "log2FC": np.concatenate(log2fc_parts),
    "category": categories,
})

category_colors = {
    "background": "gray",
    "up": "crimson",
    "down": "steelblue",
}
fig = px.histogram(
    fc_df,
    x="log2FC",
    color="category",
    nbins=60,
    barmode="overlay",  # draw the classes on top of each other
    opacity=0.7,        # keep overlapping bars visible
    marginal="rug",
    labels={"log2FC": "log2 Fold Change", "count": "Gene Count"},
    title="Distribution of Fold Changes (DESeq2 Results)",
    color_discrete_map=category_colors,
)
fig.show()
Module 3: px Heatmap and Matrix — Gene Expression and Correlations
px.imshow() renders 2D arrays or DataFrames as color-encoded matrices, ideal for expression heatmaps and correlation matrices.
import plotly.express as px
import pandas as pd
import numpy as np
# Gene expression heatmap (genes × samples)
np.random.seed(12)
gene_symbols = ["BRCA1", "TP53", "EGFR", "MYC", "KRAS",
                "CDKN1A", "RB1", "PTEN", "VHL", "APC"]
genes = [f"Gene_{g}" for g in gene_symbols]
samples = [f"S{i:02d}" for i in range(1, 9)]

# Random noise for a 10 x 8 matrix, plus a per-gene shift stored as a
# column vector so it broadcasts across all samples of that gene.
noise = np.random.normal(0, 1.5, (10, 8))
gene_offsets = np.array([2, -1, 3, -2, 1, -3, 0, 2, -1, 3]).reshape(-1, 1)
expr_matrix = pd.DataFrame(
    noise + gene_offsets,
    index=genes,
    columns=samples,
)

fig = px.imshow(
    expr_matrix,
    color_continuous_scale="RdBu_r",
    color_continuous_midpoint=0,  # center the diverging scale on zero
    aspect="auto",
    labels={"color": "log2 Expression (z-score)"},
    title="Gene Expression Heatmap",
)
fig.update_xaxes(side="top")  # sample labels above the matrix
fig.update_layout(width=600, height=500)
fig.show()
print(f"Heatmap shape: {expr_matrix.shape} (genes × samples)")
# Correlation matrix heatmap
# NOTE(review): combinations is not used in the visible part of this snippet.
from itertools import combinations
markers = ["IL6", "TNF", "CXCL10", "IFNg", "IL10", "IL1B", "CCL2", "IL17A"]
n_markers = len(markers)
np.random.seed(3)

# Covariance with 0.3 + 0.7 on the diagonal and 0.7 everywhere else, so
# every pair of markers is simulated as positively related.
shared_cov = np.eye(n_markers) * 0.3 + 0.7
simulated_levels = np.random.multivariate_normal(
    mean=np.zeros(n_markers),
    cov=shared_cov,
    size=80,
)
marker_df = pd.DataFrame(simulated_levels, columns=markers)
corr_df = marker_df.corr()

fig = px.imshow(
    corr_df,
    color_continuous_scale="RdBu_r",
    color_continuous_midpoint=0,
    zmin=-1,
    zmax=1,  # pin the color range to the full correlation range
    text_auto=".2f",  # print each coefficient in its cell
    title="Cytokine Correlation Matrix (n=80 patients)",
)
fig.update_traces(textfont_size=10)
fig.show()
Module 4: go Graph Objects — Full Trace Control
plotly.graph_objects provides fine-grained access to every trace property: marker symbols, error bars, fill areas, and multi-trace layouts. Essential when px lacks the flexibility you need.
import plotly.graph_ob