Plotly Interactive Plots

Overview

Plotly is a Python library for producing interactive, web-ready figures backed by HTML and JavaScript. It exposes two complementary APIs: plotly.express (px) provides a high-level, DataFrame-oriented interface for generating common chart types in one line, while plotly.graph_objects (go) offers fine-grained control over every trace, axis, and layout property. Figures are fully interactive by default — supporting hover tooltips, zoom, pan, and click events — and can be embedded in web pages, Jupyter notebooks, or built into web applications using the Dash framework.

When to Use

You need hover tooltips that display gene names, p-values, or sample metadata without cluttering the plot itself with text labels.
You are building a multi-panel interactive dashboard for dose-response curves, patient cohorts, or multi-condition comparisons.
You want to share figures as self-contained HTML files that non-programmers can explore in a browser.
You need 3D scatter or surface plots for structural biology, conformational landscapes, or PCA of high-dimensional data.
You are creating heatmaps of gene expression or correlation matrices where users need to zoom into specific gene clusters.
You require animation frames to show time-series or treatment-response trajectories.
Use seaborn instead when you need automatic statistical aggregation (confidence intervals, regression fits) with minimal code.
Use matplotlib when you need fine-grained control over every axis element for print-ready publication figures at exact journal specifications.

Prerequisites

Python packages: plotly, kaleido (static image export), pandas, numpy
Data requirements: pandas DataFrames or NumPy arrays; long-form (tidy) data works best with px
Environment: Jupyter Lab/Notebook (inline rendering), or save as HTML for browser display

pip install plotly kaleido pandas numpy

For Jupyter Lab inline rendering (if not automatic):

pip install "jupyterlab>=3" ipywidgets

Quick Start

import plotly.express as px
import pandas as pd

# Minimal volcano plot: one point per gene, gene symbol shown on hover.
# Each record is (log2FC, -log10 adjusted p-value, gene symbol, significance call).
records = [
    (-3.1, 8.2, "BRCA1", "sig"),
    (0.2, 0.4, "MYC", "ns"),
    (1.8, 2.1, "TP53", "ns"),
    (2.5, 6.8, "EGFR", "sig"),
    (-0.5, 0.1, "CDKN1A", "ns"),
    (4.1, 9.3, "KRAS", "sig"),
]
df = pd.DataFrame(
    records,
    columns=["log2FC", "neg_log10_padj", "gene", "significance"],
)

# color= splits the points by significance call; hover_name= puts the gene
# symbol at the top of each tooltip.
fig = px.scatter(
    df,
    x="log2FC",
    y="neg_log10_padj",
    color="significance",
    hover_name="gene",
    title="Volcano Plot — Treatment vs Control",
)
fig.show()

Core API

Module 1: px Scatter and Line — Relational Plots

px.scatter() and px.line() map DataFrame columns to visual encodings (color, symbol, size). Hover tooltips are populated automatically from the mapped columns; use hover_data to add extra columns or control number formatting.

import plotly.express as px
import pandas as pd
import numpy as np

# Dose-response scatter: color by drug, symbol by cell line.
# Layout: four consecutive blocks of 5 doses each, in the order
# HCT116/DrugA, HCT116/DrugB, MCF7/DrugA, MCF7/DrugB.
np.random.seed(42)
df = pd.DataFrame({
    "dose_uM": np.tile([0.01, 0.1, 1, 10, 100], 4),
    "viability": np.clip(np.random.normal(
        [100, 90, 70, 40, 10] * 4, 5), 0, 110),
    # Alternate the drug inside each cell line. Listing DrugA x10 then
    # DrugB x10 would line up exactly with cell_line below, so drug and
    # cell line would be confounded and only two groups would be drawn.
    "drug": (["DrugA"] * 5 + ["DrugB"] * 5) * 2,
    "cell_line": ["HCT116"] * 10 + ["MCF7"] * 10,
    "replicate": np.tile([1, 2, 3, 4, 5], 4),
})

fig = px.scatter(
    df, x="dose_uM", y="viability",
    color="drug", symbol="cell_line",
    log_x=True,  # doses span four orders of magnitude
    hover_data={"replicate": True, "dose_uM": ":.2f"},
    labels={"viability": "Cell Viability (%)", "dose_uM": "Dose (µM)"},
    title="Dose-Response by Drug and Cell Line",
)
fig.show()
# One trace per (drug, cell line) combination present in the data: 4 here.
print(f"Figure has {len(fig.data)} traces")

# Time-course line plot: three genes sampled every 4 h from 0 to 24 h.
timepoints = list(range(0, 25, 4))
profiles = {
    "MYC": [1.0, 1.8, 3.2, 4.5, 3.8, 2.1, 1.2],
    "EGFR": [1.0, 2.5, 5.1, 6.8, 5.5, 3.2, 1.8],
    "GAPDH": [1.0, 1.1, 1.0, 1.2, 1.1, 1.0, 0.9],
}

# Flatten to long (tidy) form: one row per (hour, expression, gene).
time_df = pd.DataFrame(
    [
        (hour, level, gene)
        for gene, levels in profiles.items()
        for hour, level in zip(timepoints, levels)
    ],
    columns=["hour", "expression", "gene"],
)

fig = px.line(
    time_df,
    x="hour",
    y="expression",
    color="gene",
    markers=True,
    labels={"expression": "Relative Expression (log2)", "hour": "Time (h)"},
    title="Time-Course Gene Expression",
)
# Thicker lines and larger markers than the defaults, applied to every trace.
fig.update_traces(line={"width": 2.5}, marker={"size": 8})
fig.show()

Module 2: px Statistical Plots — Distributions and Categories

px.box(), px.violin(), px.histogram(), and px.strip() produce publication-ready distribution summaries with built-in grouping.

import plotly.express as px
import pandas as pd
import numpy as np

# Violin plot with every observation drawn beside it: expression by cell type.
np.random.seed(7)
n = 60

# (cell type, mean, standard deviation). The draws below must stay in this
# order so the seeded random stream yields the same values.
group_params = [
    ("T cell", 4.2, 0.8),
    ("B cell", 6.5, 1.2),
    ("NK cell", 2.8, 0.6),
]
draws = [np.random.normal(mean, sd, n) for _, mean, sd in group_params]
type_labels = [name for name, _, _ in group_params for _ in range(n)]
patient_ids = [f"P{i:02d}" for i in range(1, 11)]

cell_data = pd.DataFrame({
    "expression": np.concatenate(draws),
    "cell_type": type_labels,
    # 10 patient IDs cycled 18 times to cover all 180 rows.
    "patient_id": np.tile(patient_ids, 18),
})

fig = px.violin(
    cell_data,
    x="cell_type",
    y="expression",
    color="cell_type",
    box=True,
    points="all",
    hover_data=["patient_id"],
    labels={"expression": "CD3E Expression (log2 CPM)"},
    title="CD3E Expression Across Cell Types",
)
# Spread the points horizontally and move them to the left of each violin.
fig.update_traces(jitter=0.3, pointpos=-1.5)
fig.show()
print(f"Cells per type: {cell_data.groupby('cell_type').size().to_dict()}")

# Histogram with rug: distribution of simulated fold changes in three groups.
fc_df = pd.DataFrame({
    "log2FC": np.concatenate([
        np.random.normal(0.1, 0.8, 500),   # not DE genes
        np.random.normal(2.5, 0.4, 50),    # upregulated
        np.random.normal(-2.3, 0.4, 40),   # downregulated
    ]),
    "category": ["background"] * 500 + ["up"] * 50 + ["down"] * 40,
})

fig = px.histogram(
    fc_df, x="log2FC", color="category",
    nbins=60, barmode="overlay", opacity=0.7,
    marginal="rug",
    labels={"log2FC": "log2 Fold Change"},
    title="Distribution of Fold Changes (DESeq2 Results)",
    color_discrete_map={"background": "gray", "up": "crimson", "down": "steelblue"},
)
# px.histogram generates the "count" axis title itself, so a "count" entry in
# `labels` is not applied. Set the title on the main panel's y-axis directly;
# the rug marginal has its own separate y-axis and is left untouched.
fig.update_layout(yaxis_title_text="Gene Count")
fig.show()

Module 3: px Heatmap and Matrix — Gene Expression and Correlations

px.imshow() renders 2D arrays or DataFrames as color-encoded matrices, ideal for expression heatmaps and correlation matrices.

import plotly.express as px
import pandas as pd
import numpy as np

# Gene expression heatmap: rows are genes, columns are samples.
np.random.seed(12)
gene_symbols = ("BRCA1", "TP53", "EGFR", "MYC", "KRAS",
                "CDKN1A", "RB1", "PTEN", "VHL", "APC")
genes = ["Gene_" + symbol for symbol in gene_symbols]
samples = [f"S{idx:02d}" for idx in range(1, 9)]

# Gaussian noise plus a per-gene offset, so each row has its own baseline.
noise = np.random.normal(0, 1.5, (10, 8))
row_offsets = np.array([2, -1, 3, -2, 1, -3, 0, 2, -1, 3])[:, np.newaxis]
expr_matrix = pd.DataFrame(noise + row_offsets, index=genes, columns=samples)

fig = px.imshow(
    expr_matrix,
    color_continuous_scale="RdBu_r",
    color_continuous_midpoint=0,  # centre the diverging scale on zero
    aspect="auto",                # let cells stretch to fill the figure
    labels={"color": "log2 Expression (z-score)"},
    title="Gene Expression Heatmap",
)
fig.update_layout(width=600, height=500)
fig.update_xaxes(side="top")  # sample labels above the matrix
fig.show()
print(f"Heatmap shape: {expr_matrix.shape} (genes × samples)")

# Correlation matrix heatmap
from itertools import combinations

markers = ["IL6", "TNF", "CXCL10", "IFNg", "IL10", "IL1B", "CCL2", "IL17A"]
# Derive both dimensions from a single source so the mean vector, covariance
# matrix, column labels, and title cannot drift out of sync.
n_markers = len(markers)
n_patients = 80

np.random.seed(3)
# Equicorrelated covariance: 1.0 on the diagonal, 0.7 everywhere else.
raw = np.random.multivariate_normal(
    mean=np.zeros(n_markers),
    cov=np.eye(n_markers) * 0.3 + 0.7,
    size=n_patients,
)
corr_df = pd.DataFrame(raw, columns=markers).corr()

fig = px.imshow(
    corr_df,
    color_continuous_scale="RdBu_r",
    color_continuous_midpoint=0,
    zmin=-1, zmax=1,   # pin the scale to the full correlation range
    text_auto=".2f",   # print each coefficient inside its cell
    title=f"Cytokine Correlation Matrix (n={n_patients} patients)",
)
fig.update_traces(textfont_size=10)
fig.show()

Module 4: go Graph Objects — Full Trace Control

plotly.graph_objects provides fine-grained access to every trace property (marker symbols, error bars, fill areas) and lets you compose multi-trace layouts by hand. It is essential when px lacks the flexibility you need.

import plotly.graph_ob

plotly-interactive-plots

Cómo agregar

Pega en el README de tu repo

Skills relacionadas

xlsx

mem-search

weekly-digests

how-it-works

Recibe nuevas skills de Dados e Análise todos los lunes