TensorBoard: Visualization Toolkit for ML
When to Use This Skill
Use TensorBoard when you need to:
- Visualize training metrics like loss and accuracy over time
- Debug models with histograms and distributions
- Compare experiments across multiple runs
- Visualize model graphs and architecture
- Project embeddings to lower dimensions (PCA, t-SNE, UMAP)
- Track hyperparameter experiments
- Profile performance and identify bottlenecks
- Visualize images and text during training
Users: 20M+ downloads/year | GitHub Stars: 27k+ | License: Apache 2.0
Installation
# Install TensorBoard
pip install tensorboard
# PyTorch integration
pip install torch torchvision tensorboard
# TensorFlow integration (TensorBoard included)
pip install tensorflow
# Launch TensorBoard
tensorboard --logdir=runs
# Access at http://localhost:6006
Quick Start
PyTorch
from torch.utils.tensorboard import SummaryWriter
# Create writer
writer = SummaryWriter('runs/experiment_1')
# Training loop
for epoch in range(10):
train_loss = train_epoch()
val_acc = validate()
# Log metrics
writer.add_scalar('Loss/train', train_loss, epoch)
writer.add_scalar('Accuracy/val', val_acc, epoch)
# Close writer
writer.close()
# Launch: tensorboard --logdir=runs
TensorFlow/Keras
import tensorflow as tf
# Create callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(
log_dir='logs/fit',
histogram_freq=1
)
# Train model
model.fit(
x_train, y_train,
epochs=10,
validation_data=(x_val, y_val),
callbacks=[tensorboard_callback]
)
# Launch: tensorboard --logdir=logs
Core Concepts
1. SummaryWriter (PyTorch)
from torch.utils.tensorboard import SummaryWriter
# Default directory: runs/CURRENT_DATETIME
writer = SummaryWriter()
# Custom directory
writer = SummaryWriter('runs/experiment_1')
# Custom comment (appended to default directory)
writer = SummaryWriter(comment='baseline')
# Log data
writer.add_scalar('Loss/train', 0.5, global_step=0)
writer.add_scalar('Loss/train', 0.3, global_step=1)
# Flush and close
writer.flush()
writer.close()
2. Logging Scalars
# PyTorch
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()
for epoch in range(100):
train_loss, train_acc = train()
val_loss, val_acc = validate()
# Log individual metrics
writer.add_scalar('Loss/train', train_loss, epoch)
writer.add_scalar('Loss/val', val_loss, epoch)
writer.add_scalar('Accuracy/train', train_acc, epoch)
writer.add_scalar('Accuracy/val', val_acc, epoch)
# Learning rate
lr = optimizer.param_groups[0]['lr']
writer.add_scalar('Learning_rate', lr, epoch)
writer.close()
# TensorFlow
import tensorflow as tf
train_summary_writer = tf.summary.create_file_writer('logs/train')
val_summary_writer = tf.summary.create_file_writer('logs/val')
for epoch in range(100):
with train_summary_writer.as_default():
tf.summary.scalar('loss', train_loss, step=epoch)
tf.summary.scalar('accuracy', train_acc, step=epoch)
with val_summary_writer.as_default():
tf.summary.scalar('loss', val_loss, step=epoch)
tf.summary.scalar('accuracy', val_acc, step=epoch)
3. Logging Multiple Scalars
# PyTorch: Group related metrics
writer.add_scalars('Loss', {
'train': train_loss,
'validation': val_loss,
'test': test_loss
}, epoch)
writer.add_scalars('Metrics', {
'accuracy': accuracy,
'precision': precision,
'recall': recall,
'f1': f1_score
}, epoch)
4. Logging Images
# PyTorch
import torch
from torchvision.utils import make_grid
# Single image
writer.add_image('Input/sample', img_tensor, epoch)
# Multiple images as grid
img_grid = make_grid(images[:64], nrow=8)
writer.add_image('Batch/inputs', img_grid, epoch)
# Predictions visualization
pred_grid = make_grid(predictions[:16], nrow=4)
writer.add_image('Predictions', pred_grid, epoch)
# TensorFlow
import tensorflow as tf
with file_writer.as_default():
# Encode images as PNG
tf.summary.image('Training samples', images, step=epoch, max_outputs=25)
5. Logging Histograms
# PyTorch: Track weight distributions
for name, param in model.named_parameters():
writer.add_histogram(name, param, epoch)
# Track gradients
if param.grad is not None:
writer.add_histogram(f'{name}.grad', param.grad, epoch)
# Track activations
writer.add_histogram('Activations/relu1', activations, epoch)
# TensorFlow
with file_writer.as_default():
tf.summary.histogram('weights/layer1', layer1.kernel, step=epoch)
tf.summary.histogram('activations/relu1', activations, step=epoch)
6. Logging Model Graph
# PyTorch
import torch
model = MyModel()
dummy_input = torch.randn(1, 3, 224, 224)
writer.add_graph(model, dummy_input)
writer.close()
# TensorFlow (automatic with Keras)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
log_dir='logs',
write_graph=True
)
model.fit(x, y, callbacks=[tensorboard_callback])
Advanced Features
Embedding Projector
Visualize high-dimensional data (embeddings, features) in 2D/3D.
import torch
from torch.utils.tensorboard import SummaryWriter
# Get embeddings (e.g., word embeddings, image features)
embeddings = model.get_embeddings(data) # Shape: (N, embedding_dim)
# Metadata (labels for each point)
metadata = ['class_1', 'class_2', 'class_1', ...]
# Images (optional, for image embeddings)
label_images = torch.stack([img1, img2, img3, ...])
# Log to TensorBoard
writer.add_embedding(
embeddings,
metadata=metadata,
label_img=label_images,
global_step=epoch
)
In TensorBoard:
- Navigate to "Projector" tab
- Choose PCA, t-SNE, or UMAP visualization
- Search, filter, and explore clusters
Hyperparameter Tuning
from torch.utils.tensorboard import SummaryWriter
# Try different hyperparameters
for lr in [0.001, 0.01, 0.1]:
    for batch_size in [16, 32, 64]:
        # Create unique run directory
        writer = SummaryWriter(f'runs/lr{lr}_bs{batch_size}')
        # Train and log
        for epoch in range(10):
            loss = train(lr, batch_size)
            writer.add_scalar('Loss/train', loss, epoch)
        # Log hyperparameters with the final metrics (only known after training)
        final_loss = loss
        final_acc = evaluate()
        writer.add_hparams(
            {'lr': lr, 'batch_size': batch_size},
            {'hparam/accuracy': final_acc, 'hparam/loss': final_loss}
        )
        writer.close()
# Compare in TensorBoard's "HParams" tab
Text Logging
# PyTorch: Log text (e.g., model predictions, summaries)
writer.add_text('Predictions', f'Epoch {epoch}: {predictions}', epoch)
writer.add_text('Config', str(config), 0)
# Log markdown tables
markdown_table = """
| Metric | Value |
|--------|-------|
| Accuracy | 0.95 |
| F1 Score | 0.93 |
"""
writer.add_text('Results', markdown_table, epoch)
PR Curves
Precision-Recall curves for classification.
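import torch
from torch.utils.tensorboard import SummaryWriter
# Get predictions and labels
# add_pr_curve expects probabilities in [0, 1], so convert logits with softmax
predictions = torch.softmax(model(test_data), dim=1) # Shape: (N, num_classes)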
labels = test_labels # Shape: (N,)
# Log PR curve for each class
for i in range(num_classes):
writer.add_pr_curve(
f'PR_curve/class_{i}',
labels == i,
predictions[:, i],
global_step=epoch
)
Integration Examples
PyTorch Training Loop
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
# Setup
writer = SummaryWriter('runs/resnet_experiment')
model = ResNet50()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
# Log model graph
dummy_input = torch.randn(1, 3, 224, 224)
writer.add_graph(model, dummy_input)
# Training loop
for epoch in range(50):
model.train()
train_loss = 0.0
train_correct = 0
fo