Histolab WSI Processing
Overview
Histolab is a Python library for processing whole slide images (WSI) in digital pathology. It automates tissue detection, extracts informative tiles from gigapixel images using multiple strategies, and provides composable filter pipelines for preprocessing. The library handles SVS, TIFF, NDPI, and other WSI formats via OpenSlide.
When to Use
- Extracting tiles from whole slide images for deep learning model training
- Detecting tissue regions and filtering background/artifacts in histopathology slides
- Building preprocessing pipelines for H&E or IHC stained tissue sections
- Creating quality-driven tile datasets ranked by nuclei density or cellularity
- Performing batch tile extraction across slide collections with consistent parameters
- Assessing slide quality and tissue coverage before computational pathology workflows
- For raw slide access without tile extraction, use `openslide-python` directly
- For complex multiplexed imaging or spatial proteomics pipelines, use `pathml` instead
Prerequisites
- Python packages: `histolab` (includes OpenSlide Python bindings)
- System dependency: OpenSlide C library must be installed separately
- Supported formats: SVS, TIFF, NDPI, VMS, SCN, MRXS (via OpenSlide)
# Install the OpenSlide C library first (system dependency), then the Python package.
# macOS
brew install openslide
pip install histolab
# Ubuntu/Debian
sudo apt-get install openslide-tools
pip install histolab
Quick Start
from histolab.slide import Slide
from histolab.tiler import RandomTiler

# Load slide; processed_path is where extracted tiles are written
slide = Slide("slide.svs", processed_path="output/")
print(f"Dimensions: {slide.dimensions}, Levels: {slide.levels}")

# Configure tiler
tiler = RandomTiler(
    tile_size=(512, 512),
    n_tiles=100,
    level=0,
    seed=42,
    check_tissue=True,
    tissue_percent=80.0,
)

# Preview and extract.
# locate_tiles() accepts no n_tiles argument: the number of previewed tiles
# is the one configured on the tiler itself. It returns a PIL image of the
# slide thumbnail with the tile outlines drawn on it.
preview = tiler.locate_tiles(slide)
tiler.extract(slide)
Core API
Module 1: Slide Management
The Slide class is the primary interface for loading and inspecting WSI files.
from pathlib import Path

from histolab.data import prostate_tissue
from histolab.slide import Slide
from histolab.types import CP

# Load from built-in sample data (prostate, ovarian, breast, heart, kidney)
prostate_svs, prostate_path = prostate_tissue()
slide = Slide(prostate_path, processed_path="output/")

# Inspect properties
print(f"Dimensions: {slide.dimensions}")  # (width, height) at level 0
print(f"Levels: {slide.levels}")  # Available pyramid levels
# level_dimensions is a method that takes a level, not a property
level_dims = [slide.level_dimensions(level) for level in slide.levels]
print(f"Level dims: {level_dims}")  # Dimensions per level
print(f"Magnification: {slide.properties.get('openslide.objective-power', 'N/A')}")
print(f"MPP-X: {slide.properties.get('openslide.mpp-x', 'N/A')}")

# Thumbnail and scaled image
# slide.thumbnail is a PIL image, so it is saved explicitly under processed_path
Path("output").mkdir(parents=True, exist_ok=True)
slide.thumbnail.save("output/thumbnail.png")
scaled = slide.scaled_image(scale_factor=32)

# Extract region at specific coordinates.
# extract_tile takes level-0 coordinates as (x_ul, y_ul, x_br, y_br) and
# returns a Tile; the pixel data is available as region.image.
region = slide.extract_tile(
    CP(1000, 2000, 1000 + 512, 2000 + 512), tile_size=(512, 512), level=0
)
Module 2: Tissue Detection
Mask classes identify tissue regions and filter background for tile extraction.
from histolab.masks import TissueMask, BiggestTissueBoxMask, BinaryMask
import numpy as np
# NOTE: `slide` is assumed to be the Slide loaded in the Slide Management example.
# TissueMask: segments ALL tissue regions (multiple sections)
tissue_mask = TissueMask()
# Calling the mask object on a slide produces the mask array
mask_array = tissue_mask(slide) # Binary NumPy array: True=tissue, False=background
# Coverage = number of True pixels / total pixels, as a percentage
print(f"Tissue coverage: {mask_array.sum() / mask_array.size * 100:.1f}%")
# BiggestTissueBoxMask: bounding box of largest tissue region (default)
biggest_mask = BiggestTissueBoxMask()
# Visualize mask on slide thumbnail
slide.locate_mask(tissue_mask)
# Custom mask via BinaryMask subclass
class RectangularROI(BinaryMask):
    """Rectangular region of interest, expressed in thumbnail pixel coordinates.

    Args:
        x: Column of the rectangle's upper-left corner.
        y: Row of the rectangle's upper-left corner.
        w: Rectangle width in pixels.
        h: Rectangle height in pixels.
    """

    def __init__(self, x, y, w, h):
        self.x, self.y, self.w, self.h = x, y, w, h

    def _mask(self, slide):
        """Return a boolean array, True inside the rectangle, sized like the thumbnail."""
        # slide.thumbnail is a PIL image, which has no .shape attribute;
        # go through NumPy to get (rows, cols) regardless of the image type.
        rows, cols = np.asarray(slide.thumbnail).shape[:2]
        mask = np.zeros((rows, cols), dtype=bool)
        mask[self.y:self.y + self.h, self.x:self.x + self.w] = True
        return mask
Module 3: Tile Extraction
Three strategies for extracting tiles: random sampling, grid coverage, and score-based selection.
from histolab.tiler import RandomTiler, GridTiler, ScoreTiler
from histolab.scorer import NucleiScorer
from histolab.masks import TissueMask
# RandomTiler: fixed number of randomly positioned tiles
random_tiler = RandomTiler(
    tile_size=(512, 512),
    n_tiles=100,
    level=0,
    seed=42,
    check_tissue=True,
    tissue_percent=80.0,
)
# Preview first. locate_tiles() has no n_tiles argument; it outlines the
# tiles this tiler is configured to extract and returns a PIL image.
random_tiler.locate_tiles(slide)
random_tiler.extract(slide)

# GridTiler: systematic grid coverage
grid_tiler = GridTiler(
    tile_size=(512, 512),
    level=0,
    pixel_overlap=0,
    check_tissue=True,
    tissue_percent=70.0,
)
grid_tiler.extract(slide, extraction_mask=TissueMask())

# ScoreTiler: top-ranked tiles by scoring function
score_tiler = ScoreTiler(
    tile_size=(512, 512),
    n_tiles=50,
    level=0,
    scorer=NucleiScorer(),
    check_tissue=True,
)
score_tiler.extract(slide, report_path="tiles_report.csv")
# Report CSV columns: filename, score, scaled_score
Module 4: Filters and Preprocessing
Composable image and morphological filters for tissue detection and preprocessing.
import numpy as np

# Compose and Lambda are defined in image_filters (compositions holds the
# internal FiltersComposition helpers). LocalOtsuThreshold is histolab's
# locally-adaptive thresholding filter.
from histolab.filters.image_filters import (
    Compose,
    HistogramEqualization,
    Invert,
    Lambda,
    LocalOtsuThreshold,
    OtsuThreshold,
    RgbToGrayscale,
    RgbToHed,
    RgbToHsv,
    StretchContrast,
)
from histolab.filters.morphological_filters import (
    BinaryClosing,
    BinaryDilation,
    BinaryErosion,
    BinaryOpening,
    RemoveSmallHoles,
    RemoveSmallObjects,
)
from histolab.masks import TissueMask
from histolab.tile import Tile

# Standard tissue detection pipeline
tissue_filters = [
    RgbToGrayscale(),
    OtsuThreshold(),
    BinaryDilation(disk_size=5),
    RemoveSmallHoles(area_threshold=1000),
    RemoveSmallObjects(min_size=500),  # the size parameter is named min_size
]
tissue_pipeline = Compose(tissue_filters)

# Use custom pipeline with TissueMask.
# TissueMask takes its filters as positional arguments, not a `filters=` keyword.
custom_mask = TissueMask(*tissue_filters)

# Stain deconvolution (H&E)
hed_filter = RgbToHed()  # Hematoxylin-Eosin-DAB separation

# Apply filters to individual tiles.
# `tile` must be an existing histolab Tile (for example one returned by
# slide.extract_tile); it is not created in this snippet.
filter_chain = Compose([RgbToGrayscale(), StretchContrast()])
filtered_tile = tile.apply_filters(filter_chain)

# Lambda for custom inline filters.
# Filters may receive PIL images, so convert to an array before doing
# arithmetic or channel indexing.
brightness = Lambda(
    lambda img: np.clip(np.asarray(img) * 1.2, 0, 255).astype(np.uint8)
)
red_channel = Lambda(lambda img: np.asarray(img)[:, :, 0])
Module 5: Scoring
Scorers rank tiles by tissue content quality for use with ScoreTiler.
import numpy as np

from histolab.scorer import CellularityScorer, NucleiScorer, Scorer
from histolab.tiler import ScoreTiler

# Built-in scorers
nuclei = NucleiScorer()  # Scores by nuclei density (grayscale threshold + count)
cellularity = CellularityScorer()  # Scores by overall cellular content


# Custom scorer
class ColorVarianceScorer(Scorer):
    """Score tiles by color variance (higher = more informative)."""

    def __call__(self, tile):
        """Return the sum of the per-channel pixel variances of ``tile.image``."""
        tile_array = np.array(tile.image)
        # Variance over the two spatial axes gives one value per channel;
        # return a plain float rather than a NumPy scalar.
        return float(np.var(tile_array, axis=(0, 1)).sum())


score_tiler = ScoreTiler(
    tile_size=(512, 512),
    n_tiles=30,
    scorer=ColorVarianceScorer(),
)
Module 6: Visualization
Built-in methods and matplotlib patterns for inspecting slides, masks, and tiles.
import matplotlib.pyplot as plt
from histolab.masks import TissueMask
# Built-in: mask overlay on slide thumbnail
slide.locate_mask(TissueMask())

# Built-in: tile location preview.
# locate_tiles() has no n_tiles argument; the tile count comes from the
# tiler's own configuration.
tiler.locate_tiles(slide)

# Manual side-by-side: slide vs mask
mask = TissueMask()
mask_array = mask(slide)
fig, axes = plt.subplots(1, 2, figsize=(15, 7))

axes[0].imshow(slide.thumbnail)
axes[0].set_title("Slide")
axes[0].axis('off')

axes[1].imshow(mask_array, cmap='gray')
axes[1].set_title("Mask")
axes[1].axis('off')

plt.tight_layout()
plt.show()
# Display extracted tiles in grid
from pathlib import Path
from PIL import Image
tile_paths = list(Path("output/tiles/").glob("*.png"))[:16]
fig, axes = plt.subplots(4, 4, figsize=(12, 12))
for