monarch-database
Overview
The Monarch Initiative integrates disease-phenotype-gene relationships from 30+ biomedical databases (OMIM, Orphanet, ClinVar, MGI, ZFIN, Reactome) into a unified knowledge graph. The REST API at https://api.monarchinitiative.org/v3/api provides access to associations between genes, diseases, and phenotypes using MONDO disease IDs, Human Phenotype Ontology (HPO) terms, and standard gene identifiers. No authentication is required; the service is free for academic use.
When to Use
- Mapping a disease (MONDO ID) to all associated causal genes and their evidence sources
- Retrieving phenotype profiles (HP terms) for a disease to build phenotypic similarity models
- Ranking candidate genes by phenotypic similarity to a patient's HPO symptom list
- Querying cross-species gene-phenotype associations (mouse, zebrafish, fly) for model organism comparisons
- Exploring rare disease gene-phenotype networks for diagnostic candidate generation
- Resolving entity metadata (gene symbol, disease name, phenotype label) from a MONDO/HP/HGNC ID
- Use
opentargets-databaseinstead when you need drug-target evidence scores or tractability data alongside disease associations - Use
clinvar-databasewhen you need clinical pathogenicity classifications with submitter review status
Prerequisites
- Python packages:
requests,pandas,matplotlib - Data requirements: MONDO IDs (e.g.,
MONDO:0007374), HP term IDs (e.g.,HP:0001250), or gene symbols/HGNC IDs - Environment: internet connection; no API key required
- Rate limits: no published rate limit; use
time.sleep(0.3)between batch requests; avoid bursts over 10 requests/second
pip install requests pandas matplotlib
Quick Start
import requests
MONARCH_API = "https://api.monarchinitiative.org/v3/api"
def monarch_get(endpoint: str, params: dict = None) -> dict:
    """Send a GET request to the Monarch API and return the decoded JSON body.

    Args:
        endpoint: Path appended verbatim to ``MONARCH_API`` (e.g. ``"/search"``).
        params: Optional query-string parameters.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = f"{MONARCH_API}{endpoint}"
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()
# Get the genes associated with Marfan syndrome (MONDO:0007947).
# Gene-to-disease associations have the gene as subject and the disease as
# object, so the disease ID is passed as the `object` filter.
# NOTE(review): Query 1 uses biolink:CausalGeneToDiseaseAssociation; confirm
# that the API also accepts the plain parent category used here.
result = monarch_get("/association/all", params={
    "object": "MONDO:0007947",
    "category": "biolink:GeneToDiseaseAssociation",
    "limit": 10,
})
print(f"Total gene associations: {result['total']}")
for item in result["items"][:5]:
    gene = item.get("subject")
    if isinstance(gene, dict):
        # Nested form: {"id": ..., "label": ...}
        gene_id = gene.get("id", "N/A")
        gene_label = gene.get("label", "N/A")
    else:
        # Flat form: CURIE string with the label in a sibling field.
        gene_id = gene or "N/A"
        gene_label = item.get("subject_label", "N/A")
    print(f" Gene: {gene_label} ({gene_id})")
# Example output (counts vary between knowledge-graph releases):
# Total gene associations: 3
# Gene: FBN1 (HGNC:3603)
Core API
Query 1: Disease-Gene Associations
Retrieve all genes associated with a disease by MONDO ID. Returns causal gene records with evidence metadata.
import requests
import pandas as pd
import time
MONARCH_API = "https://api.monarchinitiative.org/v3/api"
def monarch_get(endpoint, params=None):
    """GET ``MONARCH_API + endpoint`` and return the parsed JSON response.

    Raises ``requests.HTTPError`` on any 4xx/5xx status.
    """
    url = f"{MONARCH_API}{endpoint}"
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()
def get_disease_genes(mondo_id: str, limit: int = 200) -> pd.DataFrame:
    """Return a DataFrame of genes causally associated with a disease.

    Gene-to-disease associations carry the gene as subject and the disease as
    object, so the query filters on ``object`` and reads the gene from the
    subject side. Both flat records (``subject`` is a CURIE string with
    ``subject_label`` / ``subject_taxon_label`` alongside) and nested
    ``{"id", "label", "taxon"}`` objects are accepted.

    Args:
        mondo_id: Disease identifier, e.g. ``"MONDO:0009061"``.
        limit: Maximum number of associations to request.

    Returns:
        DataFrame with columns ``gene_id``, ``gene_symbol``, ``taxon``,
        ``relation`` and ``evidence_count``. The columns are present even when
        no association is found.

    Raises:
        requests.HTTPError: Propagated from ``monarch_get`` on HTTP errors.
    """
    columns = ["gene_id", "gene_symbol", "taxon", "relation", "evidence_count"]
    result = monarch_get("/association/all", params={
        "object": mondo_id,
        "category": "biolink:CausalGeneToDiseaseAssociation",
        "limit": limit,
    })
    rows = []
    for item in result.get("items", []):
        gene = item.get("subject")
        if isinstance(gene, dict):
            taxon = gene.get("taxon")
            gene_id = gene.get("id")
            gene_symbol = gene.get("label")
            taxon_label = taxon.get("label") if isinstance(taxon, dict) else None
        else:
            gene_id = gene
            gene_symbol = item.get("subject_label")
            taxon_label = item.get("subject_taxon_label")
        # Evidence codes may be listed under either key depending on record shape.
        evidence = item.get("has_evidence") or item.get("evidence") or []
        rows.append({
            "gene_id": gene_id,
            "gene_symbol": gene_symbol,
            "taxon": taxon_label,
            "relation": item.get("predicate"),
            "evidence_count": len(evidence),
        })
    # Explicit columns keep downstream column selection valid on empty results.
    return pd.DataFrame(rows, columns=columns)
# Cystic fibrosis (MONDO:0009061)
df = get_disease_genes("MONDO:0009061")
print(f"Genes for cystic fibrosis: {len(df)}")
print(df[["gene_symbol", "gene_id", "relation"]].to_string(index=False))
# Example output:
# Genes for cystic fibrosis: 1
# gene_symbol gene_id relation
# CFTR HGNC:1884 biolink:causes
Query 2: Disease-Phenotype Associations
Retrieve HPO phenotype terms linked to a disease. Useful for building phenotype profiles and similarity scoring.
def get_disease_phenotypes(mondo_id: str, limit: int = 200) -> pd.DataFrame:
    """Return a DataFrame of phenotypes (HP terms) annotated to a disease.

    Accepts both flat association records (``object`` is a CURIE string with
    ``object_label``; frequency/onset in ``*_qualifier`` /
    ``*_qualifier_label`` fields) and nested ``{"id", "label"}`` objects.

    Args:
        mondo_id: Disease identifier, e.g. ``"MONDO:0007947"``.
        limit: Maximum number of associations to request.

    Returns:
        DataFrame with columns ``hp_id``, ``phenotype``, ``frequency`` and
        ``onset``. The columns are present even when nothing is found.

    Raises:
        requests.HTTPError: Propagated from ``monarch_get`` on HTTP errors.
    """
    def qualifier(item: dict, name: str):
        """Read a frequency/onset qualifier from either record shape."""
        nested = item.get(name)
        if isinstance(nested, dict):
            return nested.get("label")
        return (
            item.get(f"{name}_qualifier_label")
            or item.get(f"{name}_qualifier")
            or nested
        )

    columns = ["hp_id", "phenotype", "frequency", "onset"]
    result = monarch_get("/association/all", params={
        "subject": mondo_id,
        "category": "biolink:DiseaseToPhenotypicFeatureAssociation",
        "limit": limit,
    })
    rows = []
    for item in result.get("items", []):
        phenotype = item.get("object")
        if isinstance(phenotype, dict):
            hp_id = phenotype.get("id")
            label = phenotype.get("label")
        else:
            hp_id = phenotype
            label = item.get("object_label")
        rows.append({
            "hp_id": hp_id,
            "phenotype": label,
            "frequency": qualifier(item, "frequency"),
            "onset": qualifier(item, "onset"),
        })
    # Explicit columns keep downstream column selection valid on empty results.
    return pd.DataFrame(rows, columns=columns)
# Marfan syndrome (MONDO:0007947)
df = get_disease_phenotypes("MONDO:0007947", limit=50)
print(f"Phenotypes for Marfan syndrome: {len(df)}")
print(df[["phenotype", "hp_id", "frequency"]].head(8).to_string(index=False))
# Example output:
# Phenotypes for Marfan syndrome: 26
# phenotype hp_id frequency
# Aortic root aneurysm HP:0002616 HP:0040281 ...
Query 3: Entity Lookup
Retrieve metadata for any Monarch entity (gene, disease, phenotype) by its identifier.
def get_entity(entity_id: str) -> dict:
    """Fetch the metadata record for a gene, disease, or phenotype ID.

    Args:
        entity_id: Identifier placed in the ``/entity/<id>`` path,
            e.g. ``"HP:0001250"``.

    Returns:
        The JSON document returned by ``monarch_get`` for that entity.
    """
    endpoint = f"/entity/{entity_id}"
    return monarch_get(endpoint)
# Look up HP:0001250 (Seizure)
hp = get_entity("HP:0001250")
print(f"Name: {hp.get('name')}")
print(f"ID: {hp.get('id')}")
# `description` can be present but null, so fall back to "" before slicing.
print(f"Description: {(hp.get('description') or '')[:120]}")
# Synonyms may be plain strings under `synonym` or {"val": ...} dicts under
# `synonyms`; accept either shape.
raw_synonyms = hp.get("synonym") or hp.get("synonyms") or []
synonyms = [s.get("val") if isinstance(s, dict) else s for s in raw_synonyms[:3]]
print(f"Synonyms: {synonyms}")
# Name: Seizure
# ID: HP:0001250
# Description: A seizure is an intermittent abnormality of nervous system physiology ...
# Look up a MONDO disease
disease = get_entity("MONDO:0007374")
print(f"\nDisease: {disease.get('name')}")
print(f"ID: {disease.get('id')}")
Query 4: Text Search for Entities
Search for entities by free-text label, useful for resolving disease names or phenotype terms to IDs.
def search_entities(query: str, category: str = None, limit: int = 10) -> list:
    """Run a free-text search against the ``/search`` endpoint.

    Args:
        query: Text sent as the ``q`` parameter.
        category: Optional category filter; omitted from the request when falsy.
        limit: Maximum number of hits to request.

    Returns:
        The ``items`` list from the response, or ``[]`` if that key is absent.
    """
    category_filter = {"category": category} if category else {}
    query_params = {"q": query, "limit": limit, **category_filter}
    response = monarch_get("/search", params=query_params)
    return response.get("items", [])
# Resolve the free-text name "Ehlers-Danlos" to disease IDs
hits = search_entities("Ehlers-Danlos", category="biolink:Disease", limit=8)
for match in hits:
    identifier = match.get("id")
    label = match.get("name", "N/A")
    print(f" {identifier:<25} {label}")
# MONDO:0020066 Ehlers-Danlos syndrome
# MONDO:0007522 classical Ehlers-Danlos syndrome
# MONDO:0007528 hypermobile Ehlers-Danlos syndrome
# MONDO:0007523 kyphoscoliotic Ehlers-Danlos syndrome
Query 5: Gene-to-Disease Associations
Retrieve diseases associated with a gene. Useful for understanding a gene's disease spectrum.
def get_gene_diseases(gene_id: str, limit: int = 100) -> pd.DataFrame:
    """Return a DataFrame of diseases associated with a gene.

    The gene is the association subject and the disease is the object. Both
    flat records (``object`` is a CURIE string with ``object_label``) and
    nested ``{"id", "label"}`` objects are accepted.

    Args:
        gene_id: Gene identifier, e.g. ``"HGNC:3603"``.
        limit: Maximum number of associations to request.

    Returns:
        DataFrame with columns ``disease_id``, ``disease_name`` and
        ``predicate``. The columns are present even when nothing is found.

    Raises:
        requests.HTTPError: Propagated from ``monarch_get`` on HTTP errors.
    """
    # NOTE(review): Query 1 uses biolink:CausalGeneToDiseaseAssociation;
    # confirm that the API also accepts the plain parent category used here.
    columns = ["disease_id", "disease_name", "predicate"]
    result = monarch_get("/association/all", params={
        "subject": gene_id,
        "category": "biolink:GeneToDiseaseAssociation",
        "limit": limit,
    })
    rows = []
    for item in result.get("items", []):
        disease = item.get("object")
        if isinstance(disease, dict):
            disease_id = disease.get("id")
            disease_name = disease.get("label")
        else:
            disease_id = disease
            disease_name = item.get("object_label")
        rows.append({
            "disease_id": disease_id,
            "disease_name": disease_name,
            "predicate": item.get("predicate"),
        })
    # Explicit columns keep downstream column selection valid on empty results.
    return pd.DataFrame(rows, columns=columns)
# Diseases caused by FBN1 (HGNC:3603)
df = get_gene_diseases("HGNC:3603")
print(f"Diseases linked to FBN1: {len(df)}")
print(df[["disease_name", "disease_id"]].h