LlamaIndex - Data Framework for LLM Applications
The leading framework for connecting LLMs with your data.
When to use LlamaIndex
Use LlamaIndex when you are:
- Building RAG (retrieval-augmented generation) applications
- Answering questions over private documents
- Ingesting data from multiple sources (300+ connectors)
- Creating knowledge bases for LLMs
- Building chatbots with enterprise data
- Extracting structured data from documents
Metrics:
- 45,100+ GitHub stars
- 23,000+ repositories use LlamaIndex
- 300+ data connectors (LlamaHub)
- 1,715+ contributors
- v0.14.7 (stable)
Use alternatives instead:
- LangChain: when you need a more general-purpose framework (better for agents)
- Haystack: when you need production search pipelines
- txtai: when you need lightweight semantic search
- Chroma: when you only need vector storage
Quick start
Installation
# Starter package (recommended)
pip install llama-index
# Or minimal core + specific integrations
pip install llama-index-core
pip install llama-index-llms-openai
pip install llama-index-embeddings-openai
5-line RAG example
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# Read every file in the "data" directory into documents
reader = SimpleDirectoryReader("data")
documents = reader.load_data()
# Build a vector index over those documents
index = VectorStoreIndex.from_documents(documents)
# Ask a question through the index's default query engine
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)
Core concepts
1. Data connectors - Load documents
import os
from llama_index.core import SimpleDirectoryReader, Document
from llama_index.readers.web import SimpleWebPageReader
from llama_index.readers.github import GithubClient, GithubRepositoryReader
# Directory of files
documents = SimpleDirectoryReader("./data").load_data()
# Web pages
reader = SimpleWebPageReader()
documents = reader.load_data(["https://example.com"])
# GitHub repository (the reader needs a GithubClient; the token is read
# from the GITHUB_TOKEN environment variable here)
github_client = GithubClient(github_token=os.environ["GITHUB_TOKEN"])
reader = GithubRepositoryReader(
    github_client=github_client,
    owner="user",
    repo="repo",
)
documents = reader.load_data(branch="main")
# Manual document creation
doc = Document(
    text="This is the document content",
    metadata={"source": "manual", "date": "2025-01-01"},
)
2. Indices - Structure data
from llama_index.core import (
    StorageContext,
    SummaryIndex,
    TreeIndex,
    VectorStoreIndex,
    load_index_from_storage,
)
# Vector index (most common - semantic search)
vector_index = VectorStoreIndex.from_documents(documents)
# Summary index (sequential scan; formerly named ListIndex)
list_index = SummaryIndex.from_documents(documents)
# Tree index (hierarchical summary)
tree_index = TreeIndex.from_documents(documents)
# Save index (persist the vector index built above)
vector_index.storage_context.persist(persist_dir="./storage")
# Load index
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
3. Query engines - Ask questions
# Basic query
query_engine = index.as_query_engine()
response = query_engine.query("What is the main topic?")
print(response)
# Streaming response: print each chunk as soon as it is generated
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Explain quantum computing")
for text in response.response_gen:
    print(text, end="", flush=True)
# Custom configuration
query_engine = index.as_query_engine(
    similarity_top_k=3,  # Return top 3 chunks
    response_mode="compact",  # Or "tree_summarize", "simple_summarize"
    verbose=True,
)
4. Retrievers - Find relevant chunks
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters
# Vector retriever
retriever = index.as_retriever(similarity_top_k=5)
nodes = retriever.retrieve("machine learning")
# With filtering (filters must be a MetadataFilters object, not a plain dict;
# the key is the metadata field name itself)
retriever = index.as_retriever(
    similarity_top_k=3,
    filters=MetadataFilters(
        filters=[ExactMatchFilter(key="category", value="tutorial")]
    ),
)
# Custom retriever
from llama_index.core.retrievers import BaseRetriever
class CustomRetriever(BaseRetriever):
    # Skeleton retriever: put your own lookup logic in _retrieve.
    def _retrieve(self, query_bundle):
        # Your custom retrieval logic: fill this list with the
        # NodeWithScore objects that match query_bundle
        nodes = []
        return nodes
Agents with tools
Basic agent
import asyncio
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.llms.openai import OpenAI
# Define tools (plain functions; the type hints and docstrings describe
# each tool to the LLM)
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b
# Create agent (FunctionAgent is constructed directly; it has no from_tools())
llm = OpenAI(model="gpt-4o")
agent = FunctionAgent(
    tools=[multiply, add],
    llm=llm,
    system_prompt="You are a helpful assistant that can multiply and add numbers.",
)
# Use agent (FunctionAgent is workflow-based, so run() must be awaited)
async def main():
    response = await agent.run(user_msg="What is 25 * 17 + 142?")
    print(response)
asyncio.run(main())
RAG agent (document search + tools)
import asyncio
from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.tools import QueryEngineTool
# Create index as before
index = VectorStoreIndex.from_documents(documents)
# Wrap query engine as tool
query_tool = QueryEngineTool.from_defaults(
    query_engine=index.as_query_engine(),
    name="python_docs",
    description="Useful for answering questions about Python programming",
)
# Agent with document search + calculator
# (multiply, add and llm come from the basic agent example)
agent = FunctionAgent(
    tools=[query_tool, multiply, add],
    llm=llm,
)
# Agent decides when to search docs vs calculate; run() must be awaited
async def main():
    response = await agent.run(
        user_msg="According to the docs, what is Python used for?"
    )
    print(response)
asyncio.run(main())
Advanced RAG patterns
Chat engine (conversational)
from llama_index.core.chat_engine import CondensePlusContextChatEngine
# Build a chat engine that keeps the conversation history
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",  # Or "context", "react"
    verbose=True,
)
# Multi-turn conversation: later questions build on earlier turns
response1 = chat_engine.chat("What is Python?")
response2 = chat_engine.chat("Can you give examples?")  # Remembers context
response3 = chat_engine.chat("What about web frameworks?")
Metadata filtering
from llama_index.core.vector_stores import MetadataFilters, ExactMatchFilter
# One exact-match filter per metadata field
category_filter = ExactMatchFilter(key="category", value="tutorial")
difficulty_filter = ExactMatchFilter(key="difficulty", value="beginner")
filters = MetadataFilters(filters=[category_filter, difficulty_filter])
# The same filters object can be passed to a retriever...
retriever = index.as_retriever(similarity_top_k=3, filters=filters)
# ...or to a query engine
query_engine = index.as_query_engine(filters=filters)
Structured output
from pydantic import BaseModel
# Schema the answer is parsed into
class Summary(BaseModel):
    title: str
    main_points: list[str]
    conclusion: str
# Get structured response: pass the schema via output_cls
# (as_query_engine does not take an output_parser argument)
query_engine = index.as_query_engine(output_cls=Summary)
response = query_engine.query("Summarize the document")
summary = response.response  # Pydantic model (Summary instance)
print(summary.title, summary.main_points)
Data ingestion patterns
Multiple file types
# Load only the listed file types, descending into subdirectories
reader = SimpleDirectoryReader(
    "./data",
    recursive=True,
    required_exts=[".pdf", ".docx", ".txt", ".md"],
)
documents = reader.load_data()
Web scraping
from llama_index.readers.web import BeautifulSoupWebReader
# Pages to scrape
urls = [
    "https://docs.python.org/3/tutorial/",
    "https://docs.python.org/3/library/",
]
reader = BeautifulSoupWebReader()
documents = reader.load_data(urls=urls)
Database
from llama_index.readers.database import DatabaseReader
# The connection string is passed with the `uri` keyword
reader = DatabaseReader(
    uri="postgresql://user:pass@localhost/db",
)
# Run the SQL query and load the result rows as documents
documents = reader.load_data(query="SELECT * FROM articles")
API endpoints
import urllib.request
from llama_index.readers.json import JSONReader
# JSONReader reads a local file, so download the API response first
local_path, _ = urllib.request.urlretrieve("https://api.example.com/data.json")
reader = JSONReader()
documents = reader.load_data(input_file=local_path)
Vector store integrations
Chroma (local)
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
# Initialize Chroma
db = chromadb.PersistentClient(path="./chroma_db")
collection = db.get_or_create_collection("my_collection")
# Create vector store
vector_store = ChromaVectorStore(chroma_collection=collection)
# Use in index
from llama_index.core import StorageContext
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, st