Report Generator Skill
Generate professional markdown and HTML reports from data with charts, tables, and analysis.
Instructions
You are a report generation expert. When invoked:
1. Analyze Data:
   - Understand data structure and content
   - Identify key metrics and insights
   - Calculate statistics and trends
   - Detect patterns and anomalies
   - Generate executive summaries
2. Create Report Structure:
   - Design clear, logical sections
   - Create table of contents
   - Add executive summary
   - Include detailed analysis
   - Provide recommendations
3. Generate Visualizations:
   - Create tables for structured data
   - Generate charts (bar, line, pie, scatter)
   - Add badges and indicators
   - Include code blocks and examples
   - Format numbers and percentages
4. Format Output:
   - Generate markdown reports
   - Create HTML reports with styling
   - Export to PDF
   - Add branding and customization
   - Ensure responsive design
Usage Examples
@report-generator data.csv
@report-generator --format html
@report-generator --template executive-summary
@report-generator --charts --pdf
@report-generator --compare baseline.json current.json
Report Types
Executive Summary Report
def generate_executive_summary(data, title="Executive Summary"):
    """
    Generate a high-level executive summary report as a markdown string.

    The report has three sections: a "Key Highlights" bullet list, a
    "Performance Overview" table, and a numbered "Recommendations" list.

    Args:
        data: Input handed unchanged to ``calculate_key_metrics``.
        title: Text used for the report's top-level heading.

    Returns:
        The complete report as a single markdown string.

    Notes:
        Every metric must provide the keys ``'status'``, ``'name'`` and
        ``'value'``. Metrics that also carry ``'previous'`` must provide
        ``'current'``; both are rendered with thousands separators, and only
        those metrics get a row in the overview table. Each recommendation
        must provide ``'title'`` and ``'description'``; ``'priority'`` is
        optional and defaults to ``'medium'``.
    """
    from datetime import datetime

    # Any status / priority not listed here falls through to the default
    # marker passed to ``dict.get`` below.
    status_icons = {'good': "✅", 'warning': "⚠️"}
    priority_markers = {'high': "🔴", 'medium': "🟡"}

    generated_at = datetime.now().strftime('%B %d, %Y at %I:%M %p')
    parts = [
        f"# {title}\n",
        # The blank line after this paragraph is required: a text line
        # directly followed by "---" is parsed as a setext heading, not as
        # a horizontal rule.
        f"**Generated:** {generated_at}\n\n",
        "---\n",
        "## Key Highlights\n",
    ]

    # Calculate key metrics
    metrics = calculate_key_metrics(data)

    # Emit highlights as list items so each one renders on its own line
    # instead of collapsing into a single paragraph.
    for metric in metrics:
        icon = status_icons.get(metric['status'], "❌")
        parts.append(f"- {icon} **{metric['name']}**: {metric['value']}\n")

    parts.append(
        "\n"
        "---\n"
        "## Performance Overview\n"
        "| Metric | Current | Previous | Change |\n"
        "|--------|---------|----------|--------|\n"
    )

    for metric in metrics:
        # Only metrics with a previous value can show a change.
        if 'previous' not in metric:
            continue
        change = calculate_change(metric['current'], metric['previous'])
        if change > 0:
            arrow = "↑"
        elif change < 0:
            arrow = "↓"
        else:
            arrow = "→"
        parts.append(
            f"| {metric['name']} | {metric['current']:,} "
            f"| {metric['previous']:,} | {arrow} {abs(change):.1f}% |\n"
        )

    parts.append(
        "\n"
        "---\n"
        "## Recommendations\n"
    )

    recommendations = generate_recommendations(metrics)
    for index, rec in enumerate(recommendations, 1):
        marker = priority_markers.get(rec.get('priority', 'medium'), "🟢")
        parts.append(f"{index}. {marker} **{rec['title']}**\n")
        parts.append(f" {rec['description']}\n\n")

    return "".join(parts)
Data Analysis Report
import pandas as pd
import numpy as np
from datetime import datetime
def generate_data_analysis_report(df, title="Data Analysis Report"):
"""
Generate comprehensive data analysis report
"""
report = f"""# {title}
**Date:** {datetime.now().strftime('%Y-%m-%d')}
**Dataset:** {len(df):,} rows × {len(df.columns)} columns
---
## Table of Contents
1. [Dataset Overview](#dataset-overview)
2. [Data Quality](#data-quality)
3. [Statistical Summary](#statistical-summary)
4. [Distributions](#distributions)
5. [Correlations](#correlations)
6. [Insights](#insights)
---
## Dataset Overview
### Basic Information
- **Total Rows:** {len(df):,}
- **Total Columns:** {len(df.columns)}
- **Memory Usage:** {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB
- **Duplicate Rows:** {df.duplicated().sum():,}
### Column Information
| Column | Type | Non-Null | Unique | Sample Values |
|--------|------|----------|--------|---------------|
"""
for col in df.columns:
dtype = str(df[col].dtype)
non_null = df[col].count()
unique = df[col].nunique()
samples = df[col].dropna().head(3).tolist()
sample_str = ", ".join(str(s) for s in samples)
report += f"| {col} | {dtype} | {non_null:,} | {unique:,} | {sample_str} |\n"
report += """
---
## Data Quality
### Missing Values
"""
missing = df.isnull().sum()
if missing.sum() > 0:
report += "| Column | Missing Count | Missing % |\n"
report += "|--------|---------------|----------|\n"
for col in missing[missing > 0].index:
count = missing[col]
pct = (count / len(df)) * 100
report += f"| {col} | {count:,} | {pct:.1f}% |\n"
else:
report += "✅ No missing values detected.\n"
report += "\n### Data Type Issues\n\n"
# Check for potential type issues
type_issues = []
for col in df.select_dtypes(include=['object']):
# Check if column should be numeric
try:
pd.to_numeric(df[col], errors='raise')
type_issues.append(f"- `{col}` appears to be numeric but stored as string")
except:
pass
# Check if column should be datetime
try:
pd.to_datetime(df[col], errors='raise')
if df[col].str.contains(r'\d{4}-\d{2}-\d{2}').any():
type_issues.append(f"- `{col}` appears to be datetime but stored as string")
except:
pass
if type_issues:
report += "\n".join(type_issues) + "\n"
else:
report += "✅ No data type issues detected.\n"
report += """
---
## Statistical Summary
### Numeric Columns
"""
# Add statistics for numeric columns
numeric_cols = df.select_dtypes(include=[np.number]).columns
if len(numeric_cols) > 0:
stats = df[numeric_cols].describe()
report += stats.to_markdown() + "\n"
# Add additional statistics
report += "\n### Additional Statistics\n\n"
report += "| Column | Median | Mode | Std Dev | Variance |\n"
report += "|--------|--------|------|---------|----------|\n"
for col in numeric_cols:
median = df[col].median()
mode = df[col].mode().iloc[0] if not df[col].mode().empty else "N/A"
std = df[col].std()
var = df[col].var()
report += f"| {col} | {median:.2f} | {mode} | {std:.2f} | {var:.2f} |\n"
report += """
### Categorical Columns
"""
categorical_cols = df.select_dtypes(include=['object']).columns
if len(categorical_cols) > 0:
for col in categorical_cols[:5]: # Limit to first 5
report += f"\n#### {col}\n\n"
value_counts = df[col].value_counts().head(10)
report += "| Value | Count | Percentage |\n"
report += "|-------|-------|------------|\n"
for value, count in value_counts.items():
pct = (count / len(df)) * 100
report += f"| {value} | {count:,} | {pct:.1f}% |\n"
report += """
---
## Distributions
"""
# Analyze distributions of numeric columns
for col in numeric_cols[:5]: # Limit to first 5
report += f"\n### {col} Distribution\n\n"
q1 = df[col].quantile(0.25)
q2 = df[col].quantile(0.50)
q3 = df[col].quantile(0.75)
iqr = q3 - q1
# Detect outliers
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]
report += f"""
**Quartiles:**
- Q1 (25%): {q1:.2f}
- Q2 (50%, Median): {q2:.2f}
- Q3 (75%): {q3:.2f}
- IQR: {iqr:.2f}
**Outliers:** {len(outliers)} ({len(outliers)/len(df)*100:.1f}%)
- Lower bound: {lower_bound:.2f}
- Upper bound: {upper_bound:.2f}
"""
report += """
---
## Correlations
"""
if len(numeric_cols) > 1:
corr_matrix = df[numeric_cols].corr()
report += "\n### Correlation Matrix\n\n"
report += corr_matrix.to_markdown() + "\n"
# Find strong correlations
report += "\n### Strong Correlations (|r| > 0.7)\n\n"
strong_corr = []
for i in range(len(c