Page monitoring methodology
Patterns for tracking web page changes, detecting content removal, and preserving important pages before they disappear.
Monitoring service comparison
Free-tier limits and retention windows change frequently — check each service's pricing page before relying on a specific number. The table below reflects a 2026 snapshot.
| Service | Free Tier | Best For | History | Alert Speed |
|---|---|---|---|---|
| Visualping | A few daily checks (free plan tightened in recent years) | Visual changes | Standard | Minutes |
| ChangeTower | Yes (verify current limits) | Compliance, archiving | Multi-year on paid plans | Minutes |
| Distill.io | ~5 monitors with 7-day history | Element-level tracking | Limited on free tier | Seconds |
| Wachete | Limited | Login-protected pages | 12 months | Minutes |
| UptimeRobot | 50 monitors at 5-minute intervals (free SMS removed) | Uptime only | 60 days | 5-min checks |
| changedetection.io | Self-hosted; free | Privacy / DIY | Limited only by disk space | Configurable |
| urlwatch | Self-hosted; free | Cron-driven CLI | Configurable | Configurable |
Quick-start: Monitor a page
Distill.io element monitoring
// Distill.io accepts CSS/XPath selectors, so a monitor can target one part of a page.
// Sample selectors for typical monitoring targets follow.

// Headlines of news articles
const newsSelector = ['.article-headline', 'h1.title', '.story-title'].join(', ');

// Price changes
const priceSelector = ['.price', '.product-price', '[data-price]'].join(', ');

// Stock / availability status
const availabilitySelector = ['.in-stock', '.availability', '.stock-status'].join(', ');

// A specific paragraph or section
const sectionSelector = '#main-content p:first-child';

// Rows of a data table
const tableSelector = 'table.data-table tbody tr';
Python monitoring script
import requests
import hashlib
import json
import smtplib
from email.mime.text import MIMEText
from datetime import datetime
from pathlib import Path
from typing import Optional
from bs4 import BeautifulSoup
class PageMonitor:
    """Simple page change monitor with local storage.

    Per-URL state is kept in ``monitor_state.json`` inside ``storage_dir``.
    Each detected change is additionally archived in the same directory as
    a timestamped ``.txt`` file containing the full new content.
    """

    def __init__(self, storage_dir: Path):
        """Create the storage directory if needed and load any saved state.

        Args:
            storage_dir: Directory holding the state file and the archives.
                A plain string path is accepted as well.
        """
        # Coerce so a str argument does not fail on .mkdir() or the "/" operator.
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.state_file = self.storage_dir / 'monitor_state.json'
        self.state = self._load_state()

    def _load_state(self) -> dict:
        """Return the saved state, or a fresh empty state if no file exists.

        Raises:
            json.JSONDecodeError: If the state file exists but is not valid
                JSON. This is surfaced rather than silently discarding state.
        """
        if self.state_file.exists():
            state = json.loads(self.state_file.read_text(encoding='utf-8'))
            # Every other method indexes state['pages']; guarantee it exists.
            state.setdefault('pages', {})
            return state
        return {'pages': {}}

    def _save_state(self):
        """Persist the current state to disk."""
        # Write to a sibling temp file and swap it in, so an interruption
        # mid-write cannot leave a truncated state file behind.
        tmp_file = self.state_file.with_name(self.state_file.name + '.tmp')
        tmp_file.write_text(json.dumps(self.state, indent=2), encoding='utf-8')
        tmp_file.replace(self.state_file)

    def _get_page_hash(self, url: str, selector: Optional[str] = None) -> tuple[str, str]:
        """Get content hash and content for a page or element.

        Args:
            url: Page to fetch.
            selector: Optional CSS selector. When given, only the text of
                the first matching element is used; if nothing matches, the
                content is the empty string.

        Returns:
            ``(sha256_hex_digest, content)``.

        Raises:
            requests.RequestException: On network failure or a non-2xx
                response (via ``raise_for_status``).
        """
        response = requests.get(url, timeout=30, headers={
            'User-Agent': 'Mozilla/5.0 (PageMonitor/1.0)'
        })
        response.raise_for_status()

        if selector:
            soup = BeautifulSoup(response.text, 'html.parser')
            element = soup.select_one(selector)
            content = element.get_text(strip=True) if element else ''
        else:
            content = response.text

        content_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()
        return content_hash, content

    def _archive_content(self, url: str, content: str) -> Path:
        """Write ``content`` to a timestamped archive file and return its path."""
        # MD5 is used only to derive a filesystem-safe name from the URL.
        url_digest = hashlib.md5(url.encode()).hexdigest()
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_file = self.storage_dir / f"{url_digest}_{stamp}.txt"
        # Explicit UTF-8: page text is arbitrary Unicode, and the platform
        # default encoding may be unable to represent it.
        archive_file.write_text(content, encoding='utf-8')
        return archive_file

    def add_page(self, url: str, name: str, selector: Optional[str] = None):
        """Add a page to monitor.

        Fetches the page once to record its baseline hash, then saves state.
        An existing entry for ``url`` is replaced.

        Raises:
            requests.RequestException: If the initial fetch fails.
        """
        content_hash, content = self._get_page_hash(url, selector)

        self.state['pages'][url] = {
            'name': name,
            'selector': selector,
            'last_hash': content_hash,
            'last_check': datetime.now().isoformat(),
            'last_content': content[:1000],  # Store preview
            'change_count': 0
        }
        self._save_state()
        print(f"Added: {name} ({url})")

    def check_page(self, url: str) -> Optional[dict]:
        """Check single page for changes.

        Returns:
            ``None`` if ``url`` is not monitored. Otherwise a dict with
            ``url``, ``name`` and ``status``:

            * ``'error'``: the fetch failed; ``error`` holds the message
              and the stored state is left untouched.
            * ``'changed'`` / ``'unchanged'``: ``previous_content`` holds
              the stored preview, and ``new_content`` holds the new preview
              (``None`` when unchanged).
        """
        if url not in self.state['pages']:
            return None

        page = self.state['pages'][url]
        selector = page.get('selector')

        try:
            new_hash, new_content = self._get_page_hash(url, selector)
        except Exception as e:
            # Deliberately broad: one failing page is reported as a result
            # instead of aborting a whole check_all() run.
            return {
                'url': url,
                'name': page['name'],
                'status': 'error',
                'error': str(e)
            }

        changed = new_hash != page['last_hash']
        result = {
            'url': url,
            'name': page['name'],
            'status': 'changed' if changed else 'unchanged',
            'previous_content': page['last_content'],
            'new_content': new_content[:1000] if changed else None
        }

        if changed:
            # Archive before touching the state, so a failed write does not
            # leave the in-memory state ahead of what was archived.
            self._archive_content(url, new_content)
            page['last_hash'] = new_hash
            page['last_content'] = new_content[:1000]
            page['change_count'] += 1

        page['last_check'] = datetime.now().isoformat()
        self._save_state()
        return result

    def check_all(self) -> list[dict]:
        """Check all monitored pages and return their result dicts."""
        results = []
        for url in self.state['pages']:
            result = self.check_page(url)
            if result:
                results.append(result)
        return results
# Usage
monitor = PageMonitor(Path('./page_monitor_data'))

# Add pages to monitor (this fetches the page once to record a baseline)
monitor.add_page(
    'https://example.com/important-page',
    'Important Page',
    selector='.main-content'  # Optional: monitor specific element
)

# Check for changes
results = monitor.check_all()
for result in results:
    if result['status'] == 'changed':
        print(f"CHANGED: {result['name']}")
        print(f"  Previous: {result['previous_content'][:100]}...")
        print(f"  New: {result['new_content'][:100]}...")
    elif result['status'] == 'error':
        # Surface failed checks; otherwise an unreachable page looks the
        # same as an unchanged one.
        print(f"ERROR: {result['name']}: {result['error']}")
Uptime monitoring
UptimeRobot API integration
import requests
from typing import List, Optional
class UptimeRobotClient:
"""UptimeRobot API client for monitoring page availability."""
def __init__(self, api_key: str):
self.api_key = api_key
# v2 still works as of 2026 but is in maintenance mode; v3 is
# the current REST API at https://api.uptimerobot.com/v3 with
# a different request shape (Bearer auth, JSON bodies).
self.base_url = "https://api.uptimerobot.com/v2"
def _request(self, endpoint: str, params: Optional[dict] = None,
             timeout: float = 30) -> dict:
    """POST to an API endpoint and return the decoded JSON response.

    Args:
        endpoint: Endpoint name appended to ``base_url`` (e.g. ``'getMonitors'``).
        params: Extra form fields merged over the ``api_key`` field.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    data = {'api_key': self.api_key}
    if params:
        data.update(params)
    # requests applies no timeout by default, so a stalled connection
    # would block the caller indefinitely without this.
    response = requests.post(f"{self.base_url}/{endpoint}", data=data,
                             timeout=timeout)
    return response.json()
def get_monitors(self) -> List[dict]:
"""Get all monitors."""
result = self._request('getMonitors')
return result.get('monitors', [])
def create_monitor(self, friendly_name: str, url: str,
monitor_type: int = 1) -> dict:
"""Create a new monitor.
Types: 1=HTTP(s), 2=Keyword, 3=Ping, 4=Port
"""
return self._request('newMonitor', {
'friendly_name': friendly_name,
'url': url,
'type': monitor_type
})
def get_monitor_uptime(self, monitor_id: int,
custom_uptime_ratios: str = "7-30-90") -> dict:
"""Get uptime statistics for a monitor."""
return self._request('getMonitors', {
'monitors': monitor_id,
'custom_uptime_ratios': custom_uptime_ratios
})
def pause_monitor(self, monitor_id: int) -> dict:
"""Pause a monitor."""
return self._request('editMonitor', {
'id': monitor_id,
'status': 0
})
def resume_monitor(self, monitor_id: int) -> dict:
"""Resume a monitor."""
return self._request('editMoni