# ===============================================================
# llms.txt — Large-Language-Model Crawler Policy
# Maintainer: NUMBERS Institutes and Education LLC
# Last updated: 2025-06-24
#
# Goal ─────────────
# • Allow short, attributed excerpts that drive clicks.
# • Block model-training and long verbatim reuse (zero-click loss).
# • Protect private / paid content & internal APIs.
# ===============================================================

# ------------- Global rules (apply to all current & future LLM bots) -------------
# Wildcard group header: robots.txt-style parsers attach rules to the most
# recent User-agent line and typically ignore rules that precede any group,
# so the global policy is declared explicitly for "*".
User-agent: *
allow: /
disallow_training: /                            # No parameter training on any path
max_tokens: 160                                 # Quote at most 160 tokens in one answer
excerpt_percentage: 10                          # Quote at most 10 % of a page; the stricter of the two limits applies
attribution: required                           # Show a visible, clickable canonical URL
disallow_image_generation: *                    # Don’t create images from our assets

# Completely off-limits sections (longest-match rules, so they win over "allow: /")
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/

# ------------- Contact & legal -------------
# Point of contact for questions about this policy.
contact: admin@num8ers.com
# Accessing the site with an LLM crawler signals acceptance of this policy.
# Non-compliance will be logged and may trigger legal action under UAE copyright law.

# ------------- Bot-specific overrides (read-only, no training) -------------
User-agent: GPTBot
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10

User-agent: Google-Extended        # Gemini / Bard
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
# NOTE(review): Google documents Google-Extended as a robots.txt control
# token rather than a separate crawler — confirm these rules are mirrored
# in robots.txt.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10

User-agent: PerplexityBot
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10

User-agent: XAI                    # Grok
# NOTE(review): confirm "XAI" is the token the Grok crawler actually sends;
# a group whose token never matches has no effect.
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10

User-agent: ClaudeBot              # Anthropic
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10

User-agent: MetaAI                 # Llama-family crawler (draft)
# NOTE(review): "MetaAI" is a draft token; Meta's crawler documentation lists
# "meta-externalagent" for its AI crawler, so it is added to this group —
# confirm against the current vendor docs.
User-agent: meta-externalagent
# A crawler that matches a named group applies only that group (robots.txt
# group selection), so every global restriction is repeated here; otherwise
# "allow: /" would open the private sections to this bot.
allow: /
disallow: /members/
disallow: /checkout/
disallow: /static/private/
disallow: /api/
disallow_training: /
max_tokens: 160
excerpt_percentage: 10
attribution: required
disallow_image_generation: *
crawl-delay: 10