# llms.txt — Sportskeeda
# Domain: https://www.sportskeeda.com
# Purpose: Public-facing policy for AI/LLM crawlers, data collectors, and AI-assisted products.
# Status: Draft (replace placeholders as needed)
# Last-Updated: 2025-08-18 07:47
[owner]
name: Sportskeeda (Absolute Sports Media Pvt. Ltd.)
domain: sportskeeda.com
contact: https://www.sportskeeda.com/contact
[policy]
# Human-readable summary:
# - You may index and display short previews (snippets) of public articles WITH proper attribution and a link.
# - Training of foundation or fine-tuned models on Sportskeeda content is NOT permitted without written permission.
# - Commercial re-use (including AI answers or products) requires a license.
# - Respect robots.txt, meta tags, and rate limits. Honor per-page opt-outs.
# - Do not collect or process any personal data from non-public areas.
allowed_uses:
  - ephemeral_previews_with_attribution  # short excerpts/snippets in search or link-previews (<= 160 chars)
  - noncommercial_research_indexing      # evaluation and indexing to improve search quality (no training)
  - citation_and_linking                 # titles/URLs/metadata for the purpose of driving traffic
disallowed_uses:
  - model_training                        # training or fine-tuning on our content
  - synthetic_derivatives_without_license # paraphrases/summaries used in products or UIs without a license
  - bulk_republication                    # redistributing full text, images, data, or feeds
  - paywall_circumvention                 # accessing gated/subscriber-only content
  - profile_or_face_dataset_creation      # extracting faces, biometrics, or user data
  - prompt_injection_crawling             # using prompt injection or other exploits to bypass access controls
[attribution]
required: true
format: "© Sportskeeda — Title, Author (if shown), and Canonical URL"
link: "https://www.sportskeeda.com{canonical_path}"
snippet_char_limit: 160
[cache_and_storage]
# Cache only what you need for snippet display. Do not persist full text unless licensed.
ephemeral_cache_max_age_hours: 24
store_full_text: false
store_images: false
[rate_limits]
# Match or be stricter than robots.txt. These are upper bounds; slower is better.
max_requests_per_minute_per_ip: 2
max_concurrent_requests_per_ip: 1
crawl_window_utc: 01:00-06:00  # Prefer low-traffic hours; not a hard requirement but a courtesy.
respect_if_modified_since: true
backoff_on_429_or_503: true
[per_page_opt_out]
# Honor any of the following signals on a page:
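meta_robots_noai: true           # e.g. <meta name="robots" content="noai">
meta_google_noai: true           # e.g. <meta name="google" content="noai">
http_header_ai_policy: true      # X-AI-Policy: noai
link_rel_noai: true              # e.g. <link rel="noai">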
class_or_attr_noai: true         # data-ai="noai" (treat as do-not-train/do-not-ingest)
[paths]
# Path-level guidance for AI crawlers (does not replace robots.txt)
disallow:
  - /account/
  - /login/
  - /privacy
  - /terms
  - /subscribe
  - /payments/
  - /admin/
  - /wp-admin/
  - /cdn-cgi/
  - /cart/
allow:
  - /
  - /news/
  - /esports/
  - /mma/
  - /football/
  - /cricket/
  - /basketball/
  - /tennis/
  - /wwe/
  - /f1/
  - /gaming/
  - /entertainment/
[agents]
# Agent-specific guidance. If your agent is not listed, follow [policy].
# OpenAI
"GPTBot":
  allow_snippets_only: true
  disallow_training: true
  obey_chatgpt_user_header: true
"OAI-SearchBot":
  allow_snippets_only: true
  disallow_training: true
"ChatGPT-User":
  purpose: "Retrieval for user prompts with source linking"
  allow_snippets_only: true
# Google
"Google-Extended":
  disallow_training: true
  allow_snippets_only: true
# Anthropic
"ClaudeBot":
  disallow_training: true
  allow_snippets_only: true
# Perplexity
"PerplexityBot":
  allow_snippets_only: true
  disallow_training: true
# Common Crawl
"CCBot":
  disallow: true
# Meta (Llama/AI research collection)
"Meta-ExternalAgent":
  disallow_training: true
  allow_snippets_only: true
# Apple
"Applebot-Extended":
  disallow_training: true
  allow_snippets_only: true
# Amazon
"Amazonbot":
  allow_snippets_only: true
  disallow_training: true
# Others (fallback)
"*":
  disallow_training: true
  allow_snippets_only: true
[licensing]
# For any use beyond previews/snippets (e.g., training, RAG at scale, derivative summaries in apps),
# obtain a written license.
required_for_commercial_use: true
how_to_request: "Email or form listed in [owner.contact] with intended use, scope, and volumes."
[enforcement]
# Non-compliance may result in technical blocking and legal action under applicable laws.
monitoring: active
log_misuse: true
legal_reserved_rights: true
[technical_notes]
# This file is complementary to robots.txt and site terms. Where conflicts exist,
# Sportskeeda's Terms of Use and robots.txt take precedence.
is_human_readable: true
machine_parse_hint: INI/YAML-like
canonical_location: https://www.sportskeeda.com/llms.txt
[sitemaps]
# Helpful for discovery of public content without hammering the origin.
primary: https://www.sportskeeda.com/sitemap.xml
additional:
  - https://www.sportskeeda.com/sitemap-news.xml
  - https://www.sportskeeda.com/robots.txt  # not a sitemap itself; listed for any Sitemap: directives it declares
# End of llms.txt