# llms.txt — Sportskeeda
# Domain: https://www.sportskeeda.com
# Purpose: Public-facing policy for AI/LLM crawlers, data collectors, and AI-assisted products.
# Status: Draft (replace placeholders as needed)
# Last-Updated: 2025-08-18 07:47

[owner]
# Publisher identity and the contact point for policy or licensing questions.
name: Sportskeeda (Absolute Sports Media Pvt. Ltd.)
domain: sportskeeda.com
contact: https://www.sportskeeda.com/contact

[policy]
# Human-readable summary:
# - You may index and display short previews (snippets) of public articles WITH proper attribution and a link.
# - Training of foundation or fine-tuned models on Sportskeeda content is NOT permitted without written permission.
# - Commercial re-use (including AI answers or products) requires a license.
# - Respect robots.txt, meta tags, and rate limits. Honor per-page opt-outs.
# - Do not collect or process any personal data from non-public areas.
allowed_uses:
  # short excerpts/snippets in search or link-previews (<= 160 chars)
  - ephemeral_previews_with_attribution
  # evaluation and indexing to improve search quality (no training)
  - noncommercial_research_indexing
  # titles/URLs/metadata for the purpose of driving traffic
  - citation_and_linking
disallowed_uses:
  # training or fine-tuning on our content
  - model_training
  # paraphrases/summaries used in products or UIs without a license
  - synthetic_derivatives_without_license
  # redistributing full text, images, data, or feeds
  - bulk_republication
  # accessing gated/subscriber-only content
  - paywall_circumvention
  # extracting faces, biometrics, or user data
  - profile_or_face_dataset_creation
  # using exploits to bypass controls
  - prompt_injection_crawling

[attribution]
# How any displayed preview must credit Sportskeeda.
required: true
format: "© Sportskeeda — Title, Author (if shown), and Canonical URL"
# {canonical_path} is a placeholder for the article's canonical path.
link: "https://www.sportskeeda.com{canonical_path}"
# Maximum preview length in characters.
snippet_char_limit: 160

[cache_and_storage]
# Cache only what you need for snippet display. Do not persist full text unless licensed.
ephemeral_cache_max_age_hours: 24
store_full_text: false
store_images: false

[rate_limits]
# Match or be stricter than robots.txt. These are upper bounds; slower is better.
max_requests_per_minute_per_ip: 2
max_concurrent_requests_per_ip: 1
# Prefer low-traffic hours; not a hard requirement but a courtesy.
crawl_window_utc: 01:00-06:00
respect_if_modified_since: true
backoff_on_429_or_503: true

[per_page_opt_out]
# Honor any of the following signals on a page:
# NOTE(review): meta_robots_noai, meta_google_noai and link_rel_noai each carried a
# trailing comment marker with no text; their example markup appears to have been
# lost (presumably HTML tags) — restore the examples if available.
meta_robots_noai: true
meta_google_noai: true
# HTTP header form: X-AI-Policy: noai
http_header_ai_policy: true
link_rel_noai: true
# e.g. data-ai="noai" (treat as do-not-train/do-not-ingest)
class_or_attr_noai: true

[paths]
# Path-level guidance for AI crawlers (does not replace robots.txt)
disallow:
  - /account/
  - /login/
  - /privacy
  - /terms
  - /subscribe
  - /payments/
  - /admin/
  - /wp-admin/
  - /cdn-cgi/
  - /cart/
allow:
  - /
  - /news/
  - /esports/
  - /mma/
  - /football/
  - /cricket/
  - /basketball/
  - /tennis/
  - /wwe/
  - /f1/
  - /gaming/
  - /entertainment/

[agents]
# Agent-specific guidance. If your agent is not listed, follow [policy].
# OpenAI
"GPTBot":
  allow_snippets_only: true
  disallow_training: true
  obey_chatgpt_user_header: true
"OAI-SearchBot":
  allow_snippets_only: true
  disallow_training: true
"ChatGPT-User":
  purpose: "Retrieval for user prompts with source linking"
  # Same flag name every other agent entry uses; snippet_only is kept as an
  # alias for any reader already keyed on it.
  allow_snippets_only: true
  snippet_only: true

# Google
"Google-Extended":
  disallow_training: true
  allow_snippets_only: true

# Anthropic
"ClaudeBot":
  disallow_training: true
  allow_snippets_only: true

# Perplexity
"PerplexityBot":
  allow_snippets_only: true
  disallow_training: true

# Common Crawl
"CCBot":
  disallow: true

# Meta (Llama/AI research collection)
"Meta-ExternalAgent":
  disallow_training: true
  allow_snippets_only: true

# Apple
"Applebot-Extended":
  disallow_training: true
  allow_snippets_only: true

# Amazon
"Amazonbot":
  allow_snippets_only: true
  disallow_training: true

# Others (fallback)
"*":
  disallow_training: true
  allow_snippets_only: true

[licensing]
# For any use beyond previews/snippets (e.g., training, RAG at scale, derivative summaries in apps),
# obtain a written license.
required_for_commercial_use: true
how_to_request: "Email or form listed in [owner.contact] with intended use, scope, and volumes."

[enforcement]
# Non-compliance may result in technical blocking and legal action under applicable laws.
monitoring: active
log_misuse: true
legal_reserved_rights: true

[technical_notes]
# This file is complementary to robots.txt and site terms. Where conflicts exist,
# Sportskeeda's Terms of Use and robots.txt take precedence.
is_human_readable: true
machine_parse_hint: INI/YAML-like
canonical_location: https://www.sportskeeda.com/llms.txt

[sitemaps]
# Helpful for discovery of public content without hammering the origin.
primary: https://www.sportskeeda.com/sitemap.xml
additional:
  - https://www.sportskeeda.com/sitemap-news.xml
  # NOTE(review): robots.txt is not a sitemap; presumably listed as a pointer to
  # the crawl rules — confirm, or move it out of this list.
  - https://www.sportskeeda.com/robots.txt

# End of llms.txt