From 4a3da58a9ef56a5553556af87e058ebbf4482fcb Mon Sep 17 00:00:00 2001 From: Kevin Fenzi Date: May 01 2024 19:19:43 +0000 Subject: pagure: drop Bytespider crawler This crawler is hitting pagure.io really hard. Send it a 403 and block it in robots.txt. Signed-off-by: Kevin Fenzi --- diff --git a/roles/pagure/templates/0_pagure.conf b/roles/pagure/templates/0_pagure.conf index ce7b130..5fdd9cb 100644 --- a/roles/pagure/templates/0_pagure.conf +++ b/roles/pagure/templates/0_pagure.conf @@ -138,6 +138,10 @@ MaxConnectionsPerChild 1000 # RewriteEngine On # RewriteCond %{REQUEST_URI} ^/fedora-web/websites$ # RewriteRule .* - [F] + # Reject the Bytespider crawler: answer 403 to any request whose User-Agent contains "Bytespider" (unanchored, case-insensitive match) + RewriteEngine On + RewriteCond %{HTTP_USER_AGENT} Bytespider [NC] + RewriteRule .* - [F] SetHandler server-status diff --git a/roles/pagure/templates/robots.txt.j2 b/roles/pagure/templates/robots.txt.j2 index 97259eb..834c675 100644 --- a/roles/pagure/templates/robots.txt.j2 +++ b/roles/pagure/templates/robots.txt.j2 @@ -7,4 +7,6 @@ Disallow: /login Disallow: /*/raw Disallow: /*/blob Crawl-Delay: 10 +User-agent: Bytespider +Disallow: / {% endif %}