| |
@@ -5,6 +5,7 @@
|
| |
import os
|
| |
import re
|
| |
from datetime import datetime
|
| |
+ from requests.utils import unquote
|
| |
from copr_common.request import SafeRequest
|
| |
from copr_backend.helpers import BackendConfigReader
|
| |
|
| |
@@ -118,6 +119,21 @@
|
| |
url, bot.group(1))
|
| |
continue
|
| |
|
| |
+ # Decode percent-encoded characters, e.g. convert %40 back to @.
|
| |
+ url = unquote(url)
|
| |
+
|
| |
+ # I don't know how or why but occasionally there is a URL that is
|
| |
+ # encoded twice (%2540oamg -> %40oamg -> @oamg), and yet its status
|
| |
+ # code is 200. AFAIK these appear only for EPEL-7 chroots and their
|
| |
+ # User-Agent is something like urlgrabber/3.10%20yum/3.4.3
|
| |
+ # I wasn't able to reproduce such accesses, and we decided to not count
|
| |
+ # them
|
| |
+ if url != unquote(url):
|
| |
+ log.warning("Skipping: %s (double encoded URL, user-agent: '%s', "
|
| |
+ "status: %s)", access["cs-uri-stem"],
|
| |
+ access["cs(User-Agent)"], access["sc-status"])
|
| |
+ continue
|
| |
+
|
| |
# We don't want to count every accessed URL, only those pointing to
|
| |
# RPM files and repo files
|
| |
key_strings = url_to_key_strings(url)
|
| |
@@ -125,6 +141,12 @@
|
| |
log.debug("Skipping: %s", url)
|
| |
continue
|
| |
|
| |
+ if any(x for x in key_strings
|
| |
+ if x.startswith("chroot_rpms_dl_stat|")
|
| |
+ and x.endswith("|srpm-builds")):
|
| |
+ log.debug("Skipping %s (SRPM build)", url)
|
| |
+ continue
|
| |
+
|
| |
log.debug("Processing: %s", url)
|
| |
|
| |
# When counting RPM access, we want to iterate both project hits and
|
| |
Fixing the issues that we discovered in PR#2274