about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- handlers.py 19
1 files changed, 9 insertions, 10 deletions
diff --git a/handlers.py b/handlers.py
index 9f48349..6a24a65 100644
--- a/handlers.py
+++ b/handlers.py
@@ -16,6 +16,7 @@ from .helpers import *
import locale, html, string
from datetime import datetime
from abc import *
+from html2text import html2text
class Site(ABC):
regex = None
@@ -52,7 +53,7 @@ class Website(Site):
entry.tags = [self.tag]
entry.slug = "".join(i for i in html.unescape(m["title"]).lower() if i in string.ascii_letters)
entry.updated_at = datetime.strptime(m["updatedAt"], self.date_format)
- entry.version = m["version"]
+ entry.version = m["version"] if "version" in m.groupdict() else ""
entry.readonly = True
try:
@@ -85,20 +86,18 @@ class Website(Site):
locale.resetlocale(locale.LC_ALL)
- r = requests.get(self.prefix + m["entryURL"])
- d = self.regexes["desc"].match(r.text)
- if d and "description" in d.groups():
- entry.description = html.unescape(d["description"])
+ r = requests.get(self.prefix + html.unescape(m["entryURL"]))
+ d = self.regexes["desc"].search(r.text)
+ if d and "description" in d.groupdict():
+ entry.description = html2text(html.unescape(d["description"]))
-
- if not exists:
- session.add(entry)
+ session.add(entry)
session.commit()
class CCAN(Website):
regexes = {
"list" : re.compile(r"<TR.*?><.*?><IMG SRC=\"/img/type-(?P<type>.*?)\.gif\".*?<A HREF=\"(?P<entryURL>ccan-view\.pl\?a=view\&i=\d*?)\">(?P<title>.*?)(<I>v</I>(?P<version>.*?))?</A><.*?><A HREF=\"(?P<downloadURL>ccan-dl-auth\.pl/(?P<id>\d*)/.*?)\"><.*?><A HREF=\"ccan-user.pl.*?\">(?P<author>.*?)</A><.*?>\((?P<niveau>\d\.\d)\).*?>(?P<updatedAt>\d\d\.\d\d\.\d\d\ \d\d\:\d\d).*?</TR>"),
- "desc" : re.compile(r"<TD>Beschreibung:</TD><TD>(?P<description>.*?)</TD></TR>")
+ "desc" : re.compile(r"<TD>(?:Beschreibung|Description):</TD><TD>(?P<description>.*?)</TD></TR>")
}
address = "https://ccan.de/cgi-bin/ccan/ccan-view.pl?a=&sc=tm&so=d&nr=100000&pg=0&ac=ty-ti-ni-tm-rp-ev&reveal=1"
@@ -108,7 +107,7 @@ class CCAN(Website):
class CC(Website):
regexes = {
- "list" : re.compile(r"<tr><td align=\"right\">.*?<a href=\"/(?P<entryURL>download\.php\?act=getinfo&amp;dl=\d*?)\">(?P<title>.*?)</a></td><td align=\"right\"><a href=\"(?P<downloadURL>downloads/dl\d*?/.*?)\"><img src=\"picz/dl\.gif\" alt=\"Runterladen\" title=\"Runterladen\" border=\"0\"></a></td><td>(<a href=\"user\.php.*?\">|)(?P<author>.*?)(</a>|)</td>.*?<td style=\"border-right:0px;\">(?P<updatedAt>\d\d\.\d\d\.\d\d\d\d \d{1,2}:\d\d)</td></tr>"),
+ "list" : re.compile(r"<tr><td align=\"right\">.*?<a href=\"/(?P<entryURL>download\.php\?act=getinfo&amp;dl=\d*?)\">(?P<title>.*?)(?: v(?P<version>.*?)|)</a></td><td align=\"right\"><a href=\"(?P<downloadURL>downloads/dl\d*?/.*?)\"><img src=\"picz/dl\.gif\" alt=\"Runterladen\" title=\"Runterladen\" border=\"0\"></a></td><td>(<a href=\"user\.php.*?\">|)(?P<author>.*?)(</a>|)</td>.*?<td style=\"border-right:0px;\">(?P<updatedAt>\d\d\.\d\d\.\d\d\d\d \d{1,2}:\d\d)</td></tr>"),
"desc" : re.compile("")
}
address = "https://cc-archive.lwrl.de/download.php"