1 files changed, 132 insertions, 0 deletions
diff --git a/handlers.py b/handlers.py
new file mode 100644
index 0000000..8139c44
--- /dev/null
+++ b/handlers.py
@@ -0,0 +1,132 @@
+# Copyright (c) 2018, George Tokmaji
+
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+from .helpers import *
+import locale, html, string
+from datetime import datetime
+from abc import *
+
+class Site(ABC):
+    regex = None  # placeholder; the handlers in this file read `regexes` instead
+
+    @abstractmethod
+    def process(self):
+        """Handle this site; abstract, and the base body itself returns None."""
+
+class Parry(Site):  # stub handler: performs no requests, yields one empty dict
+    def process(self):
+        yield dict()
+
+class Website(Site):
+    """Scrape a listing page with regexes and yield entries not yet stored.
+
+    Subclasses set ``address``, ``prefix`` (prepended to relative URLs),
+    ``tag``, ``date_format`` (strptime format of the ``updatedAt`` group)
+    and ``regexes`` (compiled "list" and "desc" patterns).
+    """
+    tag = ""
+    prefix = ""
+    address = ""
+    date_format = ""
+    regexes = {"list" : "", "desc" : ""}
+
+    def process(self):
+        """Yield an entry dict for every listed item not yet in the database.
+
+        Yields nothing if the listing request fails; skips failed downloads."""
+        listing = requests.get(self.address)
+        if not listing:
+            return
+        for m in self.regexes["list"].finditer(listing.text):
+            print(m["title"])
+            updated = datetime.strptime(m["updatedAt"], self.date_format)
+            entry_id = str(ObjectId.from_datetime(updated))
+            if entry_id in database["entries"]:
+                continue
+            downloadURL = self.prefix + m["downloadURL"]
+            download = requests.get(downloadURL, stream=True, allow_redirects=True)
+            if not download:
+                continue
+            # %a/%b parsing is locale dependent, hence "C"; finally restores the locale and closes the stream.
+            locale.setlocale(locale.LC_ALL, "C")
+            try:
+                files = [{
+                    "metadata" : {
+                        "hashes" : {"sha1" : calculateHashForResource(download).hexdigest()},
+                        "downloadURL" : downloadURL
+                        },
+                    "aliases" : None,
+                    "deleted" : False,
+                    "_id" : entry_id,
+                    "filename" : m["downloadURL"].split("/")[-1],
+                    "content-type" : download.headers.get("Content-Type", "application/octet-stream"),
+                    "length" : int(download.headers["Content-Length"]),
+                    "chunkSize" : 4096, # what is this for
+                    "uploadDate" : datetime.strptime(download.headers["Date"], "%a, %d %b %Y %H:%M:%S GMT").isoformat(),
+                    }]
+            finally:
+                locale.setlocale(locale.LC_ALL, "")
+                download.close()
+            # groups() holds the captured values; group names are only in groupdict().
+            niveau = m.groupdict().get("niveau")
+            title = html.unescape(m["title"])
+            entry = {
+                "title" : title,
+                "voting" : {
+                    "sum" : round(float(niveau), 0) if niveau is not None else 0,
+                    "count" : 0,
+                    "votes" : None
+                    },
+                "tags" : [self.tag],
+                "files" : files,
+                "dependencies" : [],
+                "deleted" : False,
+                "description" : "",
+                "pic" : None,
+                "author" : {"username" : html.unescape(m["author"])},
+                "slug" : "".join(filter(lambda x: x in string.ascii_letters, title.lower())),
+                "updatedAt" : updated.isoformat(),
+                "__v" : 1,
+                "comments" : None,
+                "id" : entry_id,
+                "__intern" : {"entryURL" : self.prefix + m["entryURL"]},
+                "createdAt" : updated.isoformat()
+                }
+            page = requests.get(entry["__intern"]["entryURL"])
+            # search(), not match(): a table-cell pattern would only match() at offset 0.
+            d = self.regexes["desc"].search(page.text)
+            if d and d.groupdict().get("description") is not None:
+                entry["description"] = html.unescape(d["description"])
+            yield entry
+
+class CCAN(Website):
+    """Handler for the ccan.de listing; its dates use a two-digit year."""
+    tag = "ccan"
+    prefix = "https://ccan.de/cgi-bin/ccan/"
+    address = "https://ccan.de/cgi-bin/ccan/ccan-view.pl?a=&sc=tm&so=d&nr=100000&pg=0&ac=ty-ti-ni-tm-rp-ev&reveal=1"
+    date_format = "%d.%m.%y %H:%M"
+    regexes = {
+        "list" : re.compile(r"<TR.*?><.*?><IMG SRC=\"/img/type-(?P<type>.*?)\.gif\".*?<A HREF=\"(?P<entryURL>ccan-view\.pl\?a=view\&i=\d*?)\">(?P<title>.*?)(<I>v</I>(?P<version>.*?))?</A><.*?><A HREF=\"(?P<downloadURL>ccan-dl-auth\.pl/(?P<id>\d*)/.*?)\"><.*?><A HREF=\"ccan-user.pl.*?\">(?P<author>.*?)</A><.*?>\((?P<niveau>\d\.\d)\).*?>(?P<updatedAt>\d\d\.\d\d\.\d\d\ \d\d\:\d\d).*?</TR>"),
+        "desc" : re.compile(r"<TD>Beschreibung:</TD><TD>(?P<description>.*?)</TD></TR>")
+        }
+
+class CC(Website):
+    """Handler for the cc-archive.lwrl.de download listing (four-digit years)."""
+    tag = "clonk-center"
+    prefix = "https://cc-archive.lwrl.de/"
+    address = "https://cc-archive.lwrl.de/download.php"
+    date_format = "%d.%m.%Y %H:%M"
+    regexes = {
+        "list" : re.compile(r"<tr><td align=\"right\">.*?<a href=\"/(?P<entryURL>download\.php\?act=getinfo&amp;dl=\d*?)\">(?P<title>.*?)</a></td><td align=\"right\"><a href=\"(?P<downloadURL>downloads/dl\d*?/.*?)\"><img src=\"picz/dl\.gif\" alt=\"Runterladen\" title=\"Runterladen\" border=\"0\"></a></td><td>(<a href=\"user\.php.*?\">|)(?P<author>.*?)(</a>|)</td>.*?<td style=\"border-right:0px;\">(?P<updatedAt>\d\d\.\d\d\.\d\d\d\d \d{1,2}:\d\d)</td></tr>"),
+        "desc" : re.compile("")}  # empty pattern: it defines no description group