| ... | ... |
@@ -0,0 +1 @@ |
| 1 |
+newsProxy.py |
| ... | ... |
@@ -0,0 +1,175 @@ |
| 1 |
+#!/usr/bin/env python3 |
|
| 2 |
+from http.server import BaseHTTPRequestHandler, HTTPServer |
|
| 3 |
+import datetime |
|
| 4 |
+from time import perf_counter |
|
| 5 |
+import os |
|
| 6 |
+import json |
|
| 7 |
+import urllib.parse |
|
| 8 |
+import configuration |
|
| 9 |
+import socket |
|
| 10 |
+import requests |
|
| 11 |
+from userio import * |
|
| 12 |
+ |
|
| 13 |
+# Early cache version |
|
| 14 |
+import hashlib |
|
| 15 |
+from pathlib import Path |
|
| 16 |
+ |
|
| 17 |
+ |
|
| 18 |
+import newsParser |
|
| 19 |
+ |
|
| 20 |
+server = None |
|
| 21 |
+csvFilename = None |
|
| 22 |
+connectionResults = None |
|
| 23 |
+ |
|
| 24 |
+ |
|
| 25 |
def navigationHeader(url):
    """Build the HTML navigation bar displayed above the article area.

    The bar holds a home link, a fullscreen toggle, the opacity controls and,
    depending on ``url``, either a link to the original article or the markup
    returned by ``newsParser.supportedList()``.

    Args:
        url: Address of the article being displayed, or None when no article
            was requested. The value originates from the request query
            string, so it is HTML-escaped before being placed in ``href``.

    Returns:
        The navigation bar markup as a single string.
    """
    from html import escape  # local import keeps this block self-contained

    # Read the clock once so both bounds are checked against the same hour.
    # Hours 7..20 inclusive start at opacity 10, every other hour at 3.
    currentHour = datetime.datetime.now().hour
    opacityValue = 10 if 7 <= currentHour <= 20 else 3

    parts = [
        '<div class="mangaChapNavig">\n',
        # The anchor is now closed with </a> (it used to open a second <a>).
        ' <div class="mangaChapNavigHome"><a href=".">'
        '<div class="arrow-up"></div></a></div>\n',
        ' <span id="mangaFSStatus" style="display: none;">0</span>\n',
        # The span is now closed with </span>; the former </div> terminated
        # the surrounding navigation container prematurely.
        ' <span id="mangaFSToggle" onclick="openFullscreen();">'
        '<div class="fs-on"></div></span>\n',
        ' <span id="mangaOpacityMinus" onclick="onOpacityMinus()">☼</span>',
        ' <span id="mangaOpacityValue">' + str(opacityValue) + '</span>',
        ' <span id="mangaOpacityPlus" onclick="onOpacityPlus()">☀</span>\n',
    ]

    if url is not None:
        # Escape quotes and angle brackets so the value cannot break out of
        # the attribute and inject markup.
        parts.append(
            ' <span id="nav-link"><a href="' + escape(url, quote=True)
            + '" target="new">link</a></span>\n'
        )
    else:
        parts.append(newsParser.supportedList())

    parts.append('</div>\n')
    return ''.join(parts)
|
| 42 |
+ |
|
| 43 |
class MyServer(BaseHTTPRequestHandler):
    """HTTP handler serving the article reader page and its static files.

    GET requests are answered in one of three ways:

    * ``/``, ``/index.html`` and any path that does not exist below the
      static directory produce the reader page (optionally with the article
      named by the ``url`` query parameter);
    * an existing file below the static directory is sent as-is;
    * anything that would resolve outside the static directory, or cannot be
      read, yields a 404.

    The handler relies on the module-level ``server`` mapping (keys used
    here: ``name``, ``version``, ``csv``, ``csvTemp``, ``cachedir``), which
    must be populated before requests are served.
    """

    # Suffix -> MIME type for static files; anything else is sent as HTML.
    CONTENT_TYPES = (
        ('.css', 'text/css'),
        ('.bmp', 'image/x-ms-bmp'),
        ('.png', 'image/png'),
        ('.jpg', 'image/jpeg'),
    )

    # Search form placed at the top of every reader page.
    ARTICLE_FORM = (
        '<div class="input-container">\n'
        '<form action="">\n'
        '<input type="text" id="srvIp" name="url" value="" size="50">\n'
        '<input type="submit" class="button" value="Get Article">\n'
        '</form>\n'
        '</div>\n'
    )

    def log_message(self, format, *args):
        """Discard the default per-request log line (too verbose)."""
        return

    def version_string(self):
        """Return the value used for the ``Server`` response header.

        ``send_response`` (and therefore ``send_error``) emits this header
        itself, so overriding it labels every response exactly once. Sending
        the header manually before ``send_error`` would put it ahead of the
        status line and corrupt the response.
        """
        return server['name'] + " v" + server['version']

    def do_GET(self):
        """Dispatch a GET request to the reader page or a static file."""
        global csvFilename
        global csvFilenameTemp
        rootdir = 'pages/static/'
        csvFilename = rootdir + server['csv']
        csvFilenameTemp = rootdir + server['csvTemp']

        # Split path and query explicitly instead of assuming a "/?" prefix,
        # so "/index.html?url=..." is understood as well.
        requestPath, _, query = self.path.partition('?')

        if requestPath in ('', '/', '/index.html'):
            self._sendArticlePage(query)
            return

        target = self._staticTarget(rootdir, requestPath)
        if target is None:
            # The path escapes the static directory (e.g. via "..").
            self.send_error(404, 'file not found')
            return

        if not os.path.exists(target):
            # Unknown paths fall back to the reader page.
            self._sendArticlePage(query)
            return

        self._sendStaticFile(target, requestPath)

    @staticmethod
    def _staticTarget(rootdir, requestPath):
        """Map a request path to an absolute path below ``rootdir``.

        Returns None when the normalised result would lie outside
        ``rootdir``; symbolic links are not resolved.
        """
        root = os.path.abspath(rootdir)
        target = os.path.abspath(os.path.join(root, requestPath.lstrip('/')))
        if target != root and not target.startswith(root + os.sep):
            return None
        return target

    @classmethod
    def _contentType(cls, requestPath):
        """Pick the Content-type for a static file from its suffix."""
        for suffix, mime in cls.CONTENT_TYPES:
            if requestPath.endswith(suffix):
                return mime
        return 'text/html'

    def _sendStaticFile(self, target, requestPath):
        """Send the file at ``target``; answer 404 if it cannot be read."""
        try:
            # Read everything before emitting headers so a failure can still
            # be reported with a clean 404, and the handle is always closed.
            with open(target, 'rb') as staticFile:
                payload = staticFile.read()
        except OSError:
            self.send_error(404, 'file not found')
            return

        self.send_response(200)
        self.send_header('Content-type', self._contentType(requestPath))
        self.end_headers()
        self.wfile.write(payload)

    def _loadArticle(self, url):
        """Return the article markup for ``url``, using the on-disk cache.

        The cache file is named after the MD5 hex digest of the URL (used
        only as a file name, not for security) inside ``server['cachedir']``.

        Raises:
            TypeError: if ``newsParser.getArticle`` returns something other
                than a string; nothing is cached in that case.
        """
        urlHashStr = hashlib.md5(url.encode()).hexdigest()
        cacheFilename = server['cachedir'] + "/" + urlHashStr
        cacheFile = Path(cacheFilename)
        say("URL Hash: " + urlHashStr)

        if cacheFile.is_file():
            say("Reading cache file: " + cacheFilename)
            return cacheFile.read_text()

        articleNew = newsParser.getArticle(url)
        if not isinstance(articleNew, str):
            # Fail before touching the cache so no empty file is left behind.
            raise TypeError("article content must be str, not "
                            + type(articleNew).__name__)

        say("Creating cache file: " + cacheFilename)
        try:
            with open(cacheFilename, "w") as newCacheFile:
                newCacheFile.write(articleNew)
        except OSError as err:
            # Caching is best-effort: still serve the freshly parsed article.
            say("Cache write failed: " + str(err))
        return articleNew

    def _sendArticlePage(self, query):
        """Render and send the reader page for the given query string."""
        from html import escape  # local import keeps this block self-contained

        # Prefer the address forwarded in X-Real-IP when that header is set.
        clientAddress = self.headers['X-Real-IP']
        if clientAddress is None:
            clientAddress = self.client_address[0]
        say("From: " + clientAddress + " GET Received : " + self.path)

        with open(configuration.get_pageBegin(), 'r') as fStart:
            data_begin = fStart.read().replace('CSTAPPNAME', server['name'])

        with open(configuration.get_pageEnd(), 'r') as fEnd:
            data_end = (
                fEnd.read()
                .replace('CSTAPPNAME', server['name'])
                .replace('CSTAPPVERSION', server['version'])
            )

        urlArgs = urllib.parse.parse_qs(query)
        url = urlArgs['url'][0] if 'url' in urlArgs else None

        page = [data_begin]

        if configuration.get_debug() != 0:
            # Request data is echoed escaped so it cannot inject markup.
            page.append("<p>Request: " + escape(self.path) + "</p>\n")
            page.append("<pre>" + escape(json.dumps(urlArgs)) + "</pre>\n")

        page.append(self.ARTICLE_FORM)
        page.append(navigationHeader(url))
        page.append('<div id="article-current">\n')

        if url is None:
            say("Root")
        else:
            page.append(self._loadArticle(url))

        page.append("</div>\n")
        page.append(data_end)
        content = ''.join(page)

        self.send_response(200)
        # The body is always encoded as UTF-8 below, so declare it.
        self.send_header("Content-type", "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(content.encode('utf-8'))
|
| 154 |
+ |
|
| 155 |
if __name__ == "__main__":
    # Script entry point: load the server settings into the module-level
    # `server` mapping (read by the request handler), then serve until
    # interrupted.
    server = configuration.get_server()
    say(server['name'] + " v" + server['version'])
    webServer = HTTPServer((server['address'], server['port']), MyServer)
    say("Server started http://%s:%s" % (server['address'], server['port']))

    # Disabled manual checks of individual parsers, kept for reference.
    # ~ newsParser.newsTheAtlantic.article("https://www.theatlantic.com/international/archive/2021/05/biden-israel-gaza-hamas-ceasefire/618949/")
    # ~ newsParser.newsLiberation.article("https://www.liberation.fr/politique/un-militaire-ca-se-presente-ou-ca-se-tait-selon-darmanin-20210510_4VGKW767PZCGVLDXY55BHC5CBU/")
    # ~ newsParser.newsSCMP.article("https://www.scmp.com/news/china/science/article/3140669/what-does-africa-need-make-more-coronavirus-vaccines?module=lead_hero_story_1&pgtype=homepage")
    # ~ newsParser.newsBFM.article("https://www.bfmtv.com/sante/rrisque-accru-de-developper-le-syndrome-de-guillain-barre-avec-le-vaccin-johnson-johnson_AN-202107120507.html")
    # ~ newsParser.newsFrandroidCom.article("https://www.frandroid.com/comment-faire/tutoriaux/986141_pass-sanitaire-europeen-comment-le-telecharger-pour-voyager-sereinement")
    # ~ newsParser.newsWaPo.article("https://www.washingtonpost.com/business/interactive/2021/pandora-papers-offshore-finance/")

    try:
        webServer.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        # Release the listening socket even if serve_forever() failed with
        # an unexpected exception.
        webServer.server_close()
        print("Server stopped.")
|
| ... | ... |
@@ -0,0 +1,4 @@ |
| 1 |
#!/bin/bash
# Start newsProxy from its project directory; the server opens its page
# templates and static files through paths relative to the working directory.

# Abort instead of launching from the wrong directory if the cd fails.
cd /home/ycawidro/dev/newsProxy/ || exit 1

# exec replaces the shell so signals and the exit status belong to Python.
exec /usr/bin/python3 newsProxy.py
|
| 4 |
+ |