Showing 3 changed files with 180 additions and 0 deletions
+1
newsProxy
... ...
@@ -0,0 +1 @@
1
+newsProxy.py
+175
newsProxy.py
... ...
@@ -0,0 +1,175 @@
1
+#!/usr/bin/env python3
2
+from http.server import BaseHTTPRequestHandler, HTTPServer
3
+import datetime
4
+from time import perf_counter 
5
+import os
6
+import json
7
+import urllib.parse
8
+import configuration
9
+import socket
10
+import requests
11
+from userio import * 
12
+
13
+# Early cache version
14
+import hashlib
15
+from pathlib import Path
16
+
17
+
18
+import newsParser
19
+
20
# Server settings dict; stays None until the __main__ block loads it from
# configuration.get_server().
server = None
# CSV paths rebuilt by MyServer.do_GET on every request from the server settings.
csvFilename = None
# Declared ``global`` in MyServer.do_GET as well, so it is initialised here like
# its siblings instead of only coming into existence on the first request.
csvFilenameTemp = None
# Not assigned anywhere in the visible code.
connectionResults = None
23
+
24
+ 
25
def navigationHeader(url, now=None):
    """Build the HTML navigation bar placed above the article.

    Args:
        url: Address of the article being displayed, or None for the root
            page. When given, a "link" anchor pointing at it is added;
            otherwise the markup returned by ``newsParser.supportedList()``
            is inserted instead.
        now: Optional ``datetime.datetime`` used to pick the initial opacity
            value. Defaults to the current local time.

    Returns:
        The navigation bar markup as a string ending with a newline.
    """
    # Local import so this function does not depend on a file-level change.
    import html

    if now is None:
        now = datetime.datetime.now()
    # Initial opacity value: 10 from 07:00 through 20:59, 3 otherwise.
    opacityValue = 10 if 7 <= now.hour <= 20 else 3

    parts = [
        "<div class=\"mangaChapNavig\">\n",
        # The anchor is now closed with </a> (the original emitted a second <a>).
        "  <div class=\"mangaChapNavigHome\"><a href=\".\"><div class=\"arrow-up\"></div></a></div>\n",
        "  <span id=\"mangaFSStatus\" style=\"display: none;\">0</span>\n",
        # The span is now closed with </span>; the original stray </div> closed
        # the surrounding container early.
        "  <span id=\"mangaFSToggle\" onclick=\"openFullscreen();\"><div class=\"fs-on\"></div></span>\n",
        "  <span id=\"mangaOpacityMinus\" onclick=\"onOpacityMinus()\">&#9788;</span>",
        "  <span id=\"mangaOpacityValue\">" + str(opacityValue) + "</span>",
        "  <span id=\"mangaOpacityPlus\" onclick=\"onOpacityPlus()\">&#9728;</span>\n",
    ]
    if url is not None:
        # url comes straight from the query string: escape it so it cannot
        # break out of the href attribute.
        # NOTE(review): the URL scheme is not restricted here (e.g. javascript:);
        # confirm whether callers validate it.
        safe_url = html.escape(url, quote=True)
        parts.append("  <span id=\"nav-link\"><a href=\"" + safe_url + "\" target=\"new\">link</a></span>\n")
    else:
        parts.append(newsParser.supportedList())
    parts.append("</div>\n")
    return "".join(parts)
42
+
43
class MyServer(BaseHTTPRequestHandler):
    """HTTP request handler for the article page and the static assets.

    ``/`` and ``/index.html`` render the article form and, when a ``url``
    query argument is present, the article itself (read from the on-disk
    cache or fetched through ``newsParser.getArticle``). Every other path is
    looked up under ``pages/static/`` and answered with 404 when missing.
    """

    # Directory (relative to the working directory) holding the static assets.
    _STATIC_ROOT = 'pages/static/'
    # Request paths that render the article page instead of a static file.
    _PAGE_PATHS = ('', '/', '/index.html')
    # File extension -> Content-type; anything else falls back to text/html.
    _CONTENT_TYPES = {
        '.css': 'text/css',
        '.bmp': 'image/x-ms-bmp',
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.ico': 'image/x-icon',
        '.svg': 'image/svg+xml',
        '.js': 'application/javascript',
    }
    _DEFAULT_CONTENT_TYPE = 'text/html'
    # URL entry form shown on top of every rendered page.
    _ARTICLE_FORM = (
        "<div class=\"input-container\">\n"
        "<form action=\"\">\n"
        "<input type=\"text\" id=\"srvIp\" name=\"url\" value=\"\" size=\"50\">\n"
        "<input type=\"submit\" class=\"button\" value=\"Get Article\">\n"
        "</form>\n"
        "</div>\n"
    )

    def log_message(self, format, *args):
        """Silence the default per-request logging of the server (too verbose)."""
        return

    def version_string(self):
        """Return the value of the ``Server`` response header.

        ``send_response`` and ``send_error`` already emit a ``Server`` header
        built from this method, so overriding it yields a single header with
        the application name instead of a duplicated one.
        """
        if server is None:
            return super().version_string()
        return server['name'] + " v" + server['version']

    def do_GET(self):
        """Dispatch a GET request to the article page or to a static file."""
        global csvFilename
        global csvFilenameTemp
        rootdir = self._STATIC_ROOT
        csvFilename = rootdir + server['csv']
        csvFilenameTemp = rootdir + server['csvTemp']

        # Split path and query properly so "/index.html?url=..." works as
        # well as "/?url=...".
        request = urllib.parse.urlsplit(self.path)
        if request.path in self._PAGE_PATHS:
            self._send_article_page(request.query)
            return

        static_file = self._resolve_static(rootdir, request.path)
        if static_file is None:
            self.send_error(404, 'file not found')
            return
        self._send_static(static_file)

    @staticmethod
    def _resolve_static(rootdir, request_path):
        """Map a request path to an existing path below *rootdir*.

        Returns the absolute path, or None when nothing exists there or when
        the request tries to escape *rootdir* (``..`` segments, also when
        percent-encoded).
        """
        try:
            root = os.path.abspath(rootdir)
            relative = urllib.parse.unquote(request_path).lstrip('/')
            candidate = os.path.abspath(os.path.join(root, relative))
            if os.path.commonpath([root, candidate]) != root:
                return None
        except ValueError:
            # Embedded NUL bytes or paths that cannot be compared.
            return None
        return candidate if os.path.exists(candidate) else None

    @classmethod
    def _content_type(cls, file_path):
        """Return the Content-type for *file_path* based on its extension."""
        extension = os.path.splitext(file_path)[1].lower()
        return cls._CONTENT_TYPES.get(extension, cls._DEFAULT_CONTENT_TYPE)

    def _send_static(self, file_path):
        """Send the file at *file_path*, or a 404 if it cannot be read."""
        try:
            # Read everything before sending the status line so a read error
            # can still be reported as a clean 404.
            with open(file_path, 'rb') as static_file:
                payload = static_file.read()
        except OSError:
            self.send_error(404, 'file not found')
            return
        self.send_response(200)
        self.send_header('Content-type', self._content_type(file_path))
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    @staticmethod
    def _load_article(url):
        """Return the article markup for *url*, using the on-disk cache.

        The cache file name is the MD5 hex digest of the URL (used only as a
        file name, not for security) inside ``server['cachedir']``.
        """
        # NOTE(review): url is user supplied and handed to newsParser as is;
        # confirm that newsParser restricts which hosts may be fetched.
        urlHashStr = hashlib.md5(url.encode()).hexdigest()
        cacheFilename = server['cachedir'] + "/" + urlHashStr
        cacheFile = Path(cacheFilename)
        say("URL Hash: " + urlHashStr)
        if cacheFile.is_file():
            say("Reading cache file: " + cacheFilename)
            return cacheFile.read_text(encoding='utf-8')

        articleNew = newsParser.getArticle(url)
        say("Creating cache file: " + cacheFilename)
        try:
            cacheFile.parent.mkdir(parents=True, exist_ok=True)
            cacheFile.write_text(articleNew, encoding='utf-8')
        except OSError as err:
            # The cache is only an optimisation: still serve the article.
            say("Cache write failed: " + str(err))
        return articleNew

    def _send_article_page(self, query):
        """Render and send the article page for the given query string."""
        # Local import so this method does not depend on a file-level change.
        import html

        # Log the X-Real-IP header value when present, else the peer address.
        client = self.headers['X-Real-IP']
        if client is None:
            client = self.client_address[0]
        say("From: " + client + " GET Received : " + self.path)

        with open(configuration.get_pageBegin(), 'r', encoding='utf-8') as fStart:
            data_begin = fStart.read().replace('CSTAPPNAME', server['name'])
        with open(configuration.get_pageEnd(), 'r', encoding='utf-8') as fEnd:
            data_end = fEnd.read().replace('CSTAPPNAME', server['name']).replace('CSTAPPVERSION', server['version'])

        urlArgs = urllib.parse.parse_qs(query)
        url = urlArgs['url'][0] if "url" in urlArgs else None

        page = [data_begin]
        if configuration.get_debug() != 0:
            # Request data is untrusted: escape it before echoing it back.
            page.append("<p>Request: " + html.escape(self.path) + "</p>\n")
            page.append("<pre>" + html.escape(json.dumps(urlArgs)) + "</pre>\n")
        page.append(self._ARTICLE_FORM)
        page.append(navigationHeader(url))
        page.append("<div id=\"article-current\">\n")
        if url is None:
            say("Root")
        else:
            page.append(self._load_article(url))
        page.append("</div>\n")
        page.append(data_end)

        body = ''.join(page).encode('utf-8')
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
154
+
155
if __name__ == "__main__":
    # Load the server settings into the module-level ``server`` used by MyServer.
    server = configuration.get_server()
    say(server['name'] + " v" + server['version'])
    webServer = HTTPServer((server['address'], server['port']), MyServer)
    say("Server started http://%s:%s" % (server['address'], server['port']))

    # Manual parser checks (uncomment one to run it before serving):
    # ~ newsParser.newsTheAtlantic.article("https://www.theatlantic.com/international/archive/2021/05/biden-israel-gaza-hamas-ceasefire/618949/")
    # ~ newsParser.newsLiberation.article("https://www.liberation.fr/politique/un-militaire-ca-se-presente-ou-ca-se-tait-selon-darmanin-20210510_4VGKW767PZCGVLDXY55BHC5CBU/")
    # ~ newsParser.newsSCMP.article("https://www.scmp.com/news/china/science/article/3140669/what-does-africa-need-make-more-coronavirus-vaccines?module=lead_hero_story_1&pgtype=homepage")
    # ~ newsParser.newsBFM.article("https://www.bfmtv.com/sante/rrisque-accru-de-developper-le-syndrome-de-guillain-barre-avec-le-vaccin-johnson-johnson_AN-202107120507.html")
    # ~ newsParser.newsFrandroidCom.article("https://www.frandroid.com/comment-faire/tutoriaux/986141_pass-sanitaire-europeen-comment-le-telecharger-pour-voyager-sereinement")
    # ~ newsParser.newsWaPo.article("https://www.washingtonpost.com/business/interactive/2021/pandora-papers-offshore-finance/")

    try:
        webServer.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        # Release the listening socket even if serve_forever() fails with
        # something other than KeyboardInterrupt.
        webServer.server_close()
        print("Server stopped.")
+4
newsProxy.sh
... ...
@@ -0,0 +1,4 @@
1
#!/bin/bash
# Start the proxy from its checkout: newsProxy.py uses the relative path
# pages/static/. Abort if the directory is missing instead of running python
# from the wrong place.
cd /home/ycawidro/dev/newsProxy/ || exit 1
# exec replaces this shell with the python process.
exec /usr/bin/python3 newsProxy.py
4
+