newsProxy/newsProxy.py at 5129fd5d22af6bb81ada935cd9e6ce408fe7ec65 ・ ycawidro/newsProxy ・ Gitprep

newsProxy / newsProxy.py /
ycawidro Cache/Nocache defined by configuration
1 contributor
183 lines | 7.854kb
#!/usr/bin/python3


#!/usr/bin/env python3
from http.server import BaseHTTPRequestHandler, HTTPServer
import datetime
from time import perf_counter 
import os
import json
import urllib.parse
import configuration
import socket
import requests
from userio import * 

# Early cache version
import hashlib
from pathlib import Path


import newsParser

server = None
csvFilename = None
connectionResults = None

 
def navigationHeader(url):
    """Build the HTML navigation bar placed above the article area.

    The bar always contains the home arrow, the fullscreen toggle and the
    brightness controls.  Its last entry depends on ``url``:

    * ``url`` is not ``None``: a "link" anchor pointing at ``url``.
    * ``url`` is ``None``: whatever ``newsParser.supportedList()`` returns.

    Args:
        url: Address of the requested article, or ``None`` on the root page.
            It comes from the request's query string, so it is HTML-escaped
            before being placed inside the ``href`` attribute.

    Returns:
        The navigation bar as an HTML string.
    """
    import html  # local import so the block does not depend on file-level imports

    # Initial brightness shown in the bar: 10 when the local hour is in
    # 7..20 inclusive, 3 otherwise.  The clock is read once so both bounds
    # are checked against the same instant.
    currentHour = datetime.datetime.now().hour
    opacityValue = 10 if 7 <= currentHour <= 20 else 3

    # NOTE(review): the markup below is malformed (an opening <a> is used
    # where </a> was presumably meant, and the mangaFSToggle <span> is closed
    # with </div>).  It is kept byte-for-byte because the stylesheet/scripts
    # may rely on how browsers recover from it -- confirm before fixing.
    pieces = [
        "<div class=\"mangaChapNavig\">\n",
        "  <div class=\"mangaChapNavigHome\"><a href=\".\"><div class=\"arrow-up\"></div><a></div>\n",
        "  <span id=\"mangaFSStatus\" style=\"display: none;\">0</span>\n",
        "  <span id=\"mangaFSToggle\" onclick=\"openFullscreen();\"><div class=\"fs-on\"></div></div>\n",
        "  <span id=\"mangaOpacityMinus\" onclick=\"onOpacityMinus()\">&#9788;</span>",
        "  <span id=\"mangaOpacityValue\">" + str(opacityValue) + "</span>",
        "  <span id=\"mangaOpacityPlus\" onclick=\"onOpacityPlus()\">&#9728;</span>\n",
    ]

    if url is not None:
        # Escape quotes and angle brackets so a crafted URL cannot break out
        # of the attribute and inject markup into the page.
        safeUrl = html.escape(url, quote=True)
        pieces.append("  <span id=\"nav-link\"><a href=\"" + safeUrl + "\" target=\"new\">link</a></span>\n")
    else:
        pieces.append(newsParser.supportedList())

    pieces.append("</div>\n")
    return "".join(pieces)

class MyServer(BaseHTTPRequestHandler):
    """HTTP request handler for the article reader page and its static files.

    A GET for ``/``, ``/index.html`` or any path that does not exist under
    ``pages/static/`` is answered with the reader page.  When the query
    string carries a ``url`` argument, the article body is taken from the
    on-disk cache or from ``newsParser.getArticle`` and embedded in the page.
    Any other GET is answered with the matching file from ``pages/static/``.
    Requests whose path would leave ``pages/static/`` are rejected with 404.
    """

    # Extension -> Content-type used for static files; anything else is
    # served as text/html.
    _CONTENT_TYPES = (
        ('.css', 'text/css'),
        ('.bmp', 'image/x-ms-bmp'),
        ('.png', 'image/png'),
        ('.jpg', 'image/jpeg'),
    )

    def log_message(self, format, *args):
        """Discard the default per-request log output (too verbose)."""
        return

    @staticmethod
    def _static_file_path(rootdir, request_path):
        """Map a request path onto a file system path inside ``rootdir``.

        Args:
            rootdir: Directory that holds the static files.
            request_path: Path part of the request target (no query string).

        Returns:
            The absolute, normalised path, or ``None`` when the request would
            resolve outside ``rootdir`` (for example through ``..``).
        """
        root = os.path.abspath(rootdir)
        candidate = os.path.abspath(os.path.join(root, request_path.lstrip('/')))
        try:
            inside = os.path.commonpath([root, candidate]) == root
        except ValueError:
            # Raised when the two paths cannot be compared (e.g. different
            # drives on Windows); such a path is certainly not under root.
            inside = False
        return candidate if inside else None

    @classmethod
    def _content_type(cls, request_path):
        """Return the Content-type for ``request_path`` based on its suffix."""
        for suffix, contentType in cls._CONTENT_TYPES:
            if request_path.endswith(suffix):
                return contentType
        return 'text/html'

    def do_GET(self):
        """Answer a GET request with the reader page or a static file."""
        global csvFilename
        global csvFilenameTemp
        rootdir = 'pages/static/'
        csvFilename = rootdir + server['csv']
        csvFilenameTemp = rootdir + server['csvTemp']

        # Separate the path from the query string so "/index.html?url=..."
        # is parsed the same way as "/?url=...".
        request_path, _, query = self.path.partition('?')

        static_path = self._static_file_path(rootdir, request_path)
        if static_path is None:
            # The path points outside the static directory: never serve it.
            self.send_error(404, 'file not found')
            return

        if request_path in ('/', '/index.html') or not os.path.exists(static_path):
            self._send_article_page(query)
        else:
            self._send_static_file(static_path, request_path)

    def _send_article_page(self, query):
        """Build and send the reader page for the given raw query string."""
        # Prefer the address forwarded in X-Real-IP when that header is set.
        clientAddress = self.headers['X-Real-IP']
        if clientAddress is None:
            clientAddress = self.client_address[0]
        say("From: "+clientAddress+" GET Received : "+self.path)

        # Page header/footer templates with their placeholders filled in.
        with open(configuration.get_pageBegin(), 'r') as fStart:
            data_begin = fStart.read().replace('CSTAPPNAME', server['name'])
        with open(configuration.get_pageEnd(), 'r') as fEnd:
            data_end = fEnd.read().replace('CSTAPPNAME', server['name']).replace('CSTAPPVERSION', server['version'])

        urlArgs = urllib.parse.parse_qs(query)
        url = urlArgs['url'][0] if "url" in urlArgs else None

        data_page = ""

        if configuration.get_debug() != 0:
            # NOTE(review): debug output echoes request data unescaped;
            # keep debug mode off on servers reachable by untrusted clients.
            data_page += "<p>Request: "+self.path+"</p>\n"
            data_page += "<xmp>"+json.dumps(urlArgs)+"</xmp>\n"

        # URL entry form
        data_page += (
            "<div class=\"input-container\">\n"
            "<form action=\"\">\n"
            "<input type=\"text\" id=\"srvIp\" name=\"url\" value=\"\" size=\"50\">\n"
            "<input type=\"submit\" class=\"button\" value=\"Get Article\">\n"
            "</form>\n"
            "</div>\n"
        )
        data_page += navigationHeader(url)
        data_page += "<div id=\"article-current\">\n"

        if url is None:
            say("Root")
        else:
            data_page += self._article_html(url)

        data_page += "</div>\n"

        content = ''.join((data_begin, data_page, data_end))
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header('Server', server['name']+" v"+server['version'])
        self.end_headers()
        self.wfile.write(content.encode('utf-8'))

    def _article_html(self, url):
        """Return the article markup for ``url`` from the cache or the parser."""
        # Cache files are named after the MD5 hex digest of the URL.
        urlHashStr = hashlib.md5(url.encode()).hexdigest()
        cacheFilename = server['cachedir']+"/"+urlHashStr
        say("URL Hash: "+urlHashStr)

        cachedAlready = Path(cacheFilename).is_file()
        if 0 == configuration.isCacheEnabled():
            say("Cache Disabled")
            cachedAlready = False

        if cachedAlready:
            say("Reading cache file: "+cacheFilename)
            return Path(cacheFilename).read_text()

        articleNew = newsParser.getArticle(url)
        # The cache file is (re)written even when the cache is disabled for
        # reading, as before.
        say("Creating cache file: "+cacheFilename)
        with open(cacheFilename, "w") as newCacheFile:
            newCacheFile.write(articleNew)
        return articleNew

    def _send_static_file(self, static_path, request_path):
        """Send the file at ``static_path``, or 404 when it cannot be read."""
        try:
            # Read before sending any header so a failure (missing file,
            # directory, permissions) can still produce a clean 404.
            with open(static_path, 'rb') as staticFile:
                body = staticFile.read()
        except OSError:
            self.send_error(404, 'file not found')
            return

        self.send_response(200)
        self.send_header('Content-type', self._content_type(request_path))
        self.send_header('Server', server['name']+" v"+server['version'])
        self.end_headers()
        self.wfile.write(body)

if __name__ == "__main__":
  # Script entry point: load the settings into the module-level `server`
  # (read by the request handler), start listening, and run until Ctrl-C.
  server = configuration.get_server()
  say(server['name'] + " v" + server['version'])

  listenOn = (server['address'], server['port'])
  webServer = HTTPServer(listenOn, MyServer)
  say("Server started http://%s:%s" % listenOn)

  # Manual parser checks kept for development; uncomment one to run it at startup.
  # ~ newsParser.newsTheAtlantic.article("https://www.theatlantic.com/international/archive/2021/05/biden-israel-gaza-hamas-ceasefire/618949/")
  # ~ newsParser.newsLiberation.article("https://www.liberation.fr/politique/un-militaire-ca-se-presente-ou-ca-se-tait-selon-darmanin-20210510_4VGKW767PZCGVLDXY55BHC5CBU/")
  # ~ newsParser.newsSCMP.article("https://www.scmp.com/news/china/science/article/3140669/what-does-africa-need-make-more-coronavirus-vaccines?module=lead_hero_story_1&pgtype=homepage")
  # ~ newsParser.newsBFM.article("https://www.bfmtv.com/sante/rrisque-accru-de-developper-le-syndrome-de-guillain-barre-avec-le-vaccin-johnson-johnson_AN-202107120507.html")
  # ~ newsParser.newsFrandroidCom.article("https://www.frandroid.com/comment-faire/tutoriaux/986141_pass-sanitaire-europeen-comment-le-telecharger-pour-voyager-sereinement")
  # ~ newsParser.newsWaPo.article("https://www.washingtonpost.com/business/interactive/2021/pandora-papers-offshore-finance/")
  # ~ newsParser.newsWaPo.article("https://www.washingtonpost.com/technology/2021/05/14/pimeyes-facial-recognition-search-secrecy/")

  try:
    webServer.serve_forever()
  except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop the server; fall through to shutdown.
    pass

  webServer.server_close()
  print("Server stopped.")