#!/usr/bin/env python3
# encoding: UTF-8
"""Site-specific news-article extraction dispatcher.

Maps a URL to the matching per-site parser module and exposes generic
helpers to pull title / description / image out of raw page content.

For more see the file 'LICENSE' for copying permission.
"""

__author__ = 'Yanik Cawidrone'
__version__ = '0.1'

from .newsParser import newsDNA
from .newsParser import newsFt
from .newsParser import newsLeParisien
from .newsParser import newsLiberation
from .newsParser import newsWaPo
from .newsParser import newsZDNetFr
from .newsParser import newsSCMP
from .newsParser import newsTelerama
from .newsParser import newsCNA
from .newsParser import newsViceCom
from .newsParser import newsNewYorkTimes
from .newsParser import newsMothershipSG
from .newsParser import newsLeMonde
from .newsParser import newsChallengesFr
from .newsParser import newsJDD
from .newsParser import newsMidiLibre
from .newsParser import newsNouvelObs
from .newsParser import newsHuffPost
from .newsParser import newsStraitsTimes
from .newsParser import newsNewYorker
from .newsParser import newsLeFigaro
from .newsParser import newsSudOuest
from .newsParser import newsBBC
from .newsParser import newsTheAtlantic
from .newsParser import newsTheStarMy
from .newsParser import newsNSTMy
from .newsParser import newsLaDepeche
from .newsParser import newsTheGuardian
from .newsParser import newsBloomberg
from .newsParser import newsFranceTVInfo
from .newsParser import newsTheVerge
from .newsParser import newsBondyBlog
from .newsParser import newsFrandroidCom
from .newsParser import newsBuzzfeedCom
from .newsParser import newsYahooCom
from .newsParser import newsBFM
from .newsParser import newsDefault
from .newsParser import newsLNC
from .newsParser import newsSlateCom
# ~ from .newsParser import newsTodayOnlineSG


# Ordered (substring, parser-module) pairs; first match wins, preserving
# the precedence of the original if/elif chain (e.g. "huffingtonpost."
# before "huffpost.com").
_PARSERS = (
    ("ft.com", newsFt),
    ("dna.fr", newsDNA),
    ("washingtonpost.com", newsWaPo),
    ("leparisien.fr", newsLeParisien),
    ("liberation.fr", newsLiberation),
    ("zdnet.fr", newsZDNetFr),
    ("scmp.com", newsSCMP),
    ("telerama.fr", newsTelerama),
    ("channelnewsasia.com", newsCNA),
    ("vice.com", newsViceCom),
    ("nytimes.com", newsNewYorkTimes),
    ("mothership.sg", newsMothershipSG),
    ("lemonde.fr", newsLeMonde),
    ("lejdd.fr", newsJDD),
    ("nouvelobs.com", newsNouvelObs),
    ("huffingtonpost.", newsHuffPost),
    ("huffpost.com", newsHuffPost),
    ("straitstimes.com", newsStraitsTimes),
    ("newyorker.com", newsNewYorker),
    ("lefigaro.fr", newsLeFigaro),
    ("sudouest.fr", newsSudOuest),
    ("bbc.com", newsBBC),
    ("theatlantic.com", newsTheAtlantic),
    ("thestar.com.my", newsTheStarMy),
    ("challenges.fr", newsChallengesFr),
    ("depeche.fr", newsLaDepeche),
    ("guardian.com", newsTheGuardian),
    ("guardian.co.uk", newsTheGuardian),
    ("bloomberg.com", newsBloomberg),
    ("francetvinfo.fr", newsFranceTVInfo),
    ("theverge.com", newsTheVerge),
    ("bondyblog.fr", newsBondyBlog),
    ("frandroid.com", newsFrandroidCom),
    ("buzzfeed.com", newsBuzzfeedCom),
    ("buzzfeednews.com", newsBuzzfeedCom),
    ("news.yahoo.com", newsYahooCom),
    ("afp.com", newsYahooCom),
    ("bfmtv.com", newsBFM),
    ("lnc.nc", newsLNC),
    ("slate.com", newsSlateCom),
)


def supportedList():
    """Return a printable list of the supported news sources.

    NOTE(review): the original body computed ``dir(__import__(__name__))``
    into locals it never used and returned ``"\\n"``; the dead code is
    removed and the observable return value is kept unchanged.
    TODO: actually render the supported-domain list from ``_PARSERS``.
    """
    return "\n"


def _extractBetween(content, begin, end):
    """Return the text in *content* between the *begin* and *end* markers.

    Raises ValueError (propagated from str.index) when a marker is absent.
    """
    start = content.index(begin) + len(begin)
    return content[start:content.index(end, start)]


def articleElement(typeElement, content):
    """Extract one metadata element from a raw article page.

    Parameters:
        typeElement: one of "title", "description" or "image".
        content: raw page source (HTML, possibly with embedded JSON-LD).

    Returns:
        The extracted text, or "" when no marker pair matched.
    """
    # Buzzfeed pages carry their metadata as JSON-LD rather than meta tags.
    # (Bug fixed: the original compared typeElement with `is`, which is an
    # identity test, not string equality.)
    if "\"mainEntityOfPage\": \"https://www.buzzfeed" in content:
        jsonMarkers = {
            "title": "\"headline\": \"",
            "description": "\"description\": \"",
        }
        begin = jsonMarkers.get(typeElement)
        if begin is not None:
            try:
                return _extractBetween(content, begin, "\",")
            except ValueError:
                return ""
    # Generic extraction via <meta> tags.
    # NOTE(review): the original marker strings were corrupted in the source
    # file; the Open Graph / name= markers below are a reconstruction —
    # confirm against real pages before relying on them.
    beginMarkers = (
        "<meta property=\"og:%s\" content=\"" % typeElement,
        "<meta name=\"%s\" content=\"" % typeElement,
    )
    # The three closing forms the original probed for, in the same order.
    endMarkers = ("\" />", "\"/>", "\">")
    for begin in beginMarkers:
        try:
            start = content.index(begin) + len(begin)
        except ValueError:
            continue
        for end in endMarkers:
            try:
                return content[start:content.index(end, start)]
            except ValueError:
                continue
    return ""


def articleTitle(content):
    """Extract the article title from raw page content."""
    print("newsParser.articleTitle")
    return articleElement("title", content)


def articleImage(content):
    """Extract the article image URL from raw page content."""
    print("newsParser.articleImage")
    return articleElement("image", content)


def articleDescription(content):
    """Extract the article description from raw page content."""
    print("newsParser.articleDescription")
    return articleElement("description", content)


def getArticle(url):
    """Fetch and convert the article at *url* via the site-specific parser.

    Unknown domains fall back to the generic parser (newsDefault) with a
    short notice prepended. Returns "" when *url* is None.

    Bug fixed: the original invoked every parser as
    ``newsParser.newsXxx.article(url)`` although only the ``newsXxx``
    names are imported — a guaranteed NameError for every branch except
    ``ft.com``. Parsers are now called through the imported names.
    """
    if url is None:
        return ""
    for domain, parser in _PARSERS:
        if domain in url:
            return parser.article(url)
    # Unknown site: generic extraction with a notice.
    # NOTE(review): the original HTML wrapper around this notice was
    # corrupted in the source; this markup is a minimal reconstruction.
    data_page = "<p><i>Generic Extraction, click to open original link</i></p>\n"
    data_page += newsDefault.article(url)
    return data_page