Showing 1 changed files with 0 additions and 41 deletions
-41
newsParser/newsParser/newsBFM.py
... ...
@@ -57,44 +57,3 @@ def article(url):
57 57
   say("LengthAfter : "+str(lenAfter))
58 58
   say("Gain        : "+str(lenGain)+"%")
59 59
   return pageContent
60
-
61
-
62
def articleOld(url):
  """Download a BFM TV article page and return a reduced HTML fragment.

  The fragment is made of Open Graph ``<meta>`` tags (type, title,
  description, url, image, image type) followed by the article body
  wrapped in ``<article>``.  The body is the slice of the downloaded page
  that starts at the opening ``content_body`` div and stops just before
  the ``content_body_bottom`` div.  Inside that slice AMP images become
  plain ``<img>`` tags, headings are demoted by one level and
  root-relative links are pointed at www.bfmtv.com.

  Args:
    url: Address of the article page to download.

  Returns:
    The generated HTML as a string, with a line break inserted between
    every pair of adjacent tags.

  Raises:
    ValueError: If either body marker is missing from the page.
  """
  say("Article: "+url)
  content = requests.get(url, allow_redirects=True).text

  articleStrTitle = newsParser.articleTitle(content)
  articleStrImageUrl = newsParser.articleImage(content)
  articleStrDescription = newsParser.articleDescription(content)
  articleCstBegin = "<div class=\"content_body\">"
  articleCstEnd   = "<div class=\"content_body\" id=\"content_body_bottom\">"

  # NOTE(review): the extracted values are placed inside attribute values
  # as-is; whether newsParser already HTML-escapes them is not visible
  # here -- confirm before adding escaping (risk of double-escaping).
  pageContent = (
    "<meta property=\"og:type\" content=\"article\">\n"
    + "<meta property=\"og:title\" content=\""+articleStrTitle+"\">\n"
    + "<meta property=\"og:description\" content=\""+articleStrDescription+"\">\n"
    + "<meta property=\"og:url\" content=\""+url+"\">\n"
    + "<meta property=\"og:image\" content=\""+articleStrImageUrl+"\">\n"
    + "<meta property=\"og:image:type\" content=\"image/jpeg\">"
  )

  # str.index (not str.find) on purpose: a page without the expected
  # markers must fail loudly instead of yielding a bogus slice.
  indexBegin = content.index(articleCstBegin)
  indexEnd   = content.index(articleCstEnd)
  article_only = content[indexBegin:indexEnd]

  # Literal substitutions, applied in order.  The h2 -> h3 pair has to run
  # before h1 -> h2, otherwise former h1 headings would be demoted twice.
  literalSwaps = (
    ("<amp-img", "<img"),
    ("</amp-img>", ""),
    ("<h2", "<h3"),
    ("</h2>", "</h3>"),
    ("<h1", "<h2"),
    ("</h1>", "</h2>"),
  )
  for needle, substitute in literalSwaps:
    article_only = article_only.replace(needle, substitute)

  # Prefix root-relative links with the site host.  The negative lookahead
  # leaves protocol-relative links (href="//host/...") untouched; they
  # already name a host and would be corrupted by the prefix.
  article_only = re.sub(r'href="/(?!/)', 'href="//www.bfmtv.com/', article_only)

  pageContent += "<article>"+article_only+"</article>"
  return pageContent.replace("><", ">\n<")