Showing 1 changed files with 0 additions and 46 deletions
-46
newsParser/newsParser/newsSlateCom.py
... ...
@@ -92,49 +92,3 @@ def article(url):
92 92
   say("Length: "+str(len(article_only)))
93 93
   return pageContent
94 94
 
95
def articleOld(url):
  """Fetch *url* and return an HTML fragment built from the page.

  The fragment consists of Open Graph ``<meta>`` tags (type, title,
  description, url, image, image type) followed by the article markup
  wrapped in ``<article>...</article>``.

  The article markup is the slice of the downloaded page that starts at
  the first ``<article`` (falling back to ``<body``, then to the start of
  the page) and stops just before the next ``</article>`` (falling back to
  ``</body>``, then to the end of the page).

  Args:
    url: Address of the page to download; also emitted as ``og:url``.

  Returns:
    The assembled fragment as a ``str``.
  """
  say("Article: "+url)
  r = requests.get(url, allow_redirects=True)
  content = r.text

  articleStrImageUrl = articleImage(content)
  articleStrTitle = articleTitle(content)
  articleStrDescription = articleDescription(content)

  # NOTE(review): the values below are interpolated into attribute values
  # without escaping -- confirm the article* helpers return attribute-safe
  # text before relying on this output.
  pageContent = ""
  pageContent += "<meta property=\"og:type\" content=\"article\">\n"
  pageContent += "<meta property=\"og:title\" content=\""+articleStrTitle+"\">\n"
  pageContent += "<meta property=\"og:description\" content=\""+articleStrDescription+"\">\n"
  pageContent += "<meta property=\"og:url\" content=\""+url+"\">\n"
  pageContent += "<meta property=\"og:image\" content=\""+articleStrImageUrl+"\">\n"
  pageContent += "<meta property=\"og:image:type\" content=\"image/jpeg\">\n"

  indexBegin = _firstMarkerIndex(content, ("<article", "<body"), 0)
  # Look for the closing marker only after the opening one, so a stray
  # closing tag earlier in the page cannot produce an empty slice.
  indexEnd = _firstMarkerIndex(content, ("</article>", "</body>"), len(content), indexBegin)
  article_only = content[indexBegin:indexEnd]

  # AMP image elements become plain <img> tags.
  article_only = article_only.replace("<amp-img", "<img")
  article_only = article_only.replace("</amp-img>", "")
  # Demote headings by one level. h2 -> h3 must run before h1 -> h2,
  # otherwise the former h1 headings would be demoted twice.
  article_only = article_only.replace("<h2", "<h3")
  article_only = article_only.replace("</h2>", "</h3>")
  article_only = article_only.replace("<h1", "<h2")
  article_only = article_only.replace("</h1>", "</h2>")
  # One tag per line.
  article_only = article_only.replace("><", ">\n<")

  pageContent += "<article>"+article_only+"</article>"
  return pageContent


def _firstMarkerIndex(text, markers, default, start=0):
  """Return the index of the first marker in *markers* found in *text*.

  Markers are tried in order, searching from *start*; *default* is
  returned when none of them occurs.
  """
  for marker in markers:
    position = text.find(marker, start)
    if position != -1:
      return position
  return default