...
|
...
|
@@ -103,6 +103,7 @@ f.write("<h1 id=\"top\">"+feed_details.title+"</h1>\n")
|
103
|
103
|
|
104
|
104
|
articles=list()
|
105
|
105
|
cpt=0
|
|
106
|
+
|
106
|
107
|
for article in d.entries:
|
107
|
108
|
article_details = ArticleDetails()
|
108
|
109
|
article_details.title = article.title.encode('utf-8').strip()
|
...
|
...
|
@@ -112,8 +113,11 @@ for article in d.entries:
|
112
|
113
|
for link in article.links:
|
113
|
114
|
if "enclosure" == link.rel:
|
114
|
115
|
article_details.enclosure = link.href
|
|
116
|
+
|
|
117
|
+ #~ article_details.content = article.content.value.encode('utf-8').strip()
|
|
118
|
+ #~ print len(article_details.content)
|
115
|
119
|
|
116
|
|
- # Not Wroking as is. Generated image is too big
|
|
120
|
+ # Not Working as is. Generated image is too big
|
117
|
121
|
#if article_details.enclosure is not None:
|
118
|
122
|
#img_content=urllib2.urlopen(article.link).read()
|
119
|
123
|
#article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content)
|
...
|
...
|
@@ -131,13 +135,47 @@ f.write("<div id=\"article-current\"></div>\n\n")
|
131
|
135
|
cpt=0
|
132
|
136
|
for article in articles:
|
133
|
137
|
print("-- {:d} : {:s}".format(cpt,article.title))
|
134
|
|
- response = urllib2.urlopen(article.link)
|
135
|
|
- article.content = response.read()
|
|
138
|
+ #~ print(" -- {:s}".format(article.link))
|
|
139
|
# Download the article page and store the raw HTML in article.content.
# On any failure article.content is set to None so the caller can emit a
# placeholder entry instead of aborting the whole run.
opener = urllib2.build_opener()
# A browser-like User-Agent is sent instead of urllib2's default one
# (presumably the site rejects the default agent -- TODO confirm).
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0')]
try:
    response = opener.open(article.link)
    try:
        article.content = response.read()
    finally:
        # Release the connection even when read() fails part-way.
        response.close()
except urllib2.HTTPError as err:
    # The HTTP status lives on the exception; `response` is not bound
    # when open() raises, so it must not be used here.
    print(" -- {:d}".format(err.code))
    article.content = None
except Exception as err:
    # Best effort: network/DNS/timeout/read errors carry no status code,
    # so report the error text and carry on with the next article.
    print(" -- {:s}".format(str(err)))
    article.content = None
|
|
147
|
+
|
|
148
|
+ if None == article.content:
|
|
149
|
+ cpt_prev=cpt-1
|
|
150
|
+ if cpt_prev < 0:
|
|
151
|
+ cpt_prev = 0
|
|
152
|
+ cpt_next=cpt+1
|
|
153
|
+ if cpt_next > cpt_num:
|
|
154
|
+ cpt_next = cpt_num
|
|
155
|
+ f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
|
|
156
|
+ f.write("<hr><a name=\"article-"+str(cpt)+"\">")
|
|
157
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
|
158
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
|
159
|
+ f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
|
160
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
161
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
162
|
+ f.write("<div class=\"extract-content\" id=\""+str(cpt)+"\">\n")
|
|
163
|
+ f.write(article.content_only)
|
|
164
|
+ f.write("\n</div>\n")
|
|
165
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
|
166
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
|
167
|
+ f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
|
168
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
169
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
170
|
+ f.write("</div>\n\n")
|
|
171
|
+ cpt=cpt+1
|
|
172
|
+ continue
|
|
173
|
+
|
136
|
174
|
# Parse the downloaded page and keep only the article body: prefer the
# <article> element, otherwise fall back to <div class="article-page">.
soup_mysite = BeautifulSoup(article.content, "lxml")
content = soup_mysite.find("article")
if content is None:
    content = soup_mysite.find('div', attrs={'class': 'article-page'})

# Serialise the selected node, turn site-relative links into absolute
# ones and drop the inline Twitter widget loader.
article.content_only = (
    str(content)
    .replace(" href=\"/", " href=\"http://www.lemonde.fr/")
    .replace('<script>require(["twitter/widgets"]);</script>', '')
)
|
...
|
...
|
@@ -193,8 +231,7 @@ for article in articles:
|
193
|
231
|
article.content_only = regexConjug.sub('\\1',article.content_only)
|
194
|
232
|
|
195
|
233
|
# Diet
|
196
|
|
- #~ article.content_only = html_slimmer(article.content_only.strip().replace('\n',' ').replace('\t',' ').replace('\r',' '))
|
197
|
|
- #~ article.content_only = article.content_only.encode("utf-8")
|
|
234
|
+ article.content_only = html_slimmer(article.content_only.strip().replace('\n',' ').replace('\t',' ').replace('\r',' '))
|
198
|
235
|
|
199
|
236
|
cpt_prev=cpt-1
|
200
|
237
|
if cpt_prev < 0:
|