...
|
...
|
@@ -11,6 +11,7 @@ import sys
|
11
|
11
|
import getopt
|
12
|
12
|
from slimmer import html_slimmer
|
13
|
13
|
|
|
14
|
+MAX_ARTICLES=20
|
14
|
15
|
verbose = False
|
15
|
16
|
output_filename = 'default.html'
|
16
|
17
|
rss_url = 'http://www.lemonde.fr/rss/une.xml'
|
...
|
...
|
@@ -30,7 +31,7 @@ for opt, arg in options:
|
30
|
31
|
elif opt == '--version':
|
31
|
32
|
version = arg
|
32
|
33
|
|
33
|
|
-CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
|
|
34
|
+CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
|
34
|
35
|
|
35
|
36
|
class Printable:
|
36
|
37
|
def __repr__(self):
|
...
|
...
|
@@ -113,10 +114,12 @@ for article in d.entries:
|
113
|
114
|
for link in article.links:
|
114
|
115
|
if "enclosure" == link.rel:
|
115
|
116
|
article_details.enclosure = link.href
|
|
117
|
+
|
|
118
|
+ if article_details.enclosure == "":
|
|
119
|
+ soup_mysite = BeautifulSoup(article.description,"lxml")
|
|
120
|
+ content = soup_mysite.find("img")
|
|
121
|
+ article_details.enclosure = content.get('src')
|
116
|
122
|
|
117
|
|
- #~ article_details.content = article.content.value.encode('utf-8').strip()
|
118
|
|
- #~ print len(article_details.content)
|
119
|
|
-
|
120
|
123
|
# Not Working as is. Generated image is too big
|
121
|
124
|
#if article_details.enclosure is not None:
|
122
|
125
|
#img_content=urllib2.urlopen(article.link).read()
|
...
|
...
|
@@ -128,6 +131,8 @@ for article in d.entries:
|
128
|
131
|
f.write("\t"+article_details.title+"</div></ul>\n")
|
129
|
132
|
articles.append(article_details)
|
130
|
133
|
cpt=cpt+1
|
|
134
|
+ if cpt > MAX_ARTICLES:
|
|
135
|
+ break
|
131
|
136
|
|
132
|
137
|
cpt_num=cpt
|
133
|
138
|
f.write("\n<a name=\"article-top\"></a>\n")
|
...
|
...
|
@@ -135,7 +140,6 @@ f.write("<div id=\"article-current\"></div>\n\n")
|
135
|
140
|
cpt=0
|
136
|
141
|
for article in articles:
|
137
|
142
|
print("-- {:d} : {:s}".format(cpt,article.title))
|
138
|
|
- #~ print(" -- {:s}".format(article.link))
|
139
|
143
|
opener = urllib2.build_opener()
|
140
|
144
|
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0')]
|
141
|
145
|
try:
|
...
|
...
|
@@ -177,14 +181,11 @@ for article in articles:
|
177
|
181
|
content = soup_mysite.find('div', attrs={'class':'article-page'})
|
178
|
182
|
|
179
|
183
|
if content == None:
|
180
|
|
- #~ content = soup_mysite.find('div', attrs={'id':'block-article'})
|
181
|
184
|
only_text=soup_mysite.find('div', attrs={'id':'the-content'})
|
182
|
185
|
content = "<h1>{:s}</h1><h3>{:s}</h3>{:s}".format(article.title,article.summary,only_text)
|
183
|
|
- #~ content += soup_mysite.find('div', attrs={'id':'the-content'})
|
184
|
|
- #~ content = soup_mysite.find('div', attrs={'class':'article-top'})
|
185
|
|
- #~ content = soup_mysite.find('div', attrs={'class':'inner clearfix'})
|
186
|
186
|
|
187
|
|
- #~ <div id="block-article" class="article" itemscope itemtype="Article">
|
|
187
|
+ if rss_url == "http://www.numerama.com/rss/news.rss":
|
|
188
|
+ content = "<h1>{:s}</h1>{:s}".format(article.title,content)
|
188
|
189
|
|
189
|
190
|
article.content_only = str(content)
|
190
|
191
|
article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
|
...
|
...
|
@@ -267,5 +268,7 @@ for article in articles:
|
267
|
268
|
f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
268
|
269
|
f.write("</div>\n\n")
|
269
|
270
|
cpt=cpt+1
|
|
271
|
+ if cpt > MAX_ARTICLES:
|
|
272
|
+ break
|
270
|
273
|
|
271
|
274
|
f.close()
|