Added numerama.com support ・ c793a4a ・ Gitprep

+13 -10

newsfetch.py

@@ -11,6 +11,7 @@ import sys
 import getopt
 from slimmer import html_slimmer 
 
+MAX_ARTICLES=20
 verbose = False
 output_filename = 'default.html'
 rss_url = 'http://www.lemonde.fr/rss/une.xml'
@@ -30,7 +31,7 @@ for opt, arg in options:
     elif opt == '--version':
         version = arg
 
-CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
+CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
 
 class Printable:
     def __repr__(self):
@@ -113,10 +114,12 @@ for article in d.entries:
   for link in article.links:
     if "enclosure" == link.rel:
       article_details.enclosure = link.href
+      
+  if article_details.enclosure == "":
+    soup_mysite = BeautifulSoup(article.description,"lxml")
+    content = soup_mysite.find("img")
+    article_details.enclosure = content.get('src')
   
-  #~ article_details.content = article.content.value.encode('utf-8').strip()
-  #~ print len(article_details.content)
-
   # Not Working as is. Generated image is too big
   #if article_details.enclosure is not None:
     #img_content=urllib2.urlopen(article.link).read()
@@ -128,6 +131,8 @@ for article in d.entries:
   f.write("\t"+article_details.title+"</div></ul>\n")
   articles.append(article_details)
   cpt=cpt+1
+  if cpt > MAX_ARTICLES:
+    break
 
 cpt_num=cpt
 f.write("\n<a name=\"article-top\"></a>\n")
@@ -135,7 +140,6 @@ f.write("<div id=\"article-current\"></div>\n\n")
 cpt=0
 for article in articles:
   print("-- {:d} : {:s}".format(cpt,article.title))
-  #~ print("  -- {:s}".format(article.link))
   opener = urllib2.build_opener()
   opener.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0')]
   try:
@@ -177,14 +181,11 @@ for article in articles:
     content = soup_mysite.find('div', attrs={'class':'article-page'})
   
   if content == None:
-    #~ content = soup_mysite.find('div', attrs={'id':'block-article'})
     only_text=soup_mysite.find('div', attrs={'id':'the-content'})
     content = "<h1>{:s}</h1><h3>{:s}</h3>{:s}".format(article.title,article.summary,only_text)
-    #~ content += soup_mysite.find('div', attrs={'id':'the-content'})
-    #~ content = soup_mysite.find('div', attrs={'class':'article-top'})
-    #~ content = soup_mysite.find('div', attrs={'class':'inner clearfix'})
     
-    #~ <div id="block-article" class="article" itemscope itemtype="Article">
+  if rss_url == "http://www.numerama.com/rss/news.rss":
+    content = "<h1>{:s}</h1>{:s}".format(article.title,content)
     
   article.content_only = str(content)
   article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
@@ -267,5 +268,7 @@ for article in articles:
   f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
   f.write("</div>\n\n")
   cpt=cpt+1
+  if cpt > MAX_ARTICLES:
+    break
   
 f.close()

●	README.md	+7 -4
●	newsfetch.py	+13 -10

...	...	@@ -24,7 +24,10 @@ The article content (i.e. Feed link) is fetch automatically and the content is e
24	24	- <article>...</article>
25	25
26	26	## Examples
27		-- ./newsfetch.py --url http://www.vice.com/fr/rss
28		-- ./newsfetch.py --url http://www.lemonde.fr/rss/une.xml
29		-- ./newsfetch.py --url https://www.slate.fr/rss.xml
30		-- ./newsfetch.py --url http://www.lesinrocks.com/feeds/feed-a-la-une/
	27	+```bash
	28	+./newsfetch.py --url http://www.vice.com/fr/rss -o vice.fr.html
	29	+./newsfetch.py --url http://www.lemonde.fr/rss/une.xml -o lemonde.html
	30	+./newsfetch.py --url https://www.slate.fr/rss.xml -o slate.fr.html
	31	+./newsfetch.py --url http://www.lesinrocks.com/feeds/feed-a-la-une/ -o lesinrocks.html
	32	+./newsfetch.py --url http://www.numerama.com/rss/news.rss -o numerama.html
	33	+```

...	...	@@ -11,6 +11,7 @@ import sys
11	11	import getopt
12	12	from slimmer import html_slimmer
13	13
	14	+MAX_ARTICLES=20
14	15	verbose = False
15	16	output_filename = 'default.html'
16	17	rss_url = 'http://www.lemonde.fr/rss/une.xml'
...	...	@@ -30,7 +31,7 @@ for opt, arg in options:
30	31	elif opt == '--version':
31	32	version = arg
32	33
33		-CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
	34	+CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
34	35
35	36	class Printable:
36	37	def __repr__(self):
...	...	@@ -113,10 +114,12 @@ for article in d.entries:
113	114	for link in article.links:
114	115	if "enclosure" == link.rel:
115	116	article_details.enclosure = link.href
	117	+
	118	+ if article_details.enclosure == "":
	119	+ soup_mysite = BeautifulSoup(article.description,"lxml")
	120	+ content = soup_mysite.find("img")
	121	+ article_details.enclosure = content.get('src')
116	122
117		- #~ article_details.content = article.content.value.encode('utf-8').strip()
118		- #~ print len(article_details.content)
119		-
120	123	# Not Working as is. Generated image is too big
121	124	#if article_details.enclosure is not None:
122	125	#img_content=urllib2.urlopen(article.link).read()
...	...	@@ -128,6 +131,8 @@ for article in d.entries:
128	131	f.write("\t"+article_details.title+"</div></ul>\n")
129	132	articles.append(article_details)
130	133	cpt=cpt+1
	134	+ if cpt > MAX_ARTICLES:
	135	+ break
131	136
132	137	cpt_num=cpt
133	138	f.write("\n<a name=\"article-top\"></a>\n")
...	...	@@ -135,7 +140,6 @@ f.write("<div id=\"article-current\"></div>\n\n")
135	140	cpt=0
136	141	for article in articles:
137	142	print("-- {:d} : {:s}".format(cpt,article.title))
138		- #~ print(" -- {:s}".format(article.link))
139	143	opener = urllib2.build_opener()
140	144	opener.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:42.0) Gecko/20100101 Firefox/42.0')]
141	145	try:
...	...	@@ -177,14 +181,11 @@ for article in articles:
177	181	content = soup_mysite.find('div', attrs={'class':'article-page'})
178	182
179	183	if content == None:
180		- #~ content = soup_mysite.find('div', attrs={'id':'block-article'})
181	184	only_text=soup_mysite.find('div', attrs={'id':'the-content'})
182	185	content = "<h1>{:s}</h1><h3>{:s}</h3>{:s}".format(article.title,article.summary,only_text)
183		- #~ content += soup_mysite.find('div', attrs={'id':'the-content'})
184		- #~ content = soup_mysite.find('div', attrs={'class':'article-top'})
185		- #~ content = soup_mysite.find('div', attrs={'class':'inner clearfix'})
186	186
187		- #~ <div id="block-article" class="article" itemscope itemtype="Article">
	187	+ if rss_url == "http://www.numerama.com/rss/news.rss":
	188	+ content = "<h1>{:s}</h1>{:s}".format(article.title,content)
188	189
189	190	article.content_only = str(content)
190	191	article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
...	...	@@ -267,5 +268,7 @@ for article in articles:
267	268	f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
268	269	f.write("</div>\n\n")
269	270	cpt=cpt+1
	271	+ if cpt > MAX_ARTICLES:
	272	+ break
270	273
271	274	f.close()