Showing 1 changed file with 83 additions and 32 deletions
+83 -32
newsfetch.py
... ...
@@ -9,15 +9,18 @@ import lxml.html
9 9
 import re
10 10
 import sys
11 11
 import getopt
12
-from slimmer import html_slimmer 
12
+from slimmer import html_slimmer
13
+import weasyprint
13 14
 
14 15
 MAX_ARTICLES=20
15 16
 verbose = False
17
+pdf = False
16 18
 output_filename = 'default.html'
17 19
 rss_url = 'http://www.lemonde.fr/rss/une.xml'
18 20
 
19 21
 options, remainder = getopt.getopt(sys.argv[1:], 'o:v', ['output=', 
20 22
                                                          'verbose',
23
+                                                         'pdf',
21 24
                                                          'url=',
22 25
                                                          ])
23 26
 
... ...
@@ -30,8 +33,16 @@ for opt, arg in options:
30 33
         verbose = True
31 34
     elif opt == '--version':
32 35
         version = arg
36
+    elif opt == '--pdf':
37
+        pdf = True
33 38
 
34
-CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
39
+#~ print pdf
40
+
41
+if pdf == False :
42
+  CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
43
+else:
44
+  CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#fff}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
45
+  
35 46
 
36 47
 class Printable:
37 48
     def __repr__(self):
... ...
@@ -91,17 +102,17 @@ f.write("<head>\n")
91 102
 f.write("	<title>"+feed_details.title+"</title>\n")
92 103
 f.write("	<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">\n")
93 104
 f.write("	<meta name=\"viewport\" content=\"width=450px, user-scalable=no\">\n")
94
-f.write("	<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n")
95 105
 f.write("	<link rel=\"icon\" type=\"image/ico\" href=\"favicon.ico\">\n")
96
-f.write("	<script>\n")
97
-f.write("	function onArticle(index) {\n")
98
-f.write("		var string_index = \"article-\"+index;\n")
99
-f.write("		var url = location.href;\n")
100
-f.write("		document.getElementById(\"article-current\").innerHTML =\n")
101
-f.write("			document.getElementById(string_index).innerHTML;\n")
102
-f.write("		location.href = \"#article-top\";\n")
103
-f.write("	}\n")
104
-f.write("	</script>\n")
106
+if pdf is False:
107
+  f.write("	<script>\n")
108
+  f.write("	function onArticle(index) {\n")
109
+  f.write("		var string_index = \"article-\"+index;\n")
110
+  f.write("		var url = location.href;\n")
111
+  f.write("		document.getElementById(\"article-current\").innerHTML =\n")
112
+  f.write("			document.getElementById(string_index).innerHTML;\n")
113
+  f.write("		location.href = \"#article-top\";\n")
114
+  f.write("	}\n")
115
+  f.write("	</script>\n")
105 116
 f.write("<style>\n"+CSS+"\n</style>\n")
106 117
 f.write("</head>\n")
107 118
 f.write("<body>\n")
... ...
@@ -128,20 +139,30 @@ for article in d.entries:
128 139
   # Not Working as is. Generated image is too big
129 140
   #if article_details.enclosure is not None:
130 141
     #img_content=urllib2.urlopen(article.link).read()
131
-    #article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content)
132
-  f.write("<ul><div onclick=\"onArticle("+str(cpt)+")\" style=\"display:inline;\">\n")  
133
-  f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"><br>\n")
142
+    #article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content) 
143
+  if pdf is False:
144
+    f.write("<ul><div onclick=\"onArticle("+str(cpt)+")\" style=\"display:inline;\">\n")
145
+    f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"><br>\n")
146
+  else:
147
+    f.write("<ul><div style=\"display:inline;\"><a href=\"#article-"+str(cpt)+"\">\n")
148
+    f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"></a><br>\n")
134 149
   f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#8670;</a></div>\n")
135
-  f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8615;</a></div>\n")
136
-  f.write("\t"+article_details.title+"</div></ul>\n")
150
+  if pdf is False:
151
+    f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8615;</a></div>\n")
152
+    f.write("\t"+article_details.title+"</div></ul>\n")
153
+  else:
154
+    f.write("\t<a href=\"#article-"+str(cpt)+"\">"+article_details.title+"</a></div></ul>\n")
137 155
   articles.append(article_details)
138 156
   cpt=cpt+1
139 157
   if cpt > MAX_ARTICLES:
140 158
     break
141 159
 
142 160
 cpt_num=cpt
143
-f.write("\n<a name=\"article-top\"></a>\n")
144
-f.write("<div id=\"article-current\"></div>\n\n")
161
+if pdf is False:
162
+  f.write("\n<a name=\"article-top\"></a>\n")
163
+  f.write("<div id=\"article-current\"></div>\n\n")
164
+
165
+
145 166
 cpt=0
146 167
 for article in articles:
147 168
   print("-- {:d} : {:s}".format(cpt,article.title))
... ...
@@ -161,21 +182,35 @@ for article in articles:
161 182
     cpt_next=cpt+1
162 183
     if cpt_next > cpt_num:
163 184
       cpt_next = cpt_num
164
-    f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
185
+    if pdf is False:  # interactive page: article starts hidden and is shown through onArticle()
186
+      f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
187
+    else:  # flat layout for PDF output: every article is rendered visible
188
+      print("flat")  # parenthesised like the script's other print call; output is unchanged on Python 2
189
+      f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: inline;\">\n")
190
+      
165 191
     f.write("<hr><a name=\"article-"+str(cpt)+"\">")
166 192
     f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#8670;</a></div>\n")
167
-    f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
193
+    if pdf is False:
194
+      f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
168 195
     f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div>&nbsp;")
169
-    f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
170
-    f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
196
+    if pdf is False:
197
+      f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
198
+      f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
199
+    else:
200
+      f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">&#8612;</a></div>\n")
201
+      f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">&#8614;</a></div>\n")
171 202
     f.write("<div class=\"extract-content\" id=\""+str(cpt)+"\">\n")
172 203
     f.write(article.content_only)
173 204
     f.write("\n</div>\n")
174 205
     f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#8670;</a></div>\n")
175
-    f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
206
+    if pdf is False: f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")  # the article-top anchor is only written in non-PDF mode, so skip the link otherwise
176 207
     f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div>&nbsp;")
177
-    f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
178
-    f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
208
+    if pdf is False:
209
+      f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
210
+      f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
211
+    else:
212
+      f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">&#8612;</a></div>\n")
213
+      f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">&#8614;</a></div>\n")
179 214
     f.write("</div>\n\n")
180 215
     cpt=cpt+1
181 216
     continue
... ...
@@ -259,24 +294,40 @@ for article in articles:
259 294
   if cpt_next > cpt_num:
260 295
     cpt_next = cpt_num
261 296
 
262
-  f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
297
+  if pdf is False:
298
+    f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
299
+  else:
300
+    f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: inline;\">\n")
263 301
   f.write("<hr><a name=\"article-"+str(cpt)+"\">")
264 302
   f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#8670;</a></div>\n")
265
-  f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
303
+  if pdf is False:
304
+    f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
266 305
   f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div>&nbsp;")
267
-  f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
268
-  f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
306
+  if pdf is False:
307
+    f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
308
+    f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
309
+  else:
310
+    f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">&#8612;</a></div>\n")
311
+    f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">&#8614;</a></div>\n")
269 312
   f.write("<div class=\"extract-content\" id=\""+str(cpt)+"\">\n")
270 313
   f.write(article.content_only)
271 314
   f.write("\n</div>\n")
272 315
   f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#8670;</a></div>\n")
273
-  f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
316
+  if pdf is False:
317
+    f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#8613;</a></div>&nbsp;\n")
274 318
   f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div>&nbsp;")
275
-  f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
276
-  f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
319
+  if pdf is False:
320
+    f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">&#8612;</div>\n")
321
+    f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">&#8614;</div>\n")
322
+  else:
323
+    f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">&#8612;</a></div>\n")
324
+    f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">&#8614;</a></div>\n")
277 325
   f.write("</div>\n\n")
278 326
   cpt=cpt+1
279 327
   if cpt > MAX_ARTICLES:
280 328
     break
281 329
   
282 330
 f.close()
331
+
332
+if pdf is True:  # render only the flat --pdf page (otherwise articles are display:none); the flag is no longer overwritten with PDF bytes
333
+  weasyprint.HTML(filename=output_filename).write_pdf(output_filename+".pdf")  # weasyprint writes the target file itself, so no text-mode handle is opened and left unclosed