...
|
...
|
@@ -9,15 +9,18 @@ import lxml.html
|
9
|
9
|
import re
|
10
|
10
|
import sys
|
11
|
11
|
import getopt
|
12
|
|
-from slimmer import html_slimmer
|
|
12
|
+from slimmer import html_slimmer
|
|
13
|
+import weasyprint
|
13
|
14
|
|
14
|
15
|
MAX_ARTICLES=20
|
15
|
16
|
verbose = False
|
|
17
|
+pdf = False
|
16
|
18
|
output_filename = 'default.html'
|
17
|
19
|
rss_url = 'http://www.lemonde.fr/rss/une.xml'
|
18
|
20
|
|
19
|
21
|
options, remainder = getopt.getopt(sys.argv[1:], 'o:v', ['output=',
|
20
|
22
|
'verbose',
|
|
23
|
+ 'pdf',
|
21
|
24
|
'url=',
|
22
|
25
|
])
|
23
|
26
|
|
...
|
...
|
@@ -30,8 +33,16 @@ for opt, arg in options:
|
30
|
33
|
verbose = True
|
31
|
34
|
elif opt == '--version':
|
32
|
35
|
version = arg
|
|
36
|
+ elif opt == '--pdf':
|
|
37
|
+ pdf = True
|
33
|
38
|
|
34
|
|
-CSS="h1,h2{font-weight:700}img,ul{width:440px;padding:0}em,img{text-align:left;align:left}img{height:208px}#nav-next:hover,#nav-prev:hover,a:hover{background:#333}body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}h1{font-size:1.5rem;line-height:1.5rem}h2,h3{font-size:1rem;line-height:1rem}details,h3{font-weight:400;font-style:italic}h3{background-color:#cdcdcd}details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}ul{list-style-type:none;color:#00F}ul:hover{cursor:pointer;cursor:hand}figure{margin-left:0;text-align:center}.img-heading,.img-nav{width:50px}#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}#nav-source{font-size:100%;font-weight:700;color:#00f}#article,#article-current{width:440px}.pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}.pullquote:after,.pullquote:before{color:silver;position:absolute;content:'"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}.pullquote:after{content:'"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}a{text-decoration:none}a:link,a:visited{color:#00F}"
|
|
39
|
+#~ print pdf
|
|
40
|
+
|
|
41
|
# Inline stylesheet written into the generated page's <style> element.
#
# The screen and PDF variants are identical except for the body background
# colour, so the sheet is spelled out once and only that value is switched on
# the --pdf flag.
_page_background = "#fff" if pdf else "#f0f0f0"

# NOTE: the double quote in the two `content:'"'` declarations is escaped on
# purpose. Left unescaped inside a double-quoted Python literal it terminates
# the string, the pieces get implicitly concatenated, and the emitted CSS ends
# up with an empty `content:''` (no pull-quote glyph).
CSS = (
    "h1,h2{font-weight:700}"
    "img,ul{width:440px;padding:0}"
    "em,img{text-align:left;align:left}"
    "img{height:208px}"
    "#nav-next:hover,#nav-prev:hover,a:hover{background:#333}"
    "body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;"
    "background-color:" + _page_background + "}"
    "h1{font-size:1.5rem;line-height:1.5rem}"
    "h2,h3{font-size:1rem;line-height:1rem}"
    "details,h3{font-weight:400;font-style:italic}"
    "h3{background-color:#cdcdcd}"
    "details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}"
    "ul{list-style-type:none;color:#00F}"
    "ul:hover{cursor:pointer;cursor:hand}"
    "figure{margin-left:0;text-align:center}"
    ".img-heading,.img-nav{width:50px}"
    "#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}"
    "#nav-source{font-size:100%;font-weight:700;color:#00f}"
    "#article,#article-current{width:440px}"
    ".pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}"
    ".pullquote:after,.pullquote:before{color:silver;position:absolute;content:'\"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}"
    ".pullquote:after{content:'\"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}"
    "a{text-decoration:none}"
    "a:link,a:visited{color:#00F}"
)
|
|
45
|
+
|
35
|
46
|
|
36
|
47
|
class Printable:
|
37
|
48
|
def __repr__(self):
|
...
|
...
|
@@ -91,17 +102,17 @@ f.write("<head>\n")
|
91
|
102
|
f.write(" <title>"+feed_details.title+"</title>\n")
|
92
|
103
|
f.write(" <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">\n")
|
93
|
104
|
f.write(" <meta name=\"viewport\" content=\"width=450px, user-scalable=no\">\n")
|
94
|
|
-f.write(" <link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n")
|
95
|
105
|
f.write(" <link rel=\"icon\" type=\"image/ico\" href=\"favicon.ico\">\n")
|
96
|
|
-f.write(" <script>\n")
|
97
|
|
-f.write(" function onArticle(index) {\n")
|
98
|
|
-f.write(" var string_index = \"article-\"+index;\n")
|
99
|
|
-f.write(" var url = location.href;\n")
|
100
|
|
-f.write(" document.getElementById(\"article-current\").innerHTML =\n")
|
101
|
|
-f.write(" document.getElementById(string_index).innerHTML;\n")
|
102
|
|
-f.write(" location.href = \"#article-top\";\n")
|
103
|
|
-f.write(" }\n")
|
104
|
|
-f.write(" </script>\n")
|
|
106
|
# Emit the inline onArticle(index) JavaScript helper, which copies the markup
# of the element "article-<index>" into the "article-current" placeholder and
# then jumps to the "#article-top" anchor. It is left out when --pdf is set;
# in that mode the page links to the "#article-<n>" anchors instead.
if not pdf:
    f.write(
        " <script>\n"
        " function onArticle(index) {\n"
        " var string_index = \"article-\"+index;\n"
        " var url = location.href;\n"
        " document.getElementById(\"article-current\").innerHTML =\n"
        " document.getElementById(string_index).innerHTML;\n"
        " location.href = \"#article-top\";\n"
        " }\n"
        " </script>\n"
    )
|
105
|
116
|
f.write("<style>\n"+CSS+"\n</style>\n")
|
106
|
117
|
f.write("</head>\n")
|
107
|
118
|
f.write("<body>\n")
|
...
|
...
|
@@ -128,20 +139,30 @@ for article in d.entries:
|
128
|
139
|
# Not Working as is. Generated image is too big
|
129
|
140
|
#if article_details.enclosure is not None:
|
130
|
141
|
#img_content=urllib2.urlopen(article.link).read()
|
131
|
|
- #article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content)
|
132
|
|
- f.write("<ul><div onclick=\"onArticle("+str(cpt)+")\" style=\"display:inline;\">\n")
|
133
|
|
- f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"><br>\n")
|
|
142
|
+ #article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content)
|
|
143
|
+ if pdf is False:
|
|
144
|
+ f.write("<ul><div onclick=\"onArticle("+str(cpt)+")\" style=\"display:inline;\">\n")
|
|
145
|
+ f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"><br>\n")
|
|
146
|
+ else:
|
|
147
|
+ f.write("<ul><div style=\"display:inline;\"><a href=\"#article-"+str(cpt)+"\">\n")
|
|
148
|
+ f.write("\t<img src=\""+article_details.enclosure+"\" style=\"display:inline;\"></a><br>\n")
|
134
|
149
|
f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
135
|
|
- f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↧</a></div>\n")
|
136
|
|
- f.write("\t"+article_details.title+"</div></ul>\n")
|
|
150
|
+ if pdf is False:
|
|
151
|
+ f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↧</a></div>\n")
|
|
152
|
+ f.write("\t"+article_details.title+"</div></ul>\n")
|
|
153
|
+ else:
|
|
154
|
+ f.write("\t<a href=\"#article-"+str(cpt)+"\">"+article_details.title+"</a></div></ul>\n")
|
137
|
155
|
articles.append(article_details)
|
138
|
156
|
cpt=cpt+1
|
139
|
157
|
if cpt > MAX_ARTICLES:
|
140
|
158
|
break
|
141
|
159
|
|
142
|
160
|
cpt_num=cpt
|
143
|
|
-f.write("\n<a name=\"article-top\"></a>\n")
|
144
|
|
-f.write("<div id=\"article-current\"></div>\n\n")
|
|
161
|
+if pdf is False:
|
|
162
|
+ f.write("\n<a name=\"article-top\"></a>\n")
|
|
163
|
+ f.write("<div id=\"article-current\"></div>\n\n")
|
|
164
|
+
|
|
165
|
+
|
145
|
166
|
cpt=0
|
146
|
167
|
for article in articles:
|
147
|
168
|
print("-- {:d} : {:s}".format(cpt,article.title))
|
...
|
...
|
@@ -161,21 +182,35 @@ for article in articles:
|
161
|
182
|
cpt_next=cpt+1
|
162
|
183
|
if cpt_next > cpt_num:
|
163
|
184
|
cpt_next = cpt_num
|
164
|
|
- f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
|
|
185
|
+ if pdf is False:
|
|
186
|
+ f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
|
|
187
|
+ elif pdf is True:
|
|
188
|
+ print "flat"
|
|
189
|
+ f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: inline;\">\n")
|
|
190
|
+
|
165
|
191
|
f.write("<hr><a name=\"article-"+str(cpt)+"\">")
|
166
|
192
|
f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
167
|
|
- f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
|
193
|
+ if pdf is False:
|
|
194
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
168
|
195
|
f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
169
|
|
- f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
170
|
|
- f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
196
|
+ if pdf is False:
|
|
197
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
198
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
199
|
+ else:
|
|
200
|
+ f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">↤</a></div>\n")
|
|
201
|
+ f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">↦</a></div>\n")
|
171
|
202
|
f.write("<div class=\"extract-content\" id=\""+str(cpt)+"\">\n")
|
172
|
203
|
f.write(article.content_only)
|
173
|
204
|
f.write("\n</div>\n")
|
174
|
205
|
f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
175
|
206
|
f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
176
|
207
|
f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
177
|
|
- f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
178
|
|
- f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
208
|
+ if pdf is False:
|
|
209
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
210
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
211
|
+ else:
|
|
212
|
+ f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">↤</a></div>\n")
|
|
213
|
+ f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">↦</a></div>\n")
|
179
|
214
|
f.write("</div>\n\n")
|
180
|
215
|
cpt=cpt+1
|
181
|
216
|
continue
|
...
|
...
|
@@ -259,24 +294,40 @@ for article in articles:
|
259
|
294
|
if cpt_next > cpt_num:
|
260
|
295
|
cpt_next = cpt_num
|
261
|
296
|
|
262
|
|
- f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
|
|
297
|
+ if pdf is False:
|
|
298
|
+ f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: none;\">\n")
|
|
299
|
+ else:
|
|
300
|
+ f.write("<div class=\"article\" id=\"article-"+str(cpt)+"\" style=\"display: inline;\">\n")
|
263
|
301
|
f.write("<hr><a name=\"article-"+str(cpt)+"\">")
|
264
|
302
|
f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
265
|
|
- f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
|
303
|
+ if pdf is False:
|
|
304
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
266
|
305
|
f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
267
|
|
- f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
268
|
|
- f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
306
|
+ if pdf is False:
|
|
307
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
308
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
309
|
+ else:
|
|
310
|
+ f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">↤</a></div>\n")
|
|
311
|
+ f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">↦</a></div>\n")
|
269
|
312
|
f.write("<div class=\"extract-content\" id=\""+str(cpt)+"\">\n")
|
270
|
313
|
f.write(article.content_only)
|
271
|
314
|
f.write("\n</div>\n")
|
272
|
315
|
f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">⇞</a></div>\n")
|
273
|
|
- f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
|
316
|
+ if pdf is False:
|
|
317
|
+ f.write("<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">↥</a></div> \n")
|
274
|
318
|
f.write("<div id=\"nav-source\" style=\"display:inline;\"><a href=\""+article.link+"\" target=\"new-"+str(cpt)+"\">source</a></div> ")
|
275
|
|
- f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
276
|
|
- f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
319
|
+ if pdf is False:
|
|
320
|
+ f.write("<div id=\"nav-prev\" onclick=\"onArticle("+str(cpt_prev)+")\" style=\"display:inline;\">↤</div>\n")
|
|
321
|
+ f.write("<div id=\"nav-next\" onclick=\"onArticle("+str(cpt_next)+")\" style=\"display:inline;\">↦</div>\n")
|
|
322
|
+ else:
|
|
323
|
+ f.write("<div id=\"nav-prev\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_prev)+"\">↤</a></div>\n")
|
|
324
|
+ f.write("<div id=\"nav-next\" style=\"display:inline;\"><a href=\"#article-"+str(cpt_next)+"\">↦</a></div>\n")
|
277
|
325
|
f.write("</div>\n\n")
|
278
|
326
|
cpt=cpt+1
|
279
|
327
|
if cpt > MAX_ARTICLES:
|
280
|
328
|
break
|
281
|
329
|
|
282
|
330
|
f.close()
|
|
331
|
+
|
|
332
|
# Render the HTML file written above to PDF and store it next to it as
# "<output_filename>.pdf".
#
# The rendered document is kept in its own name so the boolean `pdf` option
# flag is not overwritten with the PDF bytes. write_pdf() without a target
# returns the document as a byte string, so the output file is opened in
# binary mode and closed deterministically by the `with` block.
# NOTE(review): the PDF is produced on every run, even without --pdf (in which
# case the articles are written with display:none) -- confirm whether this
# step should be gated on the flag.
pdf_bytes = weasyprint.HTML(filename=output_filename).write_pdf()
with open(output_filename + ".pdf", "wb") as pdf_file:
    pdf_file.write(pdf_bytes)
|