...
|
...
|
@@ -0,0 +1,176 @@
|
|
1
|
+#!/usr/bin/env python
|
|
2
|
+from urlparse import urlparse
|
|
3
|
+from bs4 import BeautifulSoup
|
|
4
|
+import feedparser
|
|
5
|
+import base64
|
|
6
|
+import pprint
|
|
7
|
+import urllib2
|
|
8
|
+import lxml.html
|
|
9
|
+import sys
|
|
10
|
+import getopt
|
|
11
|
+
|
|
12
|
# Command-line defaults; each one may be overridden by the options below.
verbose = False
output_filename = 'default.html'
# Placeholder value meaning "no feed URL was given on the command line".
rss_url = 'rss_url'

# Every option handled in the loop below must also be declared to getopt,
# otherwise getopt rejects it before the loop ever sees it.  -u/--url and
# --version were handled but not declared, so they could never be used.
# NOTE(review): --version is declared as taking a value because its handler
# stores `arg`; confirm that this is the intended meaning.
try:
    options, remainder = getopt.getopt(
        sys.argv[1:],
        'o:u:v',
        ['output=', 'url=', 'verbose', 'version='],
    )
except getopt.GetoptError as err:
    # Report a bad command line as a usage error instead of a traceback.
    sys.stderr.write("{0}\n".format(err))
    sys.stderr.write(
        "usage: {0} [-o FILE] [-u URL] [-v]\n".format(sys.argv[0]))
    sys.exit(2)

for opt, arg in options:
    if opt in ('-o', '--output'):
        output_filename = arg
    elif opt in ('-u', '--url'):
        rss_url = arg
    elif opt in ('-v', '--verbose'):
        verbose = True
    elif opt == '--version':
        version = arg
|
|
29
|
+
|
|
30
|
# Minified stylesheet inlined into the <style> element of the generated page.
#
# The two `content` declarations hold a double-quote character.  It must be
# escaped: left bare, it terminates the Python literal, the pieces are glued
# back together by implicit string concatenation, and both quote characters
# silently disappear from the emitted CSS (breaking the .pullquote rules).
CSS = (
    "h1,h2{font-weight:700}"
    "img,ul{width:440px;padding:0}"
    "em,img{text-align:left;align:left}"
    "#nav-next:hover,#nav-prev:hover,a:hover{background:#333}"
    "body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;"
    "background-color:#f0f0f0}"
    "h1{font-size:1.5rem;line-height:1.5rem}"
    "h2,h3{font-size:1rem;line-height:1rem}"
    "details,h3{font-weight:400;font-style:italic}"
    "h3{background-color:#cdcdcd}"
    "details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;"
    "font-size:.5rem}"
    "ul{list-style-type:none;color:#00F}"
    "ul:hover{cursor:pointer;cursor:hand}"
    "figure{margin-left:0;text-align:center}"
    ".img-heading,.img-nav{width:50px}"
    "#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}"
    "#nav-source{font-size:100%;font-weight:700;color:#00f}"
    "#article,#article-current{width:440px}"
    ".pullquote{padding:.5rem 1.5rem 0;"
    "font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;"
    "position:relative;margin-bottom:1.5rem;z-index:1}"
    ".pullquote:after,.pullquote:before{color:silver;position:absolute;"
    "content:'\"';font-size:5em;height:.5rem;line-height:.75em;top:0;"
    "left:-.07em;z-index:-1}"
    ".pullquote:after{content:'\"';top:auto;bottom:0;left:auto;right:0;"
    "line-height:.36em}"
    "a{text-decoration:none}"
    "a:link,a:visited{color:#00F}"
)
|
|
31
|
+
|
|
32
|
class Printable(object):
    """Mixin that gives subclasses a readable ``repr``.

    The representation is ``<ClassName> `` followed by a pretty-printed dict
    of the instance's own attributes (``vars(self)``); attributes that only
    exist as class-level defaults are not listed.

    The class derives from ``object`` explicitly: as an old-style Python 2
    class, ``type(self).__name__`` would be ``instance`` for every subclass
    rather than the subclass name.
    """

    def __repr__(self):
        """Return ``<ClassName> {attributes}``, one attribute per line."""
        from pprint import pformat
        # width=1 forces pformat to break after every item.
        attributes = pformat(vars(self), indent=4, width=1)
        return "<" + type(self).__name__ + "> " + attributes
|
|
36
|
+
|
|
37
|
class FeedDetails(Printable):
    """Feed-level metadata: entry count, title, link and subtitle."""

    # Class-level defaults; instances overwrite them by plain assignment.
    num, title, link, subtitle = 0, "", "", ""

    def debug_print(self):
        """Print every field on its own line, prefixed with its name."""
        rows = (
            ("FeedDetails:num :{0:d}", self.num),
            ("FeedDetails:title :{0:s}", self.title),
            ("FeedDetails:link :{0:s}", self.link),
            ("FeedDetails:subtitle:{0:s}", self.subtitle),
        )
        # Format and print row by row so output order matches field order.
        for template, value in rows:
            print(template.format(value))
|
|
47
|
+
|
|
48
|
class ArticleDetails(Printable):
    """Per-article data: feed fields plus the downloaded page content."""

    # Class-level defaults; instances overwrite them by plain assignment.
    title = ""
    link = ""
    summary = ""
    content = ""        # whatever was downloaded from `link`
    enclosure = ""      # URL of the enclosure attached to the feed entry
    content_only = ""   # the part of `content` kept for the output page

    def debug_print(self):
        """Print the article fields, one per line, for debugging.

        The full ``content`` is deliberately not dumped; only a separator
        line is printed for it.
        """
        print("ArticleDetails:title :{:s}".format(self.title))
        print("ArticleDetails:link :{:s}".format(self.link))
        print("ArticleDetails:summary :{:s}".format(self.summary))
        print("ArticleDetails:enclosure :{:s}".format(self.enclosure))
        # This line used to print `self.content` under the `content_only`
        # label.  str() is applied because content_only may hold a parsed
        # element rather than a plain string.
        print("ArticleDetails:content_only :{:s}".format(str(self.content_only)))
        print("ArticleDetails:content : ================")
|
|
63
|
+
|
|
64
|
debug = 1

# Feed used when no URL was supplied on the command line.
DEFAULT_RSS_URL = 'http://www.lemonde.fr/rss/une.xml'

feed_details = FeedDetails()

# `rss_url` still holds its placeholder value 'rss_url' when the user did not
# override it; only then fall back to the built-in feed.  Previously the URL
# was hard-coded here and `rss_url` was ignored altogether.
feed_url = DEFAULT_RSS_URL if rss_url == 'rss_url' else rss_url

d = feedparser.parse(feed_url)

# Feed-level fields are optional in RSS/Atom, so read them with a default
# instead of failing with KeyError on a feed that lacks one of them.
feed_header = d['feed']
feed_details.title = feed_header.get('title', u'').encode('utf-8').strip()
feed_details.link = feed_header.get('link', u'').encode('utf-8').strip()
feed_details.subtitle = feed_header.get('subtitle', u'').encode('utf-8').strip()
feed_details.num = len(d['entries'])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+#~ if 1 == debug:
|
|
76
|
+ #~ feed_details.debug_print()
|
|
77
|
+
|
|
78
|
f = open(output_filename, 'w')

# Static page header.  Two defects of the previous version are fixed here:
#  * the DOCTYPE lacked the closing quote, the space and the opening quote
#    between the public identifier and the DTD URL (two adjacent Python
#    literals were silently concatenated);
#  * the "Hack for Mobile" script assigned the bare token 400px (a JavaScript
#    syntax error) and ran before the targeted elements existed.
# NOTE: feed_details.title is inserted as-is, without HTML escaping.
header_parts = [
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
    '"http://www.w3.org/TR/html4/loose.dtd">\n',
    '<html>\n',
    '<head>\n',
    ' <title>' + feed_details.title + '</title>\n',
    ' <meta http-equiv="content-type" content="text/html; charset=UTF-8">\n',
    ' <meta name="viewport" content="width=450px, user-scalable=no">\n',
    ' <link rel="stylesheet" type="text/css" href="style.css" />\n',
    ' <link rel="icon" type="image/ico" href="favicon.ico">\n',
    ' <!-- Touch Screen Detection -->\n',
    ' <script>\n',
    ' function isTouchDevice(){\n',
    ' return true == ("ontouchstart" in window || window.DocumentTouch'
    ' && document instanceof DocumentTouch);\n',
    ' }\n',
    ' </script>\n',
    ' <script type="text/javascript">\n',
    ' /* Hack for Mobile */\n',
    # Wait for the load event so the elements exist, and skip missing ones.
    ' window.addEventListener("load", function () {\n',
    ' if (isTouchDevice() !== true) { return; }\n',
    ' var widths = {"img": "400px", "extract-content": "400px",'
    ' "article-current": "440px"};\n',
    ' for (var id in widths) {\n',
    ' var node = document.getElementById(id);\n',
    ' if (node) { node.style.width = widths[id]; }\n',
    ' }\n',
    ' });\n',
    ' </script>\n',
    ' <script>\n',
    ' function onArticle(index) {\n',
    ' var string_index = "article-"+index;\n',
    ' var url = location.href;\n',
    # Ignore clicks that point at an article that is not in the page.
    ' var source = document.getElementById(string_index);\n',
    ' if (!source) { return; }\n',
    ' document.getElementById("article-current").innerHTML =\n',
    ' source.innerHTML;\n',
    ' location.href = "#article-top";\n',
    ' }\n',
    ' </script>\n',
    '<style>\n' + CSS + '\n</style>\n',
    '</head>\n',
    '<body>\n',
    '<h1 id="top">' + feed_details.title + '</h1>\n',
]
f.write("".join(header_parts))
|
|
114
|
+
|
|
115
|
# Tracking parameter appended to the feed's article links.
XTOR_SUFFIX = "?xtor=RSS-3208"

# First pass: collect the feed entries and write the clickable index.
articles = []
for index, article in enumerate(d.entries):
    article_details = ArticleDetails()
    article_details.title = article.title.encode('utf-8').strip()

    # Drop the tracking suffix.  str.strip() takes a *set of characters*, so
    # strip(XTOR_SUFFIX) could also eat legitimate characters at either end
    # of the URL; remove the exact suffix instead.
    article_link = article.link.encode('utf-8').strip()
    if article_link.endswith(XTOR_SUFFIX):
        article_link = article_link[:-len(XTOR_SUFFIX)]
    article_details.link = article_link

    article_details.summary = article.get('summary', u'').encode('utf-8').strip()

    # Keep the enclosure URL (the last one wins, as before).  It is encoded
    # like every other field so that only byte strings reach f.write().
    for entry_link in article.get('links', []):
        if entry_link.get('rel') == "enclosure" and entry_link.get('href'):
            article_details.enclosure = entry_link.href.encode('utf-8')

    # Inlining the image as a base64 data URI was tried and abandoned: the
    # generated page became too big.

    # The arrows are written as numeric character references so that this
    # source file stays pure ASCII (it declares no source encoding).
    f.write("\t<ul><div onclick=\"onArticle(" + str(index) + ")\" style=\"display:inline;\">\n")
    if article_details.enclosure:
        # No <img> at all when there is no enclosure: an empty src makes
        # browsers request the page itself again.
        f.write("\t<img src=\"" + article_details.enclosure + "\" style=\"display:inline;\"><br>\n")
    f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#top\">&#x21DE;</a></div>\n")
    f.write("\t<div id=\"nav-up\" style=\"display:inline;\"><a href=\"#article-top\">&#x21A7;</a></div>\n")
    f.write("\t" + article_details.title + "</div></ul>\n")
    articles.append(article_details)

# Code after this loop reads `cpt` as the number of collected articles.
cpt = len(articles)
|
|
138
|
+
|
|
139
|
# Second pass: download every article and append it to the page as a hidden
# <div>; onArticle() copies the selected one into #article-current.
cpt_num = cpt
f.write("\n<a name=\"article-top\"></a>\n")
f.write("<div id=\"article-current\"></div>\n\n")

# Navigation bar written above and below each article.  Arrows are numeric
# character references so this source file stays pure ASCII.
NAV_TEMPLATE = (
    '<div id="nav-up" style="display:inline;"><a href="#top">&#x21DE;</a></div>\n'
    '<div id="nav-up" style="display:inline;"><a href="#article-top">&#x21A5;</a></div> \n'
    '<div id="nav-source" style="display:inline;">'
    '<a href="{link}" target="new-{index}">source</a></div> '
    '<div id="nav-prev" onclick="onArticle({prev})" style="display:inline;">&#x21A4;</div>\n'
    '<div id="nav-next" onclick="onArticle({next})" style="display:inline;">&#x21A6;</div>\n'
)

try:
    for index, article in enumerate(articles):
        print("-- {:d} : {:s}".format(index, article.title))

        # One unreachable article must not abort the whole page.
        try:
            response = urllib2.urlopen(article.link)
            try:
                article.content = response.read()
            finally:
                response.close()
        except (urllib2.URLError, IOError) as err:
            sys.stderr.write("   download failed: {0}\n".format(err))
            article.content = ""

        # Keep only the <article> element of the downloaded page.  When the
        # page has none (or the download failed), fall back to the feed
        # summary instead of writing the text "None".
        content = None
        if article.content:
            content = BeautifulSoup(article.content, "lxml").find("article")
        if content is None:
            body_html = article.summary
        else:
            article.content_only = content
            body_html = str(content)

        # Clamp neighbours to the valid range 0 .. cpt_num - 1; the upper
        # bound used to be cpt_num, which is one past the last article.
        cpt_prev = max(index - 1, 0)
        cpt_next = min(index + 1, cpt_num - 1)
        nav = NAV_TEMPLATE.format(
            link=article.link, index=index, prev=cpt_prev, next=cpt_next)

        f.write("<!-- ==================== article " + str(index) + " ============== -->\n")
        f.write("<div class=\"article\" id=\"article-" + str(index) + "\" style=\"display: none;\">\n")
        # The named anchor is now closed; it used to swallow what followed.
        f.write("<hr>\n<a name=\"article-" + str(index) + "\"></a>\n")
        f.write(nav)
        f.write("<div class=\"extract-content\" id=\"" + str(index) + "\">\n")
        f.write(body_html)
        f.write("\n</div>\n")
        f.write(nav)
        f.write("</div>\n\n")
finally:
    # Close the output file even when an article raises unexpectedly.
    f.close()
|