Showing 1 changed files with 176 additions and 0 deletions
+176
newsfetch.py
... ...
@@ -0,0 +1,176 @@
1
+#!/usr/bin/env python
2
+from urlparse import urlparse
3
+from bs4 import BeautifulSoup
4
+import feedparser
5
+import base64
6
+import pprint
7
+import urllib2
8
+import lxml.html
9
+import sys
10
+import getopt
11
+
12
# Command-line configuration.  These module-level names are read by the rest
# of the script; the values below are the defaults used when the matching
# option is not given.
verbose = False
output_filename = 'default.html'
rss_url = 'rss_url'  # placeholder meaning "no feed URL supplied"


def parse_args(argv):
    """Parse the script's command-line options.

    Recognised options:
      -o FILE, --output=FILE   name of the HTML file to generate
      -u URL,  --url=URL       address of the RSS feed
      -v,      --verbose       enable verbose mode

    The original option table never registered ``-u``/``--url`` (nor
    ``--version``), so those branches could not be reached and passing
    ``-u`` aborted with ``GetoptError``.  ``-u``/``--url`` is now accepted;
    the dead ``--version`` branch, whose value was never used, is gone.

    :param argv: argument list without the program name
                 (typically ``sys.argv[1:]``).
    :returns: tuple ``(verbose, output_filename, rss_url)``.
    :raises getopt.GetoptError: on an unknown option or a missing argument.
    """
    parsed_verbose = False
    parsed_output = 'default.html'
    parsed_url = 'rss_url'

    options, _remainder = getopt.getopt(
        argv, 'o:u:v', ['output=', 'url=', 'verbose'])
    for opt, arg in options:
        if opt in ('-o', '--output'):
            parsed_output = arg
        elif opt in ('-u', '--url'):
            parsed_url = arg
        elif opt in ('-v', '--verbose'):
            parsed_verbose = True

    return parsed_verbose, parsed_output, parsed_url


# Only consume sys.argv when executed as a script, so that importing this
# module does not try to interpret the host process's arguments.
if __name__ == '__main__':
    verbose, output_filename, rss_url = parse_args(sys.argv[1:])
29
+
30
# Minified stylesheet inlined into the generated page's <style> element.
# It is kept as one logical string (no newlines), split here one rule per
# line for readability.
#
# The double quotes inside the two `content:'"'` declarations must be
# escaped: unescaped, they terminated the Python literal, and implicit
# string concatenation silently dropped both quote characters from the CSS.
CSS = (
    "h1,h2{font-weight:700}"
    "img,ul{width:440px;padding:0}"
    "em,img{text-align:left;align:left}"
    "#nav-next:hover,#nav-prev:hover,a:hover{background:#333}"
    "body{color:#000;font-family:Helvetica Neue,Helvetica,Arial,sans-serif;background-color:#f0f0f0}"
    "h1{font-size:1.5rem;line-height:1.5rem}"
    "h2,h3{font-size:1rem;line-height:1rem}"
    "details,h3{font-weight:400;font-style:italic}"
    "h3{background-color:#cdcdcd}"
    "details{font-family:TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;font-size:.5rem}"
    "ul{list-style-type:none;color:#00F}"
    "ul:hover{cursor:pointer;cursor:hand}"
    "figure{margin-left:0;text-align:center}"
    ".img-heading,.img-nav{width:50px}"
    "#nav-next,#nav-prev,#nav-up{font-size:200%;font-weight:700;color:#00f}"
    "#nav-source{font-size:100%;font-weight:700;color:#00f}"
    "#article,#article-current{width:440px}"
    ".pullquote{padding:.5rem 1.5rem 0;font:700 1em/.8em TradeGothic,Helvetica Neue,Helvetica,Arial,sans-serif;position:relative;margin-bottom:1.5rem;z-index:1}"
    ".pullquote:after,.pullquote:before{color:silver;position:absolute;content:'\"';font-size:5em;height:.5rem;line-height:.75em;top:0;left:-.07em;z-index:-1}"
    ".pullquote:after{content:'\"';top:auto;bottom:0;left:auto;right:0;line-height:.36em}"
    "a{text-decoration:none}"
    "a:link,a:visited{color:#00F}"
)
31
+
32
+class Printable:
33
+    def __repr__(self):
34
+        from pprint import pformat
35
+        return "<" + type(self).__name__ + "> " + pformat(vars(self), indent=4, width=1)
36
+
37
class FeedDetails(Printable):
    """Summary information about a feed.

    The class attributes are defaults; the script overwrites them on the
    instance after the feed has been parsed.
    """

    num = 0        # number of entries in the feed
    title = ""     # feed title
    link = ""      # feed link
    subtitle = ""  # feed subtitle

    def debug_print(self):
        """Print every field on its own line, prefixed with ``FeedDetails:``."""
        rows = (
            ("FeedDetails:num     :{:d}", self.num),
            ("FeedDetails:title   :{:s}", self.title),
            ("FeedDetails:link    :{:s}", self.link),
            ("FeedDetails:subtitle:{:s}", self.subtitle),
        )
        for template, value in rows:
            print(template.format(value))
47
+
48
class ArticleDetails(Printable):
    """Data collected for one feed entry.

    The class attributes are defaults; the script fills them in on each
    instance while it walks the feed and downloads the articles.
    """

    title = ""         # entry title
    link = ""          # entry link
    summary = ""       # entry summary
    content = ""       # raw body downloaded from `link`
    enclosure = ""     # href of the entry's "enclosure" link, if any
    content_only = ""  # result of looking up the <article> element in `content`

    def debug_print(self):
        """Print the article's fields, one per line.

        The line labelled ``content_only`` used to print ``self.content``
        (the whole downloaded page); it now prints ``self.content_only``.
        ``{!s}`` is used for that field because it holds a parsed tag (or
        None) rather than a string, which ``{:s}`` does not accept on
        recent Python versions.
        """
        print("ArticleDetails:title     :{:s}".format(self.title))
        print("ArticleDetails:link      :{:s}".format(self.link))
        print("ArticleDetails:summary   :{:s}".format(self.summary))
        print("ArticleDetails:enclosure :{:s}".format(self.enclosure))
        print("ArticleDetails:content_only :{!s}".format(self.content_only))
        print("ArticleDetails:content   : ================")
63
+
64
# Debug switch; nothing in the script currently reads it.
debug = 1

# Feed used when no URL was supplied on the command line.
DEFAULT_FEED_URL = 'http://www.lemonde.fr/rss/une.xml'

# `rss_url` keeps the literal placeholder 'rss_url' unless a real URL was
# given, so fall back to the default feed in that case.  Previously the
# address was hard-coded here and `rss_url` was ignored entirely.
feed_url = rss_url if rss_url != 'rss_url' else DEFAULT_FEED_URL

# Download and parse the feed; `d` is reused below to iterate the entries.
d = feedparser.parse(feed_url)

# Feed-level metadata, stored as UTF-8 encoded strings.  A missing element
# yields an empty string instead of a KeyError.
feed_details = FeedDetails()
feed_details.title = d['feed'].get('title', u'').encode('utf-8').strip()
feed_details.link = d['feed'].get('link', u'').encode('utf-8').strip()
feed_details.subtitle = d['feed'].get('subtitle', u'').encode('utf-8').strip()
feed_details.num = len(d['entries'])
73
+
74
+
75
+#~ if 1 == debug:
76
+  #~ feed_details.debug_print()
77
+
78
# Open the output page and write everything up to the main heading.  The
# handle `f` stays open: the article loops further down keep writing to it
# and the script closes it at the very end.
f = open(output_filename, 'w')

# The public identifier and the DTD URL are two separate quoted strings.
# The original relied on adjacent Python literals, which dropped the
# closing quote, the space and the opening quote between them.
f.write('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
        '"http://www.w3.org/TR/html4/loose.dtd">\n')
f.write('<html>\n')
f.write('<head>\n')
f.write('\t<title>' + feed_details.title + '</title>\n')
f.write('\t<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n')
f.write('\t<meta name="viewport" content="width=450px, user-scalable=no">\n')
f.write('\t<link rel="stylesheet" type="text/css" href="style.css" />\n')
f.write('\t<link rel="icon" type="image/ico" href="favicon.ico">\n')

# Helper script: touch-screen detection.
f.write('\t<!-- Touch Screen Detection -->\n')
f.write('\t<script>\n')
f.write('\tfunction isTouchDevice(){\n')
f.write('\t\treturn true == ("ontouchstart" in window || window.DocumentTouch && document instanceof DocumentTouch);\n')
f.write('\t}\n')
f.write('\t</script>\n')

# Mobile width hack.  Three fixes to the emitted JavaScript:
#  * widths are quoted strings (a bare `400px` is a syntax error),
#  * the code runs on window load, because this script sits in <head>
#    where none of the elements exist yet,
#  * ids that are not present in the page are skipped instead of raising.
f.write('\t<script type="text/javascript">\n')
f.write('\t/* Hack for Mobile */\n')
f.write('\twindow.onload = function() {\n')
f.write('\t\tif(isTouchDevice()===true) {\n')
f.write('\t\t\tvar widths = {"img": "400px", "extract-content": "400px", "article-current": "440px"};\n')
f.write('\t\t\tfor (var id in widths) {\n')
f.write('\t\t\t\tvar el = document.getElementById(id);\n')
f.write('\t\t\t\tif (el) { el.style.width = widths[id]; }\n')
f.write('\t\t\t}\n')
f.write('\t\t}\n')
f.write('\t};\n')
f.write('\t</script>\n')

# onArticle(index): copy the hidden block "article-<index>" into the
# visible "article-current" container and jump to it.
f.write('\t<script>\n')
f.write('\tfunction onArticle(index) {\n')
f.write('\t\tvar string_index = "article-"+index;\n')
f.write('\t\tvar url = location.href;\n')
f.write('\t\tdocument.getElementById("article-current").innerHTML =\n')
f.write('\t\t\tdocument.getElementById(string_index).innerHTML;\n')
f.write('\t\tlocation.href = "#article-top";\n')
f.write('\t}\n')
f.write('\t</script>\n')

# Inline stylesheet, then the start of the body with the feed title.
f.write('<style>\n' + CSS + '\n</style>\n')
f.write('</head>\n')
f.write('<body>\n')
f.write('<h1 id="top">' + feed_details.title + '</h1>\n')
114
+
115
# Tracking parameter appended to the article links in the feed.
TRACKING_SUFFIX = "?xtor=RSS-3208"


def _strip_tracking_suffix(url):
    """Return *url* without a trailing TRACKING_SUFFIX, if present.

    ``str.strip(chars)`` treats its argument as a set of characters and
    trims them from BOTH ends, so the previous ``.strip("?xtor=RSS-3208")``
    could also eat legitimate leading/trailing characters of the URL.
    """
    if url.endswith(TRACKING_SUFFIX):
        return url[:-len(TRACKING_SUFFIX)]
    return url


# First pass over the feed: collect the details of every entry and write
# the clickable index (image + title) for each one.
articles = []
for cpt, article in enumerate(d.entries):
    article_details = ArticleDetails()
    article_details.title = article.title.encode('utf-8').strip()
    article_details.link = _strip_tracking_suffix(
        article.link.encode('utf-8').strip())
    # Not every entry carries a summary; default to an empty one.
    article_details.summary = article.get('summary', u'').encode('utf-8').strip()
    for link in article.get('links', []):
        if link.get('rel') == "enclosure":
            # Encode like the other fields so that only byte strings are
            # concatenated and written to the output file.
            article_details.enclosure = link.href.encode('utf-8')

    # Not working as is: the generated image is too big.
    #if article_details.enclosure is not None:
        #img_content=urllib2.urlopen(article.link).read()
        #article_details.enclosure = "data:image/jpg;base64,"+base64.b64encode(img_content)
    f.write('\t<ul><div onclick="onArticle(' + str(cpt) + ')" style="display:inline;">\n')
    f.write('\t<img src="' + article_details.enclosure + '" style="display:inline;"><br>\n')
    f.write('\t<div id="nav-up" style="display:inline;"><a href="#top">&#8670;</a></div>\n')
    f.write('\t<div id="nav-up" style="display:inline;"><a href="#article-top">&#8615;</a></div>\n')
    f.write('\t' + article_details.title + '</div></ul>\n')
    articles.append(article_details)

# Code further down reads `cpt` as the number of entries processed.
cpt = len(articles)
138
+
139
# Second pass: download every article and embed its <article> element in a
# hidden block that onArticle() copies into the "article-current" container.
cpt_num = len(articles)
last_index = cpt_num - 1

f.write('\n<a name="article-top"></a>\n')
f.write('<div id="article-current"></div>\n\n')

for cpt, article in enumerate(articles):
    print("-- {:d} : {:s}".format(cpt, article.title))

    response = urllib2.urlopen(article.link)
    try:
        article.content = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    soup_mysite = BeautifulSoup(article.content, "lxml")
    content = soup_mysite.find("article")  # None when the page has no <article>
    article.content_only = content

    # Neighbour indices, clamped to the valid range 0 .. last_index.  The
    # upper bound used to be cpt_num, so "next" on the last article pointed
    # at a block that does not exist.
    cpt_prev = max(cpt - 1, 0)
    cpt_next = min(cpt + 1, last_index)

    # Navigation bar; the same markup is written above and below the text.
    nav_bar = (
        '<div id="nav-up" style="display:inline;"><a href="#top">&#8670;</a></div>\n'
        '<div id="nav-up" style="display:inline;"><a href="#article-top">&#8613;</a></div>&nbsp;\n'
        '<div id="nav-source" style="display:inline;"><a href="' + article.link
        + '" target="new-' + str(cpt) + '">source</a></div>&nbsp;'
        '<div id="nav-prev" onclick="onArticle(' + str(cpt_prev)
        + ')" style="display:inline;">&#8612;</div>\n'
        '<div id="nav-next" onclick="onArticle(' + str(cpt_next)
        + ')" style="display:inline;">&#8614;</div>\n'
    )

    f.write('<!-- ==================== article ' + str(cpt) + ' ============== -->\n')
    f.write('<div class="article" id="article-' + str(cpt) + '" style="display: none;">\n')
    # The named anchor is now closed; it used to stay open and wrap
    # everything that followed.
    f.write('<hr>\n<a name="article-' + str(cpt) + '"></a>\n')
    f.write(nav_bar)
    f.write('<div class="extract-content" id="' + str(cpt) + '">\n')
    if content is None:
        # Avoid writing the literal text "None" into the page.
        f.write('<p>Article content not available.</p>')
    else:
        f.write(str(content))
    f.write('\n</div>\n')
    f.write(nav_bar)
    f.write('</div>\n\n')

f.close()