...
|
...
|
@@ -72,7 +72,12 @@ def process_match(m):
|
72
|
72
|
debug=1
|
73
|
73
|
|
74
|
74
|
feed_details=FeedDetails()
|
75
|
|
-print rss_url
|
|
75
|
+print("-- rss url : {:s}".format(rss_url))
|
|
76
|
+
|
|
77
|
+parsed_uri = urlparse( rss_url )
|
|
78
|
+domain_url = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
|
|
79
|
+print("-- domain url : {:s}".format(domain_url))
|
|
80
|
+
|
76
|
81
|
d = feedparser.parse(rss_url)
|
77
|
82
|
feed_details.title=d['feed']['title'].encode('utf-8').strip()
|
78
|
83
|
feed_details.link=d['feed']['link'].encode('utf-8').strip()
|
...
|
...
|
@@ -188,7 +193,10 @@ for article in articles:
|
188
|
193
|
content = "<h1>{:s}</h1>{:s}".format(article.title,content)
|
189
|
194
|
|
190
|
195
|
article.content_only = str(content)
|
191
|
|
- article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
|
|
196
|
+ #~ article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
|
|
197
|
+ new_full_domain = " href=\"{:s}".format(domain_url)
|
|
198
|
+ #~ article.content_only = article.content_only.replace(" href=\"/", " href=\"http://www.lemonde.fr/")
|
|
199
|
+ article.content_only = article.content_only.replace(" href=\"/", new_full_domain)
|
192
|
200
|
article.content_only = article.content_only.replace('<script>require(["twitter/widgets"]);</script>','')
|
193
|
201
|
article.content_only = article.content_only.replace('<script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script></div>','')
|
194
|
202
|
article.content_only = article.content_only.replace('<div class="toolbar"></div>','')
|