...
|
...
|
@@ -1,36 +0,0 @@
|
1
|
|
-from userio import *
|
2
|
|
-import requests
|
3
|
|
-import re
|
4
|
|
-import newsParser
|
5
|
|
-
|
6
|
|
# Optional scheme (or protocol-relative "//") followed by a leading "www." host
# label. Anchored so a "www" inside the path or query is never rewritten.
_WWW_HOST_PREFIX = re.compile(r"^((?:[a-z][a-z0-9+.-]*:)?//)?www\.", re.IGNORECASE)

# Upper bound, in seconds, on how long the page download may block.
_REQUEST_TIMEOUT_SECONDS = 10

# Literal markup rewrites applied to the extracted article body, in order.
# Order matters: <h2> must become <h3> before <h1> becomes <h2>, otherwise the
# freshly demoted <h1> headings would be demoted a second time.
_ARTICLE_REWRITES = (
    ("<amp-img", "<img"),
    ("</amp-img>", ""),
    ("<h2", "<h3"),
    ("</h2>", "</h3>"),
    ("<h1", "<h2"),
    ("</h1>", "</h2>"),
)


def _to_amp_url(url):
    """Return *url* with a leading ``www.`` host label replaced by ``amp.``."""
    return _WWW_HOST_PREFIX.sub(
        lambda match: (match.group(1) or "") + "amp.", url, count=1
    )


def _meta_tag(prop, value):
    """Build one ``<meta property="og:...">`` tag for *prop* with *value*."""
    # Only the double quote is escaped: it is the one character that would end
    # the attribute early. "&" is left alone so values that already contain
    # HTML entities are not double-escaped.
    safe_value = value.replace('"', "&quot;")
    return "<meta property=\"og:" + prop + "\" content=\"" + safe_value + "\">"


def _extract_article_body(content):
    """Return the markup between the first ``<article ...>`` tag and its ``</article>``.

    Raises ValueError when either tag is missing.
    """
    tag_start = content.index("<article")
    # Skip the page's own opening tag (and its attributes); the caller wraps
    # the body in a fresh <article>, so keeping it would leave two opening
    # tags for a single closing one.
    body_start = content.index(">", tag_start) + 1
    # Search for the closing tag only after the opening one.
    body_end = content.index("</article>", body_start)
    return content[body_start:body_end]


def article(url):
    """Download the AMP version of an article and return a simplified HTML fragment.

    The fragment consists of Open Graph ``<meta>`` tags (type, title,
    description, url, image, image type) followed by the article body wrapped
    in a single ``<article>`` element, with ``<amp-img>`` turned into ``<img>``
    and ``<h1>``/``<h2>`` headings demoted by one level.

    Args:
        url: Address of the article. A leading ``www.`` host label is replaced
            by ``amp.`` before the page is requested.

    Returns:
        The generated HTML fragment as a string.

    Raises:
        ValueError: if the downloaded page has no ``<article ...>``/``</article>`` pair.
        requests.exceptions.RequestException: if the download fails or times out.
    """
    say("Article: " + url)
    url = _to_amp_url(url)
    response = requests.get(
        url, allow_redirects=True, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    content = response.text

    # NOTE(review): the newsParser helpers are assumed to return plain strings
    # extracted from the page markup - confirm against newsParser.
    image_url = newsParser.articleImage(content)
    title = newsParser.articleTitle(content)
    description = newsParser.articleDescription(content)

    meta_tags = [
        _meta_tag("type", "article"),
        _meta_tag("title", title),
        _meta_tag("description", description),
        # og:url carries the rewritten (AMP) address, as the original code did.
        _meta_tag("url", url),
        _meta_tag("image", image_url),
        _meta_tag("image:type", "image/jpeg"),
    ]

    body = _extract_article_body(content)
    for old, new in _ARTICLE_REWRITES:
        body = body.replace(old, new)

    # Tags are newline-separated; the article follows the last tag directly.
    return "\n".join(meta_tags) + "<article>" + body + "</article>"
|