Removed LNC ・ 6cd4c03 ・ Gitprep

-89

newsParser/newsParser/newsLNC.py

@@ -1,89 +0,0 @@
-from userio import *
-import requests
-import re
-
def articleImage(content):
  """Return the value of the page's ``og:image`` meta tag.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``<meta property="og:image" content="`` and the next
    double quote, or ``"favicon.png"`` when the tag or its closing quote
    cannot be found.
  """
  fallback = "favicon.png"
  opening = "<meta property=\"og:image\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  image, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return image
-
def articleDescription(content):
  """Return the value of the page's ``og:description`` meta tag.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``<meta property="og:description" content="`` and the
    next double quote, or ``"Description Extraction Failed"`` when the tag
    or its closing quote cannot be found.
  """
  fallback = "Description Extraction Failed"
  opening = "<meta property=\"og:description\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  description, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return description
-
def articleTitle(content):
  """Return the value of the page's ``og:title`` meta tag.

  The search marker deliberately omits the leading ``<meta property=`` so
  that only the ``"og:title" content="`` part has to match.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``"og:title" content="`` and the next double quote, or
    ``"Title Extraction Failed"`` when the marker or its closing quote
    cannot be found.
  """
  fallback = "Title Extraction Failed"
  opening = "\"og:title\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  title, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return title
-
def article(url):
  """Fetch an article page and rebuild it as a trimmed HTML fragment.

  The page at ``url`` is downloaded with ``requests.get``. Its Open Graph
  image, title and description are read through ``articleImage``,
  ``articleTitle`` and ``articleDescription`` and re-emitted as ``<meta>``
  tags, followed by the article markup wrapped in ``<article>``.

  Args:
    url: Address of the article to fetch; also written to the ``og:url`` tag.

  Returns:
    A string holding the generated ``<meta>`` tags and the cleaned article
    markup inside ``<article>...</article>``.

  Exceptions raised by ``requests.get`` are not caught here.
  """
  say("ArticleDefault: "+url)
  # NOTE(review): no timeout is passed, so a stalled server can block this
  # call indefinitely - confirm whether that is acceptable for callers.
  response = requests.get(url, allow_redirects=True)
  content = response.text

  image_url = articleImage(content)
  title = articleTitle(content)
  description = articleDescription(content)

  head = "".join([
    "<meta property=\"og:type\" content=\"article\">\n",
    "<meta property=\"og:title\" content=\""+title+"\">\n",
    "<meta property=\"og:description\" content=\""+description+"\">\n",
    "<meta property=\"og:url\" content=\""+url+"\">\n",
    "<meta property=\"og:image\" content=\""+image_url+"\">\n",
    "<meta property=\"og:image:type\" content=\"image/jpeg\">",
  ])

  # Start of the article: the site's content <div>, else <body, else the
  # very beginning of the document.
  begin = content.find("<div class=\"middle-main-content\">")
  if begin == -1:
    begin = content.find("<body")
  if begin == -1:
    begin = 0

  # End of the article: the IOSdialog <div>, else </body>, else the end of
  # the document. The original last resort called strlen(), which is not a
  # Python builtin; len() is the intended call.
  end = content.find("<div id=\"IOSdialog\"")
  if end == -1:
    end = content.find("</body>")
  if end == -1:
    end = len(content)

  body = content[begin:end]

  # Plain-text substitutions, applied in order. Headings are demoted one
  # level, and h2 must be handled before h1 so a demoted h1 is not demoted
  # a second time.
  for old, new in (
    ("<amp-img", "<img"),
    ("</amp-img>", ""),
    ("<h2", "<h3"),
    ("</h2>", "</h3>"),
    ("<h1", "<h2"),
    ("</h1>", "</h2>"),
  ):
    body = body.replace(old, new)

  # Drop the "changan" link. The match is non-greedy so it stops at the
  # link's own </a> instead of swallowing everything up to the last </a>
  # on the same line.
  body = re.sub(r"<a href=\"http://l\.lnc\.nc/changan\" target=\"_blank\"(.+?)</a>", '', body)
  body = body.replace("<div class=\"col-md col-md-8\">", "<div>")
  body = body.replace("<div class=\"tm_center_widget\">", "<div>")
  # One tag per line.
  body = body.replace("><", ">\n<")

  return head + "<article>" + body + "</article>"

...	...	@@ -1,89 +0,0 @@
1		-from userio import *
2		-import requests
3		-import re
4		-
def articleImage(content):
  """Return the value of the page's ``og:image`` meta tag.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``<meta property="og:image" content="`` and the next
    double quote, or ``"favicon.png"`` when the tag or its closing quote
    cannot be found.
  """
  fallback = "favicon.png"
  opening = "<meta property=\"og:image\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  image, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return image
15		-
def articleDescription(content):
  """Return the value of the page's ``og:description`` meta tag.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``<meta property="og:description" content="`` and the
    next double quote, or ``"Description Extraction Failed"`` when the tag
    or its closing quote cannot be found.
  """
  fallback = "Description Extraction Failed"
  opening = "<meta property=\"og:description\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  description, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return description
26		-
def articleTitle(content):
  """Return the value of the page's ``og:title`` meta tag.

  The search marker deliberately omits the leading ``<meta property=`` so
  that only the ``"og:title" content="`` part has to match.

  Args:
    content: HTML source of the page, as a string.

  Returns:
    The text between ``"og:title" content="`` and the next double quote, or
    ``"Title Extraction Failed"`` when the marker or its closing quote
    cannot be found.
  """
  fallback = "Title Extraction Failed"
  opening = "\"og:title\" content=\""
  # str.partition never raises; an empty separator slot means "not found".
  # The previous str.index lookups sat outside the try block, so a missing
  # tag raised ValueError and the fallback was never returned.
  _, found, remainder = content.partition(opening)
  if not found:
    return fallback
  title, closing_quote, _ = remainder.partition("\"")
  if not closing_quote:
    return fallback
  return title
38		-
def article(url):
  """Fetch an article page and rebuild it as a trimmed HTML fragment.

  The page at ``url`` is downloaded with ``requests.get``. Its Open Graph
  image, title and description are read through ``articleImage``,
  ``articleTitle`` and ``articleDescription`` and re-emitted as ``<meta>``
  tags, followed by the article markup wrapped in ``<article>``.

  Args:
    url: Address of the article to fetch; also written to the ``og:url`` tag.

  Returns:
    A string holding the generated ``<meta>`` tags and the cleaned article
    markup inside ``<article>...</article>``.

  Exceptions raised by ``requests.get`` are not caught here.
  """
  say("ArticleDefault: "+url)
  # NOTE(review): no timeout is passed, so a stalled server can block this
  # call indefinitely - confirm whether that is acceptable for callers.
  response = requests.get(url, allow_redirects=True)
  content = response.text

  image_url = articleImage(content)
  title = articleTitle(content)
  description = articleDescription(content)

  head = "".join([
    "<meta property=\"og:type\" content=\"article\">\n",
    "<meta property=\"og:title\" content=\""+title+"\">\n",
    "<meta property=\"og:description\" content=\""+description+"\">\n",
    "<meta property=\"og:url\" content=\""+url+"\">\n",
    "<meta property=\"og:image\" content=\""+image_url+"\">\n",
    "<meta property=\"og:image:type\" content=\"image/jpeg\">",
  ])

  # Start of the article: the site's content <div>, else <body, else the
  # very beginning of the document.
  begin = content.find("<div class=\"middle-main-content\">")
  if begin == -1:
    begin = content.find("<body")
  if begin == -1:
    begin = 0

  # End of the article: the IOSdialog <div>, else </body>, else the end of
  # the document. The original last resort called strlen(), which is not a
  # Python builtin; len() is the intended call.
  end = content.find("<div id=\"IOSdialog\"")
  if end == -1:
    end = content.find("</body>")
  if end == -1:
    end = len(content)

  body = content[begin:end]

  # Plain-text substitutions, applied in order. Headings are demoted one
  # level, and h2 must be handled before h1 so a demoted h1 is not demoted
  # a second time.
  for old, new in (
    ("<amp-img", "<img"),
    ("</amp-img>", ""),
    ("<h2", "<h3"),
    ("</h2>", "</h3>"),
    ("<h1", "<h2"),
    ("</h1>", "</h2>"),
  ):
    body = body.replace(old, new)

  # Drop the "changan" link. The match is non-greedy so it stops at the
  # link's own </a> instead of swallowing everything up to the last </a>
  # on the same line.
  body = re.sub(r"<a href=\"http://l\.lnc\.nc/changan\" target=\"_blank\"(.+?)</a>", '', body)
  body = body.replace("<div class=\"col-md col-md-8\">", "<div>")
  body = body.replace("<div class=\"tm_center_widget\">", "<div>")
  # One tag per line.
  body = body.replace("><", ">\n<")

  return head + "<article>" + body + "</article>"