Showing 1 changed file with 3 additions and 1 deletion
+3 -1
newsParser/newsParser/newsXXX.skel
... ...
@@ -35,7 +35,8 @@ def article(url):
35 35
   article_only = re.sub(r"</h2>", '</h3>', article_only)
36 36
   article_only = re.sub(r"<h1", '<h2', article_only)
37 37
   article_only = re.sub(r"</h1>", '</h2>', article_only)
38
-  article_only = re.sub(r'<script(.+?)</script>','',article_only)
38
+  article_only = re.sub(r'<script(.+?)</script>','',article_only,flags=re.M|re.S)
39
+  article_only = re.sub(r'<script(.+?)/>','',article_only)
39 40
   #article_only = re.sub(r'','',article_only)
40 41
   article_only = re.sub(r"href=\"/",'href="https://xxxxx/',article_only)
41 42
   article_only = re.sub(r"src=\"/",'src="https://xxxxx/',article_only)
... ...
@@ -43,6 +44,7 @@ def article(url):
43 44
   article_only = re.sub(r'^\s*$', '',article_only,flags=re.M|re.S)
44 45
   article_only = re.sub(r"><",'>\n<',article_only)
45 46
 
47
+  #pageContent += "\n"+article_only+"\n"
46 48
   pageContent += "<article>\n"+article_only+"\n</article>\n"
47 49
   lenAfter=len(article_only)
48 50
   lenGain=float(10000-int(float(100*lenAfter/lenBefore)*100))/100