...
|
...
|
@@ -74,8 +74,15 @@ def article(url):
|
74
|
74
|
article_only = re.sub(r'<div style="filter:blur(.+?)" class="w-100 mw-100 h-auto" width="600" height="(.+?)">','<div><h2>'+articleStrTitle+'</h2>',article_only)
|
75
|
75
|
article_only = re.sub(r'<div style="min-height:358px"/>','<div>',article_only)
|
76
|
76
|
#article_only = re.sub(r'','',article_only)
|
77
|
|
- article_only = re.sub(r"href=\"/",'href="https://xxxxx/',article_only)
|
78
|
|
- article_only = re.sub(r"src=\"/",'src="https://xxxxx/',article_only)
|
|
77
|
+ say("LenCurrent: "+str(len(article_only)))
|
|
78
|
+ article_only = re.sub(r'<div class="article-body grid-center grid-body" data-qa="article-body">','<div>',article_only)
|
|
79
|
+ article_only = re.sub(r'<div data-qa="article-image" class="hide-for-print">','<div>',article_only)
|
|
80
|
+ article_only = re.sub(r'<div class="article-body grid-full-bleed" data-qa="article-body">','<div>',article_only)
|
|
81
|
+ article_only = re.sub(r'<div class="dib gray-dark pl-xs pr-xs font-sans-serif light font-xxxxs lh-md" style="--primary-border-color:"/>','',article_only)
|
|
82
|
+ #article_only = re.sub(r'<div class="article-body grid-center grid-body" data-qa="article-body">(.?)</div>', r'\1',article_only,flags=re.M|re.S)
|
|
83
|
+ say("LenCurrent: "+str(len(article_only)))
|
|
84
|
+ #article_only = re.sub(r"href=\"/",'href="https://xxxxx/',article_only)
|
|
85
|
+ #article_only = re.sub(r"src=\"/",'src="https://xxxxx/',article_only)
|
79
|
86
|
article_only = re.sub(r"^$",'',article_only)
|
80
|
87
|
article_only = re.sub(r'^\s*$', '',article_only,flags=re.M|re.S)
|
81
|
88
|
article_only = re.sub(r"><",'>\n<',article_only)
|