Showing 1 changed file with 37 additions and 1 deletion
+37 -1
sources/news-nyt.php
... ...
@@ -91,13 +91,17 @@ foreach ($articles as $article ) {
91 91
 	$article_only = re_remove($article_only, '/<div id="bottom-slug" class="(.+?)"><p>Advertisement<\/p><\/div>/');
92 92
 	$article_only = re_remove($article_only, '/define\((.+?)}\);/');
93 93
   $article_only = re_remove($article_only, '/var _gaq = _gaq \|\| \[\];/');
94
+	$article_only = re_remove($article_only, '/<div class="ad sponsor-wrapper" style="text-align:center;height:100%;display:block"><div id="sponsor" class="" data-position="sponsor"><\/div><\/div>/');
95
+	$article_only = re_remove($article_only, '/<div class="ad top-wrapper" style="text-align:center;height:100%;display:block;min-height:250px"><div id="top" class="place-ad" data-position="top"><\/div><\/div>/');
96
+	//$article_only = re_remove($article_only, '//');
97
+	//$article_only = re_remove($article_only, '//');
94 98
 	//$article_only = re_remove($article_only, '//');
95 99
 
96 100
 
97 101
 	//Some little replacements
98 102
 	$re = '/<div id="top-wrapper" class="ResponsiveAd-(.+?)">/';
99 103
 	$article_only = preg_replace($re, '<div>', $article_only);
100
-	$re = '/<time class="css-(.+?)>(.+?)<\/time>/';
104
+	$re = '/<li><time class="css-(.+?)>(.+?)<\/time><\/li>/';
101 105
 	$article_only = preg_replace($re, '<time>\2</time>', $article_only);
102 106
 	$re = '/<p class="css-(.+?) (.+?)">/';
103 107
 	$article_only = preg_replace($re, '<p>', $article_only);
... ...
@@ -116,10 +120,42 @@ foreach ($articles as $article ) {
116 120
 	$re = '/<div role="toolbar" aria-label="Social Media Share buttons, Save button, and Comments Panel with current comment count" class="css-(.+?)" data-testid="share-tools">/';
117 121
 	$article_only = preg_replace($re, '<div>', $article_only);
118 122
   $re = '/<div class="bottom-of-article">/';
123
+  $article_only = preg_replace($re, '<div>', $article_only);
124
+  $re = '/<header class="css-(.+?)"><h3>(.+?)<\/h3><\/header>/';
125
+  $article_only = preg_replace($re, '', $article_only);
126
+  $re = '/<header class="css-(.+?) (.+?)"><div id="sponsor-wrapper" class="css-(.+?)">/';
127
+  $article_only = preg_replace($re, '<header><div>', $article_only);
128
+  $re = '/<li><time>(.+?)<\/time><\/li>/';
129
+  $article_only = preg_replace($re, '<em>\1</em>', $article_only);
130
+  $re = '/<\/div><\/div><\/header><section name="articleBody" itemprop="articleBody" class="css-(.+?)"><div>/';
131
+  $article_only = preg_replace($re, '</div></div></header><section><div>', $article_only);
132
+  $re = '/<header><div>/';
133
+  $article_only = preg_replace($re, '<div>', $article_only);
134
+  $re = '/<\/div><\/div><\/header><section><div>/';
135
+  $article_only = preg_replace($re, '</div></div><section><div>', $article_only);
136
+  $re = '/<li class="css-(.+?)">/';
137
+  $article_only = preg_replace($re, '<li style="display: none;">', $article_only);
138
+  $re = '/<a href="(.+?)amp;module=RelatedCoverage&amp;pgtype=Article&amp;region=Footer" class="css-(.+?)"><div>/';
139
+  $article_only = preg_replace($re, '<a style="display: none;"><div>', $article_only);
140
+  $re = '/<div><figure class="toneNews" aria-label="media" role="group" itemscope="" itemprop="associatedMedia" itemid="(.+?)-thumbLarge.jpg" itemtype="http:\/\/schema\.org\/ImageObject"><div>/';
141
+  $article_only = preg_replace($re, '<div><figure><div>', $article_only);
142
+  $re = '/<div><figure><div>/';
143
+  $article_only = preg_replace($re, '<div>', $article_only);
144
+  $re = '/<\/figure><\/div>/';
145
+  $article_only = preg_replace($re, '', $article_only);
146
+  $re = '/<div id="bottom-wrapper" class="css-(.+?)">/';
147
+  $article_only = preg_replace($re, '<div>', $article_only);
119 148
 
120 149
 	$article_only = re_remove($article_only, '/<div><button aria-haspopup="true" aria-expanded="false" (.+?)><\/button><\/div>/');
121 150
 	$article_only = re_remove($article_only, '/<a class="css-(.+?)" href="#site-content">Skip to content<\/a><a class="css-(.+?)" href="#site-index">Skip to site index<\/a>/');
122 151
 	$article_only = re_remove($article_only, '/<div><span class=""><i class="OpenCommentsButton-icon--(.+?)"><span class="OpenCommentsButton-text--(.+?)"><\/span><\/i><\/span><\/div>/');
152
+  $article_only = re_remove($article_only, '/<p><\/p>/');
153
+  $article_only = re_remove($article_only, '/<div><h2>(.+?)<\/h2><\/div>/');
154
+  $article_only = re_remove($article_only, '/<time class="css-(.+?) (.+?)" datetime="(.+?)">(.+?)<\/time>/');
155
+  $article_only = re_remove($article_only, '/<div><button type="button" class="css-(.+?)">Show All<\/button><\/div>/');
156
+  $article_only = re_remove($article_only, '/<figcaption itemprop="caption description" class="css-(.+?) (.+?)"><\/figcaption>/');
157
+  $article_only = re_remove($article_only, '/<div class="ad bottom-wrapper" style="text-align:center;height:100%;display:block;min-height:90px"><div id="bottom" class="" data-position="bottom"><\/div><\/div>/');
158
+  //$article_only = re_remove($article_only, '//');
123 159
 
124 160
 	//Finally remove empty lines
125 161
 	$article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);