Showing 1 changed file with 18 additions and 1 deletion
+18 -1
sources/news-nyt.php
... ...
@@ -78,6 +78,18 @@ foreach ($articles as $article ) {
78 78
 	$article_only = re_remove($article_only, '/<div id="sponsor-slug" (.+?)><p>Supported by<\/p><\/div>/');
79 79
 	$article_only = re_remove($article_only, '/<div class="ad top-wrapper" style="text-align:center;height:100%;display:block;min-height:250px"><div id="top"><\/div><\/div>/');
80 80
 	$article_only = re_remove($article_only, '/<div class="ad sponsor-wrapper" style="text-align:center;height:100%;display:block"><div id="sponsor"><\/div><\/div>/');
81
+	$article_only = re_remove($article_only, '/<div data-testid="lazyimage-container" style="(.+?)"><\/div>/');
82
+  $article_only = re_remove($article_only, '/<span class="(.+?)">Image<\/span>/');
83
+  $article_only = re_remove($article_only, '/<div><\/div>/');
84
+	$article_only = re_remove($article_only, '/<aside class="(.+?)"><\/aside>/');
85
+	$article_only = re_remove($article_only, '/<div id="top-slug" class="(.+?)"><p>Advertisement<\/p><\/div>/');
86
+	$article_only = re_remove($article_only, '/<span class="(.+?)">Video<\/span>/');
87
+	$article_only = re_remove($article_only, '/<span>\. <a href="http:\/\/www\.nytreprints\.com\/">Order Reprints<\/a> \| <a href="http:\/\/www\.nytimes\.com\/pages\/todayspaper\/index\.html">Today’s Paper<\/a> \| <a href="https:\/\/www\.nytimes\.com\/subscriptions\/Multiproduct\/(.+?)">Subscribe<\/a><\/span>/');
88
+	$article_only = re_remove($article_only, '/<div id="bottom-slug" class="(.+?)"><p>Advertisement<\/p><\/div>/');
89
+	$article_only = re_remove($article_only, '/define\((.+?)}\);/');
90
+  $article_only = re_remove($article_only, '/var _gaq = _gaq \|\| \[\];/');
91
+	//$article_only = re_remove($article_only, '//');
92
+
81 93
 
82 94
 	//Some little replacements
83 95
 	$re = '/<div id="top-wrapper" class="ResponsiveAd-(.+?)">/';
... ...
@@ -88,6 +100,8 @@ foreach ($articles as $article ) {
88 100
 	$article_only = preg_replace($re, '<p>', $article_only);
89 101
 	$re = '/<h1 class=(.+?)><span>(.+?)<\/span><\/h1>/';
90 102
 	$article_only = preg_replace($re, '<h1>\2</h1>', $article_only);
103
+  $re = '/<h2 class="(.+?)>(.+?)<\/h2>/';
104
+	$article_only = preg_replace($re, '<h2>\2</h2>', $article_only);
91 105
 	$re = '/<h3 class=(.+?)>(.+?)<\/h3>/';
92 106
 	$article_only = preg_replace($re, '<h3>\2</h3>', $article_only);
93 107
 	$re = '/<div class="css-(.+?) StoryBodyCompanionColumn">/';
... ...
@@ -98,12 +112,15 @@ foreach ($articles as $article ) {
98 112
 	$article_only = preg_replace($re, '<h2>\2</h2>', $article_only);
99 113
 	$re = '/<div role="toolbar" aria-label="Social Media Share buttons, Save button, and Comments Panel with current comment count" class="css-(.+?)" data-testid="share-tools">/';
100 114
 	$article_only = preg_replace($re, '<div>', $article_only);
115
+  $re = '/<div class="bottom-of-article">/';
101 116
 
102
-
117
+  /*
103 118
 	$article_only = preg_replace('/<li class="css-(.+?)"><div><ul class="css-(.+?)">/', '<li>', $article_only);
104 119
 	$article_only = preg_replace('/<\/ul><\/div><\/li>/', '</li>', $article_only);
120
+   */
105 121
 	$article_only = re_remove($article_only, '/<div><button aria-haspopup="true" aria-expanded="false" (.+?)><\/button><\/div>/');
106 122
 	$article_only = re_remove($article_only, '/<a class="css-(.+?)" href="#site-content">Skip to content<\/a><a class="css-(.+?)" href="#site-index">Skip to site index<\/a>/');
123
+	$article_only = re_remove($article_only, '/<div><span class=""><i class="OpenCommentsButton-icon--(.+?)"><span class="OpenCommentsButton-text--(.+?)"><\/span><\/i><\/span><\/div>/');
107 124
 
108 125
 	//Finally remove empty lines
109 126
 	$article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);