Showing 12 changed files with 957 additions and 0 deletions
+10
sources/gennews.jessie.sh
... ...
@@ -0,0 +1,10 @@
1
#!/bin/bash
# Regenerates the static news pages (one HTML file per source) in
# /var/www/html/newsen/sources and copies them, together with rss.php,
# to kawi.fr.

# Abort if the working directory is missing: otherwise the pages would be
# written to, and *html uploaded from, whatever directory we started in.
cd /var/www/html/newsen/sources || exit 1

# Each news-<source>.php prints its page on stdout. stderr is discarded as in
# the original script (presumably to keep scheduled runs quiet).
for src in nyt wapo guardian latimes cna aje bbc; do
  php "news-${src}.php" > "${src}.html" 2> /dev/null
done

# Best-effort upload; all scp output is silenced deliberately.
# The ./ prefix keeps file names starting with '-' from being read as options.
scp ./*html rss.php kawi.fr:/var/www/html/newsen/sources/ &> /dev/null
+1
sources/gennews.sh
... ...
@@ -0,0 +1 @@
1
+gennews.jessie.sh
+6
sources/gennews.wheezy.sh
... ...
@@ -0,0 +1,6 @@
1
#!/bin/bash
# Regenerates the static news pages (one HTML file per source) in
# /var/www/newsen/sources. Unlike the jessie variant this script only builds
# four sources and does not upload anything.

# Abort if the working directory is missing: otherwise the pages would be
# written into whatever directory the script was started from.
cd /var/www/newsen/sources || exit 1

# Each news-<source>.php prints its page on stdout. stderr is discarded as in
# the original script (presumably to keep scheduled runs quiet).
for src in nyt wapo guardian latimes; do
  php "news-${src}.php" > "${src}.html" 2> /dev/null
done
+126
sources/news-aje.php
... ...
@@ -0,0 +1,126 @@
1
<?php
/*
 * Al Jazeera panel generator.
 *
 * Prints a clickable headline list built from the RSS feed NEWS_RSS_AJE, then
 * one hidden <div class="article"> per headline containing the body of the
 * linked page (last <article> element) after a series of regex clean-ups.
 *
 * Uses $NEWS_RSS_MAX_ITEMS, DOMinnerHTML() and re_remove() from
 * news-constants.php. NEWS_RSS_AJE is not defined in the visible code
 * (presumably rss.php, which news-constants.php includes).
 *
 * NOTE(review): feed titles/links and the scraped markup are echoed without
 * HTML escaping; confirm this is acceptable for the pages that embed this
 * output.
 */
include_once( 'news-constants.php' );

// Scraped pages are rarely valid HTML: keep libxml diagnostics internal
// instead of letting them surface as PHP warnings.
libxml_use_internal_errors(true);

// Always defined, so the article loop below is safe when the feed is unavailable.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_AJE);
$xml = ($rss_content === false) ? false : simplexml_load_string($rss_content);
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . $channel['title'] . '</h4>';
  $cpt=0;
  foreach ($xml->channel->item as $item) {
    // List at most $NEWS_RSS_MAX_ITEMS headlines (the previous post-increment
    // test let one extra item through).
    if( $cpt >= $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $article['image']=$item->mediacontent['url'];
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    //echo '<img id="img-list" src="'.$article['image'].'" style="display:block;"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $article['title'].'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
  }
}

// Close the headline column (its containers are opened outside this script)
// and open the article column.
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// $articles is keyed 0..n-1 by the loop above, so the key is the article index.
foreach ($articles as $cpt => $article) {
  $cpt_prev=$cpt-1;
  $cpt_next=$cpt+1;
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is now closed, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // A failed or empty download leaves the body empty instead of being passed
  // to DOMDocument::loadHTML().
  $article_only = "";
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // Separate variable: the node list must not overwrite $articles.
    // When several <article> elements exist, the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Make root-relative links and images absolute.
  $orgStrings = array(' href="/', ' src="/');
  $newStrings = array(' href="https://www.aljazeera.com/', ' src="https://www.aljazeera.com/');
  $article_only  = str_replace($orgStrings, $newStrings, $article_only);

  //Clean some sections
  // NOTE(review): several patterns below (css-*, StoryBodyCompanionColumn)
  // look like they were written for another site's markup; confirm they are
  // still needed here.
  //$article_only = re_remove($article_only, '//');
  $article_only = re_remove($article_only, '/<div id="(.+?)-slug" class="ResponsiveAd-(.+?)"><p>Advertisement<\/p><\/div>/');
  $article_only = re_remove($article_only, '/<aside class="css-(.+?)"><span><\/span><\/aside>/');
  $article_only = re_remove($article_only, '/<li class="css-(.+?)">(.+?)<\/li>/');
  $article_only = re_remove($article_only, '/<svg(.+?)>(.+?)<\/svg>/');
  $article_only = re_remove($article_only, '/<button id="desktop-sections-button" (.+?)>(.+?)<\/button>/');
  //$article_only = re_remove($article_only, '/<div role="toolbar" aria-label="Social Media Share buttons, Save button, and Comments Panel with current comment count" class="css-(.+?)" data-testid="share-tools">/');
  $article_only = re_remove($article_only, '/<div id="sponsor-slug" (.+?)><p>Supported by<\/p><\/div>/');
  $article_only = re_remove($article_only, '/<div class="ad top-wrapper" style="text-align:center;height:100%;display:block;min-height:250px"><div id="top"><\/div><\/div>/');
  $article_only = re_remove($article_only, '/<div class="ad sponsor-wrapper" style="text-align:center;height:100%;display:block"><div id="sponsor"><\/div><\/div>/');

  //Some little replacements
  $re = '/<div id="top-wrapper" class="ResponsiveAd-(.+?)">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $re = '/<time class="css-(.+?)>(.+?)<\/time>/';
  $article_only = preg_replace($re, '<time>\2</time>', $article_only);
  $re = '/<p class="css-(.+?) (.+?)">/';
  $article_only = preg_replace($re, '<p>', $article_only);
  $re = '/<h1 class=(.+?)><span>(.+?)<\/span><\/h1>/';
  $article_only = preg_replace($re, '<h1>\2</h1>', $article_only);
  $re = '/<h3 class=(.+?)>(.+?)<\/h3>/';
  $article_only = preg_replace($re, '<h3>\2</h3>', $article_only);
  $re = '/<div class="css-(.+?) StoryBodyCompanionColumn">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $re = '/<div class="css-(.+?)">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $re = '/<div><h2 class=(.+?)>(.+?)<\/h2><\/div>/';
  $article_only = preg_replace($re, '<h2>\2</h2>', $article_only);
  $re = '/<div role="toolbar" aria-label="Social Media Share buttons, Save button, and Comments Panel with current comment count" class="css-(.+?)" data-testid="share-tools">/';
  $article_only = preg_replace($re, '<div>', $article_only);

  $article_only = preg_replace('/<li class="css-(.+?)"><div><ul class="css-(.+?)">/', '<li>', $article_only);
  $article_only = preg_replace('/<\/ul><\/div><\/li>/', '</li>', $article_only);
  $article_only = re_remove($article_only, '/<div><button aria-haspopup="true" aria-expanded="false" (.+?)><\/button><\/div>/');
  $article_only = re_remove($article_only, '/<a class="css-(.+?)" href="#site-content">Skip to content<\/a><a class="css-(.+?)" href="#site-index">Skip to site index<\/a>/');

  //Finally remove empty lines
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

  // Navigation widgets, printed above and below the article body.
  $gap        = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'.'<h1>'.$article['title'].'</h1>'.$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home.$gap.$nav_top.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next.'</div>';
}
?>
125
+
126
+
+126
sources/news-bbc.php
... ...
@@ -0,0 +1,126 @@
1
<?php
/*
 * BBC panel generator.
 *
 * Prints a clickable headline list built from the RSS feed NEWS_RSS_BBC, then
 * one hidden <div class="article"> per headline containing the story body of
 * the linked page: the first <div class="story-body">, else the first
 * <div class="vxp-media__body"> (prefixed with "Video"), else the text
 * "Section Not Found". The body then goes through regex clean-ups.
 *
 * Uses $NEWS_RSS_MAX_ITEMS, DOMinnerHTML() and re_remove() from
 * news-constants.php. NEWS_RSS_BBC is not defined in the visible code
 * (presumably rss.php, which news-constants.php includes).
 *
 * NOTE(review): feed titles/links and the scraped markup are echoed without
 * HTML escaping; confirm this is acceptable for the pages that embed this
 * output.
 */
include_once( 'news-constants.php' );

// Scraped pages are rarely valid HTML: keep libxml diagnostics internal
// instead of letting them surface as PHP warnings.
libxml_use_internal_errors(true);

// Always defined, so the article loop below is safe when the feed is unavailable.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_BBC);
$xml = ($rss_content === false) ? false : simplexml_load_string($rss_content);
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . $channel['title'] . '</h4>';
  $cpt=0;
  foreach ($xml->channel->item as $item) {
    // List at most $NEWS_RSS_MAX_ITEMS headlines (the previous post-increment
    // test let one extra item through).
    if( $cpt >= $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $article['title'].'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
  }
}

// Close the headline column (its containers are opened outside this script)
// and open the article column.
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// $articles is keyed 0..n-1 by the loop above, so the key is the article index.
foreach ($articles as $cpt => $article) {
  $cpt_prev=$cpt-1;
  $cpt_next=$cpt+1;
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is now closed, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // Shown when neither known container is found, or when the download fails.
  $article_only = "Section Not Found";
  $article_content = file_get_contents((string) $article['link']);
  //DEBUG
  //dump2file("bbc", $cpt, $article['link']."<br>\n".$article_content);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // Separate variable: the node list must not overwrite $articles.
    $divs = $doc->getElementsByTagName('div');

    // Regular story: first <div> whose class attribute is exactly "story-body".
    $section_found = false;
    foreach ($divs as $node) {
      if ($node->getAttribute('class') === "story-body") {
        $article_only = DOMinnerHTML($node);
        $section_found = true;
        break;
      }
    }

    // Fallback for video pages.
    if (!$section_found) {
      foreach ($divs as $node) {
        if ($node->getAttribute('class') === "vxp-media__body") {
          $article_only = "<p>Video</p>".DOMinnerHTML($node);
          $section_found = true;
          break;
        }
      }
    }
  }

  //Clean some sections
  //$article_only = re_remove($article_only, '//');
  $article_only = re_remove($article_only, '/<div id="topic-tags"><div id="(.+?)"><noscript><\/noscript><\/div><\/div>/');
  $article_only = re_remove($article_only, '/<svg class="extracted-svg ex-(.+?)" (.+?)<\/path><\/g><\/svg>/');
  $article_only = re_remove($article_only, '/<svg class="twite__share-icon" (.+?)<\/svg>/');
  $article_only = re_remove($article_only, '/<span class="twite__icon twite__icon--(.+?)" data-platform="(.+?)"><\/span>/');
  $article_only = re_remove($article_only, '/<p class="twite__channel-(.+?)" aria-hidden="true">(.+?)<\/p>/');
  $article_only = re_remove($article_only, '/<span class="off-screen">Share this (.+?)<\/span>/');
  $article_only = re_remove($article_only, '/                    <span class="extracted__icon extracted__icon--(.+?)" data-platform="(.+?)">/');
  $article_only = re_remove($article_only, '/                    <\/span>/');
  $article_only = re_remove($article_only, '/<p class="twite__copy-text">Copy this link<\/p>/');
  $article_only = re_remove($article_only, '/<span class="off-screen">Close share panel<\/span>/');
  $article_only = re_remove($article_only, '/<div class="twite__close-button-graphic" aria-hidden="true"><\/div>/');
  $article_only = re_remove($article_only, '/if \(window.bbcdotcom && bbcdotcom.adverts && bbcdotcom.adverts.slotAsync\) /');
  $article_only = re_remove($article_only, '/bbcdotcom.adverts.slotAsync\(\'mpu\', \[1,2,3\]\);/');

  //$article_only = preg_replace('//', '', $article_only);
  $article_only = preg_replace('/<div class="with-extracted-share-icons">/', '<div class="with-extracted-share-icons" style="display:none">', $article_only);
  $article_only = preg_replace('/<img class="js-image-replace" alt="(.+?)" src="(.+?)" width="(.+?)" height="(.+?)"><span class="off-screen">Image copyright<\/span>/', '<img src="\2"><br>Copyright ', $article_only);
  $article_only = preg_replace('/<a class="extracted__channel-link (.+?) tabindex="-1">/', '<a>', $article_only);
  $article_only = preg_replace('/<a class="twite__(.+?)-link" (.+?)>/', '<a>', $article_only);
  $article_only = preg_replace('/<li class="twite__channel-out (.+?)" aria-hidden="true">/', '<li>', $article_only);
  $article_only = preg_replace('/<li class="twite__channel twite__channel--(.+?) twite__channel-click--(.+?)">/', '<li>', $article_only);

  //Finally remove empty lines
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

  // Navigation widgets, printed above and below the article body.
  $gap        = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home.$gap.$nav_top.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next.'</div>';
}
?>
125
+
126
+
+117
sources/news-cna.php
... ...
@@ -0,0 +1,117 @@
1
<?php
/*
 * CNA panel generator.
 *
 * Prints a clickable headline list built from the RSS feed NEWS_RSS_CNA, then
 * one hidden <div class="article"> per headline containing the lead image
 * (taken from the page's <link rel="image_src">) and the body of the linked
 * page (last <article> element, cut at the "save for later" block) after a
 * series of regex clean-ups.
 *
 * Uses $NEWS_RSS_MAX_ITEMS, DOMinnerHTML() and re_remove() from
 * news-constants.php. NEWS_RSS_CNA is not defined in the visible code
 * (presumably rss.php, which news-constants.php includes).
 *
 * NOTE(review): feed titles/links and the scraped markup are echoed without
 * HTML escaping; confirm this is acceptable for the pages that embed this
 * output.
 */
include_once( 'news-constants.php' );

// Scraped pages are rarely valid HTML: keep libxml diagnostics internal
// instead of letting them surface as PHP warnings.
libxml_use_internal_errors(true);

// Always defined, so the article loop below is safe when the feed is unavailable.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_CNA);
if ($rss_content === false) {
  $xml = false;
} else {
  // Rename the namespaced element so SimpleXML exposes it as a plain child.
  $orgStrings = array( 'media:content');
  $newStrings = array( 'mediacontent');
  $rss_content  = str_replace($orgStrings, $newStrings, $rss_content);
  $xml = simplexml_load_string($rss_content);
}
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . $channel['title'] . '</h4>';
  $cpt=0;
  foreach ($xml->channel->item as $item) {
    // List at most $NEWS_RSS_MAX_ITEMS headlines (the previous post-increment
    // test let one extra item through).
    if( $cpt >= $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
//    $article['image']=$item->mediacontent['url'];
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
//    echo '<img id="img-list" src="'.$article['image'].'" style="display:block;"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $article['title'].'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
  }
}

// Close the headline column (its containers are opened outside this script)
// and open the article column.
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// $articles is keyed 0..n-1 by the loop above, so the key is the article index.
foreach ($articles as $cpt => $article) {
  $cpt_prev=$cpt-1;
  $cpt_next=$cpt+1;
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is now closed, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // A failed or empty download leaves body and image empty instead of being
  // passed to DOMDocument::loadHTML().
  $article_only = "";
  $imageurl = '';
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // Separate variable: the node list must not overwrite $articles.
    // When several <article> elements exist, the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }

    //Extract image
    $re = '/<link rel="image_src" href="(.+?)"\/>/';
    if (preg_match($re, $article_content, $matches) === 1) {
      $imageurl = $matches[1];
    }
  }

  // Drop everything from the bottom "save for later" block onwards.
  // As before, a marker at offset 0 is deliberately left alone.
  $SEARCH_SUB='<div class="c-save-for-later--default is-article-bottom-position"';
  $pos_start=strpos($article_only, $SEARCH_SUB);
  if($pos_start) {
    $article_only = substr($article_only,0,$pos_start);
  }

  //Clean some sections
  //$article_only = re_remove($article_only, '//');
  $article_only = re_remove($article_only, '/        <h2 class="sharing__title">/');
  $article_only = re_remove($article_only, '/                Share this content<\/h2>/');
  $article_only = re_remove($article_only, '/<button class="save-for-later__toggle i-save-for-later" data-js-atom="save-for-later-toggle" data-grunticon-embed="true" title="Bookmark article to read later"><\/button>/');
  $article_only = re_remove($article_only, '/<h2 class="save-for-later__title">Bookmark<\/h2>/');
  $article_only = re_remove($article_only, '/<div class="advertisement__container" data-js-atom="advertisement-wrapper" id="ad-outstream-desktop-news-story"><\/div>/');
  $article_only = re_remove($article_only, '/<div class="advertisement__container" data-js-atom="advertisement-wrapper" id="ad-imu1-desktop-news-story"><\/div>/');
  $article_only = re_remove($article_only, '/<div class="advertisement__container" data-js-atom="advertisement-wrapper" id="ad-imu1-mobile-news-story"><\/div>/');
  $article_only = re_remove($article_only, '/<div><\/div>/');
  $article_only = re_remove($article_only, '/<p><\/p>/');
  $article_only = re_remove($article_only, '/sizes="\(min-width: 992px\) 670px, 100vw"/');

  $article_only = preg_replace('/<figure class="picture__wrapper"><picture class="picture__container"><source data-srcset="(.+?)>(.+?)<\/source><\/picture>/', '<figure>', $article_only);
  $article_only = preg_replace('/<ul class="sharing__list">/', '<ul class="sharing__list" style="display:none">', $article_only);
  $article_only = preg_replace('/    <aside class="c-advertisement--rectangle is-hidden-tablet is-hidden-(.+?)"(.+?)><span class="advertisement__title">Advertisement<\/span>/', '<aside>', $article_only);
  $article_only = preg_replace('/<aside class="c-advertisement--rectangle-float (.+?)><span class="advertisement__title">Advertisement<\/span>/', '<aside>', $article_only);

  //Finally remove empty lines
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

  // Navigation widgets, printed above and below the article body.
  $gap        = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  // Only emit the lead image when one was found; an empty src would make the
  // browser request the embedding page again.
  $image_html = ($imageurl !== '') ? '<img src="'.$imageurl.'">' : '';

  echo $nav_home.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'.$image_html.$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home.$gap.$nav_top.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next.'</div>';
}
?>
116
+
117
+
+46
sources/news-constants.php
... ...
@@ -0,0 +1,46 @@
1
+<?php
2
+include_once( 'rss.php' );
3
+$NEWS_RSS_MAX_ITEMS = 10;
4
+
5
/**
 * Serialises the children of a DOM node to an HTML string, i.e. the node's
 * "inner HTML" without its own start and end tags.
 *
 * @param DOMNode $element Node whose children are serialised.
 * @return string Concatenated HTML of all child nodes ("" when there are none).
 */
function DOMinnerHTML(DOMNode $element)
{
    $owner = $element->ownerDocument;
    $parts = array();

    foreach ($element->childNodes as $node) {
        $parts[] = $owner->saveHTML($node);
    }

    return implode('', $parts);
}
17
+
18
/**
 * Fetches a URL with cURL and returns the response body.
 *
 * Defaults: 10 second timeout, user agent "Wget 1", TLS peer verification
 * disabled.
 *
 * @param string $url  URL to request.
 * @param array  $opts Optional CURLOPT_* => value pairs applied on top of the
 *                     defaults. CURLOPT_RETURNTRANSFER is set afterwards and
 *                     therefore cannot be overridden.
 * @return string|null Response body, or null when the handle cannot be created
 *                     or the transfer fails (the reason is written to the PHP
 *                     error log).
 */
function http_get_contents($url, $opts = [])
{
  $ch = curl_init();
  if ($ch === false) {
    error_log('http_get_contents: curl_init() failed');
    return null;
  }

  // NOTE(review): with peer verification off, HTTPS responses are not
  // authenticated. Left unchanged to keep existing behaviour; confirm whether
  // it is still required.
  curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
  curl_setopt($ch, CURLOPT_TIMEOUT, 10);
  curl_setopt($ch, CURLOPT_USERAGENT, "Wget 1");
  curl_setopt($ch, CURLOPT_URL, $url);
  if(is_array($opts) && $opts) {
    foreach($opts as $key => $val) {
      curl_setopt($ch, $key, $val);
    }
  }
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

  $retval = curl_exec($ch);
  if ($retval === false) {
    error_log(curl_error($ch));
    // Explicit failure value (previously an implicit fall-through null).
    return null;
  }
  return $retval;
}
37
+
38
/**
 * Removes every match of a regular expression from a string.
 *
 * @param string $source Text to clean.
 * @param string $regex  PCRE pattern including delimiters.
 * @return string $source with all matches deleted. When preg_replace() reports
 *                an error (null result, e.g. invalid pattern or backtrack
 *                limit hit) the input is returned unchanged, so one bad
 *                pattern cannot blank the text for the rest of a clean-up
 *                chain.
 */
function re_remove($source, $regex) {
    $cleaned = preg_replace($regex, '', $source);
    if ($cleaned === null) {
        error_log('re_remove: preg_replace failed for pattern ' . $regex);
        return $source;
    }
    return $cleaned;
}
41
+
42
/**
 * Debug helper: writes $content to "dump-<prefix>-<index>.html", resolved
 * against the current working directory, replacing any existing file.
 *
 * @param string $prefix  First part of the file name.
 * @param mixed  $index   Second part of the file name (converted to string).
 * @param string $content Data to write.
 */
function dump2file($prefix, $index, $content) {
  file_put_contents(sprintf('dump-%s-%s.html', $prefix, $index), $content);
}
46
+?>
+109
sources/news-guardian.php
... ...
@@ -0,0 +1,109 @@
1
<?php
/*
 * Guardian panel generator.
 *
 * Prints a clickable headline list (with the feed image of each item) built
 * from the RSS feed NEWS_RSS_GUARDIAN, then one hidden <div class="article">
 * per headline containing the body of the linked page (last <article>
 * element) after a series of regex clean-ups.
 *
 * Uses $NEWS_RSS_MAX_ITEMS, DOMinnerHTML() and re_remove() from
 * news-constants.php. NEWS_RSS_GUARDIAN is not defined in the visible code
 * (presumably rss.php, which news-constants.php includes).
 *
 * NOTE(review): feed titles/links and the scraped markup are echoed without
 * HTML escaping; confirm this is acceptable for the pages that embed this
 * output.
 */
include_once( 'news-constants.php' );

// Scraped pages are rarely valid HTML: keep libxml diagnostics internal
// instead of letting them surface as PHP warnings.
libxml_use_internal_errors(true);

// Always defined, so the article loop below is safe when the feed is unavailable.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_GUARDIAN);
if ($rss_content === false) {
  $xml = false;
} else {
  // Rename the namespaced element so SimpleXML exposes it as a plain child.
  $orgStrings = array( 'media:content');
  $newStrings = array( 'mediacontent');
  $rss_content  = str_replace($orgStrings, $newStrings, $rss_content);
  $xml = simplexml_load_string($rss_content);
}
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . $channel['title'] . '</h4>';
  $cpt=0;
  foreach ($xml->channel->item as $item) {
    // List at most $NEWS_RSS_MAX_ITEMS headlines (the previous post-increment
    // test let one extra item through).
    if( $cpt >= $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $article['image'] = $item->mediacontent['url'];
    $articles[$cpt] = $article;

    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img id="img-guardian" src="'.$article['image'].'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $article['title'].'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
  }
}

// Close the headline column (its containers are opened outside this script)
// and open the article column.
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// $articles is keyed 0..n-1 by the loop above, so the key is the article index.
foreach ($articles as $cpt => $article) {
  $cpt_prev=$cpt-1;
  $cpt_next=$cpt+1;
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is now closed, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // A failed or empty download leaves the body empty instead of being passed
  // to DOMDocument::loadHTML().
  $article_only = "";
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // Separate variable: the node list must not overwrite $articles.
    // When several <article> elements exist, the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }
  //$article_only=$article_content;

//  $article_only = re_remove($article_only, '//');
  $article_only = re_remove($article_only, '/<span class="syndication__link button button--syndication-reprint button--small">Reuse this content<\/span>/');
  $article_only = re_remove($article_only, '/<span class=" u-h ">Share on Messenger<\/span>/');
  $article_only = re_remove($article_only, '/<span class=" u-h ">Share on WhatsApp<\/span>/');
  // "+" must be escaped: unescaped it is a quantifier on "e" and the pattern
  // can never match the literal label "Share on Google+".
  $article_only = re_remove($article_only, '/<span class=" u-h ">Share on Google\+<\/span>/');
  $article_only = re_remove($article_only, '/<span class=" u-h ">Share on Pinterest<\/span>/');
  $article_only = re_remove($article_only, '/<span class=" u-h ">Share on LinkedIn<\/span>/');
  $article_only = re_remove($article_only, '/<input type="checkbox" id="show-caption" class="mobile-only u-h reveal-caption__checkbox"><label class="mobile-only reveal-caption reveal-caption--img" for="show-caption">/');
  $article_only = re_remove($article_only, '/<svg (.+?)><\/path><\/svg>/');
  $article_only = re_remove($article_only, '/<span class="inline-icon__fallback button">(.+?)<\/span>/');
  $article_only = preg_replace('/<span class="inline-expand-image inline-icon centered-icon rounded-icon article__fullscreen modern-visible">/', '<span>', $article_only);
  $article_only = preg_replace('/<li class="social__item social__item--(.+?)>/', '<li>', $article_only);
  $article_only = preg_replace('/<span class="inline-share-(.+?)>/', '<span>', $article_only);
//  $article_only = preg_replace('//', '<span>', $article_only);
  $article_only = preg_replace('/<a class="syndication__action" data-link-name="meta-syndication-article" (.+?)>/', '<span>', $article_only);
  $article_only = preg_replace('/<a class="social__action js-social__action--bottom social-icon-wrapper" (.+?)>/', '<a>', $article_only);
  $article_only = preg_replace('/<div class="u-responsive-ratio" style="padding-bottom: (.+?)">/', '<div>', $article_only);
  $article_only = preg_replace('/<ul class="social social--top js-social--top u-unstyled u-cf" data-component="social">/', '<ul style="display:none;">', $article_only);
  $article_only = preg_replace('/<div class="submeta__syndication">/', '<div style="display:none;">', $article_only);
  $article_only = preg_replace('/<div data-component="share" class="submeta__share">/', '<div style="display:none;">', $article_only);
  $article_only = preg_replace('/<div data-component="share" class="submeta__keywords">/', '<div style="display:none;">', $article_only);
  $article_only = preg_replace('/<div class="submeta__section-labels">/', '<div style="display:none;">', $article_only);
  // Finally remove empty lines
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

  // Navigation widgets, printed above and below the article body.
  $gap        = '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'."<h1>".$article['title']."</h1>".$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home.$gap.$nav_top.$gap.$nav_source.$gap.$nav_prev.$gap.$nav_next.'</div>';
}
?>
+178
sources/news-latimes.php
... ...
@@ -0,0 +1,178 @@
1
<?php
/**
 * LA Times page fragment generator.
 *
 * 1. Reads the feed named by NEWS_RSS_LATIMES and prints one clickable entry
 *    (image, title, source link) per item.
 * 2. Closes three <div>s that this file never opens (the surrounding page is
 *    expected to have opened them) and opens a second column.
 * 3. Downloads every linked story, keeps the inner HTML of its <article>
 *    element, strips ads / widgets with regular expressions and prints it in
 *    a hidden <div class="article" id="article-N">.
 *
 * onArticle() used in the onclick handlers is JavaScript that is not defined
 * in this file. DOMinnerHTML(), re_remove() and $NEWS_RSS_MAX_ITEMS are not
 * defined here either - presumably they come from news-constants.php.
 */
include_once( 'news-constants.php' );

// Real-world HTML5 makes libxml emit a warning for every unknown tag; collect
// those internally instead of printing them into the generated page.
libxml_use_internal_errors(true);

// Must exist even when the feed cannot be parsed, because the second half of
// the script iterates over it unconditionally.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_LATIMES);
// Rename the namespaced tag so that the element can be read as a plain
// SimpleXML property ($item->mediacontent).
$rss_content = str_replace('media:content', 'mediacontent', $rss_content);
$xml = simplexml_load_string($rss_content);
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . htmlspecialchars((string) $channel['title'], ENT_QUOTES, 'UTF-8', false) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $article['image'] = $item->mediacontent['url'];
    $articles[$cpt] = $article;

    // Feed content is external input: escape it before it reaches the markup.
    // double_encode=false keeps entities that are already encoded intact.
    $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8', false);
    $link_attr  = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);
    $image_attr = htmlspecialchars((string) $article['image'], ENT_QUOTES, 'UTF-8', false);

    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$image_attr.'" style="display:block;"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $title_html.'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
    // NOTE(review): the test runs after the increment, so up to
    // $NEWS_RSS_MAX_ITEMS + 1 entries are kept - confirm this is intended.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
  }
}
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// ---------------------------------------------------------------------------
// Clean-up tables. They are loop-invariant, so they are built once. Patterns
// are applied strictly in the order listed.
// ---------------------------------------------------------------------------

// Collapses runs of blank / whitespace-only lines into a single newline.
$blank_lines_re = "/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/";

// Share buttons, spacers and inline-ad markers removed first.
$ad_removals = array(
  '/<a class="button button-icon button-icon-hover-accent " href=(.+?)> <i class="fa fa-share button-icon-inset-accent"><\/i> <\/a>/',
  '/<button class="button button-icon button-icon-hover-accent (.+?)> <i class="fa fa-(.+?) button-icon-inset-accent"><\/i> <\/button>/',
  '/<div class="spaced spaced-sm spaced-bottom hidden-desktop hidden-tablet"><\/div>/',
  '/<div class="spaced spaced-md spaced-bottom hidden-mobile"><\/div>/',
  '/<div class="ad-caption-text align-center uppercase">Advertisement<\/div> /',
  '/<div class="desktop-nativo mobile-yieldmo inline-ad-arrow "> <i class="fa fa-angle-down flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="desktop-nativo mobile-yieldmo inline-ad-arrow less-spacing hidden-desktop hidden-mobile"> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="desktop-nativo mobile-yieldmo inline-ad-arrow less-spacing hidden-desktop hidden-tablet "> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-dfp inline-ad-arrow hidden-desktop"> <i class="fa fa-angle-down flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-dfp inline-ad-arrow less-spacing hidden-desktop hidden-mobile"> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-dfp inline-ad-arrow less-spacing hidden-desktop hidden-tablet "> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-nativo inline-ad-arrow hidden-desktop"> <i class="fa fa-angle-down flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-nativo inline-ad-arrow less-spacing hidden-desktop hidden-mobile"> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-nativo inline-ad-arrow less-spacing hidden-desktop hidden-tablet "> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-yieldmo inline-ad-arrow hidden-desktop"> <i class="fa fa-angle-down flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-yieldmo inline-ad-arrow less-spacing hidden-desktop hidden-mobile"> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
  '/<div class="mobile-yieldmo inline-ad-arrow less-spacing hidden-desktop hidden-tablet "> <i class="fa fa-angle-up flex-container-column flex-row-reverse"><\/i> <\/div>/',
);

// Opening tags rewritten (mostly to hide the element) : pattern => replacement.
$tag_rewrites = array(
  '/<div class="preview-text">/' => '<div style="display:none;">',
  '/<div class="flex-container-row ">/' => '<div style="display:none;">',
  '/<div class="flex-container-column">/' => '<div style="display:none;">',
  '/<li class="collection-item flex-container-row align-items-start collection-item-border-bottom ">/' => '<li>',
  '/<div class="flex">/' => '<div class="flex" style="display:none;">',
  '/<ul class="tag-list-wrapper list-unstyled flex-container-row align-left">/' => '<ul style="display:none;">',
  '/<div class="mobile-yieldmo inline-ad hidden-desktop" data-inline-ad-count="4"> <div class="wrapper clearfix col pb-feature pb-layout-item pb-f-ads-yieldmo" data-pb-name="Yieldmo Ad" data-pb-curated="curated" id=""> <div id="(.+?)"><\/div> <script>window.serviceCallbacks.push(.+?)<\/script>/' => '<div><div>',
);

// Video-player script residue, newsletter and comment widgets removed after
// the first blank-line collapse.
$widget_removals = array(
  '/<span class="sharebar align-right flex">    <\/span>/',
  '/  "data-arctrack-item-page": "",/',
  '/  "data-arctrack-item-producer": "lanews",/',
  '/  "data-arctrack-item-videoid": "(.+?)",/',
  '/  "data-arctrack-item-classification": "(.+?)",/',
  '/  "data-arctrack-item-name": "(.+?)",/',
  '/"data-arctrack-item-autoplay": "false"/',
  '/              type: "video",/',
  '/              system: "Goldfish",/',
  '/              url: "http:\/\/www.latimes.com\/resizer\/(.+?)",/',
  '/              headline: "(.+?)",/',
  '/              brightcoveVideoId: "(.+?)",/',
  '/              goldfishVideoId: "(.+?)",/',
  '/              urlId: "(.+?)",/',
  '/              site: "",/',
  '/              date: "(.+?)",/',
  '/              duration: "(.+?)",/',
  '/              description: "(.+?)",/',
  '/              rand: "(.+?)",/',
  '/              analyticsData:/',
  '/      const videos(.+?) = \[/',
  '/^            },$/m',
  '/^            {$/m',
  '/^}$/m',
  '/^{$/m',
  '/                  sharebar: "(.+?)",/',
  '/<div class="goldfish-info" data-script-path="diodxcszlbmwk" data-environment="prod"><\/div>/',
  '/<div class="brightcove-info" data-account="(.+?)" data-player="(.+?)"><\/div>/',
  '/<div class="logo logo-full logo-src-1 header-logo"><\/div>/',
  '/<div class="card flex-container-column align-items-stretch"> <div class="card-header flex-container-row align-items-center">  <\/div> <\/div>/',
  '/<div id="pb-(.+?)"><\/div>/',
  '/<button disabled name="loading" class="button-icon button-accent button-rounded button-lg button-loading"> <i class="fa fa-circle-o-notch fa-spin fa-fw"><\/i> <\/button>/',
  '/<button name="error" aria-label="error" type="submit" class="button-icon button-error button-lg button-rounded"> <i class="fa fa-exclamation"><\/i> <\/button>/',
  '/<div> <h6 class="flex capitalized">Today\'s Headlines Newsletter<\/h6> <\/div>/',
  '/  <div class="caption-text">Weekdays<\/div>/',
  '/<div class="align-center"> <button name="show-comments" (.+?)> Be the first to comment <\/button> <button name="hide-comments" (.+?)>Hide Comments<\/button> <\/div>/',
);

$cpt = 0;
foreach ($articles as $article ) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);

  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is closed immediately, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the story and keep the inner HTML of its <article> element (the
  // last one wins when the page has several). A failed or empty download
  // leaves $article_only empty instead of feeding loadHTML() an empty
  // string, which raises a ValueError on PHP 8.
  $article_only = "";
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // A dedicated loop variable is used: the original reassigned $articles
    // here, shadowing the list driving the outer loop.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

/*
  Disabled in the original: rewrite site-relative URLs to absolute ones.
  $orgStrings = array(' href="/');
  $newStrings = array(' href="https://www.latimes.com/');
  $article_only  = str_replace($orgStrings, $newStrings, $article_only);
  $orgStrings = array(' src="/');
  $newStrings = array(' src="https://www.latimes.com/');
  $article_only  = str_replace($orgStrings, $newStrings, $article_only);
 */

  // Clean some sections
  foreach ($ad_removals as $pattern) {
    $article_only = re_remove($article_only, $pattern);
  }

  // Some little replacements
  foreach ($tag_rewrites as $pattern => $replacement) {
    $article_only = preg_replace($pattern, $replacement, $article_only);
  }

  $article_only = preg_replace($blank_lines_re, "\n", $article_only);

  foreach ($widget_removals as $pattern) {
    $article_only = re_remove($article_only, $pattern);
  }
  $re = '/<div class="wrapper clearfix full pb-feature pb-layout-item pb-f-article-video-playlist" data-pb-name="Video Playlist" data-pb-curated="curated" id="(.+?)">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $article_only = re_remove($article_only, '/<div class="trb_cm_so" data-role="cm_container"> <div data-role="comments" data-comments-id="(.+?)" class="so_comments hidden" data-sitename="lanewsprod" data-content-id="lanews(.+?)" data-type="promo-comment" data-publisher="tronc"><\/div> <\/div>/');
  $article_only = re_remove($article_only, '/<div class="wrapper clearfix full pb-feature pb-layout-item pb-f-article-comments" data-pb-name="Article Comments" data-pb-curated="curated" id="(.+?)">   <noscript>Please enable JavaScript to view the <a href="http:\/\/solidopinion.com\/">comments powered by SolidOpinion.<\/a>/');
  $re = '/<\/noscript> <\/div>/';
  $article_only = preg_replace($re, '<div></div>', $article_only);
  $article_only = re_remove($article_only, '/<form data-cm-aid="(.+?)" data-captcha-site-key=(.+?)>(.+?)<\/form>/');
  $article_only = preg_replace($blank_lines_re, "\n", $article_only);

  // Navigation bar pieces, printed above and below the story.
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home . $nav_source . $nav_prev . $nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home . $nav_top . $nav_source . $nav_prev . $nav_next . '</div>';

  $cpt++;
  if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
    break;
  }
}
?>
177
+
178
+
+125
sources/news-nyt.php
... ...
@@ -0,0 +1,125 @@
1
<?php
/**
 * New York Times page fragment generator.
 *
 * 1. Reads the feed named by NEWS_RSS_NYT and prints one clickable entry
 *    (image, title, source link) per item.
 * 2. Closes three <div>s that this file never opens (the surrounding page is
 *    expected to have opened them) and opens a second column.
 * 3. Downloads every linked story, keeps the inner HTML of its <article>
 *    element, simplifies the markup with regular expressions and prints it
 *    in a hidden <div class="article" id="article-N">.
 *
 * onArticle() used in the onclick handlers is JavaScript that is not defined
 * in this file. DOMinnerHTML(), re_remove() and $NEWS_RSS_MAX_ITEMS are not
 * defined here either - presumably they come from news-constants.php.
 */
include_once( 'news-constants.php' );

// Real-world HTML5 makes libxml emit a warning for every unknown tag; collect
// those internally instead of printing them into the generated page.
libxml_use_internal_errors(true);

// Must exist even when the feed cannot be parsed, because the second half of
// the script iterates over it unconditionally.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_NYT);
// Rename the namespaced tag so that the element can be read as a plain
// SimpleXML property ($item->mediacontent).
$rss_content = str_replace('media:content', 'mediacontent', $rss_content);
$xml = simplexml_load_string($rss_content);
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . htmlspecialchars((string) $channel['title'], ENT_QUOTES, 'UTF-8', false) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $article['image'] = $item->mediacontent['url'];
    $articles[$cpt] = $article;

    // Feed content is external input: escape it before it reaches the markup.
    // double_encode=false keeps entities that are already encoded intact.
    $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8', false);
    $link_attr  = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);
    $image_attr = htmlspecialchars((string) $article['image'], ENT_QUOTES, 'UTF-8', false);

    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img id="img-list" src="'.$image_attr.'" style="display:block;"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $title_html.'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
    // NOTE(review): the test runs after the increment, so up to
    // $NEWS_RSS_MAX_ITEMS + 1 entries are kept - confirm this is intended.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
  }
}
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// ---------------------------------------------------------------------------
// Clean-up tables. They are loop-invariant, so they are built once. Patterns
// are applied strictly in the order listed.
// ---------------------------------------------------------------------------

// Ad slots, icons and menu buttons removed first.
$ad_removals = array(
  '/<div id="(.+?)-slug" class="ResponsiveAd-(.+?)"><p>Advertisement<\/p><\/div>/',
  '/<aside class="css-(.+?)"><span><\/span><\/aside>/',
  '/<li class="css-(.+?)">(.+?)<\/li>/',
  '/<svg(.+?)>(.+?)<\/svg>/',
  '/<button id="desktop-sections-button" (.+?)>(.+?)<\/button>/',
  '/<div id="sponsor-slug" (.+?)><p>Supported by<\/p><\/div>/',
  '/<div class="ad top-wrapper" style="text-align:center;height:100%;display:block;min-height:250px"><div id="top"><\/div><\/div>/',
  '/<div class="ad sponsor-wrapper" style="text-align:center;height:100%;display:block"><div id="sponsor"><\/div><\/div>/',
);

// Tags reduced to their bare form : pattern => replacement.
$tag_rewrites = array(
  '/<div id="top-wrapper" class="ResponsiveAd-(.+?)">/' => '<div>',
  '/<time class="css-(.+?)>(.+?)<\/time>/' => '<time>\2</time>',
  '/<p class="css-(.+?) (.+?)">/' => '<p>',
  '/<h1 class=(.+?)><span>(.+?)<\/span><\/h1>/' => '<h1>\2</h1>',
  '/<h3 class=(.+?)>(.+?)<\/h3>/' => '<h3>\2</h3>',
  '/<div class="css-(.+?) StoryBodyCompanionColumn">/' => '<div>',
  '/<div class="css-(.+?)">/' => '<div>',
  '/<div><h2 class=(.+?)>(.+?)<\/h2><\/div>/' => '<h2>\2</h2>',
  '/<div role="toolbar" aria-label="Social Media Share buttons, Save button, and Comments Panel with current comment count" class="css-(.+?)" data-testid="share-tools">/' => '<div>',
  '/<li class="css-(.+?)"><div><ul class="css-(.+?)">/' => '<li>',
  '/<\/ul><\/div><\/li>/' => '</li>',
);

// Leftover popup buttons and skip links removed after the rewrites.
$late_removals = array(
  '/<div><button aria-haspopup="true" aria-expanded="false" (.+?)><\/button><\/div>/',
  '/<a class="css-(.+?)" href="#site-content">Skip to content<\/a><a class="css-(.+?)" href="#site-index">Skip to site index<\/a>/',
);

$cpt = 0;
foreach ($articles as $article ) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);

  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is closed immediately, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the story and keep the inner HTML of its <article> element (the
  // last one wins when the page has several). A failed or empty download
  // leaves $article_only empty instead of feeding loadHTML() an empty
  // string, which raises a ValueError on PHP 8.
  $article_only = "";
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // A dedicated loop variable is used: the original reassigned $articles
    // here, shadowing the list driving the outer loop.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Clean some sections
  foreach ($ad_removals as $pattern) {
    $article_only = re_remove($article_only, $pattern);
  }

  // Some little replacements
  foreach ($tag_rewrites as $pattern => $replacement) {
    $article_only = preg_replace($pattern, $replacement, $article_only);
  }

  foreach ($late_removals as $pattern) {
    $article_only = re_remove($article_only, $pattern);
  }

  // Finally remove empty lines
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

  // Navigation bar pieces, printed above and below the story.
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home . $nav_source . $nav_prev . $nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home . $nav_top . $nav_source . $nav_prev . $nav_next . '</div>';

  $cpt++;
  if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
    break;
  }
}
?>
124
+
125
+
+85
sources/news-wapo.php
... ...
@@ -0,0 +1,85 @@
1
<?php
/**
 * Washington Post page fragment generator.
 *
 * 1. Reads the feed named by NEWS_RSS_WAPO and prints one clickable entry
 *    (thumbnail, title, source link) per item.
 * 2. Closes three <div>s that this file never opens (the surrounding page is
 *    expected to have opened them) and opens a second column.
 * 3. Downloads every linked story, keeps the inner HTML of its <article>
 *    element and prints it, under an <h1> with the feed title, in a hidden
 *    <div class="article" id="article-N">.
 *
 * onArticle() used in the onclick handlers is JavaScript that is not defined
 * in this file. DOMinnerHTML() and $NEWS_RSS_MAX_ITEMS are not defined here
 * either - presumably they come from news-constants.php.
 */
include_once( 'news-constants.php' );

// Real-world HTML5 makes libxml emit a warning for every unknown tag; collect
// those internally instead of printing them into the generated page.
libxml_use_internal_errors(true);

// Must exist even when the feed cannot be parsed, because the second half of
// the script iterates over it unconditionally.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_WAPO);
// Rename the namespaced tag so that the element can be read as a plain
// SimpleXML property ($item->mediacontent).
$rss_content = str_replace('media:thumbnail', 'mediacontent', $rss_content);
$xml = simplexml_load_string($rss_content);
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = $xml->channel->title;
  $channel['link'] = $xml->channel->link;
  $channel['description'] = $xml->channel->description;
  $channel['pubDate'] = $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($xml->channel->pubDate);
  echo '<h4>' . htmlspecialchars((string) $channel['title'], ENT_QUOTES, 'UTF-8', false) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    $article = array();
    $article['title'] = $item->title;
    $article['link'] = $item->link;
    $article['pubDate'] = $item->pubDate;
    $article['timestamp'] = strtotime($item->pubDate);
    $article['description'] = $item->description;
    $article['image'] = $item->mediacontent['url'];
    $articles[$cpt] = $article;

    // Feed content is external input: escape it before it reaches the markup.
    // double_encode=false keeps entities that are already encoded intact.
    $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8', false);
    $link_attr  = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);
    $image_attr = htmlspecialchars((string) $article['image'], ENT_QUOTES, 'UTF-8', false);

    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$image_attr.'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $title_html.'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
    $cpt++;
    // NOTE(review): the test runs after the increment, so up to
    // $NEWS_RSS_MAX_ITEMS + 1 entries are kept - confirm this is intended.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
  }
}
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

$cpt = 0;
foreach ($articles as $article ) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8', false);
  $link_attr  = htmlspecialchars((string) $article['link'], ENT_QUOTES, 'UTF-8', false);

  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is closed immediately, like the "article-top" anchor above.
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the story and keep the inner HTML of its <article> element (the
  // last one wins when the page has several). A failed or empty download
  // leaves $article_only empty instead of feeding loadHTML() an empty
  // string, which raises a ValueError on PHP 8.
  $article_only = "";
  $article_content = file_get_contents((string) $article['link']);
  if ($article_content !== false && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    // A dedicated loop variable is used: the original reassigned $articles
    // here, shadowing the list driving the outer loop.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Navigation bar pieces, printed above and below the story.
  $nav_home   = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_top    = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_prev   = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
  $nav_next   = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';

  echo $nav_home . $nav_source . $nav_prev . $nav_next;
  echo '<div class="extract-content" id="'.$cpt.'">'."<h1>".$title_html."</h1>".$article_only.'</div>';
  // The trailing </div> closes the <div class="article"> opened above.
  echo $nav_home . $nav_top . $nav_source . $nav_prev . $nav_next . '</div>';

  $cpt++;
  if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
    break;
  }
}
?>
+28
sources/rss.php
... ...
@@ -0,0 +1,28 @@
1
<?php
/**
 * Feed registry: one NEWS_RSS_* constant per source, plus two lookup tables
 * ($array_title, $array_url) keyed by the source's short identifier.
 */

// Feed URLs.
define('NEWS_RSS_NYT', 'http://rss.nytimes.com/services/xml/rss/nyt/World.xml');
define('NEWS_RSS_WAPO', 'http://feeds.washingtonpost.com/rss/world');
define('NEWS_RSS_GUARDIAN', 'https://www.theguardian.com/world/rss');
define('NEWS_RSS_LATIMES', 'http://www.latimes.com/world/rss2.0.xml');
define('NEWS_RSS_CNA', 'https://www.channelnewsasia.com/rssfeeds/8395986');
define('NEWS_RSS_BBC', 'http://feeds.bbci.co.uk/news/rss.xml');
define('NEWS_RSS_AJE', 'https://www.aljazeera.com/xml/rss/all.xml');

// Human-readable name of each source.
$array_title = array();
$array_title['nyt']      = 'New York Times';
$array_title['wapo']     = 'Washington Post';
$array_title['guardian'] = 'Guardian';
$array_title['latimes']  = 'LA Times';
$array_title['cna']      = 'Channel NewsAsia';
$array_title['bbc']      = 'BBC Top Stories';
$array_title['aje']      = 'AlJazeera English';

// Feed URL of each source (same keys, same order as $array_title).
$array_url = array();
$array_url['nyt']      = NEWS_RSS_NYT;
$array_url['wapo']     = NEWS_RSS_WAPO;
$array_url['guardian'] = NEWS_RSS_GUARDIAN;
$array_url['latimes']  = NEWS_RSS_LATIMES;
$array_url['cna']      = NEWS_RSS_CNA;
$array_url['bbc']      = NEWS_RSS_BBC;
$array_url['aje']      = NEWS_RSS_AJE;
?>