| ... | ... |
@@ -7,4 +7,5 @@ php news-latimes.php > latimes.html 2> /dev/null |
| 7 | 7 |
php news-cna.php > cna.html 2> /dev/null |
| 8 | 8 |
php news-aje.php > aje.html 2> /dev/null |
| 9 | 9 |
php news-bbc.php > bbc.html 2> /dev/null |
| 10 |
+php news-smh.php > smh.html 2> /dev/null |
|
| 10 | 11 |
scp *html rss.php kawi.fr:/var/www/html/newsen/sources/ &> /dev/null |
| ... | ... |
@@ -4,3 +4,8 @@ php news-nyt.php > nyt.html 2> /dev/null |
| 4 | 4 |
php news-wapo.php > wapo.html 2> /dev/null |
| 5 | 5 |
php news-guardian.php > guardian.html 2> /dev/null |
| 6 | 6 |
php news-latimes.php > latimes.html 2> /dev/null |
| 7 |
+php news-cna.php > cna.html 2> /dev/null |
|
| 8 |
+php news-aje.php > aje.html 2> /dev/null |
|
| 9 |
+php news-bbc.php > bbc.html 2> /dev/null |
|
| 10 |
+php news-smh.php > smh.html 2> /dev/null |
|
| 11 |
+#scp *html rss.php kawi.fr:/var/www/html/newsen/sources/ &> /dev/null |
| ... | ... |
@@ -0,0 +1,114 @@ |
| 1 |
<?php
/**
 * Sydney Morning Herald reader, part 1: fetch the RSS feed named by
 * NEWS_RSS_SMH and print the clickable headline list.
 *
 * Every listed headline is also stored in $articles, which the second half
 * of this script iterates to download and print the article bodies.
 */
include_once( 'news-constants.php' );

// Defined up front so the article loop further down never iterates an
// undefined variable when the feed cannot be read or parsed.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_SMH);
// file_get_contents() returns false on failure; only parse a real payload.
$xml = ($rss_content === false) ? false : simplexml_load_string($rss_content);
if ($xml === false) {
    echo 'Failed to read RSS';
} else {
    // SimpleXML nodes are cast to plain strings so that string functions
    // receive real strings and no reference to the XML tree is kept around.
    $channel = array();
    $channel['title'] = (string) $xml->channel->title;
    $channel['link'] = (string) $xml->channel->link;
    $channel['description'] = (string) $xml->channel->description;
    $channel['pubDate'] = (string) $xml->channel->pubDate;
    $channel['timestamp'] = strtotime($channel['pubDate']);
    // Feed text is external input: escape it before writing it into HTML.
    echo '<h4>' . htmlspecialchars($channel['title'], ENT_QUOTES, 'UTF-8') . '</h4>';

    // Tracking suffix removed from article links (loop-invariant).
    $SEARCHREF="?ref=rss";
    $cpt=0;
    foreach ($xml->channel->item as $item) {
        $article = array();
        $article['title'] = (string) $item->title;
        $article['link'] = (string) $item->link;
        //Remove reference from links
        $posref=strpos($article['link'], $SEARCHREF);
        // strpos() yields false when absent; compare strictly instead of
        // relying on the truthiness of the returned offset.
        if( $posref !== false ) {
            $article['link'] = substr($article['link'],0,$posref);
        }
        $article['pubDate'] = (string) $item->pubDate;
        $article['timestamp'] = strtotime($article['pubDate']);
        $article['description'] = (string) $item->description;
        // NOTE(review): a namespaced <media:content> element is probably not
        // reachable as ->mediacontent, so this is likely always empty; it is
        // only consumed by the disabled <img> line below — confirm.
        $article['image'] = (string) $item->mediacontent['url'];
        $articles[$cpt] = $article;

        // The raw values stay in $articles; escaping happens only at output.
        $title_html = htmlspecialchars($article['title'], ENT_QUOTES, 'UTF-8');
        $link_attr = htmlspecialchars($article['link'], ENT_QUOTES, 'UTF-8');

        echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
        //echo '<img id="img-list" src="'.$article['image'].'" style="display:block;"><br>';
        echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
        echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div> ';
        echo $title_html.' ';
        echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
        $cpt++;
        // NOTE(review): $NEWS_RSS_MAX_ITEMS is not set in this script;
        // presumably news-constants.php defines it — confirm. The check runs
        // after the increment, so up to $NEWS_RSS_MAX_ITEMS + 1 items are listed.
        if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
            break;
        }
    }
}
|
| 44 |
// Sydney Morning Herald reader, part 2: download every listed article, keep
// the inner HTML of its first <article> element, strip known site clutter and
// print one hidden <div id="article-N"> per article with navigation controls.

// Closes the column holding the headline list and opens the article column.
// The three closed containers are not opened by this script; presumably the
// page template embedding this fragment opens them — confirm.
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// Site chrome (ads, loaders, promos, share buttons, JSON-LD) handed to
// re_remove() in this order. re_remove() is defined outside this file;
// presumably it deletes every match of the pattern — confirm.
$clean_patterns = array(
    '/<header class="_2qhpu"><h2 class="_2qhpu">Most Viewed in (.+)<\/h2><\/header>/',
    '/<div><section aria-live="polite" aria-busy="true" class="_2zaYr" style="height:380px"><div class="_29X_Z"><span class="_2wzgv D5idv _3lVFK"><span class="_29Qt8"><\/span><span class="_3qqDc">Loading<\/span><\/span><\/div><\/section><\/div>/',
    '/<div class="_8Cum8"><a class="_4oUp_" href="(.+?)">License this article<\/a><\/div>/',
    '/<p class="_3MW1W">Our new podcast series from the team behind Phoebe\'s Fall<\/p>/',
    '/<p class="_29xCU"><svg class="QMY9O" height="18px" width="18px" aria-hidden="true" focusable="false"><use xlink:href="#icon-headphones"><\/use><\/svg>View episodes<\/p>/',
    '/<p class="ZybjO">A relationship banned under traditional law.<\/p>/',
    '/<section class="aKWhn noPrint"><div class="dLGbc"><\/div><\/section>/',
    '/<button class="_30X7v _2-NUy">(.+)<\/button>/',
    '/<script type="application\/ld\+json">(.+)<\/script>/',
    '/<div id="adspot-N-6x2-pos1" class="_1-uzy"><\/div>/',
    '/<div id="adspot-N-300x164-pos1" class="_1-uzy"><\/div>/',
    '/<div class="noPrint"><\/div>/',
    '/<script src="https:\/\/ad.doubleclick.net\/(.+)<\/script>/',
    '/<div class="ymInT _3uJpn noPrint"><span class="_2wzgv D5idv _3lVFK"><span class="_29Qt8"><\/span><span class="_3qqDc">Loading<\/span><\/span><\/div>/',
    '/<div class="_1lC_G"><\/div>/',
    '/<div class="_1H_qO"><\/div>/',
    '/<p> <\/p>/',
);

// Iterate a separate list: the headline array must not be clobbered by the
// DOM lookup inside the loop, and it may be unset if the feed failed to load.
$article_list = isset($articles) ? $articles : array();

$cpt=0;
foreach ($article_list as $article ) {
    $cpt_prev=$cpt-1;
    $cpt_next=$cpt+1;
    // Feed values may still be SimpleXML nodes; normalise to strings and
    // escape them for the HTML contexts they are printed into.
    $link = (string) $article['link'];
    $link_attr = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
    $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8');

    echo '<!-- ==================== article '.$cpt.'============== -->';
    echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
    echo "<hr>";
    // Named anchor only; closed immediately so it does not wrap the whole
    // article (and the links inside it).
    echo "<a name=\"article-$cpt\"></a>";

    $article_only="";
    $article_content = file_get_contents($link);
    // A failed or empty download must not reach loadHTML(): it rejects empty
    // input, which would abort the run for every remaining article.
    if( $article_content !== false && $article_content !== '' ) {
        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = false;
        $doc->formatOutput = true;
        // Real-world HTML5 triggers libxml warnings; collect them internally
        // so they cannot leak into the generated page, then restore the
        // previous error-handling mode.
        $previous_libxml_mode = libxml_use_internal_errors(true);
        $doc->loadHTML($article_content);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_libxml_mode);

        $article_nodes = $doc->getElementsByTagName('article');
        if( $article_nodes->length > 0 ) {
            // DOMinnerHTML() is defined outside this file; presumably it
            // serialises the children of the given node — confirm.
            $article_only=DOMinnerHTML($article_nodes->item(0));
        }
    }

    //Clean some sections
    foreach ($clean_patterns as $pattern) {
        $article_only = re_remove($article_only, $pattern);
    }

    //Some little replacements
//    $re = '/<\/div><\/div><\/div><\/section><\/aside><section class="_1ysFk"><div class="_1665V undefined">/';
//    $article_only = preg_replace($re, '</section></aside><section class="_1ysFk"><div class="_1665V undefined">', $article_only);

    //Finally remove empty lines
    $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

    // Navigation bar above the article.
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
    echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
    echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
    echo '<div class="extract-content" id="'.$cpt.'">'.'<h1>'.$title_html.'</h1>'.$article_only.'</div>';
    // Navigation bar below the article; the final </div> closes #article-N.
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div> ';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
    echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
    echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>';
    $cpt++;
    // NOTE(review): $NEWS_RSS_MAX_ITEMS is not set in this script; presumably
    // news-constants.php defines it — confirm.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
        break;
    }
}
?>
|
| 113 |
+ |
|
| 114 |
+ |
| ... | ... |
@@ -6,6 +6,7 @@ define('NEWS_RSS_LATIMES', 'http://www.latimes.com/world/rss2.0.xml');
|
| 6 | 6 |
define('NEWS_RSS_CNA', 'https://www.channelnewsasia.com/rssfeeds/8395986');
|
| 7 | 7 |
define('NEWS_RSS_BBC', 'http://feeds.bbci.co.uk/news/rss.xml');
|
| 8 | 8 |
define('NEWS_RSS_AJE', 'https://www.aljazeera.com/xml/rss/all.xml');
|
| 9 |
+define('NEWS_RSS_SMH', 'https://www.smh.com.au/rss/feed.xml');
|
|
| 9 | 10 |
|
| 10 | 11 |
$array_title=array( |
| 11 | 12 |
"nyt" => "New York Times", |
| ... | ... |
@@ -14,7 +15,8 @@ $array_title=array( |
| 14 | 15 |
"latimes" => "LA Times", |
| 15 | 16 |
"cna" => "Channel NewsAsia", |
| 16 | 17 |
"bbc" => "BBC Top Stories", |
| 17 |
- "aje" => "AlJazeera English" |
|
| 18 |
+ "aje" => "AlJazeera English", |
|
| 19 |
+ "smh" => "Sydney Morning Herald" |
|
| 18 | 20 |
); |
| 19 | 21 |
$array_url=array( |
| 20 | 22 |
"nyt" => NEWS_RSS_NYT, |
| ... | ... |
@@ -23,6 +25,7 @@ $array_url=array( |
| 23 | 25 |
"latimes" => NEWS_RSS_LATIMES, |
| 24 | 26 |
"cna" => NEWS_RSS_CNA, |
| 25 | 27 |
"bbc" => NEWS_RSS_BBC, |
| 26 |
- "aje" => NEWS_RSS_AJE |
|
| 28 |
+ "aje" => NEWS_RSS_AJE, |
|
| 29 |
+ "smh" => NEWS_RSS_SMH |
|
| 27 | 30 |
); |
| 28 | 31 |
?> |