... | ... |
@@ -7,4 +7,5 @@ php news-latimes.php > latimes.html 2> /dev/null |
7 | 7 |
php news-cna.php > cna.html 2> /dev/null |
8 | 8 |
php news-aje.php > aje.html 2> /dev/null |
9 | 9 |
php news-bbc.php > bbc.html 2> /dev/null |
10 |
+php news-smh.php > smh.html 2> /dev/null |
|
10 | 11 |
scp *html rss.php kawi.fr:/var/www/html/newsen/sources/ &> /dev/null |
... | ... |
@@ -4,3 +4,8 @@ php news-nyt.php > nyt.html 2> /dev/null |
4 | 4 |
php news-wapo.php > wapo.html 2> /dev/null |
5 | 5 |
php news-guardian.php > guardian.html 2> /dev/null |
6 | 6 |
php news-latimes.php > latimes.html 2> /dev/null |
7 |
+php news-cna.php > cna.html 2> /dev/null |
|
8 |
+php news-aje.php > aje.html 2> /dev/null |
|
9 |
+php news-bbc.php > bbc.html 2> /dev/null |
|
10 |
+php news-smh.php > smh.html 2> /dev/null |
|
11 |
+#scp *html rss.php kawi.fr:/var/www/html/newsen/sources/ &> /dev/null |
... | ... |
@@ -0,0 +1,114 @@ |
<?php
/**
 * Sydney Morning Herald source page generator.
 *
 * Reads the RSS feed named by NEWS_RSS_SMH, prints a clickable headline list,
 * then downloads each linked article, extracts the inner HTML of its first
 * <article> element, strips known boilerplate fragments and prints one hidden
 * <div class="article"> per story.
 *
 * The output is an HTML fragment: the three closing </div> tags printed between
 * the two passes close containers that this script does not open itself.
 *
 * Relies on names that are not defined in this file: NEWS_RSS_SMH,
 * $NEWS_RSS_MAX_ITEMS, re_remove() and DOMinnerHTML().
 * NOTE(review): presumably all of them come from news-constants.php - confirm.
 */
include_once( 'news-constants.php' );

// Filled by the headline pass; initialised here so the article pass is safe
// even when the feed cannot be read or contains no items.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_SMH);
$xml = false;
if ($rss_content !== false && $rss_content !== '') {
    $xml = simplexml_load_string($rss_content);
}

if ($xml === false) {
    echo 'Failed to read RSS';
} else {
    // Feed text comes from a remote server: escape it, but leave entities that
    // are already present (e.g. inside CDATA titles) untouched.
    echo '<h4>' . htmlspecialchars((string) $xml->channel->title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false) . '</h4>';

    $cpt = 0;
    foreach ($xml->channel->item as $item) {
        // Remove the tracking reference from links.
        $link = (string) $item->link;
        $posref = strpos($link, '?ref=rss');
        if ($posref !== false) {
            $link = substr($link, 0, $posref);
        }

        $article = array(
            'title' => (string) $item->title,
            'link'  => $link,
        );
        $articles[$cpt] = $article;

        $safe_title = htmlspecialchars($article['title'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
        $safe_link  = htmlspecialchars($article['link'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

        echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
        echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
        echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div> ';
        echo $safe_title.' ';
        echo '<div id="nav-source" style="display:inline;"><a href="'.$safe_link.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';

        $cpt++;
        // NOTE(review): checked after the increment, so up to
        // $NEWS_RSS_MAX_ITEMS + 1 items are listed - confirm this is intended.
        if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
            break;
        }
    }
}

echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';

// Boilerplate fragments stripped from every article, applied in this order.
$cleanup_patterns = array(
    '/<header class="_2qhpu"><h2 class="_2qhpu">Most Viewed in (.+)<\/h2><\/header>/',
    '/<div><section aria-live="polite" aria-busy="true" class="_2zaYr" style="height:380px"><div class="_29X_Z"><span class="_2wzgv D5idv _3lVFK"><span class="_29Qt8"><\/span><span class="_3qqDc">Loading<\/span><\/span><\/div><\/section><\/div>/',
    '/<div class="_8Cum8"><a class="_4oUp_" href="(.+?)">License this article<\/a><\/div>/',
    '/<p class="_3MW1W">Our new podcast series from the team behind Phoebe\'s Fall<\/p>/',
    '/<p class="_29xCU"><svg class="QMY9O" height="18px" width="18px" aria-hidden="true" focusable="false"><use xlink:href="#icon-headphones"><\/use><\/svg>View episodes<\/p>/',
    '/<p class="ZybjO">A relationship banned under traditional law.<\/p>/',
    '/<section class="aKWhn noPrint"><div class="dLGbc"><\/div><\/section>/',
    '/<button class="_30X7v _2-NUy">(.+)<\/button>/',
    '/<script type="application\/ld\+json">(.+)<\/script>/',
    '/<div id="adspot-N-6x2-pos1" class="_1-uzy"><\/div>/',
    '/<div id="adspot-N-300x164-pos1" class="_1-uzy"><\/div>/',
    '/<div class="noPrint"><\/div>/',
    '/<script src="https:\/\/ad.doubleclick.net\/(.+)<\/script>/',
    '/<div class="ymInT _3uJpn noPrint"><span class="_2wzgv D5idv _3lVFK"><span class="_29Qt8"><\/span><span class="_3qqDc">Loading<\/span><\/span><\/div>/',
    '/<div class="_1lC_G"><\/div>/',
    '/<div class="_1H_qO"><\/div>/',
    '/<p> <\/p>/',
);

// Real-world HTML5 pages make libxml emit many parse warnings; collect them
// internally instead of letting them be printed into the generated page.
$libxml_previous = libxml_use_internal_errors(true);

$cpt = 0;
foreach ($articles as $article) {
    $cpt_prev = $cpt - 1;
    $cpt_next = $cpt + 1;

    $safe_title = htmlspecialchars($article['title'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $safe_link  = htmlspecialchars($article['link'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

    echo '<!-- ==================== article '.$cpt.'============== -->';
    echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
    echo "<hr>";
    echo "<a name=\"article-$cpt\"></a>";

    // Extract the inner HTML of the first <article> element, if the page
    // could be downloaded at all; otherwise the article body stays empty.
    $article_only = "";
    $article_content = file_get_contents($article['link']);
    if ($article_content !== false && $article_content !== '') {
        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = false;
        $doc->formatOutput = true;
        $doc->loadHTML($article_content);
        libxml_clear_errors();

        // Kept in its own variable: $articles is the list being iterated.
        $article_nodes = $doc->getElementsByTagName('article');
        $first_article = $article_nodes->item(0);
        if ($first_article !== null) {
            $article_only = DOMinnerHTML($first_article);
        }
    }

    // Clean some sections.
    foreach ($cleanup_patterns as $pattern) {
        $article_only = re_remove($article_only, $pattern);
    }

    // Finally remove empty lines.
    $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);

    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$safe_link.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
    echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
    echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
    echo '<div class="extract-content" id="'.$cpt.'">'.'<h1>'.$safe_title.'</h1>'.$article_only.'</div>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div> ';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$safe_link.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
    echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
    echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>';

    $cpt++;
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
        break;
    }
}

// Restore whatever libxml error mode was active before this script changed it.
libxml_use_internal_errors($libxml_previous);
... | ... |
@@ -6,6 +6,7 @@ define('NEWS_RSS_LATIMES', 'http://www.latimes.com/world/rss2.0.xml'); |
6 | 6 |
define('NEWS_RSS_CNA', 'https://www.channelnewsasia.com/rssfeeds/8395986'); |
7 | 7 |
define('NEWS_RSS_BBC', 'http://feeds.bbci.co.uk/news/rss.xml'); |
8 | 8 |
define('NEWS_RSS_AJE', 'https://www.aljazeera.com/xml/rss/all.xml'); |
9 |
+define('NEWS_RSS_SMH', 'https://www.smh.com.au/rss/feed.xml'); |
|
9 | 10 |
|
10 | 11 |
$array_title=array( |
11 | 12 |
"nyt" => "New York Times", |
... | ... |
@@ -14,7 +15,8 @@ $array_title=array( |
14 | 15 |
"latimes" => "LA Times", |
15 | 16 |
"cna" => "Channel NewsAsia", |
16 | 17 |
"bbc" => "BBC Top Stories", |
17 |
- "aje" => "AlJazeera English" |
|
18 |
+ "aje" => "AlJazeera English", |
|
19 |
+ "smh" => "Sydney Morning Herald" |
|
18 | 20 |
); |
19 | 21 |
$array_url=array( |
20 | 22 |
"nyt" => NEWS_RSS_NYT, |
... | ... |
@@ -23,6 +25,7 @@ $array_url=array( |
23 | 25 |
"latimes" => NEWS_RSS_LATIMES, |
24 | 26 |
"cna" => NEWS_RSS_CNA, |
25 | 27 |
"bbc" => NEWS_RSS_BBC, |
26 |
- "aje" => NEWS_RSS_AJE |
|
28 |
+ "aje" => NEWS_RSS_AJE, |
|
29 |
+ "smh" => NEWS_RSS_SMH |
|
27 | 30 |
); |
28 | 31 |
?> |