Showing 2 changed files with 171 additions and 2 deletions
+146
sources/news-scmp.php
... ...
@@ -0,0 +1,146 @@
1
+<?php
2
+include_once( 'news-constants.php' );
3
+
4
// Fetch the SCMP RSS feed, collect its items into $articles and echo the
// clickable headline list. $articles is read again by the article-rendering
// loop further down this file, so it is always initialised here, even when
// the feed cannot be read.
$articles = array();

// Feed values are remote, untrusted text: escape them before they are placed
// in HTML. double_encode is disabled so entities already present in the feed
// are not escaped a second time.
$escapeHtml = function ($text) {
  $text = is_array($text) ? '' : (string) $text;
  return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
};

$rss_content = file_get_contents(NEWS_RSS_SCMP);
$xml = false;
if ($rss_content !== false) {
  // Rename the namespaced <media:content> element so it can be read below
  // as the plain property $item->mediacontent.
  $orgStrings = array( 'media:content');
  $newStrings = array( 'mediacontent');
  $rss_content = str_replace($orgStrings, $newStrings, $rss_content);
  $xml = simplexml_load_string($rss_content);
}

if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  // Channel metadata, stored as plain strings rather than SimpleXMLElement
  // objects.
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . $escapeHtml($channel['title']) . '</h4>';

  $SEARCHREF = "?ref=rss";
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    $article = array();
    $article['title'] = (string) $item->title;
    $article['link'] = (string) $item->link;
    //Remove reference from links
    $posref = strpos($article['link'], $SEARCHREF);
    if ($posref !== false) {
      $article['link'] = substr($article['link'], 0, $posref);
    }
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    // Empty string when the item carries no media element.
    $article['image'] = (string) $item->mediacontent['url'];
    $articles[$cpt] = $article;

    $safeLink = $escapeHtml($article['link']);
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">'.PHP_EOL;
    echo '<img src="'.$escapeHtml($article['image']).'" style="display:inline;" width="100%"><br>'.PHP_EOL;
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;'.PHP_EOL;
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;'.PHP_EOL;
    echo $escapeHtml($article['title']).'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$safeLink.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>'.PHP_EOL;

    $cpt++;
    // NOTE(review): $NEWS_RSS_MAX_ITEMS is not assigned in this file;
    // presumably news-constants.php provides it - confirm. Because the
    // counter is compared after the increment, up to MAX + 1 items are kept.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
  }
}
47
+
48
// Static markup between the headline list and the per-article blocks:
// closes the "col-4" div (presumably opened by the including page - confirm)
// and emits the "col-6" article pane with its "article-top" anchor and the
// empty "article-current" placeholder.
$layoutLines = array(
  '</div><!-- ./col-4 -->',
  '<div id="article-display" class="col-6">',
  '<a name="article-top"></a><div id="article-current"></div>',
  '</div><!-- ./col-6 -->',
);
foreach ($layoutLines as $layoutLine) {
  echo $layoutLine.PHP_EOL;
}
52
+
53
// For every headline collected earlier in this file, download the article
// page, pull the body paragraphs out of the Apollo state JSON embedded in
// the page, and echo a hidden <div class="article"> with navigation
// controls.

// The feed may have failed to load, in which case there is nothing to render.
if (!isset($articles) || !is_array($articles)) {
  $articles = array();
}

// Feed and article values are remote, untrusted text: escape them before
// they are placed in HTML. double_encode is disabled so entities already
// present in the source text are not escaped a second time.
$escapeHtml = function ($text) {
  $text = is_array($text) ? '' : (string) $text;
  return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
};

// Extracts the article body as HTML from the raw page source.
// Returns null when the embedded JSON cannot be located or does not have
// the expected layout, otherwise the (possibly empty) paragraph HTML.
$extractScmpBody = function ($html) use ($escapeHtml) {
  if (!is_string($html) || $html === '') {
    return null;
  }

  // The JSON sits between this assignment and the next script boundary.
  $startMarker = "window.__APOLLO_STATE__=";
  $endMarker = "</script><script>";
  $markerPos = strpos($html, $startMarker);
  if ($markerPos === false) {
    return null;
  }
  $jsonStart = $markerPos + strlen($startMarker);
  $jsonEnd = strpos($html, $endMarker, $jsonStart);
  if ($jsonEnd === false) {
    return null;
  }

  $state = json_decode(substr($html, $jsonStart, $jsonEnd - $jsonStart), true);
  if (!is_array($state)) {
    return null;
  }

  // Layout relied upon: the SECOND top-level entry holds "ROOT_QUERY" plus
  // the article nodes, and the SECOND entry of ROOT_QUERY carries the "id"
  // naming the article node. NOTE(review): positional lookup is fragile;
  // verify against the current page format.
  $sections = array_values($state);
  if (!isset($sections[1]) || !is_array($sections[1])) {
    return null;
  }
  $store = $sections[1];
  if (!isset($store["ROOT_QUERY"]) || !is_array($store["ROOT_QUERY"])) {
    return null;
  }
  $queries = array_values($store["ROOT_QUERY"]);
  if (!isset($queries[1]["id"]) || !is_scalar($queries[1]["id"])) {
    return null;
  }
  $articleJsonID = (string) $queries[1]["id"];
  DEBUG($articleJsonID);
  if (!isset($store[$articleJsonID]) || !is_array($store[$articleJsonID])) {
    return null;
  }

  $paragraphs = "";
  foreach ($store[$articleJsonID] as $key => $value) {
    // Only fields whose name contains "body" hold the article content.
    if (strpos((string) $key, "body") === false) {
      continue;
    }
    if (!is_array($value) || !isset($value["json"]) || !is_array($value["json"])) {
      continue;
    }
    foreach ($value["json"] as $htmlItem) {
      if (!is_array($htmlItem) || !isset($htmlItem["type"]) || $htmlItem["type"] != "p") {
        continue;
      }
      if (!isset($htmlItem["children"][0]) || !is_array($htmlItem["children"][0])) {
        continue;
      }
      // Only the first child of each paragraph is rendered.
      $child = $htmlItem["children"][0];
      $contentType = isset($child["type"]) ? $child["type"] : "";
      if ($contentType == "text") {
        $text = isset($child["data"]) ? $child["data"] : "";
        $paragraphs .= "<p>".$escapeHtml($text)."</p>".PHP_EOL;
      } else if ($contentType == "img") {
        $imgTitle = isset($child["attribs"]["title"]) ? $child["attribs"]["title"] : "";
        $imgUrl = isset($child["attribs"]["src"]) ? $child["attribs"]["src"] : "";
        $paragraphs .= "<p><img src=\"".$escapeHtml($imgUrl)."\"><em>".$escapeHtml($imgTitle)."</em></p>".PHP_EOL;
      }
    }
  }
  return $paragraphs;
};

$cpt = 0;
foreach ($articles as $article) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $articleUrl = isset($article['link']) ? (string) $article['link'] : '';
  $safeLink = $escapeHtml($articleUrl);
  $safeTitle = $escapeHtml(isset($article['title']) ? $article['title'] : '');

  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The anchor is closed immediately so the markup that follows is not
  // swallowed by it.
  echo "<a name=\"article-$cpt\"></a>";

  // Only fetch http(s) URLs: the link comes from the remote feed and must
  // not be able to address local files or other stream wrappers.
  $article_content = false;
  if (preg_match('#^https?://#i', $articleUrl) === 1) {
    $article_content = file_get_contents($articleUrl);
  }
  $article_only = $extractScmpBody($article_content);
  if ($article_only === null) {
    $article_only = "Extraction failed";
  }

  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-source" style="display:inline;"><a href="'.$safeLink.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>'.PHP_EOL;
  echo '<div class="extract-content" id="'.$cpt.'">'.'<h1>'.$safeTitle.'</h1>'.$article_only.'</div>';
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-source" style="display:inline;"><a href="'.$safeLink.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  // The final </div> closes the <div class="article"> opened above.
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>'.PHP_EOL;

  $cpt++;
  // NOTE(review): $NEWS_RSS_MAX_ITEMS is not assigned in this file;
  // presumably provided by an included file - confirm.
  if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
    break;
  }
}
144
+?>
145
+
146
+
+25 -2
sources/rss.php
... ...
@@ -6,6 +6,8 @@ define('NEWS_RSS_CNA', 'https://www.channelnewsasia.com/rssfeeds/8395986');
6 6
 define('NEWS_RSS_BBC', 'http://feeds.bbci.co.uk/news/rss.xml');
7 7
 define('NEWS_RSS_AJE', 'https://www.aljazeera.com/xml/rss/all.xml');
8 8
 define('NEWS_RSS_SMH', 'https://www.smh.com.au/rss/world.xml');
9
+define('NEWS_RSS_SCMP', 'https://www.scmp.com/rss/91/feed');
10
+//define('NEWS_RSS_STAR', '');
9 11
 
10 12
 $array_title=array(
11 13
   "nyt" => "New York Times",
... ...
@@ -13,7 +15,8 @@ $array_title=array(
13 15
   "cna" => "Channel NewsAsia",
14 16
   "bbc" => "BBC Top Stories",
15 17
   "aje" => "AlJazeera English",
16
-  "smh" => "Sydney Morning Herald"
18
+  "smh" => "Sydney Morning Herald",
19
+  "scmp" => "South China Morning Post"
17 20
 );
18 21
 $array_url=array(
19 22
   "nyt" => NEWS_RSS_NYT,
... ...
@@ -21,6 +24,26 @@ $array_url=array(
21 24
   "cna" => NEWS_RSS_CNA,
22 25
   "bbc" => NEWS_RSS_BBC,
23 26
   "aje" => NEWS_RSS_AJE,
24
-  "smh" => NEWS_RSS_SMH
27
+  "smh" => NEWS_RSS_SMH,
28
+  "scmp" => NEWS_RSS_SCMP
25 29
 );
30
+
31
/**
 * Writes a "[DEBUG]"-prefixed line to php://stderr, but only when the
 * global $DEBUG flag is truthy.
 *
 * @param mixed $Message Text to log. Arrays, and objects that cannot be
 *                       cast to string, are rendered with print_r().
 * @return void
 */
function DEBUG($Message) {
  global $DEBUG;
  if (!$DEBUG) {
    return;
  }
  if (is_array($Message) || (is_object($Message) && !method_exists($Message, '__toString'))) {
    $Message = print_r($Message, true);
  }
  $stderr = fopen('php://stderr', 'w');
  if ($stderr === false) {
    // Logging is best effort: never let a failed open abort the page.
    return;
  }
  fwrite($stderr,"[DEBUG]".$Message.PHP_EOL);
  fclose($stderr);
}
39
/**
 * Writes a "[WARNING]"-prefixed line to php://stderr.
 *
 * @param mixed $Message Text to log. Arrays, and objects that cannot be
 *                       cast to string, are rendered with print_r().
 * @return void
 */
function WARNING($Message) {
  if (is_array($Message) || (is_object($Message) && !method_exists($Message, '__toString'))) {
    $Message = print_r($Message, true);
  }
  $stderr = fopen('php://stderr', 'w');
  if ($stderr === false) {
    // Logging is best effort: never let a failed open abort the page.
    return;
  }
  fwrite($stderr,"[WARNING]".$Message.PHP_EOL);
  fclose($stderr);
}
44
/**
 * Writes an "[ERROR]"-prefixed line to php://stderr.
 *
 * @param mixed $Message Text to log. Arrays, and objects that cannot be
 *                       cast to string, are rendered with print_r().
 * @return void
 */
function ERROR($Message) {
  if (is_array($Message) || (is_object($Message) && !method_exists($Message, '__toString'))) {
    $Message = print_r($Message, true);
  }
  $stderr = fopen('php://stderr', 'w');
  if ($stderr === false) {
    // Logging is best effort: never let a failed open abort the page.
    return;
  }
  fwrite($stderr,"[ERROR]".$Message.PHP_EOL);
  fclose($stderr);
}
26 49
 ?>