Showing 2 changed files with 269 additions and 0 deletions
+266
sources/news-lobs.php
... ...
@@ -0,0 +1,266 @@
1
<?php
include_once( 'news-constants.php' );

// Left-hand menu for the L'Obs feed: fetch the RSS document, print the
// channel header, then one clickable entry per item. Every item is also
// collected into $articles (title, link, pubDate, timestamp, description)
// for the code further down this file that renders the article bodies.

// Always defined, so later code can iterate it even if the feed is unreadable.
$articles = array();

$rss_content = http_get_contents(NEWS_RSS_LOBS);
// Only hand a real, non-empty string to the XML parser; anything else is
// reported as an unreadable feed.
$xml = ( is_string($rss_content) && '' !== $rss_content )
  ? simplexml_load_string($rss_content)
  : false;

if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  // Cast SimpleXMLElement nodes to plain strings so they can be escaped
  // and passed to string functions without implicit conversions.
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);

  // The title is used twice: as HTML text and as a JavaScript string inside
  // an HTML attribute. json_encode() produces a valid JS string literal
  // (a title such as "L'Obs" would otherwise end a hand-quoted string early),
  // and htmlspecialchars() makes that literal safe inside the attribute.
  $channel_title_html = htmlspecialchars($channel['title'], ENT_QUOTES, 'UTF-8', false);
  $channel_title_js = htmlspecialchars(json_encode($channel['title']), ENT_QUOTES, 'UTF-8');
  echo '<h4>' . $channel_title_html.'<button id="html-btn" onclick="printHTMLAll('.$channel_title_js.')" style="display:inline;"><img src="img/html5.png" width="24px" height="24px"></button></h4>'.PHP_EOL;

  $cpt=0;
  foreach ($xml->channel->item as $item) {
    $article = array();
    $article['title'] = (string) $item->title;
    // Drop the RSS tracking parameter from the article URL.
    $article['link'] = str_replace('?xtor=RSS-13', '', (string) $item->link);
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    // The raw (unescaped) values are stored; escaping happens only on output.
    $articles[$cpt] = $article;

    $title_html = htmlspecialchars($article['title'], ENT_QUOTES, 'UTF-8', false);
    $link_attr = htmlspecialchars($article['link'], ENT_QUOTES, 'UTF-8');

    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">'.PHP_EOL;
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;'.PHP_EOL;
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
    echo $title_html.'&nbsp;&nbsp;'.PHP_EOL;
    echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>'.PHP_EOL;

    $cpt++;
    // NOTE(review): the test runs after the increment with ">", so up to
    // $NEWS_RSS_MAX_ITEMS + 1 entries are listed. Kept as-is because the
    // limit is defined outside this file; confirm the intended count.
    if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
    }
  }
}
43
// Close the article-list column (the "col-4" div is presumably opened by the
// including page - TODO confirm) and emit the empty col-6 pane, with its
// "article-top" anchor and "article-current" placeholder.
echo '</div><!-- ./col-4 -->',
  '<div id="article-display" class="col-6">', PHP_EOL,
  '<a name="article-top"></a><div id="article-current"></div>',
  '</div><!-- ./col-6 -->', PHP_EOL;
47
// Render one hidden <div class="article"> per collected feed item: download
// the article page, extract its body (or its portfolio layout), strip site
// chrome with a fixed list of rewrites, and print it between navigation bars.

// Literal wrappers replaced by bare tags (applied in order by str_replace()).
$wrapper_map = array(
  '<div class="row">' => '<div>',
  '<div class="col-md-2">' => '<div>',
  '<div class="col-md-8">' => '<div>',
  '<div class="container width_wrap">' => '<div>',
  '<div class="col-left">' => '<div>',
  '<div class="article-header__inner col-sm-10 col-sm-offset-1 col-md-8 col-md-offset-2">' => '<div>',
  '<div class="content-left col-md-8 col-auto">' => '<div>',
  '<p class="article-header__infos article-infos">' => '<p>',
  '<div class="article-thumb-wrapper">' => '<div>',
  '<div class="article-thumb">' => '<div>',
  '<figure class="preload article-image">' => '<figure>',
  '<div class="article-content row">' => '<div>',
  '<h3> </h3>' => '',
  '<h1>' => '<h4>',
  '</h1>' => '</h4>',
  '<p style="text-align:center">' => '<p>',
);

// Whitespace normalisation, used twice in the rule list below.
$re_empty_lines = "/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/";
$re_space_runs = '/\s\s+/';

// Ordered (pattern, replacement) rules; each one runs on the output of the
// previous one, so the order must not be changed.
// NOTE(review): the four "social" rules expect two spaces before "<div>",
// but the space-run rule that precedes them collapses every whitespace run
// to a single space, so they look unable to match. Left untouched because
// their greedy "(.+)" groups could remove far more than intended if they
// were loosened; verify against live markup before changing them.
$cleanup_rules = array(
  array($re_empty_lines, "\n"),
  array($re_space_runs, ' '),
  array('/ style="height:(.+?)width: 640px;">/', '>'),
  array('/<script>\(function \(\) \{var sasCallOptions = \{ siteId: 59629(.+?)<\/script>/', ''),
  array('/<img data-file-id=(.+?)src="(.+?)"(.+?)>/', '<img width="100%" src="\\2">'),
  array('/<div class="media_embed" height="(.+?)" width="(.+?)">/', '<div>'),
  array('/<iframe allowfullscreen="" frameborder="0" height="(.+?)" src="(.+?)" width="(.+?)"><\/iframe>/', '<iframe frameborder="0" width="100%" src="\\2"></iframe>'),
  array('/<img width="100%" src="(.+?)" style="height:(.+?)width: (.+?)">/', '<img width="100%" src="\\1">'),
  array('/<div class="icon icon--(.+?)"> (.+)/', '<div>'),
  array('/<div class="article-header__breadcrumb">/', '<div>'),
  array('/<img class="image image--full lazyload" data-full-src=/', '<img width="100%" src='),
  array('/<span class="sharing-btn__numbers">(.+)<\/span>/', ''),
  array('/<span class="sharing-btn__bg"><\/span>/', ''),
  array('/<script data-cfasync="false" src=(.+)<\/script><script>(.+?)<\/script>/', ''),
  // Clean so-called social links
  array('/<a data-share="(fb|li|tw)" data-url="(.+)">  <div>/', '<a><div>'),
  array('/<a class="sharing-btn sharing-btn--circle sharing-btn--whatsapp" href="whatsapp:(.+)>  <div>/', '<a><div>'),
  array('/<a href="fb-messenger:(.+) class="sharing-btn sharing-btn--circle sharing-btn--messenger" target="_blank">  <div>/', '<a><div>'),
  array('/<a href="https:\/\/www\.facebook\.com\/dialog\/send(.+)target="_blank">  <div>/', '<a><div>'),
  array('/<div id="inreadbis" class="ad-wrapper"><span id="wrapper_inread"><div(.+)<\/div><\/span><\/div>/', ''),
  array('/<\/div> <\/a> <a><div>/', ''),
  // Remove empty lines again after the removals above
  array($re_empty_lines, "\n"),
  array($re_space_runs, ' '),
  array('/<iframe allow="autoplay; encrypted-media" allowfullscreen="" frameborder="0" height="(.+?)" src="https:\/\/www\.youtube\.com\/embed\/(.+?)" width="(.+?)"><\/iframe>/', '<iframe allow="autoplay; encrypted-media" allowfullscreen="" frameborder="0" width="100%" src="https://www.youtube.com/embed/\\2"></iframe>'),
);

// Rules applied only to portfolio layouts. The second replacement restores
// the closing quote of the attribute that precedes width/height; replacing
// with a bare ">" left that attribute unterminated.
$portfolio_rules = array(
  array('/<img class="cover parallax-enable fit lazyload" data-speed="2" data-full-src="(.+?)" alt="(.+?)">/', '<img src="\\1" alt="\\2">'),
  array('/" width="(.+)" height="(.+)">/', '">'),
);

// Tolerate a missing list (feed unreadable or empty) instead of iterating an
// undefined variable.
$article_list = ( isset($articles) && is_array($articles) ) ? $articles : array();

$cpt=0;
foreach ($article_list as $article ) {
  $cpt_prev=$cpt-1;
  $cpt_next=$cpt+1;
  $link = (string) $article['link'];
  warning($link);
  echo PHP_EOL.PHP_EOL."<!-- ==================== article $cpt ============== -->".PHP_EOL;
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr><a name=\"article-$cpt\"></a>\n";

  $article_only = "";
  $chapo = "";
  $isPortfolio=false;
  // Default so the output section never reads an undefined key when the page
  // has no og:image.
  $article['image'] = '';

  $article_content = file_get_contents($link);
  // A failed or empty download skips parsing: DOMDocument::loadHTML() rejects
  // an empty string (ValueError on PHP 8).
  if( is_string($article_content) && '' !== $article_content ) {
    // Turn every non-ASCII character into a numeric entity so libxml reads
    // the UTF-8 page correctly; this replaces the deprecated 'HTML-ENTITIES'
    // pseudo-encoding of mb_convert_encoding().
    $article_content = mb_encode_numericentity($article_content, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    // Real-world HTML is rarely valid: collect parser errors silently, then
    // restore the previous libxml error mode.
    $libxml_previous_state = libxml_use_internal_errors(true);
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_previous_state);

    // Article body: the first <div> whose id is "ObsArticle-body".
    // (Named $divs, not $articles, so the list being iterated is not shadowed.)
    $divs = $doc->getElementsByTagName('div');
    foreach ( $divs as $node) {
      if( 'ObsArticle-body' === $node->getAttribute('id') ) {
        $article_only = DOMinnerHTML($node);
        break;
      }
    }

    // Open Graph metadata: standfirst ("chapo") and lead image.
    $metas = $doc->getElementsByTagName('meta');
    foreach ( $metas as $node) {
      $property = $node->getAttribute('property');
      if( 'og:description' === $property ) {
        $chapo = $node->getAttribute('content');
      } else if( 'og:image' === $property ) {
        $article['image'] = $node->getAttribute('content');
      }
    }

    // Portfolio fallback. It must run BEFORE the failure message is set,
    // otherwise $article_only is never empty here and this branch is dead.
    if( 0 == strlen($article_only) ) {
      //<div class="layout layout--portfolio">
      $node = getElementByClass($doc, 'div', 'layout layout--portfolio', 0);
      if( false !== $node ) {
        //Article is a portfolio
        $article_only = DOMinnerHTML($node);
        $isPortfolio=true;
      }
    }
  }

  if( 0 == strlen($article_only) ) {
    $article_only = "Extraction failed";
  }

  // Make root-relative URLs absolute against the article's own origin
  // (scheme + host of its link), falling back to the L'Obs site. The
  // look-ahead leaves protocol-relative "//host/..." URLs alone.
  $url_parts = parse_url($link);
  $base = ( is_array($url_parts) && isset($url_parts['scheme'], $url_parts['host']) )
    ? $url_parts['scheme'].'://'.$url_parts['host']
    : 'https://www.nouvelobs.com';
  $absolute = preg_replace_callback(
    '#( href| src|-src)="/(?!/)#',
    function ($m) use ($base) { return $m[1].'="'.$base.'/'; },
    $article_only
  );
  if( null !== $absolute ) {
    $article_only = $absolute;
  }
  $article_only = str_replace('<img src=', '<img width="100%" src=', $article_only);

  $article_only = str_replace(array_keys($wrapper_map), array_values($wrapper_map), $article_only);

  // preg_replace() returns null on a PCRE error (e.g. backtrack limit on a
  // very large page); keep the previous text rather than losing the article.
  foreach ( $cleanup_rules as $rule ) {
    $cleaned = preg_replace($rule[0], $rule[1], $article_only);
    if( null !== $cleaned ) {
      $article_only = $cleaned;
    }
  }

  // Drop everything from the sharing toolbar onwards. Compare against false
  // explicitly: a match at offset 0 is a valid position.
  $posend = strpos($article_only, '<div class="sharing-tools sharing-tools--align-center">');
  if( false !== $posend ) {
    $article_only = substr( $article_only, 0, $posend );
  }

  //Portfolio specific
  if($isPortfolio) {
    foreach ( $portfolio_rules as $rule ) {
      $cleaned = preg_replace($rule[0], $rule[1], $article_only);
      if( null !== $cleaned ) {
        $article_only = $cleaned;
      }
    }
  }

  //Force HTTPS
  $article_only = add_https($article_only, 'www.twitter.com');
  $article_only = add_https($article_only, 'www.facebook.com');
  $article_only = add_https($article_only, 'pbs.twimg.com');
  $article_only = add_https($article_only, 'schema.org');
  $article_only = add_https($article_only, 'www.w3.org');
  $article_only = add_https($article_only, 'www.slate.fr');
  // The feed's own host, following the same call pattern as the lines above.
  $article_only = add_https($article_only, 'www.nouvelobs.com');

  // Escape everything that comes from the feed or the remote page before it
  // is written into HTML text or attributes.
  $link_attr = htmlspecialchars($link, ENT_QUOTES, 'UTF-8');
  $title_html = htmlspecialchars((string) $article['title'], ENT_QUOTES, 'UTF-8', false);
  $image_attr = htmlspecialchars((string) $article['image'], ENT_QUOTES, 'UTF-8');
  $chapo_html = htmlspecialchars($chapo, ENT_QUOTES, 'UTF-8', false);

  // Top navigation bar
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<button id="html-btn" onclick="printHTML()" style="display:inline;"><img src="img/html5.png" width="24px" height="24px"></button>'.PHP_EOL;
  echo '<button id="pdf-btn" onclick="printPDF()" style="display:inline;"><img src="img/pdf.png" width="24px" height="24px"></button>'.PHP_EOL;
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>'.PHP_EOL;

  // Extracted content
  echo '<div class="extract-content" id="'.$cpt.'">'.PHP_EOL;
  echo '<h1>'.$title_html.'</h1>'.PHP_EOL;
  if( '' !== $image_attr ) {
    echo '<img src="'.$image_attr.'">'.PHP_EOL;
  }
  echo '<p><em>'.$chapo_html.'</em></p>'.PHP_EOL;
  echo $article_only;
  echo '</div>'.PHP_EOL;

  // Bottom navigation bar; the last </div> closes the "article" wrapper.
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>'.PHP_EOL;

  $cpt++;
  // NOTE(review): post-increment test with ">" allows $NEWS_RSS_MAX_ITEMS + 1
  // articles; kept unchanged, confirm the intended limit.
  if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
      break;
  }
}
?>
266
+
+3
sources/rss.php
... ...
@@ -10,6 +10,7 @@ define('NEWS_RSS_NUMERAMA', 'https://www.numerama.com/rss/news.rss');
10 10
 define('NEWS_RSS_HUFFINGTONPOSTFR', 'https://www.huffingtonpost.fr/feeds/index.xml');
11 11
 define('NEWS_RSS_FRANCEINFO', 'https://www.francetvinfo.fr/titres.rss');
12 12
 define('NEWS_RSS_CONSPIRACY', 'http://www.conspiracywatch.info/feed');
13
+define('NEWS_RSS_LOBS', 'http://www.nouvelobs.com/rss.xml');
13 14
 
14 15
 $array_title=array(
15 16
   "lemonde" => "LeMonde.fr",
... ...
@@ -20,6 +21,7 @@ $array_title=array(
20 21
   "vicefr" => "Vice.fr",
21 22
   "franceinfo" => "FranceInfo",
22 23
   "lesinrocks" => "LesInrocks.fr",
24
+  "lobs" => "L'Obs",
23 25
   "conspiracy" => "Conspiracy Watch"
24 26
 );
25 27
 $array_url=array(
... ...
@@ -31,6 +33,7 @@ $array_url=array(
31 33
   "vicefr" => NEWS_RSS_VICEFR,
32 34
   "franceinfo" => NEWS_RSS_FRANCEINFO,
33 35
   "lesinrocks" => NEWS_RSS_LESINROCKS,
36
+  "lobs" => NEWS_RSS_LOBS,
34 37
   "conspiracy" => NEWS_RSS_CONSPIRACY
35 38
 );
36 39