Showing 1 changed files with 92 additions and 24 deletions
+92 -24
sources/news-liberation.fr.php
... ...
@@ -37,33 +37,33 @@ if ($xml === false) {
37 37
     }
38 38
 
39 39
     $articles[$cpt] = $article;
40
-    echo "<div onclick=\"onArticle($cpt)\" style=\"display:inline;\">\n";
41
-    echo '<img width="100%" src="'.$article['image'].'" style="display:inline;"><br>';
42
-    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
43
-    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div> ';
44
-    echo $article['title'].' ';
45
-    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
40
+    echo "<div onclick=\"onArticle($cpt)\" style=\"display:inline;\">".PHP_EOL; // teaser wrapper: a click calls onArticle() with this article's index
41
+    echo '<img width="100%" src="'.$article['image'].'" style="display:inline;"><br>'.PHP_EOL;
42
+    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>'.PHP_EOL;
43
+    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>'.PHP_EOL; // line break keeps whitespace between the icon and the title, as the former trailing space did
44
+    echo $article['title'].PHP_EOL; // NOTE(review): title, link and image are echoed unescaped; confirm the feed values are HTML-safe
45
+    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>'.PHP_EOL; // closes the wrapper opened above
46 46
     $cpt++;
47 47
     if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
48 48
       break;
49 49
     }
50 50
   }
51 51
 }
52
-echo '</div><!-- ./panel-body -->';
53
-echo '</div><!-- ./panel panel-default -->';
54
-echo '</div><!-- ./col-md-6 -->';
55
-echo '<div class="col-md-6">';
56
-echo '<div class="panel panel-default">';
57
-echo '<div class="panel-body">';
58
-echo '<a name="article-top"></a><div id="article-current"></div>';
52
+echo '</div><!-- ./panel-body -->'.PHP_EOL;
53
+echo '</div><!-- ./panel panel-default -->'.PHP_EOL;
54
+echo '</div><!-- ./col-md-6 -->'.PHP_EOL;
55
+echo '<div class="col-md-6">'.PHP_EOL;
56
+echo '<div class="panel panel-default">'.PHP_EOL;
57
+echo '<div class="panel-body">'.PHP_EOL;
58
+echo '<a name="article-top"></a><div id="article-current"></div>'.PHP_EOL;
59 59
 $cpt=0;
60 60
 foreach ($articles as $article ) {
61 61
   $cpt_prev=$cpt-1;
62 62
   $cpt_next=$cpt+1;
63
-  echo '<!-- ==================== article '.$cpt.'============== -->';
63
+  echo PHP_EOL.PHP_EOL.'<!-- ==================== article '.$cpt.'============== -->'.PHP_EOL;
64 64
   echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
65 65
   echo "<hr>";
66
-  echo "<a name=\"article-$cpt\">";
66
+  echo "<a name=\"article-$cpt\"></a>";
67 67
   $article_content = file_get_contents($article['link']); // downloads the article page; NOTE(review): returns false on failure and that is not checked here
68 68
   $article_content_utf8 = mb_convert_encoding($article_content, 'HTML-ENTITIES', "UTF-8"); // re-encodes UTF-8 text as HTML entities; NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2
69 69
   $doc = new DOMDocument();
... ...
@@ -79,6 +79,7 @@ foreach ($articles as $article ) {
79 79
     $article_only = DOMinnerHTML($node);
80 80
   }
81 81
 
82
+
82 83
   $SEARCH_SUB='<aside class="aside-column side-flow-bloc width-padded">'; // marker tag: everything from its first occurrence onward is cut off
83 84
   $pos_stop = strpos($article_only, $SEARCH_SUB); // byte offset of the marker, or false when it is absent
84 85
   $article_only = substr($article_only, 0, $pos_stop); // NOTE(review): with $pos_stop === false the length coerces to 0 and the whole article becomes an empty string; confirm this is intended
... ...
@@ -110,9 +111,19 @@ foreach ($articles as $article ) {
110 111
       }
111 112
     }
112 113
   }
114
+  DEBUG("Article($cpt) : length = ".strlen($article_only)); // trace how many bytes the extraction above produced
115
+  $isCheckNews=false; // becomes true only when the <article> fallback below is used
116
+  if( 0 == strlen($article_only)) { // nothing extracted so far: fall back to the page's <article> element(s)
117
+    $articleNodes = $doc->getElementsByTagName('article'); // own variable: $articles is the list the enclosing foreach iterates and must not be overwritten
118
+    $article_only="";
119
+    foreach ( $articleNodes as $node) {
120
+      $article_only = DOMinnerHTML($node); // each pass overwrites the previous one, so the last <article> wins
121
+    }
122
+    $isCheckNews=true; // presumably only CheckNews pages reach this fallback; verify against real pages
123
+  }
113 124
 
114 125
   $orgStrings = array( ' href="/' );
115
-  $newStrings = array( ' href="http://www.liberation.fr/' );
126
+  $newStrings = array( ' href="https://www.liberation.fr/' );
116 127
   $article_only  = str_replace($orgStrings, $newStrings, $article_only);
117 128
   
118 129
   $orgStrings = array( '<p lang="en" dir="ltr">',
... ...
@@ -122,6 +133,7 @@ foreach ($articles as $article ) {
122 133
       '<h1 class="article-headline">',
123 134
       '</h1>',
124 135
       '<h2 class="article-standfirst read-left-padding">',
136
+      '<h2 class="article-standfirst">',
125 137
       '<h2 class="intertitre">',
126 138
       '</h2>',
127 139
       '<figure class="article-image article-header-image"><a role="button" class="figure-zoom">',
... ...
@@ -133,6 +145,7 @@ foreach ($articles as $article ) {
133 145
       '<img class="live-image"',
134 146
       '<a class="slug"',
135 147
       '<figcaption class="legende" data-caption="',
148
+      '<figcaption class="read-left-padding caption">',
136 149
       '</figcaption>' );
137 150
   $newStrings = array( '<p>',
138 151
       '<p>',
... ...
@@ -142,6 +155,7 @@ foreach ($articles as $article ) {
142 155
       '</h3>',
143 156
       '<h4>',
144 157
       '<h4>',
158
+      '<h4>',
145 159
       '</h4>',
146 160
       '<figure>',
147 161
       '<figure>',
... ...
@@ -152,6 +166,7 @@ foreach ($articles as $article ) {
152 166
       '<img width="100%"',
153 167
       '<a',
154 168
       '<em>',
169
+      '<em>',
155 170
       '</em>' );
156 171
 
157 172
   $article_only = str_replace($orgStrings, $newStrings, $article_only);
... ...
@@ -176,12 +191,64 @@ foreach ($articles as $article ) {
176 191
   $newStrings = array( '','','', '<a target="new" href=' );
177 192
   $article_only = str_replace($orgStrings, $newStrings, $article_only);
178 193
 
194
+  //Checknews Specific
195
+  if($isCheckNews) {
196
+    $re='/<\/form>/';
197
+    $temp = preg_replace($re, "", $article_only);
198
+    $article_only = $temp;
199
+    $re='/<form class="checknews-form" method="GET" action="\/checknews\/search\/" name="checknews_box">(.+)<\/a>/';
200
+    $temp = preg_replace($re, "", $article_only);
201
+    $article_only = $temp;
202
+    /*
203
+    $re='//';
204
+    $temp = preg_replace($re, "", $article_only);
205
+    $article_only = $temp;
206
+     */
207
+    $re='/<li class="answer no-tags">(.+?)<\/li>/';
208
+    $temp = preg_replace($re, "", $article_only);
209
+    $article_only = $temp;
210
+    $re='/<li class="answer no-tags">(.+)<ul class="checknews-tags">/';
211
+    $temp = preg_replace($re, "<li><ul>", $article_only);
212
+    $article_only = $temp;
213
+    $re='/<p class="btn" href="https:\/\/www\.liberation\.fr\/checknews\/(.+)Voir la réponse<br><span>(.+)<\/span> <\/p> <\/div> <\/a> <\/li>/';
214
+    $article_only = preg_replace($re, "</div></a></li>", $article_only);
215
+    $re='/<a class="btn about-button" href="https:\/\/www\.liberation\.fr\/checknews\/voter\/"> Accédez à toutes les questions <\/a>/';
216
+    $article_only = preg_replace($re, "", $article_only);
217
+    $re='/<script type="text\/javascript" src="https:\/\/statics\.liberation\.fr\/newsite\/(.+)<\/script>/';
218
+    $article_only = preg_replace($re, "", $article_only);
219
+    $re='/<div class="checknews-header"> <div class="title"> <h3>(Nos dernières réponses|Le vote des lecteurs)<\/h3> <\/div> <\/div>/';
220
+    $article_only = preg_replace($re, "", $article_only);
221
+    $re='/<p> Voici les cinq questions qui sont en tête des votes\.<br> Vous aussi, donnez votre avis et choisissez les sujets que la rédaction va traiter\. <\/p>/'; // sentence-ending dots are escaped so they match a literal "." only
222
+    $article_only = preg_replace($re, "", $article_only); // drops the fixed intro paragraph of the readers' vote box
223
+    $re='/<\/div><\/a><\/li> <li><ul>/';
224
+    $article_only = preg_replace($re, "", $article_only);
225
+    $re='/<ul id="question" class="checknews-answers vote">/';
226
+    $article_only = preg_replace($re, "", $article_only);
227
+    $re='/<ul class="checknews-answers">/';
228
+    $article_only = preg_replace($re, "", $article_only);
229
+    $re='/<div class="checknews-box last-content"> <li><ul>/';
230
+    $article_only = preg_replace($re, "<div><li>", $article_only);
231
+    $re='/ target="_blank">/';
232
+    $article_only = preg_replace($re, ">", $article_only);
233
+    $re='/<\/div><\/a><\/li>/';
234
+    $article_only = preg_replace($re, "", $article_only);
235
+    $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
236
+    $article_only = preg_replace('/\s\s+/', ' ', $article_only);
237
+    $re='/<div class="checknews-box last-content"> <li><ul>/';
238
+    $article_only = preg_replace($re, "<div>", $article_only);
239
+
240
+  }
241
+  $temp = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only); // line breaks that enclose only whitespace collapse to a single "\n"
242
+  $article_only = $temp;
243
+  $temp = preg_replace('/\s\s+/', ' ', $article_only); // any run of two or more whitespace characters (newlines included) becomes one space
244
+  $article_only = $temp;
245
+
179 246
   //Force HTTPS
180 247
   $article_only = add_https($article_only, 'www.twitter.com');
181 248
   $article_only = add_https($article_only, 'www.facebook.com');
182 249
   $article_only = add_https($article_only, 'pbs.twimg.com');
183 250
   $article_only = add_https($article_only, 'schema.org');
184
-  $article_only = add_https($article_only, 'www.w3.org');
251
+  //$article_only = add_https($article_only, 'www.w3.org');
185 252
   $article_only = add_https($article_only, 'videos.senat.fr');
186 253
   $article_only = add_https($article_only, '9w4t.mjt.lu');
187 254
   $article_only = add_https($article_only, 'www.lefigaro.fr');
... ...
@@ -190,13 +257,14 @@ foreach ($articles as $article ) {
190 257
   echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
191 258
   echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
192 259
   echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
193
-  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
194
-  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
195
-  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
196
-  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
197
-  echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
198
-  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
199
-  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>';
260
+  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>'.PHP_EOL;
261
+  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>'.PHP_EOL;
262
+  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
263
+  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
264
+  echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
265
+  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
266
+  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>'.PHP_EOL;
267
+  echo '<!-- ENDOF ==================== article '.$cpt.'============== -->'.PHP_EOL;
200 268
   $cpt++;
201 269
   if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
202 270
       break;