Showing 2 changed files with 124 additions and 30 deletions
+104 -30
sources/news-lemonde.fr.php
... ...
@@ -26,30 +26,30 @@ if ($xml === false) {
26 26
     $article['description'] = $item->description;
27 27
     $article['image'] = $item->enclosure['url'];
28 28
     $articles[$cpt] = $article;
29
-    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
30
-    echo '<img src="'.$article['image'].'" style="display:inline;" width="100%"><br>';
31
-    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;';
29
+    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">'.PHP_EOL;
30
+    echo '<img src="'.$article['image'].'" style="display:inline;" width="100%"><br>'.PHP_EOL;
31
+    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;'.PHP_EOL;
32 32
     echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div>&nbsp;';
33
-    echo $article['title'].'&nbsp;&nbsp;';
34
-    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
33
+    echo $article['title'].'&nbsp;&nbsp;'.PHP_EOL;
34
+    echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>'.PHP_EOL;
35 35
     $cpt++;
36 36
     if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
37 37
       break;
38 38
     }
39 39
   }
40 40
 }
41
-echo '</div><!-- ./panel-body -->';
42
-echo '</div><!-- ./panel panel-default -->';
43
-echo '</div><!-- ./col-md-6 -->';
44
-echo '<div class="col-md-6">';
45
-echo '<div class="panel panel-default">';
46
-echo '<div class="panel-body">';
47
-echo '<a name="article-top"></a><div id="article-current"></div>';
41
+echo '</div><!-- ./panel-body -->'.PHP_EOL;
42
+echo '</div><!-- ./panel panel-default -->'.PHP_EOL;
43
+echo '</div><!-- ./col-md-6 -->'.PHP_EOL;
44
+echo '<div class="col-md-6">'.PHP_EOL;
45
+echo '<div class="panel panel-default">'.PHP_EOL;
46
+echo '<div class="panel-body">'.PHP_EOL;
47
+echo '<a name="article-top"></a><div id="article-current"></div>'.PHP_EOL;
48 48
 $cpt=0;
49 49
 foreach ($articles as $article ) {
50 50
   $cpt_prev=$cpt-1;
51 51
   $cpt_next=$cpt+1;
52
-  echo '<!-- ==================== article '.$cpt.'============== -->';
52
+  echo PHP_EOL.PHP_EOL.'<!-- ==================== article '.$cpt.'============== -->'.PHP_EOL;
53 53
   echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
54 54
   echo "<hr>";
55 55
   echo "<a name=\"article-$cpt\">";
... ...
@@ -73,12 +73,35 @@ foreach ($articles as $article ) {
73 73
   
74 74
   $SEARCH_SUB='<span class="ea_article">édition abonné</span>';
75 75
   $pos_start=strpos($article_content, $SEARCH_SUB);
76
+  if(!$pos_start) {
77
+    //Second Test
78
+    DEBUG("article($cpt) : Non Abonne 1");
79
+    $SEARCH_SUB='<p class="article__status"><span class="icon__premium"></span>Article réservé aux abonnés</p>';
80
+    $pos_start=strpos($article_content, $SEARCH_SUB);
81
+    if(!$pos_start) {
82
+      DEBUG("article($cpt) : Non Abonne 2");
83
+    } else {
84
+      DEBUG("article($cpt) : Abonne 2");
85
+    }
86
+  } /*else {
87
+    DEBUG("article($cpt) : Abonne 1");
88
+    }*/
76 89
   if($pos_start) {
77
-    $figures = $doc->getElementsByTagName('figure');
78
-    $figure = DOMinnerHTML($figures[0]);
79
-    $re = '/<img (.+?) class="illu lazy-retina" data-src="(.+?)" (.+?) data-lazyload="true" alt="(.+?)" title="(.+?)" (.+?)>/';
80
-    preg_match($re, $figure, $array);
81
-    $figure = '<img width="100%" src="'.$array[2].'"><figcaption>'.$array[4].'<br>&nbsp;<em>('.$array[5].')</em></figcaption>';
90
+    try {
91
+      $figures = $doc->getElementsByTagName('figure');
92
+      if( NULL === $figures[0] ) {
93
+        DEBUG("article($cpt) : No Image");
94
+      } else {
95
+        $figure = DOMinnerHTML($figures[0]);
96
+        $re = '/<img (.+?) class="illu lazy-retina" data-src="(.+?)" (.+?) data-lazyload="true" alt="(.+?)" title="(.+?)" (.+?)>/';
97
+        preg_match($re, $figure, $array);
98
+        if(count($array) >= 5 ) {
99
+          $figure = '<img width="100%" src="'.$array[2].'"><figcaption>'.$array[4].'<br>&nbsp;<em>('.$array[5].')</em></figcaption>';
100
+        }
101
+      }
102
+    } catch(Exception $e) {
103
+      ERROR("article($cpt) : Exception".$e->getMessage());
104
+    }
82 105
 
83 106
     $article_abonne = str_replace("www.lemonde.fr", "abonnes.lemonde.fr", $article['link']);
84 107
     $article_content = file_get_contents($article_abonne);
... ...
@@ -98,11 +121,64 @@ foreach ($articles as $article ) {
98 121
       break;
99 122
     }
100 123
   }
101
- 
124
+
102 125
   $orgStrings = array(' href="/');
103 126
   $newStrings = array(' href="https://www.lemonde.fr/');
104 127
   $article_only  = str_replace($orgStrings, $newStrings, $article_only);
105
-  
128
+/*  
129
+  $orgStrings = array(' src="/');
130
+  $newStrings = array(' src="https://www.lemonde.fr/');
131
+  $article_only  = str_replace($orgStrings, $newStrings, $article_only);
132
+ */
133
+
134
+  //Remove Blank lines
135
+  $temp = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
136
+  $article_only = $temp;
137
+  $temp = preg_replace('/\s\s+/', ' ', $article_only);
138
+  $article_only = $temp;
139
+
140
+  //Remove Social section
141
+  $re = '/<li class="meta__icon"> <a class="js-social"(.+)<\/span> <\/li>/s';
142
+  $temp = preg_replace($re, '', $article_only);
143
+  $article_only = $temp;
144
+
145
+  //Cleanup end-of-article extraction
146
+  $re = '/<section class="article__comments">(.+)commentaires <\/a> <\/section>/';
147
+  $temp = preg_replace($re, '', $article_only);
148
+  $article_only = $temp;
149
+  $re = '/<li><a class="insert__teaser" (.+)<\/a><\/li>/';
150
+  $temp = preg_replace($re, '', $article_only);
151
+  $article_only = $temp;
152
+  $re = '/<p class="insert__title">Les plus lus<\/p>/';
153
+  $temp = preg_replace($re, '', $article_only);
154
+  $article_only = $temp;
155
+  $re = '/<li> <a href="http(.+)Recherche<\/a> <\/li>/';
156
+  $temp = preg_replace($re, '', $article_only);
157
+  $article_only = $temp;
158
+  $re = '/<div class="bd__header"> <span>Annonces automobiles<\/span> <span>avec La Centrale<\/span> <\/div>/';
159
+  $temp = preg_replace($re, '', $article_only);
160
+  $article_only = $temp;
161
+  $re = '/<div class="bd__header"> <span>Annonces immobilières<\/span> <span>avec Logic-Immo<\/span> <\/div>/';
162
+  $temp = preg_replace($re, '', $article_only);
163
+  $article_only = $temp;
164
+  $re = '/<p class="article__siblings-title">Dans la même rubrique<\/p>/';
165
+  $temp = preg_replace($re, '', $article_only);
166
+  $article_only = $temp;
167
+  $re = '/<footer class=" old__article-footer">(.+)<\/footer>/';
168
+  $temp = preg_replace($re, '', $article_only);
169
+  $article_only = $temp;
170
+  $re = '/<a href="https:\/\/www.lemonde.fr\/services\/"><h4 class="area__title area__title--bd">Services<\/h4><\/a>/';
171
+  $temp = preg_replace($re, '', $article_only);
172
+  $article_only = $temp;
173
+  $re = '/<a class="insert__action button button--dark" target="_blank" data-target="jelec-sidebar" href="https:\/\/journal.lemonde.fr\/">Lire le journal numérique<\/a>/';
174
+  $temp = preg_replace($re, '', $article_only);
175
+  $article_only = $temp;
176
+
177
+  //Remove Thumbnail
178
+  $re = '/<img src="\/thumbnail\/journal\/(.+) class="insert__media" alt="">/';
179
+  $temp = preg_replace($re, '', $article_only);
180
+  $article_only = $temp;
181
+
106 182
   $orgStrings = array( 
107 183
       '<p class="txt3 description-article" itemprop="description">',
108 184
       '<p class="bloc_signature">',
... ...
@@ -144,9 +220,6 @@ foreach ($articles as $article ) {
144 220
   $article_only = $temp;
145 221
   $temp = preg_replace('/\s\s+/', ' ', $article_only);
146 222
   $article_only = $temp;
147
-  //$re = '/<a target="_blank" onclick="return false;" (.+?)">(.+?)<\/a>/';
148
-  //$temp = preg_replace($re, '\\3', $article_only);
149
-  //$article_only = $temp;
150 223
   $re = '/<a class="lien_interne rub"(.+?)>(.+?)<\/a>/';
151 224
   $temp = preg_replace($re, '\\2', $article_only);
152 225
   $article_only = $temp;
... ...
@@ -159,13 +232,14 @@ foreach ($articles as $article ) {
159 232
   echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
160 233
   echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
161 234
   echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
162
-  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
163
-  echo '<div class="extract-content" id="'.$cpt.'">'.$figure.$article_only.'</div>';
164
-  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
165
-  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
166
-  echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
167
-  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
168
-  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>';
235
+  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>'.PHP_EOL;
236
+  echo '<div class="extract-content" id="'.$cpt.'">'.$figure.$article_only.'</div>'.PHP_EOL;
237
+  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
238
+  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
239
+  echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
240
+  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;'.PHP_EOL;
241
+  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>'.PHP_EOL;
242
+  echo '<!-- ENDOF ==================== article '.$cpt.'============== -->'.PHP_EOL;
169 243
   $cpt++;
170 244
   if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
171 245
       break;
+20
sources/rss.php
... ...
@@ -1,4 +1,5 @@
1 1
 <?php
2
+$DEBUG=1;
2 3
 define('NEWS_RSS_LEMONDE', 'https://www.lemonde.fr/rss/une.xml');
3 4
 define('NEWS_RSS_LIBERATION', 'http://rss.liberation.fr/rss/latest/');
4 5
 define('NEWS_RSS_SLATEFR', 'https://www.slate.fr/rss.xml');
... ...
@@ -37,4 +38,23 @@ $array_url=array(
37 38
   "lesinrocks" => NEWS_RSS_LESINROCKS,
38 39
   "conspiracy" => NEWS_RSS_CONSPIRACY
39 40
 );
41
+
42
/**
 * Write one tagged line to the PHP standard-error stream.
 *
 * Shared back end for DEBUG(), WARNING() and ERROR(). The emitted line is
 * "[<Tag>]<Message>" followed by PHP_EOL, with no separator between the
 * tag and the message.
 *
 * @param string $Tag     Severity label placed between square brackets.
 * @param string $Message Text appended right after the tag.
 * @return void
 */
function rss_log_to_stderr($Tag, $Message) {
  $stderr = fopen('php://stderr', 'w');
  if ($stderr === false) {
    // Logging is best-effort: if the stream cannot be opened, skip the
    // message instead of passing false to fwrite()/fclose(), which would
    // raise a TypeError on PHP 8 and abort the page being generated.
    return;
  }
  fwrite($stderr, '['.$Tag.']'.$Message.PHP_EOL);
  fclose($stderr);
}

/**
 * Log a debug message to stderr, only when the global $DEBUG flag is truthy.
 *
 * @param string $Message Text to log.
 * @return void
 */
function DEBUG($Message) {
  global $DEBUG;
  if($DEBUG) {
    rss_log_to_stderr('DEBUG', $Message);
  }
}

/**
 * Log a warning message to stderr (always emitted, regardless of $DEBUG).
 *
 * @param string $Message Text to log.
 * @return void
 */
function WARNING($Message) {
  rss_log_to_stderr('WARNING', $Message);
}

/**
 * Log an error message to stderr (always emitted, regardless of $DEBUG).
 *
 * @param string $Message Text to log.
 * @return void
 */
function ERROR($Message) {
  rss_log_to_stderr('ERROR', $Message);
}
40 60
 ?>