...
|
...
|
@@ -26,30 +26,30 @@ if ($xml === false) {
|
26
|
26
|
$article['description'] = $item->description;
|
27
|
27
|
$article['image'] = $item->enclosure['url'];
|
28
|
28
|
$articles[$cpt] = $article;
|
29
|
|
- echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
|
30
|
|
- echo '<img src="'.$article['image'].'" style="display:inline;" width="100%"><br>';
|
31
|
|
- echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
|
|
29
|
+ echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">'.PHP_EOL;
|
|
30
|
+ echo '<img src="'.$article['image'].'" style="display:inline;" width="100%"><br>'.PHP_EOL;
|
|
31
|
+ echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> '.PHP_EOL;
|
32
|
32
|
echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down fa-2x"></i></a></div> ';
|
33
|
|
- echo $article['title'].' ';
|
34
|
|
- echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>';
|
|
33
|
+ echo $article['title'].' '.PHP_EOL;
|
|
34
|
+ echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a><br></div></div>'.PHP_EOL;
|
35
|
35
|
$cpt++;
|
36
|
36
|
if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
|
37
|
37
|
break;
|
38
|
38
|
}
|
39
|
39
|
}
|
40
|
40
|
}
|
41
|
|
-echo '</div><!-- ./panel-body -->';
|
42
|
|
-echo '</div><!-- ./panel panel-default -->';
|
43
|
|
-echo '</div><!-- ./col-md-6 -->';
|
44
|
|
-echo '<div class="col-md-6">';
|
45
|
|
-echo '<div class="panel panel-default">';
|
46
|
|
-echo '<div class="panel-body">';
|
47
|
|
-echo '<a name="article-top"></a><div id="article-current"></div>';
|
|
41
|
+echo '</div><!-- ./panel-body -->'.PHP_EOL;
|
|
42
|
+echo '</div><!-- ./panel panel-default -->'.PHP_EOL;
|
|
43
|
+echo '</div><!-- ./col-md-6 -->'.PHP_EOL;
|
|
44
|
+echo '<div class="col-md-6">'.PHP_EOL;
|
|
45
|
+echo '<div class="panel panel-default">'.PHP_EOL;
|
|
46
|
+echo '<div class="panel-body">'.PHP_EOL;
|
|
47
|
+echo '<a name="article-top"></a><div id="article-current"></div>'.PHP_EOL;
|
48
|
48
|
$cpt=0;
|
49
|
49
|
foreach ($articles as $article ) {
|
50
|
50
|
$cpt_prev=$cpt-1;
|
51
|
51
|
$cpt_next=$cpt+1;
|
52
|
|
- echo '<!-- ==================== article '.$cpt.'============== -->';
|
|
52
|
+ echo PHP_EOL.PHP_EOL.'<!-- ==================== article '.$cpt.'============== -->'.PHP_EOL;
|
53
|
53
|
echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
|
54
|
54
|
echo "<hr>";
|
55
|
55
|
echo "<a name=\"article-$cpt\">";
|
...
|
...
|
@@ -73,12 +73,35 @@ foreach ($articles as $article ) {
|
73
|
73
|
|
74
|
74
|
$SEARCH_SUB='<span class="ea_article">édition abonné</span>';
|
75
|
75
|
$pos_start=strpos($article_content, $SEARCH_SUB);
|
|
76
|
+ if(!$pos_start) {
|
|
77
|
+ //Second Test
|
|
78
|
+ DEBUG("article($cpt) : Non Abonne 1");
|
|
79
|
+ $SEARCH_SUB='<p class="article__status"><span class="icon__premium"></span>Article réservé aux abonnés</p>';
|
|
80
|
+ $pos_start=strpos($article_content, $SEARCH_SUB);
|
|
81
|
+ if(!$pos_start) {
|
|
82
|
+ DEBUG("article($cpt) : Non Abonne 2");
|
|
83
|
+ } else {
|
|
84
|
+ DEBUG("article($cpt) : Abonne 2");
|
|
85
|
+ }
|
|
86
|
+ } /*else {
|
|
87
|
+ DEBUG("article($cpt) : Abonne 1");
|
|
88
|
+ }*/
|
76
|
89
|
if($pos_start) {
|
77
|
|
- $figures = $doc->getElementsByTagName('figure');
|
78
|
|
- $figure = DOMinnerHTML($figures[0]);
|
79
|
|
- $re = '/<img (.+?) class="illu lazy-retina" data-src="(.+?)" (.+?) data-lazyload="true" alt="(.+?)" title="(.+?)" (.+?)>/';
|
80
|
|
- preg_match($re, $figure, $array);
|
81
|
|
- $figure = '<img width="100%" src="'.$array[2].'"><figcaption>'.$array[4].'<br> <em>('.$array[5].')</em></figcaption>';
|
|
90
|
+ try {
|
|
91
|
+ $figures = $doc->getElementsByTagName('figure');
|
|
92
|
+ if( NULL === $figures[0] ) {
|
|
93
|
+ DEBUG("article($cpt) : No Image");
|
|
94
|
+ } else {
|
|
95
|
+ $figure = DOMinnerHTML($figures[0]);
|
|
96
|
+ $re = '/<img (.+?) class="illu lazy-retina" data-src="(.+?)" (.+?) data-lazyload="true" alt="(.+?)" title="(.+?)" (.+?)>/';
|
|
97
|
+ preg_match($re, $figure, $array);
|
|
98
|
+ if(count($array) >= 5 ) {
|
|
99
|
+ $figure = '<img width="100%" src="'.$array[2].'"><figcaption>'.$array[4].'<br> <em>('.$array[5].')</em></figcaption>';
|
|
100
|
+ }
|
|
101
|
+ }
|
|
102
|
+ } catch(Exception $e) {
|
|
103
|
+ ERROR("article($cpt) : Exception".$e->getMessage());
|
|
104
|
+ }
|
82
|
105
|
|
83
|
106
|
$article_abonne = str_replace("www.lemonde.fr", "abonnes.lemonde.fr", $article['link']);
|
84
|
107
|
$article_content = file_get_contents($article_abonne);
|
...
|
...
|
@@ -98,11 +121,64 @@ foreach ($articles as $article ) {
|
98
|
121
|
break;
|
99
|
122
|
}
|
100
|
123
|
}
|
101
|
|
-
|
|
124
|
+
|
102
|
125
|
$orgStrings = array(' href="/');
|
103
|
126
|
$newStrings = array(' href="https://www.lemonde.fr/');
|
104
|
127
|
$article_only = str_replace($orgStrings, $newStrings, $article_only);
|
105
|
|
-
|
|
128
|
+/*
|
|
129
|
+ $orgStrings = array(' src="/');
|
|
130
|
+ $newStrings = array(' src="https://www.lemonde.fr/');
|
|
131
|
+ $article_only = str_replace($orgStrings, $newStrings, $article_only);
|
|
132
|
+ */
|
|
133
|
+
|
|
134
|
+ //Remove Blank lines
|
|
135
|
+ $temp = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
|
|
136
|
+ $article_only = $temp;
|
|
137
|
+ $temp = preg_replace('/\s\s+/', ' ', $article_only);
|
|
138
|
+ $article_only = $temp;
|
|
139
|
+
|
|
140
|
+ //Remove Social section
|
|
141
|
+ $re = '/<li class="meta__icon"> <a class="js-social"(.+)<\/span> <\/li>/s';
|
|
142
|
+ $temp = preg_replace($re, '', $article_only);
|
|
143
|
+ $article_only = $temp;
|
|
144
|
+
|
|
145
|
+ //Cleanup end-of-article extraction
|
|
146
|
+ $re = '/<section class="article__comments">(.+)commentaires <\/a> <\/section>/';
|
|
147
|
+ $temp = preg_replace($re, '', $article_only);
|
|
148
|
+ $article_only = $temp;
|
|
149
|
+ $re = '/<li><a class="insert__teaser" (.+)<\/a><\/li>/';
|
|
150
|
+ $temp = preg_replace($re, '', $article_only);
|
|
151
|
+ $article_only = $temp;
|
|
152
|
+ $re = '/<p class="insert__title">Les plus lus<\/p>/';
|
|
153
|
+ $temp = preg_replace($re, '', $article_only);
|
|
154
|
+ $article_only = $temp;
|
|
155
|
+ $re = '/<li> <a href="http(.+)Recherche<\/a> <\/li>/';
|
|
156
|
+ $temp = preg_replace($re, '', $article_only);
|
|
157
|
+ $article_only = $temp;
|
|
158
|
+ $re = '/<div class="bd__header"> <span>Annonces automobiles<\/span> <span>avec La Centrale<\/span> <\/div>/';
|
|
159
|
+ $temp = preg_replace($re, '', $article_only);
|
|
160
|
+ $article_only = $temp;
|
|
161
|
+ $re = '/<div class="bd__header"> <span>Annonces immobilières<\/span> <span>avec Logic-Immo<\/span> <\/div>/';
|
|
162
|
+ $temp = preg_replace($re, '', $article_only);
|
|
163
|
+ $article_only = $temp;
|
|
164
|
+ $re = '/<p class="article__siblings-title">Dans la même rubrique<\/p>/';
|
|
165
|
+ $temp = preg_replace($re, '', $article_only);
|
|
166
|
+ $article_only = $temp;
|
|
167
|
+ $re = '/<footer class=" old__article-footer">(.+)<\/footer>/';
|
|
168
|
+ $temp = preg_replace($re, '', $article_only);
|
|
169
|
+ $article_only = $temp;
|
|
170
|
+ $re = '/<a href="https:\/\/www.lemonde.fr\/services\/"><h4 class="area__title area__title--bd">Services<\/h4><\/a>/';
|
|
171
|
+ $temp = preg_replace($re, '', $article_only);
|
|
172
|
+ $article_only = $temp;
|
|
173
|
+ $re = '/<a class="insert__action button button--dark" target="_blank" data-target="jelec-sidebar" href="https:\/\/journal.lemonde.fr\/">Lire le journal numérique<\/a>/';
|
|
174
|
+ $temp = preg_replace($re, '', $article_only);
|
|
175
|
+ $article_only = $temp;
|
|
176
|
+
|
|
177
|
+ //Remove Thumbnail
|
|
178
|
+ $re = '/<img src="\/thumbnail\/journal\/(.+) class="insert__media" alt="">/';
|
|
179
|
+ $temp = preg_replace($re, '', $article_only);
|
|
180
|
+ $article_only = $temp;
|
|
181
|
+
|
106
|
182
|
$orgStrings = array(
|
107
|
183
|
'<p class="txt3 description-article" itemprop="description">',
|
108
|
184
|
'<p class="bloc_signature">',
|
...
|
...
|
@@ -144,9 +220,6 @@ foreach ($articles as $article ) {
|
144
|
220
|
$article_only = $temp;
|
145
|
221
|
$temp = preg_replace('/\s\s+/', ' ', $article_only);
|
146
|
222
|
$article_only = $temp;
|
147
|
|
- //$re = '/<a target="_blank" onclick="return false;" (.+?)">(.+?)<\/a>/';
|
148
|
|
- //$temp = preg_replace($re, '\\3', $article_only);
|
149
|
|
- //$article_only = $temp;
|
150
|
223
|
$re = '/<a class="lien_interne rub"(.+?)>(.+?)<\/a>/';
|
151
|
224
|
$temp = preg_replace($re, '\\2', $article_only);
|
152
|
225
|
$article_only = $temp;
|
...
|
...
|
@@ -159,13 +232,14 @@ foreach ($articles as $article ) {
|
159
|
232
|
echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
|
160
|
233
|
echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
|
161
|
234
|
echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
|
162
|
|
- echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
|
163
|
|
- echo '<div class="extract-content" id="'.$cpt.'">'.$figure.$article_only.'</div>';
|
164
|
|
- echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
|
165
|
|
- echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div> ';
|
166
|
|
- echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
|
167
|
|
- echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
|
168
|
|
- echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>';
|
|
235
|
+ echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>'.PHP_EOL;
|
|
236
|
+ echo '<div class="extract-content" id="'.$cpt.'">'.$figure.$article_only.'</div>'.PHP_EOL;
|
|
237
|
+ echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> '.PHP_EOL;
|
|
238
|
+ echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div> '.PHP_EOL;
|
|
239
|
+ echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> '.PHP_EOL;
|
|
240
|
+ echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> '.PHP_EOL;
|
|
241
|
+ echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div></div>'.PHP_EOL;
|
|
242
|
+ echo '<!-- ENDOF ==================== article '.$cpt.'============== -->'.PHP_EOL;
|
169
|
243
|
$cpt++;
|
170
|
244
|
if( $cpt > $NEWS_RSS_MAX_ITEMS ) {
|
171
|
245
|
break;
|