...
|
...
|
@@ -60,13 +60,23 @@ foreach ($articles as $article ) {
|
60
|
60
|
$doc->loadHTML($article_content);
|
61
|
61
|
$articles = $doc->getElementsByTagName('article');
|
62
|
62
|
$article_only="";
|
63
|
|
- foreach ( $articles as $node) {
|
64
|
|
- $article_only = DOMinnerHTML($node);
|
|
63
|
+ if( isset($articles[0]) ) {
|
|
64
|
+ $article_only=DOMinnerHTML($articles[0]);
|
|
65
|
+ } else {
|
|
66
|
+ $article_only = "Extraction Failed";
|
|
67
|
+ break;
|
65
|
68
|
}
|
|
69
|
+ $figure="";
|
66
|
70
|
|
67
|
71
|
$SEARCH_SUB='<span class="ea_article">édition abonné</span>';
|
68
|
72
|
$pos_start=strpos($article_content, $SEARCH_SUB);
|
69
|
73
|
if($pos_start) {
|
|
74
|
+ $figures = $doc->getElementsByTagName('figure');
|
|
75
|
+ $figure = DOMinnerHTML($figures[0]);
|
|
76
|
+ $re = '/<img (.+?) class="illu lazy-retina" data-src="(.+?)" (.+?) data-lazyload="true" alt="(.+?)" title="(.+?)" (.+?)>/';
|
|
77
|
+ preg_match($re, $figure, $array);
|
|
78
|
+ $figure = '<img width="100%" src="'.$array[2].'"><figcaption>'.$array[4].'<br> <em>('.$array[5].')</em></figcaption>';
|
|
79
|
+
|
70
|
80
|
$article_abonne = str_replace("www.lemonde.fr", "abonnes.lemonde.fr", $article['link']);
|
71
|
81
|
$article_content = file_get_contents($article_abonne);
|
72
|
82
|
$doc = new DOMDocument();
|
...
|
...
|
@@ -75,8 +85,11 @@ foreach ($articles as $article ) {
|
75
|
85
|
$doc->loadHTML($article_content);
|
76
|
86
|
$articles = $doc->getElementsByTagName('article');
|
77
|
87
|
$article_only="";
|
78
|
|
- foreach ( $articles as $node) {
|
79
|
|
- $article_only = DOMinnerHTML($node);
|
|
88
|
+ if( isset($articles[0]) ) {
|
|
89
|
+ $article_only=DOMinnerHTML($articles[0]);
|
|
90
|
+ } else {
|
|
91
|
+ $article_only = "Extraction Failed for Subscribed Article";
|
|
92
|
+ break;
|
80
|
93
|
}
|
81
|
94
|
}
|
82
|
95
|
|
...
|
...
|
@@ -141,7 +154,7 @@ foreach ($articles as $article ) {
|
141
|
154
|
echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
|
142
|
155
|
echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left fa-2x"></i></div> ';
|
143
|
156
|
echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right fa-2x"></i></div>';
|
144
|
|
- echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
|
|
157
|
+ echo '<div class="extract-content" id="'.$cpt.'">'.$figure.$article_only.'</div>';
|
145
|
158
|
echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home fa-2x"></i></a></div> ';
|
146
|
159
|
echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up fa-2x"></i></a></div> ';
|
147
|
160
|
echo '<div id="nav-source" style="display:inline;"><a href="'.$article['link'].'" target="new-'.$cpt.'"><i class="fa fa-link fa-2x"></i></a></div> ';
|