Showing 7 changed files with 792 additions and 0 deletions
+9
sources/gennews.sh
... ...
@@ -0,0 +1,9 @@
1
#!/bin/bash
# Regenerates the static HTML fragment of every news source.
#
# Each PHP generator prints its page fragment on stdout. The output is first
# written to "<target>.tmp" and only moved over the published file when PHP
# exits successfully, so a crashing generator never leaves an empty or
# half-written page behind. PHP diagnostics (stderr) are discarded.

#cd /var/www/html/news/sources
# Abort when the directory is missing; otherwise the generators would run
# (and write their output) relative to whatever the current directory is.
cd /var/www/news/sources || exit 1

# generate SCRIPT TARGET
#   Runs SCRIPT with php and atomically replaces TARGET with its output.
generate() {
  local script=$1 target=$2
  if php "$script" > "$target.tmp" 2> /dev/null; then
    mv -f "$target.tmp" "$target"
  else
    rm -f "$target.tmp"
    return 1
  fi
}

generate news-lemonde.fr.php    lemonde.html
generate news-liberation.fr.php liberation.html
generate news-slate.fr.php      slatefr.html
generate news-vice.fr.php       vicefr.html
generate news-lesinrocks.fr.php lesinrocks.html
9
+
+41
sources/news-constants.php
... ...
@@ -0,0 +1,41 @@
1
<?php
// RSS / Atom endpoints of the supported news sources.
const NEWS_RSS_LEMONDE    = 'http://www.lemonde.fr/rss/une.xml';
const NEWS_RSS_LIBERATION = 'http://rss.liberation.fr/rss/latest/';
const NEWS_RSS_SLATEFR    = 'https://www.slate.fr/rss.xml';
const NEWS_RSS_VICEFR     = 'https://www.vice.com/fr/rss';
const NEWS_RSS_LESINROCKS = 'http://www.lesinrocks.com/feeds/feed-a-la-une/';

// Number of feed entries the per-source scripts compare their counter against.
$NEWS_RSS_MAX_ITEMS = 10;
8
+
9
/**
 * Returns the markup found inside a DOM node (its "inner HTML").
 *
 * Every direct child of $element is serialised with the owner document's
 * saveHTML() and the pieces are concatenated in document order.
 *
 * @param DOMNode $element Node whose children are serialised.
 * @return string Concatenated HTML of the children; '' when there are none.
 */
function DOMinnerHTML(DOMNode $element)
{
    $fragments = array();

    foreach ($element->childNodes as $childNode) {
        $fragments[] = $element->ownerDocument->saveHTML($childNode);
    }

    return implode('', $fragments);
}
21
+
22
/**
 * Fetches a URL with cURL and returns the response body.
 *
 * The feed scripts call this where they previously used file_get_contents(),
 * so it mirrors that function's contract: redirects are followed and a failed
 * transfer yields false (not an implicit null, which the callers would pass
 * on to string-only parsers).
 *
 * @param string $url  Address to fetch.
 * @param array  $opts Extra CURLOPT_* => value pairs. They are applied after
 *                     the defaults below and may override them, except for
 *                     CURLOPT_RETURNTRANSFER which is always forced on.
 * @return string|false Response body, or false when the transfer fails; the
 *                      cURL error text is sent to error_log().
 */
function http_get_contents($url, $opts = [])
{
  $ch = curl_init();
  if ($ch === false) {
    error_log('http_get_contents(' . $url . '): unable to initialise cURL');
    return false;
  }

  curl_setopt_array($ch, [
    // NOTE(review): certificate verification is switched off, which leaves
    // HTTPS fetches open to interception. Kept as in the original; callers
    // can re-enable it through $opts.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_TIMEOUT        => 10,
    CURLOPT_USERAGENT      => "Wget 1",
    // file_get_contents() follows redirects by default; do the same here.
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS      => 5,
    CURLOPT_URL            => $url,
  ]);

  if (is_array($opts)) {
    foreach ($opts as $key => $val) {
      curl_setopt($ch, $key, $val);
    }
  }

  // Set last so caller options cannot turn the body into direct output.
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

  $retval = curl_exec($ch);
  if ($retval === false) {
    error_log('http_get_contents(' . $url . '): ' . curl_error($ch));
    return false;
  }

  return $retval;
}
41
+?>
+139
sources/news-lemonde.fr.php
... ...
@@ -0,0 +1,139 @@
1
<?php
/**
 * Le Monde page fragment generator.
 *
 * Prints, in order:
 *   1. a headline list built from the NEWS_RSS_LEMONDE feed;
 *   2. one hidden <div id="article-N"> per headline containing a cleaned-up
 *      copy of the linked article.
 * N is the headline's position in the feed; it is the value passed to the
 * onArticle(N) click handler (defined outside this file).
 */
include_once( 'news-constants.php' );

// Escapes feed-supplied text for HTML output. Entities that are already
// encoded are left untouched (double_encode = false).
$esc = function ($value) {
  return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
};

// Defined up front so the article loop is simply skipped when the feed fails.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_LEMONDE);
$xml = is_string($rss_content) ? simplexml_load_string($rss_content) : false;
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . $esc($channel['title']) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    // Emit at most $NEWS_RSS_MAX_ITEMS headlines (the original let one extra through).
    if ($cpt >= $NEWS_RSS_MAX_ITEMS) {
      break;
    }
    $article = array();
    $article['title'] = (string) $item->title;
    // Drop the RSS tracking parameter from the article URL.
    $article['link'] = str_replace('?xtor=RSS-3208', '', (string) $item->link);
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    $article['image'] = (string) $item->enclosure['url'];
    // Keyed by position, not timestamp: two items sharing a pubDate would
    // overwrite each other and shift every index used by onArticle().
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$esc($article['image']).'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down"></i></a></div>&nbsp;';
    echo $esc($article['title']).'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$esc($article['link']).'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a><br></div></div>';
    $cpt++;
  }
}
// Close the headline column and open the article column. The matching opening
// tags are not printed by this script (presumably by the embedding page).
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';
foreach ($articles as $cpt => $article) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = $esc($article['link']);
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the article page. A failed or empty download yields an empty body
  // instead of handing invalid input to DOMDocument::loadHTML().
  $article_only = "";
  $article_content = ($article['link'] !== '') ? file_get_contents($article['link']) : false;
  if (is_string($article_content) && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    // Keep libxml's parser warnings out of the generated page.
    $libxml_previous = libxml_use_internal_errors(true);
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_previous);
    // When the page holds several <article> elements the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Make site-relative links absolute.
  $article_only = str_replace(' href="/', ' href="https://www.lemonde.fr/', $article_only);

  // Replace site-specific markup with plain tags (pairs matched by position).
  $orgStrings = array(
      '<p class="txt3 description-article" itemprop="description">',
      '<p class="bloc_signature">',
      '<span id="publisher" itemprop="Publisher" data-source="LE MONDE">',
      '<div class="toolbar"></div>',
      '<div id="articleBody" class="contenu_article js_article_body" itemprop="articleBody">',
      '<h4>',
      '</h4>',
      '<h1 class="tt2" itemprop="Headline">',
      '</h1>',
      '<h2 class="taille_courante">',
      '<h2 class="intertitre">',
      '</h2>',
      '<span>Le Monde</span>',
      '<figure class="illustration_haut   " style="width: 534px">',
      '<img width="534" data-lazyload="false" src="',
      ' onload="lmd.pic(this);" onerror="lmd.pic(this);" class="lazy-retina"' );
  $newStrings = array(
      '<p>',
      '<p>',
      '<span>',
      '',
      '<div>',
      '<h5>',
      '</h5>',
      '<h3>',
      '</h3>',
      '<h4>',
      '<h4>',
      '</h4>',
      'Le Monde',
      '<figure>',
      '<img width="100%" src="',
      '' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Collapse blank lines, then runs of whitespace.
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
  $article_only = preg_replace('/\s\s+/', ' ', $article_only);
  // Unwrap click-disabled links but keep their text. The pattern has only two
  // groups; the original referenced \3, which silently deleted the link text.
  $re = '/<a target="_blank" onclick="return false;" (.+?)">(.+?)<\/a>/';
  $article_only = preg_replace($re, '\\2', $article_only);
  // Turn lazy-loaded images into plain full-width images.
  $re = '/<img (.+?)data-src="(.+?)"(.+?)data-lazyload="true"(.+?)>/';
  $article_only = preg_replace($re, '<img width="100%" src="\\2">', $article_only);
  // Strip ad links.
  $re = '/<a href="http:\/\/ad.apsalar.com\/api\/v1\/ad(.+?)target="_blank(.+?)<\/a>/';
  $article_only = preg_replace($re, '', $article_only);

  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div>';
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div></div>';
}
?>
+160
sources/news-lesinrocks.fr.php
... ...
@@ -0,0 +1,160 @@
1
<?php
/**
 * Les Inrocks page fragment generator.
 *
 * Prints, in order:
 *   1. a headline list built from the NEWS_RSS_LESINROCKS feed;
 *   2. one hidden <div id="article-N"> per headline containing a cleaned-up
 *      copy of the linked article.
 * N is the headline's position in the feed; it is the value passed to the
 * onArticle(N) click handler (defined outside this file).
 */
include_once( 'news-constants.php' );

// Escapes feed-supplied text for HTML output. Entities that are already
// encoded are left untouched (double_encode = false).
$esc = function ($value) {
  return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
};

// Defined up front so the article loop is simply skipped when the feed fails.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_LESINROCKS);
$xml = is_string($rss_content) ? simplexml_load_string($rss_content) : false;
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . $esc($channel['title']) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    // Emit at most $NEWS_RSS_MAX_ITEMS headlines (the original let one extra through).
    if ($cpt >= $NEWS_RSS_MAX_ITEMS) {
      break;
    }
    $article = array();
    $article['title'] = (string) $item->title;
    // Drop the RSS tracking parameter from the article URL.
    $article['link'] = str_replace('?xtor=RSS-3208', '', (string) $item->link);
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    $article['image'] = (string) $item->enclosure['url'];
    // Keyed by position, not timestamp: two items sharing a pubDate would
    // overwrite each other and shift every index used by onArticle().
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$esc($article['image']).'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down"></i></a></div>&nbsp;';
    echo $esc($article['title']).'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$esc($article['link']).'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a><br></div></div>';
    $cpt++;
  }
}
// Close the headline column and open the article column. The matching opening
// tags are not printed by this script (presumably by the embedding page).
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';
foreach ($articles as $cpt => $article) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = $esc($article['link']);
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the article page. A failed or empty download yields an empty body
  // instead of handing invalid input to DOMDocument::loadHTML().
  $article_only = "";
  $article_content = ($article['link'] !== '') ? file_get_contents($article['link']) : false;
  if (is_string($article_content) && $article_content !== '') {
    // Non-ASCII characters become HTML entities so loadHTML() does not
    // misread the UTF-8 bytes.
    // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2.
    $article_content_utf8 = mb_convert_encoding($article_content, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    // Keep libxml's parser warnings out of the generated page.
    $libxml_previous = libxml_use_internal_errors(true);
    $doc->loadHTML($article_content_utf8);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_previous);
    // When the page holds several <article> elements the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Make site-relative links absolute and images full width.
  $orgStrings = array( ' href="/',
    '<img src=');
  $newStrings = array( ' href="http://www.lesinrocks.com/',
    '<img width="100%" src=' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Replace layout-specific markup with plain tags (pairs matched by position).
  $orgStrings = array( '<div class="row">',
    '<div class="col-md-2">',
    '<div class="col-md-8">',
    '<div class="container width_wrap">',
    '<div class="col-left">',
    '<h3> </h3>',
    '<span>PAR</span>',
    '<p></p>',
    '<div style="padding-top: 56.25% ">',
    '<h1>',
    '</h1>' );
  $newStrings = array('<div>',
    '<div>',
    '<div>',
    '<div>',
    '<div>',
    '',
    'par ',
    '',
    '<div>',
    '<h4>',
    '</h4>' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Collapse blank lines, then runs of whitespace.
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
  $article_only = preg_replace('/\s\s+/', ' ', $article_only);
  // Normalise embedded images to full width.
  $re='/<img class="alignnone size-full(.+?)" src="(.+?)"(.+?)>/';
  $article_only = preg_replace($re, '<img width="100%" src="\\2">', $article_only);
  $re='/<img class="aligncenter size-full(.+?)" src="(.+?)"(.+?)>/';
  $article_only = preg_replace($re, '<img width="100%" src="\\2">', $article_only);
  // Remove the Outbrain widget placeholder.
  $re='/<div class="OUTBRAIN"(.+?)data-ob-template="lesinrocks"><\/div>/';
  $article_only = preg_replace($re, '', $article_only);
  // Drop aspect-ratio padding wrappers and fixed iframe/embed dimensions.
  $re='/<div style="padding-top:(.+?)">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $re='/<iframe width="(.+?)" height="(.+?)" /';
  $article_only = preg_replace($re, '<iframe width="100%" ', $article_only);
  $re='/ width="(.+?)" height="(.+?)" frameborder="0" /';
  $article_only = preg_replace($re, ' width="100%" frameborder="0" ', $article_only);
  // Cut everything from the "read more" teaser onwards.
  $SEARCH='Prolongez votre lecture !';
  $posend = strpos($article_only,$SEARCH);
  if($posend) {
    $article_only = substr( $article_only, 0, $posend );
  }
  // Cut everything from the first in-content native ad onwards.
  $SEARCH='<div class="native-ad-in-content">';
  $posend = strpos($article_only,$SEARCH);
  if($posend) {
    $article_only = substr( $article_only, 0, $posend );
  }

  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div>';
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div></div>';
}
?>
+172
sources/news-liberation.fr.php
... ...
@@ -0,0 +1,172 @@
1
<?php
/**
 * Libération page fragment generator.
 *
 * Prints, in order:
 *   1. a headline list built from the NEWS_RSS_LIBERATION feed (read as an
 *      Atom document: <entry>, <updated>, <summary>, <link href=…>);
 *   2. one hidden <div id="article-N"> per headline containing a cleaned-up
 *      copy of the linked article.
 * N is the headline's position in the feed; it is the value passed to the
 * onArticle(N) click handler (defined outside this file).
 */
include_once( 'news-constants.php' );

// Escapes feed-supplied text for HTML output. Entities that are already
// encoded are left untouched (double_encode = false).
$esc = function ($value) {
  return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
};

// Defined up front so the article loop is simply skipped when the feed fails.
$articles = array();

$rss_content = file_get_contents(NEWS_RSS_LIBERATION);
$xml = is_string($rss_content) ? simplexml_load_string($rss_content) : false;
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = (string) $xml->title;
  $channel['pubDate'] = (string) $xml->updated;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . $esc($channel['title']) . '</h4>';
  $cpt = 0;
  foreach ($xml->entry as $item)
  {
    // Emit at most $NEWS_RSS_MAX_ITEMS headlines (the original let one extra through).
    if ($cpt >= $NEWS_RSS_MAX_ITEMS) {
      break;
    }
    $article = array();
    $article['title'] = (string) $item->title;
    // Drop the RSS tracking parameter from the article URL.
    $article['link'] = str_replace('?xtor=rss-450', '', (string) $item->link['href']);
    $article['pubDate'] = (string) $item->updated;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->summary;
    // Placeholder used when the entry carries no JPEG link.
    $article['image'] = "liberation.jpg";
    foreach ($item->link as $item2)
    {
      if ((string) $item2['type'] === "image/jpeg") {
        $image_href = (string) $item2['href'];
        $pos_start = strpos($image_href, "?modified_at");
        // Strip the cache-busting suffix when present; otherwise keep the
        // URL whole (the original produced an empty string in that case).
        $article['image'] = ($pos_start === false) ? $image_href : substr($image_href, 0, $pos_start);
      }
    }

    // Keyed by position, not timestamp: two entries sharing a date would
    // overwrite each other and shift every index used by onArticle().
    $articles[$cpt] = $article;
    echo "<div onclick=\"onArticle($cpt)\" style=\"display:inline;\">\n";
    echo '<img width="100%" src="'.$esc($article['image']).'" style="display:inline;"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div> ';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down"></i></a></div> ';
    echo $esc($article['title']).' ';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$esc($article['link']).'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a><br></div></div>';
    $cpt++;
  }
}
// Close the headline column and open the article column. The matching opening
// tags are not printed by this script (presumably by the embedding page).
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';
foreach ($articles as $cpt => $article) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = $esc($article['link']);
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the article page. A failed or empty download yields an empty body
  // instead of handing invalid input to DOMDocument::loadHTML().
  $article_only = "";
  $article_content_utf8 = "";
  $article_content = ($article['link'] !== '') ? file_get_contents($article['link']) : false;
  if (is_string($article_content) && $article_content !== '') {
    // Non-ASCII characters become HTML entities so loadHTML() does not
    // misread the UTF-8 bytes.
    // NOTE(review): the 'HTML-ENTITIES' target is deprecated as of PHP 8.2.
    $article_content_utf8 = mb_convert_encoding($article_content, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    // Keep libxml's parser warnings out of the generated page.
    $libxml_previous = libxml_use_internal_errors(true);
    $doc->loadHTML($article_content_utf8);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_previous);
    // When the page holds several <article> elements the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Keep only what precedes the side column. When the marker is missing the
  // extraction is discarded so the raw-markup fallback below takes over
  // (same outcome as the original substr(…, 0, false), made explicit).
  $SEARCH_SUB='<aside class="aside-column side-flow-bloc width-padded">';
  $pos_stop = strpos($article_only, $SEARCH_SUB);
  $article_only = ($pos_stop === false) ? '' : substr($article_only, 0, $pos_stop);

  if ($article_only === '') {
    // Fallback: slice the raw page between the schema.org NewsArticle marker
    // and the Outbrain block.
    $SEARCH_SUB='itemtype="http://schema.org/NewsArticle">';
    $pos_start=strpos($article_content_utf8, $SEARCH_SUB);
    if($pos_start) {
      $article_only = substr($article_content_utf8, $pos_start + strlen($SEARCH_SUB));
      $SEARCH_SUB='<aside class="outbrain">';
      $pos_stop=strpos($article_only, $SEARCH_SUB);
      if($pos_stop) {
        $article_only = substr($article_only, 0, $pos_stop);
      } else {
        echo "pos_stop null";
      }
    } else {
      echo "pos_start null";
    }
  }

  // Make site-relative links absolute.
  $article_only = str_replace(' href="/', ' href="http://www.liberation.fr/', $article_only);

  // Replace site-specific markup with plain tags (pairs matched by position).
  $orgStrings = array( '<p lang="en" dir="ltr">',
      '<p dir="ltr">',
      '<h4>',
      '</h4>',
      '<h1 class="article-headline">',
      '</h1>',
      '<h2 class="article-standfirst read-left-padding">',
      '<h2 class="intertitre">',
      '</h2>',
      '<figure class="article-image article-header-image"><a role="button" class="figure-zoom">',
      '<figure class="article-image article-header-image"><a role="button" class="figure-zoom js-figure-zoom">',
      '<img width="534" data-lazyload="false" src="',
      '<i class="zoom-icon"> <svg class="icon " role="img" width="22" height="22"><use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#icon-zoom"></use></svg></i>',
      ' onload="lmd.pic(this);" onerror="lmd.pic(this);" class="lazy-retina"',
      ' width="960">',
      '<figcaption class="legende" data-caption="',
      '</figcaption>' );
  $newStrings = array( '<p>',
      '<p>',
      '<h5>',
      '</h5>',
      '<h3>',
      '</h3>',
      '<h4>',
      '<h4>',
      '</h4>',
      '<figure>',
      '<figure>',
      '<img width="100%" src="',
      '',
      '',
      ' width="100%">',
      '<em>',
      '</em>' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Collapse blank lines, then runs of whitespace.
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
  $article_only = preg_replace('/\s\s+/', ' ', $article_only);
  // Unwrap internal section links, keeping their text.
  $re = '/<a class="lien_interne rub"(.+)>(.+)<\/a>/';
  $article_only = preg_replace($re, '\\2', $article_only);
  // Turn lazy-loaded images into plain full-width images.
  $re = '/<img (.+)data-src="(.+?)"(.+?)data-lazyload="true"(.+?)>/';
  $article_only = preg_replace($re, '<img width="100%" src="\\2">', $article_only);
  // Strip ad links and share widgets.
  $re = '/<a href="http:\/\/ad.apsalar.com\/api\/v1\/ad(.+?)target="_blank(.+?)<\/a>/';
  $article_only = preg_replace($re, '', $article_only);
  $re = '/<span class="share">(.+?)<\/span>/';
  $article_only = preg_replace($re, '', $article_only);
  // Remove the related-articles list wrappers.
  $orgStrings = array( '<ul class="article-rel-list">','<li class="article-rel-item">  </li>','</ul>');
  $newStrings = array( '','','' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div>';
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div></div>';
}
?>
+140
sources/news-slate.fr.php
... ...
@@ -0,0 +1,140 @@
1
<?php
/**
 * Slate.fr page fragment generator.
 *
 * Prints, in order:
 *   1. a headline list built from the NEWS_RSS_SLATEFR feed;
 *   2. one hidden <div id="article-N"> per headline containing a cleaned-up
 *      copy of the linked article.
 * N is the headline's position in the feed; it is the value passed to the
 * onArticle(N) click handler (defined outside this file).
 * Feed and articles are downloaded with http_get_contents() from
 * news-constants.php.
 */
include_once( 'news-constants.php' );

// Escapes feed-supplied text for HTML output. Entities that are already
// encoded are left untouched (double_encode = false).
$esc = function ($value) {
  return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
};

// Defined up front so the article loop is simply skipped when the feed fails.
$articles = array();

$rss_content = http_get_contents(NEWS_RSS_SLATEFR);
// http_get_contents() yields a non-string on failure; never hand that to the parser.
$xml = is_string($rss_content) ? simplexml_load_string($rss_content) : false;
if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . $esc($channel['title']) . '</h4>';
  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    // Emit at most $NEWS_RSS_MAX_ITEMS headlines (the original let one extra through).
    if ($cpt >= $NEWS_RSS_MAX_ITEMS) {
      break;
    }
    $article = array();
    $article['title'] = (string) $item->title;
    // Drop the RSS tracking parameter from the article URL.
    $article['link'] = str_replace('?xtor=RSS-3208', '', (string) $item->link);
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    $article['image'] = (string) $item->enclosure['url'];
    // Keyed by position, not timestamp: two items sharing a pubDate would
    // overwrite each other and shift every index used by onArticle().
    $articles[$cpt] = $article;
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$esc($article['image']).'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down"></i></a></div>&nbsp;';
    echo $esc($article['title']).'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$esc($article['link']).'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a><br></div></div>';
    $cpt++;
  }
}
// Close the headline column and open the article column. The matching opening
// tags are not printed by this script (presumably by the embedding page).
echo '</div><!-- ./panel-body -->';
echo '</div><!-- ./panel panel-default -->';
echo '</div><!-- ./col-md-6 -->';
echo '<div class="col-md-6">';
echo '<div class="panel panel-default">';
echo '<div class="panel-body">';
echo '<a name="article-top"></a><div id="article-current"></div>';
foreach ($articles as $cpt => $article) {
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;
  $link_attr = $esc($article['link']);
  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  echo "<a name=\"article-$cpt\"></a>";

  // Fetch the article page. A failed or empty download yields an empty body
  // instead of handing invalid input to DOMDocument::loadHTML().
  $article_only = "";
  $article_content = ($article['link'] !== '') ? http_get_contents($article['link']) : false;
  if (is_string($article_content) && $article_content !== '') {
    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $doc->formatOutput       = true;
    // Keep libxml's parser warnings out of the generated page.
    $libxml_previous = libxml_use_internal_errors(true);
    $doc->loadHTML($article_content);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_previous);
    // When the page holds several <article> elements the last one wins.
    foreach ($doc->getElementsByTagName('article') as $node) {
      $article_only = DOMinnerHTML($node);
    }
  }

  // Make site-relative links and media absolute, and images full width.
  $orgStrings = array( ' href="/',
    ' src="/sites/',
    '<img src=');
  $newStrings = array( ' href="http://www.slate.fr/',
    ' src="http://www.slate.fr/sites/',
    '<img width="100%" src=' );
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Replace layout-specific markup with plain tags (pairs matched by position).
  $orgStrings = array( '<div class="row">',
    '<div class="col-md-2">',
    '<div class="col-md-8">',
    '<div class="container width_wrap">',
    '<div class="col-left">',
    '<h3> </h3>',
    '<h1>',
    '</h1>',
    '<p style="text-align:center">' );
  $newStrings = array('<div>',
    '<div>',
    '<div>',
    '<div>',
    '<div>',
    '',
    '<h4>',
    '</h4>',
    '<p>');
  $article_only = str_replace($orgStrings, $newStrings, $article_only);

  // Collapse blank lines, then runs of whitespace.
  $article_only = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $article_only);
  $article_only = preg_replace('/\s\s+/', ' ', $article_only);
  // Drop fixed 640px sizing.
  $re = '/ style="height:(.+?)width: 640px;">/';
  $article_only = preg_replace($re, '>', $article_only);
  // Remove the inline ad-call script.
  $re = '/<script>\(function \(\) \{var sasCallOptions = \{ siteId: 59629(.+?)<\/script>/';
  $article_only = preg_replace($re, '', $article_only);
  // Normalise CMS images to full width.
  $re = '/<img data-file-id=(.+?)src="(.+?)"(.+?)>/';
  $article_only = preg_replace($re, '<img width="100%" src="\\2">', $article_only);
  // Remove fixed dimensions from embeds and iframes.
  $re='/<div class="media_embed" height="(.+?)" width="(.+?)">/';
  $article_only = preg_replace($re, '<div>', $article_only);
  $re='/<iframe allowfullscreen="" frameborder="0" height="(.+?)" src="(.+?)" width="(.+?)"><\/iframe>/';
  $article_only = preg_replace($re, '<iframe frameborder="0" width="100%" src="\\2"></iframe>', $article_only);
  $re='/<img width="100%" src="(.+?)" style="height:(.+?)width: (.+?)">/';
  $article_only = preg_replace($re, '<img width="100%" src="\\1">', $article_only);
  // Cut everything from the "read also" block onwards.
  $SEARCH='<div class="read-also">';
  $posend = strpos($article_only,$SEARCH);
  if($posend) {
    $article_only = substr( $article_only, 0, $posend );
  }

  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div>';
  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';
  echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-source" style="display:inline;"><a href="'.$link_attr.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>&nbsp;&nbsp;';
  echo '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>&nbsp;&nbsp;';
  echo '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div></div>';
}
?>
+131
sources/news-vice.fr.php
... ...
@@ -0,0 +1,131 @@
1
+<?php
2
+include_once( 'news-constants.php' );
3
+
4
// Download the Vice FR feed. http_get_contents() yields no value when the
// transfer fails, so the payload is only parsed when a non-empty string came back.
$rss_content = http_get_contents(NEWS_RSS_VICEFR);
$articles = array();
$xml = false;
if (is_string($rss_content) && $rss_content !== '') {
  // Rename <content:encoded> so the article body can be read further down
  // as $item->contentEncoded.
  $rss_content = str_replace(
    array("<content:encoded>", "</content:encoded>"),
    array("<contentEncoded>", "</contentEncoded>"),
    $rss_content
  );
  $xml = simplexml_load_string($rss_content);
}

if ($xml === false) {
  echo 'Failed to read RSS';
} else {
  // Channel metadata; only the title is printed by this script.
  $channel = array();
  $channel['title'] = (string) $xml->channel->title;
  $channel['link'] = (string) $xml->channel->link;
  $channel['description'] = (string) $xml->channel->description;
  $channel['pubDate'] = (string) $xml->channel->pubDate;
  $channel['timestamp'] = strtotime($channel['pubDate']);
  echo '<h4>' . htmlspecialchars($channel['title'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</h4>';

  $cpt = 0;
  foreach ($xml->channel->item as $item) {
    // Stop once exactly $NEWS_RSS_MAX_ITEMS headlines have been listed
    // (testing "$cpt > max" after the increment let one extra item through).
    if ($cpt >= $NEWS_RSS_MAX_ITEMS) {
      break;
    }

    // Keep plain strings so later stages do not depend on SimpleXML objects.
    $article = array();
    $article['title'] = (string) $item->title;
    $article['link'] = (string) $item->link;
    $article['pubDate'] = (string) $item->pubDate;
    $article['timestamp'] = strtotime($article['pubDate']);
    $article['description'] = (string) $item->description;
    $article['content'] = (string) $item->contentEncoded;
    $article['image'] = (string) $item->enclosure['url'];
    $articles[$cpt] = $article;

    // Feed values are external text: escape them before placing them in
    // HTML text or attribute context.
    $safe_title = htmlspecialchars($article['title'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $safe_link = htmlspecialchars($article['link'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    $safe_image = htmlspecialchars($article['image'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // One clickable headline entry; onArticle() is defined outside this script.
    echo '<div onclick="onArticle('.$cpt.')" style="display:inline;">';
    echo '<img src="'.$safe_image.'" style="display:inline;" width="100%"><br>';
    echo '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>&nbsp;&nbsp;';
    echo '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-down"></i></a></div>&nbsp;';
    echo $safe_title.'&nbsp;&nbsp;';
    echo '<div id="nav-source" style="display:inline;"><a href="'.$safe_link.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a><br></div></div>';

    $cpt++;
  }
}
44
// Emit the static markup that sits between the headline list and the article
// bodies: three closing tags (labelled panel-body, panel and col-md-6 by their
// own comments; the matching opening tags are not produced by this script),
// then a fresh column/panel and the "article-top" anchor with the
// "article-current" placeholder.
echo '</div><!-- ./panel-body -->',
     '</div><!-- ./panel panel-default -->',
     '</div><!-- ./col-md-6 -->',
     '<div class="col-md-6">',
     '<div class="panel panel-default">',
     '<div class="panel-body">',
     '<a name="article-top"></a><div id="article-current"></div>';
51
// Clean-up rules applied to every article body, strictly in this order
// (preg_replace() with arrays feeds each rule the previous rule's result):
//   1. collapse blank lines            2. collapse runs of whitespace
//   3. YouTube watch?v= embeds         4. YouTube watch?time_continue= embeds
//   5. drop inline max-width           6. drop the iframely 56.x% padding wrapper
//   7. YouTube /embed/ embeds          8. Twitter embeds
// The double space in rule 8 is presumably deliberate: rule 5 rewrites
// '<div style="max-width: Npx;"' to '<div ' and the attribute separator that
// followed the style attribute is left behind, giving '<div  data-iframely-id'.
// NOTE(review): rule 8 points the iframe at publish.twitter.com/oembed without
// a "url=" parameter name - confirm against the Twitter oEmbed API.
$content_patterns = array(
  "/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/",
  '/\s\s+/',
  '/<div data-iframely-id="(.+?)" data-embedded-url="https:\/\/www.youtube.com\/watch\?v=(.+?)"(.+?)>/',
  '/<div data-iframely-id="(.+?)" data-embedded-url="https:\/\/www.youtube.com\/watch\?time_continue=(.+?)&v=(.+?)"(.+?)>/',
  '/<div style="max-width: (.+?)px;"/',
  '/<div style="left: 0; width: 100%; height: 0; position: relative; padding-bottom: 56.(.+?)" data-iframely-smart-iframe="true">/',
  '/<div data-iframely-id="(.+?)" data-embedded-url="https:\/\/www.youtube.com\/embed\/(.+?)"(.+?)>/',
  '/<div  data-iframely-id="(.+?)" data-embedded-url="https:\/\/twitter.com\/(.+?)"(.+?)>/',
);
$content_replacements = array(
  "\n",
  ' ',
  '<div><iframe width="100%" src="https://www.youtube.com/embed/\\2?feature=oembed" frameborder="0" gesture="media" allowfullscreen></iframe>',
  '<div><iframe width="100%" src="https://www.youtube.com/embed/\\3?feature=oembed&start=\\2" frameborder="0" gesture="media" allowfullscreen></iframe>',
  '<div ',
  '<div>',
  '<div><iframe width="100%" src="https://www.youtube.com/embed/\\2" frameborder="0" gesture="media" allowfullscreen></iframe>',
  '<div><iframe width="100%" src="https://publish.twitter.com/oembed?https://twitter.com/\\2"></iframe>',
);

// Render one hidden <div class="article"> per collected feed item.
$cpt = 0;
foreach ($articles as $article) {
  // Neighbour indexes for the prev/next buttons. They are -1 for the first
  // article and one past the end for the last; onArticle() is defined outside
  // this script, so its handling of out-of-range values cannot be checked here.
  $cpt_prev = $cpt - 1;
  $cpt_next = $cpt + 1;

  // The link comes from the feed: escape it for use inside an attribute.
  $safe_link = htmlspecialchars((string) $article['link'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

  // The body is the feed's own HTML and is emitted as markup. If the regex
  // engine reports an error (preg_replace() returns null), fall back to the
  // unprocessed body rather than printing nothing.
  $article_only = (string) $article['content'];
  $cleaned = preg_replace($content_patterns, $content_replacements, $article_only);
  if ($cleaned !== null) {
    $article_only = $cleaned;
  }

  // Navigation fragments shared by the bars above and below the body.
  $nav_home = '<div id="nav-up" style="display:inline;"><a href="#top"><i class="fa fa-home"></i></a></div>';
  $nav_top = '<div id="nav-up" style="display:inline;"><a href="#article-top"><i class="fa fa-chevron-up"></i></a></div>';
  $nav_source = '<div id="nav-source" style="display:inline;"><a href="'.$safe_link.'" target="new-'.$cpt.'"><i class="fa fa-link"></i></a></div>';
  $nav_prev = '<div id="nav-prev" onclick="onArticle('.$cpt_prev.')" style="display:inline;"><i class="fa fa-chevron-left"></i></div>';
  $nav_next = '<div id="nav-next" onclick="onArticle('.$cpt_next.')" style="display:inline;"><i class="fa fa-chevron-right"></i></div>';

  echo '<!-- ==================== article '.$cpt.'============== -->';
  echo "<div class=\"article\" id=\"article-$cpt\" style=\"display: none;\">\n";
  echo "<hr>";
  // The named anchor is now closed; it used to stay open and wrap everything after it.
  echo "<a name=\"article-$cpt\"></a>";

  // Top bar: home, source, previous, next.
  echo $nav_home.'&nbsp;&nbsp;';
  echo $nav_source.'&nbsp;&nbsp;';
  echo $nav_prev.'&nbsp;&nbsp;';
  echo $nav_next;

  echo '<div class="extract-content" id="'.$cpt.'">'.$article_only.'</div>';

  // Bottom bar: home, back to article top, source, previous, next; the final
  // </div> closes the surrounding <div class="article">.
  echo $nav_home.' ';
  echo $nav_top.'&nbsp;&nbsp;';
  echo $nav_source.'&nbsp;&nbsp;';
  echo $nav_prev.'&nbsp;&nbsp;';
  echo $nav_next.'</div>';

  $cpt++;
  // Same cap test as before, so every article collected by the listing stage
  // still gets a body here.
  if ($cpt > $NEWS_RSS_MAX_ITEMS) {
    break;
  }
}
131
+?>