#!/usr/bin/python3
from feedgen.feed import FeedGenerator
import argparse
import requests
import re
import json
from bs4 import BeautifulSoup
# --- Module-level state shared between the functions of this script ---------

# Raw text buffers; raw_meta is declared global by meta_extract().
raw_page = None
raw_meta = None

# Counter used by download_page() to number the saved "page.<cpt>.html" files.
cpt = 0
# Evaluated once at import time with cpt == 0; download_page() builds its own
# local file name of the same form.
output_html = f"page.{cpt}.html"

# Application identity / TTL.
# NOTE(review): 86400 is presumably a TTL in seconds (one day) - confirm.
APPNAME = "RadioFrance2RSS"
APPTTL = 86400

# Page metadata fields; meta_extract() declares these as globals.
meta_image = None
meta_title = None
meta_published_time = None
meta_modified_time = None
meta_url = None
meta_description = None
meta_author = None

# URL components.
# NOTE(review): presumably filled from the parsed source URL - confirm.
url_scheme = None
url_fqdn = None
def download_page(url, timeout=30):
    """Download *url*, save the raw body to disk and return it as text.

    The response body is written unchanged to ``page.<cpt>.html`` in the
    current directory, where ``cpt`` is the module-level download counter.
    The counter is incremented after every successful download so that
    successive pages do not overwrite each other.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The body decoded as UTF-8, or ``None`` when the request fails, the
        status code is not 200, the file cannot be written, or the body is
        not valid UTF-8. Failures are reported on stdout.
    """
    global cpt

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to download. Status code: {response.status_code}")
        return None

    output_html = f"page.{cpt}.html"
    try:
        with open(output_html, 'wb') as file:
            file.write(response.content)
        text = response.content.decode("utf-8")
    except (OSError, UnicodeDecodeError) as e:
        print(f"An error occurred: {e}")
        return None

    # Report and advance the counter *before* returning: the original returned
    # first, which left both statements unreachable and reused page.0.html.
    print(f"Downloaded successfully and saved to {output_html}")
    cpt += 1
    return text
def meta_property(raw, key):
    """Return the first regex match found in *raw*, or ``None`` if there is none.

    NOTE(review): the pattern literal is empty and *key* is not used, so for
    any ``str`` input the first match is the empty string. The intended
    expression (presumably built from *key*) appears to be missing - confirm
    against the upstream source before relying on this function.
    """
    pattern = r''
    found = re.findall(pattern, raw)
    return found[0] if found else None
def meta_name(raw, key):
    """Return the first regex match found in *raw*, or ``None`` if there is none.

    NOTE(review): the pattern literal is empty and *key* is not used, so for
    any ``str`` input the first match is the empty string. The intended
    expression (presumably built from *key*) appears to be missing - confirm
    against the upstream source before relying on this function.
    """
    pattern = r''
    hits = re.findall(pattern, raw)
    if not hits:
        return None
    return hits[0]
def meta_extract():
global raw_meta
global meta_image
global meta_title
global meta_published_time
global meta_modified_time
global meta_url
global meta_description
global meta_author
str_meta_start='