Example #1
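# Note: os, rasterio, srem and the module-level names BAND_ID, WAVELENGTHS,
# REFLECTANCE_SCALING_FACTOR and MetadataParser are assumed to be imported or
# defined elsewhere in the original project's module.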
def apply_srem(input_raster_file: str, metadata_file: str,
               output_raster_file: str) -> None:
    out_dir = os.path.dirname(output_raster_file)
    os.makedirs(out_dir, exist_ok=True)
    m_parser = MetadataParser(metadata_file)
    band_id = BAND_ID[os.path.splitext(input_raster_file)[0][-3:]].value
    platform = m_parser.get_platform()
    wavelength = WAVELENGTHS[platform][band_id]
    angles = m_parser.get_mean_angles(band_id)

    with rasterio.open(input_raster_file) as src:
        toa_reflectance = src.read(1) / REFLECTANCE_SCALING_FACTOR
        profile = src.profile
        nodata_mask = (toa_reflectance == 0)

    surface_reflectance = srem.srem(toa_reflectance=toa_reflectance,
                                    wavelength=wavelength,
                                    **angles)
    scaled_surface_reflectance = \
        surface_reflectance * REFLECTANCE_SCALING_FACTOR
    # Clip values below 1 so that 1 is the minimum valid value.
    scaled_surface_reflectance[scaled_surface_reflectance < 1] = 1
    scaled_surface_reflectance[nodata_mask] = 0

    profile.update(driver='GTiff', compress='deflate', nodata=0)
    with rasterio.open(output_raster_file, 'w', **profile) as dst:
        dst.write(scaled_surface_reflectance.astype(profile['dtype']),
                  indexes=1)
Example #2
def test_get_mean_angles(S2A_metadata_file, angle_keys):
    m_parser = MetadataParser(S2A_metadata_file)
    for band_id in BAND_ID:
        angles = m_parser.get_mean_angles(band_id.value)
        assert set(angles.keys()) == angle_keys
        for angle in angles.values():
            assert isinstance(angle, float)
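# Note: S2A_metadata_file and angle_keys are pytest fixtures supplied by the
# original test suite; they are not defined in this snippet.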
Example #3
    def fetch(self) -> None:
        try:
            r = requests.get(self.url, headers=self.headers)
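        # Assumption: ConnectionError here refers to
        # requests.exceptions.ConnectionError; the builtin ConnectionError
        # would not catch requests' connection failures.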
        except ConnectionError:
            self.is_invalid = True
            return

        r.encoding = "utf-8"
        page = MetadataParser(html=r.text)

        self.title = page.get_metadata("title")
        self.description = page.get_metadata("description")
        self.image = page.get_metadata("image")
Example #4
    def url_matcher(self, msg, match):
        url = match.group(0)
        r = requests.head(url)
        max_size = self.config['DOC_MAX_SIZE']
        max_len = self.config['DOC_MAX_LEN']

        # files that are too big cause trouble. Let's just ignore them.
        if 'content-length' in r.headers and \
           int(r.headers['content-length']) > max_size:
            return

        # ignore anything that is not allowed in configuration
        allowed_content_types = self.config['ALLOWED_CONTENT_TYPES']
        content_type = ''
        if 'content-type' in r.headers:
            content_type = re.sub(r'\s*\;.*$', '', r.headers['content-type'])
            content_type = content_type.strip()

        if content_type not in allowed_content_types:
            return

        html = requests.get(url).text
        readable_article = Document(html).summary()
        readable_article = self.text_cleanup(readable_article)

        if len(readable_article) > max_len:
            readable_article = readable_article[:max_len] + '...'

        readable_title = Document(html).title()

        page = MetadataParser(html=html)
        readable_description = page.get_metadata('description')

        if readable_description is None:
            readable_description = ''

        readable_description = self.text_cleanup(readable_description)

        description = ''
        if len(readable_description) > len(readable_article):
            description = readable_description
        else:
            description = readable_article

        if description:
            return "~> {}\n~> {}\n~> {}".format(url,
                                            readable_title,
                                            description)
        else:
            return "~> {}\n~> {}".format(url,
                                     readable_title) 
Example #6
    def __init__(self, fpga_hostname='localhost', rffe_hostname='localhost', debug=False):
        self.fpga_hostname = fpga_hostname
        self.rffe_hostname = rffe_hostname
        self.debug = debug

        from metadata_parser import MetadataParser
        self.metadata_parser = MetadataParser()
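# Note: the MetadataParser used here takes no constructor arguments and (in
# the fuller Example #23 below) is driven via parse()/options, so it appears
# to be a project-local configuration parser rather than the HTML
# metadata_parser package used in the other examples.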
Example #7
def create_link_preview(page: MetadataParser, page_meta: dict,
                        url: str) -> Optional[str]:
    """
    Create a preview bookmark card from a URL.

    :param MetadataParser page: Page object create from URL to be parsed.
    :param dict page_meta: Page metadata parsed from the head of the target URL.
    :param str url: URL of the linked third-party post/article.

    :returns: Optional[str]
    """
    try:
        title, description, page_type = parse_scraped_metadata(page_meta)
        image = page.get_metadata_link("image",
                                       allow_encoded_uri=True,
                                       require_public_global=True)
        if title is not None and description is not None:
            preview = f"\n\n<b>{title}</b>\n{description}\n{url}"
            if page_type:
                preview += f"\n{page_type.title()}"
            if image:
                preview += f"\n{image}"
            return preview
    except Exception as e:
        LOGGER.error(
            f"Unexpected error while generating link preview card: {e}")
Example #8
def scrape_metadata_from_url(url: str) -> Optional[str]:
    """
    Fetch metadata for a given URL.

    :param str url: Link to third-party content, for which to create a link preview.

    :returns: Optional[str]
    """
    try:
        # Parse page metadata as dict
        page = MetadataParser(
            url=url,
            url_headers=headers,
            search_head_only=True,
            only_parse_http_ok=True,
            raise_on_invalid=True,
        )
        page_meta = page.parsed_result.metadata
        if page_meta:
            return create_link_preview(page, page_meta, url)
    except HTTPError as e:
        LOGGER.error(f"Failed to fetch metadata for URL `{url}`: {e}")
    except RequestException as e:
        LOGGER.error(
            f"RequestException error while scraping metadata for URL `{url}`: {e}"
        )
    except InvalidDocument as e:
        LOGGER.error(
            f"InvalidDocument encountered while fetching metadata for URL `{url}`: {e}"
        )
    except Exception as e:
        LOGGER.error(
            f"Unexpected error while scraping metadata for URL `{url}`: {e}")
Example #9
    def fetch_metadata(self) -> None:

        if self.album_art or self.title:
            return

        if not self.md:
            req = Request('GET',
                          self.song_link,
                          headers={'User-Agent': 'curl/7.54.0'})
            prepped = req.prepare()
            s = requests.Session()
            r = s.send(prepped)

            if r.status_code == 200:
                self.share_link = r.url

                mp = MetadataParser(html=r.text, search_head_only=True)
                self.md = mp.metadata
                self.title = self.md['og']['title']
                image_link = self.md['og']['image']

                if image_link.startswith('http:'):
                    image_link = 'https:' + image_link[5:]

                self.album_art = image_link
Example #10
    def get_or_create_ressource(self, url):
        try:
            ressource = self.get(url=url)
        except Ressource.DoesNotExist:
            ressource = Ressource(url=url)

            md_strategy = ['og', 'dc', 'page', 'meta']
            md = MetadataParser(url=url)

            ressource.title = md.get_metadata('title', strategy=md_strategy)
            ressource.excerpt = md.get_metadata('description', strategy=md_strategy)
            ressource.image = md.get_metadata('image', strategy=md_strategy)

        ressource.save()

        return ressource
Example #11
def get_first_appearance(page: metadata_parser.MetadataParser,
                         prop: str) -> Optional[str]:
    """
    Return the first value found for the given property on the parsed page.
    """
    prop_data = page.get_metadatas(prop)
    if isinstance(prop_data, list):
        return prop_data[0]
    return None
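# Hypothetical usage with an in-memory document; get_metadatas() returns a
# list when the property is present:
import metadata_parser
page = metadata_parser.MetadataParser(html="<html><head><title>Demo</title></head></html>")
print(get_first_appearance(page, 'title'))  # expected output: Demo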
Example #12
def fetch_metadata(html: str) -> Tuple[str, str, str]:
    """
    Extracting page metadata.
    :param html: HTML of the document
    :return: Tuple of title, keywords and descriptions
    """
    parser = MetadataParser(html=html, search_head_only=True)

    return (
        parser.metadata['page'].get('title', ''),
        parser.metadata['meta'].get('keywords', ''),
        parser.metadata['meta'].get('description', '')
    )
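# Hypothetical usage of fetch_metadata() above:
title, keywords, description = fetch_metadata(
    "<html><head><title>Demo</title>"
    "<meta name='description' content='A demo page.'>"
    "</head><body></body></html>")
# title == 'Demo', keywords == '', description == 'A demo page.'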
Example #13
    def get_metadata_parser(cls, file_path: str) -> MetadataParser:
        """Return a metadata parser compatible with the file at the specified path."""
        with open(file_path) as metadata_file:
            header_line = metadata_file.readline()
        if "METADATA:2.0" in header_line:
            # parse this file with the MetadataV2 parser
            return MetadataParser(file_path)
        # files with no version marker and versions 1.0.1 -> 1.1.8 both use
        # the legacy parser
        return MetadataParserLegacy(file_path)
Example #14
def fetch_president_articles():
    from metadata_parser import MetadataParser

    created = 0
    updated = 0

    articles_to_fetch = PresidentCandidateArticle.objects.filter(
        information__isnull=True)

    for article in articles_to_fetch:
        page = MetadataParser(url=article.url)

        title = first_or_none(page.get_metadatas('title'))
        description = first_or_none(page.get_metadatas('description'))
        site = first_or_none(page.get_metadatas('site_name'))
        url = page.get_url_canonical()
        url = url if url else article.url
        image_url = page.get_metadata_link('image')

        information_obj, is_created = PresidentCandidateArticleInformation.objects.update_or_create(
            article=article,
            defaults={
                'title': title,
                'description': description,
                'site': site,
                'url': url
            })

        save_image_from_url(information_obj.image, image_url)

        if is_created:
            created += 1
        else:
            updated += 1

    return {'created': created, 'updated': updated}
Example #15
from metadata_parser import MetadataParser
from opengraph import OpenGraph
import webpreview

url = 'https://health.usnews.com/wellness/health-buzz/articles/2018-01-05/smelling-your-partners-shirt-could-decrease-your-stress-levels-study-says'
page = MetadataParser(url=url)
print(page.metadata)
print(page.get_metadata('title'))

og = OpenGraph(url=url)
print(og)

wb = webpreview.OpenGraph(url, ['og:title', 'og:description'])
print(wb.title)
print(wb.description)
Example #16
from metadata_parser import MetadataParser

if 0:
    a = MetadataParser(url='http://cnn.com')
    print(a.get_metadata('title'))

    b = MetadataParser(url='http://nyt.com')
    print(b.get_metadata('title'))

    c = MetadataParser(url='http://thedailybeast.com')
    print(c.get_metadata('title'))

    print("\n-------------------------------------------------------\n")
    print(a.metadata)
    print("\n-------------------------------------------------------\n")
    print(b.metadata)
    print("\n-------------------------------------------------------\n")
    print(c.metadata)
    print("\n-------------------------------------------------------\n")

    print(c.get_metadata('title'))
    print(c.get_metadata('canonical'))
    print(c.get_metadata('url'))
    print(c.absolute_url(c.get_metadata('canonical')))
    print(c.absolute_url(c.get_metadata('url')))
    print(c.get_discrete_url())

if 0:
    a = MetadataParser(url='http://liqr.co/rsvpnewyork')
    print("title:")
    print(a.get_metadata('title'))
Example #17
from metadata_parser import MetadataParser
import pdb
import pprint

# hey use lxml >= 2.3.5 ; use 3.x though!
# otherwise this site will break ! http://www.nasa.gov/externalflash/discovery/index.html

if 0:
    a = MetadataParser(url='http://cnn.com')
    print(a.get_metadata('title'))

    b = MetadataParser(url='http://nyt.com')
    print(b.get_metadata('title'))

    c = MetadataParser(url='http://thedailybeast.com')
    print(c.get_metadata('title'))

    print("\n-------------------------------------------------------\n")
    print(a.metadata)
    print("\n-------------------------------------------------------\n")
    print(b.metadata)
    print("\n-------------------------------------------------------\n")
    print(c.metadata)
    print("\n-------------------------------------------------------\n")

    print(c.get_metadata('title'))
    print(c.get_metadata('canonical'))
    print(c.get_metadata('url'))
    print(c.absolute_url(c.get_metadata('canonical')))
    print(c.absolute_url(c.get_metadata('url')))
    print(c.get_discrete_url())
Example #19
    def __init__(self, html):
        self.meta = MetadataParser(html=html).metadata
        try:
            # Round-trip through JSON to flatten the FlatterDict into plain
            # built-in types; json.loads parses the string json.dumps produces.
            self.data = json.loads(json.dumps(FlatterDict(self.meta)))
        except (TypeError, ValueError):
            self.data = self.meta
Example #20
def social_card_image(page_url):
    parser = MetadataParser(url=page_url, search_head_only=True)

    link = parser.get_metadata_link('image', strategy=['og'])

    return link
Example #21
def parsearticle(article, pathuuid):
    mainimage = {}
    images = []
    # Decode and parse the article payload once instead of re-parsing it for
    # every field access.
    article_data = json.loads(article.decode('utf-8'))
    render_url = ("http://" + os.getenv("RENDER_HOST") + ":3000/render/" +
                  urllib.parse.quote_plus(article_data["link"]))
    req = requests.get(render_url)
    print(render_url)
    rendered = json.loads(req.text)
    articletext = MetadataParser(html=rendered['html'])
    imgurl = str(articletext.get_metadata('image'))
    if not imgurl.startswith("http"):
        imgurl = 'http:' + imgurl
    imgurlnopost = imgurl.rsplit('?', 1)[0]
    imgname = imgurlnopost.rsplit('/', 1)[-1]
    imgpath = pathuuid + '/' + imgname + str(uuid.uuid4())
    publication = article_data["publication"]
    category = article_data["category"]
    title = article_data["title"]
    articleurl = article_data["link"]
    geturl = None
    os.mkdir(pathuuid)
    count = 0
    try:
        geturl = urllib.request.urlretrieve(imgurl, imgpath)
    except Exception:
        pass
    while not geturl:
        # Re-render the page and retry the download; give up after ten
        # failed attempts.
        count += 1
        if count > 10:
            raise ValueError('Article failed too many times')
        req = requests.get(render_url)
        rendered = json.loads(req.text)
        articletext = MetadataParser(html=rendered['html'])
        imgurl = str(articletext.get_metadata('image'))
        imgurlnopost = imgurl.rsplit('?', 1)[0]
        imgname = imgurlnopost.rsplit('/', 1)[-1]
        try:
            geturl = urllib.request.urlretrieve(imgurl, imgpath)
        except Exception:
            pass
    mainimage['imgurl'] = imgurl
    mainimage['imgname'] = imgname
    mainimage['imgpath'] = imgpath
    mainimage['content_type'] = geturl[1]['Content-Type']
    images.append(mainimage)
    images1 = getimages(rendered['html'],
                        rendered['tree']['frameTree']['resources'], images,
                        pathuuid)
    try:
        articletext = fulltext(rendered['html'], language='en')
    except Exception:
        articletext = ""
    thing = {}
    thing['title'] = title
    thing['articletext'] = articletext
    thing['summary'] = summarize(articletext)
    thing['assets'] = images1
    thing['publication'] = publication
    thing['category'] = category
    thing['articleurl'] = articleurl
    thing['html'] = rendered['html']

    return thing
Example #22
	  	"(select distinct url_md5 from url_meta) " \
	  "group by url order by count(*) desc;"

cur.execute(sql)
urls = cur.fetchall()

i = 0
for url in urls:
    i = i + 1
    url = remove_characters(url[0])
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
            'From': '*****@*****.**'  # This is another valid field
        }
        page = MetadataParser(url=url, requests_timeout=5, url_headers=headers)
        title = remove_characters(page.get_metadata('title'))
        url_resolved = remove_characters(page.get_metadata('url'))
        image = remove_characters(page.get_metadata('image'))
        description = remove_characters(page.get_metadata('description'))

        # Use a parameterized query to avoid SQL injection and quoting
        # errors in the scraped values.
        sql = ("insert into url_meta (title, description, url, url_md5, image) "
               "values (%s, %s, %s, md5(%s), %s)")
        params = (title, description, url_resolved, url, image)
    except Exception as e:
        e = remove_characters(str(e))
        sql = ("insert into url_meta (title, description, url, url_md5, image) "
               "values ('error', %s, %s, md5(%s), '')")
        params = (e, url, url)
    finally:
        cur.execute(sql, params)
        cur.execute("commit;")
        if i % 100 == 0:
            print(i)
Example #23
class BPMExperiment():

    def __init__(self, fpga_hostname='localhost', rffe_hostname='localhost', debug=False):
        self.fpga_hostname = fpga_hostname
        self.rffe_hostname = rffe_hostname
        self.debug = debug

        from metadata_parser import MetadataParser
        self.metadata_parser = MetadataParser()

    def load_from_metadata(self, input_metadata_filename):
        # Parse metadata file into a dictionary
        self.metadata_parser.parse(input_metadata_filename)
        self.metadata = self.metadata_parser.options

    def get_metadata_lines(self):
        experiment_parameters = list(self.metadata.keys())
        lines = []
        for key in experiment_parameters:
            lines.append(key + ' = ' + self.metadata[key] + '\n')
        return lines

    def run(self, data_filename, datapath):
        if datapath == 'adc':
            data_rate_decim_factor = '1'
            acq_channel = '0'
            acq_npts = '100000'
        elif datapath == 'tbt':
            data_rate_decim_factor = self.metadata['adc_clock_sampling_harmonic'].split()[0] # FIXME: data_rate_decim_factor should ideally be read from the FPGA
            acq_channel = '1'
            acq_npts = '100000'
        elif datapath == 'fofb':
            data_rate_decim_factor = '1000' # FIXME: data_rate_decim_factor should ideally be read from the FPGA
            acq_channel = '3'
            acq_npts = '1000000'
        else:
            raise ValueError("unknown datapath '" + datapath + "'")

        deswitching_phase_offset = str(int(self.metadata['dsp_deswitching_phase'].split()[0]) - int(self.metadata['rffe_switching_phase'].split()[0]))

        import subprocess
        # Run FPGA configuration commands
        command_argument_list = ['fcs_client']
        command_argument_list.extend(['--setdivclk', self.metadata['rffe_switching_frequency_ratio'].split()[0]])
        command_argument_list.extend(['--setkx', self.metadata['bpm_Kx'].split()[0]])
        command_argument_list.extend(['--setky', self.metadata['bpm_Ky'].split()[0]])
        command_argument_list.extend(['--setphaseclk', deswitching_phase_offset])
        command_argument_list.extend(['--setsw' + self.metadata['rffe_switching'].split()[0]])
        command_argument_list.extend(['--setwdw' + self.metadata['dsp_sausaging'].split()[0]])
        command_argument_list.extend(['--setsamples', acq_npts])
        command_argument_list.extend(['--setchan', acq_channel])
        command_argument_list.extend(['--setfpgahostname', self.fpga_hostname])
        if not self.debug:
            subprocess.call(command_argument_list)
        else:
            print(command_argument_list)

        # Run RFFE configuration commands
        command_argument_list = ['fcs_client']
        command_argument_list.extend(['--setfesw' + self.metadata['rffe_switching'].split()[0]])
        att_items = self.metadata['rffe_attenuators'].split(',')
        for i, item in enumerate(att_items, start=1):
            item = item.strip()  # strip() returns a new string; keep the result
            command_argument_list.extend(['--setfeatt' + str(i), item.split()[0]])
        command_argument_list.extend(['--setrffehostname', self.rffe_hostname])
        if not self.debug:
            subprocess.call(command_argument_list)
        else:
            print(command_argument_list)

        # TODO: Check if everything was properly set

        # Timestamp the start of data acquisition
        # FIXME: timestamp should ideally come together with data.
        from time import time
        t = time()

        # Run acquisition
        command_argument_list = ['fcs_client']
        command_argument_list.append('--startacq')
        command_argument_list.extend(['--setfpgahostname', self.fpga_hostname])
        if not self.debug:
            p = subprocess.call(command_argument_list)
        else:
            print(command_argument_list)

        # The script execution is blocked here until data acquisition has completed

        # Get the result of data acquisition and write it to data file
        command_argument_list = ['fcs_client']
        command_argument_list.extend(['--getcurve', acq_channel])
        command_argument_list.extend(['--setfpgahostname', self.fpga_hostname])

        # Ensure file path exists
        import os
        path = os.path.dirname(data_filename)
        os.makedirs(path, exist_ok=True)

        f = open(data_filename, 'x')
        if not self.debug:
            p = subprocess.call(command_argument_list, stdout=f)
        else:
            f.writelines(['10 11 -9 80\n54 5 6 98\n'])
            print(command_argument_list)
        f.close()

        # Compute data file signature
        f = open(data_filename, 'r')
        text = f.read()
        f.close()

        import hashlib
        signature_method = self.metadata['data_signature_method'].split()[0]
        if signature_method == 'md5':
            md = hashlib.md5()
        elif signature_method == 'sha-1':
            md = hashlib.sha1()
        elif signature_method == 'sha-256':
            md = hashlib.sha256()
        else:
            raise ValueError('unsupported data_signature_method: ' + signature_method)
        md.update(text.encode(f.encoding))
        filesignature = md.hexdigest()

        # Format date and hour as a standard UTC timestamp (ISO 8601)
        from time import strftime, gmtime
        from math import floor
        ns = int(floor((t * 1e9) % 1e9))
        timestamp_start = '%s.%09dZ' % (strftime('%Y-%m-%dT%H:%M:%S', gmtime(t)), ns)

        # Throw away the absolute path of the data filename
        data_filename_basename = os.path.basename(data_filename)

        # Build metadata file based on template metadata file and post-processed metadata

        config_base_metadata_lines = self.get_metadata_lines()

        config_automatic_lines = []
        config_automatic_lines.append('data_original_filename = ' + data_filename_basename + '\n')
        config_automatic_lines.append('data_signature = ' + filesignature + '\n')
        config_automatic_lines.append('dsp_data_rate_decim_factor = ' + data_rate_decim_factor + '\n')
        config_automatic_lines.append('timestamp_start = ' + timestamp_start + '\n')
        #config_automatic_lines.append('adc_board_temperature = ' + '0' + ' C\n') #TODO: implement ADC temperature read on FPGA
        #config_automatic_lines.append('rffe_board_temperature = ' + '0' + ' C\n') #TODO: implement RFFE temperature read on FPGA

        config_fromfile_lines = []
        config_fromfile_lines.extend(config_base_metadata_lines)
        config_fromfile_lines.extend(config_automatic_lines)

        # Metadata file is placed in the same path and with the same filename as the data file, but with .metadata extension
        output_metadata_filename = os.path.splitext(data_filename)[0] + '.metadata'

        f = open(output_metadata_filename, 'x')
        f.writelines(sorted(config_fromfile_lines))
        f.close()
Example #25
def _test_get_platform(metadata_file, expected_platform):
    m_parser = MetadataParser(metadata_file)
    platform = m_parser.get_platform()
    assert platform == expected_platform
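# A plausible pytest entry point for the helper above; the metadata file name
# and expected platform string are hypothetical:
import pytest

@pytest.mark.parametrize('metadata_file, expected_platform', [
    ('MTD_MSIL1C_S2A.xml', 'Sentinel-2A'),
])
def test_get_platform(metadata_file, expected_platform):
    _test_get_platform(metadata_file, expected_platform)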