Example #1
def search(STATE=None):
    if STATE is None:
        raise Exception('You must provide STATE')

    if STATE.search_results == []:
        query = input('Enter search term: ')
        if query == '':
            return [], STATE

        query = parse_search_query(query)
        params = {
            'search_query': query,
            'start': 0,
            'max_results': 50,
            'sortBy': 'relevance',
            'sortOrder': 'descending'
        }
        query = '&'.join([str(x) + '=' + str(y) for x, y in params.items()])

        r = requests.get(endpoint() + query, timeout=10)
        results = atoma.parse_atom_bytes(r.content).entries
        STATE.search_results = results
    else:
        results = STATE.search_results

    results = [feed.Feed(r) for r in results]
    return results, STATE
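A minimal driver sketch for the search() above, assuming STATE is any object with a mutable search_results list (endpoint(), parse_search_query() and feed.Feed come from the surrounding project):

class AppState:
    def __init__(self):
        self.search_results = []

state = AppState()
results, state = search(state)   # first call prompts for a query and hits the API
results, state = search(state)   # later calls reuse the cached entries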
Example #2
 def fetch(self):
     get = self.session.get if self.session is not None else requests.get
     resp = get(self.url, headers=generate_headers(self.url))
     try:
         return parse_atom_bytes(resp.content)
     except Exception:
         # content is not a valid Atom document; fall back to RSS parsing
         return parse_rss_bytes(resp.content)
Example #3
def search(STATE=None):
    if STATE is None:
        raise Exception('You must provide STATE')

    if STATE.search_results == []:
        print('ti:title        au:author     abs:abstract      co:comment')
        print('jr:journal ref  cat:subj-cat  rn:report number')
        print('BLANK INPUT TO GO BACK')
        print(' ')
        query = input('Enter search term: ')
        if query == '':
            return [], STATE

        params = {
            'search_query': query,
            'start': 0,
            'max_results': 50,
            'sortBy': 'relevance',
            'sortOrder': 'descending'
        }
        query = '&'.join([str(x) + '=' + str(y) for x, y in params.items()])

        r = requests.get(endpoint() + query, timeout=10)
        results = atoma.parse_atom_bytes(r.content).entries
        STATE.search_results = results
    else:
        results = STATE.search_results

    results = [feed.Feed(r) for r in results]
    return results, STATE
Example #4
def get_provinces_atoms_url(url, province_code=None):
    """
    Reads the top-level Catastro INSPIRE Atom feed, which links to the
    individual Atom feeds for each province.

    Returns a list of (atom URL, title) tuples.
    """
    response = requests.get(url)
    feed = atoma.parse_atom_bytes(response.content)

    atoms_provincias = []

    for entry in feed.entries:
        if province_code is not None:
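            # segment [3] of the dot-split file name is expected to be
            # 'atom_XX', where XX is the zero-padded province code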
            if os.path.basename(
                    entry.links[0].href).split('.')[3] == 'atom_{}'.format(
                        str(province_code).zfill(2)):
                url = parse_url(entry.links[0].href)
                title = entry.title.value
                atoms_provincias.append((url, title))
        else:
            url = parse_url(entry.links[0].href)
            title = entry.title.value
            atoms_provincias.append((url, title))

    return atoms_provincias
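A hedged usage sketch; the feed URL below is only a placeholder for the real Catastro INSPIRE Atom service endpoint:

CATASTRO_ATOM_URL = 'https://example.org/catastro/inspire/buildings.atom'  # placeholder

all_provinces = get_provinces_atoms_url(CATASTRO_ATOM_URL)
# province_code is zero-padded to two digits inside the function
only_15 = get_provinces_atoms_url(CATASTRO_ATOM_URL, province_code=15)
for atom_url, title in only_15:
    print(title, atom_url)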
Example #5
def syncronize(config_file, config, username, last_updated, mastodon):
    pixelfeed_get = requests.get(
        'https://pixelfed.social/users/{}.atom'.format(username))
    pixelfeed = atoma.parse_atom_bytes(pixelfeed_get.content)

    latest_post = pixelfeed.entries[0]
    last_updated_atom = latest_post.updated

    if last_updated == last_updated_atom:
        print("Up-to-date")
        sys.exit(0)

    config['updated'] = last_updated_atom

    with open(config_file, "w") as f:
        toml.dump(config, f)
        print("Config file updated")

    image_name = latest_post.title.value
    image_url = re.search(r"(?P<url>https?://[^\s]+)", latest_post.summary.value) \
        .group("url") \
        .rstrip('">') \
        .replace("_thumb", "")
    tmp = tempfile.NamedTemporaryFile(suffix=".jpg")
    get_image = requests.get(image_url)
    tmp.write(get_image.content)
    mastodon_media = mastodon.media_post(tmp.name)
    mastodon.status_post(image_name, media_ids=mastodon_media['id'])
    print("Status posted: ", image_name)
    tmp.close()
Example #6
    def get(self, code_id):
        """
        Bounces the information from 9gag rss api to our own,
        with caching.

        Example :: http://127.0.0.1:5000/9GAGComic
        
        Arguments:
            code_id {str} -- Name of the channel
        
        Returns:
            [requests.text] -- Information acquired from the 9gag rss api feed.
        """
        cache_timer = 20  # 20 secs
        # TODO #Feature, Remove the oldest item in cache
        if len(cache.keys()) >= 1:  # do we have anything stored
            if (int(time.time()) - max(cache.keys()) <
                    cache_timer):  # (OLD TIME - CACHED TIME) < TIME GAP
                logger.debug('Using Cached Version.')
                return cache[max(cache.keys())]
        params = {'code': code_id, 'format': '1'}
        logger.debug('Getting Fresh Copy.')
        resp = requests.get('https://9gag-rss.com/api/rss/get', params=params)
        feed = atoma.parse_atom_bytes(resp.content)
        cache[int(time.time())] = resp.text
        return resp.text
Example #7
 def get_posts(self):
     try:
         response = requests.get(
             "http://kempfolds.blogspot.com/feeds/posts/default")
         feed = atoma.parse_atom_bytes(response.content)
         return feed
     except requests.exceptions.RequestException as e:
         print(e)
Example #8
 async def fetch(self):
     async with self.session.get(self.url,
                                 headers=generate_headers(
                                     self.url)) as response:
         content = await response.read()
         try:
             return parse_atom_bytes(content)
         except Exception:
             # content is not a valid Atom document; fall back to RSS parsing
             return parse_rss_bytes(content)
Example #9
    def _get_feed(self):
        """Get issues from rss url"""
        r = requests.get(self.feed)
        if r.status_code != requests.codes.ok:
            log.debug(
                f'{r.status_code} Error: {r.reason} for url: {self.feed}')
            return []

        return atoma.parse_atom_bytes(r.content).entries
Example #10
 def feeds_public(self, sort="hot"):
     r = self.session.get("https://ruqqus.com/feeds/" + sort)
     try:
         return [
             self.post(i.id_.split('/')[-2])
             for i in atoma.parse_atom_bytes(r.content).entries
         ]
     except atoma.exceptions.FeedXMLError:
         return r
Example #11
def get_python_insider_news():
    response = requests.get(
        'http://feeds.feedburner.com/PythonInsider?fmt=xml')
    feed = atoma.parse_atom_bytes(response.content)
    # the same feed parsed with feedparser as well, for comparison
    d = feedparser.parse(
        'http://feeds.feedburner.com/PythonInsider?fmt=xml')
    entries = d['entries']
    return feed, entries
Example #12
def _get_feed(feed_content, payload):
    if payload['source']['type'] == 'atom':
        feed = atoma.parse_atom_bytes(feed_content)
    elif payload['source']['type'] == 'rss':
        feed = atoma.parse_rss_bytes(feed_content)
    else:
        raise Exception(
            "SourceError", "Unknown feed type '%s'. Choose 'rss' or 'atom'." %
            payload['source']['type'])
    return feed
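The dispatcher above only inspects payload['source']['type']; a minimal calling sketch, in which the 'url' key is a hypothetical addition for illustration:

import requests

payload = {'source': {'type': 'atom', 'url': 'https://example.org/feed.atom'}}
content = requests.get(payload['source']['url']).content
feed = _get_feed(content, payload)   # returns an atoma Atom feed object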
Example #13
def get_single_issue_by_atom(issue_id):
    url = ATOM_URL + "issues" + "/" + issue_id + ".atom" + "?key=" + ATOM_KEY
    response = requests.get(url, timeout=(3.0, 7.5))
    print(url)
    feed = atoma.parse_atom_bytes(response.content)
    if feed.entries is None or len(feed.entries) == 0:
        return False
    latest_entry = feed.entries[-1]
    author = latest_entry.authors[0].name
    content = sanitize_html_tag(latest_entry.content.value)
    return wrap_long_text("【更新】【{author}】 {content}".format(author=author, content=content))
Example #14
def crawl_arxiv(categories: List[str],
                max_results: int = 1000,
                sleep_time: int = 5,
                fetch_size: int = 100,
                output: str = '.'):
    docs = []
    base_url = 'http://export.arxiv.org/api/query?'
    base_oai = 'http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:{}&metadataPrefix=arXiv'
    oai_tag = '{http://www.openarchives.org/OAI/2.0/}'
    meta_list = []
    for category in categories:
        print('Looking up papers in {}'.format(category))
        url = "{}search_query=cat:{}&max_results={}&sortBy=lastUpdatedDate&sortOrder=descending".format(
            base_url, category, max_results)
        response = requests.get(url)
        feed = atoma.parse_atom_bytes(response.content)
        entries = feed.entries

        for entry in tqdm(entries):
            entry_link = entry.id_
            entry_index = entry_link.rfind('/')
            entry_id = entry_link[entry_index + 1:]
            version_marker = entry_id.rfind('v')
            entry_id = entry_id[:version_marker]
            oai_url = base_oai.format(entry_id)

            metadata_response = requests.get(oai_url)
            if metadata_response.status_code == 200:
                metadata = metadata_response.text
                root = ET.fromstring(metadata)
                record = root.find('{}GetRecord'.format(oai_tag))
                if record is not None:
                    license_link = find_license(record)
                    if is_cc_license(license_link):
                        setattr(entry, 'license', license_link)
                        meta = download_document(entry, output)
                        docs.append(entry)

                        meta_list.append(meta)
                        if len(docs) >= fetch_size:
                            break

            sleep(sleep_time)

        if len(docs) >= fetch_size:
            print("I found what I was looking for. We can stop searching.")
            break
    print('Found {} documents'.format(len(docs)))
    with open('{}/meta.json'.format(output), 'w') as fout:
        json.dump(meta_list, fout)

    return docs, meta_list
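A possible invocation, assuming the helpers referenced above (find_license, is_cc_license, download_document) are importable from the same module; cs.CL and cs.LG are arXiv category codes:

# Collect up to fetch_size CC-licensed entries across the two categories and
# write meta.json into ./arxiv_dump (the directory is assumed to exist).
docs, meta = crawl_arxiv(['cs.CL', 'cs.LG'],
                         max_results=500,
                         fetch_size=100,
                         output='./arxiv_dump')
print('kept', len(docs), 'documents')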
Example #15
    def get_context(self, request, *args, **kwargs):
        context = super(HomePage, self).get_context(request, *args, **kwargs)

        # Quick links
        quick_links = QuickLink.objects.all().order_by("result_weighting",
                                                       "title")
        context["quick_links"] = quick_links

        # News
        news_items = (NewsPage.objects.live().public().order_by(
            "-pinned_on_home",
            "home_news_order_pages__order",
            "-first_published_at",
        )[:8])
        context["news_items"] = news_items

        #  Tweets
        if not cache.get("homepage_tweets"):
            cache.set(
                "homepage_tweets",
                sorted(get_tweets(), key=lambda x: x.created_at, reverse=True),
                3000,
            )

        context["tweets"] = cache.get("homepage_tweets")[:3]

        # What's popular
        context["whats_popular_items"] = WhatsPopular.objects.all()

        # How do I
        context["how_do_i_items"] = (HowDoI.objects.filter(
            include_link_on_homepage=True).live().public().order_by(
                "title", )[:10])

        # GOVUK news
        if not cache.get("homepage_govuk_news"):
            govuk_news_feed_url = "https://www.gov.uk/search/news-and-communications.atom?organisations%5B%5D=department-for-international-trade"

            response = requests.get(govuk_news_feed_url)
            feed = atoma.parse_atom_bytes(response.content)

            cache.set(
                "homepage_govuk_news",
                feed.entries[:6],
                3000,
            )

        context["govuk_feed"] = cache.get("homepage_govuk_news")

        return context
Example #16
    def __call__(self) -> Optional[str]:
        response = self.session.get(
            F'https://github.com/{self.user}/{self.repo}/releases.atom')
        response.raise_for_status()
        feed = atoma.parse_atom_bytes(response.content)
        versions = []
        for entry in feed.entries:
            title = entry.title.value
            if any(block in title.lower() for block in self.VERSION_BLOCKLIST):
                continue
            version = self.version_from_title(title)
            if version:
                versions.append(packaging.version.parse(version))

        return str(max(versions)) if len(versions) > 0 else None
Example #17
    def get_weather(self):
        r = requests.get(self.link)
        if r.status_code != 200:
            raise ValueError("request returned HTTP response of {}".format(
                r.status_code))

        feed = atoma.parse_atom_bytes(r.content)
        self.title = feed.title.value
        self.updated = feed.updated
        current_conditions = None
        self.forecast = OrderedDict()
        self.alerts = []
        for entry in feed.entries:
            if entry.categories[0].term == self.translate(
                    'Current Conditions'):
                if current_conditions is not None:
                    raise ValueError(
                        "There is more than one 'Current Conditions' entry")
                current_conditions = entry
            elif entry.categories[0].term == self.translate(
                    'Warnings and Watches'):
                if entry.summary.value == self.translate(
                        'No watches or warnings in effect.'):
                    self.has_alerts = False
                else:
                    self.has_alerts = True
                    self.alerts.append(entry)
            elif entry.categories[0].term == self.translate(
                    'Weather Forecasts'):
                e = entry.title.value.split(':')[0]
                self.forecast[e] = entry
            else:
                print(
                    "Error, unidentified category {}. Notify developer".format(
                        entry.categories[0].term))
        self.current_conditions = {}

        self.current_summary = current_conditions.title.value

        current_conditions = html.unescape(
            current_conditions.summary.value).split('<br/>\n')
        for entry in current_conditions:
            entry_elems = entry.split(':</b>')
            self.current_conditions[entry_elems[0][3:].strip(
            )] = entry_elems[1].split('<br/>')[0].strip()
Example #18
def getListOfRecentCommits(repo_name: str) -> Generator[dict, None, None]:

    # Fetch commit feed
    commit_feed_raw = requests.get(f"https://github.com/{repo_name}/commits.atom")

    # Turn into atom object
    commit_feed = atoma.parse_atom_bytes(commit_feed_raw.content)

    # Handle each entry
    for commit in commit_feed.entries:
        
        # Build data output
        yield {
            "name": commit.title.value,
            "author": commit.authors[0].name,
            "date": commit.updated.strftime("%b %d, %Y"),
            "number":commit.links[0].href.split("/")[-1]
        }

    return
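Because the function is a generator it can be consumed directly; a small sketch using a public repository name purely as an illustration:

for commit in getListOfRecentCommits("python/cpython"):
    print(f"{commit['date']}  {commit['author']}: {commit['name']}")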
Example #19
def main():
    repos_list = get_repos_list(REPOS_FILENAME)
    urls = map(create_url, repos_list)
    atom_feeds = map(get_atom_feed, urls)
    parsed_atom_feeds = map(lambda x: atoma.parse_atom_bytes(x), atom_feeds)
    releases_last_24h = list(retrieve_releases_in_last_24h(parsed_atom_feeds))
    version_list = []
    url_list = []
    for i in range(len(releases_last_24h)):
        versions_list = list(map(lambda x: x.title.value,
                                 releases_last_24h[i]))
        version_urls = list(map(lambda x: x.links, releases_last_24h[i]))
        version_list.extend(versions_list)
        url_list.extend(version_urls)
    url_decoded_list = []
    for j in range(len(url_list)):
        ver_urls = list(map(lambda x: x.href, url_list[j]))
        url_decoded_list.extend(ver_urls)

    # None (rather than the deprecated -1) disables column width truncation
    pd.set_option('display.max_colwidth', None)
    df = pd.DataFrame(list(zip(version_list, url_decoded_list)),
                      columns=['version', 'url'])
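    # release URLs look like https://github.com/<owner>/<repo>/releases/...,
    # so path segment 4 of the split is the repository name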
    df['repo'] = df['url'].str.split('/').str[4]

    if len(version_list) < 1:
        print("No new versions released")
    else:
        print('New versions released: ' + '\n' +
              tabulate(df, headers='keys', tablefmt="psql", showindex=False))
        for i in range(len(df)):
            data = {
                'version': [df.loc[i, 'version']],
                'url': [df.loc[i, 'url']],
                'repo': [df.loc[i, 'repo']]
            }
            df2 = pd.DataFrame(data, columns=['version', 'url', 'repo'])
            data = np.squeeze(np.asarray(df2))
            slack.send_slack_message(data)
Example #20
def get_municipality_atoms_url(atom_url, codmun=None):
    """
    Reads the Atom feed specific to each parish.

    Returns the Atom URL of each municipality together with its EPSG code.

    A codmun parameter can be passed to return only that municipality.
    """

    response = requests.get(atom_url)

    feed = atoma.parse_atom_bytes(response.content)

    urls = []
    for entry in feed.entries:
        url = parse_url(entry.links[0].href)
        epsg = entry.categories[0].term.split('/')[-1]
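        # segment [4] of the dot-split file name is taken as the municipality code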
        codmun_atom = os.path.basename(url).split('.')[4]

        if codmun is None or codmun == codmun_atom:
            urls.append((url, epsg))

    return urls
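The two Catastro helpers compose naturally; a hedged sketch in which the top-level feed URL is again a placeholder rather than the real service endpoint:

CATASTRO_ATOM_URL = 'https://example.org/catastro/inspire/buildings.atom'  # placeholder

for province_url, title in get_provinces_atoms_url(CATASTRO_ATOM_URL):
    for municipality_url, epsg in get_municipality_atoms_url(province_url):
        print(title, municipality_url, epsg)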
Example #21
def main():  # pylint: disable=too-many-locals
    """Intended to be used as part of a GitHub Action.

    Requires INPUT_LAYERID, INPUT_TIMEFRAME and INPUT_UNITS environment
    variables to be set within the environment that the script is run.

    Prints "set-output" commands that create Outputs within GitHub Actions.
    The Outputs created are:

        * updateFound - True if an update was found within the specified
          timeframe, otherwise False
        * publishedTime - The time the data update was published in the
          'Pacific/Auckland' timezone
        * totalFeatures - The total number of features in the entire dataset
          after the update
        * adds - The number of added features in the update
        * modifies - The number of modified features in the update
        * deletes - The number of deleted features in the update

    Raises
    ------
    ValueError
        If environment variable INPUT_UNITS is not either "minutes", "hours"
        or "days".
    """
    layer_id = os.environ["INPUT_LAYERID"]
    timeframe = int(os.environ["INPUT_TIMEFRAME"])
    units = os.environ["INPUT_UNITS"]

    if units not in ["minutes", "hours", "days"]:
        raise ValueError("units should be either 'minutes', 'hours' or 'days'")

    response = requests.get(
        f"{KX_SITE_URL}/feeds/layers/{layer_id}/revisions/")
    feed = atoma.parse_atom_bytes(response.content)

    update_found = False
    dataset_title = None
    revision_number = None
    total_features = None
    adds = None
    modifies = None
    deletes = None
    published_time = None

    todays_date = pendulum.now("UTC")

    for entry in feed.entries:

        published_time = pendulum.instance(entry.published)
        time_since_publish = diff_timeframe(todays_date, published_time, units)

        if time_since_publish < timeframe:
            total_features, adds, modifies, deletes, total_changes = extract_feature_counts(
                entry.summary.value)

            # Ignore vector / table dataset updates with no feature changes
            if total_changes == 0:
                continue

            update_found = True
            dataset_title = entry.title.value.split(f" ({layer_id}", 1)[0]
            revision_number = entry.title.value.rsplit(" ", 1)[-1]

            # Skip for raster datasets where feature counts are 'None'
            if total_features:
                # Add commas as thousands separators on feature counts
                adds = f"{int(adds):,}"
                modifies = f"{int(modifies):,}"
                deletes = f"{int(deletes):,}"
                total_features = f"{int(total_features):,}"

            # Find only the most recent change
            break

        # Set published_time to None if the dataset update is not within the required timeframe
        published_time = None

    # Modify published time to readable format in local timezone
    if published_time:
        published_time = published_time.in_timezone(OUTPUT_TIMEZONE)
        published_time = published_time.format(OUTPUT_TIME_FORMAT)

    print(f"::set-output name=updateFound::{update_found}")
    print(f"::set-output name=datasetTitle::{dataset_title}")
    print(f"::set-output name=revisionNumber::{revision_number}")
    print(f"::set-output name=publishedTime::{published_time}")
    print(f"::set-output name=totalFeatures::{total_features}")
    print(f"::set-output name=adds::{adds}")
    print(f"::set-output name=modifies::{modifies}")
    print(f"::set-output name=deletes::{deletes}")
Example #22
import os

import atoma
import requests


def notify(title, text):
    os.system(
        """osascript -e 'display notification "{}" with title "{}"'""".format(
            text, title))


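# the feed-level 'updated' element of the commits Atom feed, i.e. the time
# of the most recent commit on master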
timestamp_lastcommit = atoma.parse_atom_bytes(
    requests.get(
        "https://github.com/CSSEGISandData/COVID-19/commits/master.atom").
    content).updated.timestamp()

if os.path.getmtime("cntry_stat.json") < timestamp_lastcommit:
    notify(
        title="COVID-19 data updated",
        text="There is a new commit in the COVID-19 data repository!",
    )
Example #23
def main():
	parser = argparse.ArgumentParser()
	# Required parameters

	parser.add_argument(
	    "-r","--request_interval",
	    default=3600,
	    type=int,
	    help="The request_interval (default: 3600) is the number of seconds to wait between requests to the arXiv API.",
	)

	parser.add_argument(
	    "-q","--arxiv_query",
	    default="nlp+OR+bert",
	    type=str,
	    help="The arxiv_query (default: 'nlp+OR+bert') is used for automatic retrieval of the latest papers from arXiv. Spaces should be replaced with '+'. For details see: https://arxiv.org/help/api.",
	)

	parser.add_argument(
	    "-d","--days_since",
	    default=10,
	    type=int,
	    help="The days_since (default: 10) defines how many past days count as recent, e.g. 10 means that only papers updated on arXiv no more than 10 days ago are forwarded and posted to Twitter.",
	)

	parser.add_argument(
	    "-t","--hashtags_prepend",
	    default="#NLP,#MachineLearning",
	    type=str,
	    help="The list of hashtags (default: '#NLP,#MachineLearning') to prepend to the tweet, separated by ','.",
	)

	args = parser.parse_args()


	ARXIV_QUERY = args.arxiv_query
	REQUEST_INTERVAL=args.request_interval

	HASHTAGS2PREPEND=args.hashtags_prepend.split(",")


	# verification
	auth = tweepy.OAuthHandler(TWITTER_APP_KEY,TWITTER_APP_SECRET)
	auth.set_access_token(TWITTER_KEY, TWITTER_SECRET)
	api = tweepy.API(auth)

	import urllib.request
	# for details of arxiv api, see: https://arxiv.org/help/api/user-manual#title_id_published_updated
	url = 'http://export.arxiv.org/api/query?search_query=all:'+ARXIV_QUERY+'&start=0&max_results=1&sortBy=lastUpdatedDate&sortOrder=descending'
	data = urllib.request.urlopen(url).read()

	last_timestamp=datetime.timestamp(datetime.now())-args.days_since*3600*24

	logger.info("Start listening with arguments: "+str(args))
	while True:
		# re-fetch the feed each cycle so newly updated papers can be detected
		data = urllib.request.urlopen(url).read()
		data_obj = atoma.parse_atom_bytes(data)
		next_timestamp = datetime.timestamp(data_obj.updated)
		logger.info("Got a paper from arXiv updated at "+str(data_obj.updated)+": "+data_obj.entries[0].id_)
		# if next_timestamp > last_timestamp. that means update found and thus call Twitter API to update status
		if (next_timestamp-last_timestamp)>0.0:
			# post a tweet on Twitter
			post_content = " ".join(HASHTAGS2PREPEND)+' new arxiv '+ARXIV_QUERY+' related paper: '+data_obj.entries[0].id_+'\n'+data_obj.entries[0].title.value+"."
			if len(post_content) > 280:
				# twitter allows a tweet of up to 280 characters
				post_content = post_content[:277]+"..."
			# post 
			try:
				response=api.update_status(post_content,tweet_mode="extended")
			except TweepError as err:
				# on error, bump last_timestamp past this entry and wait
				# REQUEST_INTERVAL seconds before the next call
				last_timestamp = next_timestamp+1
				logger.error(err)
				logger.error("Going to sleep for "+str(REQUEST_INTERVAL)+" seconds")
				time.sleep(REQUEST_INTERVAL)
				continue
			if response.full_text is not None:
				logger.info("Successfully post the tweet:\n===================\n"+ response.full_text+"\n===================")
			else:
				logger.error(response)
			last_timestamp=next_timestamp
		else:
			logger.info("Sleeping for "+str(REQUEST_INTERVAL)+" seconds (days_since = "+str(args.days_since)+" days)")
			time.sleep(REQUEST_INTERVAL)
			logger.info("No update in the last "+str(REQUEST_INTERVAL)+" seconds")
Example #24
def get_psf_news():
    response = requests.get('http://pyfound.blogspot.com/atom.xml')
    feed = atoma.parse_atom_bytes(response.content)
    return feed
Example #25
def query_arxiv(query):
    response = req.get(ARXIV_BASE_URL, params={'search_query': query})
    feed = atoma.parse_atom_bytes(response.content)
    return feed
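Assuming ARXIV_BASE_URL points at the arXiv query API (http://export.arxiv.org/api/query, as in example #14), the parsed feed can be iterated directly:

feed = query_arxiv('all:electron')
for entry in feed.entries:
    print(entry.title.value, entry.id_)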
Example #26
#!/usr/bin/python3

import atoma
import requests
import re
import pandas as pd

#Proxy support
proxies = {
    "http": "http://<<proxy>>:80",
    "https": "http://<<proxy>>:80",
}

response = requests.get('https://github.com/security-advisories',
                        proxies=proxies)
feed = atoma.parse_atom_bytes(response.content)

new_items = []

#Collect all Vulnerabilities with a CVE
for vuln in feed.entries:
    new_item = {}
    new_item['Id'] = re.findall(r'\[(.*?)\]', vuln.title.value)
    new_item['Published'] = vuln.published.strftime('%Y/%m/%d')
    new_item['Updated'] = vuln.updated.strftime('%Y/%m/%d')
    new_item['Title'] = re.findall(r'\s.*', vuln.title.value)
    new_item['Cve'] = re.findall(r'CVE-\d{4}-\d{4,7}', vuln.content.value)
    new_items.append(new_item)
    print(new_items)

df = pd.DataFrame(new_items)
Example #27
def get_deals(client, conditions, currencies, minimum_discount):

    for condition in conditions:
        for currency in currencies:

            wantlist_url = f'{WWW}/sell/mpmywantsrss'
            wantlist_params = {
                'output': 'rss',
                'user': DISCOGS_USER,
                'condition': condition,
                'currency': currency,
                'hours_range': '0-12',
            }
            feed = atoma.parse_atom_bytes(
                get(client, wantlist_url, wantlist_params).encode('utf8')
            )

            for entry in feed.entries:
                try:
                    listing_id = entry.id_.split('/')[-1]
                    listing = call_public_api(
                        client,
                        f'/marketplace/listings/{listing_id}'
                    )

                    if listing['seller']['username'] in BLOCKED_SELLERS:
                        continue

                    seller_rating = get_seller_rating(listing)
                    price = get_total_price(listing)
                    release_year = get_release_year(listing)
                    release_id = listing['release']['id']
                    min_price, median_price, max_price = get_price_statistics(
                        client,
                        release_id
                    )
                    suggested_price = get_suggested_price(
                        client,
                        release_id,
                        condition
                    )
                    demand_ratio = get_demand_ratio(client, release_id)
                    has_sold = True

                    if price is None:
                        continue

                    # adjust price for standard domestic shipping
                    price = price - STANDARD_SHIPPING

                    release_age = date.today().year - release_year

                    if condition == CONDITIONS['VG+']:
                        if release_age < ALLOW_VG['minimum_age']:
                            continue
                        if seller_rating < ALLOW_VG['minimum_seller_rating']:
                            continue

                    if median_price is None:
                        has_sold = False

                    if has_sold:
                        if not price < median_price:
                            continue

                        difference_from_median = difference(
                            price, median_price)
                        difference_from_suggested = difference(
                            price, suggested_price)
                        difference_from_min = difference(
                            price, min_price)
                        difference_from_max = difference(
                            price, max_price)

                        minimum = minimum_discount if demand_ratio < 2 else 5

                        if difference_from_median < minimum:
                            continue

                    debug(
                        f'\n{entry.title.value}\n'
                        f'{entry.summary.value}\n'
                        f'price: ${price:.2f}\n'
                        f'demand ratio: {demand_ratio:.1f}\n'
                        f'seller rating: {seller_rating:.1f}\n'
                        f'release year: {release_year}'
                    )

                    if has_sold:
                        debug(
                            f'median price: ${median_price:.2f}\n'
                            f'suggested price: ${suggested_price:.2f}\n'
                            f'lowest price: ${min_price:.2f}\n'
                            f'highest price: ${max_price:.2f}\n'
                            f'difference from median: '
                            f'{difference_from_median}%\n'
                            f'difference from suggested: '
                            f'{difference_from_suggested}%\n'
                            f'difference from lowest: '
                            f'{difference_from_min}%\n'
                            f'difference from highest: '
                            f'{difference_from_max}%\n'
                        )
                        summary = (
                            f'<b>{summarize_difference(difference_from_median)}'
                            f' median price (${median_price:.2f})</b><br>'
                            f'{summarize_difference(difference_from_suggested)}'
                            f' suggested price (${suggested_price:.2f})<br>'
                            f'{summarize_difference(difference_from_min)}'
                            f' min price (${min_price:.2f})<br>'
                            f'{summarize_difference(difference_from_max)}'
                            f' max price (${max_price:.2f})<br>'
                            f'demand ratio: {demand_ratio:.1f}<br><br>'
                            f'{entry.summary.value}'
                        )
                    else:
                        debug('never sold\n')
                        summary = (
                            f'<b>never sold</b><br>'
                            f'demand ratio: {demand_ratio:.1f}<br><br>'
                            f'{entry.summary.value}'
                        )

                    yield {
                        'id': entry.id_,
                        'title': entry.title.value,
                        'updated': isoformat(entry.updated),
                        'summary': summary,
                    }

                except DealException as e:
                    log_error(e, entry)
                except httpx.HTTPError as e:
                    debug(e)
Example #28
def scarica_feed() -> list:
    r = requests.get(ATOM_FEED)
    if r.status_code == 200:
        return lista_articoli(parse_atom_bytes(r.content).entries)
    else:
        return []