Example No. 1
async def get_paste(link_id: int, session: aiohttp.client.ClientSession, url: str):
    serial = get_serial(link_id)
    async with session.post(f"https://publisher.linkvertise.com/api/v1/redirect/link/{url}/paste", json={
        "serial": serial
    }, headers=headers()) as resp:
        json_content: dict = await resp.json()
    return json_content
Example No. 2
async def fetch(url: str, session: aiohttp.client.ClientSession) -> dict:
    """
    Asynchronously fetch a url, using specified ClientSession.
    """
    async with session.get(url) as response:
        resp = await response.json()
        return resp
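A minimal driver for this helper, assuming nothing beyond aiohttp itself: one ClientSession is shared across all requests (the point of passing the session in) and the calls are fanned out with asyncio.gather. The URLs are placeholders.

import asyncio
import aiohttp

async def main():
    # Placeholder endpoints that return JSON.
    urls = ["https://httpbin.org/json", "https://httpbin.org/uuid"]
    # One session for the whole batch; opening a session per request
    # would defeat connection pooling.
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(*(fetch(url, session) for url in urls))
    for url, payload in zip(urls, results):
        print(url, payload)

asyncio.run(main())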
Example No. 3
async def redirect_gatherer(url: str, session: aiohttp.client.ClientSession):
    async with session.get(url,
                           headers={"User-Agent":
                                    get_random_user_agent()}) as resp:
        history = [str(x.url) for x in resp.history]
        history.append(str(resp.url))
        return history
Example No. 4
async def get_req_stats(
    pkg_req: requirements.requirement.Requirement,
    aiohttp_session: aiohttp.client.ClientSession,
    url: str = "https://pypi.org/pypi/{}/json",
) -> Dict:
    pkg_url = url.format(pkg_req.name)
    async with aiohttp_session.get(pkg_url) as response:
        try:
            pkg_json = await response.json()
        except aiohttp.client_exceptions.ContentTypeError:
            LOG.error(f"{pkg_req} does not return JSON ...")
            return {}

    version = str(pkg_req.specs[0][1])
    try:
        # .pop() also raises IndexError when the release entry is an empty list.
        version_json = pkg_json["releases"][version].pop()
    except (KeyError, IndexError):
        LOG.error(f"{pkg_req} version does not exist in JSON ...")
        return {}

    upload_dt = datetime.strptime(version_json["upload_time"],
                                  "%Y-%m-%dT%H:%M:%S")
    dt_now = datetime.now()
    return {
        "name": pkg_req.name,
        "latest": version == pkg_json["info"]["version"],
        "released_days_ago": (dt_now - upload_dt).days,
        "upload_time": version_json["upload_time"],
        "version": version,
    }
Example No. 5
async def update_webrisk(url: str, session: aiohttp.client.ClientSession,
                         pool: asyncpg.pool.Pool):
    log.info(f"Fetching webrisk: {url}")

    params = [("uri", url), ("key", config.webrisk_key),
              ("threatTypes", "MALWARE"),
              ("threatTypes", "SOCIAL_ENGINEERING")]
    async with session.get(
            "https://webrisk.googleapis.com/v1beta1/uris:search",
            params=params) as resp:
        json_content = await resp.json()

    if json_content == {}:
        await insert_blank_webrisk(pool, url)
    else:
        parsed_time = datetime.datetime.strptime(
            json_content["threat"]["expireTime"][:-4], "%Y-%m-%dT%H:%M:%S.%f")
        if json_content["threat"]["threatTypes"] == ["SOCIAL_ENGINEERING"]:
            await insert_webrisk(pool=pool,
                                 url=url,
                                 social_engineering=True,
                                 expire_time=parsed_time)
        elif json_content["threat"]["threatTypes"] == ["MALWARE"]:
            await insert_webrisk(pool=pool,
                                 url=url,
                                 malware=True,
                                 expire_time=parsed_time)
        else:
            await insert_webrisk(pool=pool,
                                 url=url,
                                 social_engineering=True,
                                 malware=True,
                                 expire_time=parsed_time)
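One caveat: the branches above compare threatTypes against exact lists, so they depend on the API returning single-element lists in a fixed form; any extra entry or different ordering falls through to the final else. A set-based helper is order-insensitive. This is a sketch, and it assumes insert_webrisk accepts explicit False flags, which the keyword-argument call style suggests but the source does not confirm.

def classify_threats(threat_types: list) -> dict:
    # Order-insensitive membership tests instead of exact-list equality.
    types = set(threat_types)
    return {
        "social_engineering": "SOCIAL_ENGINEERING" in types,
        "malware": "MALWARE" in types,
    }

# classify_threats(["MALWARE", "SOCIAL_ENGINEERING"])
# -> {'social_engineering': True, 'malware': True}
# The result could then be splatted into the call:
# await insert_webrisk(pool=pool, url=url, expire_time=parsed_time,
#                      **classify_threats(json_content["threat"]["threatTypes"]))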
Example No. 6
async def __fetch_and_feed(self, client: aiohttp.client.ClientSession, url, parser: htmlParser, client_num: int):
    # logger.debug(f"client - {client_num}, try to get url <{url}>")
    resp: aiohttp.client.ClientResponse
    while True:
        async with client.get(url) as resp:
            if resp.status < 200 or resp.status >= 300:
                logger.warning(f"server response - {resp.status}")
                break
            try:
                html_data = await resp.text()
            except UnicodeDecodeError as err:
                logger.warning(err)
                break
            # print(html_data)
            parser.feed(html_data)
            m_v = html_url_regex_3.match(url)
            if m_v is None or m_v.group(4) == "":
                logger.warning(f"Processing url <{url}> failed: url does not correspond with the expected content.")
                break
            PageId = int(m_v.group(4))
            if parser.HasContent and (parser.BookName != "" and parser.BookName is not None):
                AppendContext(parser.BookName, PageId, parser.Content)
        break
    await self.__client_list.ReleaseClient(client_num)
    return
Example No. 7
async def do_rating(session: aiohttp.client.ClientSession, reviewer: str,
                    length: int):
    global index
    async with session.get(reviewer, headers=HEADERS_GET) as response:
        html = await response.text()
        reviews = [
            str(i) for i in BeautifulSoup(html, 'html.parser').find_all(
                attrs={'class': 'review_stats'})
        ]
    for review in reviews:
        grade = int(
            BeautifulSoup(review, 'html.parser').find(attrs={
                'class': 'review_score'
            }).text)
        game_name = BeautifulSoup(review,
                                  'html.parser').find(attrs={
                                      'class': 'product_title'
                                  }).text
        if game_name not in games:
            games.insert(0, game_name)
        if grade >= 9:
            prog_scores[game_name] = prog_scores.get(game_name, 0) + 1
        elif grade <= 2:
            prog_scores[game_name] = prog_scores.get(game_name, 0) - 1
    print(f"{index + 1}/{length}")
    index += 1
Example No. 8
async def update_internal_hsts(session: aiohttp.client.ClientSession, url,
                               pool: asyncpg.pool.Pool):
    log.info("Updating HSTS status for: {}".format(url))
    async with session.get("https://hstspreload.org/api/v2/status",
                           params={"domain": url}) as resp:
        json_data = await resp.json()
        status = json_data.get("status")
        await update_db_hsts(pool, url, status)
    return status
Example No. 9
async def fetch(
        url: str,
        session: aiohttp.client.ClientSession) -> Tuple[bool, Optional[bytes]]:
    async with session.get(url) as response:
        if response.status != 200:
            print('Bad response:', url, response.status, response.reason)
            return True, None
        else:
            return False, await response.read()
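This variant signals failure in-band (a True first element) instead of raising, so the caller branches on the flag. A minimal caller with a placeholder URL:

import asyncio
import aiohttp

async def main():
    async with aiohttp.ClientSession() as session:
        failed, body = await fetch("https://example.com/data.bin", session)
    if failed:
        print("download failed")
    else:
        print(f"got {len(body)} bytes")

asyncio.run(main())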
Example No. 10
async def get_link_id(parsed: ParseResult, session: aiohttp.client.ClientSession, url: str):
    async with session.get(f"https://publisher.linkvertise.com/api/v1/redirect/link/static/{url}",
                           headers=headers(),
                           allow_redirects=False
                           ) as resp:
        json_content: dict = await resp.json()
    if json_content["success"]:
        return json_content["data"]["link"]["id"], json_content["data"]["link"]["target_type"]
    raise Linkvertise(f"Got errors from {urlunparse(parsed)}: {', '.join(json_content['messages'])}")
Example No. 11
async def update_internal_hsts(session: aiohttp.client.ClientSession, url,
                               pool: asyncpg.pool.Pool,
                               transaction: Hub.current.scope.transaction):
    with transaction.start_child(op="task",
                                 description="update_internal_hsts"):
        log.info("Updating HSTS status for: {}".format(url))
        async with session.get("https://hstspreload.org/api/v2/status",
                               params={"domain": url}) as resp:
            json_data = await resp.json()
            status = json_data.get("status")
            await update_db_hsts(pool, url, status)
        return status
Example No. 12
async def DownloadImg_async(session: aiohttp.client.ClientSession,
                            img_url: str, path: str, img_name: str):
    async with session.get(img_url) as img_response:
        # open the image URL with aiohttp
        with open('%s/%s' % (path, img_name), 'wb') as f:
            # stream the response body to disk in 128-byte chunks
            while True:
                chunk = await img_response.content.read(128)
                if not chunk:
                    break
                f.write(chunk)
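A sketch of driving the download (URL and paths are placeholders). One design note: open() and f.write() are synchronous and briefly block the event loop on each chunk; a library like aiofiles could be swapped in if many downloads run concurrently.

import asyncio
import aiohttp

async def main():
    async with aiohttp.ClientSession() as session:
        # Placeholder image URL; saves to ./cat.jpg in streamed chunks.
        await DownloadImg_async(session, "https://example.com/cat.jpg",
                                ".", "cat.jpg")

asyncio.run(main())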
Example No. 13
async def manual_redirect_gatherer(url: str,
                                   session: aiohttp.client.ClientSession,
                                   db: asyncpg.pool.Pool):
    urls = [url]
    for internalUrl in urls:
        cached_data = await api.cached.cached(internalUrl, db)
        if cached_data:
            log.info(f"Using cached redirect data for {internalUrl}")
            cached_data = json.loads(cached_data)
            status, _, _, _, _, _, headers, _, _, _, _ = cached_data
            r_url = ""
            if status in (301, 302, 303, 307, 308):
                r_url = headers.get("Location") or headers.get(
                    "location") or headers.get("uri")
                if r_url:
                    if r_url.startswith("/"):
                        parsed = urlparse(internalUrl)
                        r_url = f"{parsed.scheme}://{parsed.netloc}/{r_url[1:]}"
                        log.info(
                            f"Found redirect for {internalUrl} to {r_url} due to status of {status}"
                        )
            if r_url not in urls and r_url != "":
                urls.append(r_url)
            continue

        try:
            async with session.get(
                    internalUrl,
                    headers={"User-Agent": get_random_user_agent()},
                    allow_redirects=False,
                    timeout=10) as resp:
                r_url = ""
                if resp.status in (301, 302, 303, 307, 308):
                    hdrs = resp.headers
                    r_url = hdrs.get("Location") or hdrs.get(
                        "location") or hdrs.get("uri")
                    if r_url:
                        if r_url.startswith("/"):
                            parsed = urlparse(internalUrl)
                            r_url = f"{parsed.scheme}://{parsed.netloc}/{r_url[1:]}"
                            log.info(
                                f"Found redirect for {internalUrl} to {r_url} due to status of {resp.status}"
                            )
                if r_url not in urls and r_url != "":
                    urls.append(r_url)
        except asyncio.exceptions.TimeoutError:
            log.warning(f"Timeout error on {internalUrl}")
    return urls
Example No. 14
    async def __fetch_results(self,
                              query,
                              number,
                              language_code,
                              user_agent=None,
                              user: UserAsync = None,
                              proxy=None,
                              timeout=5.0,
                              session: aiohttp.client.ClientSession = None,
                              engine='google'):
        url = ''

        # preparation of request link
        if engine == 'bing':
            url = 'https://www.bing.com/search?q={}&count={}'.format(
                query, number)
        elif engine == 'google':
            url = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
                query, number, language_code)
        elif engine == 'yahoo':
            url = 'https://search.yahoo.com/search?p={}&n={}&ei=UTF-8'.format(
                query, number)
        elif engine == 'youtube':
            url = 'https://www.youtube.com/results?search_query={}'.format(
                query)

        # get page with timeout (for imitation user activity)
        async with session.get(url,
                               headers=user.agent,
                               timeout=timeout,
                               proxy=proxy) as response:
            # error checking
            if response.status != 200:
                response.raise_for_status()

            # get HTML code of page
            data = await response.text()

            # get cookies
            user.cookies = session.cookie_jar
            # user.cookies = session._cookie_jar._cookies

            # delay between requests
            await asyncio.sleep(timeout)
            # sleep(timeout)

            # return HTML code of page
            return data
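One hedged improvement over the string-formatted URLs above: the query is interpolated without percent-encoding, so spaces, '&', or non-ASCII characters would produce a malformed request. Letting aiohttp encode a params dict avoids that. A standalone sketch of the google branch (the function name and arguments are hypothetical):

import asyncio
import aiohttp

async def search_google(query: str, number: int, language_code: str) -> str:
    # aiohttp percent-encodes the params dict, unlike manual interpolation.
    params = {"q": query, "num": number, "hl": language_code}
    async with aiohttp.ClientSession() as session:
        async with session.get("https://www.google.com/search",
                               params=params) as response:
            response.raise_for_status()
            return await response.text()

html = asyncio.run(search_google("aiohttp client session", 10, "en"))
print(len(html))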
Example No. 15
async def platform_data(session: aiohttp.client.ClientSession,
                        platform_url: str):
    async with session.get(platform_url, headers=HEADERS_GET) as response:
        req = await response.text()

    soup = BeautifulSoup(req, 'html.parser')
    sum_reviews = soup.find_all(attrs={'class': 'count'})
    try:
        if "metascore_w user large game tbd" not in req:
            sum_reviews = sum_reviews[1]
            if len(str(sum_reviews).split('user-reviews">')) > 1:
                sum_reviews = str(sum_reviews).split(
                    'user-reviews">')[1].split("Ratings")[0]
                reviews.append((platform_url, sum_reviews))
    except Exception as e:
        # e.with_traceback() requires a traceback argument; just report the error.
        print(platform_url, repr(e))
Example No. 16
async def duckduckgo(parsed: ParseResult,
                     session: aiohttp.client.ClientSession):
    if "q" in urllib.parse.parse_qs(parsed.query):
        ddg_query = urllib.parse.parse_qs(parsed.query).get("q")[0]
        if ddg_query.startswith("! site:"):
            async with session.get("https://api.duckduckgo.com/",
                                   params={
                                       "q": ddg_query,
                                       "format": "json",
                                       "no_redirect": 1
                                   }) as resp:
                json_content: dict = await resp.json(
                    content_type="application/x-javascript")
            return json_content["Redirect"]
        else:
            log.warning(f"DDG: Unknown query string: {ddg_query}")
            return {"error": "Unknown query string"}
Example No. 17
async def update_webrisk(url: str, session: aiohttp.client.ClientSession,
                         pool: asyncpg.pool.Pool,
                         transaction: Hub.current.scope.transaction):
    with transaction.start_child(op="task", description="Updating webrisk"):
        log.info(f"Fetching webrisk: {url}")

        params = [("uri", url), ("key", config.webrisk_key),
                  ("threatTypes", "MALWARE"),
                  ("threatTypes", "SOCIAL_ENGINEERING"),
                  ("threatTypes", "UNWANTED_SOFTWARE")]
        async with session.get("https://webrisk.googleapis.com/v1/uris:search",
                               params=params) as resp:
            json_content: dict = await resp.json()

        if json_content.get("error"):
            log.info("Error with Google API")
            log.error(json_content)

        if json_content == {}:
            await insert_blank_webrisk(pool, url)
        else:
            parsed_time = datetime.datetime.strptime(
                json_content["threat"]["expireTime"][:-4],
                "%Y-%m-%dT%H:%M:%S.%f")
            if json_content["threat"]["threatTypes"] == ["SOCIAL_ENGINEERING"]:
                await insert_webrisk(pool=pool,
                                     url=url,
                                     social_engineering=True,
                                     expire_time=parsed_time)
            elif json_content["threat"]["threatTypes"] == ["MALWARE"]:
                await insert_webrisk(pool=pool,
                                     url=url,
                                     malware=True,
                                     expire_time=parsed_time)
            else:
                await insert_webrisk(pool=pool,
                                     url=url,
                                     social_engineering=True,
                                     malware=True,
                                     expire_time=parsed_time)
Example No. 18
async def fetch(session: aiohttp.client.ClientSession, url: str) -> tuple:
    """ Middleware for articles """
    async def subfetch(url, subdomain_url):
        parsed = url_parsers.urlparse(url)
        replaced = parsed._replace(netloc=subdomain_url)
        url = replaced.geturl()
        # check whether this mirror actually serves the required url
        async with session.head(url) as res:
            return res, url

    async with session.get(url) as response:
        if response.status != 200:
            for subdomain in configs.SUBDOMAINS:
                res, new_url = await subfetch(url, subdomain)
                if res.status != 404:
                    return await fetch(session, new_url)
                else:
                    continue

        content_type = response.content_type
        body = await response.read()
        return body, content_type
Example No. 19
async def redirect_gatherer(url: str, session: aiohttp.client.ClientSession):
    async with session.get(url) as resp:
        history = [str(x.url) for x in resp.history]
        history.append(str(resp.url))
        return history
Example No. 20
async def fetch(session: aiohttp.client.ClientSession, uri: str) -> dict:
    """Helper function to keep session.get() calls clean."""
    async with session.get(uri) as resp:
        return await resp.json()
Example No. 21
async def fetch(session: aiohttp.client.ClientSession, url: str,
                **kwargs) -> dict:
    """From here: https://stackoverflow.com/questions/22190403/how-could-i-use-requests-in-asyncio/50312981#50312981"""
    async with session.get(url, **kwargs) as response:
        return await response.json()
Example No. 22
async def fetch(session: aiohttp.client.ClientSession, url: str) -> str:
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.text()
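Because this variant calls raise_for_status(), HTTP errors surface as exceptions rather than as bad payloads. A minimal caller (placeholder URL):

import asyncio
import aiohttp

async def main():
    async with aiohttp.ClientSession() as session:
        try:
            text = await fetch(session, "https://example.com/")
        except aiohttp.ClientResponseError as err:
            print("request failed:", err.status, err.message)
        else:
            print(text[:200])

asyncio.run(main())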
Example No. 23
async def dark_sky(args: argparse.Namespace, client: aiohttp.client.ClientSession, location: geopy.location.Location) -> Any:
    url = 'https://api.forecast.io/forecast/{}/{},{}'.format(args.key, location.latitude, location.longitude)
    async with client.get(url) as response:
        assert response.status == 200
        # response.read() takes no decode flag in current aiohttp; use json().
        return await response.json()
Example No. 24
async def fetch_spell(*, eng_spell_name: str, session: aiohttp.client.ClientSession, debug: bool = False) \
        -> typing.Optional[Spell]:
    searching_url = 'http://dungeon.su/spells/'
    # searching_url = f'http://dungeon.su/spells/{eng_spell_name}'
    async with session.get(
        searching_url,
        headers={
            'Accept': 'text/html,application/xhtml+xml,application/xml',
            'Content-Type': 'text/html'
        },
        params={'search': eng_spell_name}) as response:
        if debug:
            print(f'Fetching spell "{eng_spell_name}"')
            print(response.url)

        response_binary = await response.read()
        html = BeautifulSoup(response_binary.decode('utf-8'), 'html.parser')
        articles = html.find_all(
            name='div',
            attrs={'itemtype': "https://schema.org/Article"
                   })  # type: typing.List[BeautifulSoup.element.Tag]
        if len(articles) == 0:
            print(f'No spells with name "{eng_spell_name}" found')
            return
        elif len(articles) > 1:
            names = []
            for article in articles:
                name_tag = article.find('a',
                                        attrs={
                                            'class': 'item-link',
                                            'itemprop': 'url'
                                        })
                if '(' in name_tag.text:
                    eng_name = name_tag.text.split('(')[1].strip().replace(
                        ')', '')
                else:
                    eng_name = name_tag.text
                names.append(eng_name)
            print(
                f'{len(articles)} spells with name "{eng_spell_name}" found: {names}\nPlease refine your search'
            )

            return None

        spell_attributes_dict = {
            'level':
            SpellAttribute(ru_name='уровень',
                           ru_value=-1,
                           en_name='level',
                           en_value=-1),
            'school':
            SpellAttribute(ru_name='школа',
                           ru_value='нет',
                           en_name='school',
                           en_value='na'),
            'cast_time':
            SpellAttribute(ru_name='время накладывания',
                           ru_value='нет',
                           en_name='cast_time',
                           en_value='na'),
            'duration':
            SpellAttribute(ru_name='длительность',
                           ru_value='na',
                           en_name='duration',
                           en_value='na'),
            'range':
            SpellAttribute(ru_name='дистанция',
                           ru_value='na',
                           en_name='range',
                           en_value='na'),
            'components':
            SpellAttribute(ru_name='компоненты',
                           ru_value=[],
                           en_name='components',
                           en_value=[]),
            'classes':
            SpellAttribute(ru_name='классы',
                           ru_value=[],
                           en_name='classes',
                           en_value=[]),
            'source':
            SpellAttribute(ru_name='источник',
                           ru_value='na',
                           en_name='source',
                           en_value='na'),
            'higher_levels':
            SpellAttribute(ru_name='на больших уровнях',
                           ru_value='',
                           en_name='higher levels',
                           en_value='na'),
            'name':
            SpellAttribute(ru_name='имя',
                           ru_value=eng_spell_name,
                           en_name='name',
                           en_value=''),
            'description':
            SpellAttribute(ru_name='описание',
                           ru_value='Нет описания',
                           en_name='description',
                           en_value='No description')
        }

        article = articles[0]  # type: BeautifulSoup.element.Tag
        name_tag = article.find('a',
                                attrs={
                                    'class': 'item-link',
                                    'itemprop': 'url'
                                })
        if not name_tag:
            print(f'Name tag not found for {eng_spell_name}')
            return
        spell_attributes_dict['name'] = SpellAttribute(
            ru_name='имя',
            ru_value=name_tag.text.split('(')[0].strip(),
            en_name='name',
            en_value=name_tag.text.split('(')[1].strip().replace(')', ''))

        article_body = article.find(name='div',
                                    attrs={
                                        "class": "card-body",
                                        "itemprop": "articleBody"
                                    })
        # type: BeautifulSoup.element.Tag

        if not article_body:
            print('Cannot find any spell on the HTML page')
            return None

        for attribute_tag in article_body('ul')[0](
                'li'):  # iterate over each <li> tag

            if attribute_tag.find(name='div',
                                  attrs={'itemprop': 'description'}):
                description_tag = attribute_tag.find(
                    name='div', attrs={'itemprop': 'description'})
                if "На больших уровнях:" in description_tag.text:
                    ru_desc = description_tag.text.split(
                        'На больших уровнях:')[0].strip()
                    ru_higher_levels = description_tag.text.split(
                        'На больших уровнях:')[1].strip()
                    spell_attributes_dict['description'] = SpellAttribute(
                        ru_name='описание',
                        ru_value=ru_desc,
                        en_name='description',
                        en_value='')
                    spell_attributes_dict['higher_levels'] = SpellAttribute(
                        ru_name='на больших уровнях',
                        ru_value=ru_higher_levels,
                        en_name='higher levels',
                        en_value='')
                else:
                    spell_attributes_dict['description'] = SpellAttribute(
                        ru_name='описание',
                        ru_value=description_tag.text,
                        en_name='description',
                        en_value='')
            else:
                ru_name = attribute_tag('strong')[0].text.replace(':', '')
                if ru_name.lower() not in attributes_translations_dict:
                    continue

                ru_value = attribute_tag.text.replace(f'{ru_name}:',
                                                      '').strip().replace(
                                                          '«',
                                                          '').replace('»', '')
                en_name = attributes_translations_dict[ru_name.lower()]
                spell_attributes_dict[attributes_translations_dict[
                    ru_name.lower()]] = SpellAttribute(ru_name=ru_name,
                                                       ru_value=ru_value,
                                                       en_name=en_name,
                                                       en_value='')

    return Spell(
        name=spell_attributes_dict['name'],
        level=spell_attributes_dict['level'],
        cast_time=spell_attributes_dict['cast_time'],
        classes=spell_attributes_dict['classes'],
        components=spell_attributes_dict['components'],
        duration=spell_attributes_dict['duration'],
        higher_levels=spell_attributes_dict['higher_levels'],
        range=spell_attributes_dict['range'],
        school=spell_attributes_dict['school'],
        source=spell_attributes_dict['source'],
        description=spell_attributes_dict['description'],
    )