Example #1
import asyncio
import time

import scraping


async def start_scrap():
    try:
        # Scrape roughly every 15 seconds.
        while True:
            next_time = time.time() + 15
            await scraping.scraping()
            wait_for = next_time - time.time()
            if wait_for > 0:
                # Sleep asynchronously so the event loop is not blocked.
                await asyncio.sleep(wait_for)
    finally:
        # Runs when the task is cancelled; closes the scraper.
        scraping.scraping().close()
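
A minimal way to drive this loop (a sketch, not part of the original example):

if __name__ == '__main__':
    # Runs the scrape loop until the process is interrupted.
    asyncio.run(start_scrap())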
Example #2
def servScrape(cloudprovider, companyname, keyword):
    """ Scrape for the cloud services of the company.

    Args:
        cloudprovider (string): the cloudprovider to check for.
        companyname (string): the name of the company we are looking for.
        keyword (string): optional a keyword to specify the search.

    Outprints:
        the found results of the search.
        
    """
    print(f'{Fore.CYAN}Checking for {cloudprovider} services{Style.RESET_ALL}')
    request = []

    if cloudprovider == 'Azure':
        request.append('site:core.windows.net')
    elif cloudprovider == 'AWS':
        request.append('site:http://s3.amazonaws.com/*/')
        request.append('site:http://*.s3.amazonaws.com/')
    elif cloudprovider == 'Google Cloud Platform':
        request.append('site:*.storage.googleapis.com')
    else:
        print(f'Cloudprovider {cloudprovider} not supported')

    associated_list = []
    potentially_associated_list = []

    for r in request:
        query = r + ' ' + companyname
        if keyword:
            query += ' ' + keyword
        output = filter_for_correct_links(scraping.scraping(query))
        associated_list.extend(output[0])
        potentially_associated_list.extend(output[1])

    # Deduplicate (dict.fromkeys preserves order), then sort.
    associated_list = sorted(dict.fromkeys(associated_list))
    potentially_associated_list = sorted(dict.fromkeys(potentially_associated_list))

    if associated_list:
        print(
            f'{Fore.RED}We\'ve got them! Their associated services are as follows:{Style.RESET_ALL}'
        )
        p.pprint(associated_list)
    if potentially_associated_list:
        print(
            f'{Fore.YELLOW}Potentially associated services are as follows:{Style.RESET_ALL}'
        )
        p.pprint(potentially_associated_list)
    if not associated_list and not potentially_associated_list:
        print('Nothing detected here.')
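
A hypothetical invocation; the company name and keyword are placeholders:

# Placeholder arguments for illustration only.
servScrape('AWS', 'examplecorp', None)
servScrape('Azure', 'examplecorp', 'backup')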
Example #3
def main(args):
    """
  最初に実行される関数.
  @param args コンソールで渡された引数をdict型にしたもの
  """

    # 設定ファイルのロード
    config = read_config(args)

    # プログラムの実行
    if args.module_name == "scraping":
        scraping.scraping(args, config)
Example #4
def convertion(date, link, dest_path, latest=False):
    total_somministrations = 0
    total_available = 0
    total_population = 0
    total_health = 0

    day, month = date.split(sep="_")

    data = [str(datetime.date(2021, int(month), int(day)))] * (len(POPOLAZIONE))
    data.insert(0, "Data")

    regions, administration, available, percentage = scraping(link)

    # Sort the four parallel lists by region name.
    rows = sorted(zip(regions, administration, available, percentage), key=lambda row: row[0])
    regions = [row[0] for row in rows]
    administration = [row[1] for row in rows]
    available = [row[2] for row in rows]
    percentage = [row[3] for row in rows]

    copertura = []
    copertura.append('Copertura')

    copertura_dosi = []
    copertura_dosi.append(('Copertura Teorica'))

    for x in range(1, len(POPOLAZIONE)):
        copertura.append(str(round(int(administration[x].replace('.', '')) / int(POPOLAZIONE[x]) * 100, 3)) + '%')
        copertura_dosi.append(str(round(int(available[x].replace('.', '')) / int(POPOLAZIONE[x]) * 100, 3)) + '%')

        total_somministrations += int(administration[x].replace('.', ''))
        total_available += int(available[x].replace('.', ''))
        total_population += int(POPOLAZIONE[x])
        total_health += int(PERSONALE_SANITARIO[x])
    print(copertura)
    print(copertura_dosi)

    regions.append("Totale")
    administration.append(total_somministrations)
    available.append(total_available)
    percentage.append(str(round(total_somministrations / total_available * 100, 3)) + '%')
    POPOLAZIONE.append(total_population)
    PERSONALE_SANITARIO.append(total_health)
    copertura.append(str(round(total_somministrations / total_population * 100, 3)) + '%')
    copertura_dosi.append(str(round(total_available / total_population * 100, 3)) + '%')

    print("\n")
    result = [list(row) for row in
              zip(regions, administration, available, percentage, POPOLAZIONE, PERSONALE_SANITARIO, copertura,
                  copertura_dosi, data)]
    print(result)

    np.savetxt(dest_path + str(datetime.date(2021, int(month), int(day))) + '.csv', result,
               delimiter=',', fmt='%s')

    if latest:
        np.savetxt(dest_path + 'latest' + '.csv', result,
                   delimiter=',', fmt='%s')
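
A hedged usage sketch: '12_3' follows the day_month format split inside the function, while the link and destination path are placeholders:

# Hypothetical arguments; the link would point at the scraped vaccination page.
convertion('12_3', 'https://example.invalid/vaccines', 'output/', latest=True)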
Example #5
def handle_message(event):
    received = event.message.text
    trends = scraping(received)
    n = len(trends[0])
    if n == 0:
        line_bot_api.reply_message(
            event.reply_token,
            # "No articles were found for that tag."
            TextSendMessage(text='そのようなタグの記事は存在しませんでした'))
    else:
        text = ''
        for i in range(n):
            text += trends[0][i] + '\n' + trends[1][i] + '\n'
        line_bot_api.reply_message(event.reply_token,
                                   TextSendMessage(text=text))
Example #6
    def __init__(self,
                 working_dir,
                 executable_path,
                 download_path,
                 cache_path,
                 mode='csv',
                 directory_polling_interval=2.,
                 directory_polling_limit=10):

        self.working_dir = Path(working_dir)
        self.working_dir.mkdir(exist_ok=True)

        self.download_dir = self.working_dir / 'publication'
        self.download_dir.mkdir(exist_ok=True)

        self.database_dir = self.working_dir / 'database'
        self.database_dir.mkdir(exist_ok=True)

        self.gds_path = self.database_dir / 'gds.csv'
        self.publication_path = self.database_dir / 'publication.csv'
        self.source_path = self.database_dir / 'source.csv'

        if self.gds_path.exists():
            self.gds = pd.read_csv(self.gds_path)
            self.gds_index = set(self.gds.gds_uid)
        else:
            self.gds = pd.DataFrame(columns=columns['gds'])
            self.gds_index = set()

        if self.publication_path.exists():
            self.publication = pd.read_csv(self.publication_path)
            self.publication_index = set(self.publication.doi)
        else:
            self.publication = pd.DataFrame(columns=columns['publication'])
            self.publication_index = set()

        if self.source_path.exists():
            self.source = pd.read_csv(self.source_path)
        else:
            self.source = pd.DataFrame(columns=columns['source'])

        self.scraping = scraping(executable_path, cache_path, download_path)

        self.crawler = paper_crawler(executable_path, str(self.download_dir),
                                     cache_path)
        self.directory_polling_interval = directory_polling_interval
        self.directory_polling_limit = directory_polling_limit
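
A sketch of constructing this class with placeholder paths; 'Database' is a stand-in, since the snippet does not show the class name:

# All names and paths below are illustrative assumptions.
db = Database(working_dir='workspace',
              executable_path='/usr/local/bin/chromedriver',
              download_path='downloads',
              cache_path='cache')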
Example #7
def result(request, product):
    scraping_object = scraping.scraping(product)
    items = scraping_object.get_item_list()  # assuming get_item_list is a method, not a property
    return render(request, 'search/result.html', {
        'item': items,
    })
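
A hedged sketch of the Django URL pattern that could route to this view (the module layout is an assumption):

# urls.py (hypothetical); passes the captured 'product' string to the view above.
from django.urls import path
from . import views

urlpatterns = [
    path('result/<str:product>/', views.result, name='result'),
]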
Example #8
def menu(message):
    # Show the caller-supplied status message, if any, before the options.
    if message:
        print(message)
    print('\n\t\t MENU')
    print('1- Generate wordlist')
    print('2- Validate password')
    print('3- Close')
    op = input('What is your choice number: ')
    return op


if __name__ == "__main__":

    view.clear()
    view.logo()
    url = get_URL()
    profile = {}
    if url != '':
        profile = fs.scraping(url)
    else:
        profile["name"] = get_name()
    get_information(profile)
    view.clear()
    view.show_info(profile)
    finish = False
    t = Combinations(profile)
    info = t.info
    op = menu('')

    while not finish:
        if op == '1':
            wg(info, profile)
            message = 'Your wordlist is ready, the file is wordlist.txt'
            op = menu(message)
        elif op == '3':
            # Menu option 3 is "Close".
            finish = True
Example #9
async def stop_scrap():
    scraping.scraping().close()

    return render_template('index.html')
Example #10
from flask import Flask, jsonify
from gevent.pywsgi import WSGIServer
import logging
from flask import request

from scraping import scraping

LOG_FORMAT = "%(levelname)s : %(filename)s : %(asctime)s : %(message)s"
logging.basicConfig(filename=".logs/ApiLogs",
                    level=logging.INFO,
                    format=LOG_FORMAT,
                    filemode="w")
logger = logging.getLogger()

scrap = scraping()

app = Flask(__name__)


@app.route("/getDict/", methods=["GET"])
def getDictionary():
    letter = request.args.get('letter')

    logger.info(f"getDictionary of {letter}")

    words = scrap.getDictionary(letter)

    if words is None:
        logger.info("Letra não encontrada")
        return jsonify({"None": "Nenhuma palavra encontrada com essa letra"})

    # Success path (inferred): return the scraped words as JSON.
    return jsonify(words)
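
The snippet imports gevent's WSGIServer but never starts it; a minimal way to serve the app (host and port are assumptions):

if __name__ == "__main__":
    # 0.0.0.0:5000 is an assumption; the original snippet ends before serving.
    http_server = WSGIServer(("0.0.0.0", 5000), app)
    http_server.serve_forever()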
Example #11
# -*- coding: utf-8 -*-

import os
from scraping import scraping
from download import download
from dataProcessing import processing


def clear():
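    # 'clear' is the Unix terminal command; use 'cls' on Windows.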
    return os.system('clear')


url = "http://www.ans.gov.br/prestadores/tiss-troca-de-informacao-de-saude-suplementar"

pdfFileName = download(scraping(url))
csvFileName = processing(pdfFileName)
clear()

print('URL: ' + url)
print('Arquivo ' + pdfFileName + ' baixado.')
print('Tabelas extraídas e salvas em ' + csvFileName + '.')
Example #12
import numpy as np
import pandas as pd
from scipy import stats
import scraping as sc
import seaborn as sns
import matplotlib.pyplot as plt
import pingouin as pg

# Chart formatting settings
sns.set_style('whitegrid')

dfScraping = sc.scraping()

# Converting the "Chip Time" column to minutes only
# Chip time is the total race time, measured by reading the RFID sensor on the participant's bib
time_list = dfScraping[' Chip Time'].tolist()

# Inspecting a sample of the data
# print(time_list[1:5])

# List to hold the converted values
time_mins = []

# Loop converting each time to minutes
for i in time_list:
    i = i.strip(' ')
    if len(i) != 7:
        # Pad "MM:SS" values with a zero hour so the split yields three parts.
        i = '0:' + i
    h, m, s = i.split(':')
    minutes = (int(h) * 3600 + int(m) * 60 + int(s)) / 60
    time_mins.append(minutes)
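
With seaborn and matplotlib already imported, a natural follow-up (a sketch, not part of the original) is to plot the distribution of the converted times:

# Histogram of finish times in minutes, with a KDE overlay.
sns.histplot(time_mins, kde=True)
plt.xlabel('Chip Time (minutes)')
plt.show()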
Example #13
sys.setdefaultencoding("utf-8")

tmpl_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'templates')
app = Flask(__name__, template_folder=tmpl_dir)

DEBUG = True

SECRET_KEY = 'development key'

tmpl_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'templates')
app = Flask(__name__, template_folder=tmpl_dir)
app.config.from_object(__name__)

scraping()

username = None

conn = None

cur = None


def encrypt_password(password):
    encrypted_pass = hashlib.sha1(password.encode('utf-8')).hexdigest()
    return encrypted_pass


conn = lite.connect('orbis.sqlite')
cur = conn.cursor()
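
A hedged sketch of how encrypt_password might be used for a login check; the users table and its columns are assumptions, not shown in the snippet:

def check_login(name, password):
    # 'users', 'username' and 'password' are assumed schema names for illustration.
    cur.execute('SELECT 1 FROM users WHERE username = ? AND password = ?',
                (name, encrypt_password(password)))
    return cur.fetchone() is not None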
Example #14
import scraping

# Sample Japanese input text (about the effort of preparing presentation slides).
rawtext = 'スライド作成には大きな労力が必要です。プレゼン発表の前はただでさえ内容確認、Q&A対策、などの準備に追われているのに、発表用のスライドも用意しなくてはなりません。しかも、スライド作成はすぐに終わる作業ではなく、大きな手間を必要とします。 \
まず、発表内容から要点だけを抽出しなくてはなりません。原稿そのままの文章をスライドに載せてしまうと非常に見にくく、またどの部分に着目したら良いかもわからないため聞いている人に上手く伝えることができません。 \
また、その内容に合うグラフやイラストも用意する必要があります。視覚的に聞いている人に訴えかけることは大切な手法の一つです。 \
さらに、文章と画像をいい感じにスライドに配置する必要もあります。 \
最後に、テンプレートを選択する必要があります。'

word_list = scraping.scraping(rawtext, 1)
print(word_list)

path = scraping.irasutoya(word_list, 1)
print(path)
Example #15
    if not associated_websites and not potential_associated_websites:
        print('Nothing detected here.')


"""
In case of 429, try:
https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/
Category: Retry on failure
"""

companyname = input('Provide the company name: ')
main_domain = input('Provide the company\'s classic domain: ')
keyword = None
keyword_option = input('Do you want to use an additional keyword? [y/n]: ')
if keyword_option in ('y', 'yes'):
    keyword = input('Enter keyword: ')


print(f'{Fore.CYAN}Checking Amazon websites{Style.RESET_ALL}')
query = "site:s3.amazonaws.com -filetype:pdf " + companyname
if keyword:
    query += " " + keyword
get_associated_websites(scraping.scraping(query), companyname, main_domain, keyword)

print(f'{Fore.CYAN}Checking Azure websites{Style.RESET_ALL}')
query = "site:azurewebsites.net -filetype:pdf " + companyname
if keyword:
    query += " " + keyword
get_associated_websites(scraping.scraping(query), companyname, main_domain, keyword)
Example #16
def get():
    return scraping.scraping()
Example #17
    async def tenki(self, message: discord.Message):
        sc = scraping()
        # Fetch the forecast for Nagoya and Yokohama.
        weather = sc.forecast("名古屋", "横浜")

        await self.send_message(message, weather)