Example #1
import asyncio
import time

import scraping


async def start_scrap():
    try:
        # Scrape roughly every 15 seconds.
        while True:
            next_time = time.time() + 15
            await scraping.scraping()
            wait_for = next_time - time.time()
            if wait_for > 0:
                # Sleep asynchronously so the event loop is not blocked.
                await asyncio.sleep(wait_for)
    finally:
        # Runs when the task is cancelled; closes the scraper.
        scraping.scraping().close()
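
A minimal way to drive this loop (a sketch, not part of the original example):

if __name__ == '__main__':
    # Runs the scrape loop until the process is interrupted.
    asyncio.run(start_scrap())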
Example #2
def servScrape(cloudprovider, companyname, keyword):
    """ Scrape for the cloud services of the company.

    Args:
        cloudprovider (string): the cloudprovider to check for.
        companyname (string): the name of the company we are looking for.
        keyword (string): optional a keyword to specify the search.

    Outprints:
        the found results of the search.
        
    """
    print(f'{Fore.CYAN}Checking for {cloudprovider} services{Style.RESET_ALL}')
    request = []

    if cloudprovider == 'Azure':
        request.append('site:core.windows.net')
    elif cloudprovider == 'AWS':
        request.append('site:http://s3.amazonaws.com/*/')
        request.append('site:http://*.s3.amazonaws.com/')
    elif cloudprovider == 'Google Cloud Platform':
        request.append('site:*.storage.googleapis.com')
    else:
        print(f'Cloudprovider {cloudprovider} not supported')

    associated_list = []
    potentially_associated_list = []

    for r in request:
        query = r + ' ' + companyname
        if keyword:
            query += ' ' + keyword
        output = filter_for_correct_links(scraping.scraping(query))
        associated_list.extend(output[0])
        potentially_associated_list.extend(output[1])

    # Deduplicate (dict.fromkeys preserves order), then sort.
    associated_list = sorted(dict.fromkeys(associated_list))
    potentially_associated_list = sorted(dict.fromkeys(potentially_associated_list))

    if associated_list:
        print(
            f'{Fore.RED}We\'ve got them! Their associated services are as follows:{Style.RESET_ALL}'
        )
        p.pprint(associated_list)
    if potentially_associated_list:
        print(
            f'{Fore.YELLOW}Potentially associated services are as follows:{Style.RESET_ALL}'
        )
        p.pprint(potentially_associated_list)
    if not associated_list and not potentially_associated_list:
        print('Nothing detected here.')
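
A hypothetical invocation; the company name and keyword are placeholders:

# Placeholder arguments for illustration only.
servScrape('AWS', 'examplecorp', None)
servScrape('Azure', 'examplecorp', 'backup')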
Example #3
def main(args):
    """
  最初に実行される関数.
  @param args コンソールで渡された引数をdict型にしたもの
  """

    # 設定ファイルのロード
    config = read_config(args)

    # プログラムの実行
    if args.module_name == "scraping":
        scraping.scraping(args, config)
Example #4
def convertion(date, link, dest_path, latest=False):
    total_somministrations = 0
    total_available = 0
    total_population = 0
    total_health = 0

    day, month = date.split(sep="_")

    data = [str(datetime.date(2021, int(month), int(day)))] * (len(POPOLAZIONE))
    data.insert(0, "Data")

    regions, administration, available, percentage = scraping(link)

    # Sort the four parallel lists by region name.
    rows = sorted(zip(regions, administration, available, percentage), key=lambda row: row[0])
    regions = [row[0] for row in rows]
    administration = [row[1] for row in rows]
    available = [row[2] for row in rows]
    percentage = [row[3] for row in rows]

    copertura = []
    copertura.append('Copertura')

    copertura_dosi = []
    copertura_dosi.append(('Copertura Teorica'))

    for x in range(1, len(POPOLAZIONE)):
        copertura.append(str(round(int(administration[x].replace('.', '')) / int(POPOLAZIONE[x]) * 100, 3)) + '%')
        copertura_dosi.append(str(round(int(available[x].replace('.', '')) / int(POPOLAZIONE[x]) * 100, 3)) + '%')

        total_somministrations += int(administration[x].replace('.', ''))
        total_available += int(available[x].replace('.', ''))
        total_population += int(POPOLAZIONE[x])
        total_health += int(PERSONALE_SANITARIO[x])
    print(copertura)
    print(copertura_dosi)

    regions.append("Totale")
    administration.append(total_somministrations)
    available.append(total_available)
    percentage.append(str(round(total_somministrations / total_available * 100, 3)) + '%')
    POPOLAZIONE.append(total_population)
    PERSONALE_SANITARIO.append(total_health)
    copertura.append(str(round(total_somministrations / total_population * 100, 3)) + '%')
    copertura_dosi.append(str(round(total_available / total_population * 100, 3)) + '%')

    print("\n")
    result = [list(row) for row in
              zip(regions, administration, available, percentage, POPOLAZIONE, PERSONALE_SANITARIO, copertura,
                  copertura_dosi, data)]
    print(result)

    np.savetxt(dest_path + str(datetime.date(2021, int(month), int(day))) + '.csv', result,
               delimiter=',', fmt='%s')

    if latest:
        np.savetxt(dest_path + 'latest' + '.csv', result,
                   delimiter=',', fmt='%s')
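
A hedged usage sketch: '12_3' follows the day_month format split inside the function, while the link and destination path are placeholders:

# Hypothetical arguments; the link would point at the scraped vaccination page.
convertion('12_3', 'https://example.invalid/vaccines', 'output/', latest=True)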
Example #5
def handle_message(event):
    received = event.message.text
    trends = scraping(received)
    n = len(trends[0])
    if n == 0:
        line_bot_api.reply_message(
            event.reply_token,
            # "No articles were found for that tag."
            TextSendMessage(text='そのようなタグの記事は存在しませんでした'))
    else:
        text = ''
        for i in range(n):
            text += trends[0][i] + '\n' + trends[1][i] + '\n'
        line_bot_api.reply_message(event.reply_token,
                                   TextSendMessage(text=text))
Example #6
    def __init__(self,
                 working_dir,
                 executable_path,
                 download_path,
                 cache_path,
                 mode='csv',
                 directory_polling_interval=2.,
                 directory_polling_limit=10):

        self.working_dir = Path(working_dir)
        self.working_dir.mkdir(exist_ok=True)

        self.download_dir = self.working_dir / 'publication'
        self.download_dir.mkdir(exist_ok=True)

        self.database_dir = self.working_dir / 'database'
        self.database_dir.mkdir(exist_ok=True)

        self.gds_path = self.database_dir / 'gds.csv'
        self.publication_path = self.database_dir / 'publication.csv'
        self.source_path = self.database_dir / 'source.csv'

        if self.gds_path.exists():
            self.gds = pd.read_csv(self.gds_path)
            self.gds_index = set(self.gds.gds_uid)
        else:
            self.gds = pd.DataFrame(columns=columns['gds'])
            self.gds_index = set()

        if self.publication_path.exists():
            self.publication = pd.read_csv(self.publication_path)
            self.publication_index = set(self.publication.doi)
        else:
            self.publication = pd.DataFrame(columns=columns['publication'])
            self.publication_index = set()

        if self.source_path.exists():
            self.source = pd.read_csv(self.source_path)
        else:
            self.source = pd.DataFrame(columns=columns['source'])

        self.scraping = scraping(executable_path, cache_path, download_path)

        self.crawler = paper_crawler(executable_path, str(self.download_dir),
                                     cache_path)
        self.directory_polling_interval = directory_polling_interval
        self.directory_polling_limit = directory_polling_limit
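
A sketch of constructing this class with placeholder paths; 'Database' is a stand-in, since the snippet does not show the class name:

# All names and paths below are illustrative assumptions.
db = Database(working_dir='workspace',
              executable_path='/usr/local/bin/chromedriver',
              download_path='downloads',
              cache_path='cache')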
Example #7
def result(request, product):
    scraping_object = scraping.scraping(product)
    items = scraping_object.get_item_list()  # assuming get_item_list is a method, not a property
    return render(request, 'search/result.html', {
        'item': items,
    })
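
A hedged sketch of the Django URL pattern that could route to this view (the module layout is an assumption):

# urls.py (hypothetical); passes the captured 'product' string to the view above.
from django.urls import path
from . import views

urlpatterns = [
    path('result/<str:product>/', views.result, name='result'),
]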
Example #8
def menu(message):
    # Show the caller-supplied status message, if any, before the options.
    if message:
        print(message)
    print('\n\t\t MENU')
    print('1- Generate wordlist')
    print('2- Validate password')
    print('3- Close')
    op = input('What is your choice number: ')
    return op


if __name__ == "__main__":

    view.clear()
    view.logo()
    url = get_URL()
    profile = {}
    if url != '':
        profile = fs.scraping(url)
    else:
        profile["name"] = get_name()
    get_information(profile)
    view.clear()
    view.show_info(profile)
    finish = False
    t = Combinations(profile)
    info = t.info
    op = menu('')

    while not finish:
        if op == '1':
            wg(info, profile)
            message = 'Your wordlist is ready, the file is wordlist.txt'
            op = menu(message)
        elif op == '3':
            # Menu option 3 is "Close".
            finish = True
Example #9
async def stop_scrap():
    scraping.scraping().close()

    return render_template('index.html')
Example #10
from flask import Flask, jsonify
from gevent.pywsgi import WSGIServer
import logging
from flask import request

from scraping import scraping

LOG_FORMAT = "%(levelname)s : %(filename)s : %(asctime)s : %(message)s"
logging.basicConfig(filename=".logs/ApiLogs",
                    level=logging.INFO,
                    format=LOG_FORMAT,
                    filemode="w")
logger = logging.getLogger()

scrap = scraping()

app = Flask(__name__)


@app.route("/getDict/", methods=["GET"])
def getDictionary():
    letter = request.args.get('letter')

    logger.info(f"getDictionary of {letter}")

    words = scrap.getDictionary(letter)

    if words is None:
        logger.info("Letra não encontrada")
        return jsonify({"None": "Nenhuma palavra encontrada com essa letra"})

    # Success path (inferred): return the scraped words as JSON.
    return jsonify(words)
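
The snippet imports gevent's WSGIServer but never starts it; a minimal way to serve the app (host and port are assumptions):

if __name__ == "__main__":
    # 0.0.0.0:5000 is an assumption; the original snippet ends before serving.
    http_server = WSGIServer(("0.0.0.0", 5000), app)
    http_server.serve_forever()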
Example #11
# -*- coding: utf-8 -*-

import os
from scraping import scraping
from download import download
from dataProcessing import processing


def clear():
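    # 'clear' is the Unix terminal command; use 'cls' on Windows.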
    return os.system('clear')


url = "http://www.ans.gov.br/prestadores/tiss-troca-de-informacao-de-saude-suplementar"

pdfFileName = download(scraping(url))
csvFileName = processing(pdfFileName)
clear()

print('URL: ' + url)
print('Arquivo ' + pdfFileName + ' baixado.')
print('Tabelas extraídas e salvas em ' + csvFileName + '.')
Example #12
import numpy as np
import pandas as pd
from scipy import stats
import scraping as sc
import seaborn as sns
import matplotlib.pyplot as plt
import pingouin as pg

# Chart formatting settings
sns.set_style('whitegrid')

dfScraping = sc.scraping()

# Converting the "Chip Time" column to minutes only
# Chip time is the total race time, measured by reading the RFID sensor on the participant's bib
time_list = dfScraping[' Chip Time'].tolist()

# Inspecting a sample of the data
# print(time_list[1:5])

# List to hold the converted values
time_mins = []

# Loop converting each time to minutes
for i in time_list:
    i = i.strip(' ')
    if len(i) != 7:
        # Pad "MM:SS" values with a zero hour so the split yields three parts.
        i = '0:' + i
    h, m, s = i.split(':')
    minutes = (int(h) * 3600 + int(m) * 60 + int(s)) / 60
    time_mins.append(minutes)
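
With seaborn and matplotlib already imported, a natural follow-up (a sketch, not part of the original) is to plot the distribution of the converted times:

# Histogram of finish times in minutes, with a KDE overlay.
sns.histplot(time_mins, kde=True)
plt.xlabel('Chip Time (minutes)')
plt.show()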
Example #13
sys.setdefaultencoding("utf-8")

tmpl_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'templates')
app = Flask(__name__, template_folder=tmpl_dir)

DEBUG = True

SECRET_KEY = 'development key'

tmpl_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'templates')
app = Flask(__name__, template_folder=tmpl_dir)
app.config.from_object(__name__)

scraping()

username = None

conn = None

cur = None


def encrypt_password(password):
    encrypted_pass = hashlib.sha1(password.encode('utf-8')).hexdigest()
    return encrypted_pass


conn = lite.connect('orbis.sqlite')
cur = conn.cursor()
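
A hedged sketch of how encrypt_password might be used for a login check; the users table and its columns are assumptions, not shown in the snippet:

def check_login(name, password):
    # 'users', 'username' and 'password' are assumed schema names for illustration.
    cur.execute('SELECT 1 FROM users WHERE username = ? AND password = ?',
                (name, encrypt_password(password)))
    return cur.fetchone() is not None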
Example #14
import scraping

# Sample Japanese input text (about the effort of preparing presentation slides).
rawtext = 'スライド作成には大きな労力が必要です。プレゼン発表の前はただでさえ内容確認、Q&A対策、などの準備に追われているのに、発表用のスライドも用意しなくてはなりません。しかも、スライド作成はすぐに終わる作業ではなく、大きな手間を必要とします。 \
まず、発表内容から要点だけを抽出しなくてはなりません。原稿そのままの文章をスライドに載せてしまうと非常に見にくく、またどの部分に着目したら良いかもわからないため聞いている人に上手く伝えることができません。 \
また、その内容に合うグラフやイラストも用意する必要があります。視覚的に聞いている人に訴えかけることは大切な手法の一つです。 \
さらに、文章と画像をいい感じにスライドに配置する必要もあります。 \
最後に、テンプレートを選択する必要があります。'

word_list = scraping.scraping(rawtext, 1)
print(word_list)

path = scraping.irasutoya(word_list, 1)
print(path)
Example #15
    if not associated_websites and not potential_associated_websites:
        print('Nothing detected here.')


"""
In case of 429, try:
https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/
Category: Retry on failure
"""

companyname = input('Provide the company name: ')
main_domain = input('Provide the company\'s classic domain: ')
keyword = None
keyword_option = input('Do you want to use an additional keyword? [y/n]: ')
if keyword_option in ('y', 'yes'):
    keyword = input('Enter keyword: ')


print(f'{Fore.CYAN}Checking Amazon websites{Style.RESET_ALL}')
query = "site:s3.amazonaws.com -filetype:pdf " + companyname
if keyword:
    query += " " + keyword
get_associated_websites(scraping.scraping(query), companyname, main_domain, keyword)

print(f'{Fore.CYAN}Checking Azure websites{Style.RESET_ALL}')
query = "site:azurewebsites.net -filetype:pdf " + companyname
if keyword:
    query += " " + keyword
get_associated_websites(scraping.scraping(query), companyname, main_domain, keyword)
Example #16
def get():
    return scraping.scraping()
Example #17
    async def tenki(self, message: discord.Message):
        sc = scraping()
        # Fetch the forecast for Nagoya and Yokohama.
        weather = sc.forecast("名古屋", "横浜")

        await self.send_message(message, weather)