Example No. 1
def get(self, name: str):
    # Look up the requested stock ticker and return its price as JSON plus an HTTP status code.
    if name:
        try:
            stock = Scrapy(stock=name)
            return json.dumps(stock.getPrice()), 200
        except Exception:
            return "Invalid Stock Ticker", 404
    return "Bad Request: Provide Stock Ticker", 400
Example No. 2
def refresh(self):
    # Re-scrape the Pokémon data and redraw every cell on the canvas.
    # url and headers are defined elsewhere in the original module.
    self.setup_canvas()
    scrapy = Scrapy(url, headers=headers)
    scrapy.get_pokemon_data()
    self.index = 0
    for pokemon in scrapy.pokemon_array:
        self.get_cells(pokemon)
        self.index += 1
    self.win.update()
    return None
Example No. 3
def case2():
    msg = "tell me the file path\n"
    keyword = input(msg)
    # Keep asking until a non-empty path to an existing file is given.
    while len(keyword) < 1 or not os.path.exists(keyword):
        print("keyword is empty or the file does not exist")
        keyword = input(msg)
    profilemsg = "tell me where you saved the Firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("starting, please wait a moment...")
    Scrapy.handleitembyfile(keyword)
Example No. 4
def case1():
    msg = "tell me the keyword you want to use\n"
    keyword = input(msg)
    # Keep asking until a non-empty keyword is given.
    while len(keyword) < 1:
        keyword = input(msg)
    profilemsg = "tell me where you saved the Firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("starting, please wait a moment...")
    Scrapy.startBykeyword(keyword, profile)
    endmsg = "press any key to end\n"
    endstring = input(endmsg)  # block until the user presses Enter before exiting
Example No. 5
def main():
    print('EXECUTING SCR_MAIN.PY')
    scrapy_site = Scrapy(url=ROOT_URL,
                         sub_url=LIST_URL,
                         output_path=OUTPUT_PATH)
    catagory_links = scrapy_site.get_catagory_link_list()
    file_links, file_names = scrapy_site.access_pages(catagory_links)
    scrapy_site.get_files(file_links, file_names)

    # Log every downloaded zip archive and extract its CSV members into UNZIP_PATH.
    with open(ZIP_FILE, 'w') as fr:
        for _file in os.listdir(ZIP_PATH):
            if _file.endswith('.zip'):
                ori_name = ZIP_PATH + _file
                fr.write(_file + '\n')

                # A context manager closes the archive and avoids leaking file handles.
                with zipfile.ZipFile(ori_name) as zf:
                    zipfile_member = [_member for _member in zf.namelist()
                                      if _member.endswith('.csv')]

                    for _member in zipfile_member:
                        try:
                            zf.extract(_member, path=UNZIP_PATH, pwd=None)
                            fr.write('      |-- ' + _member + '\n')

                        except zipfile.BadZipfile:
                            fr.write('      |-- (BadZipFile)' + _member + '\n')
                            # The argument-list form avoids hand-escaping spaces
                            # and parentheses for the shell.
                            subprocess.call(['mv', ori_name, BADFILE_PATH])
                            print('BAD ZIP FILE:', _member, 'found in',
                                  ori_name, '- moved to', BADFILE_PATH)

    char_collect = CharCollector(file_path=UNZIP_PATH)
    file_list = char_collect.get_file_list()
    name_list, invalid_name_list = char_collect.get_shop_name_from_all_files(
        file_list)
    char_list = char_collect.get_hans_char_from_all_words(name_list)

    with open(RESULT_PATH + 'char_list.txt', 'w') as filehandle:
        for char in char_list:
            filehandle.write('%s\n' % char)

    with open(RESULT_PATH + 'name_list.txt', 'w') as filehandle:
        for name in name_list:
            filehandle.write(name + '\n')
Example No. 6
# -*- coding:UTF-8 -*-
from scrapy import Scrapy
import sys
import os

print("Do you want to get data by search or by file?")
scrapytype = input("1.by search; 2.by file?\n")
Scrapy = Scrapy()  # note: the class name is rebound to the shared instance used by case1/case2


def case1():
    msg = "tell me the keyword you want to use\n"
    keyword = input(msg)
    # Keep asking until a non-empty keyword is given.
    while len(keyword) < 1:
        keyword = input(msg)
    profilemsg = "tell me where you saved the Firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("starting, please wait a moment...")
    Scrapy.startBykeyword(keyword, profile)
    endmsg = "press any key to end\n"
    endstring = input(endmsg)  # block until the user presses Enter before exiting


# Process the links from a file
def case2():
    msg = "tell me the file path\n"
    keyword = input(msg)
    # Keep asking until a non-empty path to an existing file is given.
    while len(keyword) < 1 or not os.path.exists(keyword):
        print("keyword is empty or the file does not exist")
        keyword = input(msg)
    profilemsg = "tell me where you saved the Firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("starting, please wait a moment...")
    Scrapy.handleitembyfile(keyword)
Example No. 7
def testgetproductdetail(self):
    # Fetch and parse the detail page of a single AliExpress product URL.
    sc = Scrapy()
    url = "https://www.aliexpress.com/item/4000171398545.html?algo_pvid=6f2a8b66-d2a9-4e4a-aa6c-77d40724f61f&algo_expid=6f2a8b66-d2a9-4e4a-aa6c-77d40724f61f-10&btsid=0b86d80216066135805932798ec64f&ws_ab_test=searchweb0_0,searchweb201602_,searchweb201603_"
    sc.getproductdetail(url)
Example No. 8
def testgetitemfile(self):
    # Re-process an existing result CSV, writing the output to a timestamped file.
    sc = Scrapy()
    sc.resultfile = 'G:\\scrapyexpress\\result\\' + str(
        time.time()) + '.csv'
    file = 'G:\\scrapyexpress\\result\\2020-10-21\\mouse toy_test1.csv'
    sc.handleitembyfile(file, sc.resultfile)
Example No. 9
import sys
import time
import json
import codecs
from optparse import OptionParser
from selenium import webdriver
from settings import CHROME_DRIVER_LOCATION, BASE_URL
from scrapy import Scrapy

if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-o', '--output', dest="filename", help="Write output to a file")

    options, args = parser.parse_args()
    if options.filename is None:
        print('usage: python main.py -o <outputfile>')
        sys.exit(2)

    # Run the scraper and dump its accumulated result to the output file as UTF-8 JSON.
    scrapy = Scrapy()
    scrapy.run()

    with codecs.open(options.filename, 'w', encoding='utf-8') as f:
        json.dump(scrapy.result, f, ensure_ascii=False)