def get(self, name: str):
    # Return the current price for the given ticker as JSON.
    if name:
        try:
            stock = Scrapy(stock=name)
            return json.dumps(stock.getPrice()), 200
        except Exception:
            # Any scrape failure is treated as an unknown ticker.
            return "Invalid Stock Ticker", 404
    return "Bad Request: Provide Stock Ticker", 400
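# The (body, status) return pairs above follow the flask_restful Resource
# convention. A minimal, hypothetical sketch of how such an endpoint could be
# wired up; everything here except the flask_restful API itself (the Scrapy
# stand-in, the Stock class, the route) is an assumption:
import json

from flask import Flask
from flask_restful import Api, Resource


class Scrapy:
    """Hypothetical stand-in for the scraper used above."""

    def __init__(self, stock: str):
        self.stock = stock

    def getPrice(self) -> float:
        raise NotImplementedError  # a real version would scrape the quote


class Stock(Resource):
    def get(self, name: str):
        if name:
            try:
                return json.dumps(Scrapy(stock=name).getPrice()), 200
            except Exception:
                return "Invalid Stock Ticker", 404
        return "Bad Request: Provide Stock Ticker", 400


app = Flask(__name__)
api = Api(app)
api.add_resource(Stock, "/stock/<string:name>")  # e.g. GET /stock/AAPL

if __name__ == "__main__":
    app.run()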
def refresh(self):
    # Redraw the grid from freshly scraped data; `url` and `headers`
    # are expected to be module-level constants.
    self.setup_canvas()
    scrapy = Scrapy(url, headers=headers)
    scrapy.get_pokemon_data()
    self.index = 0
    for pokemon in scrapy.pokemon_array:
        self.get_cells(pokemon)
        self.index += 1
    self.win.update()
    return None
def case2():
    msg = "tell me the file path\n"
    keyword = input(msg)
    while len(keyword) < 1 or not os.path.exists(keyword):
        print("keyword is empty or the file does not exist")
        keyword = input(msg)
    profilemsg = "tell me where you save firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("started, wait a moment...")
    # Note: `profile` is collected but not passed on here.
    Scrapy.handleitembyfile(keyword)
def case1():
    msg = "tell me the keyword you want to use\n"
    keyword = input(msg)
    while len(keyword) < 1:
        keyword = input(msg)
    profilemsg = "tell me where you save firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("started, wait a moment...")
    Scrapy.startBykeyword(keyword, profile)
    endmsg = "press any key to end\n"
    endstring = input(endmsg)
def main():
    print('EXECUTING SCR_MAIN.PY')
    scrapy_site = Scrapy(url=ROOT_URL, sub_url=LIST_URL, output_path=OUTPUT_PATH)
    catagory_links = scrapy_site.get_catagory_link_list()
    file_links, file_names = scrapy_site.access_pages(catagory_links)
    scrapy_site.get_files(file_links, file_names)

    # Walk the downloaded archives, extract their CSV members, and log a
    # tree of what was extracted; broken archives are moved aside.
    with open(ZIP_FILE, 'w') as fr:
        for _index, _file in enumerate(os.listdir(ZIP_PATH)):
            if not _file.endswith('.zip'):
                continue
            ori_name = ZIP_PATH + _file
            fr.writelines(_file + '\n')
            zf = zipfile.ZipFile(ori_name)
            zipfile_member = [m for m in zf.namelist() if m.endswith('.csv')]
            for _member in zipfile_member:
                try:
                    zf.extract(_member, path=UNZIP_PATH, pwd=None)
                    fr.writelines(' |-- ' + _member + '\n')
                except zipfile.BadZipfile:
                    fr.writelines(' |-- (BadZipFile)' + _member + '\n')
                    # Escape shell metacharacters before handing the path
                    # to `mv`; the backslashes must themselves be escaped.
                    escaped = (ori_name.replace(' ', '\\ ')
                               .replace('(', '\\(')
                               .replace(')', '\\)'))
                    subprocess.call('mv ' + escaped + ' ' + BADFILE_PATH,
                                    shell=True)
                    print('BAD ZIP FILE:', _member,
                          'was found in ' + ori_name + ' and moved to',
                          BADFILE_PATH)

    # Collect shop names and the Chinese characters they contain.
    char_collect = CharCollector(file_path=UNZIP_PATH)
    file_list = char_collect.get_file_list()
    name_list, invalid_name_list = char_collect.get_shop_name_from_all_files(
        file_list)
    char_list = char_collect.get_hans_char_from_all_words(name_list)
    with open(RESULT_PATH + 'char_list.txt', 'w') as filehandle:
        for char in char_list:
            filehandle.write('%s\n' % char)
    with open(RESULT_PATH + 'name_list.txt', 'w') as filehandle:
        for name in name_list:
            filehandle.write(name + '\n')
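# The escaped `mv` above stays fragile for arbitrary file names. A shell-free
# alternative using only the standard library, sketched as a suggestion
# rather than part of the original script (the function name is made up):
import os
import shutil


def move_bad_zip(ori_name, badfile_path):
    # shutil.move needs no shell quoting, so spaces and parentheses in
    # file names are handled safely.
    shutil.move(ori_name,
                os.path.join(badfile_path, os.path.basename(ori_name)))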
# -*- coding:UTF-8 -*-
from scrapy import Scrapy
import sys
import os

print("Do you want to get data by search or by file?")
scrapytype = input("1. by search; 2. by file?\n")
# Note: this rebinds the name Scrapy from the class to a shared instance.
Scrapy = Scrapy()


def case1():
    msg = "tell me the keyword you want to use\n"
    keyword = input(msg)
    while len(keyword) < 1:
        keyword = input(msg)
    profilemsg = "tell me where you save firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("started, wait a moment...")
    Scrapy.startBykeyword(keyword, profile)
    endmsg = "press any key to end\n"
    endstring = input(endmsg)


# Handle the links from a file.
def case2():
    msg = "tell me the file path\n"
    keyword = input(msg)
    while len(keyword) < 1 or not os.path.exists(keyword):
        print("keyword is empty or the file does not exist")
        keyword = input(msg)
    profilemsg = "tell me where you save firefox cookies (optional)\n"
    profile = input(profilemsg)
    if len(profile) < 1:
        profile = None
    print("started, wait a moment...")
    Scrapy.handleitembyfile(keyword)
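# The script above reads `scrapytype` but the snippet stops before acting on
# it. A hypothetical dispatch, assuming the two cases defined above:
if scrapytype == "1":
    case1()
elif scrapytype == "2":
    case2()
else:
    print("unknown choice, expected 1 or 2")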
def testgetproductdetail(self):
    sc = Scrapy()
    url = "https://www.aliexpress.com/item/4000171398545.html?algo_pvid=6f2a8b66-d2a9-4e4a-aa6c-77d40724f61f&algo_expid=6f2a8b66-d2a9-4e4a-aa6c-77d40724f61f-10&btsid=0b86d80216066135805932798ec64f&ws_ab_test=searchweb0_0,searchweb201602_,searchweb201603_"
    sc.getproductdetail(url)
def testgetitemfile(self):
    sc = Scrapy()
    sc.resultfile = ('G:\\scrapyexpress\\result\\'
                     + str(time.time()) + '.csv')
    file = 'G:\\scrapyexpress\\result\\2020-10-21\\mouse toy_test1.csv'
    sc.handleitembyfile(file, sc.resultfile)
import sys
import time
import json
import codecs
from optparse import OptionParser

from selenium import webdriver
from settings import CHROME_DRIVER_LOCATION, BASE_URL
from scrapy import Scrapy

if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-o', '--output', dest="filename",
                      help="Write output to a file")
    options, args = parser.parse_args()
    if options.filename is None:
        print('usage: python main.py -o <outputfile>')
        sys.exit(2)
    scrapy = Scrapy()
    scrapy.run()
    # Dump the scraped result as UTF-8 JSON.
    with codecs.open(options.filename, 'w', encoding='utf-8') as f:
        json.dump(scrapy.result, f, ensure_ascii=False)
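# The selenium import and settings above are presumably consumed inside
# Scrapy. A hypothetical sketch of the shape such a class might have, using
# the classic selenium driver constructor; every name here except the
# selenium calls themselves is a guess:
class ScrapySketch:
    def __init__(self):
        self.result = []
        self.driver = webdriver.Chrome(executable_path=CHROME_DRIVER_LOCATION)

    def run(self):
        self.driver.get(BASE_URL)
        # ... walk the page and append parsed records to self.result ...
        self.driver.quit()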