Пример #1
0
def manga_download(mangaid):
    """Start or report a download for *mangaid* and return its status as JSON."""
    # debug information
    print(f"downloading {mangaid}")
    print(f"threads {len(active_downloads)}")

    # If a download for this manga is already tracked, either report its
    # status (still running) or remember its slot so it can be retired below.
    finished_idx = -1
    for idx, worker in enumerate(active_downloads):
        if worker.manga_id != mangaid:
            continue
        if worker.is_alive():
            return jsonify(worker.information())
        finished_idx = idx
        break

    # Kick off a fresh download worker and track it.
    worker = download.Download(mangaid, "gb")
    worker.start()
    active_downloads.append(worker)

    # A previously finished worker is reported once and then dropped, so a
    # later call will retry the download if it ended with errors.
    if finished_idx != -1:
        info = active_downloads[finished_idx].information()
        del active_downloads[finished_idx]
        return jsonify(info)

    # Otherwise report the freshly started worker.
    return jsonify(worker.information())
Пример #2
0
    def testCommandKey(self):
        """CommandKey() echoes the persisted command_key, or None when absent."""
        # With a command_key persisted, CommandKey() must return it.
        expected_key = 'testCommandKey'
        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None,
                                               command_key=expected_key)
        dl = download.Download(stateobj=stateobj, transfer_complete_cb=None)
        self.assertEqual(dl.CommandKey(), expected_key)

        # Without one, CommandKey() must report None.
        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None)
        dl = download.Download(stateobj=stateobj, transfer_complete_cb=None)
        self.assertEqual(dl.CommandKey(), None)
Пример #3
0
    def __init__(self):
        """Wire up the four crawler collaborators.

        Creates the manager, downloader, parser and output writer that the
        crawl workflow delegates to.
        """
        # Instantiate manager, downloader, parser, output writer.
        self.manager = manager.Manager()
        self.download = download.Download()
        self.parser = parser.Parser()
        self.output = output.Output()
Пример #4
0
def main():
    """Interactively collect page range and size constraints, then download."""
    size = psize.Size()
    dl = download.Download()

    # Page range and picture-type selection.
    page = int(input("请输入下载开始页码: "))
    max_page = int(input("请输入下载结束页码: "))
    pic_type = int(input("选择图片的类型: \n 0=全部 1=横图 2=竖图 3=正方形\n"))
    change_size = str(input("是否限制图片大小: \n 0=不限制 1=限制\n"))

    if change_size == '1':
        # Collect explicit lower and upper bounds for the picture dimensions.
        print("请勿输入浮点数!")
        lower = [int(input(p)) for p in ("最小长: ", "最小宽: ", "长宽比: ")]
        upper = [int(input(p)) for p in ("最大长: ", "最大宽: ", "长宽比: ")]
        pic_size = size.psize(*lower, *upper)
    elif change_size == '0':
        pic_size = size.psize()
    else:
        print("输入有误,结束~~")
        return

    # Storage root: user-supplied, falling back to the default directory.
    path = str(input("修改图片存储根目录?(默认为'C:/Yandere/'): \n"))
    root = path if path != '' else 'C:/Yandere/'

    dl.do_download(pic_size, page, max_page, pic_type, root)
Пример #5
0
def lambda_handler(event, context):
    """Generate every report CSV under /tmp via the Download helper.

    Bug fixed: the final generate_and_save_csv_file call was indented with a
    tab while the rest of the function uses spaces, which raises a TabError
    under Python 3 before the handler can even be imported.  The repeated
    calls are also collapsed into a data-driven loop.
    """
    download_instance = download.Download()
    cwd_dir = os.getcwd()

    # NOTE(review): os.path.join discards cwd_dir because the second argument
    # is absolute, so every file lands directly in /tmp.  Presumably intended
    # (on AWS Lambda only /tmp is writable) — confirm.
    reports = [
        ('/tmp/Adoption-Sectionwise.csv', adoption_sectionwise_sql_query),
        ('/tmp/Installs.csv', installs_sql_query),
        ('/tmp/DAU.csv', dau_sql_query),
        ('/tmp/Adoption_Overall.csv', adoption_overall_sql_query),
        ('/tmp/ContentVideo.csv', content_video_sql_query),
        ('/tmp/ContentPDF.csv', content_pdf_sql_query),
        ('/tmp/AdoptionCohorts.csv', overall_adoption_cohorts_sql_query),
        ('/tmp/TVC_GS.csv', tvc_gs_sql_query),
    ]
    for filename, query in reports:
        download_instance.generate_and_save_csv_file(
            os.path.join(cwd_dir, filename), query)
Пример #6
0
def main():
    """Pop one task from the Redis queue and fetch its equip detail.

    Tasks that are not yet due, or whose fetch fails, are pushed back onto
    the queue for a later attempt.

    Bug fixed: the `proxies` mapping given to requests.post used the keys
    'http:' / 'https:' (with a trailing colon).  requests selects a proxy by
    URL scheme ('http' / 'https'), so the proxy was silently never applied.
    """
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    redis_obj = Redis_curl()
    down = download.Download()
    task_obj = redis_obj.read_task()

    if task_obj is None:
        print('无任务')
        return

    print('当前任务  ' + str(task_obj))
    # Not yet due: push the task back for a later run.
    if int(time.time()) < task_obj['TYPE2_TIMESTAMP']:
        print('尚未到达指定时间,重新push回队列')
        redis_obj.rpush(task_obj)
        return

    # Past the scheduled time: scrape the detail endpoint.
    game_ordersn = task_obj['game_ordersn']
    serverid = task_obj['serverid']
    startPrice = task_obj['startPrice']  # read but currently unused below

    detail_url = 'https://yys.cbg.163.com/cgi/api/get_equip_detail'

    headers = {
       'Accept': "application/json, text/javascript, */*; q=0.01",
       'Accept-Encoding': "gzip, deflate, br",
       'Accept-Language': "zh-CN,zh;q=0.9",
       'Cache-Control': "no-cache",
       'Connection': "keep-alive",
       # 'Content-Length': "80",
       'Content-Type': "application/x-www-form-urlencoded; charset=UTF-8",
       'Host': "yys.cbg.163.com",
       'Origin': "https://yys.cbg.163.com",
       'Pragma': "no-cache",
       'Referer': "https://yys.cbg.163.com/",
       'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
       'X-Requested-With': "XMLHttpRequest",
       'cache-control': "no-cache",
    }

    data = 'serverid={serverid}&ordersn={game_ordersn}&view_loc=all_list%EF%BC%9B1'
    body = data.format(game_ordersn=game_ordersn, serverid=serverid)
    try:
        myip = redis_obj.get_redis_Ip()
        # Both schemes are routed through the same HTTP proxy.
        proxies = {
            'http': 'http://' + myip,
            'https': 'http://' + myip,
        }
        response = requests.post(detail_url, data=body, headers=headers,
                                 proxies=proxies, timeout=10)
        print(response.text)
        if response:
            json_obj = json.loads(response.text)
            parse_detail(json_obj, task_obj)
    except Exception:
        # Any failure (network, proxy, JSON, parsing) re-queues the task.
        print('获取数据失败,重新push回队列')
        redis_obj.rpush(task_obj)
Пример #7
0
def preprocessor():
    """Interactive EDA menu: load a CSV, then dispatch user-chosen analyses.

    Idiom cleanup only: `while(1)` becomes `while True`, the menu uses
    enumerate instead of a manual counter, the elif branches are in numeric
    order (5 before 6), and the unreachable trailing return is removed.
    """
    # Dataset path comes from argv[1] or, failing that, an interactive prompt.
    if len(sys.argv) <= 1:
        print("\nEnter dataset address")
        df_input = input()
    else:
        df_input = sys.argv[1]

    # Read the dataset and normalize column names to lower case.
    df = pd.read_csv(df_input)
    print(" -> Data taken from " + df_input)
    df = change_column_lower(df)

    # Decide the target variable and split features from target.
    target_colm = find_target(df)
    y = df[target_colm]
    x = df.drop(target_colm, axis=1)

    # Menu loop: runs until the user enters -1.
    while True:
        print("\nWhat do you want")
        for count, name in enumerate(functionality, start=1):
            print(str(count) + ". " + name)

        inp = int(input("\nEnter your input: ,-1 to exit\n"))
        if inp == -1:
            exit()
        elif inp == 1:
            print(" --> Data Description:")
            description(df)
        elif inp == 2:
            print(" --> Data imputation:")
            df = data_impute.Imputer(df).impute()
        elif inp == 3:
            print(" --> Univariate Analysis:")
            df = univariate.Univariate(df).univariate_plot()
        elif inp == 4:
            print(" --> Bivariate Analysis:")
            df = bivariate.Bivariate(df).bivariate_plot()
        elif inp == 5:
            print(" --> Data Wrangling")
            df = wrangling.Wrangle(df).wrangle()
        elif inp == 6:
            print(" --> Download Dataset")
            download.Download(df).make_dataset()
Пример #8
0
    def insertDownload(self, document_id, search_query):
        if self.__downloadByDocumentID.has_key(document_id):
            return

        self.__mutex.acquire()
        print "insert le download"
        dl = download.Download(document_id, search_query)
        self.__downloadByDocumentID[document_id] = dl
        self.__downloads.append(dl)
        self.__mutex.release()
Пример #9
0
 def __init__(self):
     """Prepare the crawler: a downloader plus fresh success/failed CSV files."""
     self.urls = []
     self.download = download.Download()
     # (Re)create the two result files, each seeded with its CSV header row.
     headers = {
         'success.csv': 'domain,Ahrefs_Rank,DR,Backlinks,Referring_domains' + '\n',
         'failed.csv': 'domain' + '\n',
     }
     for filename, header_row in headers.items():
         with open(filename, 'w') as f:
             f.write(header_row)
Пример #10
0
 def __init__(self, root, start):
     """Seed the crawl queue with root+start and launch the downloader."""
     self.root = root
     self.start = start
     self.thisurl = root
     self.urls = []
     self.queue = queue.Queue()
     # The first URL to visit is the start path resolved against the root.
     first_url = parse.urljoin(self.root, self.start)
     self.queue.put(first_url)
     # Kick off the background download task immediately.
     self.download_task = download.Download(file_path)
     self.download_task.start_down()
Пример #11
0
    def get_results(self, start_url, start, end, datetime):
        """Fetch the train-query JSON at *start_url* and upsert each result row.

        Bug fixed: the generated upsert SQL concatenated the VALUES clause
        directly with "ON DUPLICATE KEY UPDATE" (no separating space),
        producing syntactically invalid SQL for every row.  The no-op
        `datetime = datetime` assignment is also removed.
        """
        down = download.Download()
        response = down.get_html(start_url)
        if not response:
            return
        try:
            json_obj = json.loads(response.text)
            if len(json_obj['data']['result']) == 0:
                print('no results..')
                return None
            for data in json_obj['data']['result']:
                # Each result is a pipe-separated record; indices below are
                # positional fields of that record.
                item = data.split('|')

                startStation = self.get_stationName(item[6])[2]
                endStation = self.get_stationName(item[7])[2]
                startStationNum = item[6]
                endStationNum = item[7]
                trainName = item[3]
                startTime = item[8]
                endTime = item[9]
                duration = item[10]

                # Journeys longer than a day arrive ceil(hours/24) days later.
                try:
                    durDate = int(duration.split(':')[0])
                    arrivalDate = str(math.ceil(durDate / 24))
                except Exception:
                    arrivalDate = ''

                # status: '0' = train cancelled ("列车停运"), '1' = running.
                if '列车停运' in data:
                    status = '0'
                else:
                    status = '1'
                resultsStr = data
                timestampStr = datetime + ' ' + startTime

                if startTime[0:2] == '24':
                    # "24:xx" departures roll over to midnight of the next day.
                    timestampStr = datetime
                    ts = time.strptime(timestampStr, "%Y-%m-%d")
                    timestamp = str(int(time.mktime(ts)) + 86400)
                else:
                    ts = time.strptime(timestampStr, "%Y-%m-%d %H:%M")
                    timestamp = str(int(time.mktime(ts)))

                # NOTE(review): values are interpolated straight into the SQL
                # string (quoting / injection hazard); parameterize if the
                # mysql client supports it.
                sql = "insert into results(datetime,startStation,endStation,startStationNum,endStationNum,trainName,startTime,endTime,duration,arrivalDate,status,resultsStr,timestamp) values ('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')"\
                      %(datetime,startStation,endStation,startStationNum,endStationNum,trainName,startTime,endTime,duration,arrivalDate,status,resultsStr,timestamp)+ " ON DUPLICATE KEY UPDATE startTime='%s', endTime='%s',timestamp='%s'"%(startTime,endTime,timestamp)
                print(sql)
                self.mysql.save(sql)
        except Exception:
            # Best-effort: record the failing URL so it can be retried later.
            print('未知错误')
            with open('fail.txt', 'a') as f:
                f.write(start_url + '\n')
Пример #12
0
 def __init__(self):
     """Set up the downloader and (re)create the three output CSV files."""
     self.urls = []
     self.download = download.Download()
     # Write the header row into each of the three result files.
     headers = {
         'registed.csv': 'domain' + '\n',
         'unregister.csv': 'domain,google_pr,sogou_pr,sogou_link' + '\n',
         'failed_domain.csv': 'failed' + '\n',
     }
     for filename, header_row in headers.items():
         with open(filename, 'w') as f:
             f.write(header_row)
Пример #13
0
def lambda_handler(event, context):
    """Consume one SQS message, run its download, then delete the message."""
    sqs = Sqs()
    data = sqs.Handle()
    if data is None:
        # Nothing to process: signal the empty queue with a distinct exit code.
        exit(110)

    body, key, queue_url = data[0], data[1], data[2]

    r_sqs = Resource("sqs")
    download.Download(body)  # run download
    # Only after a successful download is the queue message deleted.
    r_sqs.Message(queue_url, key).delete()
Пример #14
0
def main():
    """Fetch a channel's messages, then either analyze them or download images."""
    argv = args.get_parsed_args()
    # The environment variable wins; the parsed --token is the fallback.
    token = environ.get("DISCORD_API_TOKEN", argv.token)

    dc = discord.Discord(token)
    res = dc.get_all_channel_messages(argv.channel)

    if argv.analyze:
        # Analysis mode: plot statistics instead of downloading anything.
        dimensions = tuple(int(n) for n in argv.plot_size.split(','))
        analyze.analyze(res, argv.plot, dimensions)
        return

    # Download mode: save every image attached to the fetched messages.
    dl = download.Download(argv.output)
    for msg in res:
        dl.download_image_from_message(msg)
Пример #15
0
 def __init__(self,
              url=None,
              urls=None,
              url_iter=None,
              num_threads=20,
              cb=None,
              depth=True,
              max_errors=None,
              pattern=None,
              **kwargs):
     """Initialize the crawl engine's settings, queues and bookkeeping.

     Args:
         url: single seed URL appended to the download queue.
         urls: iterable of seed URLs extended onto the download queue.
         url_iter: iterator yielding further URLs; stored in settings.
         num_threads: worker concurrency level; stored in settings.
         cb: callback stored in settings — presumably invoked per completed
             download; confirm against the crawl loop.
         depth: flag stored in settings (crawl-depth behavior; see caller).
         max_errors: threshold of consecutive errors before giving up
             (compared against self.num_errors elsewhere).
         pattern: URL filter pattern stored in settings.
         **kwargs: extra settings overrides, also forwarded verbatim to
             download.Download.
     """
     # Default crawl settings; **kwargs may override any of them.
     self.settings = adt.Bag(read_cache=True,
                             write_cache=True,
                             num_redirects=5,
                             num_retries=2,
                             timeout=20,
                             headers={},
                             num_threads=num_threads,
                             cb=cb,
                             url_iter=url_iter,
                             depth=depth,
                             pattern=pattern)
     self.settings.update(**kwargs)
     self.D = download.Download(**kwargs)
     self.kwargs = kwargs
     # queue of html to be written to cache
     self.cache_queue = []
     # URL's that are waiting to download
     self.download_queue = collections.deque()
     if urls:
         self.download_queue.extend(urls)
     if url:
         self.download_queue.append(
             url
         )  # XXX create compressed dict data type for large in memory?
     # URL's currently downloading
     self.processing = {}
     # defereds that are downloading
     self.downloading = []
     # URL's that have been found before
     self.found = adt.HashDict()
     # Seed URLs count as already-found so they are never re-enqueued.
     for url in self.download_queue:
         self.found[url] = True
     self.state = download.State()
     self.max_errors = max_errors
     self.num_errors = 0  # counter for the number of subsequent errors
Пример #16
0
    def testInstallNoReboot(self):
        """Full download/install cycle where the installer requires no reboot.

        Drives the Download state machine step by step against mock ioloop,
        HTTP download and installer objects, and checks that the final
        TransferComplete reports success with the 'M Download' event code.
        """
        ioloop = MockIoloop()
        cmpl = MockTransferComplete()
        # Freeze time.time so start/end timestamps are deterministic.
        time.time = self.mockTime

        kwargs = dict(command_key='testCommandKey',
                      url='http://example.com/foo',
                      delay_seconds=1)
        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None,
                                               **kwargs)

        dl = download.Download(stateobj=stateobj,
                               transfer_complete_cb=cmpl.SendTransferComplete,
                               ioloop=ioloop)

        # Step 1: Wait delay_seconds
        dl.do_start()
        self.assertEqual(ioloop.timeout, _Delta(kwargs['delay_seconds']))

        # Step 2: HTTP Download
        dl.timer_callback()
        self.assertEqual(len(mock_http_downloads), 1)
        http = mock_http_downloads[0]
        self.assertEqual(http.url, kwargs['url'])

        # Step 3: Install
        dlfile = MockFile('/path/to/downloaded/file')
        http.download_complete_cb(0, '', dlfile)
        self.assertEqual(len(mock_installers), 1)
        inst = mock_installers[0]
        self.assertTrue(inst.did_install)
        self.assertEqual(inst.filename, dlfile.name)
        self.assertFalse(inst.did_reboot)

        # Step 4: Install Succeeded, no reboot
        inst.install_callback(0, '', must_reboot=False)
        self.assertTrue(cmpl.transfer_complete_called)
        self.assertEqual(cmpl.command_key, kwargs['command_key'])
        self.assertEqual(cmpl.faultcode, 0)
        self.assertEqual(cmpl.faultstring, '')
        self.assertEqual(cmpl.starttime, self.mockTime())
        self.assertEqual(cmpl.endtime, self.mockTime())
        self.assertEqual(cmpl.event_code, 'M Download')
Пример #17
0
 def __init__(self, config):
     """Build the tweet bot's state from a nested configuration mapping."""
     self.api = None
     self.dtnow = datetime.now()
     self.fillspace = 0
     self.isTweet = True
     self.event = SimpleEvent()
     self.auth_twitter = config['AUTH']['TWITTER']
     self.__download = download.Download(config)
     self.__ranking = ranking.Ranking(config)

     # Working directories and upload constraints.
     workdir = config['WORK_DIRECTORY']
     self.uploadDir = workdir['UPLOAD']
     self.backupDir = workdir['BACKUP']
     self.upload_file_suffixes = workdir['SUFFIXES']
     self.upload_max_file_size = config['UPLOAD']['MAX_FILESIZE']

     # Tweet formatting / posting settings.
     tweet_cfg = config['TWEET']
     self.tweet_format = tweet_cfg['POST']['FORMAT']
     self.tweet_datefmt = tweet_cfg['POST']['DATEFMT']
     self.tweet_screen_name = tweet_cfg['SCREEN_NAME']
     self.tweet_limit = tweet_cfg['LIMIT']

     self.backup_file_prefix = config['BACKUP']['FILE']['PREFIX']
     self.initialize()
     self.task = OrderedDict()
Пример #18
0
    def testCancelRefused(self):
        """cleanup() must refuse cancellation mid-transfer and allow it after."""
        ioloop = MockIoloop()
        cmpl = MockTransferComplete()

        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None,
                                               command_key='testCommandKey',
                                               url='http://example.com/foo')
        dl = download.Download(stateobj=stateobj,
                               transfer_complete_cb=cmpl.SendTransferComplete,
                               ioloop=ioloop)

        # Drive the state machine; cleanup() returns truthy (refusal) until
        # the reboot step has completed.
        dl.do_start()  # Step 1: Wait delay_seconds
        dl.timer_callback()  # Step 2: HTTP Download
        dl.download_complete_callback(0, None, None)  # Step 3: Install
        self.assertTrue(dl.cleanup())
        dl.installer_callback(0, None, must_reboot=True)  # Step 4: Reboot
        self.assertTrue(dl.cleanup())
        dl.reboot_callback(0, '')  # Step 5: Rebooted
        self.assertFalse(dl.cleanup())
Пример #19
0
 def __init__(self):
     """Build the ordered list of whois lookup strategies and a downloader."""
     # Each entry names the lookup method to invoke, tried in this order.
     strategies = ('panda()', 'aizhan()', 'chinaz()', 'tencent()',
                   'baidu()', 'twotwo()', 'whois_domain()',
                   'threeZeroZero()')
     self.whois_list = [{'function': name} for name in strategies]
     self.domain_obj = {}
     self.down = download.Download()
Пример #20
0
    def download(self, thread_id):
        """Worker loop: pull URIs off the queue and download them until empty.

        Runs one Download per URI, accumulates per-thread timing statistics,
        and forwards any per-URI failures to the retry bookkeeping.
        Python 2 syntax (print statement, `except Exception, ex`).
        """
        try:
            uri = self.__getUri()
            while uri:
                # One Download object per URI; run() blocks until finished.
                o_download = download.Download(uri, self.__dataQueue)
                o_download.setThreadId(thread_id)
                o_download.run()

                # Aggregate timing statistics for this worker.
                self.__curlTime += o_download.getUsedTime()
                self.__curlCounts += 1
                funcUtil.recordStatus(
                    self.__id, '%s  uri: %s  use time: %.2f  size: %d' %
                    (thread_id, uri, o_download.getUsedTime(),
                     self.__uriQueue.qsize()))
                print thread_id + '    uri: ' + uri + '   use time: ' + str(
                    o_download.getUsedTime()) + '  size: ' + str(
                        self.__uriQueue.qsize())

                # Record URIs that failed inside this download for retry.
                self.__addFailUri(uri, o_download.getErrorQueue())

                uri = self.__getUri()
        except Exception, ex:
            print ex
Пример #21
0
    def testDownloadFailed(self):
        """A failed HTTP download must skip install and report the fault.

        After the mock download reports faultcode 100, no installer may be
        created, and the TransferComplete must carry the fault code/string
        with zeroed start/end times.
        """
        ioloop = MockIoloop()
        cmpl = MockTransferComplete()
        # Freeze time.time so timestamps are deterministic.
        time.time = self.mockTime

        kwargs = dict(command_key='testCommandKey',
                      url='http://example.com/foo',
                      delay_seconds=1)
        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None,
                                               **kwargs)

        dl = download.Download(stateobj=stateobj,
                               transfer_complete_cb=cmpl.SendTransferComplete,
                               ioloop=ioloop)

        # Step 1: Wait delay_seconds
        dl.do_start()
        self.assertEqual(ioloop.timeout, _Delta(kwargs['delay_seconds']))

        # Step 2: HTTP Download
        dl.timer_callback()
        self.assertEqual(len(mock_http_downloads), 1)
        http = mock_http_downloads[0]
        self.assertEqual(http.url, kwargs['url'])

        # Step 3: Download fails
        http.download_complete_cb(100, 'TestDownloadError', None)
        self.assertEqual(len(mock_installers), 0)
        self.assertTrue(cmpl.transfer_complete_called)
        self.assertEqual(cmpl.command_key, kwargs['command_key'])
        self.assertEqual(cmpl.faultcode, 100)
        self.assertEqual(cmpl.faultstring, 'TestDownloadError')
        self.assertEqual(cmpl.starttime, 0.0)
        self.assertEqual(cmpl.endtime, 0.0)
        self.assertEqual(cmpl.event_code, 'M Download')
Пример #22
0
                response = down.get_html(url, proxy=True, IP_URL=IP_URL)
            else:
                response = down.get_html(url, proxy=False, IP_URL=IP_URL)

            if response:
                # print(response.text)
                html = HTML(response.text)
                #姓名 电话 地址  公司 职位 是否VIP
                name = html.xpath('string(//h1[@class="uname"]/text())')
                phone = html.xpath('string(//span[@id="telD"]/text())')
                allInfo = html.xpath(
                    'string(//div[@class="userinfo"]/p/text())').strip()
                address = allInfo.split('|')[1]
                company = allInfo.split('|')[0].split(' ')[0]
                position = allInfo.split('|')[0].split(' ')[1]
                vip = html.xpath('string(//input[@id="vip"]/@value)')

                print(name, phone, address, company, position, vip)
                savr_res = name + ',' + phone + ',' + address + ',' + company + ',' + position + ',' + vip + '\n'
                with open('结果.csv', 'a', encoding='gbk', errors='ignore') as f:
                    f.write(savr_res)
        except:
            print('未知错误')


if __name__ == '__main__':
    # Seed the results file with its CSV header row before crawling starts.
    csv_header = '姓名,电话,地址,公司,职位,是否vip\n'
    with open('结果.csv', 'w', encoding='gbk', errors='ignore') as f:
        f.write(csv_header)

    # `down` is the module-level downloader used by the scraping routine.
    down = download.Download()
    start()
Пример #23
0
 def __init__(self):
     """Create the shared downloader and an empty result accumulator."""
     self.result = []
     self.down = download.Download()
Пример #24
0

# Install signal handlers so Ctrl-C / termination run onsignal_int cleanly.
signal.signal(signal.SIGINT, onsignal_int)
signal.signal(signal.SIGTERM, onsignal_int)

# commands
# Device command to read the serial number, given as a hex string
# (str.decode("hex") is Python 2 only).
read_sn_cmd = "01d0400100".decode("hex")

# Settings
MAX_COUNT = 1  # number of test iterations to run
comport = 'COM75'  # serial port the target board is attached to
BIN_FILE = 'prod_test_580.bin'  # firmware image to download to the board
LOG_FILE = 'prodtest_log.txt'  # production-test log output

# Create download object
burn = download.Download(comport, BIN_FILE)

# init log
f = open(LOG_FILE, "w")
f.write("-------------------------------------------------------\n")
f.write("start time:" + time.strftime("%H:%M:%S") + "\n")

# init serial
ser = serial.Serial()

print "--------------------Test Start--------------------"

count = 0
error_count = 0
# Main production-test loop (its body continues beyond this excerpt).
while count < MAX_COUNT:
    count = count + 1
Пример #25
0
def run_download(url, metadata, author):
    """Run a Download for *url* on a background thread and return immediately."""
    task = download.Download(url, metadata, author)
    worker = Thread(target=task.run)
    worker.start()
Пример #26
0
 def __init__(self):
     """Create the downloader instance shared by this object's methods."""
     self.down = download.Download()
Пример #27
0
    def testSuccess(self):
        """Happy path: download, install, reboot, then TransferComplete.

        Walks the Download state machine through every step against mock
        ioloop / HTTP / installer objects, asserting the queue-state code
        (via QCheckBoring: 1 = not started, 2 = in process, 3 = cleaning up)
        after each transition.
        """
        ioloop = MockIoloop()
        cmpl = MockTransferComplete()
        # Freeze time.time so start/end timestamps are deterministic.
        time.time = self.mockTime

        kwargs = dict(command_key='testCommandKey',
                      file_type='testFileType',
                      url='http://example.com/foo',
                      username='******',
                      password='******',
                      file_size=1000,
                      target_filename='testTargetFilename',
                      delay_seconds=99)
        stateobj = persistobj.PersistentObject(objdir=self.tmpdir,
                                               rootname='testObj',
                                               filename=None,
                                               **kwargs)

        dl = download.Download(stateobj=stateobj,
                               transfer_complete_cb=cmpl.SendTransferComplete,
                               ioloop=ioloop)
        self.assertEqual(self.QCheckBoring(dl, kwargs),
                         1)  # 1: Not Yet Started

        # Step 1: Wait delay_seconds
        dl.do_start()
        self.assertEqual(ioloop.timeout, _Delta(kwargs['delay_seconds']))
        self.assertEqual(self.QCheckBoring(dl, kwargs),
                         1)  # 1: Not Yet Started

        # Step 2: HTTP Download
        dl.timer_callback()
        self.assertEqual(len(mock_http_downloads), 1)
        http = mock_http_downloads[0]
        self.assertEqual(http.url, kwargs['url'])
        self.assertEqual(http.username, kwargs['username'])
        self.assertEqual(http.password, kwargs['password'])
        self.assertTrue(http.download_complete_cb)
        self.assertTrue(http.did_fetch)
        self.assertEqual(self.QCheckBoring(dl, kwargs), 2)  # 2: In process

        # Step 3: Install
        dlfile = MockFile('/path/to/downloaded/file')
        http.download_complete_cb(0, '', dlfile)
        self.assertEqual(len(mock_installers), 1)
        inst = mock_installers[0]
        self.assertTrue(inst.did_install)
        self.assertEqual(inst.file_type, kwargs['file_type'])
        self.assertEqual(inst.target_filename, kwargs['target_filename'])
        self.assertEqual(inst.filename, dlfile.name)
        self.assertFalse(inst.did_reboot)
        self.assertEqual(self.QCheckBoring(dl, kwargs), 2)  # 2: In process

        # Step 4: Reboot
        inst.install_callback(0, '', must_reboot=True)
        self.assertTrue(inst.did_reboot)
        self.assertEqual(self.QCheckBoring(dl, kwargs), 2)  # 2: In process

        # Step 5: Send Transfer Complete
        dl.reboot_callback(0, '')
        self.assertTrue(cmpl.transfer_complete_called)
        self.assertEqual(cmpl.command_key, kwargs['command_key'])
        self.assertEqual(cmpl.faultcode, 0)
        self.assertEqual(cmpl.faultstring, '')
        self.assertEqual(cmpl.starttime, self.mockTime())
        self.assertEqual(cmpl.endtime, self.mockTime())
        self.assertEqual(cmpl.event_code, 'M Download')
        self.assertEqual(self.QCheckBoring(dl, kwargs), 3)  # 3: Cleaning up

        # Step 6: Wait for Transfer Complete Response
        self.assertFalse(dl.cleanup())
        self.assertEqual(self.QCheckBoring(dl, kwargs), 3)  # 3: Cleaning up
Пример #28
0
 def __init__(self):
     """Start with an empty frame and a downloader for the covid metadata CSV."""
     metadata_url = ("https://raw.githubusercontent.com/ieee8023/"
                     "covid-chestxray-dataset/master/metadata.csv")
     self.data = pd.DataFrame()
     self.dl = download.Download(metadata_url)
Пример #29
0
        print(res)
        try:
            pid = str(res[0])
            response = download.get_html(res[6])
            html = HTML(response.text)
            screenlist = html.xpath(
                '//ul[@id="zg_browseRoot"]/ul/ul/ul/li/a/text()')
            namelist = html.xpath(
                '//ul[@id="zg_browseRoot"]/ul/ul/ul/li/a/@href')
            for item in zip(screenlist, namelist):
                screenname = item[0]
                url = item[1]
                name = re.search(
                    'https://www.amazon.com/(B|b)est-(S|s)ellers-(.*?)/zgbs/',
                    item[1]).group(3)
                deparmentid = hashlib.md5(name.encode()).hexdigest()
                type = '3'
                sql = "insert into threedepa(pid,screenname,url,name,deparmentid,type) values ('%s','%s','%s','%s','%s','%s')" \
                      % (pid, screenname, url, name, deparmentid, type)
                print(sql)
                mysql.save(sql)
        except:
            pass


# Script entry point: build the shared downloader and DB client used by the
# scraping helpers above, then scrape the third level of the category tree.
if __name__ == '__main__':
    # NOTE(review): this rebinds the module-level name `download` from the
    # module to a Download instance (the helpers call download.get_html on
    # it) — the module itself is unreachable afterwards; confirm intended.
    download = download.Download()
    mysql = db.MysqlClient()
    # get_one()
    # get_two()
    get_three()
Пример #30
0
def main():
  """Create a downloader and execute its run loop."""
  worker = download.Download()
  worker.run()