Example #1
def simple_parallel():
    values = [[2, 3, 5], [5, 5, 5], [2], [3, 3]]
    pool = Pool(4)
    results = pool.map(sum, values)
    pool.close()  # closing the pool
    pool.join()  # waiting for the work to finish
    print results
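Example #1 above shows the canonical map/close/join pattern on a thread pool. For comparison, a minimal Python 3 sketch of the same job (an assumption, not part of the original example) can use the pool as a context manager; note that __exit__ terminates the pool, which is safe here because map() blocks until every result is ready:

from multiprocessing.dummy import Pool

def simple_parallel_py3():
    values = [[2, 3, 5], [5, 5, 5], [2], [3, 3]]
    with Pool(4) as pool:                    # terminated automatically on exit
        results = pool.map(sum, values)      # blocks until all sums are done
    print(results)                           # [10, 15, 2, 6]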
Example #2
def background_refresh_offline_user_data():    
    if r_session.exists('api_error_info') or datetime.now().minute < 50:
        return

    if DEBUG_MODE:
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'background_refresh_offline_user_data()')

    offline_users = []
    for b_user in r_session.mget(*['user:%s' % name.decode('utf-8') for name in
                                   r_session.sdiff('users', *r_session.smembers('global:online.users'))]):
        user_info = json.loads(b_user.decode('utf-8'))
        username = user_info.get('username')
        if not user_info.get('active'):
            continue

        every_hour_key = 'user:%s:cron_queued' % username
        if r_session.exists(every_hour_key):
            continue
        offline_users.append(username)

    pool = ThreadPool(processes=5)

    pool.map(get_data, offline_users)
    pool.close()
    pool.join()
Example #3
    def run():
        t = [
            ('users', User().create),
            ('forums', Forum().create),
            ('threads', Thread().create),
            ('posts', Post().create),
            ("followers", User().follow),
            ("subscribptions", Thread().subscribe),
        ]

        for entity, factory in t:
            entities = [True for i in range(int(settings[entity]))]
            num_tasks = len(entities)
            pool = ThreadPool(int(settings['num_threads']))
            try:
                progress = range(5, 105, 5)
                for i, _ in enumerate(pool.imap(factory, entities)):
                    perc = i * 100 / num_tasks
                    if perc % 5 == 0 and perc in progress: 
                        log.print_out('Creating %s: %d%% done' % (entity, perc))
                        progress.remove(perc)
                pool.close()
                pool.join()
            except Exception, e:
                print e
                pool.terminate()
                sys.exit(1)
Example #4
def check_and_rank_ip(session):
    def ping_jd(ip):
        t = time.time()
        try:
            respond = requests.post('http://so.m.jd.com/ware/searchList.action',
                                    data={'_format_': 'json', 'stock': 1, 'page': 1, 'keyword': '手机'},
                                    proxies=ip.to_proxy(), timeout=5).content
            json.loads(respond)
            ip.rank = int(100 * (time.time() - t))
        except Exception:
            ip.rank = None
        return ip

    print datetime.now(), 'start checking proxy IP liveness'
    from multiprocessing.dummy import Pool as ThreadPool
    all_ip = session.query(IP).all()
    pool = ThreadPool(100)
    ips = pool.map(ping_jd, all_ip)
    for ip in ips:
        session.add(ip)
    session.query(IP).filter(IP.rank == None).delete()
    session.commit()
    pool.close()
    pool.join()
    return session.query(IP).count()
Example #5
    def createDemo(self):

        usersData = []
        event = Event()
        pool = ThreadPool(5)

        for i in range(0, 1000):
            string = hashlib.sha224()
            string.update('{}'.format(random.random()))
            first = 'first{}'.format(string.hexdigest()[0:10])
            string.update('{}'.format(random.random()))
            last = 'last{}'.format(string.hexdigest()[0:10])
            tel = '{}'.format(8005550000 + i)
            email = 'email{}@localhost.email'.format(string.hexdigest()[0:10])
            postData = {
                    'first': first,
                    'last': last,
                    'tel': tel,
                    'email': email,
                    'pass': '******',
                    'type': 'customer',
                    'event': event
                }
            usersData.append(postData)


        results = pool.map(self.createUser, usersData)
        pool.close()
        pool.join()
Example #6
def main():
    dfbToken = raw_input('Enter your Dropbox Business API App token (Team Member File Access permission): ')

    if args.verbose:
        dumpArguments()

    global fileQuota
    fileQuota = args.quota * UNITS[args.units]

    log("Creating Dropbox V2 API Client")
    global dbxApiV2
    dbxApiV2 = DbxApi(DbxApi.DBX_API_V2, dfbToken)

    log("Collecting Member List...")
    members = getDfbMembers(None)
    # Filter out invited members as they can't consume any quota yet
    activeMembers = [member for member in members if member.status != "invited"]
    log("Got {} total team members ({} active, {} suspended, {} invited)"
        .format(
                len(members), len(activeMembers),
                len(getMemberSublist(members, "suspended")),
                len(getMemberSublist(members, "invited"))
                ))

    log("Collecting file quota information - this may take a while...")
    pool = ThreadPool(args.threads)
    members = pool.map(getFileQuotaUsage, activeMembers)
    pool.close()
    pool.join()

    # Write final output
    log("Processing complete, writing output to {}".format(args.output.name))
    dumpCsvFile(members)
Example #7
def parallel_bulk(client, actions, thread_count=4, chunk_size=500,
        max_chunk_bytes=100 * 1014 * 1024,
        expand_action_callback=expand_action, **kwargs):
    """
    Parallel version of the bulk helper run in multiple threads at once.
    :arg client: instance of :class:`~elasticsearch.Elasticsearch` to use
    :arg actions: iterator containing the actions
    :arg thread_count: size of the threadpool to use for the bulk requests
    :arg chunk_size: number of docs in one chunk sent to es (default: 500)
    :arg max_chunk_bytes: the maximum size of the request in bytes (default: 100MB)
    :arg raise_on_error: raise ``BulkIndexError`` containing errors (as `.errors`)
        from the execution of the last chunk when some occur. By default we raise.
    :arg raise_on_exception: if ``False`` then don't propagate exceptions from
        call to ``bulk`` and just report the items that failed as failed.
    :arg expand_action_callback: callback executed on each action passed in,
        should return a tuple containing the action line and the data line
        (`None` if data line should be omitted).
    """
    # Avoid importing multiprocessing unless parallel_bulk is used
    # to avoid exceptions on restricted environments like App Engine
    from multiprocessing.dummy import Pool
    actions = map(expand_action_callback, actions)

    pool = Pool(thread_count)

    for result in pool.imap(
        lambda chunk: list(_process_bulk_chunk(client, chunk, **kwargs)),
        _chunk_actions(actions, chunk_size, max_chunk_bytes, client.transport.serializer)
    ):
        for item in result:
            yield item

    pool.close()
    pool.join()
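The docstring above describes the parallel_bulk API; a hedged usage sketch (assuming the elasticsearch-py client, a reachable cluster, and a hypothetical "test-index") could look like the following. The key point is that parallel_bulk is a generator and does nothing until it is consumed:

from elasticsearch import Elasticsearch

client = Elasticsearch(["http://localhost:9200"])
actions = ({"_index": "test-index", "_source": {"value": i}} for i in range(1000))

# consume the generator so the chunks actually get sent
for ok, info in parallel_bulk(client, actions, thread_count=4, chunk_size=250):
    if not ok:
        print("failed:", info)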
Example #8
def load_rowdata_to_mongo_zh(is_incremental):
    print("start loading row data(zh) from JSON file to MongoDB...")
    all_start = timeit.default_timer()
    static = Static()
    bydim_dir = static.output_folder + static.dataset_bydim_folder
    
    client = MongoClient(static.mongo_url, static.mongo_port)
    db = client[static.database_name]
    dataset_col = db[static.dataset_col_name]
    if not is_incremental:
        dataset_col.drop()

    file_path_array = []
    for idx, file in enumerate(os.listdir(bydim_dir)):
        file_path = os.path.join(bydim_dir, file)
        if os.path.isfile(file_path):
            file_path_array.append(file_path)
    print(str(len(file_path_array)) + " files are loaded")

    counter = []
    mapfunc = partial(insert_by_dim, counter=counter, dataset_col=dataset_col, all_start=all_start)
    pool = ThreadPool(12)
    pool.map(mapfunc, file_path_array)
    pool.close() 
    pool.join()
    
    print("All the threads are completed. Total number is " + str(len(counter)) + "\n")
    print("total time cost: " + str(round(timeit.default_timer() - all_start)) + 's')
Example #9
def run(threads):
    urls = ['http://www.python.org',
    	'http://www.python.org/about/',
    	'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
    	'http://www.python.org/doc/',
    	'http://www.python.org/download/',
    	'http://www.python.org/getit/',
    	'http://www.python.org/community/',
    	'https://wiki.python.org/moin/',
    	'http://planet.python.org/',
    	'https://wiki.python.org/moin/LocalUserGroups',
    	'http://www.python.org/psf/',
    	'http://docs.python.org/devguide/',
    	'http://www.python.org/community/awards/'
         ]
    results = []
    scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    
    requests = [urllib.request.Request(url=url,data=b'None',
                headers={'User-Agent':' Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'})
                for url in urls]
    pool = ThreadPool(threads)    
    results = list(pool.map(lambda x: urllib.request.urlopen(x, context=scontext), requests))
    pool.close()
    pool.join()

    dataLen = [len(result.read().decode('utf-8')) for result in results]
    print(threads, 'thread(s), read', sum(dataLen), 'bytes')
Example #10
def parse_films_infomation(item):
    title = item[0]
    title_deal = ''.join(title.split('*'))

    title_deal=''.join(title_deal.split('/'))
    title_deal=''.join(title_deal.split(':'))

    os.mkdir(title_deal)
    os.chdir(title_deal)
    film_info = item[1]
    with open('film_tag.txt', 'w+', encoding='utf-8') as file:
        for i in film_info:
            file.write(i)

    magnent_container = item[3]
    with open('magnent.txt', 'w+', encoding='utf-8') as file2:
        for per_list in magnent_container:
            strings = ''.join(i + '   ' for i in per_list)
            file2.write(strings + '\n')

    # os.mkdir('sample_img')
    # os.chdir('sample_img')

    film_pic_url = item[2]
    sample_images_urls = item[4]

    # print(type(sample_images_urls))

    # set up the thread pool for image downloads
    child_pool = ThreadPool(12)
    result = child_pool.map(download, sample_images_urls)
    # print('下载完成')
    child_pool.close()
    child_pool.join()
    os.chdir('../')
Example #11
def build_words_weight():
    st = time.time()
    bigvs = BigVs.objects.all()
    def _build(b):
        data = ArticlePostedResults.active_objects.filter(bigv__v_id=b.v_id, is_correct__in=(0, 1)).values('is_correct').annotate(count=Count('is_correct')).order_by('is_correct')
        sum_c , w, c = 0, 0, 0
        for d in data:
            if d['is_correct'] == 1:
                c = d['count']
            sum_c += d['count']
        if sum_c:
            w = c * 1.0 / sum_c
            c = w * 200
            sum_c = 200
        data = Judgement.objects.filter(article__bigv=b, judge__isnull=False).values('judge').annotate(count=Count('judge')).order_by('judge')
        for d in data:
            if d['judge'] == 'right':
                c += d['count']
            sum_c += d['count']
        if sum_c:
            w = int(round(c * 1.0 / sum_c * 100))
            b.words_weight = w
            b.save()
            print b.name, c, sum_c, w
    pool = Pool(8)
    pool.map(_build, bigvs)
    pool.close()
    pool.join()
    ed = time.time()
    debug('build_words_weight', ed - st)
Example #12
def collect_crystal():
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'collect_crystal')
    pool = ThreadPool(processes=5)

    pool.map(check_collect, (json.loads(c.decode('utf-8')) for c in r_session.smembers('global:auto.collect.cookies')))
    pool.close()    
    pool.join()
Example #13
    def load(cls, docs, ignore_errors=False):
        """Force load the provided docs to read from file system."""
        if not docs:
            return

        pod = docs[0].pod

        def load_func(doc):
            """Force the doc to read the source file."""
            try:
                # pylint: disable=pointless-statement
                doc.has_serving_path()  # Using doc fields forces file read.
            except document_front_matter.BadFormatError:
                if not ignore_errors:
                    raise

        with pod.profile.timer('DocsLoader.load'):
            if ThreadPool is None or len(docs) < cls.MIN_POOL_COUNT:
                for doc in docs:
                    load_func(doc)
                return
            pool_size = min(cls.MAX_POOL_SIZE, len(docs) * cls.POOL_RATIO)
            pool_size = int(round(pool_size))
            thread_pool = ThreadPool(pool_size)
            results = thread_pool.imap_unordered(load_func, docs)
            # Loop results to make sure that the threads are all processed.
            for _ in results:
                pass
            thread_pool.close()
            thread_pool.join()
Example #14
def make_unaligned_fasta(dnaDirectory, groupsDict):
    """ Reads through files in provided directory to find gene sequences that
    match the proteins in the groups dictionary"""
    print "Collecting core genes"
    def make_fasta(group):
        proteins = groupsDict[group]
        out = open('proteinAlignments/' + group + '.fasta', 'w')
        records = []
        seqIDs = []
        for protein in proteins:
            seqID = protein.split('|')[0]
            seqIDs.append(seqID)
            protein = protein.split('|')[1]
            records.append(seqRecordDict[protein])
        SeqIO.write(records, out, 'fasta')
        return seqIDs

    try:
        os.makedirs("proteinAlignments")
    except OSError:
        if not os.path.isdir("proteinAlignments"):
            raise
    files = listdir_fullpath(dnaDirectory)
    seqRecordDict = {}
    seqIDs = []
    for f in files:
        handle = open(f, 'r')
        for record in SeqIO.parse(handle, 'fasta'):
            seqRecordDict[record.id] = record
    pool = ThreadPool(args.threads)
    seqIDs = pool.map(make_fasta, groupsDict.keys())
    pool.close()
    pool.join()
    return seqIDs[0]
Example #15
def Producer():
#    urls = [
#            'http://www.python.org', 
#            'http://www.python.org/about/',
#            'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
#            'http://www.python.org/doc/',
#            'http://www.python.org/download/',
#            'http://www.python.org/getit/',
#            'http://www.python.org/community/',
#            'https://wiki.python.org/moin/',
#            'http://planet.python.org/',
#            'https://wiki.python.org/moin/LocalUserGroups',
#            'http://www.python.org/psf/',
#            'http://docs.python.org/devguide/',
#            'http://www.python.org/community/awards/'
#            # etc.. 
#            ]

#            'http://wwww.qq.com','http://www.baidu.com'
    urls = [
            'http://www.taobao.com','http://www.sina.com.cn'
            ]

    start_time = time.time()
    # Make the Pool of workers
    pool = ThreadPool(4) 
    # Open the urls in their own threads
    # and return the results
    results = pool.map(urllib2.urlopen, urls)
    #close the pool and wait for the work to finish 
    pool.close() 
    pool.join()

    print "Done! time Taken()",format(time.time()-start_time)
Example #16
def grab_everything():
    node_links = grab_blog_node_links()
    pool = ThreadPool(cpu_count())
    results = pool.map(grab_blog_content, node_links)
    pool.close()
    pool.join()
    return results
Example #17
def create_all_preflop_two_hand_equity(verbose=False, save=False, distributed=False, nb_process=4):
	"""returns preflop_two_hand_equity for all two hand preflop combinations"""
	global all_preflop_two_hands

	print '\n--------------- start create_all_preflop_two_hand_equity'
	print 'all preflop two hands = \nstart = {}\nend = {}\nnb of elements = {}'.format(all_preflop_two_hands[:5], all_preflop_two_hands[-5:], len(all_preflop_two_hands))

	t0 = timer()

	if (distributed):
		pool = ThreadPool(nb_process)
		equity = pool.map(preflop_two_hand_equity, all_preflop_two_hands[:])
		pool.close()
		pool.join()
	else:
		equity = []
		for k, p in enumerate(all_preflop_two_hands[:]):
			if (verbose):
				# print k,' - ', p
				sys.stdout.write('\rk=%5d / %5d : %s' % (k+1, len(all_preflop_two_hands), p))
				sys.stdout.flush()
			equity.append(preflop_two_hand_equity(p))

	t1 = timer()
	print 'all_preflop_two_hand_equity time = {:9.4f} s'.format(t1-t0)
	print 'exact number of distinct (rankwise) pairs of preflop hands = {}'.format(np.array([len(e) for e in equity]).sum())
	if (save):
		cPickle.dump(equity, open(os.path.join('Tables', 'all_preflop_two_hand_equity.pk'), 'wb'))
		print '{} saved to disk as {}'.format('equity', os.path.join('Tables', 'all_preflop_two_hand_equity.pk'))
	return equity
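A hedged usage sketch of the function above (assuming the module-level all_preflop_two_hands table and preflop_two_hand_equity() are already defined, as the code implies): run the distributed branch across 4 worker threads and persist the result.

equity = create_all_preflop_two_hand_equity(verbose=False, save=True,
                                            distributed=True, nb_process=4)
print len(equity), 'equity entries computed'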
Example #18
def dns_resolver(filename, dst="mail.txt"):
    try:
        fd = open(filename, 'r')
    except:
        print 'can not open the file:', filename
        return

    try:
        fd_write = open(dst,'w')
    except:
        print 'error in open',dst
        return 

    thread_num = 2
    pool = ThreadPool(thread_num)
    results = pool.map(verify_domain,fd.readlines())
    pool.close()
    pool.join()
    results = list(set(results))
    results = [item for item in results if item]
    

    for line in results:
        fd_write.write(line)

    fd_write.close()
Example #19
def simTrans(hosts, prm):
	fname = str(prm.n) + 'nodes.' + str(prm.data_size) + 'MB.' + str(prm.pipes) + 'pipes.out'
	for h in hosts:
		full_name = "results/%04d/%s"%(int(h.name.split('h')[1]), fname)
		os.system("rm %s" % full_name)
		status[h.name] = [0 for i in range(prm.pipes)]
		ip[h.name] = h.IP()
		h.cmdPrint('iperf -s -f M >> %s &'%full_name)
	'''for h1 in hosts:
		for h2 in hosts:
			if h1 == h2:
				continue
			print "Testing %s and %s after running server" % (h1.name, h2.name)
			net.iperf( (h1, h2) )
	'''
	print neib
	status['h1'] = [2 for i in range(prm.pipes)]	#start node
	print status
	k = []
	for h in hosts:
		k.append((h, prm))
	pool = ThreadPool(50)
	pool.map(perNodeProc, k)
	pool.close()
	pool.join()

	for h in hosts:
		h.cmdPrint('kill %iperf')
Example #20
def parallel_runner(args):
    pool = ThreadPool(args.parallel)
    map_args = map(lambda f: (args, f), args.file)
    result = pool.map(run_cmd, map_args)
    pool.close()
    pool.join()
    print result
Example #21
def _download_all(items):
    """Async download of the files.

       Example: [(url, quality, file_path)]

    """

    global WORKERS
    # Make sure there is at least one worker per item (1:1)
    if WORKERS < len(items):
        WORKERS = len(items)

    pool = ThreadPool(WORKERS)
    chunks = 1  # TODO
    # 1 ffmpeg is normally 10x- 20x * 2500kbits ish
    # so depending on how many items you download and
    # your bandwidth you might need to tweak chunk

    results = pool.imap_unordered(dl, items, chunks)
    try:
        for j in tqdm.tqdm(results, total=len(items)):
            pass
    finally:
        pool.close()
        pool.join()
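The docstring above says items are (url, quality, file_path) tuples; the dl() worker itself is defined elsewhere in that project, so the following is only a hypothetical sketch of its shape, not the real implementation:

def dl(item):
    url, quality, file_path = item
    # fetch `url` at the requested quality and write it to file_path ...
    return file_path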
Example #22
    def get_proxy(self):
        self._parse_proxy()
        pool = ThreadPool(8)
        pool.map(self._check_proxy, self.proxies)
        pool.close()
        pool.join()
        return self.checked_proxies
Example #23
def multiRunuser():
    pool = ThreadPool(cpu_count() * 8)
    global ip_list
    global results
    results = pool.map_async(runuser, ip_list)
    pool.close()
    pool.join()
Example #24
def e_cal(l, cores):
    '''
    e calculator.

    Receives the desired number of digits of precision, prints progress
    while working, and returns the value of e as a Decimal.
    '''
    global LOOPS
    p = Pool()
    getcontext().prec = l
    e = Decimal(0)
    i = 0
    temp = 0
    c = 0
    while True:
        fact = p.map(math.factorial, range(i, i+cores)) #parallel process factorial
        e += sum(p.map(one_div, fact)) #processed factorial will total in here
        i += cores
        c += 1
        LOOPS += 1
        sys.stdout.write("\r%i loops passed." % (c) ) #Print Loop status
        sys.stdout.flush()
        #print i, "loops passed."
        if e == temp:
            break
        temp = e
    sys.stdout.write("\r%i loops passed.\n" % (c) )
    print i
    p.close()
    p.join()

    return e
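e_cal() maps a helper one_div over the factorials it has just computed; that helper is not shown above, so here is a minimal sketch of what it presumably does (one series term 1/n! as a Decimal), stated as an assumption:

from decimal import Decimal

def one_div(factorial_value):
    # one term of the series e = sum(1/n!)
    return Decimal(1) / Decimal(factorial_value)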
Example #25
def BurstUc(host, path, passfile):
    hostuser = host.split('.')
    hostuser = hostuser[len(hostuser)-2]
    hostdir = [hostuser,hostuser+hostuser,'admin'+hostuser,hostuser+'123','manage'+hostuser,hostuser+'123456',hostuser+'admin','123'+hostuser]

    opts_list = []

    f = open(passfile, 'r')
    password = f.read().split()
    dic = password+hostdir
    pool = ThreadPool(10)
    host1 = host+path

    for x in range(len(dic)):
        mima = dic[x]

        opts = {
            'host': host1,
            'password': mima
        }
        opts_list.append(opts)

    pool.map(LoginUc, opts_list)
    pool.close()
    pool.join()
    print "All passwords tried"
Example #26
def getAllSecrets(version="", region=None, table="credential-store",
                  context=None, credential=None, session=None, **kwargs):
    '''
    fetch and decrypt all secrets
    '''
    if session is None:
        session = get_session(**kwargs)
    dynamodb = session.resource('dynamodb', region_name=region)
    kms = session.client('kms', region_name=region)
    secrets = listSecrets(region, table, **kwargs)

    # Only return the secrets that match the pattern in `credential`
    # This already works out of the box with the CLI get action,
    # but that action doesn't support wildcards when using as library
    if credential and WILDCARD_CHAR in credential:
        names = set(expand_wildcard(credential,
                                    [x["name"]
                                     for x in secrets]))
    else:
        names = set(x["name"] for x in secrets)

    pool = ThreadPool(min(len(names), THREAD_POOL_MAX_SIZE))
    results = pool.map(
        lambda credential: getSecret(credential, version, region, table, context, dynamodb, kms, **kwargs),
        names)
    pool.close()
    pool.join()
    return dict(zip(names, results))
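A hedged usage sketch of getAllSecrets (this mirrors credstash-style usage and assumes AWS credentials in the environment plus an existing credential-store table; the region and context values are illustrative):

secrets = getAllSecrets(region="us-east-1", table="credential-store",
                        context={"app": "web"})
for name in sorted(secrets):
    print(name)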
Example #27
def eval_dir(fn, files_list):
	pool = ThreadPool(WORKER_NUM)
	results = pool.map(fn, files_list)
	# close the pool and wait for the work to finish
	pool.close()
	pool.join()
	return sum(results)
Example #28
def main():
    parser = argparse.ArgumentParser(usage='%(prog)s [options] SERVER_URL',
                                     description=__doc__)
    parser.add_argument(
        '-t', '--threads',
        help='Number of threads (simultaneous connections)',
        dest='threads', default=1, type=int)
    parser.add_argument('server', help='URL of server')
    args = parser.parse_args()

    server = args.server

    if not server.startswith('http://'):
        server = 'http://{}'.format(server)

    icons = []
    for font_id, font in fonts.items():
        for char in font['characters']:
            url = os.path.join(server, 'icon', font_id, '000', char)
            icons.append((font_id, char, url))

    icons.sort()

    print('{} icons to test on {} ...'.format(len(icons), args.server))

    if MAX_ICONS:
        icons = icons[:MAX_ICONS]

    pool = Pool(args.threads)
    pool.map(check_icon, icons)
    pool.close()
    pool.join()
Example #29
def get_proxys(file_name, thread_num=5):
    """The file contents here can be data copied from cn-proxy.com."""
    proxys = []
    ip_reg = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', re.I)
    try:
        with open(file_name, 'r') as fd_proxy:
            for line in fd_proxy:
                if line and line.strip():
                    print 'line',line.strip()
                    if ip_reg.match(line.strip()):
                        ip, port = line.strip().split()[0], line.strip().split()[1]
                        proxy = '%s:%s' %(ip, port)
                        print 'proxy',proxy
#                        if test_connection(proxy):
                        if proxy:
                            proxys.append(proxy)
        pool = ThreadPool(thread_num)
        results = pool.map(test_connection,proxys)
        pool.close()
        pool.join()
        
        proxys = list(set(results))
        proxys = sorted(proxys, key=lambda x: x.split(".")[0])
        return proxys
    
    except Exception,e:
        print 'error',e
Example #30
def main():
    parser = argparse.ArgumentParser(description='Checks a LegalOne application for broken links')
    parser.add_argument('-d', '--domain',
                        help='URL to check for broken links. Ex. http://colucci.release.dco.novajus.com.br',
                        required=True)
    parser.add_argument("-e", '--escritorio',
                        help='Account to check for broken links, Ex. xxxx, where xxx.release.dco.novajus.com.br',
                        required=True)
    parser.add_argument("-l", '--loginpage',
                        help='URL to login on the application. Ex. http://release.dco.novajus.com.br/conta/login',
                        required=True)
    parser.add_argument("-t", '--threads',
                        type=int,
                        help='How many threads searching for broken links at the same time. Default is 10',
                        required=False, default=10)
    args = parser.parse_args()
    loginpage = args.loginpage
    escritorio = args.escritorio
    domain = args.domain
    threads = args.threads
    pages_to_test = queue.Queue(maxsize=0)
    cookie_login = login(domain, escritorio, loginpage)
    pages_to_test.put(domain + "/contatos/contatos/search")
    test_url(cookie_login, pages_to_test, domain, pages_to_test.get())
    while not pages_to_test.empty():
        pool = ThreadPool(threads)
        links_to_check = []
        for x in range(0, threads):
            links_to_check.append(pages_to_test.get())
        partialtest_url = partial(test_url, cookie_login, pages_to_test, domain)
        pool.map(partialtest_url, links_to_check)
        pool.close()
        pool.join()
Example #31
            # record the prediction start time
            prediction_start = time.time()

            if NUMBER_OF_MODELS > 1:
                prediction_with_param = partial(
                    prediction,
                    data_subset.drop([
                        'test_class_name', 'cycle_id', 'current_failures',
                        'time'
                    ],
                                     axis='columns'))
                prediction_pool = ThreadPool(NUMBER_OF_MODELS)
                result = prediction_pool.map(prediction_with_param,
                                             agent.model)
                prediction_pool.close()
                prediction_pool.join()

                prediction_end = time.time()
                print('PREDICTION TIME')
                print(prediction_end - prediction_start)
                prediction_time.append(prediction_end - prediction_start)

                print(result)
                print('#############')

                # concatenate the results from the threads
                action = []
                for i in range(0, len(result[0][0])):
                    temp_class_array = []
                    for j in range(0, NUMBER_OF_MODELS):
                        temp = result[j][0]
Example #32
                # write facial landmark points 58-67, one "x y" pair per line
                for p in range(58, 68):
                    f.write(
                        str(shape.part(p).x) + ' ' + str(shape.part(p).y) + '\n')

                f.close()


if __name__ == '__main__':
    testlist = ['4.txt', '5.txt']
    pool = ThreadPool(2)
    pool.map(single_list, testlist)
    pool.close()
    pool.join()
Example #33
class TestBitmexRestApi(object):
    """REST API"""

    #----------------------------------------------------------------------
    def __init__(self):
        """Constructor"""
        self.apiKey = ''
        self.apiSecret = ''
        self.host = ''

        self.active = False
        self.reqid = 0
        self.queue = Queue()
        self.pool = None
        self.sessionDict = {}  # dictionary of per-worker session objects

        self.header = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Accept': 'application/json'
        }

    #----------------------------------------------------------------------
    def init(self, apiKey, apiSecret):
        """初始化"""
        self.apiKey = apiKey
        self.apiSecret = apiSecret

        self.host = REST_HOST

    #----------------------------------------------------------------------
    def start(self, n=3):
        """启动"""
        if self.active:
            return

        self.active = True
        self.pool = Pool(n)
        self.pool.map_async(self.run, range(n))

    #----------------------------------------------------------------------
    def close(self):
        """关闭"""
        self.active = False

        if self.pool:
            self.pool.close()
            self.pool.join()

    #----------------------------------------------------------------------
    def addReq(self, method, path, callback, params=None, postdict=None):
        """添加请求"""
        self.reqid += 1
        req = (method, path, callback, params, postdict, self.reqid)
        self.queue.put(req)
        return self.reqid

    #----------------------------------------------------------------------
    def processReq(self, req, i):
        """处理请求"""
        method, path, callback, params, postdict, reqid = req
        url = self.host + path
        expires = int(time() + 5)

        rq = requests.Request(url=url, data=postdict)
        p = rq.prepare()

        header = copy(self.header)
        header['api-expires'] = str(expires)
        header['api-key'] = self.apiKey
        header['api-signature'] = self.generateSignature(method,
                                                         path,
                                                         expires,
                                                         params,
                                                         body=p.body)

        # Use a keep-alive session; roughly 80% faster than opening a new connection per request
        session = self.sessionDict[i]
        resp = session.request(method,
                               url,
                               headers=header,
                               params=params,
                               data=postdict)

        #resp = requests.request(method, url, headers=header, params=params, data=postdict)

        code = resp.status_code
        d = resp.json()

        if code == 200:
            callback(d, reqid)
        else:
            self.onError(code, d)

    #----------------------------------------------------------------------
    def run(self, i):
        """连续运行"""
        self.sessionDict[i] = requests.Session()

        while self.active:
            try:
                req = self.queue.get(timeout=1)
                self.processReq(req, i)
            except Empty:
                pass

    #----------------------------------------------------------------------
    def generateSignature(self, method, path, expires, params=None, body=None):
        """生成签名"""
        # 对params在HTTP报文路径中,以请求字段方式序列化
        if params:
            query = parse.urlencode(params.items())
            path = path + '?' + query

        if body is None:
            body = ''

        msg = method + '/api/v1' + path + str(expires) + body
        signature = hmac.new(self.apiSecret, msg,
                             digestmod=hashlib.sha256).hexdigest()
        return signature

    #----------------------------------------------------------------------
    def onError(self, code, error):
        """错误回调"""
        print('on error')
        print(code, error)

    #----------------------------------------------------------------------
    def onData(self, data, reqid):
        """通用回调"""
        print('on data')
        print(data, reqid)
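A hedged usage sketch of the class above (the API key/secret and symbol are placeholders; REST_HOST and the imports come from the surrounding module):

api = TestBitmexRestApi()
api.init('YOUR_API_KEY', 'YOUR_API_SECRET')
api.start(3)                                     # spin up 3 worker threads
api.addReq('GET', '/instrument', api.onData,
           params={'symbol': 'XBTUSD'})
# ... later, once the queued requests have been answered ...
api.close()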
Example #34
    def initiate_threads():
        _pool = Pool(5)
        _pool.map(traverse_directory, self.valid_directories)
        _pool.close()
        _pool.join()
Example #35
class BaiduImgDownloader(object):
    """百度图片下载工具,目前只支持单个关键词"""

    # Mapping tables used to decode obfuscated image URLs
    str_table = {'_z2C$q': ':', '_z&e3B': '.', 'AzdH3F': '/'}

    char_table = {
        'w': 'a',
        'k': 'b',
        'v': 'c',
        '1': 'd',
        'j': 'e',
        'u': 'f',
        '2': 'g',
        'i': 'h',
        't': 'i',
        '3': 'j',
        'h': 'k',
        's': 'l',
        '4': 'm',
        'g': 'n',
        '5': 'o',
        'r': 'p',
        'q': 'q',
        '6': 'r',
        'f': 's',
        'p': 't',
        '7': 'u',
        'e': 'v',
        'o': 'w',
        '8': '1',
        'd': '2',
        'n': '3',
        '9': '4',
        'c': '5',
        'm': '6',
        '0': '7',
        'b': '8',
        'l': '9',
        'a': '0'
    }

    re_objURL = re.compile(r'"objURL":"(.*?)".*?"type":"(.*?)"')
    re_downNum = re.compile(r"已下载\s(\d+)\s张图片")
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36",
        "Accept-Encoding": "gzip, deflate, sdch",
    }

    def __init__(self, word, dirpath=None, processNum=30, max_num_pic=10000):
        if " " in word:
            raise AttributeError("This script only supports a single keyword")
        self.word = word
        self.char_table = {
            ord(key): ord(value)
            for key, value in BaiduImgDownloader.char_table.items()
        }
        pre_root_path_raw = os.path.join(
            os.environ['HOME'],
            'Code/PycharmProjects/AnimeSketchColorization/colorization/datasets/anime/raw'
        )
        if not dirpath:
            dirpath = os.path.join(pre_root_path_raw, str(word))  # 'anime')
        self.dirpath = dirpath
        self.jsonUrlFile = os.path.join(pre_root_path_raw, 'jsonUrl.txt')
        self.logFile = os.path.join(pre_root_path_raw, 'logInfo.txt')
        self.errorFile = os.path.join(pre_root_path_raw, 'errorUrl.txt')
        if os.path.exists(self.errorFile):
            os.remove(self.errorFile)
        if not os.path.exists(self.dirpath):
            os.mkdir(self.dirpath)
        self.pool = Pool(30)
        requests.adapters.DEFAULT_RETRIES = 3
        self.session = requests.Session()
        self.session.keep_alive = False
        self.session.headers = BaiduImgDownloader.headers
        self.queue = Queue()
        self.messageQueue = Queue()
        self.index = 0  # starting image index; used for counting, do not change
        self.promptNum = 200  # print a progress message every N downloaded images
        self.lock = threading.Lock()
        self.delay = 1.5  # requests that are too frequent get blocked
        self.QUIT = "QUIT"  # sentinel in the queue marking the end of work
        self.printPrefix = "**"  # prefix for messages that go to the console
        self.MAX_NUM_PIC = max_num_pic
        self.NUM_PIC = 0

    def start(self):
        # console logging thread
        t = threading.Thread(target=self.__log)
        t.setDaemon(True)
        t.start()
        self.messageQueue.put(self.printPrefix + "脚本开始执行")
        start_time = dt.now()
        urls = self.__buildUrls()
        self.messageQueue.put(self.printPrefix +
                              "已获取 %s 个Json请求网址" % len(urls))
        # Resolve all image URLs; this call blocks until the tasks finish
        self.pool.map(self.__resolveImgUrl, urls)
        while self.queue.qsize():
            imgs = self.queue.get()
            self.pool.map_async(self.__downImg, imgs)
        self.pool.close()
        self.pool.join()
        self.messageQueue.put(self.printPrefix + "下载完成!已下载 %s 张图片,总用时 %s" %
                              (self.index, dt.now() - start_time))
        self.messageQueue.put(self.printPrefix + "请到 %s 查看结果!" % self.dirpath)
        self.messageQueue.put(self.printPrefix + "错误信息保存在 %s" % self.errorFile)
        self.messageQueue.put(self.QUIT)

    def __log(self):
        """控制台输出,加锁以免被多线程打乱"""
        with open(self.logFile, "w") as f:  # , encoding="utf-8") as f:
            while True:
                message = self.messageQueue.get()
                if message == self.QUIT:
                    break
                message = str(dt.now()) + " " + message
                if self.printPrefix in message:
                    print(message)
                elif "已下载" in message:
                    # prompt once every N downloaded images
                    downNum = self.re_downNum.findall(message)
                    if downNum and int(downNum[0]) % self.promptNum == 0:
                        print(message)
                f.write(message)
                f.write('\n')
                f.flush()

    def __getIndex(self):
        """获取文件编号"""
        self.lock.acquire()
        try:
            return self.index
        finally:
            self.index += 1
            self.lock.release()

    def decode(self, url):
        """解码图片URL
        解码前:
        ippr_z2C$qAzdH3FAzdH3Ffl_z&e3Bftgwt42_z&e3BvgAzdH3F4omlaAzdH3Faa8W3ZyEpymRmx3Y1p7bb&mla
        解码后:
        http://s9.sinaimg.cn/mw690/001WjZyEty6R6xjYdtu88&690
        """
        # first replace the multi-character tokens
        for key, value in self.str_table.items():
            url = url.replace(key, value)
        # then translate the remaining single characters
        return url.translate(self.char_table)

    def __buildUrls(self):
        """json请求网址生成器"""
        word = urllib.quote(self.word)  # urllib.parse.quote(self.word)
        url = r"http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8&st=-1&ic=0&word={word}&face=0&istype=2nc=1&pn={pn}&rn=60"
        time.sleep(self.delay)
        html = self.session.get(url.format(word=word, pn=0),
                                timeout=15).content.decode('utf-8')
        results = re.findall(r'"displayNum":(\d+),', html)
        maxNum = int(results[0]) if results else 0
        maxNum = maxNum if maxNum < int(self.MAX_NUM_PIC * 12) else int(
            self.MAX_NUM_PIC * 12)
        urls = [url.format(word=word, pn=x) for x in range(0, maxNum + 1, 60)]
        with open(self.jsonUrlFile, "w") as f:  # , encoding="utf-8") as f:
            for url in urls:
                f.write(url + "\n")
        return urls

    def __resolveImgUrl(self, url):
        """从指定网页中解析出图片URL"""
        time.sleep(self.delay)
        try:
            html = self.session.get(url, timeout=100).content.decode('utf-8')
            datas = self.re_objURL.findall(html)
            imgs = [Image(self.decode(x[0]), x[1]) for x in datas]
            if self.NUM_PIC > self.MAX_NUM_PIC:
                imgs = []
            else:
                self.NUM_PIC = self.NUM_PIC + len(imgs)
                self.messageQueue.put(self.printPrefix +
                                      u"已解析出 %s 个图片网址" % len(imgs))
                self.queue.put(imgs)
        except Exception as e:
            # print ('str(Exception):\t', str(Exception))
            print('str(e):\t\t', str(e))
            # print ('repr(e):\t', repr(e))
            if self.NUM_PIC > self.MAX_NUM_PIC:
                return

    def __downImg(self, img):
        """下载单张图片,传入的是Image对象"""
        imgUrl = img.url
        # self.messageQueue.put("线程 %s 正在下载 %s " %
        #          (threading.current_thread().name, imgUrl))
        try:
            time.sleep(self.delay)
            res = self.session.get(imgUrl, timeout=35)
            message = None
            if str(res.status_code)[0] == "4":
                message = "\n%s: %s" % (res.status_code, imgUrl)
            elif "text/html" in res.headers["Content-Type"]:
                message = "\n无法打开图片: %s" % imgUrl
        except Exception as e:
            message = "\n抛出异常: %s\n%s" % (imgUrl, str(e))
        finally:
            if message:
                self.messageQueue.put(message)
                self.__saveError(message)
                return
        index = self.__getIndex()
        # index starts at 0
        self.messageQueue.put("已下载 %s 张图片:%s" % (index + 1, imgUrl))
        filename = os.path.join(self.dirpath, str(index) + "." + img.type)
        with open(filename, "wb") as f:
            f.write(res.content)

    def __saveError(self, message):
        self.lock.acquire()
        try:
            with open(self.errorFile, "a", encoding="utf-8") as f:
                f.write(message)
        finally:
            self.lock.release()
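A hedged usage sketch of the downloader class above (the keyword and limit are illustrative; the constructor rejects multi-word queries and assumes the hard-coded raw-data directory under $HOME exists):

downloader = BaiduImgDownloader('anime', processNum=30, max_num_pic=500)
downloader.start()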
Example #36
def scan_ports(first, last, scans):
    pool = Pool()
    for scan in scans:
        pool.map(scan, range(first, last + 1))
    pool.close()
    pool.join()
Example #37
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from multiprocessing.dummy import Pool as ThreadPool
sites = open('ip.txt', 'r').read().split('\n')
print(""" 
		[#] Script ::
     _____           _          _   _             _   _  _____     
    /  __ \         | |        | | | |           | | | ||  ___|    
    | /  \/ ___   __| | ___  __| | | |__  _   _  | |_| || |____  __
    | |    / _ \ / _` |/ _ \/ _` | | '_ \| | | | |  _  ||  __\ \/ /
    | \__/\ (_) | (_| |  __/ (_| | | |_) | |_| | | | | || |___>  < 
    \_____/\___/ \__,_|\___|\__,_| |_.__/ \__, | \_| |_/\____/_/\_\\
                                           __/ |                   
              #Yahoo Sites Grabber ::     |___/         
 """)

def BingBot(url):
    try:
        headers = {
            'user-agent': 'Googlebot/2.1 (+http://www.google.com/bot.html)'
        }
        req = requests.get('https://search.yahoo.com/search?q=ip:' + url +
                           '&first=1',
                           headers=headers)
        r1 = req.text
        soup = BeautifulSoup(r1, 'html.parser')
        re = soup.find('ol', {'class': 'mb-15 reg searchCenterMiddle'})
        l = str(re.findAll('span',
                           {'class': 'fz-ms fw-m fc-12th wr-bw lh-17'}))
Example #38
                        if n == ste:
                            continue  # we do not check a node with itself

                        if is_there_common_sym(ste.symbols, n.symbols):
                            continue  # first condition has not been met
                        elif is_there_common_sym(
                                parent_sym_to_product_sym_dic[ste],
                                parent_sym_to_product_sym_dic[n]):
                            continue  # second condition has not been met
                        else:  # both conditions have been met, we can create an edge between ste, n
                            match_graph.add_edge(ste, n)

                matching_result = matching_alg.max_weight_matching(match_graph)

                print "number of orignal nodes ", curr_atm.nodes_count
                print "number of matching nodes ", 2 * len(matching_result)
                csv_writer.writerow(
                    [curr_atm.nodes_count, 2 * len(matching_result)])
    except Exception as ex:
        print traceback.print_exc()


if __name__ == '__main__':

    ds = [a for a in AnmalZoo]

    t_pool = ThreadPool(thread_count)
    results = t_pool.map(thread_func, ds)
    t_pool.close()
    t_pool.join()
Example #39
    def get_content(self, threads=3):
        start_time = time.time()

        pool = ThreadPool(threads)
        pool.map(self._download_and_extract_datasets, ["basics", "episode", "ratings"])
        pool.close()
        pool.join()

        # Create datasets folder if not exists already
        Path("datasets").mkdir(parents=True, exist_ok=True)

        basics = DatasetReader(f"datasets/basics.tsv", self.dataset_parser, 0, True, str, str, str, str, int, int, int,
                               int, str)
        episodes = DatasetReader(f"datasets/episode.tsv", self.dataset_parser, 0, True, str, str, int, int)
        ratings = DatasetReader(f"datasets/ratings.tsv", self.dataset_parser, 0, True, str, float, int)

        # Get series and movies
        print("Getting series and movies")
        for iid, content_type, title, original_title, adult, start_year, end_year, runtime, genres in \
                basics.iterate_data():

            # Skip adult content
            if adult == 1:
                basics.remove_data(iid)
                continue

            # Skip unwanted content
            if content_type not in ["tvMiniSeries", "tvSeries", "tvMovie", "movie"]:
                if content_type != "tvEpisode":
                    basics.remove_data(iid)
                continue

            # Convert content type
            if content_type in ["tvSeries", "tvMiniSeries"]:
                content_type = ContentType.series
            else:
                content_type = ContentType.movie

            # Join rating
            x, average_rating, vote_count = ratings.get_data(iid)

            # Skip content with small number of votes (unpopular content)
            if not vote_count or \
                    (content_type == ContentType.movie and vote_count <= 5000) or \
                    (content_type == ContentType.series and vote_count <= 5000):
                basics.remove_data(iid)

                continue

            yield {
                "imdb_id": iid,
                "type": content_type,
                "title": title,
                "original_title": original_title,
                "year_released": start_year,
                "year_end": end_year,
                "runtime": runtime,
                "genres": genres,
                "rating": average_rating,
                "votes": vote_count
            }

        # Get episodes
        print("Getting episodes")
        for iid, parent_iid, season, episode in episodes.iterate_data():

            # Skip episodes without parent content
            if not basics.has_key(parent_iid):
                basics.remove_data(iid)
                continue

            # Skip too high episode numbers
            if not episode or not season or episode >= 32767:
                basics.remove_data(iid)
                continue

            # Join rating data
            x, average_rating, vote_count = ratings.get_data(iid)

            # Skip episode if it's not released yet
            if not vote_count:
                basics.remove_data(iid)
                continue

            # Join basic data
            x, content_type, title, original_title, adult, start_year, end_year, runtime, genres = basics.get_data(iid)

            yield {
                "imdb_id": iid,
                "type": ContentType.episode,
                "title": (title[:185] + '...') if len(title) > 185 else title,
                "original_title": (original_title[:185] + '...') if len(original_title) > 185 else original_title,
                "year_released": start_year,
                "year_end": end_year,
                "runtime": runtime,
                "genres": genres,
                "rating": average_rating,
                "votes": vote_count,
                "parent_uid": Content.generate_uid(parent_iid),
                "season": season,
                "episode": episode
            }

        print(f"Content updated, took {time.time() - start_time} seconds")
Example #40
def download_movies(url, movie_path):
    '''
    :param url: 'http://532movie.bnu.edu.cn/player/3379.html'
    :return: None
    download *.ts to RAM
    write mp4 movie file to local disk from RAM
    '''
    temp_file = str(time.time())
    try:
        codecs.open(movie_path + temp_file, 'wb')
    except:
        sg.Popup(u'路径错误', font=Font)
        return None
    os.remove(movie_path + temp_file)
    movie_name, urls = get_vedio_url(url)
    invalid_char = '/\:*"<>|?'
    for ic in invalid_char:
        if ic in movie_name:
            movie_name = movie_name.replace(ic, '.')
    print(movie_name)
    # try:
    #     print(movie_name.decode('utf-8').encode('utf-8','ignore'))
    # except:
    #     print(movie_name.split()[0].decode('utf-8').encode('utf-8','ignore'))
    #
    try:
        movie_name_utf8 = movie_name.decode('utf-8').encode('gbk', 'ignore')
    except:
        movie_name_utf8 = movie_name.split()[0].decode('utf-8').encode(
            'gbk', 'ignore')
    episode = 0
    flag = 0
    # time_init = time.time()

    # time_start = time.time()
    if len(urls) == 1:
        if os.path.isfile(movie_path + movie_name.decode('utf-8') + '.mp4'):
            # print(movie_path + movie_name_utf8 + '.mp4 is already existed')
            sg.Popup(movie_path + movie_name.decode('utf-8') + u'已存在',
                     font=Font)
            return None
        ts = split_videos(urls[0])

        pool = ThreadPool(20)
        # bar_fmt = 'Downloading\t' + '|{bar}|{percentage:3.0f}%'
        results = list(tqdm_gui(pool.imap(download_ts, ts), total=len(ts)))
        plt.title('asdf')
        pool.close()
        pool.join()
        # print('Writing to disk...')
        movie = codecs.open(movie_path + movie_name_utf8 + '.mp4', 'wb')
        bar_fmt1 = 'writing to disk\t' + '|{bar}|{percentage:3.0f}%'
        for i in tqdm(range(len(results)), bar_format=bar_fmt1, ncols=50):
            movie.write(results[i])
        movie.close()

        plt.close()
        sg.Popup(u'下载完成!', title=u'完成', font=Font)

    else:

        # episode_str = raw_input(
        #     'there are %s episodes, please input a series of numbers like this(e.g.:1,10,15 or 1-3,4-10)' % len(urls))

        text = sg.Text(u'共有%s集\n请输入集数\n(例如:1,10,15 or 1-3,4-10)\n直接点击下载所有' %
                       len(urls),
                       auto_size_text=1,
                       font=Font)
        layout = [
            [text],
            [sg.Input(), sg.OK(u'下载')],
        ]
        window = sg.Window(u'选择集数').Layout(layout)
        while 1:

            ev, vals = window.Read()
            if ev is None:
                break
            # episodes = episode_str.split(',')
            print(vals)
            # continue
            if len(vals[0]) == 0:
                selected = range(1, len(urls) + 1)
            else:
                episodes = vals[0].split(',')

                selected = []
                fail1 = 0
                try:
                    for e in episodes:
                        if '-' in e:
                            e_split = e.split('-')
                            e_start = e_split[0]
                            e_end = e_split[1]
                            ee = range(int(e_start), int(e_end) + 1)
                            for ei in ee:
                                selected.append(ei)
                        else:
                            selected.append(int(e))
                except:
                    fail1 = 1

                selected.sort()
                for s in selected:
                    if s > len(urls):
                        print('there are no episode %s' % s)
                        fail1 = 1
                    elif s < 1:
                        print('input error...')
                        fail1 = 1
                if fail1 == 1:
                    sg.PopupError(u'输入有误', title=u'错误', font=Font)
                    continue

            # print selected
            # continue
            text2 = sg.Text('')
            layout2 = [
                [text2],
                [
                    sg.ProgressBar(len(selected),
                                   orientation='h',
                                   size=(20, 20),
                                   key='progressbar')
                ],
            ]
            window2 = sg.Window(u'下载进度').Layout(layout2)
            progress_bar = window2.FindElement('progressbar')

            flag1 = 0
            for i in urls:

                ev2, va2 = window2.Read(timeout=0)
                if ev2 is None:
                    break
                # selected_episodes = range(1, int(1e5))
                text2.Update(i)
                episode += 1
                if episode not in selected:
                    continue
                TV_dir = movie_path + movie_name_utf8 + '\\'
                if not os.path.isdir(TV_dir):
                    os.makedirs(TV_dir)
                if os.path.isfile(TV_dir + 'Episode ' + '%02d' % episode +
                                  '.mp4'):
                    # print(TV_dir + 'Episode ' + '%02d' % episode + '.mp4 is already existed')
                    flag += 1
                    # continue
                    sg.Popup(movie_name.decode('utf-8') + 'Episode ' +
                             '%02d' % episode + u'已存在',
                             font=Font)
                    continue
                pool = ThreadPool(20)
                ts = split_videos(i)
                bar_fmt = 'Episode %02d' % episode + '|{bar}|{percentage:3.0f}%'
                results = list(
                    tqdm_gui(pool.imap(download_ts, ts),
                             total=len(ts),
                             ncols=50,
                             bar_format=bar_fmt))
                pool.close()
                pool.join()
                plt.close()
                movie = codecs.open(
                    TV_dir + 'Episode ' + '%02d' % episode + '.mp4', 'wb')
                for r in results:
                    movie.write(r)
                progress_bar.UpdateBar(flag1 + 1)
                flag1 += 1
                window2.Close()
Example #41
########PrecipForcastor.exe#########
#####Part1 prepare physics##########
nowtime = datetime.datetime.now().strftime('%y%m%d%H')
pasttime = (datetime.datetime.now() -
            datetime.timedelta(days=1)).strftime('%y%m%d%H')
if float(nowtime[-2:]) < 15:
    date = pasttime[:-2] + '20'
else:
    date = nowtime[:-2] + '08'
forcast = ['012', '015', '018', '021', '024']
fmap = pd.read_table(rootdir + 'jx86.txt', header=None, sep='\s+', index_col=0)
fmap.columns = ['lon', 'lat']
tpool = ThreadPool(8)
dflist = tpool.map(main, forcast)
tpool.close()
tpool.join()
df = sum(dflist) / len(forcast)
print(df)
#####Part2 forecast#######
#######select model######
cluster = raincluster()
fout = open(rootdir + 'fout_now.txt', 'w')
for c in cluster:
    yp = []
    for ilabel in range(4):
        ml = joblib.load(modelSaveModelPath + str(c[0]) + '_cluster_' +
                         str(ilabel) + 'Pca_Logistic.model')
        if ilabel == 3:
            yp.append(ml.predict_proba(df.loc[c, df.columns[:-1]])[:, 1])
        else:
            yp.append(ml.predict_proba(df.loc[c])[:, 1])
Example #42
    def new_interactions(self, constraint_list, f, specs, n_jobs):
        """
        A function to generate new interactions between features. The generation is handled by threads; the number of threads depends on the number of different interaction degrees.

        Args:
            constraint_list (list): The previously acquired list (of lists) with all constraints of a model.
            f (dict): The model's features.
            specs (list): The amount of new interactions, followed by value pairs of ratio in percent and interaction degree, e.g. [100, 50, 2, 50, 3].

        Returns:
            a dictionary with the new interactions as keys.
        """
        total_amount = specs[0]
        interaction_ratio = list(specs[1::2])
        interaction_degree = list(specs[2::2])
        all_new_interactions = dict()
        splitted_new_interactions = dict()
        for elem in interaction_degree:
            splitted_new_interactions["dict" + str(elem)] = {}
        if n_jobs > 0:
            number_of_threads = n_jobs
        else:
            number_of_threads = os.cpu_count()

        # some sweet, sweet error handling:
        assert (sum(interaction_ratio) == 100), (
            "The interaction ratios must sum up to 100. Currently they sum up to: ",
            sum(interaction_ratio))

        def worker(amount):
            new_interactions = dict()
            for elem in range(len(interaction_degree)):
                if elem == len(interaction_degree) - 1:
                    amount_new_int = amount
                else:
                    amount_new_int = round(amount / 100.0 *
                                           interaction_ratio[elem])
                    amount = amount - amount_new_int

                while amount_new_int > len(splitted_new_interactions[
                        "dict" + str(interaction_degree[elem])]):
                    legit_int = False
                    while legit_int == False:
                        random_feature = list(
                            np.random.choice(
                                list(f.keys())[1:], interaction_degree[elem]))
                        if self.check_interaction(constraint_list, f,
                                                  random_feature):
                            legit_int = True
                            random_feature = sorted(random_feature)
                            interaction = ""
                            for i in random_feature:
                                interaction = interaction + str(i) + "#"
                            interaction = interaction[:-1]
                            splitted_new_interactions["dict" + str(
                                interaction_degree[elem])][interaction] = ""
                # new_interactions.update(new_int_degree_subdict)
            # return new_interactions

        pool = ThreadPool()
        l = [total_amount] * number_of_threads
        pool.map(worker, l)
        inter_type_n = {}

        for i, elem in enumerate(interaction_degree):
            desired_amount = total_amount * interaction_ratio[i] / 100
            inter_type_n[i] = int(desired_amount)
        accounted_inters = sum(list(inter_type_n.values()))
        diff = total_amount - accounted_inters
        if diff > 0:
            inter_type_n[list(inter_type_n.keys())[0]] += diff

        for elem, num_inters in inter_type_n.items():
            while num_inters < len(
                    splitted_new_interactions["dict" +
                                              str(interaction_degree[elem])]):
                rchoice = random.choice(
                    list(splitted_new_interactions[
                        "dict" + str(interaction_degree[elem])].keys()))
                del splitted_new_interactions[
                    "dict" + str(interaction_degree[elem])][rchoice]
            all_new_interactions.update(
                splitted_new_interactions["dict" +
                                          str(interaction_degree[elem])])
        pool.close()
        pool.join()
        print("Finished with creating interactions")
        return all_new_interactions
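
As a quick illustration of how the specs argument above is sliced (hypothetical values): the first element is the total number of new interactions, and the rest alternates ratio/degree pairs.

# Hypothetical illustration of the specs layout used by new_interactions()
specs = [100, 50, 2, 50, 3]             # 100 interactions: 50% of degree 2, 50% of degree 3
total_amount = specs[0]                 # -> 100
interaction_ratio = list(specs[1::2])   # -> [50, 50]
interaction_degree = list(specs[2::2])  # -> [2, 3]
assert sum(interaction_ratio) == 100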
Пример #43
0
def makePool(num_threads, files):
    # Make the pool of workers and print every entry of `files` in parallel
    # (the original snippet relied on a module-level `files` list)
    pool = ThreadPool(num_threads)
    results = pool.map(print, files)
    pool.close()
    pool.join()
    return results
Пример #44
0
import getpass
from time import time
from multiprocessing.dummy import Pool as ThreadPool

devices = read_devices('host_files/backup_all_hosts.txt')
user = input('Username: ')
password = getpass.getpass('Password: ')
creds = (user, password)  # reconstructed; the exact structure expected by worker() is not shown
# scp_user = 'svc_scpatconfig'
# scp_password = getpass.getpass('SCP Password: ')
num_threads_str = input('\nNumber of threads (10): ') or '10'
num_threads = int(num_threads_str)

###Create list for passing to config worker
config_params_list = []

for i in range(len(devices)):
    device = [devices[i]['hostname'], devices[i]['group'], devices[i]['os']]
    config_params_list.append((device, creds))

starting_time = time()

print('\n--- Creating threadpool\n')
threads = ThreadPool(num_threads)
results = threads.map(worker, config_params_list)

threads.close()
threads.join()

total_time = format((time() - starting_time) / 60, '.2f')
print('\n---- Elapsed time: ', str(total_time) + ' minutes')
Пример #45
0
class MsgDispatcherBase(Recoverable):
    """This is where tuners and assessors are not defined yet.
    Inherits this class to make your own advisor.
    """
    def __init__(self):
        if multi_thread_enabled():
            self.pool = ThreadPool()
            self.thread_results = []
        else:
            self.stopping = False
            self.default_command_queue = Queue()
            self.assessor_command_queue = Queue()
            self.default_worker = threading.Thread(
                target=self.command_queue_worker,
                args=(self.default_command_queue, ))
            self.assessor_worker = threading.Thread(
                target=self.command_queue_worker,
                args=(self.assessor_command_queue, ))
            self.default_worker.start()
            self.assessor_worker.start()
            self.worker_exceptions = []

    def run(self):
        """Run the tuner.
        This function will never return unless an exception is raised.
        """
        _logger.info('Start dispatcher')
        if dispatcher_env_vars.NNI_MODE == 'resume':
            self.load_checkpoint()

        while True:
            command, data = receive()
            if data:
                data = json_tricks.loads(data)

            if command is None or command is CommandType.Terminate:
                break
            if multi_thread_enabled():
                result = self.pool.map_async(self.process_command_thread,
                                             [(command, data)])
                self.thread_results.append(result)
                if any([
                        thread_result.ready()
                        and not thread_result.successful()
                        for thread_result in self.thread_results
                ]):
                    _logger.debug('Caught thread exception')
                    break
            else:
                self.enqueue_command(command, data)
                if self.worker_exceptions:
                    break

        _logger.info('Dispatcher exiting...')
        self.stopping = True
        if multi_thread_enabled():
            self.pool.close()
            self.pool.join()
        else:
            self.default_worker.join()
            self.assessor_worker.join()

        _logger.info('Terminated by NNI manager')

    def command_queue_worker(self, command_queue):
        """Process commands in command queues.
        """
        while True:
            try:
                # set timeout to ensure self.stopping is checked periodically
                command, data = command_queue.get(timeout=3)
                try:
                    self.process_command(command, data)
                except Exception as e:
                    _logger.exception(e)
                    self.worker_exceptions.append(e)
                    break
            except Empty:
                pass
            if self.stopping and (_worker_fast_exit_on_terminate
                                  or command_queue.empty()):
                break

    def enqueue_command(self, command, data):
        """Enqueue command into command queues
        """
        if command == CommandType.TrialEnd or (
                command == CommandType.ReportMetricData
                and data['type'] == 'PERIODICAL'):
            self.assessor_command_queue.put((command, data))
        else:
            self.default_command_queue.put((command, data))

        qsize = self.default_command_queue.qsize()
        if qsize >= QUEUE_LEN_WARNING_MARK:
            _logger.warning('default queue length: %d', qsize)

        qsize = self.assessor_command_queue.qsize()
        if qsize >= QUEUE_LEN_WARNING_MARK:
            _logger.warning('assessor queue length: %d', qsize)

    def process_command_thread(self, request):
        """Worker thread to process a command.
        """
        command, data = request
        if multi_thread_enabled():
            try:
                self.process_command(command, data)
            except Exception as e:
                _logger.exception(str(e))
                raise
        else:
            pass

    def process_command(self, command, data):
        _logger.debug('process_command: command: [%s], data: [%s]', command,
                      data)

        command_handlers = {
            # Tuner commands:
            CommandType.Initialize: self.handle_initialize,
            CommandType.RequestTrialJobs: self.handle_request_trial_jobs,
            CommandType.UpdateSearchSpace: self.handle_update_search_space,
            CommandType.ImportData: self.handle_import_data,
            CommandType.AddCustomizedTrialJob:
            self.handle_add_customized_trial,

            # Tuner/Assessor commands:
            CommandType.ReportMetricData: self.handle_report_metric_data,
            CommandType.TrialEnd: self.handle_trial_end,
            CommandType.Ping: self.handle_ping,
        }
        if command not in command_handlers:
            raise AssertionError('Unsupported command: {}'.format(command))
        command_handlers[command](data)

    def handle_ping(self, data):
        pass

    def handle_initialize(self, data):
        """Initialize search space and tuner, if any
        This method is meant to be called only once for each experiment, after calling this method,
        dispatcher should `send(CommandType.Initialized, '')`, to set the status of the experiment to be "INITIALIZED".
        Parameters
        ----------
        data: dict
            search space
        """
        raise NotImplementedError('handle_initialize not implemented')

    def handle_request_trial_jobs(self, data):
        """The message dispatcher is demanded to generate `data` trial jobs.
        These trial jobs should be sent via `send(CommandType.NewTrialJob, json_tricks.dumps(parameter))`,
        where `parameter` will be received by NNI Manager and eventually accessible to trial jobs as "next parameter".
        Semantically, message dispatcher should do this `send` exactly `data` times.

        The JSON sent by this method should follow the format of
        {
            "parameter_id": 42
            "parameters": {
                 // this will be received by trial
            },
            "parameter_source": "algorithm" // optional
        }
        Parameters
        ----------
        data: int
            number of trial jobs
        """
        raise NotImplementedError('handle_request_trial_jobs not implemented')

    def handle_update_search_space(self, data):
        """This method will be called when search space is updated.
        It's recommended to call this method in `handle_initialize` to initialize search space.
        *No need to* notify NNI Manager when this update is done.
        Parameters
        ----------
        data: dict
            search space
        """
        raise NotImplementedError('handle_update_search_space not implemented')

    def handle_import_data(self, data):
        """Import previous data when experiment is resumed.
        Parameters
        ----------
        data: list
            a list of dictionaries, each of which has at least two keys, 'parameter' and 'value'
        """
        raise NotImplementedError('handle_import_data not implemented')

    def handle_add_customized_trial(self, data):
        """Experimental API. Not recommended for usage.
        """
        raise NotImplementedError(
            'handle_add_customized_trial not implemented')

    def handle_report_metric_data(self, data):
        """Called when metric data is reported or new parameters are requested (for multiphase).
        When new parameters are requested, this method should send a new parameter.
        Parameters
        ----------
        data: dict
            a dict which contains 'parameter_id', 'value', 'trial_job_id', 'type', 'sequence'.
            type: can be `MetricType.REQUEST_PARAMETER`, `MetricType.FINAL` or `MetricType.PERIODICAL`.
            `REQUEST_PARAMETER` is used to request new parameters for multiphase trial job. In this case,
            the dict will contain additional keys: `trial_job_id`, `parameter_index`. Refer to `msg_dispatcher.py`
            as an example.
        Raises
        ------
        ValueError
            Data type is not supported
        """
        raise NotImplementedError('handle_report_metric_data not implemented')

    def handle_trial_end(self, data):
        """Called when the state of one of the trials is changed
        Parameters
        ----------
        data: dict
            a dict with keys: trial_job_id, event, hyper_params.
            trial_job_id: the id generated by training service.
            event: the job’s state.
            hyper_params: the string that is sent by message dispatcher during the creation of trials.
        """
        raise NotImplementedError('handle_trial_end not implemented')
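
For context, a minimal sketch of a concrete dispatcher built on this base class, assuming send, CommandType and json_tricks are importable from the NNI runtime as the docstrings above imply; the parameter sampling is a placeholder, not NNI's actual tuner logic.

import json_tricks

class RandomDispatcher(MsgDispatcherBase):
    """Toy dispatcher sketch; only the tuner-side handlers are filled in."""

    def __init__(self):
        super().__init__()
        self.search_space = {}
        self.next_parameter_id = 0

    def handle_initialize(self, data):
        self.handle_update_search_space(data)
        send(CommandType.Initialized, '')  # per the handle_initialize docstring

    def handle_update_search_space(self, data):
        self.search_space = data

    def handle_request_trial_jobs(self, data):
        # `data` is the number of trial jobs requested (see the docstring above)
        for _ in range(data):
            param = {'parameter_id': self.next_parameter_id,
                     'parameters': {},  # would be sampled from self.search_space
                     'parameter_source': 'algorithm'}
            self.next_parameter_id += 1
            send(CommandType.NewTrialJob, json_tricks.dumps(param))

    def handle_report_metric_data(self, data):
        pass  # a real tuner would record the reported metric here

    def handle_trial_end(self, data):
        pass

    def handle_import_data(self, data):
        pass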
Пример #46
0
def ioBoundMap(function, iterator, processes=10):
    pool = ThreadPool(processes=processes)
    results = pool.map(function, iterator)
    pool.close()
    pool.join()
    return results
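
A small usage note (hypothetical URLs): the helper simply fans an I/O-bound callable out over a thread pool and collects the results in order.

import requests

urls = ['https://example.com/a', 'https://example.com/b']  # hypothetical
codes = ioBoundMap(lambda u: requests.get(u, timeout=5).status_code, urls, processes=5)
print(codes)  # e.g. [200, 200]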
Пример #47
0
    def upload_large_file(self,
                          contents,
                          file_name,
                          part_size=None,
                          num_threads=4,
                          mime_content_type=None,
                          content_length=None,
                          progress_listener=None):
        """

        :param contents:
        :param file_name:
        :param part_size:
        :param num_threads:
        :param mime_content_type:
        :param content_length:
        :param progress_listener:
        :return:
        """
        if file_name[0] == '/':
            file_name = file_name[1:]
        if part_size is None:
            part_size = self.connector.recommended_part_size
        if content_length is None:
            content_length = get_content_length(contents)
        start_large_file_path = API.upload_large
        params = {
            'bucketId': self.bucket.bucket_id,
            'fileName': b2_url_encode(file_name),
            'contentType': mime_content_type or 'b2/x-auto'
        }
        large_file_response = self.connector.make_request(
            path=start_large_file_path, method='post', params=params)
        if large_file_response.status_code == 200:
            file_id = large_file_response.json().get('fileId', None)
            get_upload_part_url_path = API.upload_large_part
            params = {'fileId': file_id}
            pool = ThreadPool(num_threads)

            def upload_part_worker(args):
                part_number, part_range = args
                offset, content_length = part_range
                with open(contents.name, 'rb') as file:
                    file.seek(offset)
                    stream = RangeStream(file, offset, content_length)
                    upload_part_url_response = self.connector.make_request(
                        path=get_upload_part_url_path,
                        method='post',
                        params=params)
                    if upload_part_url_response.status_code == 200:
                        upload_url = upload_part_url_response.json().get(
                            'uploadUrl')
                        auth_token = upload_part_url_response.json().get(
                            'authorizationToken')
                        upload_part_response = self.connector.upload_part(
                            file_contents=stream,
                            content_length=content_length,
                            part_number=part_number,
                            upload_url=upload_url,
                            auth_token=auth_token,
                            progress_listener=progress_listener)
                        if upload_part_response.status_code == 200:
                            return upload_part_response.json().get(
                                'contentSha1', None)
                        else:
                            raise B2Exception.parse(upload_part_response)
                    else:
                        raise B2Exception.parse(upload_part_url_response)

            sha_list = pool.map(
                upload_part_worker,
                enumerate(get_part_ranges(content_length, part_size), 1))
            pool.close()
            pool.join()
            finish_large_file_path = API.upload_large_finish
            params = {'fileId': file_id, 'partSha1Array': sha_list}
            finish_large_file_response = self.connector.make_request(
                path=finish_large_file_path, method='post', params=params)
            if finish_large_file_response.status_code == 200:
                new_file = B2File(connector=self.connector,
                                  parent_list=self,
                                  **finish_large_file_response.json())
                return new_file
            else:
                raise B2Exception.parse(finish_large_file_response)
        else:
            raise B2Exception.parse(large_file_response)
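
A hypothetical call sketch, where bucket_files stands in for an instance of the file-list class above and the local path is made up:

# Hypothetical usage; bucket_files is an instance of the class above.
with open('backup.tar.gz', 'rb') as fh:
    new_file = bucket_files.upload_large_file(
        contents=fh,
        file_name='backups/backup.tar.gz',
        num_threads=4,
        mime_content_type='application/gzip')
print(new_file)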
Пример #48
0
#         falseCount += 1

import time
from multiprocessing import Pool
from multiprocessing.dummy import Pool as TPool


def work(i):
    global mbf
    ranLen = random.randint(minl, maxl + 1)
    strr = ''.join(
        random.sample([
            'z', 'y', 'x', 'w', 'v', 'u', 't', 's', 'r', 'q', 'p', 'o', 'n',
            'm', 'l', 'k', 'j', 'i', 'h', 'g', 'f', 'e', 'd', 'c', 'b', 'a'
        ], ranLen))
    return (strr not in words) and (mbf.lookup(strr))


start = time.time()

p = TPool(13)  # create the thread pool
res = p.map(work, range(1, amount))
p.close()
p.join()

falseCount = sum(res)
print('falseCount =', falseCount)
print('amount =', amount)
print('The rate is', falseCount / amount)
print('using', time.time() - start, 's')
Пример #49
0
def work2(num=2):
    p = ThreadPool(num)
    for i in urls:
        p.apply_async(urllib2.urlopen, args=(i, ))
    p.close()
    p.join()
Пример #50
0
print('-' * 60)
remote_server = input("Enter a remote host or IP address to scan:\n")
remote_server_ip = socket.gethostbyname(remote_server)

print('-' * 60)
print('Please wait, scanning remote host ', remote_server_ip)
print('-' * 60)


# scanning function that scans only one port
def scan(port):
    try:
        # AF_INET / SOCK_STREAM spelled out instead of the magic numbers 2 and 1
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1)  # avoid hanging on filtered ports
            res = s.connect_ex((remote_server_ip, port))
            if res == 0:
                print('Port {}: OPEN'.format(port))
    except Exception as e:
        print(e)


# Check what time the scan started
t1 = datetime.now()
# set the number of threads in pool
# the default pool size is the number of CPU cores
pool = ThreadPool()
pool.map(scan, ports)
pool.close()
pool.join()  # wait for sub-thread ending

print('Multiprocess Scanning Completed in  ', datetime.now() - t1)
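
For reference, a self-contained variant of the same scan with an assumed port range and target, since ports is defined elsewhere in the original snippet:

import socket
from datetime import datetime
from multiprocessing.dummy import Pool as ThreadPool

target_ip = socket.gethostbyname('localhost')  # assumed target
ports = range(1, 1025)                         # assumed port range

def scan_port(port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1)
        if s.connect_ex((target_ip, port)) == 0:
            print('Port {}: OPEN'.format(port))

t1 = datetime.now()
with ThreadPool() as pool:
    pool.map(scan_port, ports)
print('Scan finished in', datetime.now() - t1)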
Пример #51
0
    def _parallel_augments(cls, action_fx, data):
        pool = ThreadPool(len(data))
        results = pool.map(action_fx, data)
        pool.close()
        pool.join()
        return results
Пример #52
0
class AsinSpider(object):
    def __init__(self, name):
        self.name = name
        self.headers = {
            'Origin':
            'https://www.junglescout.com',
            'Referer':
            'https://www.junglescout.com/',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
        }
        # Create the redis connection object
        self.sr = Redis(**REDIS_CONFIG[BASE_TYPE])
        # Flag holding the estimated-sales state; any initial value other than '< 5' works
        self.state = 1
        # Rank of each category, persisted to redis
        self.rank = 0
        # Create the thread pool
        self.pool = Pool()
        # URL queue
        self.url_queue = Queue()
        # Create the PostgreSQL connection
        self.conn = psycopg2.connect(**DATADB_CONFIG[BASE_TYPE])
        # Create the cursor
        self.cur = self.conn.cursor()

        # Configure logging
        self.log_name = sys.argv[0].split('/')[-1].split('.')[0]
        self.info_log = Logger(log_name=self.log_name, log_level='info')
        self.error_log = Logger(log_name=self.log_name, log_level='error')

    def get_data(self, url, category):
        # Extract the rank from the URL
        rank = re.findall(r'rank=(\d+)&', url)
        # Fetch the response data
        try:
            # Use the downloader helper from utils to request the data
            response = get_use_requests({"url": url})
            data = json.loads(response.text)
        except Exception as e:
            self.error_log.error(e)
            data = {}

        # Write the data to the log file
        self.info_log.info(data)
        # Extract the estimated monthly sales
        est = data.get('estSalesResult')
        # Save the estimate fetched this time to redis
        self.sr.set('{}_state'.format(category), est)
        # Save the rank of the data fetched this time to redis
        self.sr.set('{}_rank'.format(category), rank)
        # Read the estimated-sales state back from redis
        self.state = self.sr.get('{}_state'.format(category))

        # Current timestamp in milliseconds
        tm = int(round(time.time() * 1000))
        # Current date
        date = datetime.datetime.now().strftime('%Y-%m-%d')

        if type(est) == int or est == '< 5':
            # Spider state: if the sales figure is a number or '< 5' the state is normal, otherwise it failed
            state = 1
        elif est == 'N.A.':
            # 'N.A.' means no valid data was returned, so set est to 0
            est = 0
            state = 2
        else:
            state = 2

        # If the returned value is not '< 5', save the data to the database
        if est != '< 5' and est:
            self.info_log.info(
                'category: {}, spider state: {}, timestamp: {}, sales rank: {}, monthly sales: {}'.format(
                    category, state, tm, rank[0], est))
            self.save_date_to_db(category, state, tm, rank[0], est, date)

    def get_url(self, category):
        # Pop a url from the redis queue
        res = self.sr.rpop('{}_url'.format(category))
        # If the url is not empty, add it to the in-memory queue
        if res:
            self.url_queue.put(res)

    def save_date_to_db(self, *args):
        try:
            # self.cur.execute(
            #     # The table name and values are passed in as variables
            #     # Insert the data into the update table
            #     "INSERT INTO " + UPDATE_TABEL_NAME + " (CATEGORY, STATE, TM, BSR, MOON_SALE_QTYM) VALUES ( '" + args[
            #         0] + "', '" + str(args[1]) + "', '" + str(args[2]) + "',  '" + args[3] + "', '" + str(
            #         args[4]) + "')");
            self.cur.execute(
                # Insert the data into the druid table
                "INSERT INTO " + DRUID_TABEL_NAME +
                " (CATEGORY, STATE, TM, BSR, MOON_SALE_QTYM, ADAY) VALUES ( '"
                + args[0] + "', '" + str(args[1]) + "', '" + str(args[2]) +
                "',  '" + args[3] + "', '" + str(args[4]) + "', '" + args[5] +
                "')")

            # Commit the data to the database
            self.conn.commit()
        except Exception as e:
            # Save the error message to the log file
            self.error_log.error(e)

    def execute_task_data(self, task, category, count):
        # Spawn the coroutine tasks
        for i in range(count):
            if self.url_queue.qsize() > 0:
                url = self.url_queue.get()
                # Run the task asynchronously on the pool
                self.pool.apply_async(task, (url, category))
            else:
                break

    def execute_task_url(self, task, category, count):
        # Spawn the coroutine tasks
        for i in range(count):
            # Run the task asynchronously on the pool
            self.pool.apply_async(task, (category, ))

    def run(self):
        for category in CATEGORYS:
            # Each category has its own queue in redis; pop its urls one by one and fetch the data
            while True:
                # If the fetched response is '< 5', the remaining urls need no further requests
                if self.state == '< 5':
                    # The scraped value is '< 5', so the rest of the queue is no longer needed; delete it
                    self.sr.delete('{}_url'.format(category))
                    self.sr.delete('{}_state'.format(category))
                    self.state = 0  # reset the state value
                    break

                self.execute_task_url(self.get_url, category,
                                      GET_URL_COROUTINE_NUM)
                time.sleep(0.1)

                if self.url_queue.qsize() == 0:
                    break
                # print(category)
                self.execute_task_data(self.get_data, category,
                                       GET_DATA_COROUTINE_NUM)
                time.sleep(0.01)
                # self.url_queue.join()
                self.pool.join()
        # All done; close the connection
        self.conn.close()
Пример #53
0
def run_main(page_list, tags, thread_num, save_dir):
    log = getLogger('Main')
    log.info('YandeCrawler %s' % __version__)
    if thread_num > 5:
        log.warning('Option thread_num is bigger than 5')
        log.warning('And this may cause some problems')
    if tags == '':
        log.info('Option tags not specified')
        log.info('Default tag is wildcard')
    try:
        pool = Pool(thread_num)
        if not exists(save_dir):
            log.info("Save dir doesn't exist")
            log.info('Creating save dir')
            makedirs(save_dir)
        for page in page_list:
            log.info('Page %d: Getting metadata' % page)
            url = 'https://yande.re/post.json?limit=100&page=%d&tags=%s' % (page, tags)
            pic_list = loads(urlopen(url).read().decode())
            # Exit when this is the last page
            if len(pic_list) <= 0:
                log.info('Page %d: Empty. Exiting' % page)
                break
            # Sort by size, the first is the smallest
            pic_list.sort(key=lambda pic: pic['file_size'])
            # Add savepath
            for pic in pic_list:
                pic['save_path'] = '%s%d.%s' % (save_dir, pic['id'], pic['file_ext'])
            # Skip pics that already exist on disk
            # (the original removed items while iterating over the list, which skips elements)
            pic_list = [pic for pic in pic_list if not exists(pic['save_path'])]
            # Add ID
            for i, pic in enumerate(pic_list):
                pic['_id'] = i
            if len(pic_list) > 0:
                log.info('Page %d: Got. Total %d pics' % (page, len(pic_list)))
                log.info('Page %d: Working' % page)
                pool.map(get_pic, pic_list)
                log.info('Page %d: Done' % page)
    except KeyboardInterrupt:
        log.info('Interrupted by user. Exiting')
    except HTTPError as e:
        o = e.reason
        log.error('HTTPError %d %s' % (o.errno, o.strerror))
    except URLError as e:
        o = e.reason
        log.error('URLError %d %s' % (o.errno, o.strerror))
    except Exception:
        log.exception('Error happens')
    pool.close()
    log.info('Waiting for tasks')
    try:
        pool.join()
    except:
        log.warning('Exiting immediately')
        log.warning('Unsaved data dropped')
    log.info('Control exited')
Пример #54
0
    def start(self, min_nodes=None):
        """Starts up all the instances in the cloud. To speed things up all
        instances are started in a separate thread. To make sure
        elasticluster is not stopped during creation of an instance, it will
        overwrite the sigint handler. As soon as the last started instance
        is returned and saved to the repository, sigint is executed as usual.
        An instance is up and running as soon as a ssh connection can be
        established. If the startup timeout is reached before all instances
        are started, the cluster will stop and destroy all instances.

        This method is blocking and might take some time depending on the
        amount of instances to start.

        :param min_nodes: minimum number of nodes to start in case the quota
                          is reached before all instances are up
        :type min_nodes: dict [node_kind] = number
        """

        # To not mess up the cluster management we start the nodes in a
        # different thread. In this case the main thread receives the sigint
        # and communicates to the `start_node` thread. The nodes to work on
        # are passed in a managed queue.
        self.keep_running = True

        def sigint_handler(signal, frame):
            """
            Makes sure the cluster is stored, before the sigint results in
            exiting during the node startup.
            """
            log.error("user interruption: saving cluster before exit.")
            self.keep_running = False

        nodes = self.get_all_nodes()

        if log.DO_NOT_FORK:
            # Start the nodes sequentially without forking, in order
            # to ease the debugging
            for node in nodes:
                self._start_node(node)
                self.repository.save_or_update(self)
        else:
            # Create one thread for each node to start
            thread_pool = Pool(
                processes=min(len(nodes), self.thread_pool_max_size))
            log.debug("Created pool of %d threads" % len(nodes))
            # Intercept Ctrl-c
            signal.signal(signal.SIGINT, sigint_handler)

            # This is blocking
            result = thread_pool.map_async(self._start_node, nodes)

            while not result.ready():
                result.wait(1)
                if not self.keep_running:
                    # the user did abort the start of the cluster. We
                    # finish the current start of a node and save the
                    # status to the storage, so we don't have
                    # unmanaged instances laying around
                    log.error("Aborting upon Ctrl-C")
                    thread_pool.close()
                    thread_pool.join()
                    self.repository.save_or_update(self)
                    sys.exit(1)

        # dump the cluster here, so we don't loose any knowledge
        self.repository.save_or_update(self)

        signal.alarm(0)

        def sigint_reset(signal, frame):
            sys.exit(1)

        signal.signal(signal.SIGINT, sigint_reset)

        # check if all nodes are running, stop all nodes if the
        # timeout is reached
        def timeout_handler(signum, frame):
            raise TimeoutError(
                "problems occured while starting the nodes, "
                "timeout `%i`", Cluster.startup_timeout)

        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(Cluster.startup_timeout)

        starting_nodes = self.get_all_nodes()
        try:
            while starting_nodes:
                starting_nodes = [
                    n for n in starting_nodes if not n.is_alive()
                ]
                if starting_nodes:
                    time.sleep(10)
        except TimeoutError as timeout:
            # FIXME: this is wrong: the reason why `node.is_alive()` fails could be caused by a network error, and we shouldn't just delete the nodes.

            log.error("Not all nodes were started correctly within the given"
                      " timeout `%s`" % Cluster.startup_timeout)
            log.error(
                "Please check if image, keypair, and network configuration is correct and try again."
            )
            # for node in starting_nodes:
            #     log.error("Stopping node `%s`, since it could not start "
            #               "within the given timeout" % node.name)
            #     node.stop()
            #     self.remove_node(node)

        signal.alarm(0)

        # If we reached this point, we should have IP addresses for
        # the nodes, so update the storage file again.
        self.repository.save_or_update(self)

        # Try to connect to each node. Run the setup action only when
        # we successfully connect to all of them.
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(Cluster.startup_timeout)
        pending_nodes = self.get_all_nodes()[:]

        if not os.path.exists(self.known_hosts_file):
            # Create the file if it's not present, otherwise the
            # following lines will raise an error
            try:
                fd = open(self.known_hosts_file, 'a')
                fd.close()
            except IOError as err:
                log.warning(
                    "Error while opening known_hosts file `%s`: `%s`"
                    " NOT using known_hosts_file.", self.known_hosts_file, err)
        try:
            keys = paramiko.hostkeys.HostKeys(self.known_hosts_file)
        except IOError:
            keys = paramiko.hostkeys.HostKeys()
            log.warning("Ignoring error while opening known_hosts file %s" %
                        self.known_hosts_file)

        try:
            while pending_nodes:
                for node in pending_nodes[:]:
                    ssh = node.connect(keyfile=self.known_hosts_file)
                    if ssh:
                        log.info("Connection to node %s (%s) successful.",
                                 node.name, node.connection_ip())
                        # Add host keys to the keys object.
                        for host, key in ssh.get_host_keys().items():
                            for ktype, keydata in key.items():
                                keys.add(host, ktype, keydata)
                        pending_nodes.remove(node)
                if pending_nodes:
                    time.sleep(5)

        except TimeoutError:
            # remove the pending nodes from the cluster
            log.error("Could not connect to all the nodes of the "
                      "cluster within the given timeout `%s`." %
                      Cluster.startup_timeout)
            for node in pending_nodes:
                log.error("Stopping node `%s`, since we could not connect to"
                          " it within the timeout." % node.name)
                self.remove_node(node, stop=True)

        signal.alarm(0)

        # It might be possible that the node.connect() call updated
        # the `preferred_ip` attribute, so, let's save the cluster
        # again.
        self.repository.save_or_update(self)

        # Save host keys
        try:
            keys.save(self.known_hosts_file)
        except IOError:
            log.warning("Ignoring error while saving known_hosts file %s" %
                        self.known_hosts_file)

        # A lot of things could go wrong when starting the cluster. To
        # ensure a stable cluster fitting the needs of the user in terms of
        # cluster size, we check the minimum nodes within the node groups to
        # match the current setup.
        if not min_nodes:
            # the node minimum is implicit if not specified.
            min_nodes = dict(
                (key, len(self.nodes[key])) for key in self.nodes.iterkeys())
        else:
            # check that each group has a minimum value
            for group, nodes in self.nodes.iteritems():
                if group not in min_nodes:
                    min_nodes[group] = len(nodes)

        self._check_cluster_size(min_nodes)
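
A hypothetical call following the min_nodes format described in the docstring (the node kind names depend on the cluster configuration):

# Hypothetical: start the cluster but accept as few as 2 compute nodes
# if the provider quota is hit before all instances come up.
cluster.start(min_nodes={'compute': 2, 'frontend': 1})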
Пример #55
0
def _maybe_convert_sets(target_dir, extracted_data, english_compatible=False):
    extracted_dir = path.join(target_dir, extracted_data)
    # override existing CSV with normalized one
    target_csv_template = os.path.join(target_dir,
                                       'ts_' + ARCHIVE_NAME + '_{}.csv')
    if os.path.isfile(target_csv_template):
        return
    path_to_original_csv = os.path.join(extracted_dir, 'data.csv')
    with open(path_to_original_csv) as csv_f:
        data = [
            d for d in csv.DictReader(csv_f, delimiter=',')
            if float(d['duration']) <= MAX_SECS
        ]

    # Keep track of how many samples are good vs. problematic
    counter = {
        'all': 0,
        'failed': 0,
        'invalid_label': 0,
        'too_short': 0,
        'too_long': 0,
        'total_time': 0
    }
    lock = RLock()
    num_samples = len(data)
    rows = []

    wav_root_dir = extracted_dir

    def one_sample(sample):
        """ Take a audio file, and optionally convert it to 16kHz WAV """
        orig_filename = path.join(wav_root_dir, sample['path'])
        # Store the converted wav files next to the original ones - just with a different suffix
        wav_filename = path.splitext(orig_filename)[0] + ".converted.wav"
        _maybe_convert_wav(orig_filename, wav_filename)
        file_size = -1
        if path.exists(wav_filename):
            file_size = path.getsize(wav_filename)
            frames = int(
                subprocess.check_output(['soxi', '-s', wav_filename],
                                        stderr=subprocess.STDOUT))
        label = sample['text']
        with lock:
            if file_size == -1:
                # Excluding samples that failed upon conversion
                counter['failed'] += 1
            elif label is None:
                # Excluding samples that failed on label validation
                counter['invalid_label'] += 1
            elif int(frames / SAMPLE_RATE * 1000 / 10 / 2) < len(str(label)):
                # Excluding samples that are too short to fit the transcript
                counter['too_short'] += 1
            elif frames / SAMPLE_RATE > MAX_SECS:
                # Excluding very long samples to keep a reasonable batch-size
                counter['too_long'] += 1
            else:
                # This one is good - keep it for the target CSV
                rows.append((wav_filename, file_size, label))
            counter['all'] += 1
            counter['total_time'] += frames

    print("Importing wav files...")
    pool = Pool(cpu_count())
    bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
    for i, _ in enumerate(pool.imap_unordered(one_sample, data), start=1):
        bar.update(i)
    bar.update(num_samples)
    pool.close()
    pool.join()

    with open(target_csv_template.format('train'),
              'w') as train_csv_file:  # 80%
        with open(target_csv_template.format('dev'),
                  'w') as dev_csv_file:  # 10%
            with open(target_csv_template.format('test'),
                      'w') as test_csv_file:  # 10%
                train_writer = csv.DictWriter(train_csv_file,
                                              fieldnames=FIELDNAMES)
                train_writer.writeheader()
                dev_writer = csv.DictWriter(dev_csv_file,
                                            fieldnames=FIELDNAMES)
                dev_writer.writeheader()
                test_writer = csv.DictWriter(test_csv_file,
                                             fieldnames=FIELDNAMES)
                test_writer.writeheader()

                for i, item in enumerate(rows):
                    transcript = validate_label(
                        cleanup_transcript(
                            item[2], english_compatible=english_compatible))
                    if not transcript:
                        continue
                    wav_filename = os.path.join(target_dir, extracted_data,
                                                item[0])
                    i_mod = i % 10
                    if i_mod == 0:
                        writer = test_writer
                    elif i_mod == 1:
                        writer = dev_writer
                    else:
                        writer = train_writer
                    writer.writerow(
                        dict(
                            wav_filename=wav_filename,
                            wav_filesize=os.path.getsize(wav_filename),
                            transcript=transcript,
                        ))

    print('Imported %d samples.' %
          (counter['all'] - counter['failed'] - counter['too_short'] -
           counter['too_long']))
    if counter['failed'] > 0:
        print('Skipped %d samples that failed upon conversion.' %
              counter['failed'])
    if counter['invalid_label'] > 0:
        print('Skipped %d samples that failed on transcript validation.' %
              counter['invalid_label'])
    if counter['too_short'] > 0:
        print(
            'Skipped %d samples that were too short to match the transcript.' %
            counter['too_short'])
    if counter['too_long'] > 0:
        print('Skipped %d samples that were longer than %d seconds.' %
              (counter['too_long'], MAX_SECS))
    print('Final amount of imported audio: %s.' %
          secs_to_hours(counter['total_time'] / SAMPLE_RATE))
Пример #56
0
    def _parallel_augment(cls, action_fx, data, n, num_thread=2):
        pool = ThreadPool(num_thread)
        results = pool.map(action_fx, [data] * n)
        pool.close()
        pool.join()
        return results
Пример #57
0
class ChainceRestApi(object):
    """"""
    jws = PyJWS()

    #----------------------------------------------------------------------
    def __init__(self):
        """Constructor"""
        self.jws.register_algorithm('Ed25519', EDAlgorithm())

        self.apiKey = ''
        self.secretKey = ''

        self.active = False
        self.reqid = 0
        self.queue = Queue()
        self.pool = None
        self.sessionDict = {}

    #----------------------------------------------------------------------
    def init(self, apiKey, secretKey):
        """初始化"""
        self.apiKey = str(apiKey)
        self.secretKey = str(secretKey)

    #----------------------------------------------------------------------
    def start(self, n=10):
        """启动"""
        if self.active:
            return

        self.active = True
        self.pool = Pool(n)
        self.pool.map_async(self.run, range(n))

    #----------------------------------------------------------------------
    def close(self):
        """关闭"""
        self.active = False

        if self.pool:
            self.pool.close()
            self.pool.join()

    #----------------------------------------------------------------------
    def addReq(self, method, path, callback, params=None, postdict=None):
        """添加请求"""
        self.reqid += 1
        req = (method, path, callback, params, postdict, self.reqid)
        self.queue.put(req)
        return self.reqid

    #----------------------------------------------------------------------
    def processReq(self, req, i):
        """处理请求"""
        method, path, callback, params, postdict, reqid = req
        url = REST_HOST + path

        header = {'Authorization': 'Bearer %s' % self.generateSignature()}

        try:
            # Use a keep-alive session; roughly 20% faster than short-lived connections
            resp = self.sessionDict[i].request(method,
                                               url,
                                               headers=header,
                                               params=params,
                                               json=postdict)

            code = resp.status_code
            d = resp.json()

            if code == 200:
                callback(d, req)
            else:
                self.onError(code, str(d))
        except Exception as e:
            self.onError(type(e), str(e))

    #----------------------------------------------------------------------
    def run(self, i):
        """连续运行"""
        s = requests.session()
        s.keep_alive = False
        self.sessionDict[i] = s

        while self.active:
            try:
                req = self.queue.get(timeout=1)
                self.processReq(req, i)
            except Empty:
                pass

    #----------------------------------------------------------------------
    def generateSignature(self):
        """生成签名"""
        payload = '{"key": "%s", "iat": %s}' % (self.apiKey, time())

        try:
            # py2
            signature = self.jws.encode(payload,
                                        self.secretKey,
                                        algorithm='Ed25519')
        except TypeError:
            # py3
            payload = bytes(payload, encoding="utf-8")
            bearer = self.jws.encode(payload, self.secretKey, algorithm='Ed25519')
            signature = str(bearer, encoding="utf-8")

        return signature

    #----------------------------------------------------------------------
    def onError(self, code, error):
        """错误回调"""
        print('on error')
        print(code, error)

    #----------------------------------------------------------------------
    def onData(self, data, req):
        """通用回调"""
        print('on %s' % req[1])
        print(data, req[5])
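
A brief usage sketch of the client above; the API key, secret and request path are placeholders, not real Chaince endpoints:

# Hypothetical usage of ChainceRestApi; keys and path are placeholders.
def on_balances(data, req):
    print('balances:', data)

api = ChainceRestApi()
api.init('my-api-key', 'my-secret-key')
api.start(n=4)
api.addReq('get', '/balances', on_balances)  # placeholder path
# ... later, once all requests have been queued ...
api.close()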
Пример #58
0
def main():
    pool = ThreadPool(20)
    results = pool.map(check_hash, keywords)
    pool.close()
    pool.join()
Пример #59
0
def probMask(A, P, X, ii, jj):
    def cross_product(a, b):
        return a[0] * b[1] - a[1] * b[0]

    def iscross(pairData):
        A, B = pairData[0]
        C, D = pairData[1]
        AC = C - A
        AD = D - A
        BC = C - B
        BD = D - B
        CA = -AC
        CB = -BC
        DA = -AD
        DB = -BD

        return 1 if cross_product(AC, AD) * cross_product(
            BC, BD) < 0 and cross_product(CA, CB) * cross_product(
                DA, DB) < 0 else 0

    def calprobMask(pairData):
        Adj, Pro, Xfe, ii, jj = pairData
        Adj = Adj[:ii, :ii]
        Pro = Pro[:jj - ii, :]
        Xfe = Xfe[:jj]

        # existed ridges
        edgesList = []
        edgeSet = set()
        edgesIdxList = []
        # G = nx.Graph()
        for i in range(ii):
            for j in range(i):
                if Adj[i][j] == 1 and (i, j) not in edgeSet:
                    edgesList.append([Xfe[i][:2], Xfe[j][:2]])
                    edgesIdxList.append((i, j))
                    # G.add_edge(i, j)
                    edgeSet.add((i, j))

        newNodes = Xfe[ii:jj]
        newp = Pro
        for idx in range(len(newNodes)):
            for pidx in range(len(Pro[idx])):
                newedge = [Xfe[pidx][:2], Xfe[ii + idx][:2]]
                # edgesIdxList.append((pidx,ii+idx))
                for i, edge in enumerate(edgesList):
                    if pidx in edgesIdxList[i]:
                        continue
                    if iscross([edge, newedge]):
                        newp[idx, pidx] = 0
                        break

        return newp

    newP = []
    pool = ThreadPool(4)
    tasks = [[A[i], P[i], X[i], ii, jj] for i in range(len(A))]
    newP = pool.map(calprobMask, tasks)
    pool.close()
    pool.join()
    return newP
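
The iscross helper above is the standard cross-product test for proper segment intersection; a standalone version of the same test, with made-up coordinates, behaves like this:

import numpy as np

def segments_cross(a, b, c, d):
    # Same cross-product test as iscross() above: AB and CD properly intersect iff
    # A and B lie on opposite sides of line CD, and C and D on opposite sides of line AB.
    def cross(u, v):
        return u[0] * v[1] - u[1] * v[0]
    return (cross(c - a, d - a) * cross(c - b, d - b) < 0 and
            cross(a - c, b - c) * cross(a - d, b - d) < 0)

p = [np.array(v, dtype=float) for v in [(0, 0), (2, 2), (0, 2), (2, 0)]]
print(segments_cross(p[0], p[1], p[2], p[3]))  # True: the diagonals of a square cross
print(segments_cross(p[0], p[2], p[1], p[3]))  # False: two parallel sides do not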
Пример #60
0
    def download_data(self, video_data):
        """下载数据 - 阻塞并耗时的操作,加进进程池"""
        name = video_data['name']
        url = video_data['url']
        print('Downloading...', name)
        video_content = requests.get(url=url, headers=self.headers).content
        self.save_data(name, video_content)
        time.sleep(2)
        print('Completed: ', name)

    def save_data(self, name, data):
        """保存数据"""
        file_path = './videos/' + name
        with open(file_path, mode='wb') as fp:
            fp.write(data)


if __name__ == "__main__":
    # Create the directory where files are saved
    if not os.path.exists('./videos'):
        os.mkdir('./videos')
    glv = GetLiVideo()
    # Instantiate the process pool - 4 worker processes
    pool = Pool(4)
    v_information = glv.get_each_video_url()
    pool.map(glv.download_data, v_information)  # pool.map() returns a list
    pool.close()  # close the pool so it accepts no new tasks
    #  pool.terminate()  # stop the workers immediately, dropping unfinished tasks
    pool.join()  # block the main process until the workers exit