Example #1
    def get_keys_multiproc(
        cls,
        lookup,
        loc_df,
        success_only=False,
        num_cores=-1,
        num_partitions=-1
    ):
        """
        Used for CPU bound lookup operations, Depends on a method

        `process_locations_multiproc(dataframe)`

        where single_row is a pandas series from a location Pandas DataFrame
        and returns a list of dicts holding the lookup results for that single row
        """
        pool_count = num_cores if num_cores > 0 else cpu_count()
        part_count = num_partitions if num_partitions > 0 else min(pool_count * 2, len(loc_df))
        locations = np.array_split(loc_df, part_count)

        pool = Pool(pool_count)
        results = pool.map(lookup.process_locations_multiproc, locations)
        lookup_results = sum([r for r in results if r], [])
        pool.terminate()
        return lookup_results
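The `process_locations_multiproc` method that this depends on is not part of the snippet. A minimal sketch of the contract described by the docstring, with the per-row lookup left as a hypothetical placeholder, might look like:

    def process_locations_multiproc(self, dataframe):
        # Assumed shape only: walk one partition of the locations DataFrame
        # and collect a list of result dicts for its rows.
        results = []
        for _, single_row in dataframe.iterrows():
            results.append({"location_index": single_row.name})  # placeholder lookup result
        return results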
Example #2
def source_item_from_list_in_event(
    stream_name,
    list_name,
    field,
    value,
    batch_size=1000,
):
    broker = RedisStream.get_broker()
    next_id = "+"
    i = 0
    max_iter = int(1000 / batch_size) + 1
    while i < max_iter:
        i += 1
        event_tuples = broker.xrevrange(stream_name,
                                        max=next_id,
                                        count=batch_size)
        n = len(event_tuples)
        if not n:
            return None, None, None
        event_ids, event_dicts = zip(*event_tuples)

        with Pool(parallel_jobs(n)) as pool:
            event_bytes = pool.map(event_from_dict, event_dicts)
            events = pool.map(bytes_to_event, event_bytes)
            args = zip(*(events, [list_name] * len(events)))
            dicts = pool.starmap(extract_attr, args)

            args = zip(*(dicts, [field] * len(dicts), [value] * len(dicts)))
            matches = pool.starmap(find_first_by, args)

            # use a separate index so the outer while-loop counter `i` is not clobbered
            for idx, detection in enumerate(matches):
                if detection:
                    return detection, event_ids[idx], events[idx].correlations
        next_id = decrement_id(event_ids[-1])
    return None, None, None
Example #3
def progress_map(func, *iterables, jobs=1, **kwargs):
    r"""
    Map a function across iterables of arguments.

    This is comparable to :meth:`astropy.utils.console.ProgressBar.map`, except
    that it is implemented using :mod:`tqdm` and so provides more detailed and
    accurate progress information.
    """
    total = min(len(iterable) for iterable in iterables)
    if jobs == 1:
        return list(tqdm(map(func, *iterables), total=total, **kwargs))
    else:
        with Pool(jobs) as pool:
            return [
                item[1] for item in sorted(
                    tqdm(
                        pool.imap_unordered(
                            WrappedFunc(func),
                            enumerate(zip(*iterables))
                        ),
                        total=total, **kwargs
                    ),
                    key=itemgetter(0)
                )
            ]
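The `WrappedFunc` helper is referenced above but not shown. A minimal sketch, assuming its only job is to carry each item's original index through `imap_unordered` so that `itemgetter(0)` can restore the input order afterwards, could be:

class WrappedFunc:
    # Assumed helper: tags each result with the index it was submitted under.
    def __init__(self, func):
        self.func = func

    def __call__(self, indexed_args):
        index, args = indexed_args
        return index, self.func(*args)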
Example #4
def source_event(stream_name,
                 filters_dict={},
                 batch_size=128,
                 latest_first=True):
    broker = RedisStream.get_broker()
    next_id = "+" if latest_first else "-"
    i = 0
    max_iter = int(1000 / batch_size) + 1
    while i < max_iter:
        i += 1
        if latest_first:
            event_tuples = broker.xrevrange(stream_name,
                                            max=next_id,
                                            count=batch_size)
        else:
            event_tuples = broker.xrange(stream_name,
                                         min=next_id,
                                         count=batch_size)
        n = len(event_tuples)
        if not n:
            return None
        event_ids, event_dicts = zip(*event_tuples)
        with Pool(parallel_jobs(n)) as pool:
            event_bytes = pool.map(event_from_dict, event_dicts)
            events = pool.map(bytes_to_event, event_bytes)
            args = zip(*(events, [filters_dict] * len(events)))
            matches = pool.starmap(match_event, args)
            if any(matches):
                index = matches.index(True)
                return events[index]
        next_id = (decrement_id(event_ids[-1]) if latest_first
                   else increment_id(event_ids[-1]))
    return None
Example #5
    def stream(
        self,
        subreddit: str,
        start_time: int,
        end_time: int
    ) -> Iterator[List[Dict[str, Any]]]:
        for id_iter in query_pushshift(subreddit, start_time, end_time):
            with Pool(cpu_count(), initializer) as workers:
                yield list(workers.imap_unordered(praw_by_id, id_iter))
Example #6
def cm(m=None, p=5, u="", **kw):
    ct = fetch(m)
    t = etree.HTML(ct)
    title = ''.join(t.xpath('//*[@id="workinfo"]/h1/text()')).strip()
    els = [(i.get("title")[:-11], i.get("href")) for i in t.xpath('//*[@id="chapterlist"]/ul/li/a')]
    for n, (e, _) in enumerate(els):
        print "%s." % n, e
    ci_pure = raw_input("Enter the chapter numbers; separate multiple with commas, use \"-\" for ranges, e.g. 1\n4,6,7\n1-10,14-75\nSelect chapters:")
    ci = formatipt(ci_pure)
    
    #mg = Manager()
    tasks = []
    for i in ci:
        ci_title, ci_href = els[i]
        tasks.append({"type": "sets", "title": title, "set": "%s.%s" % (i+1, ci_title), "url": ci_href})

    file_dir = os.path.join(".", "download", title).encode("u8")
    try:
        if not os.path.exists(file_dir): os.makedirs(file_dir)
    except OSError:
        pass
    #use manual processes
    #ps = [Process(target=save_pic, args=(sets, tpls, pics)) for i in xrange(min(p, len(ci)))]
    #for i in ps:
    #    i.start()
    pool = Pool(processes=p)
    result = pool.map_async(save_pic, tasks)  # when using map_async, don't forget pool.close()
    out = sys.stdout
    try: 
        while True:
            if result.ready():
                tasks = []
                for ret in result.get():
                    tasks.extend(ret)
                if not tasks: break
                result = pool.map_async(save_pic, tasks)  # when using map_async, don't forget pool.close()
            out.write("\r%d" % (len(tasks)))
            out.flush()
            time.sleep(.5)
    except KeyboardInterrupt:
        print "已停止下载"
        #return
    finally:
        pool.close()

    print "下载完成"

    if not u: return
    # zip and upload
    zip_name = file_dir[2:].replace(os.path.sep, "_") + "_" + ci_pure
    username, password = u.split(":")
    ci_str = map(str, ci)
    tfiles = ["%s/%s"%(dirpath, filename) for dirpath, dirs, files in os.walk(file_dir)
                                          for filename in files if filename[:filename.find(".")] in ci_str]
    zip_files(tfiles, zip_name)
    upfile(username, password, zip_name)
Example #7
    def praw_memes(self, verbose: bool) -> Iterator[List[redditData]]:
        for ids in self.query_pushshift():
            with cast(mpPool, Pool(cpu_count(), initializer)) as workers:
                if verbose:
                    memes: list[Union[redditData, None]] = list(
                        tqdm(workers.imap_unordered(praw_by_id, ids)))
                else:
                    memes = list(workers.imap_unordered(praw_by_id, ids))
            yield [
                meme for meme in memes if meme and meme["username"] != "None"
            ]
Example #8
def progress_map(func, *iterables, jobs=1, **kwargs):
    """Map a function across iterables of arguments.

    This is comparable to :meth:`astropy.utils.console.ProgressBar.map`, except
    that it is implemented using :mod:`tqdm` and so provides more detailed and
    accurate progress information.
    """
    total = _get_total_estimate(*iterables)
    if jobs == 1:
        yield from tqdm(map(func, *iterables), total=total, **kwargs)
    else:
        with Pool(jobs) as pool:
            yield from _results_in_order(
                tqdm(pool.imap_unordered(WrappedFunc(func),
                                         enumerate(zip(*iterables))),
                     total=total,
                     **kwargs))
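As in Example #3, the `_get_total_estimate` and `_results_in_order` helpers are not part of the snippet. Under the assumption that they estimate the progress total from the shortest sized iterable and restore submission order after `imap_unordered`, a sketch could be:

from operator import itemgetter

def _get_total_estimate(*iterables):
    # Assumed: use the shortest iterable that defines __len__, else None
    # (tqdm treats total=None as unknown length).
    lengths = [len(it) for it in iterables if hasattr(it, "__len__")]
    return min(lengths) if lengths else None

def _results_in_order(indexed_results):
    # Assumed: sort the (index, result) pairs produced by WrappedFunc back into
    # submission order and yield only the results.
    for _, result in sorted(indexed_results, key=itemgetter(0)):
        yield result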
Example #9
    def searchDarkWeb(self, query, include=None, exclude=None):
        ''' Gets data from the specified search engines '''
        if include:
            final = [a for a in include if a in self.sites]
        elif exclude:
            final = [a for a in self.sites if a not in exclude]
        else:
            final = list(self.sites.keys())

        self.query = query
        pool = Pool(processes=len(final))
        data = pool.map(self.search, final)
        resultList = [d for dat in data for d in dat]
        pool.close()

        ind = Indexer()
        for i in resultList:
            ind.join(i)
        return ind.results()
Example #10
def _scan_match(sample_ui_list, path_list, comp_func, weight_list=None, threshold=0.6, pool_size=12):
    """
    :param sample_ui_list: output after process_csv()
    :param path_list: relative or absolute path list of csv files
    :param comp_func: compare function
    :param weight_list: ui weight mask
    :param threshold: threshold,超过一定的阈值才会被计算成相同组件
    :param pool_size: 并行池大小
    :return: best match path name
    """
    pool = Pool(processes=pool_size)

    arg_list = [
        (j + 1, path, sample_ui_list, comp_func, weight_list, threshold)
        for j, path in enumerate(path_list)
    ]
    score_list = pool.map(_single_scan_helper, arg_list)
    pool.close()
    pool.join()

    # return the (path, score, score_distribution_list) tuples sorted by score, descending
    return sorted(score_list, key=lambda k: k[1], reverse=True)
Example #11
    def process(self):
        """
            Process all files contained in .mans extracted folder
        """
        files_list = []
        for filetype in self.filelist.keys():
            # If the filetype is unknown, skip it for now
            if filetype not in MANS_FIELDS.keys():
                logging.warning(
                    f"[MAIN] {filetype} filetype not recognize. Send us a note! - SKIP"
                )
                continue
            # Ignore items if not related to timeline
            # TODO: will use them in neo4j for relationship
            if MANS_FIELDS[filetype].get("skip", False):
                logging.debug(f"[MAIN] SKIP {filetype}")
                continue
            # Read all files related to the type
            for (file, offset) in self.filelist[filetype]:
                files_list.append((filetype, file, offset))

        with Pool(processes=self.cpu_count) as pool:
            res = pool.starmap_async(self.process_file, files_list).get()
        logging.debug("[MAIN] Pre-Processing [✔]")
Example #12
def mix_ciphers(ciphers_for_mixing, nr_rounds=MIN_MIX_ROUNDS,
                teller=_teller, nr_parallel=0):
    p = ciphers_for_mixing['modulus']
    g = ciphers_for_mixing['generator']
    q = ciphers_for_mixing['order']
    y = ciphers_for_mixing['public']

    original_ciphers = ciphers_for_mixing['mixed_ciphers']
    nr_ciphers = len(original_ciphers)

    teller.task('Mixing %d ciphers for %d rounds' % (nr_ciphers, nr_rounds))

    cipher_mix = {'modulus': p, 'generator': g, 'order': q, 'public': y}
    cipher_mix['original_ciphers'] = original_ciphers

    with teller.task('Producing final mixed ciphers', total=nr_ciphers):
        shuffled = shuffle_ciphers(p, g, q, y, original_ciphers, teller=teller)
        mixed_ciphers, mixed_offsets, mixed_randoms = shuffled
        cipher_mix['mixed_ciphers'] = mixed_ciphers

    total = nr_ciphers * nr_rounds
    with teller.task('Producing ciphers for proof', total=total):
        if nr_parallel > 0:
            pool = Pool(nr_parallel, Random.atfork)
            data = [
                (p, g, q, y, original_ciphers)
                for _ in range(nr_rounds)
            ]
            collections = []
            for r in pool.imap(_shuffle_ciphers, data):
                teller.advance()
                collections.append(r)
            pool.close()
            pool.join()
        else:
            collections = [shuffle_ciphers(p, g, q, y,
                                           original_ciphers, teller=teller)
                           for _ in range(nr_rounds)]

        unzipped = [list(x) for x in zip(*collections)]
        cipher_collections, offset_collections, random_collections = unzipped
        cipher_mix['cipher_collections'] = cipher_collections
        cipher_mix['random_collections'] = random_collections
        cipher_mix['offset_collections'] = offset_collections

    with teller.task('Producing cryptographic hash challenge'):
        challenge = compute_mix_challenge(cipher_mix)
        cipher_mix['challenge'] = challenge

    bits = bit_iterator(int(challenge, 16))

    with teller.task('Answering according to challenge', total=nr_rounds):
        for i, bit in zip(range(nr_rounds), bits):
            offsets = offset_collections[i]
            randoms = random_collections[i]

            if bit == 0:
                # Nothing to do, we just publish our offsets and randoms
                pass
            elif bit == 1:
                # The image is given. We now have to prove we know
                # both this image's and mixed_ciphers' offsets/randoms
                # by providing new offsets/randoms so one can reencode
                # this image to end up with mixed_ciphers.
                # original_ciphers -> image
                # original_ciphers -> mixed_ciphers
                # Provide image -> mixed_ciphers
                new_offsets = [None] * nr_ciphers
                new_randoms = [None] * nr_ciphers

                for j in range(nr_ciphers):
                    cipher_random = randoms[j]
                    cipher_offset = offsets[j]
                    mixed_random = mixed_randoms[j]
                    mixed_offset = mixed_offsets[j]

                    new_offsets[cipher_offset] = mixed_offset
                    new_random = (mixed_random - cipher_random) % q
                    new_randoms[cipher_offset] = new_random

                offset_collections[i] = new_offsets
                random_collections[i] = new_randoms
                del offsets, randoms
            else:
                m = "This should be impossible. Something is broken."
                raise AssertionError(m)

            teller.advance()
    teller.finish('Mixing')

    return cipher_mix
Example #13
def verify_cipher_mix(cipher_mix, teller=_teller, nr_parallel=0):
    try:
        p = cipher_mix['modulus']
        g = cipher_mix['generator']
        q = cipher_mix['order']
        y = cipher_mix['public']

        original_ciphers = cipher_mix['original_ciphers']
        mixed_ciphers = cipher_mix['mixed_ciphers']
        challenge = cipher_mix['challenge']
        cipher_collections = cipher_mix['cipher_collections']
        offset_collections = cipher_mix['offset_collections']
        random_collections = cipher_mix['random_collections']
    except KeyError as e:
        m = "Invalid cipher mix format"
        raise ZeusError(m, e)

    if compute_mix_challenge(cipher_mix) != challenge:
        m = "Invalid challenge"
        raise ZeusError(m)

    nr_ciphers = len(original_ciphers)
    nr_rounds = len(cipher_collections)
    teller.task('Verifying mixing of %d ciphers for %d rounds'
                 % (nr_ciphers, nr_rounds))

    if (len(offset_collections) != nr_rounds or
        len(random_collections) != nr_rounds):
        m = "Invalid cipher mix format: collections not of the same size!"
        raise ZeusError(m)

    #if not validate_cryptosystem(p, g, q, teller):
    #    m = "Invalid cryptosystem"
    #    raise AssertionError(m)

    total = nr_rounds * nr_ciphers
    with teller.task('Verifying ciphers', total=total):
        data = []
        for i, bit in zip(range(nr_rounds), bit_iterator(int(challenge, 16))):
            ciphers = cipher_collections[i]
            randoms = random_collections[i]
            offsets = offset_collections[i]
            data.append((p, g, q, y,
                         i, bit, original_ciphers,
                         mixed_ciphers, ciphers,
                         randoms, offsets))

        if nr_parallel <= 0:
            for args in data:
                verify_mix_round(*args, teller=teller)

        else:
            pool = Pool(nr_parallel, Random.atfork)
            try:
                for count in pool.imap(_verify_mix_round, data):
                    teller.advance(count)
            finally:
                pool.terminate()
                pool.join()

    teller.finish('Verifying mixing')
    return 1
Example #14
from multiprocessing import Pool

def f(x):
    return x * x  # placeholder worker; the original snippet assumes `f` is defined elsewhere

def main():
    p = Pool(5)
    A = list(range(100))
    print(p.map(f, A))

if __name__ == "__main__":
    main()