def _do_jpeg_analysis(self):
    """Run a JPEG analysis pass over the current database.

    JPEG is special because the codec is driven by a nominal quality
    setting rather than a value that maps directly onto PSNR / bpp, so a
    per-file analysis CSV is produced for later lookup.
    """
    generator = self.generators[self.st.mode.name.lower()]
    file_paths = generator.get_db_files_pathnames()
    db_folder = generator.get_db_folder()
    worker_pool = Pool()

    # Create a 'jpeg' sub-folder inside the DB root and each top-level folder.
    top_level = list(walk(db_folder))[0]
    root = Path(top_level[0])
    folders = [root] + [root / name for name in top_level[1]]
    folders = [f for f in folders if f.name != 'jpeg']
    for folder in folders:
        (folder / 'jpeg').mkdir(exist_ok=True)

    # Only analyze files whose result CSV does not already exist.
    csv = np.array([p.parent / 'jpeg' / (p.stem + '.jpeg.csv')
                    for p in file_paths])
    to_create_index = [not p.exists() for p in csv]
    csv = csv[to_create_index]
    file_paths = file_paths[to_create_index]
    worker_pool.starmap_async(ImgProc.save_jpeg_analysis, zip(file_paths, csv))
    worker_pool.close()
    print('Analyzing the generator folder with jpeg.', end='\n\n')
    worker_pool.join()
Пример #2
0
def compute_ND(root_dir):
    """Compute neutron-diffraction (ND) data for every MPdata entry in parallel.

    Reads material ids from ``MPdata_all.csv`` under ``root_dir``, shuffles
    them to avoid data clustering, then fans the work out to one
    ``parallel_ND`` job per CPU core.

    :param root_dir: directory containing ``MPdata_all.csv``
    :raises AssertionError: if ``MPdata_all.csv`` is missing
    """
    # read all MPdata filenames (os.path.join works with or without a
    # trailing separator on root_dir, unlike plain string concatenation)
    csv_path = os.path.join(root_dir, "MPdata_all.csv")
    assert os.path.isfile(csv_path)
    MPdata = pd.read_csv(csv_path, sep=';', header=0, index_col=None)
    filenames = MPdata['material_id'].values.tolist()
    # random shuffle in case of data clustering
    random.shuffle(filenames)
    # save dir
    save_dir = os.path.join(root_dir, "ND_data")
    if os.path.exists(save_dir):
        _ = input("{} already exists, please check if you want to continue, Ctrl+C to terminate.."
                  .format(save_dir))
    else:
        os.mkdir(save_dir)
    # parameters
    max_Miller = 4
    sym_thresh = 0.1
    nworkers = multiprocessing.cpu_count()
    print('total size: {}, parallel computing on {} workers..'.format(len(filenames), nworkers))
    # initialize pool of workers; each worker gets an even share of the files
    pool = Pool(processes=nworkers)
    file_split = np.array_split(filenames, nworkers)
    args = [(root_dir, save_dir, fnames, max_Miller, sym_thresh) for fnames in file_split]
    pool.starmap_async(parallel_ND, args)
    pool.close()
    pool.join()
    print('all jobs done, pool closed..')
Пример #3
0
def compute_xrd(root_dir):
    """Compute XRD patterns for every MPdata entry in parallel.

    Reads material ids from ``MPdata_all.csv`` under ``root_dir``, shuffles
    them to avoid data clustering, then dispatches one ``parallel_computing``
    job per CPU core.

    :param root_dir: directory containing ``MPdata_all.csv``
    :raises AssertionError: if ``MPdata_all.csv`` is missing
    """
    # read all MPdata filenames (os.path.join works with or without a
    # trailing separator on root_dir, unlike plain string concatenation)
    csv_path = os.path.join(root_dir, "MPdata_all.csv")
    assert os.path.isfile(csv_path)
    MPdata = pd.read_csv(csv_path,
                         sep=';',
                         header=0,
                         index_col=None)
    filenames = MPdata['material_id'].values.tolist()
    # random shuffle in case of data clustering
    random.shuffle(filenames)

    # parameters
    wavelength = 'CuKa'  # CuKa by default
    sym_thresh = 0.1
    nworkers = multiprocessing.cpu_count()
    print('total size: {}, parallel computing on {} workers..'.format(
        len(filenames), nworkers))

    # initialize pool of workers; each worker gets an even share of the files
    pool = Pool(processes=nworkers)
    file_split = np.array_split(filenames, nworkers)
    args = [(root_dir, fnames, wavelength, sym_thresh)
            for fnames in file_split]
    pool.starmap_async(parallel_computing, args)
    pool.close()
    pool.join()
Пример #4
0
def generate_maze_metrics(folder):
    """Parse every maze ``.xml`` file in *folder* and asynchronously compute
    path metrics for each (start, goal) pair.

    Results are written to ``mazeMetrics.csv`` inside *folder* by the
    ``save_maze_metrics`` callback once the worker pool finishes.
    """
    global pool, filenames, positions, result_file_name, start_time
    # A previous asynchronous run is still in flight -- refuse to start.
    if pool is not None:
        print("PROCESS POOL ALREADY OPEN")
        return

    args = []
    filenames = []
    positions = []
    for filename in os.listdir(folder):
        if not filename.endswith(".xml"):
            continue
        try:
            print(filename, '', end='')
            full_path = os.path.join(folder, filename)
            walls_aux, feeders_aux, start_positions_aux = MazeParser.parse_maze(full_path)

            walls = [Wall(float(row['x1']), float(row['y1']),
                          float(row['x2']), float(row['y2']))
                     for index, row in walls_aux.iterrows()]
            goals = [Feeder(int(f['fid']), float(f['x']), float(f['y']))
                     for _, f in feeders_aux.iterrows()]
            starts = [(id, StartPos(float(s['x']), float(s['y']), float(s['w'])))
                      for id, s in start_positions_aux.iterrows()]

            # one task (and bookkeeping entry) per (start, goal) combination
            args += [(s, g, walls) for g in goals for (id, s) in starts]
            filenames += [filename for g in goals for (id, s) in starts]
            positions += [id for g in goals for (id, s) in starts]

        # Catch Exception rather than using a bare except, so
        # KeyboardInterrupt / SystemExit still propagate; a malformed maze
        # file is simply reported and skipped.
        except Exception:
            print('error')
    print()
    # prepend a unique task index to each argument tuple
    args = [(i,) + a for i, a in enumerate(args)]
    result_file_name = os.path.join(folder, 'mazeMetrics.csv')
    pool = Pool(12)
    start_time = time.time()
    pool.starmap_async(find_path, args, callback=save_maze_metrics)
Пример #5
0
def main():
    """Drive a distributed mutual-exclusion simulation.

    Builds the shared resources and nodes through a Manager, then runs one
    pool worker per node. Usage: ``script <num_processes> <num_resources>``.
    """
    if len(sys.argv) < 3:
        print("Usage: %s <num_processes> <num_resources>" % sys.argv[0])
        return
    m = Manager()
    # released by the nodes; main() blocks on it below before tearing down
    global_sema = m.Semaphore()
    num_process = int(sys.argv[1])
    num_resource = int(sys.argv[2])
    #global_resource_list = m.list([0] * num_resource)
    #global_res_list_lock = m.Lock()
    # managed list created first because every Resource keeps a handle to it
    nodes = m.list([])
    global_resource_list = m.list(
        [Resource(m, i, nodes) for i in range(num_resource)])
    global_transmit_lock = m.Lock()

    nodes.extend([
        Node(i, num_process, m, global_resource_list, global_sema,
             global_transmit_lock) for i in range(num_process)
    ])

    # the nodes stop after this many total requests are made
    max_req = num_process

    # the worker pool
    # it contains one process for each of the node in the network
    jobPool = Pool(processes=len(nodes))
    jobPool.starmap_async(
        fire_node, zip(repeat(nodes), range(num_process), repeat(max_req)))

    # NOTE(review): Semaphore() starts at 1, which presumably accounts for
    # the "+ 1" here -- confirm against fire_node's release pattern.
    for _ in range(num_process + 1):
        global_sema.acquire()

    jobPool.terminate()
Пример #6
0
def send_email(operation: str, document_id: str, user_id: str) -> None:
    """Notify every user with permissions on *document_id* about *operation*.

    One ``send_single_mail`` task is dispatched per user across a pool of
    5 worker processes.

    :param operation: the operation being reported in the mail
    :param document_id: id of the affected document
    :param user_id: id of the acting user (excluded / used for filtering
        by the mongo permission lookup)
    """
    document = cast(Dict, mongo.find_document(document_id))
    users = mongo.get_users_with_permissions_to_document(
        document_id, document["company"], user_id)

    pool = Pool(processes=5)
    pool.starmap_async(send_single_mail,
                       [(document_id, operation, user) for user in users])
    # BUG FIX: without close()/join() the Pool object is garbage collected
    # when this function returns, and its finalizer *terminates* the
    # workers -- queued e-mails could be silently dropped.
    pool.close()
    pool.join()
def main():
    """Deadlock-detection simulation: one pool worker per node plus a daemon
    controller process that watches the shared resource table for deadlocks.

    Usage: ``script num_process num_resource``.
    """
    if len(sys.argv) < 3:
        print("Usage:", sys.argv[0], "num_process", "num_resource")
        return
    m = Manager()
    num_process = int(sys.argv[1])
    num_resource = int(sys.argv[2])
    # res_tab[r][p]: per-resource, per-process state (managed nested lists)
    res_tab = [m.list([0] * num_process) for _ in range(num_resource)]
    res_tab = m.list(res_tab)
    res_tab_lock = m.Lock()
    res_sems = m.list([Resource(m, i) for i in range(num_resource)])
    # starts at 0: main() blocks below until the nodes release it
    global_sema = m.Semaphore(0)

    nodes = [
        Node(i, res_tab, res_tab_lock, num_resource, res_sems)
        for i in range(num_process)
    ]

    # the nodes stop after this many total requests are made
    max_req = num_process

    #killEvent = m.Event()
    # controller process
    controller = Process(target=check_for_deadlock,
                         args=(res_tab, res_tab_lock, num_process,
                               global_sema),
                         daemon=True)
    controller.start()
    '''
    processes = []
    for i in range(num_process):
        processes.append(Process(target=check_for_deadlock, args=(), daemon=True))
        processes[-1].start()
        '''
    # the worker pool
    # it contains one process for each of the node in the
    # network. each process gets assigned to perform the
    # free -> request -> cs loop for one node.
    jobPool = Pool(processes=len(nodes))
    jobPool.starmap_async(
        fire_node,
        zip(repeat(nodes), range(len(nodes)), repeat(max_req),
            repeat(global_sema)))
    #jobPool.close()
    # request done

    # wait for one release per node before tearing everything down
    for _ in range(num_process):
        global_sema.acquire()
    #killEvent.wait()
    jobPool.terminate()
    #controller.close()
    controller.terminate()

    #controller.join()
    '''
Пример #8
0
def get_pool(n=5):
    """Demonstrate asynchronous (non-blocking) Pool APIs with *n* workers."""
    worker_pool = Pool(n)
    # p.map(test, (i for i in range(10)))      -- blocking multi-process map
    # p.starmap(test1, zip([1,2,3],[3,4,5]))   -- blocking map, multi-arg func
    # Asynchronous single-argument dispatch.
    worker_pool.map_async(test, (i for i in range(5)),
                          callback=back_func, error_callback=back_func_err)
    # Asynchronous multi-argument dispatch.
    worker_pool.starmap_async(test1, zip([1, 2, 3], [3, 4, 5]),
                              callback=back_func, error_callback=back_func_err)
    print('-----')
    worker_pool.close()
    worker_pool.join()
Пример #9
0
class Parallelism(object):
    """Multi-process map helper.

        pl = Parallelism()
        pl.add(your_func, your_iter)
        pl.run()
        data = pl.get_results()
    """
    def __init__(self, processes=cpu_count()):
        '''
        :param processes: number of worker processes (defaults to CPU count)
        '''
        self.pool = Pool(processes=processes)
        self.total_processes = 0       # async jobs submitted so far
        self.completed_processes = 0   # jobs whose success callback fired
        self.results = []
        self.data = None               # AsyncResult of the last submission
        self.cores = processes         # core count used for chunking

    def add(self, func, iter):
        """Submit *func* over *iter* (an iterable of argument tuples).

        Large lists are split into ``self.cores`` chunks so progress can be
        reported per chunk; anything else goes in a single submission.
        """
        if isinstance(iter,
                      list) and self.cores > 1 and len(iter) > self.cores:
            pLen = int(len(iter) / self.cores) + 1  # chunk size (loop-invariant)
            for i in range(self.cores):
                self.data = self.pool.starmap_async(
                    func,
                    iter[i * pLen:(i + 1) * pLen],
                    callback=self.complete,
                    error_callback=self.exception)
                self.total_processes += 1
        else:
            # BUG FIX: this branch was missing error_callback, so worker
            # exceptions in single-submission runs were silently lost
            # (the sibling implementation of this class passes it).
            self.data = self.pool.starmap_async(func=func,
                                                iterable=iter,
                                                callback=self.complete,
                                                error_callback=self.exception)
            self.total_processes += 1
        # self.data.get()

    def complete(self, result):
        """Success callback: collect results and report progress."""
        self.results.extend(result)
        self.completed_processes += 1
        print('Progress: {:.2f}%'.format(
            (self.completed_processes / self.total_processes) * 100))

    def exception(self, exception=None):
        """Error callback: print the worker exception."""
        print(exception)

    def run(self):
        """Block until the last submission finishes, then shut the pool down."""
        self.data.get()
        self.pool.close()
        self.pool.join()

    def get_results(self):
        return self.results
Пример #10
0
def multiprocess_video_to_json():
    """Convert every video file in ``input_vid_dir`` to JSON, one pool task
    per file, and print the wall-clock time taken."""
    # local import: perf_counter replaces the removed time.clock (below)
    import time

    files = next(walk(input_vid_dir))[2]
    processes = cpu_count()
    print(processes)
    pool = Pool(processes)
    # BUG FIX: time.clock() was deprecated in 3.3 and removed in Python 3.8;
    # time.perf_counter() is the documented replacement for timing spans.
    start = time.perf_counter()
    pool.starmap_async(video_to_json, zip(files))
    pool.close()
    pool.join()
    stop = time.perf_counter()
    print("Time Taken : ", stop - start)
Пример #11
0
def main():
    """Scrape essay pages 1..499 with a 16-process pool (no proxies)."""
    worker_pool = Pool(16)
    # Proxy handling is disabled; an empty list means direct connections.
    # proxies = read_proxy()
    # proxies = read_available_proxy()
    # proxies = pool.map(test_proxy, proxies)
    # proxies = [p for p in proxies if p is not None]
    proxies = []
    urls = ["https://www.jiaozw.cn/zuowen/{}.html".format(i) for i in range(1, 500)]
    task_params = ((url, proxies) for url in urls)
    worker_pool.starmap_async(extract_content, task_params)
    worker_pool.close()
    worker_pool.join()
Пример #12
0
class Parallelism(object):
    """Multi-process map helper.

        pl = Parallelism()
        pl.add(your_func, your_iter)
        pl.run()
        data = pl.get_results()
    """

    def __init__(self, processes=cpu_count()):
        '''
        :param processes: number of worker processes (defaults to CPU count)
        '''
        self.pool = Pool(processes=processes)
        self.total_processes = 0
        self.completed_processes = 0
        self.results = []
        self.data = None
        self.cores = processes  # CPU core count used for chunking

    def add(self, func, iter):
        """Submit *func* over *iter*, chunking large lists across cores."""
        chunkable = (isinstance(iter, list)
                     and self.cores > 1
                     and len(iter) > self.cores)
        if chunkable:
            size = int(len(iter) / self.cores) + 1
            for idx in range(self.cores):
                chunk = iter[int(idx * size):int((idx + 1) * size)]
                self.data = self.pool.starmap_async(func, chunk,
                                                    callback=self.complete,
                                                    error_callback=self.exception)
                self.total_processes += 1
        else:
            self.data = self.pool.starmap_async(func=func, iterable=iter,
                                                callback=self.complete,
                                                error_callback=self.exception)
            self.total_processes += 1

    def complete(self, result):
        """Success callback: accumulate results and print progress."""
        self.results.extend(result)
        self.completed_processes += 1
        done = (self.completed_processes / self.total_processes) * 100
        print('Progress: {:.2f}%'.format(done))

    def exception(self, exception=None):
        """Error callback: report the worker exception."""
        print(exception)

    def run(self):
        """Wait for the last submission, then close and join the pool."""
        self.data.get()
        self.pool.close()
        self.pool.join()

    def get_results(self):
        return self.results
Пример #13
0
def executable():
    """Plot per-sample coverage results using a process pool.

    One ``plot_results_start`` task is dispatched per BAM sample found in
    ``global_args.sample_dir``.
    """
    pool = Pool(processes=global_args.cores)

    # Create the output directories once, up front. makedirs(exist_ok=True)
    # replaces the original per-iteration try/mkdir/except-pass, which also
    # silently swallowed unrelated OSErrors (e.g. permission problems).
    os.makedirs(global_args.output_dir + "frames_coverage_start/", exist_ok=True)
    os.makedirs(global_args.output_dir + "frames_coverage_term/", exist_ok=True)

    thread_args = [(bampath, bamname)
                   for bampath, bamname in runall_samples(global_args.sample_dir)]

    pool.starmap_async(plot_results_start, thread_args)
    pool.close()
    pool.join()
Пример #14
0
    def predict_from_image_batch(self, mnist_batch, index):
        """Predict the class of every image in *mnist_batch*.

        Keys are extracted in parallel via a process pool, then each key
        set is looked up sequentially through a shared HBase connection
        pool.

        :param mnist_batch: list of MNIST images (one per prediction)
        :param index: batch number, used only for logging
        :return: list of per-image predictions
        """

        t0 = time.time()
        # shared connections used by the HBase lookups below
        connection_pool = ConnectionPool(size=self.CONNECTION_POOL_SIZE,
                                         host=HBaseManager.HOST,
                                         port=HBaseManager.PORT)
        hbase_manager = HBaseManager(connection_pool)

        process_pool = Pool(self.POOL_SIZE)
        n = len(mnist_batch)

        indexs = list(range(n))

        # CPU-bound key extraction fans out over the process pool;
        # .get() blocks until every image has been processed
        extract_process = process_pool.starmap_async(self.extract_keys,
                                                     zip(mnist_batch, indexs))
        extracted_keys = extract_process.get()

        predict_hash_args = zip(extracted_keys, indexs)

        # hash lookups stay in this process (they share hbase_manager)
        predictions = [
            self.predict_hash_values(keys, hbase_manager, i)
            for keys, i in predict_hash_args
        ]

        process_pool.close()

        t1 = time.time()
        print("Mnist Batch {} predicted in: {} Seconds, For Node: {}".format(
            str(index), str(t1 - t0), self.__str__()))

        return predictions
Пример #15
0
def intensity_parallel(q, points, f, lmax, core_num=2, proc_num=4):
    '''Compute scattering intensity for *q* across multiple processes.

    With einsum the single-process path is already fast and memory-friendly,
    so multiprocessing is kept only as a fallback for very large inputs
    (speed, or point sets too big for memory in one go). NOTE: when the
    workload is small, process start-up can cost more than the computation
    itself -- prefer fewer slices or a single process in that case.

    :param q: array of scattering-vector magnitudes (sliced across workers)
    :param core_num: number of worker processes (capped at cpu_count())
    :param proc_num: number of slices q is cut into
    '''
    core_num = cpu_count() if core_num > cpu_count() else int(core_num)

    slice_num = int(proc_num)
    slice_length = round(q.size / slice_num)

    # Cut q into slice_num pieces; the last piece absorbs the remainder.
    q_list = [q[i * slice_length:(i + 1) * slice_length]
              for i in range(slice_num - 1)]
    q_list.append(q[(slice_num - 1) * slice_length:])
    # Trailing slices can come out empty when q is short; drop them so the
    # workers never receive an empty array (it would raise).
    while q_list[-1].size == 0:
        q_list.pop()
    slice_num = len(q_list)

    pool = Pool(core_num)
    job_args = zip(q_list, [points] * slice_num, [f] * slice_num,
                   [lmax] * slice_num)
    async_result = pool.starmap_async(intensity, job_args)
    pool.close()
    pool.join()

    return np.array(async_result.get()).flatten()
Пример #16
0
def edit_analyse_multi(word, base, label, k=1):
    """Classify *word* by k-nearest-neighbour edit distance against *base*.

    Distances are computed in parallel with ``edidistance_multi``. Returns
    -1 for an empty base, ``label[0]`` when the base has a single entry,
    and otherwise the majority class among the k nearest neighbours.
    """
    if len(base) == 0:
        return -1
    if len(base) == 1:
        return label[0]

    pool = Pool()
    distances = pool.starmap_async(edidistance_multi,
                                   zip(base, [word] * len(base),
                                       label)).get()
    pool.close()

    # nearest first (entry[1] holds the distance)
    distances.sort(key=lambda entry: entry[1])

    votes = [0] * 10  # one bucket per digit class
    for neighbour in distances[:k]:
        votes[neighbour[0]] += 1

    return votes.index(max(votes))
Пример #17
0
    def similarity_prediction(cls, similarity, number_of_neighbors):
        """Predict every (user, item) entry of the data matrix from the
        *number_of_neighbors* most similar users, in parallel.

        :param similarity: similarity measure forwarded to
            ``create_similarity_matrix``
        :param number_of_neighbors: neighbourhood size per user
        :return: dense matrix of predictions, same shape as ``cls.data``
        """
        shape = cls.data.shape

        time1 = time.time()
        cls.create_similarity_matrix(similarity)
        print("(TIME) Create similarity matrix:", time.time() - time1)

        time1 = time.time()

        # build one task per (user, item) pair; each row's tasks share the
        # same top-k neighbour set
        args = []
        for i, indexes in enumerate(cls.train_indexes):
            # NOTE(review): this set is never used afterwards -- confirm
            # whether it was meant to filter the columns below.
            indexes = set(indexes)
            # argpartition gives the indices of the k largest similarities
            # in O(n) without a full sort
            similar_users = np.argpartition(
                cls.similarity_matrix[i],
                -number_of_neighbors)[-number_of_neighbors:]
            args.extend([(i, j, similar_users) for j in range(shape[1])])

        pool = Pool()
        map_result = pool.starmap_async(cls.predict_entry, args)
        results = map_result.get()
        pool.close()
        pool.join()

        # scatter the (i, j, prediction) triples back into a dense matrix
        predicted_data = np.empty(shape)
        for i, j, prediction in results:
            predicted_data[i, j] = prediction

        print("(TIME) Predict data", time.time() - time1)

        return predicted_data
Пример #18
0
def simulatehoneycomb(self, verbose=1, usediag=False, multiprocess=True):
    '''Loop over the 2D matrix of parameter values defined by makeparamvalues2D, calculate the ground state
    for each point, search for transitions and save in self.honeycomb

    :param verbose: print progress / timing information when truthy
    :param usediag: forwarded to the solver (diagonal approximation)
    :param multiprocess: when True, grid rows are simulated in a 4-process pool
    '''
    t0 = time.time()
    paramnames = list(self.vals2D.keys())
    # all swept parameters share one 2D grid; take its shape from the first
    npointsx = np.shape(self.vals2D[paramnames[0]])[0]
    npointsy = np.shape(self.vals2D[paramnames[0]])[1]
    self.hcgs = np.empty((npointsx, npointsy, self.ndots))

    if multiprocess:
        # one task per grid row; simulate_row gets the whole object and
        # re-derives its row of points from it
        pool = Pool(processes=4)
        aa = [(i, self, npointsy, usediag) for i in range(npointsx)]
        result = pool.starmap_async(simulate_row, aa)
        out = result.get()
        self.hcgs = np.array(out)
    else:
        # sequential fallback: sweep the grid point by point, setting the
        # swept parameters on self before each solve
        for i in range(npointsx):
            if verbose:
                tprint('simulatehoneycomb: %d/%d' % (i, npointsx))

            for j in range(npointsy):
                for name in paramnames:
                    setattr(self, name, self.vals2D[name][i][j])
                self.makeH()
                self.solveH(usediag=usediag)
                self.hcgs[i, j] = self.OCC
    self.honeycomb, self.deloc = self.findtransitions(self.hcgs)

    if verbose:
        print('simulatehoneycomb: %.2f [s]' % (time.time() - t0))

    sys.stdout.flush()
Пример #19
0
def analyse_multi(word, base, label, k=1):
    """k-NN classification of *word* via compressed edit distance (GUI variant).

    Returns -1 for an empty base and ``label[0]`` for a single-entry base.
    Otherwise returns ``(word, predicted_class, nearest_matches)``; when no
    class collects more than one vote, the single nearest neighbour's class
    is used as the prediction.
    """
    if len(base) == 0:
        return -1
    if len(base) == 1:
        return label[0]

    pool = Pool()
    distances = pool.starmap_async(edidistance_multi_compress_GUI,
                                   zip(base, [word] * len(base),
                                       label)).get()
    pool.close()

    # nearest first (entry[1] holds the distance)
    distances.sort(key=lambda entry: entry[1])

    votes = [0] * 10
    nearest = []
    for neighbour in distances[:k]:
        votes[neighbour[0]] += 1
        nearest.append(neighbour[2])

    if max(votes) == 1:
        # no majority: fall back to the nearest neighbour's class
        return word, distances[0][0], nearest
    return word, votes.index(max(votes)), nearest
Пример #20
0
def grayscale_valid_files(valid_files_list, N_CPU, GRAYSCALE_RESULTS_PATH):
    """Convert not-yet-processed files to grayscale in parallel and append
    the per-file status rows to the results CSV.

    :param valid_files_list: all candidate files
    :param N_CPU: number of worker processes
    :param GRAYSCALE_RESULTS_PATH: CSV tracking which files are already done
    """
    # get new files (those without an entry in the results CSV yet)
    col_names = ['file', 'gray_status']
    new_files = get_new_files_to_be_processed(path=GRAYSCALE_RESULTS_PATH,
                                              col_names=col_names,
                                              index_col='file',
                                              all_files=valid_files_list)

    if len(new_files) > 0:
        args_1 = new_files
        args_2 = ['jpg'] * len(args_1)
        all_args = zip(args_1, args_2)

        print('start grayscaling..')
        print('grayscaling files: ', len(args_1), ' cpus', N_CPU)
        # BUG FIX: the pool was never closed or joined; the context manager
        # guarantees the workers are cleaned up once the results are in.
        with Pool(processes=N_CPU) as pool:
            results_gray = pool.starmap_async(img_to_grayscale, all_args).get()
        print('..done grayscaling')

        with open(GRAYSCALE_RESULTS_PATH, 'a', newline='') as outcsv:
            writer = csv.writer(outcsv)
            writer.writerows(results_gray)
Пример #21
0
    def _process_slices_parallel(function_name, *vols, cores=0):
        """
        Runs a defined function over the slice direction on parallel threads
        :param function_name: function to be performed (must operate on a 2D image)
        :param *vols: image volumes (3D) to pass to function - must be same size
        :param cores: number of cores to run on [default: 1 or max - 1]
        :return:
        """

        # cores defaults to number of CPUs - 1
        if cores is 0:
            cores = max(1, cpu_count() - 1)

        pool = Pool(cores)

        # start timer
        t1 = time.time()

        # convert to list
        vols = list(vols)

        sub_arrays = pool.starmap_async(
            function_name,
            [([vols[v][:, :, zz] for v in range(0, vols.__len__())])
             for zz in range(0, vols[0].shape[2])]).get()

        # print function duration info
        print('%s duration: %.1fs [%d processes]' %
              (function_name.__name__, (time.time() - t1), cores))

        # return recombined array
        return np.stack(sub_arrays, axis=2)
Пример #22
0
def predict_next_stage(trainer, stage_to_be_predicted_folder):
    """Predict validation cases with *trainer* and resample each result to
    the next stage's resolution, writing one ``*_segFromPrevStage.npz``.

    :param trainer: network trainer exposing ``dataset_val`` and
        ``predict_preprocessed_data_return_softmax``
    :param stage_to_be_predicted_folder: folder holding the next stage's
        data files (used to obtain each case's target shape)
    """
    output_folder = join(pardir(trainer.output_folder), "pred_next_stage")
    maybe_mkdir_p(output_folder)

    process_manager = Pool(2)
    results = []

    for pat in trainer.dataset_val.keys():
        print(pat)
        data_file = trainer.dataset_val[pat]['data_file']
        data_preprocessed = np.load(data_file)['data'][:-1]
        predicted = trainer.predict_preprocessed_data_return_softmax(
            data_preprocessed, True, 1, False, 1,
            trainer.data_aug_params['mirror_axes'], True, True, 2,
            trainer.patch_size, True)
        data_file_nofolder = data_file.split("/")[-1]
        data_file_nextstage = join(stage_to_be_predicted_folder,
                                   data_file_nofolder)
        data_nextstage = np.load(data_file_nextstage)['data']
        target_shp = data_nextstage.shape[1:]
        output_file = join(
            output_folder,
            data_file_nextstage.split("/")[-1][:-4] + "_segFromPrevStage.npz")
        # resampling/saving runs in the background while the next case
        # is being predicted on this process
        results.append(
            process_manager.starmap_async(
                resample_and_save, [(predicted, target_shp, output_file)]))

    # wait for all background saves, then shut the pool down
    # (BUG FIX: the original never closed or joined the pool)
    _ = [i.get() for i in results]
    process_manager.close()
    process_manager.join()
Пример #23
0
    def get(self, request):
        """
        Clean a vendor's e-mail list in parallel.

        Steps being taken ATM:
        -> Get emails for vendor in state 2
        -> Filter out emails from unsub list
        -> Spawn (cpu_count) processes, and assign cleaning process to each
        -> Collect the result of list of booleans, and create cleaned emails accordingly
        """
        vendor_id = request.GET.get('vendor_id')
        vendor = Group.objects.get(id=vendor_id)
        if vendor:
            emails = vendor.get_emails_for_vendor()
            # drop addresses that opted out
            filtered_emails = set(emails).difference(
                UnSubscribedEmails.get_all_unsubscribed())
            host_name = socket.gethostname()

            pool = Pool(processes=cpu_count())
            # close inherited DB connections so forked workers open their
            # own instead of sharing the parent's sockets
            db.connections.close_all()
            result_set = pool.starmap_async(
                CleaningService.cleaner,
                [(vendor_id, email, host_name, process_num)
                 for process_num, email in enumerate(filtered_emails, 1)])
            result = result_set.get()
            # NOTE(review): the pool is never closed/joined here -- confirm
            # whether relying on garbage collection is intentional.
            logging.info(
                f"CLEANING UPDATE: Total {result.count(True)} emails added to CleanedDB"
            )
            # self._bulk_creator(emails=list(compress(emails, result)), vendor_id=vendor_id)
            vendor.cleaned = vendor.STATUS_CLEANED
            # number of e-mails that survived cleaning
            vendor.cleaned_count = sum([i for i in result if i])
            vendor.save()
        return Response({'success': True}, status=status.HTTP_200_OK)
Пример #24
0
    def test(self, test_x):
        ''' Return the predicted y array (classes) for *test_x*.

        Test data passes through the same encoder as training data to
        produce query vectors, which are then classified in parallel.
        '''
        if self.PCA_projection:
            test_x = self.PCA_test(test_x)
        query_vector = np.zeros(
            (len(test_x), 1, self.nof_dimension)).astype(int)
        self.y_pred = np.zeros((len(test_x), 1))
        # Each feature value is bucketed into one of `level` bins, so the
        # value range of each bin must be fixed. E.g. with level=21 and a
        # value range of 0..20, values in [0, 20/21) map to level 0.
        ''' encoding and prediction'''
        # Fan the per-sample encode+query work out over a multiprocessing
        # Pool (Pool() uses all CPU cores; starmap distributes the tuples
        # and collects one return value per call into a list).
        # NOTE: mutations of self.y_pred made inside worker processes do
        # NOT propagate back to this process -- only returned values count
        # (try changing self.y_pred[0][0] in the worker and printing here).
        start = time.time()
        pool = Pool()

        self.y_pred = np.array([
            pool.starmap_async(self.encoder_query_vector,
                               [(test_x[data, :], query_vector[data, :], data)
                                for data in range(len(test_x))]).get()
        ]).reshape((len(test_x), 1))
        pool.close()
        pool.join()
        end = time.time()

        return self.y_pred
Пример #25
0
    def train_batch(self, mnist_batch, index):
        '''
        Extract hash keys from a batch of MNIST observations in parallel
        and store them in HBase.

        :type mnist_batch: list of tuple
        :param index: batch number, used only for logging
        :rtype: None
        '''

        t0 = time.time()

        # shared Thrift connections used by the HBase writes below
        connection_pool = ConnectionPool(size=self.CONNECTION_POOL_SIZE, host=HBaseManager.HOST, port=HBaseManager.PORT)
        hbase_manager = HBaseManager(connection_pool)

        # BUG FIX: a ThreadPool was also created here, closed at the end,
        # and never used in between -- removed.
        process_pool = Pool(self.POOL_SIZE)
        n = len(mnist_batch)

        numbers, mnist_images = MnistHelper.extract_numbers_images(mnist_batch)
        # NOTE(review): this overwrites the images returned by the helper
        # with the raw predictor column -- presumably intentional; confirm.
        mnist_images = [mnist_obs[MnistModel.PREDICTOR_INDEX] for mnist_obs in mnist_batch]
        indexs = list(range(n))

        # CPU-bound key extraction fans out across the process pool
        extract_process = process_pool.starmap_async(self.extract_keys, zip(mnist_images, indexs))
        extracted_keys = extract_process.get()

        # store sequentially through the shared HBase connection pool
        store_hash_args = zip(extracted_keys, numbers, indexs)
        [self.store_hash_values(k, n, hbase_manager, i) for k, n, i in store_hash_args]

        process_pool.close()
        process_pool.join()

        t1 = time.time()
        print("Time taken to train batch {} : {} Seconds".format(str(index),str(t1 - t0)))
Пример #26
0
def save_predictions_MLPerf(predictions, output_folder, output_files, dictionaries, num_threads_nifti_save, all_in_gpu, force_separate_z=None, interp_order=3, interp_order_z=0):
    """Export softmax predictions as NIfTI segmentations using a pool of
    ``num_threads_nifti_save`` writer processes.

    :param predictions: one softmax volume per output file
    :param output_folder: destination directory for the ``.nii.gz`` files
    :param output_files: file names (without extension), aligned with predictions
    :param dictionaries: per-case property dicts forwarded to the writer
    :param num_threads_nifti_save: size of the writer process pool
    :param all_in_gpu: when True the volumes are float16 (2 bytes per voxel)
    :param force_separate_z: resampling option forwarded to the writer
    :param interp_order: interpolation order forwarded to the writer
    :param interp_order_z: z interpolation order forwarded to the writer
    """
    print("Saving predictions...")
    pool = Pool(num_threads_nifti_save)
    results = []
    for i, output_filename in enumerate(output_files):
        print(i, "/", len(output_files))
        output_filename = os.path.join(output_folder, output_filename + ".nii.gz")
        softmax_mean = predictions[i]
        dct = dictionaries[i]
        bytes_per_voxel = 4
        if all_in_gpu:
            bytes_per_voxel = 2  # if all_in_gpu then the return value is half (float16)
        # multiprocessing cannot pipe objects of ~2 GB or more between
        # processes, so oversized volumes are spilled to disk and passed
        # to the worker by file name instead
        if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85):  # * 0.85 just to be safe
            print(
                "This output is too large for python process-process communication. Saving output temporarily to disk")
            np.save(output_filename[:-7] + ".npy", softmax_mean)
            softmax_mean = output_filename[:-7] + ".npy"

        results.append(pool.starmap_async(save_segmentation_nifti_from_softmax,
                                          ((softmax_mean, output_filename, dct, interp_order, None, None, None,
                                            None, None, force_separate_z, interp_order_z),)
                                          ))
    # block until every writer task has finished
    _ = [i.get() for i in results]

    pool.close()
    pool.join()

    # drop the (potentially huge) local reference
    del predictions
Пример #27
0
class MultiProcess(object):
    """Thin wrapper around ``multiprocessing.Pool.starmap_async``.

    Usage: ``add_params()`` / ``add_func()``, then ``deal()`` to submit
    asynchronously and ``wait()`` to block until completion.
    """

    def __init__(self, work_num: int = 0):
        # 0 means "use every available core"
        self.work_num = work_num if work_num else cpu_count()
        self.pool = Pool(self.work_num)
        self.params = []
        self.func = None
        self.res = None

    def add_params(self, params):
        """Set the iterable of argument tuples for the next submission."""
        self.params = params

    def add_func(self, func):
        """Set the worker function for the next submission."""
        self.func = func

    def deal(self):
        """Submit the configured work to the pool asynchronously."""
        logger.info("generate {} worker pool for {}".format(self.work_num, self.func))
        self.res = self.pool.starmap_async(self.func, self.params)

    def wait(self):
        """Block until the submitted work finishes, then close the pool."""
        logger.info("wait process finish")
        if self.res:
            self.res.get()
        if self.pool:
            self.pool.close()
Пример #28
0
def getCassandraDataPerHr(endtime=1475823180):
    """Fetch one hour of Cassandra data (60 one-minute reads in parallel)
    and push each minute of data to the blockchain.

    :param endtime: epoch timestamp marking the end of the hour to fetch
    """
    # getDataCassandra() takes on avg 0.449 secs to get 1 min of data.
    # Benchmark it with:
    # print(timeit.timeit("getDataCassandra()", setup="from __main__ import getDataCassandra", number=100 ))
    p = Pool(60)
    async_result = p.starmap_async(getDataCassandra, get_arg(endtime))
    async_result.wait()
    # [ [return from getDataCassandra call] , [..], ... ]; len == len(get_arg())
    double_array = async_result.get()
    # shut the pool down (BUG FIX: the original leaked it, and reused the
    # name `re` for both the AsyncResult and the blockchain return value)
    p.close()
    p.join()

    # this loop writes every min of data to file and aggregates file names
    # in 'filenames'; those names can be passed to the blockchain writing
    # function with the -f option
    """filenames = []
    for i in range(len(double_array)):
        fname = str(double_array[i][0][0])
        f = open("outputs/"+fname, 'a')
        f.write(str(double_array[i][0]))
        filenames.append(fname)
    """

    # or using this loop pass every min of data to the blockchain directly
    # with -s option; use this for sequential add/verify operations
    for minute_data in double_array:
        added = bc_app.add(data=str(minute_data), datatype="string")
        if added:
            print("add a min of record to blockchain")
        else:
            print("Error: " + minute_data[0] + " is not added")
Пример #29
0
def main():
    """Fetch record ids for every genre and dump them to ``records.json``."""
    genre_set = make_genres(GENRES)

    pool = Pool(8)
    p_output = pool.map_async(get_body, genre_set).get()
    records = pool.starmap_async(extract_ids, p_output).get()
    pool.close()
    pool.join()
    # BUG FIX: the file handle passed inline to json.dump was never closed;
    # the context manager guarantees it is flushed and closed.
    with open('records.json', 'w') as fh:
        json.dump(records, fh)
Пример #30
0
def main():
    """Fetch detail data for every id/tag pair in ``idt.json`` and dump the
    combined result to ``data_final.json``."""
    # BUG FIX: both open() calls left their file handles dangling; the
    # context managers guarantee the files are closed (and the output
    # flushed to disk).
    with open('idt.json') as fh:
        idt = json.load(fh)
    comb_list = [(id, _['tags']) for id, _ in idt.items()]

    pool = Pool(8)
    data = pool.starmap_async(fetch_data, comb_list).get()
    pool.close()
    with open('data_final.json', 'w') as fh:
        json.dump(data, fh)
Пример #31
0
class Parallel(BaseParallel):
    """Multi-process Parallel implementation.

    Relies on BaseParallel for the shared bookkeeping (``self.data``,
    ``self.cores``, job counters and the ``complete`` callback).
    """
    def __init__(self, processes, initializer, init_args):
        super(Parallel, self).__init__(processes)
        self.pool = Pool(processes, initializer, init_args)

    def run(self, func, iter):
        """Run *func* over *iter* (argument tuples), chunked across cores,
        and block until every submission has finished."""
        if isinstance(iter,
                      list) and self.cores > 1 and len(iter) >= self.cores:
            n_cores = self.cores
            for i in range(n_cores):
                n_per_core = int(len(iter) / n_cores) + 1  # items per worker chunk
                self.data.append(
                    self.pool.starmap_async(func,
                                            iter[i * n_per_core:(i + 1) *
                                                 n_per_core],
                                            callback=self.complete,
                                            error_callback=self.exception))
                self.total_processes += 1
        else:
            self.data.append(
                self.pool.starmap_async(func=func,
                                        iterable=iter,
                                        callback=self.complete,
                                        error_callback=self.exception))
            self.total_processes += 1
        # poll each AsyncResult; .get() re-raises worker exceptions here
        for i in range(self.total_processes):
            try:
                while not self.data[i].ready():
                    time.sleep(0.5)
                self.data[i].get()
            except Exception as e:
                msg = str(e)
                # JQData02: data-quota error -- sleep ~2h until the quota resets
                if "JQData02" in msg:
                    now = dt.datetime.today()
                    next = (now + dt.timedelta(hours=2))
                    sleep_time = (next - now).total_seconds()
                    logger.info('等待数据权限更新...下次启动时间: {}'.format(next))
                    time.sleep(sleep_time)
                logger.exception(e)
        self.pool.close()
        self.pool.join()

    def exception(self, exception=None):
        logger.exception(exception)
Пример #32
0
def build_trees(topo, real, num_rep, part):
    """Build the missing spanning-tree packs ('bfs', 'gtx', 'rst') for every
    packed graph matching the topology pattern, smallest graphs first, using
    a pool of 8 worker processes.
    """
    pattern = 'nantes/{}_{}*.pack'.format(topo, 'yes' if real else 'no')

    def already_a_tree(path):
        # Tree packs carry one of the tree-kind tags in their filename.
        return any(tag in path for tag in ('gtx', 'bfs', 'rst'))

    # Pair each source graph with its size and sort ascending by size.
    candidates = [(f, os.stat(f).st_size)
                  for f in glob(pattern) if not already_a_tree(f)]
    candidates.sort(key=lambda pair: pair[1])

    pending = []
    for graph_file, size_b in candidates:
        base = os.path.splitext(graph_file)[0]
        for rep, kind in product(range(num_rep), ['bfs', 'gtx', 'rst']):
            tid = part * num_rep + rep
            target = '{}_{}_{}.pack'.format(base, kind, tid)
            # Only schedule trees that do not exist yet.
            if not os.path.exists(target):
                pending.append((size_b, (graph_file, kind, tid)))

    num_threads = 8
    pending = distribute_tasks(pending, num_threads)
    pool = Pool(num_threads)
    pool.starmap_async(single_tree, pending,
                       chunksize=max(1, len(pending) // num_threads))
    pool.close()
    pool.join()
Пример #33
0
def _speckleDisplacementMulticore(image, image_ref, stride,
                                  halfsubwidth, halfTemplateSize,
                                  subpixelResolution,
                                  ncores, taskPerCore, verbose):
    """Compute speckle displacement maps over a grid of windows, in parallel.

    Exactly one of ``subpixelResolution`` (register_translation method) or
    ``halfTemplateSize`` (match_template method) must be non-``None``.

    Returns a tuple ``(sx, sy, error, stride)`` where ``sx``/``sy``/``error``
    are 2-D arrays of shape ``(len(irange), len(jrange))``.

    Raises ``ValueError`` if both method selectors are ``None``.
    """
    print('MESSAGE: _speckleDisplacementMulticore:')
    print("MESSAGE: %d cpu's available" % cpu_count())
    nprocesses = int(cpu_count() * ncores)
    p = Pool(processes=nprocesses)
    # Use our own count rather than the private Pool._processes attribute.
    print("MESSAGE: Using %d cpu's" % nprocesses)

    # Grid of window-centre coordinates, leaving a half-window margin
    # around the image border.
    irange = np.arange(halfsubwidth, image.shape[0] - halfsubwidth + 1, stride)
    jrange = np.arange(halfsubwidth, image.shape[1] - halfsubwidth + 1, stride)

    ntasks = np.size(irange) * np.size(jrange)
    chunksize = ntasks // nprocesses // taskPerCore + 1

    if subpixelResolution is not None:
        if verbose:
            print('MESSAGE: register_translation method.')
        parList = [image, image_ref, halfsubwidth, subpixelResolution]
        func_4_starmap_async = _func_4_starmap_async_method1
    elif halfTemplateSize is not None:
        if verbose:
            print('MESSAGE: match_template method.')
        parList = [image, image_ref, halfsubwidth, halfTemplateSize]
        func_4_starmap_async = _func_4_starmap_async_method2
    else:
        # Previously this fell through to a NameError on `parList` and
        # leaked the pool; fail fast with a clear message instead.
        p.close()
        p.join()
        raise ValueError('Either subpixelResolution or halfTemplateSize '
                         'must be provided.')

    res = p.starmap_async(func_4_starmap_async,
                          zip(itertools.product(irange, jrange),
                              itertools.repeat(parList)),
                          chunksize=chunksize)

    p.close()  # No more work

    wpu.progress_bar4pmap(res)  # Holds the program in a loop waiting
                                # starmap_async to finish

    # Fetch the results once (the original called res.get() three times,
    # converting the full list to an array on every call).
    results = np.array(res.get())
    grid_shape = (len(irange), len(jrange))
    sx = results[:, 0].reshape(grid_shape)
    sy = results[:, 1].reshape(grid_shape)
    error = results[:, 2].reshape(grid_shape)

    return (sx, sy, error, stride)
    received_data = {}
    active_processes = {}

    while True:
        data = data_catcher.get()
        file = data
        file = file.split("/")
        file_name = file[-1]
        file_path = file[:-1]
        if file_name not in active_processes.keys():
            api = graphing_api.GraphingApplication()
            api.open_file(data)

            number_of_trials = int(api.number_trials) + 1

            manager = Manager()
            queue = manager.Queue()
            pool_count = multiprocessing.cpu_count() * 2
            processes = Pool(processes=pool_count, maxtasksperchild=2)
            list_of_trials = [str(x) for x in range(1, number_of_trials)]

            print("starting file analysis")
            start_time = time.time()
            for image, trial in processes.starmap_async(load_to_memory, zip(repeat(data), list_of_trials)).get():
                received_data[trial] = image
            print("Took " + str(delay(start_time)) + "to finish analysis of file")

            parsed = True
            active_processes[file_name] = NewWindow(number_of_trials, file_name, received_data)
            active_processes[file_name].start()