Example #1
def main():
    base_filename = "../plots/survival/{}.pdf"

    survival_functions = [(sv.FractionOldNew, 'FractionNew'),
                          (sv.OldNewSurvival, 'OldNewMix'),
                          (sv.OldWaning, 'OldWaning')]

    p = Pool()
    p.map(run_survival_function, survival_functions)
Example #2
 def createZips(self):
     t1 = time()
     if __name__ == '__main__':
         self.get_list_of_id()  # get set of string id
         p = Pool()
         p.map(self.createZip, range(self.count_zips))
         p.close()
         p.join()
     print('Create .zip files time = ' + str(time() - t1) + 's')
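
Note: the if __name__ == '__main__' check above sits inside a method; the more common arrangement puts the guard at module level, so that workers spawned by Pool do not recreate the pool when they import the module. A minimal sketch with hypothetical names:

from multiprocessing import Pool

def create_zip(index):
    # stand-in for self.createZip in the example above
    return index * 2

if __name__ == '__main__':
    with Pool() as p:          # the pool is terminated automatically on exit
        results = p.map(create_zip, range(10))
    print(results)
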
Example #3
def fmultiprocess(log, function, inputArray, poolSize=False, **kwargs):
    """multiprocess pool

    **Key Arguments:**
        - ``log`` -- logger
        - ``function`` -- the function to multiprocess
        - ``inputArray`` -- the array to be iterated over

    **Return:**
        - ``resultArray`` -- the array of results

    **Usage:**

        .. code-block:: python 

            from fundamentals import multiprocess
            # DEFINE AN INPUT ARRAY
            inputArray = range(10000)
            results = multiprocess(log=log, function=functionName,
                                  inputArray=inputArray, otherFunctionKeyword="cheese")
    """
    log.info('starting the ``multiprocess`` function')

    # DEFINE POOL SIZE - NUMBER OF CPU CORES TO USE (BEST = ALL - 1)
    # if cpu_count() > 1:
    #     poolSize = cpu_count() - 1
    # else:
    #     poolSize = 1

    # if len(inputArray) < poolSize:
    #     poolSize = len(inputArray)
    if poolSize:
        p = Pool(processes=poolSize)
    else:
        p = Pool()

    # MAP-REDUCE THE WORK OVER MULTIPLE CPU CORES
    try:
        mapfunc = partial(function, log=log, **kwargs)
        resultArray = p.map(mapfunc, inputArray)
    except Exception:
        # fall back to calling the function without the ``log`` keyword
        mapfunc = partial(function, **kwargs)
        resultArray = p.map(mapfunc, inputArray)

    p.close()
    p.terminate()
    p.join()

    log.info('completed the ``multiprocess`` function')
    return resultArray
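
A minimal, self-contained sketch of the same partial-plus-Pool.map pattern shown in the docstring (the power function is an illustration, not part of the fundamentals package):

from functools import partial
from multiprocessing import Pool

def power(x, exponent=2):
    return x ** exponent

if __name__ == '__main__':
    with Pool(processes=4) as p:
        results = p.map(partial(power, exponent=3), range(10))
    print(results)   # [0, 1, 8, 27, ...]
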
Example #4
 def extract_patterns_matching_async(self):
     startTime = time.time()
     print "running on {} processors".format(WORKERS)
     pool = Pool(processes=WORKERS,
                 initargs=(sent_locker, lock, sentence_counter))
     pool.map(self.extract_patterns_from_file,
              self.data_wrapper.ngrams_files)
     pool.close()
     pool.join()
     total_time = time.time() - startTime
     print "extract_patterns_matching_async running time: {}".format(
         total_time)
Example #5
    def convertpool(self):

        if len(self.todo) > 0:

            if self.type in [".h264", ".mp4", ".avi"]:

                pool = Pool(min(self.pools, len(self.todo)))
                try:
                    pool.map(self.conv_single, self.todo)
                    pool.close()
                    lineprint("Done converting all videofiles!")
                except KeyboardInterrupt:
                    lineprint("User terminated converting pool..")
                    pool.terminate()
                except Exception as e:
                    excep = "Got exception: %r, terminating pool" % (e, )
                    lineprint(excep)
                    pool.terminate()
                finally:
                    pool.join()

                if self.delete:
                    for filein in self.todo:
                        os.remove(filein)
                    lineprint("Deleted all original videofiles..")

            elif self.type in [".jpg", ".jpeg", ".png"]:

                vidname = commonpref(self.todo)
                lineprint("Start converting " + str(len(self.todo)) +
                          " images")

                frame_array = []
                for filename in self.todo:
                    frame = cv2.imread(filename)
                    frame_array.append(frame)
                    #os.rename(filename, self.outdir+"/"+filename)
                h, w, _ = frame_array[0].shape
                if self.outdir != "":
                    vidname = self.outdir + "/" + os.path.basename(vidname)
                vidout = videowriter(vidname, w, h, self.imgfps,
                                     self.resizeval)
                for i in range(len(frame_array)):
                    vidout.write(frame_array[i])
                vidout.release()
                lineprint("Finished converting " + os.path.basename(vidname))

            else:
                lineprint("No video or image files found..")
Example #6
def main():
    workers = 5
    while True:
        try:
            # Worklist contains data to be distributed
            worklist = []
            # Launch workers
            process = Pool(workers)
            # Map data to worker_main function
            process.map(worker_main, worklist)
            # Block until all work completed
            process.close()
            process.join()
        except Exception as ex:
            print(str(ex))
Example #7
def calculate_prob(hole_cards, num_iterations, given_board):
    import itertools
    
    # must pip install these libraries (multiprocess, dill)
    from multiprocess import Pool
    import dill as pickle

    # create a pool of 4 worker processes
    p = Pool(4)

    deck_cards = prob_functions.generate_deck(hole_cards)
    possible_card_pairings = tuple(itertools.combinations(deck_cards, 2))
    card_combos = map( lambda x: tuple (list(hole_cards) + [x]), possible_card_pairings)

    s = pickle.dumps(lambda hc: single_prob(hc, num_iterations, given_board))
    f = pickle.loads(s)

    prob_list = p.map( f , card_combos)

    tie = 0
    win = 0
    for prob in prob_list:
        tie += prob[0] 
        win += prob[1]
    l = len(prob_list)
    tie = tie / l
    win = win / l

    return (tie,win)
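
This example leans on the third-party multiprocess package (backed by dill) precisely because the standard-library multiprocessing cannot pickle lambdas. A minimal sketch of that difference, assuming multiprocess is installed:

# pip install multiprocess dill
from multiprocess import Pool   # dill-based pickling, so lambdas can be mapped

if __name__ == '__main__':
    with Pool(4) as p:
        print(p.map(lambda x: x * x, range(5)))
    # the same call with multiprocessing.Pool typically fails to pickle the lambda
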
def compute_jaccard_list_vs_all(seed_indices):
    pool = Pool(14)
    affinities_to_seeds = np.array(
        pool.map(lambda i: compute_jaccard_i_vs_all(i), seed_indices))
    pool.close()
    pool.join()
    return affinities_to_seeds
def compute_jaccard_pairwise(indices,
                             square_form=True,
                             parallel=True,
                             return_poses=False):
    n = len(indices)

    if parallel:
        pool = Pool(16)
        scores_poses_tuples = pool.map(
            lambda x: compute_jaccard_i_vs_list(x[0], x[1]),
            [(indices[i], indices[i + 1:]) for i in range(n)])
        pool.close()
        pool.join()
    else:
        scores_poses_tuples = [
            compute_jaccard_i_vs_list(indices[i], indices[i + 1:])
            for i in range(n)
        ]

    pairwise_scores = np.array(
        [scores for scores, poses in scores_poses_tuples])

    if square_form:
        pairwise_scores = squareform(np.concatenate(pairwise_scores))

    if return_poses:
        poses = np.array([poses for scores, poses in scores_poses_tuples])
        return pairwise_scores, poses
    else:
        return pairwise_scores
Example #10
    def create_csv(self):
        t1 = time()
        file1 = open(self.out_csv1, "w")
        file1.write("id" + ',' + "level" + '\n')
        file2 = open(self.out_csv2, "w")
        file2.write("id" + ',' + "object_name" + '\n')
        file1.close()
        file2.close()

        if __name__ == '__main__':
            i = range(len(self.list_of_zips))
            p = Pool()
            p.map(self.parse_Zip, i)
            p.close()
            p.join()
        print('Create .csv files time = ' + str(time() - t1) + 's')
Example #11
    def main(self):

        urls = self.generateListOfUrls(self.WEBSITE)

        pool = Pool(5)
        p_map = pool.map(self.processPage, urls)
        self.saveLinks(p_map)
def load_scoremaps_multiple_sections_parallel(sections, stack, structure, downscale, detector_id):
    pool = Pool(12)
    scoremaps = pool.map(lambda sec: load_scoremap_worker(stack, sec, structure, downscale, detector_id=detector_id),
                                     sections)
    pool.close()
    pool.join()
    return {sec: sm for sec, sm in zip(sections, scoremaps) if sm is not None}
Example #13
def calculate_prob(hole_cards, num_iterations, given_board):
    import itertools

    # must pip install these libraries (multiprocess, dill)
    from multiprocess import Pool
    import dill as pickle

    # create a pool of 4 worker processes
    p = Pool(4)

    deck_cards = prob_functions.generate_deck(hole_cards)
    possible_card_pairings = tuple(itertools.combinations(deck_cards, 2))
    card_combos = map(lambda x: tuple(list(hole_cards) + [x]),
                      possible_card_pairings)

    s = pickle.dumps(lambda hc: single_prob(hc, num_iterations, given_board))
    f = pickle.loads(s)

    prob_list = p.map(f, card_combos)

    tie = 0
    win = 0
    for prob in prob_list:
        tie += prob[0]
        win += prob[1]
    l = len(prob_list)
    tie = tie / l
    win = win / l

    return (tie, win)
Example #14
def parallel_apply(df, func, n_cores, n_jobs):
    df_split = np.array_split(df, n_jobs)
    pool = Pool(n_cores)
    df = pd.concat(pool.map(func, df_split))
    pool.close()
    pool.join()
    return (df)
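
A usage sketch for parallel_apply, assuming the function above is defined at module level with Pool imported from multiprocessing (the dataframe and the add_total helper are illustrative):

import numpy as np
import pandas as pd

def add_total(chunk):
    # runs on one dataframe chunk inside a worker process
    chunk = chunk.copy()
    chunk["total"] = chunk.sum(axis=1)
    return chunk

if __name__ == '__main__':
    df = pd.DataFrame(np.random.rand(1000, 3), columns=list("abc"))
    out = parallel_apply(df, add_total, n_cores=4, n_jobs=8)
    print(out.shape)   # (1000, 4)
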
Example #15
def calculate_expected(uri, chroms, maxdis=2000000, balance=True, nproc=1):

    # B: Block Bias, constant for each copy number pair
    hic_pool = cooler.Cooler(uri)
    res = hic_pool.binsize
    maxdis = maxdis // res
    args = []
    for c in chroms:
        args.append((hic_pool, c, maxdis, balance))

    # Allocate processes
    if nproc == 1:
        results = list(map(_expected_core, args))
    else:
        pool = Pool(nproc)
        results = pool.map(_expected_core, args)
        pool.close()
        pool.join()

    expected = {}
    for i in range(1, maxdis + 1):
        nume = 0
        denom = 0
        for extract in results:
            if i in extract:
                nume += extract[i][0]
                denom += extract[i][1]
        if nume > 0:
            expected[i] = nume / denom

    return expected
Example #16
    def fit(self, frame=None, bootstrap=False, n_iter=200):
        """
        Fit Exponential Model
        """

        if not isinstance(frame, pd.core.frame.DataFrame):
            frame = self.infections.copy()
        if bootstrap:
            p = Pool()
            bootstrapped_lams = p.map(self.fit, bootstrap_frame(frame, n_iter))

        # generate durations and initial guess
        l1_d, l2_d, durations = self.GetInfectionDurations(frame)
        lam = np.random.random()

        # run minimization of negative log likelihood
        opt = minimize(decay_function,
                       lam,
                       args=(l1_d, l2_d),
                       method='L-BFGS-B',
                       bounds=((1e-6, None), ))
        self.optimizers.append(opt)
        self.estimated_lam = opt.x[0]

        if bootstrap:
            self.bootstrapped_lams = np.array(bootstrapped_lams)
            return (self.estimated_lam, self.bootstrapped_lams)
        else:
            return self.estimated_lam
Example #17
def main():
    xtcfp, top, sel, outp, T, dt, cf = parse_args()

    if os.path.isfile(xtcfp) and xtcfp.endswith(".xtc"):
        #seq = cal_rmsdmatrix(xtcfp, cf, top, sel, dt)
        assign_(xtcfp, cf, top, sel, dt=dt)
    else:
        if not os.path.exists(outp):
            os.mkdir(outp)
        cwd0 = os.getcwd()
        os.chdir(outp)
        if xtcfp.endswith("pkl3") or xtcfp.endswith("pkl") or xtcfp.endswith(
                "pkl2"):
            meta = pd.read_pickle(xtcfp)
            xtcfs = meta["traj_fn"].to_numpy()
            top = meta["top_fn"].iloc[0]
        else:
            xtcfs = [
                os.path.join(xtcfp, _) for _ in os.listdir(xtcfp)
                if _.endswith(".xtc")
            ]
        n_trajs = len(xtcfs)
        dtrajs = []
        if T > 1:
            pool = Pool(T)
            args = [(xtcfs[i], cf, top, sel, dt) for i in range(n_trajs)]
            dtrajs = pool.map(parallel, args)
        else:
            for i in range(n_trajs):
                dtraj = assign_(xtcfs[i], cf, top, sel, dt=dt, outfname=None)
                dtrajs.append(dtraj)
        os.chdir(cwd0)
        pd.to_pickle(dtrajs, "dtrajs.pkl3")
Example #18
    def run_parallel_fep(self, mutant_params, system_idx, mutant_idx, n_steps,
                         n_iterations, windows):
        logger.debug('Computing FEP for {}...'.format(self.name))
        if not self.opt:
            mutant_systems = mutant_params.build_fep_systems(
                system_idx, mutant_idx, windows)
        else:
            mutant_systems = mutant_params

        nstates = len(mutant_systems)
        chunk = math.ceil(nstates / self.num_gpu)
        groups = grouper(range(nstates), chunk)
        pool = Pool(processes=self.num_gpu)

        system = copy.deepcopy(self.wt_system)
        box_vectors = self.input_pdb.topology.getPeriodicBoxVectors()
        system.setDefaultPeriodicBoxVectors(*box_vectors)
        system.addForce(
            mm.MonteCarloBarostat(1 * unit.atmospheres,
                                  self.temperature * unit.kelvin, 25))  ###

        fep = partial(run_fep,
                      sim=self,
                      system=system,
                      pdb=self.extended_pdb,
                      n_steps=n_steps,
                      n_iterations=n_iterations,
                      all_mutants=mutant_systems)
        u_kln = pool.map(fep, groups)
        pool.close()
        pool.join()
        pool.terminate()
        ddg = FSim.gather_dg(self, u_kln, nstates)

        return ddg
Example #19
 def get_new_tickets(self, from_time=utils.pre_day_to_string(1)):
     search_conditions = {
         "skip": 0,
         "query": {
             "ctimeGte": "{}T21:00:00.000Z".format(from_time)
         }
     }
     pool_size = multiprocess.cpu_count()
     pool_volume = 10 * pool_size
     index = 0
     tickets_num = self._get_number_of_tickets(from_time, to_time)
     req_num = utils.ceil_division(tickets_num, 1000)
     pool = Pool(pool_size)
     for req_count in range(req_num):
         search_tickets = self.search_tickets(search_conditions)
         while True:
             tickets = pool.map(
                 self.add_attr_to_ticket,
                 itertools.islice(search_tickets, pool_volume))
             if tickets:
                 print('Downloaded {}/{} tickets'.format(
                     index, tickets_num),
                       end='\r')
                 index += pool_volume
                 yield tickets
             else:
                 break
         search_conditions['skip'] += 1000
Example #20
def main():
    model = Doc2Vec.load("/usr/local/apsis/queries/title_model")
    do_shingle = True
    print(scopus_doc.objects.filter(TI__isnull=True).count())
    if do_shingle:
        unshingled_s_docs = scopus_doc.objects.filter(shingle__exists=False)
        print(unshingled_s_docs.count())
        for sd in unshingled_s_docs:
            if not hasattr(sd, 'TI'):
                print(sd)
                sd.delete()
            else:
                #try:
                sd.shingle = list(shingle(sd.TI, 2))
                sd.save()
            #except:
            #    pass

    scopus_docs_all = scopus_doc.objects.filter(shingle__exists=True,
                                                DO__exists=True,
                                                doc2vec_checked=False)
    s_docs_i = scopus_docs_all.count()

    #s_docs_i = 10

    chunk_size = 3

    #similarity.objects.all().delete()

    for i in range(s_docs_i // chunk_size + 1):
        print(i)
        #t0 = time.time()
        f = i * chunk_size
        l = (i + 1) * chunk_size
        if l > s_docs_i:
            l = s_docs_i - 1
        s_docs = scopus_docs_all[f:l]

        print(s_docs)

        # initialise an empty list, and append sim items to it in parallel
        sims = []
        pool = Pool(processes=chunk_size)
        sims.append(pool.map(partial(compare, model=model), s_docs))
        pool.terminate()

        #sims = [item for sublist in sims for item in sublist]
        #try:
        #    sims = [item for sublist in sims for item in sublist]
        #except:
        #    pass

        # Flatten and remove nones
        #print(sims)
        sims = flatten(sims)
        sims = list(filter(None.__ne__, sims))

        similarity.objects.insert(sims)

        s_docs.update(doc2vec_checked=True)
Example #21
    def handle(self, *args, **options):
        qid = options['qid']

        q = Query.objects.get(pk=qid)
        docs = Doc.objects.filter(query=q,
                                  wosarticle__cr__isnull=False,
                                  cdo__citation__isnull=True)

        docs = docs

        ndocs = docs.count()

        print(ndocs)

        # Chunk size, so as to prevent overuse of memory
        chunk_size = 1000

        for i in range(ndocs // chunk_size + 1):
            cdos = []
            f = i * chunk_size
            print(f)
            l = (i + 1) * chunk_size
            if l > ndocs:
                l = ndocs - 1
            chunk_docs = docs[f:l]
            pool = Pool(processes=4)
            cdos.append(pool.map(doc_cites, chunk_docs))
            pool.terminate()
            gc.collect()

            django.db.connections.close_all()
            cdos = flatten(cdos)

            CDO.objects.bulk_create(cdos)
Example #22
def find_day_series(df,
                    day,
                    tol,
                    min_occurrence,
                    is_departure: bool,
                    num_procs=1):

    df_day = df[df["week day"] == day].copy()
    date_num = dict(
        zip(np.sort(df_day.day.unique()), range(len(df_day.day.unique()))))
    df_day["day_num"] = df_day.day.apply(lambda d: date_num[d])
    series = df_day.series.unique()
    len_tot = series.shape[0]
    len_slice = len_tot // num_procs
    split_series = [i * len_slice for i in range(num_procs)] + [len_tot]
    split_flights = tuple([(series[split_series[i]:split_series[i + 1]],
                            df_day[df_day.series.isin(
                                series[split_series[i]:split_series[i + 1]])],
                            tol, min_occurrence, is_departure)
                           for i in range(num_procs)])

    pool = Pool(num_procs)
    result = pool.map(compute_series, split_flights)
    final_df = pd.concat(result, ignore_index=True)
    pool.close()
    pool.join()

    return final_df
Example #23
    def _train_batch_parallelize(self, trees, n_incorrect_answers):
        """Parallelizes training for a list of trees.
        Uses the number of threads given by multiprocessing.cpu_count()

        Updates model parameters directly, and returns batch error.
        """
        # Defaults to using cpu_count() threads
        pool = Pool()
        
        def get_subbatch_deltas(_trees):
            return self._train_batch(_trees, n_incorrect_answers, 
                                     apply_learning=False)

        subbatches = utils.split(trees, n_slices=cpu_count())

        # result will be a list of tuples (error, deltas)
        result = pool.map(get_subbatch_deltas, subbatches)

        # no more processes accepted by this pool
        pool.close()   
        # Wait until mapping is completed
        pool.join()

        error = sum([r[0] for r in result])
        deltas = [r[1] for r in result]
        for (delta_Wv, delta_b, delta_We, delta_Wr) in deltas:
            self.Wv -= delta_Wv
            self.b -=  delta_b
            self.We -= delta_We
            self.Wr -= delta_Wr

        return error
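
Note that get_subbatch_deltas is defined inside the method, so this only works with a dill-capable pool (e.g. pathos/multiprocess); the stock multiprocessing.Pool cannot pickle nested functions. A sketch of the portable alternative using a module-level helper (the names and the picklability of the model are assumptions):

from functools import partial
from multiprocessing import Pool

def train_subbatch(trees, n_incorrect_answers, model):
    # a module-level function pickles cleanly with the stock multiprocessing
    return model._train_batch(trees, n_incorrect_answers, apply_learning=False)

# inside the method one could then write:
#     result = pool.map(
#         partial(train_subbatch, n_incorrect_answers=n_incorrect_answers, model=self),
#         subbatches)
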
Example #24
def main():

    #match.objects.all().delete()
    s_docs_count = scopus_doc.objects.filter(DO__exists=True,shingle__exists=True).count()
    print(s_docs_count)

    model = Doc2Vec.load("/queries/title_model")

    s_docs_count = 10025

    chunk_size = 10000

    for i in range(s_docs_count // chunk_size + 1):
        f = i * chunk_size
        l = (i + 1) * chunk_size - 1
        if l > s_docs_count:
            l = s_docs_count - 1
        s_docs = scopus_doc.objects.filter(DO__exists=True, shingle__exists=True)[f:l]

        matches = []
        sims = []
        pool = Pool(processes=16)
        #matches.append(pool.map(partial(find_match,),s_docs))
        sims.append(pool.map(partial(find_sim,),s_docs))
        pool.terminate()
        #matches = [x for x in matches[0] if x is not None]
        #matches = list(filter(None.__ne__, matches[0]))
        sims = list(filter(None.__ne__, sims[0]))
        #match.objects.insert(matches)
        similarity.objects.insert(sims)
Example #25
    def get_panorama(self, fname, pano_id, zoom_level=3):
        server_url = 'http://cbk%d.google.com/' % randint(0,3)
        pano_url = server_url + 'cbk?output=tile&panoid=%s&zoom=%d&x=%d&y=%d'
        zoom_sizes = {3:(7,4), 4:(13,7), 5:(26,13)}
        max_x, max_y = zoom_sizes[zoom_level]

        jobs = []
        for y in xrange(max_y):
            for x in xrange(max_x):
                tile_url = pano_url % (pano_id, zoom_level, x, y)
                jobs.append(tile_url)

        p = Pool(len(jobs))
        tiles = p.map(self.get_tile, jobs)
        p.close()

        if all(x.size for x in tiles):
            tiles = np.array(tiles)
            strips = []
            for y in xrange(max_y):
                strips.append(np.hstack(tiles[y*max_x:(y+1)*max_x,:,:,:]))
            pano = np.vstack(strips)
            pano = pano[0:1664, 0:3328]
        else:
            pano = np.array([])
        return pano
Example #26
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()
    del mainpool  # delete pool
    return out
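
Because map() has already returned its results, terminate() here only tears the idle workers down; a with-block gives the same cleanup (Pool's context manager also terminates the pool on exit) with less bookkeeping. A sketch of that form, with the module-level cores default replaced by an explicit number:

from multiprocessing import Pool

def pcall_mp(fun, args, cores=4):
    """Calls a function for every input in args"""
    with Pool(cores) as mainpool:   # terminated automatically on exit
        return mainpool.map(fun, args)
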
Example #27
    def run(self):
        """
        This functions reads the feature extraction filelist and creates a pool of processes to extract features
        from distinct files in parallel. It outputs one pymir3 FeatureTrack file per input file. Output is buffered
        to save memory and defer disk access.

        .. note::
            These keys are expected to be set in the experiment file:
                * ['general']['feature_extraction_filelist']
                * ['general']['scratch_directory']
                * ['feature_extraction']['output_buffer_size']
                * ['feature_extraction']['worker_extractors']

        """

        print("Running feature extraction behavior: %s" % self.name)

        # todo: use metadata file to add labels to track metadata (if available)
        # should guarantee the label in the metadata to make life easier, instead of using the file name (I don't think it's needed)

        with open(self.params['general']['feature_extraction_filelist']) as f:
            files = f.read().splitlines()

        # todo: use a multiprocessing.Manager to share the buffer (instead of doing it in chunks, as below)

        metas = copy.copy(files)
        files = []
        for i in metas:
            files.append(i.split("\t")[0])
        metas = []

        num_files = len(files)
        output_buffer_size = self.params['feature_extraction']['output_buffer_size']

        pool = Pool(processes=self.params['feature_extraction']['worker_extractors'])
        for i in range(0, num_files, output_buffer_size):
            print "processing files %d through %d of %d" % (i + 1, min(i + output_buffer_size, num_files), num_files)
            result = pool.map(self.extract, files[i:min(i + output_buffer_size, num_files)])

            T0 = time.time()
            for track in result:
                filename = acf_utils.extract_filename(track.metadata.filename, "wav") + ".features"
                filename = self.params['general']['scratch_directory'] + "/" + filename

                print "writing features to file %s..." % (filename)
                feature_file = open(filename, "w")
                track.save(feature_file)
                feature_file.close()
                del track
            T1 = time.time()
            print "writing feature files to disk took %f seconds" % (T1 - T0)

            del result
            gc.collect()

        pool.close()
        pool.join()

        print ('Feature extraction done!')
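
The buffering loop above is essentially pool.map applied to successive slices of the file list, writing each batch to disk before moving on. A stripped-down sketch of that chunking pattern (toy names, not the pymir3 API):

from multiprocessing import Pool

def process(path):
    return len(path)   # stand-in for the real feature extractor

if __name__ == '__main__':
    files = ['a.wav', 'b.wav', 'c.wav', 'd.wav', 'e.wav']
    buffer_size = 2
    with Pool(processes=2) as pool:
        for i in range(0, len(files), buffer_size):
            for result in pool.map(process, files[i:i + buffer_size]):
                print(result)   # write to disk here, then let the buffer go
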
Example #28
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    #    print("Using",cores,"cores")
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()  # clear the pool
    del mainpool  # delete pool
    return out
Example #29
def ospf_check():
    clear_log()
    devices = [x.split(',')[0] for x in open(devicesFile)]
    pool = Pool(processor)
    lock = Manager().Lock()
    list(pool.map(partial(_inf_ospf_check, lock), devices))
    pool.close()
    pool.join()
Example #30
def pcall_mp(fun,args,cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores) # create pool
#    print("Using",cores,"cores")
    out = mainpool.map(fun,args) # return list
    mainpool.terminate()
    del mainpool # delete pool
    return out
Example #31
def Multiprocessed_OCRPDF(
        source="",
        targetPath=None,
        processes=4,
        nice=5,
        verbose=False,
        tesseract_config='--oem 1 -l best/eng -c preserve_interword_spaces=1 textonly_pdf=1',
        logger=None):

    if isinstance(source, str):
        if verbose:
            (
                logger.info if logger else print
            )("You passed a string in as source. Trying this as source pdf file path."
              )
        page_count = PyPDF2.PdfFileReader(source).getNumPages()
    else:
        if verbose:
            (logger.info if logger else
             print)("OCRUSREX - Try extracting Images from bytes object")
        page_count = PyPDF2.PdfFileReader(io.BytesIO(source)).getNumPages()

    output = PyPDF2.PdfFileWriter()

    # set up a multiprocess pool with the specified number of processes. Then call the single-threaded OCRPDF pethod
    # on each page
    p = Pool(processes)
    for ocred_page in p.map(
            lambda p: OCRPDF(source=source,
                             verbose=verbose,
                             nice=nice,
                             page=p + 1,
                             tesseract_config=tesseract_config,
                             logger=logger), range(0, page_count)):
        output.addPage(PyPDF2.PdfFileReader(io.BytesIO(ocred_page)).getPage(0))

    if verbose:
        (logger.info if logger else print)("Multithreaded Execution Complete!")

    # If targetPath was provided, assume that it's a string and valid path. Try to write.
    if targetPath:
        outputStream = open(targetPath, "wb")
        output.write(outputStream)
        outputStream.close()
        # upon success, return truthy values (in this case, True)
        return True

    # otherwise, return results as bytes obj
    else:
        output_file_obj = io.BytesIO()
        output.write(output_file_obj)
        return output_file_obj.getvalue()

    if verbose:
        (logger.info if logger else print)(
            "Complete! Elapsed time: {0}".format(end - start))
Example #32
def get_gameplays():

    PlayTypeDict = {}

    PlayTypeStrings = {
        'Pass': ['pass incomplete', 'pass complete', 'sacked'],
        'Admin': ['spiked the ball', 'Timeout', 'Penalty', 'aborted'],
        'Kneel': ['knee', 'knelt'],
        'Punt': ['Punts'],
        'Field Goal': ['field goal', 'no good'],
        'Special Teams':
        ['kicks off', 'kicks onside', 'extra point', 'two point'],
        'Run': [
            'left end', 'right end', ' for ', 'up the middle', 'middle for',
            'left tackle', 'left guard', 'right guard', 'right tackle'
        ],
    }

    YearStart = 1998
    YearsToGo = 20
    for Year in range(YearStart, YearStart + YearsToGo):

        PlayTypeCounts = {
            'Pass': 0,
            'Run': 0,
            'Punt': 0,
            'Field Goal': 0,
            'Admin': 0,
            'Kneel': 0,
            'Special Teams': 0
        }
        for GameNumber in range(1, 17):
            print('Game', GameNumber, 'in', Year, 'Time: ', datetime.now())

            PlayTypeDict = {}
            PathList = []
            for Team in TeamLookup:
                for GameLocation in ['H', 'A']:
                    path = 'https://widgets.sports-reference.com/wg.fcgi?css=1&site=pfr&url=%2Fplay-index%2Fplay_finder.cgi%3Frequest%3D1%26match%3Dall%26year_min%3D{YEAR}%26year_max%3D{YEAR}%26game_type%3DR%26game_num_min%3D{GameNumber}%26game_num_max%3D{GameNumber}%26week_num_min%3D0%26week_num_max%3D99%26game_location%3D{GameLocation}%26minutes_max%3D15%26seconds_max%3D0%26minutes_min%3D0%26seconds_min%3D0%26team_id%3D{TEAM}%26field_pos_min_field%3Dteam%26field_pos_max_field%3Dteam%26end_field_pos_min_field%3Dteam%26end_field_pos_max_field%3Dteam%26type%255B%255D%3DPASS%26type%255B%255D%3DRUSH%26type%255B%255D%3DPUNT%26type%255B%255D%3DKOFF%26type%255B%255D%3DONSD%26type%255B%255D%3DFG%26type%255B%255D%3DXP%26type%255B%255D%3D2PC%26no_play%3DN%26turnover_type%255B%255D%3Dinterception%26turnover_type%255B%255D%3Dfumble%26score_type%255B%255D%3Dtouchdown%26score_type%255B%255D%3Dfield_goal%26score_type%255B%255D%3Dsafety%26order_by%3Dyds_to_go&div=div_all_plays&del_col=1,11,12,13,14'.format(
                        YEAR=Year,
                        GameNumber=GameNumber,
                        TEAM=Team,
                        GameLocation=GameLocation)

                    PathList.append(path)
                    #req = get(path)
            p = Pool(8)  # Pool tells how many at a time
            records = p.map(GetAndParsePath, PathList)
            p.terminate()
            p.join()

            with open(
                    'output/PlayTypeCounts-Year-' + str(Year) + '-Game-' +
                    str(GameNumber) + '.json', 'w') as outfile:
                json.dump(PlayTypeDict, outfile)
Example #33
File: olt.py Project: sjava/olt
def zte_gpon_svlan_check():
    clear_log()
    nodes = graph.cypher.execute(
        "match(n:Olt)--(c:Card) where c.name='GTGO' return n.ip,collect(c.slot)")
    olts = ((x[0], x[1]) for x in nodes)
    lzte_gpon_svlan = lambda x: zte_gpon_svlan(ip=x[0], slots=x[1])
    pool = Pool(8)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, lzte_gpon_svlan), olts))
    pool.close()
    pool.join()
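
compose(func, lzte_gpon_svlan) applies the right-hand function first and feeds its result to the lock-protected writer (right-to-left composition, toolz-style). A tiny illustration of that composition order, assuming toolz is installed:

from toolz import compose

def add_one(x):
    return x + 1

def double(x):
    return x * 2

f = compose(double, add_one)   # double(add_one(x)) -- the rightmost runs first
print(f(3))                    # 8
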
Example #34
File: switch.py Project: sjava/olt
def interface_check_m():
    clear_log()
    #  cmd = "match(s: Switch) where s.model in ['S8505','S8508'] return s.ip, s.model"
    cmd = "match(s: Switch)  return s.ip, s.model"
    #  cmd = "match(s:Switch) where s.model='S9306' or s.model='s9303' return s.ip,s.model limit 2"
    nodes = graph.cypher.execute(cmd)
    switchs = [(x[0], x[1]) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    out_inf = partial(output_interface_m, lock)
    list(pool.map(compose(out_inf, get_interface), switchs))
    pool.close()
    pool.join()
Example #35
File: olt.py Project: sjava/olt
def svlan_check():
    clear_log()
    #  nodes = graph.find('Olt', property_key='ip', property_value='9.192.96.246')
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='company', property_value='zte')
    olts = [(x['ip'], x['company'], x['area']) for x in nodes]
    #  list(map(compose(card_entry, get_card), olts))
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(svlan_entry, lock)
    list(pool.map(compose(func, get_svlan), olts))
    pool.close()
    pool.join()
Example #36
File: olt.py Project: sjava/weihu
def add_infs():
    funcs = {'zte': Zte.get_infs, 'hw': Huawei.get_infs}
    get_infs = partial(_company, funcs)

    clear_log()
    nodes = graph.cypher.execute(
        'match (n:Olt) return n.ip as ip,n.company as company')
    olts = [dict(ip=x['ip'], company=x['company']) for x in nodes]
    pool = Pool(128)
    lock = Manager().Lock()
    _add_infs_p = partial(_add_infs, lock)
    list(pool.map(compose(_add_infs_p, get_infs), olts))
    pool.close()
    pool.join()
Example #37
def main(args):

    filedate = args.filedate
    database = args.database

    slablist = ['alu','cal','cam','car','cas','cot','hal','hel','him','hin','izu','jap','ker','kur','mak','man','mue','pam','png','phi','puy','ryu','sam','sco','sol','sul','sum','van']

    indices = range(len(slablist))
    pool1 = Pool(args.nCores)
    partial_loop1 = partial(calls2d, database, filedate, slablist)

    pts = pool1.map(partial_loop1, indices)
    pool1.close()
    pool1.join()
Example #38
File: olt.py Project: sjava/olt
def hostname_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(hostname_entry, lock)
    list(pool.map(compose(func, get_hostname), olts))
    pool.close()
    pool.join()
    ip_hostname = (x.split(',') for x in open(result_file))
    cmd = "match (n:Olt) where n.ip={ip} set n.hostname={hostname}"
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], hostname=x[1]), ip_hostname))
Example #39
def get_vlan_usersP(bras):
    def _get_vlan_users(bas):
        funcs = {'m6k': M6k.get_vlan_users,
                 'me60': ME60.get_vlan_users}
        _gvu = partial(_model, funcs)
        return _gvu(bas)

    bras = [dict(ip=x[0], model=x[1], inf=x[2])
            for x in bras]
    pool = Pool(len(bras))
    temp = pool.map(_get_vlan_users, bras)
    pool.close()
    pool.join()
    temp = [x[1] for x in temp if x[1]]
    rslt = reduce(lambda x, y: merge_with(sum, x, y), temp)
    return rslt
Example #40
 def calculate(self, data):
     t1 = dt.datetime.utcnow()
     LOGGER.info('Starting calculation...')
     self._data = deepcopy(data)
     self._check_inputs(data)
     dep = self._dependencies()
     sorted_dep = topological_sort(dep)
     for items in sorted_dep:
         # loading node with inputs
         for item in items:
             node = self._get_node(item)
             args = [i_name for i_name in node.input_names if i_name not in node.kwargs]
             data_to_pass = []
             for arg in args:
                 data_to_pass.append(self._data[arg])
             kwargs_to_pass = {}
             for kwarg in node.kwargs:
                 kwargs_to_pass[kwarg] = self._data[kwarg]
             node.load_inputs(data_to_pass, kwargs_to_pass)
         # running nodes
         if self._parallel:
             pool = Pool(self._pool_size)
             results = pool.map(
                 Graph.run_node,
                 [self._get_node(i) for i in items]
             )
             pool.close()
             pool.join()
             results = {k: v for k, v in results}
         else:
             results = {}
             for item in items:
                 node = self._get_node(item)
                 res = node.run_with_loaded_inputs()
                 results[node.id] = res
         # save results
         for item in items:
             node = self._get_node(item)
             res = results[node.id]
             if len(node.output_names) == 1:
                 self._data[node.output_names[0]] = res
             else:
                 for i, out in enumerate(node.output_names):
                     self._data[out] = res[i]
     t2 = dt.datetime.utcnow()
     LOGGER.info('Calculation finished in {}'.format(t2-t1))
     return res
Example #41
File: olt.py Project: sjava/olt
def zhongji_check():
    clear_log()
    nodes = graph.find('Olt')
    #  nodes = graph.find('Olt', property_key='ip', property_value='172.18.0.46')
    olts = [(x['ip'], x['company']) for x in nodes]
    pool = Pool(16)
    lock = Manager().Lock()
    func = partial(zhongji_entry, lock)
    list(pool.map(compose(func, get_zhongji), olts))
    pool.close()
    pool.join()
    ports = (x.split(',') for x in open(result_file))
    cmd = """match(n: Olt) where n.ip = {ip} 
    merge(n) - [:HAS]->(m: Etrunk{name: {sm}}) 
    merge(m) - [:Include]->(p: Port{name: {interface}})"""
    list(map(lambda x: graph.cypher.execute(
        cmd, ip=x[0], sm=x[1], interface=x[2]), ports))
Example #42
def parallel_cdist(data1, data2, n_rows_per_job=100):

    from scipy.spatial.distance import cdist

    data1 = np.array(data1)
    data2 = np.array(data2)

    pool = Pool(12)

    start_indices = np.arange(0, data1.shape[0], n_rows_per_job)
    end_indices = start_indices + n_rows_per_job - 1

    partial_distance_matrices = pool.map(lambda (si, ei): cdist(data1[si:ei+1].copy(), data2), zip(start_indices, end_indices))
    pool.close()
    pool.join()

    distance_matrix = np.concatenate(partial_distance_matrices)
    return distance_matrix
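
The lambda (si, ei): ... form relies on Python 2 tuple-parameter unpacking (removed in Python 3) and on a dill-capable pool that can ship lambdas to workers. The row-chunking idea itself is independent of the pool; a serial sketch, assuming SciPy is available:

import numpy as np
from scipy.spatial.distance import cdist

data1 = np.random.rand(250, 3)
data2 = np.random.rand(40, 3)
n_rows_per_job = 100

starts = np.arange(0, data1.shape[0], n_rows_per_job)
blocks = [cdist(data1[si:si + n_rows_per_job], data2) for si in starts]
full = np.concatenate(blocks)
print(full.shape)   # (250, 40) -- identical to cdist(data1, data2)
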
Example #43
    def eval_EFG(self,x,num_procs=None,info=False):

        from multiprocess import Pool,cpu_count

        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EFG_sequential(x,num,i,info),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        if not info:
            assert(len(results) == 4)
        else:
            assert(len(results) == 5)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
Example #44
def add_power_info():
    funcs = {'S8508': S85.get_power_info,
             'S8505': S85.get_power_info,
             'T64G': T64.get_power_info,
             'S8905': S89.get_power_info,
             'S8905E': S8905E.get_power_info,
             'S9306': S93.get_power_info,
             'S9303': S93.get_power_info}
    get_power_info = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch) where s.snmpState='normal' return s.ip as ip,s.model as model")
    switches = [dict(ip=x['ip'], model=x['model']) for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_power_info, lock)
    list(pool.map(compose(_ff, get_power_info), switches))
    pool.close()
    pool.join()
Example #45
def add_traffics():
    funcs = {'S8508': S85.get_traffics,
             'S8505': S85.get_traffics,
             'T64G': T64.get_traffics,
             'S8905': S89.get_traffics,
             'S8905E': S8905E.get_traffics,
             'S9306': S93.get_traffics,
             'S9303': S93.get_traffics}
    get_traffics = partial(_model, funcs)
    #  clear_log()
    nodes = graph.cypher.execute(
        "match (s:Switch)--(i:Inf) where s.snmpState='normal' return s.ip as ip,collect(i.name) as infs,s.model as model")
    switchs = [dict(ip=x['ip'], infs=x['infs'], model=x['model'])
               for x in nodes]
    pool = Pool(processor)
    lock = Manager().Lock()
    _ff = partial(_add_traffics, lock)
    list(pool.map(compose(_ff, get_traffics), switchs))
    pool.close()
    pool.join()
Example #46
def compute_jaccard_pairwise(indices, square_form=True, parallel=True, return_poses=False):
    n = len(indices)

    if parallel:
        pool = Pool(16)
        scores_poses_tuples = pool.map(lambda x: compute_jaccard_i_vs_list(x[0],x[1]),
                                   [(indices[i], indices[i+1:]) for i in range(n)])
        pool.close()
        pool.join()
    else:
        scores_poses_tuples = [compute_jaccard_i_vs_list(indices[i], indices[i+1:]) for i in range(n)]

    pairwise_scores = np.array([scores for scores, poses in scores_poses_tuples])

    if square_form:
        pairwise_scores = squareform(np.concatenate(pairwise_scores))

    if return_poses:
        poses = np.array([poses for scores, poses in scores_poses_tuples])
        return pairwise_scores, poses
    else:
        return pairwise_scores
Example #47
    def eval_EQ(self,p,num_procs=None,quiet=True):
        """
        Evaluates E[Q(p,r)] and its gradient in parallel. 

        Parameters
        ----------
        p : generator powers
        num_procs : number of parallel processes
        quiet : flag
        """
       
        from multiprocess import Pool,cpu_count
 
        if not num_procs:
            num_procs = cpu_count()
        num_samples = self.parameters['num_samples']
        pool = Pool(num_procs)
        num = int(np.ceil(float(num_samples)/float(num_procs)))
        results = list(zip(*pool.map(lambda i: self.eval_EQ_sequential(p,num,i,quiet),range(num_procs),chunksize=1)))
        pool.terminate()
        pool.join()
        assert(len(results) == 2)
        assert(all([len(vals) == num_procs for vals in results]))
        return [sum(vals)/float(num_procs) for vals in results]
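
chunksize=1 hands each worker exactly one index, and zip(*...) transposes the list of per-worker tuples into per-quantity lists before averaging. A small illustration of that transpose-and-average step with dummy results:

# each worker returns a (value, gradient) tuple; dummy numbers shown here
results_per_worker = [(1.0, 10.0), (2.0, 20.0), (3.0, 30.0)]
values, gradients = zip(*results_per_worker)   # transpose
num_procs = len(results_per_worker)
print([sum(vals) / float(num_procs) for vals in (values, gradients)])   # [2.0, 20.0]
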
Example #48
 def get_new_tickets(self, from_time=utils.pre_day_to_string(1)):
     search_conditions = {
         "skip": 0,
         "query": {
             "ctimeGte": "{}T21:00:00.000Z".format(from_time)
          }
     }
     pool_size = multiprocess.cpu_count()
     pool_volume = 10 * pool_size
     index = 0
     tickets_num = self._get_number_of_tickets(from_time, to_time)
     req_num = utils.ceil_division(tickets_num, 1000)
     pool = Pool(pool_size)
     for req_count in range(req_num):
         search_tickets = self.search_tickets(search_conditions)
         while True:
             tickets = pool.map(self.add_attr_to_ticket, itertools.islice(search_tickets, pool_volume))
             if tickets:
                 print('Downloaded {}/{} tickets'.format(index, tickets_num), end='\r')
                 index += pool_volume
                 yield tickets
             else:
                 break
         search_conditions['skip'] += 1000
Example #49
        else:
            raise

#             input_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=5, version=version, resol='raw')
        out_dir = DataManager.get_image_dir_v2(stack=stack, prep_id=2, resol=resol, version=version)
        print 'out_dir:', out_dir
#             script = os.path.join(REPO_DIR, 'preprocess', 'warp_crop_IM_v3.py')

#         ! rm -rf {out_dir}
        create_if_not_exists(out_dir)

        t = time.time()

        pool = Pool(8)
        _ = pool.map(lambda img_name: crop(stack=stack, img_name=img_name, version=version, resol=resol, 
                                        x=x, y=y, w=w, h=h), 
                     metadata_cache['valid_filenames'][stack])
        pool.close()
        pool.join()

#             for img_name in metadata_cache['valid_filenames'][stack]:
#                 f(stack=stack, img_name=img_name, version=version, resol=resol, 
#                                             x=x, y=y, w=w, h=h)

    #     run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
    #                     {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
    #                     kwargs_list=[{'input_fp': DataManager.get_image_filepath_v2(stack=stack, prep_id=5, resol='raw', version=version, fn=img_name),
    #                                   'output_fp': DataManager.get_image_filepath_v2(stack=stack, fn=img_name, prep_id=2, version=version, resol='raw')}
    #                                  for img_name in metadata_cache['valid_filenames'][stack]],
    # #                                  for img_name in ['CHATM3_slide35_2018_02_17-S1']],
    #                     argument_type='single',
                ntb_to_nissl[ntb_v] = np.unique(a)[0]

        ntb_values = np.arange(0, 5000)
        ntb_matched_values = np.interp(ntb_values, 
                                       [ntb_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())], 
                                       [nissl_v for ntb_v, nissl_v in sorted(ntb_to_nissl.items())])
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_matched_values, (region1_x, region1_y, region1_w, region1_h)
        
    
    n_regions = 8
    
    pool = Pool(4)
    res = pool.map(f, range(n_regions))
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    
#     for region_id in range(10):
        
#         while True:
#             region1_x = np.random.randint(0, w-10000, 1)[0]
#             region1_y = np.random.randint(0, h-10000, 1)[0]
#             region1_w = 5000
#             region1_h = 5000
#             print region1_x, region1_y, region1_w, region1_h
            
#             tb_region1_xmin = region1_x / 32
#             tb_region1_xmax = (region1_x + region1_w) / 32
    
        ntb_blue_inv_bins = np.arange(5001)
        ntb_inv_to_nissl_mapping = np.interp(ntb_blue_inv_bins, ntb_inv_vals, nissl_vals)
        
        ntb_to_nissl_mapping = ntb_inv_to_nissl_mapping[5000 - ntb_blue_bins]
        ntb_to_nissl_mapping = np.round(ntb_to_nissl_mapping).astype(np.uint8)
                        
        ntb_matched_values_all_examples_one_section.append(ntb_to_nissl_mapping)
        region_bboxes_all_examples_one_section.append((region1_x, region1_y, region1_w, region1_h))
    
        sys.stderr.write('Compute matching: %.2f seconds.\n' % (time.time()-t))
        
        return ntb_to_nissl_mapping, (region1_x, region1_y, region1_w, region1_h)
            
    pool = Pool(4)
    res = pool.map(match_intensity_histogram_one_region, regions)
    ntb_matched_values_all_examples_one_section, region_bboxes_all_examples_one_section = zip(*res)
    pool.close()
    pool.join()
    

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_intensity_mapping_all_regions.npy' % (ntb_fn, nissl_fn))
    create_parent_dir_if_not_exists(fp)
    np.save(fp, np.asarray(ntb_matched_values_all_examples_one_section))
    upload_to_s3(fp)

    fp = os.path.join(DATA_DIR, stack, stack + '_intensity_mapping', '%s_to_%s_region_bboxes.npy' % (ntb_fn, nissl_fn))
    np.save(fp, np.asarray(region_bboxes_all_examples_one_section))
    upload_to_s3(fp)

    median_mapping_one_section = np.median(ntb_matched_values_all_examples_one_section, axis=0)
Example #52
def compute_jaccard_list_vs_all(seed_indices):
    pool = Pool(14)
    affinities_to_seeds = np.array(pool.map(lambda i: compute_jaccard_i_vs_all(i), seed_indices))
    pool.close()
    pool.join()
    return affinities_to_seeds
def compute_spm_histograms(labelmap, sample_locs, patch_size, M):
    """
    Args:
        labelmap (2d-ndarray of int):
        sample_locs (2d-ndarray): List of (x,y) locations at which to sample the SPM histograms
        M (int): number of unique SIFT descriptor words, aka. size of vocabulary
        
    Returns:
        hists_arr0 ((1,M)-array of int)
        hists_arr1 ((4,M)-array of int)
        hists_arr2 ((16,M)-array of int)
    """

    global labelmap_global
    labelmap_global = labelmap

    # compute level-2 histograms
    l = 2

    grid_size = patch_size / 2**l

    if l == 2:
        rx = [-2, -1, 0, 1]
        ry = [-2, -1, 0, 1]
    elif l == 1:
        rx = [-1, 0]
        ry = [-1, 0]
    elif l == 0:
        rx = [-.5]
        ry = [-.5]

    rxs, rys = np.meshgrid(rx, ry)

    patch_coords_allGrid = []

    for grid_i, (rx, ry) in enumerate(np.c_[rxs.flat, rys.flat]):

        patch_xmin = sample_locs[:,0] + rx * grid_size
        patch_ymin = sample_locs[:,1] + ry * grid_size
        patch_xmax = sample_locs[:,0] + (rx + 1) * grid_size
        patch_ymax = sample_locs[:,1] + (ry + 1) * grid_size

        patch_coords_allGrid.append([patch_xmin, patch_ymin, patch_xmax, patch_ymax])


    all_coords = np.hstack(patch_coords_allGrid)
    patch_xmin = all_coords[0]
    patch_ymin = all_coords[1]
    patch_xmax = all_coords[2]
    patch_ymax = all_coords[3]

    def compute_histogram_particular_label(i):
        m = (labelmap_global == i).astype(np.uint8)
        mi = cv2.integral(m)
        ci = mi[patch_ymin, patch_xmin] + mi[patch_ymax, patch_xmax] - mi[patch_ymax, patch_xmin] - mi[patch_ymin, patch_xmax]
        return ci

    t = time.time()
    # hists = Parallel(n_jobs=16)(delayed(compute_histogram_particular_label)(i) for i in range(1, M+1))
    # hists = Parallel(n_jobs=8)(delayed(compute_histogram_particular_label)(i) for i in range(1, M+1))
    pool = Pool(8)
    hists = pool.map(compute_histogram_particular_label, range(1, M+1))
    # pool.terminate()
    pool.close()
    pool.join()
    # del pool
    sys.stderr.write('done in %f seconds\n' % (time.time() - t)) # ~ 13 seconds

    n_grid = (2**l)**2
    hists_arr2 = np.transpose(np.reshape(hists, (M, n_grid, -1)))
    print hists_arr2.shape

    # compute level-1 histograms based on level-2 histograms

    hists_arr1 = np.transpose([hists_arr2[:, [0,1,4,5], :].sum(axis=1),
                               hists_arr2[:, [2,3,6,7], :].sum(axis=1),
                               hists_arr2[:, [8,9,12,13], :].sum(axis=1),
                               hists_arr2[:, [10,11,14,15], :].sum(axis=1)],
                              [1,0,2])
    print hists_arr1.shape

    # compute level-0 histograms based on level-1 histograms

    hists_arr0 = hists_arr1.sum(axis=1)
    print hists_arr0.shape

    return hists_arr0, hists_arr1, hists_arr2
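
The per-label counts above use the standard integral-image identity: the sum of a mask inside a box equals ii[y0, x0] + ii[y1, x1] - ii[y1, x0] - ii[y0, x1]. A tiny check of that identity with OpenCV (assuming cv2 is installed):

import cv2
import numpy as np

m = (np.random.rand(50, 60) > 0.5).astype(np.uint8)   # binary mask
ii = cv2.integral(m)                                   # shape (51, 61)

y0, x0, y1, x1 = 10, 5, 30, 40                         # box bounds, exclusive ends
box_sum = ii[y0, x0] + ii[y1, x1] - ii[y1, x0] - ii[y0, x1]
assert box_sum == m[y0:y1, x0:x1].sum()
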
    def save_scoremap(structure):
        viz_fp = DataManager.get_scoremap_viz_filepath(stack=stack, downscale=downscale, fn=fn, structure=structure, detector_id=detector_id)
        create_parent_dir_if_not_exists(viz_fp)
        
        try:
            if add_label_text:
                label_text = str(structure)
            else:
                label_text = None
            viz = scoremap_overlay_on(bg='original', stack=stack, fn=fn, structure=structure,
                                out_downscale=downscale, label_text=label_text, detector_id=detector_id,
                                     cmap_name=cmap_name, image_version=bg_image_version)
            imsave(viz_fp, img_as_ubyte(viz))
            upload_to_s3(viz_fp)
        except Exception as e:
            # raise e
            sys.stderr.write('%s\n' % e.message)
            return

    # for s in all_known_structures:
        # save_scoremap(s)

    pool = Pool(NUM_CORES/2)
    pool.map(save_scoremap, all_known_structures)
    pool.close()
    pool.join()

    sys.stderr.write('Visualize scoremaps: %.2f seconds.\n' % (time.time() - t))
    # 7s for one structure, one section, single process
    # 20s for all structures, one section, 8 processes
Example #55
if hasattr(args, "rescale_factor"):
    rescale_factor = args.rescale_factor
else:
    w = args.width
    h = args.height

n_jobs = args.jobs

def worker(img_name):

    input_fp = input_fp_map[img_name]
    output_fp = output_fp_map[img_name]
    create_parent_dir_if_not_exists(output_fp)

    img = imread(input_fp)
    save_data(img[::1/rescale_factor, ::1/rescale_factor], output_fp)


pool = Pool(n_jobs)
_ = pool.map(worker, in_image_names)
pool.close()
pool.join()

# run_distributed('convert \"%%(input_fp)s\" -crop %(w)dx%(h)d+%(x)d+%(y)d  \"%%(output_fp)s\"' % \
#                 {'w':w_raw, 'h':h_raw, 'x':x_raw, 'y':y_raw},
#                 kwargs_list=[{'input_fp': ,
#                               'output_fp': output_fp_map[img_name]}
#                              for img_name in metadata_cache['valid_filenames'][stack]],
#                 argument_type='single',
#                jobs_per_node=1,
#                local_only=True)
structure_colors = {n: np.random.randint(0, 255, (3,)) for n in all_known_structures}

def generate_annotation_viz_one_section(stack, fn, structure_colors=structure_colors, downsample_factor=downsample_factor):
    global contours
    
    if is_invalid(fn):
        return
    
    img_fp = DataManager.get_image_filepath(stack=stack, fn=fn, resol='lossless', version='compressed')
    download_from_s3(img_fp)
    img = imread(img_fp)
    viz = img[::downsample_factor, ::downsample_factor].copy()
    
    for name_u, color in structure_colors.iteritems():
        matched_contours = contours[(contours['name'] == name_u) & (contours['filename'] == fn)]
        for cnt_id, cnt_props in matched_contours.iterrows():
            cv2.polylines(viz, [(cnt_props['vertices']/downsample_factor).astype(np.int)], True, color, 2)
    
    viz_fp = DataManager.get_annotation_viz_filepath(stack=stack, fn=fn)
    create_parent_dir_if_not_exists(viz_fp)
    imsave(viz_fp, viz)
    upload_to_s3(viz_fp)

# for fn in filenames:
#     generate_annotation_viz_one_section(fn=fn)

pool = Pool(NUM_CORES/2)
pool.map(lambda fn: generate_annotation_viz_one_section(stack=stack, fn=fn, structure_colors=structure_colors, downsample_factor=downsample_factor), filenames)
pool.close()
pool.join()
#     sys.stderr.write('Compute saturation: %.2f seconds\n' % (time.time() - t1)) # skimage 6.5s; opencv 5s


def generate_versions(fn, which):

    input_fn=os.path.join(input_dir, fn)
    basename = os.path.splitext(os.path.basename(fn))[0]

    if 'compressed' in which:
        output_compressed_fn = os.path.join(output_compressed_dir, basename + '_compressed.jpg')
        if 'compressed' in which:
            if os.path.exists(output_compressed_fn):
                sys.stderr.write('File exists: %s.\n' % output_compressed_fn)
            else:
                os.system("convert %(input_fn)s -format jpg %(output_compressed_fn)s" % \
                    dict(input_fn=input_fn, output_compressed_fn=output_compressed_fn))

    if 'saturation' in which:
        # output_saturation_fn = os.path.join(output_saturation_dir, basename + '_saturation.jpg') # why jpg?
        output_saturation_fn = os.path.join(output_saturation_dir, basename + '_saturation.tif')
        if os.path.exists(output_saturation_fn):
            sys.stderr.write('File exists: %s.\n' % output_saturation_fn)
        else:
            convert_to_saturation(input_fn, output_saturation_fn, rescale=True)

#Parallel(n_jobs=4)(delayed(generate_versions)(fn, which) for fn in filenames)
pool = Pool(4)
pool.map(lambda fn: generate_versions(fn, which), filenames)
pool.close()
pool.join()
    for iy, y0 in enumerate(np.arange(0, img_h, 5000)):
        for ix, x0 in enumerate(np.arange(0, img_w, 5000)):
            origins.append((x0, y0))

    alg = 'cellprofiler'

    big_labelmap = np.zeros((img_h, img_w), dtype=np.int64)
    n = 0
    for i, input_fp in enumerate(input_fps):
        prefix = os.path.splitext(input_fp)[0]
        labelmap = labelmap_alltiles[i].astype(np.int64) # astype(np.int64) is important, otherwise results in negative label values.
        x0, y0 = origins[i]
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)
    bp.pack_ndarray_file(big_labelmap, labelmap_fp)
    upload_to_s3(labelmap_fp)
    
    for fp in input_fps:
        execute_command('rm ' + fp)        

t = time.time()

pool = Pool(NUM_CORES/2)
pool.map(detect_cells, filenames)
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
        big_labelmap[y0:y0+5000, x0:x0+5000][labelmap != 0] = labelmap[labelmap != 0] + n
        n += labelmap.max()

    labelmap_fp = os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.bp' % dict(alg=alg)

    bp.pack_ndarray_file(big_labelmap, labelmap_fp)

#     for tile_i in range(12):
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d.tif' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))
#         execute_command('rm %(DETECTED_CELLS_DIR)s/%(stack)s/%(img_fn)s/%(img_fn)s_image_inverted_%(tile_i)02d_labelmap_cellprofiler.bp' % \
#                         dict(DETECTED_CELLS_DIR=DETECTED_CELLS_DIR, stack=stack, img_fn=img_fn, tile_i=tile_i))

    # Generate labelmap viz
    t = time.time()

    viz = img_as_ubyte(label2rgb(big_labelmap, bg_label=0, bg_color=(0, 0, 0)))
    cv2.imwrite(os.path.splitext(input_img_fp)[0] + '_labelmap_%(alg)s.png' % dict(alg=alg), viz);

    sys.stderr.write('Generate labelmap viz: %.2f seconds.\n' % (time.time()-t)) # 60s


t = time.time()

pool = Pool(12)
pool.map(detect_cells, range(first_sec, last_sec+1))
pool.close()
pool.join()

sys.stderr.write('Overall time: %.2f seconds.\n' % (time.time()-t))
        dense_score_map[dense_score_map < 1e-1] = 0
        dense_score_map[dense_score_map > 1.] = 1.
#             sys.stderr.write('threshold: %.2f seconds\n' % (time.time() - t))

        if np.count_nonzero(dense_score_map) < 1e5:
            sys.stderr.write('No %s is detected on section %d\n' % (structure, sec))
            return None

        t1 = time.time()

        scoremap_bp_filepath, scoremap_interpBox_filepath = \
        DataManager.get_scoremap_filepath(stack=stack, fn=fn, anchor_fn=anchor_fn, structure=structure,
                                          return_bbox_fp=True, setting=actual_setting)

        save_hdf(dense_score_map.astype(np.float16), scoremap_bp_filepath, complevel=5)
        np.savetxt(scoremap_interpBox_filepath,
               np.array((interpolation_xmin, interpolation_xmax, interpolation_ymin, interpolation_ymax))[None],
               fmt='%d')

        sys.stderr.write('save: %.2f seconds\n' % (time.time() - t1)) # 4s, very high penalty when multiprocessing


    t = time.time()

    pool = Pool(4) # 8 causes contention, resuls in high upscaling and dumping to disk time.
    _ = pool.map(generate_score_map, structures)
    pool.close()
    pool.join()

    sys.stderr.write('interpolate: %.2f seconds\n' % (time.time() - t)) # ~ 30 seconds / section