Example #1
    def gen_training_data(self, pdbbind_dir, pdbbind_version = '2007', home_dir = None, sf_pickle = ''):
        # build train and test
        cpus = self.n_jobs if self.n_jobs > 0 else -1
        #pool = Pool(processes=cpus)
        pdbbind_db = pdbbind(pdbbind_dir, int(pdbbind_version), opt={'b':None})
        if not home_dir:
            home_dir = dirname(__file__) + '/RFScore'
            
        pdbbind_db.default_set = 'core'
        core_set = pdbbind_db.ids
        core_act = np.array(pdbbind_db.activities)
#         core_desc = np.vstack([self.descriptor_generator.build([pid.ligand], protein=pid.pocket) for pid in pdbbind_db])
        result = Parallel(n_jobs=cpus)(delayed(_parallel_helper)(self.descriptor_generator, 'build', [pid.ligand], protein=pid.pocket) for pid in pdbbind_db if pid.pocket)
        core_desc = np.vstack(result)


        pdbbind_db.default_set = 'general'
        refined_set  = [pid for pid in pdbbind_db.ids if not pid in core_set]
        refined_act = np.array([pdbbind_db.sets[pdbbind_db.default_set][pid] for pid in refined_set])
#         refined_desc = np.vstack([self.descriptor_generator.build([pid.ligand], protein=pid.pocket) for pid in pdbbind_db])
        result = Parallel(n_jobs=cpus)(delayed(_parallel_helper)(self.descriptor_generator, 'build', [pid.ligand], protein=pid.pocket) for pid in pdbbind_db if pid.pocket and not pid.id in core_set)
        refined_desc = np.vstack(result)

        self.train_descs = refined_desc
        self.train_target = refined_act
        self.test_descs = core_desc
        self.test_target = core_act

        # save numpy arrays
        np.savetxt(home_dir + '/train_descs_v%i.csv' % (self.version), self.train_descs, fmt='%g', delimiter=',')
        np.savetxt(home_dir + '/train_target.csv', self.train_target, fmt='%.2f', delimiter=',')
        np.savetxt(home_dir + '/test_descs_v%i.csv' % (self.version), self.test_descs, fmt='%g', delimiter=',')
        np.savetxt(home_dir + '/test_target.csv', self.test_target, fmt='%.2f', delimiter=',')
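The _parallel_helper used above is not defined in this snippet; a minimal sketch of such a helper (hypothetical, the real one may differ) simply calls a named method on an object so that the call can be pickled and shipped to joblib worker processes:

def _parallel_helper(obj, methodname, *args, **kwargs):
    # Call obj.methodname(*args, **kwargs). A plain module-level function like
    # this pickles cleanly, so joblib can dispatch the call to workers.
    return getattr(obj, methodname)(*args, **kwargs)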
Example #2
def batch_train(opt, round_index, round_train_data, round_valid_data, round_valid_weights=None, save_all=True, file_indices=None, return_acc_len=False, seq2seq=False):
    i = 0
    perfs = []
    M = len(round_train_data)
    while i < M:
        j = min(i + opt['num_machines'], M)
        cur_perfs = Parallel(n_jobs=j - i, backend='threading') \
            (delayed(train)(opt, round_index, train_index, file_indices[train_index] if file_indices else train_index, round_train_data[train_index], round_valid_data[train_index], valid_weights=round_valid_weights[train_index] if round_valid_weights else None, save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq) \
                for train_index in range(i, j))
        perfs.extend(cur_perfs)
        i = j

    error_indices, valid_indices = [], []
    for i, perf in enumerate(perfs):
        if perf == 0.0 or (type(perf) == tuple and perf[0] == 0.0):
            error_indices.append(i)
        elif i < opt['num_machines']:
            valid_indices.append(i)

    M = len(error_indices)
    TMP_NUM_MACHINES = len(valid_indices)
    if M > 0 and TMP_NUM_MACHINES > 0:
        i = 0
        error_perfs = []
        while i < M:
            j = min(i + TMP_NUM_MACHINES, M)
            cur_perfs = Parallel(n_jobs=j - i, backend='threading') \
                (delayed(train)(opt, round_index, valid_indices[train_index], file_indices[error_indices[train_index]] if file_indices else error_indices[train_index], round_train_data[error_indices[train_index]], round_valid_data[error_indices[train_index]], valid_weights=round_valid_weights[error_indices[train_index]] if round_valid_weights else None, save_all=save_all, return_acc_len=return_acc_len, seq2seq=seq2seq) \
                    for train_index in range(i, j))
            error_perfs.extend(cur_perfs)
            i = j
        for i in range(M):
            perfs[error_indices[i]] = error_perfs[i]

    return perfs
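The batching idiom above, dispatching at most num_machines jobs at a time and then retrying the failed ones, can be reduced to a small generic sketch (run_in_batches and work are illustrative names, not part of the original code):

from joblib import Parallel, delayed

def run_in_batches(work, items, n_workers):
    # Process items in slices of at most n_workers, one thread per item in the slice.
    results = []
    for start in range(0, len(items), n_workers):
        batch = items[start:start + n_workers]
        results.extend(Parallel(n_jobs=len(batch), backend='threading')(
            delayed(work)(item) for item in batch))
    return results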
Example #3
def _nlp_sub(disc_clsdict, gold_clsdict, names, label, verbose, n_jobs):
    # ned
    ned = NED
    cov = coverage
    if verbose:
        print '  nlp ({2}): subsampled {0} files in {1} sets'\
            .format(sum(map(len, names)), len(names), label)
    with verb_print('  nlp ({0}): calculating scores'
                             .format(label), verbose, False, True, False):
        ned_score = Parallel(n_jobs=n_jobs,
                             verbose=5 if verbose else 0,
                             pre_dispatch='n_jobs')(delayed(ned)\
                                                    (disc_clsdict.restrict(ns,
                                                                           True))
                                                    for ns in names)
        cov_score = Parallel(n_jobs=n_jobs,
                             verbose=5 if verbose else 0,
                             pre_dispatch='n_jobs')(delayed(cov)\
                                                    (disc_clsdict.restrict(ns,
                                                                           False),
                                                     gold_clsdict.restrict(ns,
                                                                           False))
                                                    for ns in names)
    # don't replace nan's by 1, but ignore them, unless all values in ned_score
    # are nan
    ned_score, cov_score = np.array(ned_score), np.array(cov_score)
    ned_score, cov_score = aggregate(ned_score, 1), aggregate(cov_score)
    return np.array(ned_score), np.array(cov_score)
Example #4
def compute_pairwise_distances(sets, metric, sp_areas=None):

    if metric == 'nonoverlap-area':
        
        partial_distance_matrix = Parallel(n_jobs=16, max_nbytes=1e6)(delayed(compute_overlap_partial)(s, sets, metric=metric,
                                                                                                      sp_areas=sp_areas) 
                                                              for s in np.array_split(range(len(sets)), 16))
        distance_matrix = np.vstack(partial_distance_matrix)
        np.fill_diagonal(distance_matrix, 0)
        return distance_matrix
        
    elif hasattr(metric, '__call__'):
        
        partial_distance_matrix = Parallel(n_jobs=16, max_nbytes=1e6)(delayed(cdist)(s, sets, metric=metric) 
                                                                      for s in np.array_split(sets, 16))
        distance_matrix = np.vstack(partial_distance_matrix)
        np.fill_diagonal(distance_matrix, 0)
        return distance_matrix
        
    elif metric == 'overlap-size':
        partial_overlap_mat = Parallel(n_jobs=16, max_nbytes=1e6)(delayed(compute_overlap_partial)(s, sets, metric='overlap-size') 
                                            for s in np.array_split(range(len(sets)), 16))
        overlap_matrix = np.vstack(partial_overlap_mat)
        return overlap_matrix
    
    else:
    
        partial_overlap_mat = Parallel(n_jobs=16, max_nbytes=1e6)(delayed(compute_overlap_partial)(s, sets, metric=metric) 
                                            for s in np.array_split(range(len(sets)), 16))
        overlap_matrix = np.vstack(partial_overlap_mat)
        distance_matrix = 1 - overlap_matrix
    
        np.fill_diagonal(distance_matrix, 0)
    
        return distance_matrix
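Independent of the specific metrics above, the chunk-then-stack pattern can be sketched as follows (chunked_cdist is an illustrative name, not part of the original code):

import numpy as np
from joblib import Parallel, delayed
from scipy.spatial.distance import cdist

def chunked_cdist(points, n_chunks=16, n_jobs=16):
    # Each worker computes one horizontal block of the full distance matrix;
    # the blocks are stacked back together afterwards.
    blocks = Parallel(n_jobs=n_jobs)(
        delayed(cdist)(chunk, points) for chunk in np.array_split(points, n_chunks))
    return np.vstack(blocks)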
Example #5
def do_cmd_multiproc(cmd, analyzer, hash_tab, filename_iter, matcher,
                     outdir, report, ncores):
    """ Run the actual command, using multiple processors """
    if cmd == 'precompute':
        # precompute fingerprints with joblib
        msgslist = joblib.Parallel(n_jobs=ncores)(
            joblib.delayed(file_precompute)(analyzer, file, outdir,
                                            audfprint_analyze.PRECOMPEXT)
            for file in filename_iter
        )
        # Collapse into a single list of messages
        for msgs in msgslist:
            report(msgs)

    elif cmd == 'match':
        # Running queries in parallel
        msgslist = joblib.Parallel(n_jobs=ncores)(
            # Would use matcher.file_match_to_msgs(), but you
            # can't use joblib on an instance method
            joblib.delayed(matcher_file_match_to_msgs)(matcher, analyzer,
                                                       hash_tab, filename)
            for filename in filename_iter
        )
        for msgs in msgslist:
            report(msgs)

    elif cmd == 'new' or cmd == 'add':
        # We add by forking multiple parallel threads each running
        # analyzers over different subsets of the file list
        multiproc_add(analyzer, hash_tab, filename_iter, report, ncores)

    else:
        # This is not a multiproc command
        raise ValueError("unrecognized multiproc command: "+cmd)
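The comment about not being able to use joblib on an instance method refers to pickling: a common workaround, sketched here under the assumption that matcher exposes a file_match_to_msgs method with this signature, is a module-level wrapper that takes the object as its first argument:

def matcher_file_match_to_msgs(matcher, analyzer, hash_tab, filename):
    # Module-level wrapper so joblib can pickle the call; it just forwards
    # to the bound method on the matcher object.
    return matcher.file_match_to_msgs(analyzer, hash_tab, filename)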
Example #6
    def fit(self, imgs):
        """ compute connectivities
        """

        if self.metric == 'wavelet':
            jobs = (delayed(wavelet_worker)(img, self.masker, self.regu, self.lbda,
                                                    self.nb_vanishmoment, self.norm,
                                                    self.q, self.nbvoies,
                                                    self.distn, self.wtype,
                                                    self.j1, self.j2) for img in imgs)

        elif self.metric == 'dfa':
            jobs = (delayed(dfa_worker)(img, self.masker, self.regu, self.lbda,
                                                    self.wtype,
                                                    self.j1, self.j2) for img in imgs)
        elif self.metric == 'welch':
            jobs = (delayed(welch_worker)(img, self.masker, self.regu, self.lbda)
                    for img in imgs)
        else:
            raise ValueError("the metric dico = %s is not yet implemented"
                % (self.metric,))

        ts = Parallel(n_jobs=5, verbose=5)(jobs)

        self.hurst = ts
        return self.hurst
Example #7
def main():

    #configurable parameters
    num_cores = 30 #multiprocessing.cpu_count()
    main_urls = [("http://www.mothering.com/forum/306-unassisted-childbirth",331),
      #("http://www.mothering.com/forum/69-vaccinations-archives",1),
      ("http://www.mothering.com/forum/443-i-m-not-vaccinating",191),
      ("http://www.mothering.com/forum/373-selective-delayed-vaccination",114),
      ("http://www.mothering.com/forum/17507-vaccinating-schedule",7)
      ]
    for main_url,nsubpages in main_urls:
        forum_label = main_url.split("/")[-1]

        start = time.time()
        print "Running on ", num_cores, " CPU cores"
        print "Scraping ",forum_label
        real_links = Parallel(n_jobs=num_cores)(delayed(doPage)(main_url,ipage) for ipage in range(nsubpages))
        #somehow we get duplicates.... so set() it
        real_links = set([item for sublist in real_links for item in sublist])
        end = time.time()
        print "Elapsed time %s" % (end-start)
        #print real_links

        results = Parallel(n_jobs=num_cores)(delayed(doTexts)(l) for l in real_links)
        results = [item for sublist in results for item in sublist]

        #save the data
        with open(forum_label+'_out.csv','w') as out:
            csv_out=csv.writer(out,delimiter='|')
            csv_out.writerow(['username','timestamp','text'])
            for row in results:
                csv_out.writerow(row)

        end2 = time.time()
        print "Total elapsed time %s" % (end2-start)
Example #8
    def fit_mvpa(self, data, labels):
            """
            Fit Searchlight for MVPA
            Parameters:
                data:       4D numpy array - (x, y, z, condition vols)
                labels:     classifier labels

            """
            print('Running searchlight Decoding')
            x, y, z, nobjects = data.shape
            # now the first dimension of data is directly indexable by
            # subspace index of the searchlight centers
            data = data.reshape((x*y*z, nobjects))

            # test run_per_center
            # for x in self.allIndices:
            #     t = run_per_center(data, x, labels)

            if self.verbose is True:
                scores = Parallel(n_jobs=self.njobs)(
                    delayed(run_per_center)(
                        data, x, labels) for x in tqdm(self.allIndices))
            else:
                scores = Parallel(n_jobs=self.njobs)(
                    delayed(run_per_center)(
                        data, x, labels) for x in self.allIndices)

            print('\n')

            self.MVPA = np.zeros((x*y*z))
            self.MVPA[list(self.centerIndices)] = scores
            self.MVPA = self.MVPA.reshape((x, y, z))
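Note that wrapping the input iterable in tqdm, as above, tracks how quickly tasks are dispatched to the workers rather than how quickly they complete; joblib's own verbose option reports completed tasks instead. A minimal sketch of both variants:

from joblib import Parallel, delayed
from tqdm import tqdm

def square(x):
    return x * x

# The progress bar advances as tasks are handed to the workers (dispatch).
dispatched = Parallel(n_jobs=4)(delayed(square)(i) for i in tqdm(range(100)))

# joblib itself prints progress messages as tasks complete.
completed = Parallel(n_jobs=4, verbose=5)(delayed(square)(i) for i in range(100))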
Example #9
    def __call__(self, filenames):
        batch_num = 1
        batch_means = np.zeros(((self.size[0]**2)*self.channels, 1))
        start_time = time.clock()
        for filenames, next_filenames in get_next(list(chunks(filenames, self.batch_size))):
            if batch_num == 1:
                rows = Parallel(n_jobs=self.n_jobs)(
                    delayed(_process_tag_item)(self.size, self.channels, filename)
                    for filename in filenames)
            data = np.vstack([r for r in rows if r is not None]).T
            if data.shape[1] > 5:
                mean = data.mean(axis=1).reshape(((self.size[0]**2)*self.channels, 1))
                data = data - mean
            # else:
            #     mean = self.model.train_data_provider.data_mean
            #     data = data - mean
            self.model.start_predictions(data)
            if next_filenames is not None:
                rows = Parallel(n_jobs=self.n_jobs)(
                    delayed(_process_tag_item)(self.size, self.channels, filename)
                    for filename in next_filenames)
            names = [name for (r, name) in zip(rows, filenames) if r is not None]
            self.model.finish_predictions(names, self.num_results, self.threshold)
            batch_num += 1
Example #10
def dump_csv(llclat, llclon, urclat, urclon, start_date_str, end_date_str,
             altitude_layer):
    assert type(llclat) is int
    assert type(llclon) is int
    assert type(urclat) is int
    assert type(urclon) is int
    llclat = int(llclat)
    llclon = int(llclon)
    urclat = int(urclat)
    urclon = int(urclon)
    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    url_and_files = get_url_and_files_from_dates(start_date, end_date)
    joblib.Parallel(n_jobs=N_PROC)(joblib.delayed(download_file)(url, local)
                                   for url, local in url_and_files)
    files_allegedly_downloaded = [f[1] for f in url_and_files]
    files_to_process = get_files_to_process(files_allegedly_downloaded)
    results = joblib.Parallel(n_jobs=N_PROC)(joblib.delayed(render_map)(
        f, llclat, llclon, urclat, urclon, altitude_layer)
                                   for f in files_to_process)
    node_interp = np.array(results)
    node_interp = node_interp.reshape(-1, node_interp.shape[-1])
    df_interp = pd.DataFrame(
        data=node_interp,
        columns=['unix_epoch', 'lat', 'lon', 'temp'])
    df_interp.to_csv('node_temp.csv')
    return(df_interp)
Example #11
    def process(self, num_cores = 10):

        print "Running prokka for protein annotation (excedpt if faas already provided)"
        to_prokka = [g for g in self.genomes if not os.path.exists(g.proteom)]
        prokka_stuff = Parallel(n_jobs=num_cores)(delayed(prokka)(i) for i in tqdm(to_prokka))

        to_mash = [g for g in self.genomes if not os.path.exists(g.genome + ".msh")]

        print "running mash hashing"
        mashstuff= Parallel(n_jobs=num_cores)(delayed(mash)(i) for i in tqdm(to_mash))

        print "running CheckM"
        to_check = [g for g in self.genomes if not os.path.exists(g.genome.replace(".fna",".checkm.json")) or not g.checkm_meta]
        checkmstuff= Parallel(n_jobs=num_cores)(delayed(checkm)(i) for i in tqdm(to_check))

        print "computing genome sizes"
        for g in tqdm(self.genomes):
            if not g.size:
                g.compute_size()

        print "computing gc contents"
        for g in tqdm(self.genomes):
            if not g.size:
                g.compute_gc()

        print "making fake reads"
        for g in tqdm(self.genomes):
            if not os.path.exists(g.fakereads):
                g.make_fake_reads(read_len=150)
Example #12
def load_stl(fname):
  from joblib import Parallel, delayed
  import features

  X = np.fromfile('../stl/'+fname, dtype=np.uint8)
  X = X.reshape((X.size/3/96/96, 3, 96, 96)).transpose((0,3,2,1))
  dispImg(X[:100, :, :, [2,1,0]], 10, fname+'_org.jpg')

  n_jobs = 10
  cmap_size = (8,8)
  N = X.shape[0]

  H = np.asarray(Parallel(n_jobs=n_jobs)( delayed(features.hog)(X[i]) for i in xrange(N) ))

  H_img = np.repeat(np.asarray([ hog_picture(H[i], 9) for i in xrange(100) ])[:, :,:,np.newaxis], 3, 3)
  dispImg(H_img, 10, fname+'_hog.jpg') 
  H = H.reshape((H.shape[0], H.size/N))

  X_small = np.asarray(Parallel(n_jobs=n_jobs)( delayed(cv2.resize)(X[i], cmap_size) for i in xrange(N) ))
  crcb = np.asarray(Parallel(n_jobs=n_jobs)( delayed(cv2.cvtColor)(X_small[i], cv.CV_RGB2YCrCb) for i in xrange(N) ))
  crcb = crcb[:,:,:,1:]
  crcb = crcb.reshape((crcb.shape[0], crcb.size/N))

  feature = np.concatenate(((H-0.2)*10.0, (crcb-128.0)/10.0), axis=1)
  print feature.shape

  return feature, X[:,:,:,[2,1,0]]
Example #13
    def __call__(self, all_names_and_labels, shuffle=False):
        batch_num = 1
        batch_means = np.zeros(((self.size[0]**2)*self.channels, 1))
        self.count_correct = 0
        self.count_incorrect = 0
        start_time = time.clock()
        for names_and_labels, n_l_next in get_next(list(chunks(all_names_and_labels, self.batch_size))):
            loop_time = time.clock()
            if batch_num == 1:
                rows = Parallel(n_jobs=self.n_jobs)(
                    delayed(_process_tag_item)(self.size, self.channels, name)
                    for name, label in names_and_labels)
            data = np.vstack([r for r in rows if r is not None]).T
            if len(names_and_labels) > 20:
                mean = data.mean(axis=1).reshape(((self.size[0]**2)*self.channels, 1))
                data = data - mean
            if self.model is not None:
                self.model.start_predictions(data)
            if n_l_next is not None:
                rows = Parallel(n_jobs=self.n_jobs)(
                    delayed(_process_tag_item)(self.size, self.channels, name)
                    for name, label in n_l_next)
            if self.model is not None:
                tags = self.model.finish_predictions()
            else:
                tags = [('No model', 0.0) for name in names_and_labels]
            self.write_to_xml(zip(tags, names_and_labels))
            batch_num += 1
            print "Tagged %d images in %.02f seconds" % (len(names_and_labels), time.clock()-loop_time)
        print "Tagging complete. Tagged %d images in %.02f seconds" % (len(all_names_and_labels), time.clock()-start_time)
Example #14
def count_reads_in_windows(bed_file, args):

    chromosome_size_dict = create_genome_size_dict(args.genome)
    chromosomes = natsorted(list(chromosome_size_dict.keys()))

    if not args.paired_end:
        parallel_count_reads = partial(_count_reads_in_windows, bed_file, args)
    else:
        parallel_count_reads = partial(_count_reads_in_windows_paired_end,
                                       bed_file, args)

    info("Binning chromosomes {}".format(", ".join([c.replace("chr", "")
                                                    for c in chromosomes])))
    chromosome_dfs = Parallel(n_jobs=args.number_cores)(
        delayed(parallel_count_reads)(chromosome_size_dict[chromosome],
                                      chromosome, strand)
        for chromosome, strand in product(chromosomes, ["+", "-"]))

    info("Merging the bins on both strands per chromosome.")
    both_chromosome_strand_dfs = [df_pair
                                  for df_pair in _pairwise(chromosome_dfs)]
    merged_chromosome_dfs = Parallel(
        n_jobs=args.number_cores)(delayed(merge_chromosome_dfs)(df_pair)
                                  for df_pair in both_chromosome_strand_dfs)

    return merged_chromosome_dfs
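Binding the fixed arguments with functools.partial before handing the function to delayed, as done above, keeps the Parallel call short; a generic sketch with illustrative names (count_reads is a placeholder, not the original worker):

from functools import partial
from joblib import Parallel, delayed

def count_reads(bed_file, args, chromosome, strand):
    # Placeholder worker: a real one would count reads in bed_file for one chromosome/strand.
    return (chromosome, strand, 0)

count_for_run = partial(count_reads, "reads.bed", {"window": 200})
pairs = Parallel(n_jobs=2)(
    delayed(count_for_run)(chrom, strand)
    for chrom in ["chr1", "chr2"] for strand in ["+", "-"])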
Example #15
def compareAlgorithms(numRuns):
    f = plt.figure(5)
    plt.clf()
    plt.hold(True)

    maxFitnessHists, minMisstepsHists = zip(*Parallel(n_jobs=-1)(delayed(GA)(i, 1000, True, False) for i in range(numRuns)))
    maxFitnessHists = np.array(maxFitnessHists)

    stdDev = maxFitnessHists.std(axis=0)
    avg = maxFitnessHists.mean(axis=0)
    plt.plot(np.arange(len(avg)), avg, color='g')
    plt.fill_between(np.arange(len(avg)), avg - stdDev, avg + stdDev, facecolor='g', alpha=0.2)
    m = maxFitnessHists

    maxFitnessHists, minMisstepsHists = zip(*Parallel(n_jobs=-1)(delayed(SA)(i, 1000, False) for i in range(numRuns)))
    maxFitnessHists = np.array(maxFitnessHists)

    stdDev = maxFitnessHists.std(axis=0)
    avg = maxFitnessHists.mean(axis=0)
    plt.plot(np.arange(len(avg)), avg, color='m', label= "Simulated annealing")
    plt.fill_between(np.arange(len(avg)), avg - stdDev, avg + stdDev, facecolor='m', alpha=0.2)

    plt.xlabel('generations / periods')
    plt.ylabel('fitness')
    green_patch = patches.Patch(color='green', label='Genetic Algorithm')
    purple_patch = patches.Patch(color='magenta', label='Simulated Annealing')
    plt.legend(handles=[green_patch, purple_patch], loc='upper left')
    f.canvas.draw()
    f.show()
    return m, maxFitnessHists
Example #16
    def get_correlation_between_mean_score_and_error(self):
        """Compute the correlation between:

         * mean genuine score and false reject count
         * mean impostor score and false acceptance count


        False reject and false acceptance counts are computed using a global threshold.
        This threshold is the threshold giving the EER.
        Correlation is computed using the Pearson correlation coefficient.
        """

        # We need the EER threshold
        eer, thr = self.get_eer_and_threshold()

        # We need to compute error rate of each user
        # Get genuine reject of each users
        fr = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \
                (delayed(_parallel_false_reject_helper)(self.get_genuine_presentations_of_user(userid),
                    thr, self._type) \
                    for userid in self._users_id))


        # Get impostors accept of each users
        fa = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \
                (delayed(_parallel_false_accept_helper)(self.get_impostor_presentations_of_user(userid),
                    thr, self._type) \
                    for userid in self._users_id))



        #compute the correlations
        return pearsonr(fr, self._genuine_scores)[0], pearsonr(fa,
                self._impostor_scores)[0], eer
Example #17
def batch_align(image_list, dest_dir="output"):
    """
    Correct the shaking (shift) on the series of images
    :param image_list: The input series of images
    :param dest_dir:   The destination directory
    """
    if not path.exists(dest_dir):
        mkdir(dest_dir)
    if path.isdir(dest_dir):
        print "Aligning %d images, output in %s, this may take a while" % (len(im_list), dest_dir)

        ref_img = io.imread(image_list[0])
        r = Parallel(n_jobs=4, backend="threading", verbose=25)(
            delayed(find_shift)(io.imread(img), ref_img) for img in image_list[1:])
        y_shift = map(lambda x: x[0], r)
        x_shift = map(lambda x: x[1], r)

        print min(y_shift), max(y_shift), min(x_shift), max(x_shift)
        crop = [int(min(y_shift)) - 1, int(max(y_shift)) + 1, int(min(x_shift)) - 1, int(max(x_shift)) + 1]

        correct(ref_img, (0, 0), "%s/%s" % (dest_dir, path.basename(image_list[0])), crop)
        Parallel(n_jobs=4, backend="threading", verbose=25)(
            delayed(correct)(io.imread(img), r[k], "%s/%s" % (dest_dir, path.basename(image_list[k])), crop)
            for k, img in enumerate(image_list[1:]))
    else:
        print "Output dir does not exists or is not a directory : %s" % dest_dir
Example #18
    def update_parallel(self, fixed, moving):
        if hasattr(self.regularizer, "set_operator"):
            self.regularizer.set_operator(shape=fixed.shape)
        self.forward_vector_fields.delta_vector_fields = np.array(
            Parallel(self.n_jobs)(
                delayed(derivative)(
                    self.similarity,
                    fixed[-i - 1],
                    moving[i],
                    self.deformation.backward_dets[-i - 1],
                    self.forward_vector_fields[i],
                    self.regularizer,
                    self.learning_rate)
                for i in xrange(self.n_step_half + 1)
            )
        )
        self.backward_vector_fields.delta_vector_fields = np.array(
            Parallel(self.n_jobs)(
                delayed(derivative)(
                    self.similarity,
                    moving[-i - 1],
                    fixed[i],
                    self.deformation.forward_dets[-i - 1],
                    self.backward_vector_fields[i],
                    self.regularizer,
                    self.learning_rate)
                for i in xrange(self.n_step_half + 1)
            )
        )

        self.forward_vector_fields.update()
        self.backward_vector_fields.update()

        self.integrate_vector_fields()
Example #19
def multi_main(n_jobs, FILENAME, FUN, **kargs):
    if FUN == MLEM2_LERS:
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(FUN)(FILENAME, iter1, iter2)
            for (iter1, iter2) in product(kargs["ITERS"][0], kargs["ITERS"][1])
        )
    elif FUN == MLEM2_delAttrRule_LERS:
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(FUN)(FILENAME, iter1, iter2, delfun, cls, attributes)
            for (iter1, iter2, delfun, cls, attributes) in product(
                kargs["ITERS"][0], kargs["ITERS"][1], kargs["DELFUNS"], kargs["CLASSES"], kargs["ATTRIBUTES"]
            )
        )
    elif FUN == MLEM2_delERule_LERS:
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(FUN)(FILENAME, iter1, iter2, delfun, cls, attribute_value)
            for (iter1, iter2, delfun, cls, attribute_value) in product(
                kargs["ITERS"][0], kargs["ITERS"][1], kargs["DELFUNS"], kargs["CLASSES"], kargs["ATTRIBUTE_VALUE"]
            )
        )
    elif FUN == MLEM2_delEAlphaRule_LERS:
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(FUN)(FILENAME, iter1, iter2, delfun, cls, attribute_value, alpha)
            for (iter1, iter2, delfun, cls, attribute_value, alpha) in product(
                kargs["ITERS"][0],
                kargs["ITERS"][1],
                kargs["DELFUNS"],
                kargs["CLASSES"],
                kargs["ATTRIBUTE_VALUE"],
                kargs["ALPHA"],
            )
        )
    else:
        print("unknown function")
    return 0
Example #20
def get_population_fitness_tasks(pop,taskdata,targetdata,params): #nsplits,clf,obj_weight):
    # first get cc for each item in population
    cc_recon=numpy.zeros(len(pop))
    predacc_insample=numpy.zeros(len(pop))
    if params.objective_weights[0]>0:
        if __USE_MULTIPROC__:
            cc_recon=Parallel(n_jobs=num_cores)(delayed(get_reconstruction_error)(ct,taskdata,targetdata,params) for ct in pop)
        else:
            cc_recon=[get_reconstruction_error(ct,taskdata,targetdata,params) for ct in pop]
    else:
        cc_recon=[0]
    if params.objective_weights[1]>0:
        if __USE_MULTIPROC__:
            cc_subsim=Parallel(n_jobs=num_cores)(delayed(get_subset_corr)(ct,taskdata,targetdata) for ct in pop)
        else:
            cc_subsim=[get_subset_corr(ct,taskdata,targetdata) for ct in pop]
    else:
        cc_subsim=[0]
    maxcc=[numpy.max(cc_recon),numpy.max(cc_subsim)]
    cc_recon=scale(cc_recon)
    cc_subsim=scale(cc_subsim)
    try:
        print('corr recon-subsim:',numpy.corrcoef(cc_recon,cc_subsim)[0,1])
    except:
        pass
    cc=cc_recon*params.objective_weights[0] + cc_subsim*params.objective_weights[1]
    return cc,maxcc
Example #21
def _addCols(df):
    l = ['teff', 'tefferr', 'logg', 'loggerr', 'feh', 'feherr']
    r = [i+'new' for i in l]

    # Apply correction from Mortier+ 2014
    df.rename(columns={'loggnew': 'loggSpec'}, inplace=True)
    idx = (df.teffnew >= 4500) & (df.teffnew <= 7050) & (df.loggSpec > 4.2)
    df.loc[idx, 'loggnew'] = df.loggSpec[idx] - 3.89E-4*df.teffnew[idx] + 2.10
    df.loc[~idx, 'loggnew'] = df.loggSpec[~idx]
    df.loc[df.loggnew < 4.2, 'loggnew'] = df.loggSpec[df.loggnew < 4.2]

    # Get R and M from Torres+
    df['R'] = np.array(Parallel(n_jobs=4)(delayed(radTorres)(*df.loc[star, l].values) for star in df.index))[:, 0]
    df['Rnew'] = np.array(Parallel(n_jobs=4)(delayed(radTorres)(*df.loc[star, r].values) for star in df.index))[:, 0]
    df['Rperc'] = (df.Rnew-df.R)/df.R * 100
    df['M'] = np.array(Parallel(n_jobs=4)(delayed(massTorres)(*df.loc[star, l].values) for star in df.index))[:, 0]
    df['Mnew'] = np.array(Parallel(n_jobs=4)(delayed(massTorres)(*df.loc[star, r].values) for star in df.index))[:, 0]
    df['Mperc'] = (df.Mnew-df.M)/df.M * 100
    df['loggPerc'] = (df.logg-df.loggnew)/df.logg * 100

    df.R = df.R.apply(round, args=(3,))
    df.Rnew = df.Rnew.apply(round, args=(3,))
    df.Rperc = df.Rperc.apply(round, args=(1,))
    df.Mnew = df.Mnew.apply(round, args=(3,))
    df.M = df.M.apply(round, args=(3,))
    df.Mperc = df.Mperc.apply(round, args=(1,))
    df.loggPerc = df.loggPerc.apply(round, args=(1,))
    df.loggnew = df.loggnew.apply(round, args=(2,))
    return df
Example #22
def parallel(func, inputs, n_jobs, expand_args=False):
    """
    Convenience wrapper around joblib's parallelization.
    """
    if expand_args:
        return Parallel(n_jobs=n_jobs)(delayed(func)(*args) for args in inputs)
    else:
        return Parallel(n_jobs=n_jobs)(delayed(func)(arg) for arg in inputs)
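A short usage example for this wrapper (the worker functions are hypothetical):

def add(a, b):
    return a + b

def square(x):
    return x * x

sums = parallel(add, [(1, 2), (3, 4)], n_jobs=2, expand_args=True)    # [3, 7]
squares = parallel(square, [1, 2, 3], n_jobs=2)                       # [1, 4, 9]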
Example #23
def analysis(foldername, outdir, referencenum, exten, n_estimators, min_samples_leaf, max_depth):
    """
    Start the analysis

    Input:
        1) Path to the driver directory
        2) Path where the submission file should be written
        3) Number of drivers to compare against
    """
    start = datetime.now()
    submission_id = datetime.now().strftime("%H_%M_%B_%d_%Y")

    folders = [os.path.join(foldername, f) for f in os.listdir(foldername) if os.path.isdir(os.path.join(foldername, f))]

    nonstandfeatfile = 'Features66-NOSTAND-nreprot.csv'
    # generates csv file with NON STANDARDIZED features to calculate means and standard deviations afterwards:
    if os.path.exists(nonstandfeatfile):
        print 'initial calculation of all features for standardizing purposes will be skipped because file exists:',nonstandfeatfile
        pass
    else:
        allfeats = Parallel(n_jobs=60)(delayed(F_Features4onedriver)(folder, exten) for folder in folders)
        with open(nonstandfeatfile, 'a') as featsfile:
            csvwriter = csv.writer(featsfile, delimiter=',')
            for item in allfeats:
                for i in xrange(len(item)):
                    csvwriter.writerow(item[i])

    ## Choose between one of the following two lines:
#    STAND = False
    STAND = True
    if STAND:
        # calculates means and standard deviations in features:
        means, stds = F_calcmeanstsds(nonstandfeatfile)
    else:
        means = None
        stds = None


    # sample drivers to compare individual ones:
    seed(13)
#    referencefolders = [folders[i] for i in sorted(sample(xrange(len(folders)), referencenum))]
    referencefolders = [folders[i] for i in sorted(sample(xrange(len(folders)), int(len(folders)/3)))]
    print 'Generating refdata not in parallel, please wait some minutes...'
    referencedrivers = []
    for referencefolder in referencefolders:
#        referencedrivers.append(Driver(referencefolder, exten, STAND, means=means, stds=stds))
         referencedrivers.append(DriverSelect(referencefolder, exten, STAND, means=means, stds=stds))
    generatedata(referencedrivers)

    results = Parallel(n_jobs=60)(delayed(perform_analysis)(folder, exten, STAND, means, stds, n_estimators, min_samples_leaf, max_depth) for folder in folders)

    namesubmisfile = "RFR13-nrro-R0.3-spacbrtrpalxyab-std-e%i-s%i-d%i.csv" % (n_estimators, min_samples_leaf, max_depth)
    with open(os.path.join(outdir, namesubmisfile), 'w') as writefile:
        writefile.write("driver_trip,prob\n")
        for item in results:
            writefile.write("%s\n" % item)
    print 'submission file ',namesubmisfile,' written'
    print 'Done, elapsed time: %s' % str(datetime.now() - start)
Example #24
def make_surrogates_ctps(phase_array, nrepeat=1000, mode='shuffle', n_jobs=4,
                         verbose=None):
    ''' calculate surrogates from an array of (phase) trials
        by means of shuffling the phase

    Parameters
    ----------
    phase_array : 4d ndarray of dimension [nfreqs x ntrials x nchan x nsamples]

    Optional:
    nrepeat: number of surrogate repetitions

    mode: two different modes are allowed.
        'shuffle' will randomly shuffle the phase values. This is the default.
        'shift' will randomly shift the phase values.
    n_jobs: number of cpu nodes to use
    verbose: verbose level (does not work yet)

    Returns
    -------
    pt : shuffled phase trials

    '''

    from joblib import Parallel, delayed
    from mne.parallel import parallel_func
    from mne.preprocessing.ctps_ import kuiper

    nfreq, ntrials, nsources, nsamples = phase_array.shape
    pk = np.zeros((nfreq, nrepeat, nsources, nsamples), dtype='float32')

    # create surrogates:  parallised over nrepeats
    parallel, my_kuiper, _ = parallel_func(kuiper, n_jobs, verbose=verbose)
    for ifreq in range(nfreq):
        for isource in range(nsources):
            # print ">>> working on frequency: ",bp[ifreq,:],"   source: ",isource+1
            print ">>> working on frequency range: ",ifreq + 1,"   source: ",isource + 1
            pt = phase_array[ifreq, :, isource, :]  # extract [ntrials, nsamp]

            if(mode=='shuffle'):
                # shuffle phase values for all repetitions
                pt_s = Parallel(n_jobs=n_jobs, verbose=0)(delayed(shuffle_data)
                                (pt) for i in range(nrepeat))
            else:
                # shift all phase values for all repetitions
                pt_s = Parallel(n_jobs=n_jobs, verbose=0)(delayed(shift_data)
                                (pt) for i in range(nrepeat))

            # calculate Kuiper's statistics for each phase array
            out = parallel(my_kuiper(i) for i in pt_s)

            # store stat and pk in different arrays
            out = np.array(out, dtype='float32')
            # ks[ifreq,:,isource,:] = out[:,0,:]  # is actually not needed
            pk[ifreq, :, isource, :] = out[:, 1, :]  # [nrepeat, pk_idx, nsamp]

    return pk
Example #25
def run(f,r,args=None, threads=0, verbose=0):
    if threads == 0:
        threads = multiprocessing.cpu_count()
    
    if args:
        return Parallel(n_jobs=threads, verbose=verbose)(delayed(f)(i, *args) for i in r)
    else:
        return Parallel(n_jobs=threads, verbose=verbose)(delayed(f)(i) for i in r)
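Usage might look like this (the worker functions are hypothetical):

def square(x):
    return x * x

def shifted_square(x, offset):
    return (x + offset) ** 2

print(run(square, range(5)))                      # [0, 1, 4, 9, 16]
print(run(shifted_square, range(5), args=(10,)))  # [100, 121, 144, 169, 196]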
        
Example #26
    def __init__(self, data_same, mean, std, nframes=1, batch_size=1, marginf=0, only_same=False):
        dtw_costs = zip(*data_same)[5]
        self._orig_x1s = zip(*data_same)[3]
        self._orig_x2s = zip(*data_same)[4]
        self._words_frames = numpy.asarray([fb.shape[0] for fb in self._orig_x1s])
        self.print_mean_DTW_costs(dtw_costs)

        self._mean = mean
        self._std = std
        self._nframes = nframes
        self._nwords = batch_size
        self._margin = marginf
        self._only_same = only_same
        # marginf says if we pad taking a number of frames as margin

        same_spkr = 0
        for i, tup in enumerate(data_same):
            if tup[1] == tup[2]:
                same_spkr += 1
        ratio = same_spkr * 1. / len(data_same)
        print "ratio same spkr / all for same:", ratio
        data_diff = []
        ldata_same = len(data_same)-1
        same_spkr_diff = 0
        for i in xrange(len(data_same)):
            word_1 = random.randint(0, ldata_same)
            word_1_type = data_same[word_1][0]
            word_2 = random.randint(0, ldata_same)
            while data_same[word_2][0] == word_1_type:
                word_2 = random.randint(0, ldata_same)

            wt1 = random.randint(0, 1)
            wt2 = random.randint(0, 1)
            if data_same[word_1][1+wt1] == data_same[word_2][1+wt2]:
                same_spkr_diff += 1
            p1 = data_same[word_1][3+wt1]
            p2 = data_same[word_2][3+wt2]
            r1 = p1[:min(len(p1), len(p2))]
            r2 = p2[:min(len(p1), len(p2))]
            data_diff.append((r1, r2))
        ratio = same_spkr_diff * 1. / len(data_diff)
        print "ratio same spkr / all for diff:", ratio

        self._data_same = zip(zip(*data_same)[3], zip(*data_same)[4],
                zip(*data_same)[-2], zip(*data_same)[-1])
        self._data_diff = data_diff

        self.remix()

        if self._nframes > 1:
            # pad the orig_xes1/2 once and for all
            self._orig_x1s = joblib.Parallel(n_jobs=cpu_count()-3)(
                    joblib.delayed(pad)(x, self._nframes, self._margin)
                    for x in self._orig_x1s)
            self._orig_x2s = joblib.Parallel(n_jobs=cpu_count()-3)(
                    joblib.delayed(pad)(x, self._nframes, self._margin)
                    for x in self._orig_x2s)
Example #27
    def transform(self, catalog, subjects_id):
        catalog_ = copy.deepcopy(catalog)
        study_dir = make_dir(self.data_dir, self.study_id, strict=False)
        if isinstance(self.subject_key_, dict):
            save_table(self.subject_key_,
                       os.path.join(study_dir, 'subject_key.txt'))
        save_table(self.task_key_, os.path.join(study_dir, 'task_key.txt'),
                   merge=self.merge_tasks)
        save_table({'TR': catalog_[0]['tr']},
                   os.path.join(study_dir, 'scan_key.txt'))

        model_dir = make_dir(study_dir, 'models', self.model_id, strict=False)
        save_task_contrasts(model_dir, catalog_[0], merge=self.merge_tasks)
        save_condition_key(model_dir, catalog_[0], merge=self.merge_tasks)

        n_jobs = -1 if self.n_jobs != 1 else 1

        self.encoder_ = IntraEncoder(hrf_model=self.hrf_model,
                                     drift_model=self.drift_model,
                                     memory=self.memory,
                                     n_jobs=n_jobs)

        all_niimgs = self.encoder_.fit_transform(catalog_, subjects_id)

        if subjects_id is None:
            subjects_id = [doc['subject_id'] for doc in catalog]

        outputs = Parallel(n_jobs=self.n_jobs)(
            delayed(_compute_glm)(
                LinearModeler(masker=self.masker,
                              reporter=os.path.join(
                                  study_dir, subject_id,
                                  'model', self.model_id),
                              glm_model=self.glm_model,
                              hrf_model=self.hrf_model,
                              contrast_type=self.contrast_type,
                              output_z=self.output_z,
                              output_stat=self.output_stat,
                              output_effects=self.output_effects,
                              output_variance=self.output_variance),
                niimgs=niimgs,
                design_matrices=design_matrices,
                contrasts=doc['contrasts'])
                for subject_id, doc, niimgs, design_matrices in zip(
                    subjects_id,
                    catalog_,
                    all_niimgs,
                    self.encoder_.design_matrices_))

        if self.resample:
            Parallel(n_jobs=n_jobs)(
                delayed(_resample_img)(
                    doc[dtype][cid], self.target_affine, self.target_shape, )
                for doc in outputs for dtype in doc for cid in doc[dtype])

        return outputs
Example #28
def test_simple(loop):
    with cluster() as (s, [a, b]):
        with parallel_backend('distributed', loop=loop,
                scheduler_host=('127.0.0.1', s['port'])):

            seq = Parallel()(delayed(inc)(i) for i in range(10))
            assert seq == [inc(i) for i in range(10)]

            seq = Parallel()(delayed(inc)(i) for i in range(10))
            assert seq == [inc(i) for i in range(10)]
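The same parallel_backend context manager also works with joblib's built-in backends; a minimal sketch that needs no distributed scheduler:

from joblib import Parallel, delayed, parallel_backend

def inc(i):
    return i + 1

with parallel_backend('threading', n_jobs=2):
    seq = Parallel()(delayed(inc)(i) for i in range(10))
assert seq == [inc(i) for i in range(10)]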
Example #29
def process_batch(image_db, label_db, fnames_b, y_b):
    print "Reading the images and labels"
    with Parallel(n_jobs=-1) as parallel:
        Xb = parallel(delayed(load_im_tuple)
                      (fname, i) for i, fname in fnames_b)
        yb = parallel(delayed(load_y_tuple)(y, i) for i, y in y_b)
    print "Writing image data"
    _write_batch_lmdb(image_db, Xb)
    print "Writing label data"
    _write_batch_lmdb(label_db, yb)
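Using Parallel as a context manager, as above, reuses one worker pool across several consecutive calls instead of starting workers twice; a stripped-down sketch:

from joblib import Parallel, delayed

def double(x):
    return 2 * x

def halve(x):
    return x / 2.0

with Parallel(n_jobs=2) as parallel:
    doubled = parallel(delayed(double)(i) for i in range(4))
    halved = parallel(delayed(halve)(i) for i in range(4))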
Example #30
def get_training_sets():
    X = list(joblib.Parallel(n_jobs=-1)(
        joblib.delayed(get_features_for_path)(i)
        for i in TRAIN_DIR.iterdir()))
    y = list(joblib.Parallel(n_jobs=-1)(
        joblib.delayed(get_target_for_path)(i)
        for i in TARGET_DIR.iterdir()))
    X = np.concatenate(X)
    y = np.concatenate(y)
    logging.info("Finished loading")
    return X, y
Example #31
def rptree_leaf_array_parallel(rp_forest):
    result = joblib.Parallel(n_jobs=-1, prefer="threads")(
        joblib.delayed(get_leaves_from_tree)(rp_tree) for rp_tree in rp_forest)
    # result = [get_leaves_from_tree(rp_tree) for rp_tree in rp_forest]
    return result
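prefer="threads" is a soft hint rather than a hard backend choice: joblib uses the threading backend unless an enclosing parallel_backend overrides it, which suits workers that release the GIL or mostly wait on I/O. A minimal sketch with a stand-in worker:

import joblib

def leaf_count(tree):
    # Stand-in for get_leaves_from_tree: here a "tree" is just a list of leaves.
    return len(tree)

forest = [[1, 2, 3], [4, 5], [6]]
counts = joblib.Parallel(n_jobs=-1, prefer="threads")(
    joblib.delayed(leaf_count)(tree) for tree in forest)
print(counts)  # [3, 2, 1]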
Example #32
        dct_x = torch_apply(dct.dct, dct_x)

        dct_x = dct_x.to(device)
        y = y.to(device)

        outputs_grad = []
        outputs = []
        for i in range(len(models)):
            out = models[i](dct_x[i, ...])
            outputs_grad.append(out)
            outputs.append(out.detach())

        # This line makes multiple calls to train_slice function
        # Parallelization
        Parallel(n_jobs=16, prefer="threads", verbose=0)(
            delayed(train_slice)(i, models[i], dct_x[i, ...], y, outputs, ops[i]) \
            for i in range(len(models))
        )

        res = torch.empty(shape[0], 10, shape[2])
        for i in range(len(models)):
            res[i, ...] = models[i](dct_x[i, ...])

        res = torch_apply(dct.idct, res).to(device)
        res = scalar_tubal_func(res)
        res = torch.transpose(res, 0, 1)
        criterion = nn.CrossEntropyLoss()
        total_loss = criterion(res, y)

        _, predicted = torch.max(res, 1)
        total += y.size(0)
Example #33
                    Y_test_result = np.concatenate((Y_test_result, Y))
                    del [[X, Y, gp, Y_pred]]


    Y_test_result = np.ravel(Y_test_result)
    Y_pred_result = np.ravel(Y_pred_result)
    SVM_overall_accuracy = accuracy_score(Y_test_result, Y_pred_result)
    print("subject = "+str(testing_subject)+" window_size = "+str(window_size)+" phase_number = "+str(phase_number)+" Accuracy = "+str(SVM_overall_accuracy))

    base_path_dir = "/HDD/hipexo/Inseung/Result/"
    text_file1 = base_path_dir + SVM_saving_file + ".txt"
    # alternative from an unresolved merge: text_file1 = base_path_dir + "SVM_phasesweep.txt"

    msg1 = ' '.join([str(testing_subject),str(window_size),str(transition_point),str(phase_number),str(SVM_overall_accuracy),"\n"])
    return text_file1, msg1

run_combos = []
for testing_subject in [6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 27 ,28]:
    for window_size in [350]:
        for transition_point in [0.2]:
            for phase_number in [1]:
                for kernel_type in ['rbf']:
                        run_combos.append([testing_subject, window_size, transition_point, phase_number, kernel_type])

result = Parallel(n_jobs=-1)(delayed(SVM_parallel)(combo) for combo in run_combos)
for r in result:
    with open(r[0],"a+") as f:
        f.write(r[1])
Example #34
def uncles(X,
           type='A',
           Ks=[n for n in range(4, 21, 4)],
           params=None,
           methods=None,
           methodsDetailed=None,
           U=None,
           Utype='PM',
           relabel_technique='minmin',
           setsP=None,
           setsN=None,
           dofuzzystretch=False,
           wsets=None,
           wmethods=None,
           GDM=None,
           smallestClusterSize=11,
           CoPaMfinetrials=1,
           CoPaMfinaltrials=1,
           binarise_techniqueP='DTB',
           binarise_paramP=np.arange(0.0, 1.1, 0.1, dtype='float'),
           binarise_techniqueN='DTB',
           binarise_paramN=np.concatenate(([sys.float_info.epsilon],
                                           np.arange(0.1,
                                                     1.1,
                                                     0.1,
                                                     dtype='float'))),
           Xnames=None,
           deterministic=False,
           ncores=1):
    Xloc = ds.listofarrays2arrayofarrays(X)
    L = len(Xloc)  # Number of datasets

    # Fix parameters
    if params is None: params = {}
    if setsP is None: setsP = [x for x in range(int(math.floor(L / 2)))]
    if setsN is None: setsN = [x for x in range(int(math.floor(L / 2)), L)]
    setsPN = np.array(np.concatenate((setsP, setsN), axis=0), dtype=int)
    Xloc = Xloc[setsPN]
    L = np.shape(Xloc)[0]  # Number of datasets
    if wsets is None:
        wsets = np.array([1 for x in range(L)])
    else:
        wsets = np.array(wsets)[setsPN]
    if GDM is None:
        Ng = np.shape(Xloc[0])[0]
        GDMloc = np.ones([Ng, L], dtype='bool')
    else:
        GDMloc = GDM[:, setsPN]
        Ng = GDMloc.shape[0]
    if Xnames is None:
        Xnames = ['X{0}'.format(l) for l in range(L)]

    if methods is None:
        methods = [['k-means']]
        # largest_DS = np.max([x.shape[0] for x in Xloc])
        # if (largest_DS <= maxgenesinsetforpdist):
        #     if (deterministic):
        #         methods = [['k-means'], ['HC']]
        #     else:
        #         methods = [['k-means'], ['SOMs'], ['HC']]
        # else:
        #     if (deterministic):
        #         methods = [['k-means']]
        #     else:
        #         methods = [['k-means'], ['SOMs']]
    else:
        largest_DS = np.max([x.shape[0] for x in Xloc])
        if (largest_DS > maxgenesinsetforpdist):
            methods = [
                m for m in methods
                if 'hc' not in [entry.lower() for entry in m]
            ]
            if not methods:
                io.log('No valid base clustering can be used. Please note that clust would not use HC clustering ' \
                       'on datasets with more than {0} genes. You have a dataset with {1} genes.' \
                       ''.format(maxgenesinsetforpdist, largest_DS))
                io.log('Clust will terminate here.')
                io.log(op.bottomline(), addextrastick=False)
                sys.exit()
    if methodsDetailed is None:
        methodsDetailedloc = np.array([methods for l in range(L)])
    else:
        methodsDetailedloc = methodsDetailed[setsPN]
    if wmethods is None:
        wmethods = [[1 for x in m] for m in methodsDetailedloc]
    elif not isinstance(wmethods[0], (list, tuple, np.ndarray)):
        wmethods = np.tile(methods, [L, 1])
    else:
        wmethods = np.array(wmethods)[setsPN]

    setsPloc = [ii for ii in range(len(setsP))]
    if L > len(setsPloc):
        setsNloc = [ii for ii in range(len(setsPloc), L)]

    Ds = [nu.closest_to_square_factors(k)
          for k in Ks]  # Grid sizes for the SOMs method for each value of K
    NKs = len(Ks)  # Number of K values

    # Clustering
    if U is None:
        Utype = 'PM'
        Uloc = np.array([None] * (L * NKs)).reshape([L, NKs])
        totalparallel = np.sum(Ks) * np.sum(
            [len(meths) for meths in methodsDetailedloc])
        for meths in methodsDetailedloc:
            for meth in meths:
                if 'k-means' in meth:
                    totalparallel += np.max(Ks) * np.max(Ks)
                    continue
        io.resetparallelprogress(totalparallel)

        for l in range(L):
            # Cache kmeans initialisations for the dataset once to save time:
            cl.cache_kmeans_init(Xloc[l],
                                 Ks,
                                 methodsDetailedloc[l],
                                 datasetID=l)

            # Now go to parallel clustering
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                Utmp = Parallel(n_jobs=ncores)\
                    (delayed(clustDataset)
                     (Xloc[l], Ks[ki], Ds[ki], methodsDetailedloc[l], GDMloc[:, l], Ng, l) for ki in range(NKs))

                Utmp = [u for u in Utmp]
                for ki in range(NKs):
                    Uloc[l, ki] = Utmp[ki]

                gc.collect()
                #io.updateparallelprogress(np.sum(Ks) * len(methodsDetailedloc))

    else:
        Uloc = ds.listofarrays2arrayofarrays(U)[setsPN]

    # Calculate a CoPaM for each dataset at each K
    CoPaMsFine = np.array([None] * (L * NKs)).reshape([L, NKs])
    for l in range(L):
        for ki in range(NKs):
            if Utype.lower() == 'pm':
                CoPaMsFineTmp = [
                    generateCoPaM(Uloc[l, ki],
                                  relabel_technique=relabel_technique,
                                  X=[Xloc[l]],
                                  w=wmethods[l],
                                  K=Ks[ki],
                                  GDM=GDMloc[:, l].reshape([-1, 1]))
                    for i in range(CoPaMfinetrials)
                ]
            elif Utype.lower() == 'idx':
                CoPaMsFineTmp = \
                    [generateCoPaMfromidx(Uloc[l, ki], relabel_technique=relabel_technique, X=Xloc,
                                          w=wmethods[l], K=Ks[ki])
                     for i in range(CoPaMfinetrials)]
            else:
                raise ValueError('Invalid Utype')
            CoPaMsFine[l,
                       ki] = generateCoPaM(CoPaMsFineTmp,
                                           relabel_technique=relabel_technique,
                                           X=[Xloc[l]],
                                           GDM=GDMloc[:, l].reshape([-1, 1]))

            if dofuzzystretch:
                CoPaMsFine[l, ki] = fuzzystretch(CoPaMsFine[l, ki])

    # Calculate the final CoPaM for each K
    CoPaMs = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    CoPaMsP = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    CoPaMsN = np.array([None] * (CoPaMfinaltrials * NKs)).reshape(
        [CoPaMfinaltrials, NKs])
    for t in range(CoPaMfinaltrials):
        for ki in range(NKs):
            if type == 'A':
                if Utype.lower() == 'pm':
                    CoPaMs[t, ki] = generateCoPaM(
                        CoPaMsFine[:, ki],
                        relabel_technique=relabel_technique,
                        w=wsets,
                        X=Xloc,
                        GDM=GDMloc)
                elif Utype.lower() == 'idx':
                    CoPaMs[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[:, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets,
                        GDM=GDMloc)
                else:
                    raise ValueError('Invalid Utype')
            elif type == 'B':
                if Utype.lower() == 'pm':
                    CoPaMsP[t, ki] = generateCoPaM(
                        CoPaMsFine[setsPloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsPloc],
                        GDM=GDMloc[:, setsPloc])
                    CoPaMsN[t, ki] = generateCoPaM(
                        CoPaMsFine[setsNloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsNloc],
                        GDM=GDMloc[:, setsNloc])
                elif Utype.lower() == 'idx':
                    CoPaMsP[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[setsPloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsPloc],
                        GDM=GDMloc[:, setsPloc])
                    CoPaMsN[t, ki] = generateCoPaMfromidx(
                        CoPaMsFine[setsNloc, ki],
                        relabel_technique=relabel_technique,
                        X=Xloc,
                        w=wsets[setsNloc],
                        GDM=GDMloc[:, setsNloc])
                else:
                    raise ValueError('Invalid Utype')
            else:
                raise ValueError(
                    'Invalid UNCLES type. It has to be either A or B')

    # Binarise
    NPp = len(binarise_paramP)  # Number of P params
    NNp = len(binarise_paramN)  # Number of N params
    if type == 'A':
        B = np.zeros([CoPaMfinaltrials, NPp, 1, NKs], dtype=object)
        Mc = np.zeros([CoPaMfinaltrials, NKs], dtype=object)
    elif type == 'B':
        B = np.zeros([CoPaMfinaltrials, NPp, NNp, NKs], dtype=object)
        Mc = np.zeros([CoPaMfinaltrials, NKs], dtype=object)

    for t in range(CoPaMfinaltrials):
        for ki in range(NKs):
            if type == 'A':
                # Pre-sorting binarisation
                for p in range(NPp):
                    B[t, p, 0, ki] = binarise(CoPaMs[t,
                                                     ki], binarise_techniqueP,
                                              binarise_paramP[p])
                Mc[t, ki] = [np.sum(Bp, axis=0) for Bp in B[t, :, 0, ki]]

                # Sorting
                CoPaMs[t, ki] = sortclusters(CoPaMs[t, ki], Mc[t, ki],
                                             smallestClusterSize)

                # Post-sorting binarisation
                for p in range(NPp):
                    B[t, p, 0, ki] = binarise(CoPaMs[t,
                                                     ki], binarise_techniqueP,
                                              binarise_paramP[p])
                Mc[t, ki] = [np.sum(Bp, axis=0) for Bp in B[t, :, 0, ki]]
            elif type == 'B':
                # Pre-sorting binarisation
                BP = [
                    binarise(CoPaMsP[t, ki], binarise_techniqueP,
                             binarise_paramP[p]) for p in range(NPp)
                ]
                McP = [np.sum(BPp, axis=0) for BPp in BP]

                BN = [
                    binarise(CoPaMsN[t, ki], binarise_techniqueN,
                             binarise_paramN[p]) for p in range(NNp)
                ]
                McN = [np.sum(BNp, axis=0) for BNp in BN]

                # Sorting
                CoPaMsP[t, ki] = sortclusters(CoPaMsP[t, ki], McP,
                                              smallestClusterSize)
                CoPaMsN[t, ki] = sortclusters(CoPaMsN[t, ki], McN,
                                              smallestClusterSize)

                # Post-sorting binarisation
                BP = [
                    binarise(CoPaMsP[t, ki], binarise_techniqueP,
                             binarise_paramP[p]) for p in range(NPp)
                ]
                McP = [np.sum(BPp, axis=0) for BPp in BP]

                BN = [
                    binarise(CoPaMsN[t, ki], binarise_techniqueN,
                             binarise_paramN[p]) for p in range(NNp)
                ]
                McN = [np.sum(BNp, axis=0) for BNp in BN]

                # UNCLES B logic
                for pp in range(NPp):
                    for pn in range(NNp):
                        B[t, pp, pn, ki] = BP[pp]
                        B[t, pp, pn, ki][np.any(BN[pn], axis=1)] = False

                # Fill Mc
                Mc[t, ki] = [None] * Ks[ki]
                for k in range(Ks[ki]):
                    Mc[t, ki][k] = np.zeros([NPp, NNp])
                    for pp in range(NPp):
                        for pn in range(NNp):
                            Mc[t, ki][k][pp, pn] = np.sum(B[t, pp, pn, ki][:, k])

    # Prepare and return the results:
    params = dict(
        params, **{
            'methods': methods,
            'setsP': setsPloc,
            'setsN': setsNloc,
            'dofuzzystretch': dofuzzystretch,
            'type': type,
            'Ks': Ks,
            'NKs': NKs,
            'wsets': wsets,
            'wmethods': wmethods,
            'Ds': Ds,
            'L': L,
            'CoPaMs': CoPaMs,
            'smallestclustersize': smallestClusterSize,
            'GDM': GDMloc
        })

    UnclesRes = collections.namedtuple('UnclesRes',
                                       ['B', 'Mc', 'params', 'X', 'U'])
    return UnclesRes(B, Mc, params, Xloc, Uloc)
Пример #35
0
def main():

    parser = argparse.ArgumentParser(description="calculate MCD.")
    parser.add_argument("--conf", type=str, help="configuration file")
    parser.add_argument("--spkr_conf",
                        type=str,
                        help="speaker configuration file")
    parser.add_argument(
        "--featdir",
        type=str,
        help="root directory of ground truth h5",
    )
    parser.add_argument("--outwavdir",
                        type=str,
                        help="converted waveform directory")
    parser.add_argument(
        "--out",
        type=str,
        help="if omitted, then output to sys.stdout",
    )
    parser.add_argument("--n_jobs",
                        default=1,
                        type=int,
                        help="number of parallel jobs")
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(asctime)s (%(module)s:%(lineno)d) "
        "%(levelname)s: %(message)s",
    )

    # load configure files
    conf = load_yaml(args.conf)
    spkr_conf = load_yaml(args.spkr_conf)

    # load converted files. If mcep, use h5; else, waveform
    if conf["output_feat_type"] == "mcep":
        converted_files = sorted(list(Path(args.outwavdir).glob("*.h5")))
    else:
        converted_files = sorted(list(Path(args.outwavdir).rglob("*.wav")))
    logging.info(f"number of utterances = {len(converted_files)}")

    # load ground truth scp
    featdir = Path(args.featdir) / conf["feature"]["label"]
    gt_feats = open_featsscp(featdir / "eval" / "feats.scp")

    if args.out is None:
        out = sys.stdout
    else:
        out = open(args.out, "w", encoding="utf-8")

    MCD_list = Parallel(args.n_jobs)([
        delayed(calculate)(cv_path, gt_feats, conf, spkr_conf)
        for cv_path in converted_files
    ])

    # summarize by pair
    pairwise_MCD = {}
    for k, v in MCD_list:
        orgspk, tarspk, _ = k.split("-")
        pair = orgspk + " " + tarspk
        if pair not in pairwise_MCD:
            pairwise_MCD[pair] = []
        pairwise_MCD[pair].append(v)

    for k in sorted(pairwise_MCD.keys()):
        mcd_list = pairwise_MCD[k]
        mean_mcd = float(sum(mcd_list) / len(mcd_list))
        out.write(f"{k} {mean_mcd:.3f}\n")
Пример #36
0
def calculate_accuracy(
        path_embeddings,
        meter_acc: tnt.meter.ClassErrorMeter,
        meter_auc: tnt.meter.AUCMeter,
        type='range',
        norm='l2',
        triplet_similarity='cos',
        mode='cpu',
        embedding_size=None,
        class_max_dist=None,  # precomputed
        class_centroids=None,
        y_list=None,  # precomputed
        sample_count=None,  # precomputed
        paths_embs_idx_path_pairs=None):  # precomputed

    paths_embs = FileUtils.listSubFiles(path_embeddings)

    # calculate centroids first
    if class_max_dist is None:
        class_centroids = {}
        class_max_dist = {}
        y_list = []
        paths_embs_idx_path_pairs = []
        sample_count = 0

        for path_emb in paths_embs:
            if path_emb.endswith('.json'):
                y_each = int(os.path.basename(path_emb).split('.')[0])
                path_emb_json = f'{path_embeddings}/{y_each}.json'
                path_emb_mem = f'{path_embeddings}/{y_each}.mmap'

                emb_json = FileUtils.loadJSON(path_emb_json)
                emb_mem = np.memmap(path_emb_mem,
                                    mode='r',
                                    dtype=np.float16,
                                    shape=(emb_json['count'], embedding_size))

                paths_embs_idx_path_pairs.append((sample_count, y_each))
                sample_count += emb_json['count']

                y_list += (np.ones(
                    (emb_json['count'], ), dtype=np.int) * y_each).tolist()

                class_centroids[y_each] = np.average(emb_mem, axis=0)
                if norm == 'l2':
                    class_centroids[y_each] = normalize_vec(
                        class_centroids[y_each])

                np_class_centroids_tiled = np.tile(class_centroids[y_each],
                                                   (len(emb_mem), 1))
                list_dists = get_distance(np_class_centroids_tiled, emb_mem,
                                          triplet_similarity, mode).tolist()
                list_dists = sorted(list_dists, reverse=False)
                # drop the top 10 percent of distances as they could contain noise
                list_dists = list_dists[:max(2, int(len(list_dists) * 0.9))]
                class_max_dist[y_each] = list_dists[-1]  # largest remaining distance

    classes_size = int(np.max(y_list)) + 1

    # store distance matrix as memmap for optimization
    path_dists_mem = f'{path_embeddings}/dists.mmap'
    is_exist_dists_mem = os.path.exists(path_dists_mem)
    dists_mem = np.memmap(path_dists_mem,
                          mode='r+' if is_exist_dists_mem else 'w+',
                          dtype=np.float16,
                          shape=(sample_count, classes_size))
    #dists_mem.flush()

    path_centroids_mem = f'{path_embeddings}/centroids.mmap'  # separate file; reusing dists.mmap would clobber the distance matrix
    is_exist_centroids_mem = os.path.exists(path_centroids_mem)
    centroids_mem = np.memmap(path_centroids_mem,
                              mode='r+' if is_exist_centroids_mem else 'w+',
                              dtype=np.float16,
                              shape=(classes_size, embedding_size))
    for key, value in class_centroids.items():
        centroids_mem[key] = value
    #centroids_mem.flush()

    if not is_exist_dists_mem:
        Parallel(n_jobs=multiprocessing.cpu_count() * 2, backend='threading')(
            delayed(process_dists)(idx_start, y_each, y_list, path_embeddings,
                                   sample_count, classes_size, embedding_size,
                                   triplet_similarity, mode)
            for idx_start, y_each in paths_embs_idx_path_pairs)

        dists_mem = np.memmap(path_dists_mem,
                              mode='r',
                              dtype=np.float16,
                              shape=(sample_count, classes_size))

    # iterate through the precomputed distances in chunks, feeding the meters without loading everything into memory
    chunk_size = 1024
    for idx_chunk in range(sample_count // chunk_size + 1):
        idx_chunk_start = idx_chunk * chunk_size
        idx_chunk_end = min(sample_count, idx_chunk_start + chunk_size)
        chunk_each_size = idx_chunk_end - idx_chunk_start

        if chunk_each_size == 0:
            break

        if type == 'range':
            predicted = np.zeros((chunk_each_size, classes_size),
                                 dtype=np.float)
        else:
            predicted = np.ones(
                (chunk_each_size, classes_size), dtype=np.float) * 1e9
        target = np.zeros((chunk_each_size, classes_size), dtype=np.float)

        for idx_y in class_max_dist.keys():
            max_dist = class_max_dist[idx_y]
            for idx_class in range(chunk_each_size):
                target[idx_class, y_list[idx_chunk_start + idx_class]] = 1.0

            dists = dists_mem[idx_chunk_start:idx_chunk_end]

            if type == 'range':
                for idx_emb, dist in enumerate(dists):
                    if max_dist > dist[idx_y]:
                        predicted[idx_emb, idx_y] += 1.0
            else:
                predicted[:, idx_y] = np.minimum(
                    predicted[:, idx_y], dists[:, idx_y]
                )  # store for each class closest embedding with distance value

        if type == 'range':
            predicted = predicted / (np.sum(predicted, axis=1, keepdims=True) +
                                     1e-18)
        else:
            # TODO softmax/hardmax based accuracy
            idx_class = np.argmin(
                predicted, axis=1)  # for each sample select closest distance
            predicted = np.zeros_like(predicted)  # init probabilities vector
            predicted[
                np.arange(predicted.shape[0]),
                idx_class] = 1.0  # for each sample set prob 100% by columns
        y_chunk = np.array(y_list[idx_chunk_start:idx_chunk_end])
        meter_acc.add(predicted, y_chunk)

        # AssertionError: targets should be binary (0, 1)
        idxes_classes = np.argmax(predicted, axis=1)
        target_tp = np.array(np.equal(y_chunk, idxes_classes), dtype=np.int)
        meter_auc.add(np.max(predicted, axis=1), target_tp)

    return class_max_dist, class_centroids, y_list, sample_count, paths_embs_idx_path_pairs
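
The chunked read of dists_mem above can be reproduced in isolation; a small sketch (made-up shapes, temporary file) of walking a float16 memmap in fixed-size chunks without loading it whole:
import os
import tempfile

import numpy as np

sample_count, classes_size, chunk_size = 2500, 10, 1024
path = os.path.join(tempfile.mkdtemp(), 'dists.mmap')
dists = np.memmap(path, mode='w+', dtype=np.float16,
                  shape=(sample_count, classes_size))

for idx_chunk in range(sample_count // chunk_size + 1):
    idx_chunk_start = idx_chunk * chunk_size
    idx_chunk_end = min(sample_count, idx_chunk_start + chunk_size)
    if idx_chunk_end <= idx_chunk_start:
        break
    chunk = dists[idx_chunk_start:idx_chunk_end]  # only this slice is read from disk
    # ... accumulate metrics on `chunk` here ...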
Пример #37
0
for lack in lack_list:
    sub_dir = 'tmp_L{:d}/'.format(lack)
    source = np.load(out_dir + sub_dir +
                     "{:0>3d}_tmp_cat.npy".format(index)).item()
    beam = np.load(beam_dir +
                   "{:d}d_beam_sigma{:d}.npy".format(int(dim - lack), sigma))
    #=========================================================================================
    start = time.time()
    #after_smooth = dict()
    for i, key in enumerate(source.keys()):
        #=========================================================
        # Percentage Indicator
        if i % 100 == 0:
            print('Now: ' + str(float(i) / len(source) * 100) + '%')
        #=========================================================
        # Do Gaussian Smooth
        gal_pos = list(key)
        if gal_pos.count("Lack") <= (len(shape) - 3):
            gal_pos_array = np.asarray(gal_pos)
            gal_pos_array_str = np.array(gal_pos_array, dtype=str)
            no_lack_ind = np.where(gal_pos_array_str != "Lack")[0]
            Parallel(n_jobs=10)(delayed(gaussian_smooth)(
                pos, gal_pos_array, gal_pos_array_str, new_key, no_lack_ind)
                                for pos in beam)
        break
    end = time.time()
    print("Saving result ...\n")
    chdir(out_dir + sub_dir)
    #np.save("{:0>3d}_{:d}d_after_smooth".format(int(index), int(dim-lack)), np.array(after_smooth))
    print("Gaussian Smooth took {:.3f} secs\n".format(end - start))
Пример #38
0
# how many systems to probe and average over
systems = 1000
start = 220000  # look at the same examples as the network test dataset

#loop over all global optimization configurations
for i in range(len(ftype)):
    #print a nice header for the log file
    print('\n\n', '-' * 10, 'Starting, FT=%d, NG=%d' % (ftype[i], nglob[i]),
          '-' * 10)
    sta = datetime.datetime.now()
    print(sta)
    #parallel call
    #parallelized by system, so each system runs independently on a core
    results = Parallel(n_jobs=cores)(
        delayed(gen_fcn_pd)(g, ftype[i], nglob[i])
        for g in tqdm(range(start, start + systems)))
    end = datetime.datetime.now()
    #average runtime per system
    #(specific to this number of parallel workers)
    print('Runtime:', end - sta)
    sys_runtime = (end - sta) / systems
    print('Per System:', sys_runtime)

    #calculate the metrics to evaluate the algorithm's optimization performance in each configuration
    results = np.array(results)
    #calculate RMSE thickness and materials accuracy metrics
    metrics = accutest(
        np.transpose(
            np.array([
                np.argmax(tm1[start:start + systems, :], axis=1),
Пример #39
0
parser.add_argument("-d", "--dstdir", type=str, help="dst image folder")
parser.add_argument("-n", "--n_jobs", type=int, default=30, help="parallel jobs")
parser.add_argument("-p", "--parallel", action='store_true', default=False, help="if parallel")
args = parser.parse_args()
srcdir = args.srcdir
dstdir = args.dstdir
n_jobs = args.n_jobs
parallel = args.parallel


def squeeze_along_z(filename):
    print(filename)
    srcpath = os.path.join(srcdir, filename)
    dstpath = os.path.join(dstdir, filename)

    npsrc, header = nrrd.read(srcpath)
    npsrc = npsrc.astype(np.float16)
    npdst = np.average(npsrc, axis=2)
    npdst = npdst.astype(np.int32)
    nrrd.write(dstpath, npdst)


filelist = os.listdir(srcdir)

if parallel:
    Parallel(n_jobs=n_jobs, backend="multiprocessing")(
        delayed(squeeze_along_z)(filename) for filename in filelist)
else:
    for filename in filelist:
        squeeze_along_z(filename)
Пример #40
0
#except:
#  logger.error("Excel conversion script failed")
#  sys.exit(1)
end_time_report_seconds = time.time()
report_seconds = [end_time_report_seconds, -start_time_report_seconds]
time_report_seconds = sum(report_seconds)
#few remaining things: emailing, confluence page adding attachments etc.

start_time_compression = time.time()

if multiload == True:
  #joblib_method = "processes"
  joblib_method = "threads"
  if verbose == True:
    joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.outArchiveV)('archiving in parallel (' + joblib_method + '): ' + config.cyan  + report_file, report_file, env, new_tmp) for report_file in report_output_list )
  else:
    joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.outArchive)('archiving in parallel (' + joblib_method + '): ' + config.cyan  + report_file, report_file, env, new_tmp) for report_file in report_output_list )
  if nobteq == True:
    logger.warning('log archiving will be omitted')
  else:
    if verbose == True:
      joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.logArchiveV)('archiving in parallel (' + joblib_method + '): ' + config.cyan  + log_file, log_file, new_log) for log_file in log_file_list )
    else:
      joblib.Parallel(n_jobs=config.cpu_cores, prefer=joblib_method)(joblib.delayed(config.logArchive)('archiving in parallel (' + joblib_method + '): ' + config.cyan  + log_file, log_file, new_log) for log_file in log_file_list )
else:
  for report_file in report_output_list:
    if verbose == True:
      config.outArchiveV('archiving (same process/thread): ' + config.cyan + report_file, report_file, env, new_tmp)
    else:
      config.outArchive('archiving (same process/thread): ' + config.cyan + report_file, report_file, env, new_tmp)
Пример #41
0
if load_pr:
    print("Loading existing precision/recall info")
    existing = {
        model
        for model in models if os.path.isfile(pr_file.format(model))
    }
    for model in existing:
        with open(pr_file.format(model), 'rb') as f:
            info[model] = pickle.load(f)
    models = list(set(models) - existing)

print("Computing precision/recall")
output = [
    x for x in Parallel(n_jobs=-1)(
        delayed(_process_file)(pred_dir, model, file) for model in models
        for file in os.listdir(os.path.join(pred_dir, model))) if x is not None
]
for file in output:
    info[file.model].append(file)

print(info.keys())

# FIXME: Why is this not working?
# Save precision and recall
#for model in models:
#	with open(pr_file.format(model), 'wb') as f:
#		pickle.dump(info[model], f)

print("Computing means and stds")
with open(f1_file, 'w') as score:
Пример #42
0
def overlap(p,
            xt,
            yt,
            diat,
            rott,
            chord,
            B,
            x0,
            y0,
            dia,
            Vinf,
            pointcalc,
            param=None,
            veltype='ind',
            integration='gskr'):
    """
    Calculating wake velocities around a turbine based on wake overlap from surrounding turbines
    (using 21-point Gauss-Kronrod quadrature; Simpson's rule integration can be used via VAWT_Wake_Model.f90)

    Parameters
    ----------
    p : int
        number of points to calculate the velocity around a turbine (typically 36)
    xt : array
        downstream positions of surrounding turbine(s) in flow domain (m)
    yt : array
        lateral position of surrounding turbine(s) in flow domain (m)
    diat : array
        diameters of surrounding turbines (m)
    rott : array
        rotation rates of surrounding turbines (rad/s)
    chord : float
        chord length of the turbines (m)
    B : int
        number of turbine blades
    x0 : float
        downstream position in flow domain of turbine to be calculated (m)
    y0 : float
        lateral position in flow domain of turbine to be calculated (m)
    dia : float
        diameter of turbine to be calculated (m)
    Vinf : float
        free stream velocity (m/s)
    pointcalc : bool
        calculate the overlap at a point (True) or at p points around the blade flight path (False)
    param : array
        the coefficients used for the EMG distributions ('None' will provide the published coefficients automatically)
    veltype : string
        the type of velocity to calculate ('all': velocity magnitude, 'x': x-induced velocity, 'y': y-induced velocity,
        'ind': vector of both x- and y-induced velocities without free stream, 'vort': vorticity profile neglecting integration)
    integration : string
        the type of integration method used ('simp': Simpson's Rule, 'gskr': 21 Point Gauss-Kronrod Rule)
    m : int
        the number of downstream divisions requested for Simpson's Rule (must be divisible by 2); neglected otherwise
    n : int
        the number of lateral divisions requested for Simpson's Rule (must be divisible by 2); neglected otherwise

    Returns
    ----------
    velx : array
        final induced x-velocity at each point around the turbine being calculated (m/s)
    vely : array
        final induced y-velocity at each point around the turbine being calculated (m/s)
    """
    # initializing local variables and arrays
    t = np.size(xt)  # number of turbines
    xd = np.zeros(p)
    yd = np.zeros(p)
    velx = np.zeros(p)
    vely = np.zeros(p)
    velx_int = np.zeros(p)
    vely_int = np.zeros(p)

    # Use parallelization (with joblib)
    parallel = True
    # parallel = False

    # finding points around the flight path of the blades
    for i in range(p):
        if pointcalc == False:
            theta = (2.0 * pi / p) * i - (2.0 * pi / p) / 2.0
            xd[i] = x0 - sin(theta) * (dia / 2.0)
            yd[i] = y0 + cos(theta) * (dia / 2.0)
        elif pointcalc == True:
            xd[0] = x0
            yd[0] = y0
    intex = np.zeros(p)
    intey = np.zeros(p)

    if (t == 1):  # coupled configuration (only two VAWTs)
        if pointcalc == False:
            if parallel == True:
                wake = Parallel(n_jobs=-1)(delayed(velocity_field)(
                    xt[0], yt[0], xd[j], yd[j], Vinf, diat[0], rott[0], chord,
                    B, param, veltype, integration) for j in range(p))
                for i in range(p):
                    velx[i] = wake[i][0] * Vinf
                    vely[i] = wake[i][1] * Vinf
            elif parallel == False:
                for j in range(p):
                    wake = velocity_field(xt[0], yt[0], xd[j], yd[j], Vinf,
                                          diat[0], rott[0], chord, B, param,
                                          veltype, integration)
                    velx[j] = wake[0] * Vinf
                    vely[j] = wake[1] * Vinf
        elif pointcalc == True:
            wake = velocity_field(xt[0], yt[0], xd[0], yd[0], Vinf, diat[0],
                                  rott[0], chord, B, param, veltype,
                                  integration)
            velx[0] = wake[0] * Vinf
            vely[0] = wake[1] * Vinf

    else:  # multiple turbine wake overlap
        if pointcalc == False:
            if parallel == True:
                wake = Parallel(n_jobs=-1)(delayed(velocity_field)(
                    xt[w], yt[w], xd[q], yd[q], Vinf, diat[w], rott[w], chord,
                    B, param, veltype, integration) for w in range(t)
                                           for q in range(p))
            for j in range(t):
                for k in range(p):
                    if parallel == True:
                        velx_int[k] = -wake[k + j * p][0]
                        vely_int[k] = wake[k + j * p][1]
                    elif parallel == False:
                        wake = velocity_field(xt[j], yt[j], xd[k], yd[k], Vinf,
                                              diat[j], rott[j], chord, B,
                                              param, veltype, integration)
                        velx_int[k] = -wake[0]
                        vely_int[k] = wake[1]

                    # sum of squares of velocity deficits
                    if (velx_int[k] >= 0.0):
                        intex[k] = intex[k] + (velx_int[k])**2
                    else:
                        intex[k] = intex[k] - (velx_int[k])**2

                    if (vely_int[k] >= 0.0):
                        intey[k] = intey[k] + (vely_int[k])**2
                    else:
                        intey[k] = intey[k] - (vely_int[k])**2
        elif pointcalc == True:
            for j in range(t):
                wake = velocity_field(xt[j], yt[j], xd[0], yd[0], Vinf,
                                      diat[j], rott[j], chord, B, param,
                                      veltype, integration)
                velx_int[0] = -wake[0]
                vely_int[0] = wake[1]

                # sum of squares of velocity deficits
                if (velx_int[0] >= 0.0):
                    intex[0] = intex[0] + (velx_int[0])**2
                else:
                    intex[0] = intex[0] - (velx_int[0])**2

                if (vely_int[0] >= 0.0):
                    intey[0] = intey[0] + (vely_int[0])**2
                else:
                    intey[0] = intey[0] - (vely_int[0])**2

        # square root of sum of squares
        for l in range(p):
            if (intex[l] >= 0.0):
                velx[l] = -Vinf * (sqrt(intex[l]))
            else:
                velx[l] = Vinf * (sqrt(fabs(intex[l])))

            if (intey[l] >= 0.0):
                vely[l] = Vinf * (sqrt(intey[l]))
            else:
                vely[l] = -Vinf * (sqrt(fabs(intey[l])))

    return velx, vely
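
A hypothetical call of overlap for a pair of counter-rotating turbines influencing a third one downstream; all geometry values are made up, and velocity_field from the wake model must be importable for this to actually run:
import numpy as np

xt = np.array([0.0, 0.0])      # downstream positions of the surrounding turbines (m)
yt = np.array([-6.0, 6.0])     # lateral positions (m)
diat = np.array([6.0, 6.0])    # diameters (m)
rott = np.array([2.0, -2.0])   # rotation rates (rad/s), counter-rotating pair

velx, vely = overlap(36, xt, yt, diat, rott, chord=0.25, B=3,
                     x0=30.0, y0=0.0, dia=6.0, Vinf=8.0,
                     pointcalc=False, param=None,
                     veltype='ind', integration='gskr')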
Пример #43
0
                    for ev in env:
                        writeCache(ev, cache)
                    cache = {}
                cnt += 1
        for ev in env:
            writeCache(ev, cache)

    for iter in range((max_n_synth // 1000000) + 1):
        if iter == 0:
            n_imgs = max_n_synth % 1000000
        else:
            n_imgs = 1000000
        n_jobs = torch.cuda.device_count()
        kwargs_gen = (dict(device=i, nsamples=int(n_imgs / n_jobs))
                      for i in range(n_jobs))
        data = Parallel(n_jobs=n_jobs)(delayed(GenImgs)(**kwargs)
                                       for kwargs in kwargs_gen)

        for d in data:
            for i in tqdm(range(len(d[0]))):
                imageKey = 'image-%09d' % cnt
                labelKey = 'label-%09d' % cnt
                cache[imageKey] = d[0][i]
                cache[labelKey] = d[1][i]
                if (cnt - cnt_orig) % 1000 == 0:
                    for n in range(len(n_synth)):
                        if n_synth[n] >= (cnt - cnt_orig):
                            writeCache(env[n], cache)
                    cache = {}
                cnt += 1
Пример #44
0
    def load(self):
        cols = []
        with open(self.file, 'r') as f:
            cols = f.readline().replace('\n', '').split(self.sep)

        for col in self.drop_col:
            if col in cols:
                cols.remove(col)

        df = pd.read_csv(self.file, sep=self.sep, usecols=cols)
        attributes = list(df.keys())
        attributes.remove(self.tid_col)

        if self.label_col and self.label_col != self.tid_col:
            attributes.remove(self.label_col)

        lat_lon = self.lat in attributes and self.lon in attributes

        if lat_lon:
            attributes.remove(self.lat)
            attributes.remove(self.lon)

        tids = sorted(df[self.tid_col].unique())

        def load_tids(s):
            ret = []

            for idx in range(s.start, s.stop):
                tid = tids[idx]
                traj = df.loc[df[self.tid_col] == tid, attributes].values

                if lat_lon:
                    loc = df.loc[df[self.tid_col] == tid,
                                 [self.lat, self.lon]].values
                    new_traj = []

                    for i, _ in enumerate(loc):
                        point = list(traj[i])
                        point.append(loc[i])
                        new_traj.append(point)

                    traj = new_traj
                ret.append(traj)
            return ret

        labels = None
        func = delayed(load_tids)

        data = Parallel(n_jobs=self.n_jobs, verbose=0)(
            func(s) for s in gen_even_slices(len(tids), self.n_jobs))
        data = np.concatenate(data)

        if self.label_col:
            labels = df \
                .drop_duplicates(subset=[self.tid_col, self.label_col],
                                 inplace=False) \
                .sort_values(self.tid_col,
                             ascending=True,
                             inplace=False)[self.label_col].values

        if lat_lon:
            attributes.append('lat_lon')

        return TrajectoryData(attributes=attributes,
                              data=data,
                              tids=tids,
                              labels=labels)
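
The gen_even_slices pattern used above is worth isolating; a toy sketch (not the trajectory loader) that splits an index range into near-equal slices and sums each part in a joblib worker:
import numpy as np
from joblib import Parallel, delayed
from sklearn.utils import gen_even_slices

values = np.arange(10)

def process_slice(s):
    # each call receives a contiguous slice object covering part of the range
    return values[s].sum()

partials = Parallel(n_jobs=2)(delayed(process_slice)(s)
                              for s in gen_even_slices(len(values), 2))
assert sum(partials) == values.sum()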
Пример #45
0
    def optimize(
        self,
        func,  # type: ObjectiveFuncType
        n_trials=None,  # type: Optional[int]
        timeout=None,  # type: Optional[float]
        n_jobs=1,  # type: int
        catch=(),  # type: Union[Tuple[()], Tuple[Type[Exception]]]
        callbacks=None,  # type: Optional[List[Callable[[Study, structs.FrozenTrial], None]]]
        gc_after_trial=True  # type: bool
    ):
        # type: (...) -> None
        """Optimize an objective function.

        Args:
            func:
                A callable that implements objective function.
            n_trials:
                The number of trials. If this argument is set to :obj:`None`, there is no
                limitation on the number of trials. If :obj:`timeout` is also set to :obj:`None`,
                the study continues to create trials until it receives a termination signal such
                as Ctrl+C or SIGTERM.
            timeout:
                Stop study after the given number of second(s). If this argument is set to
                :obj:`None`, the study is executed without time limitation. If :obj:`n_trials` is
                also set to :obj:`None`, the study continues to create trials until it receives a
                termination signal such as Ctrl+C or SIGTERM.
            n_jobs:
                The number of parallel jobs. If this argument is set to :obj:`-1`, the number is
                set to CPU count.
            catch:
                A study continues to run even when a trial raises one of the exceptions specified
                in this argument. Default is an empty tuple, i.e. the study will stop for any
                exception except for :class:`~optuna.exceptions.TrialPruned`.
            callbacks:
                List of callback functions that are invoked at the end of each trial.
            gc_after_trial:
                Flag to execute garbage collection at the end of each trial. By default, garbage
                collection is enabled, just in case. You can turn it off with this argument if
                memory is safely managed in your objective function.
        """

        if not isinstance(catch, tuple):
            raise TypeError(
                "The catch argument is of type \'{}\' but must be a tuple.".
                format(type(catch).__name__))

        if not self._optimize_lock.acquire(False):
            raise RuntimeError(
                "Nested invocation of `Study.optimize` method isn't allowed.")

        try:
            if n_jobs == 1:
                self._optimize_sequential(func, n_trials, timeout, catch,
                                          callbacks, gc_after_trial, None)
            else:
                time_start = datetime.datetime.now()

                if n_trials is not None:
                    _iter = iter(range(n_trials))
                elif timeout is not None:
                    # This is needed for mypy
                    actual_timeout = timeout  # type: float
                    _iter = iter(
                        lambda: (datetime.datetime.now() - time_start).
                        total_seconds() > actual_timeout, True)
                else:
                    # The following expression makes an iterator that never ends.
                    _iter = iter(int, 1)

                with Parallel(n_jobs=n_jobs, prefer="threads") as parallel:
                    if not isinstance(parallel._backend, joblib.parallel.ThreadingBackend) and \
                       isinstance(self._storage, storages.InMemoryStorage):
                        msg = 'The default storage cannot be shared by multiple processes. ' \
                              'Please use an RDB (RDBStorage) when you use joblib for ' \
                              'multi-processing. The usage of RDBStorage can be found in ' \
                              'https://optuna.readthedocs.io/en/stable/tutorial/rdb.html.'
                        warnings.warn(msg, UserWarning)
                        _logger.warning(msg)

                    parallel(
                        delayed(self._optimize_sequential)
                        (func, 1, timeout, catch, callbacks, gc_after_trial,
                         time_start) for _ in _iter)
        finally:
            self._optimize_lock.release()
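
A minimal sketch of driving Study.optimize with thread-based parallelism; the objective is invented, and suggest_uniform reflects the optuna API of the era this snippet comes from:
import optuna

def objective(trial):
    x = trial.suggest_uniform('x', -10.0, 10.0)
    return (x - 2.0) ** 2

study = optuna.create_study()
# n_jobs > 1 dispatches trials through joblib threads; use an RDB storage for processes.
study.optimize(objective, n_trials=50, n_jobs=4)
print(study.best_params)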
Пример #46
0
def parallel_df(func, df, series):
    n_jobs = min(cpu_count(), len(df.columns))
    col_chunks = np.array_split(range(len(df.columns)), n_jobs)
    lst = Parallel(n_jobs=n_jobs)(delayed(func)(df.iloc[:, col_chunk], series)
                                  for col_chunk in col_chunks)
    return pd.concat(lst)
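
A hypothetical use of parallel_df (assuming its imports, Parallel, delayed and cpu_count, are in scope as above): each worker gets a column chunk, correlates it against the target series, and the chunks are concatenated back into one Series:
import numpy as np
import pandas as pd

def corr_with_target(df_chunk, series):
    # per-column Pearson correlation with the target
    return df_chunk.apply(lambda col: col.corr(series))

df = pd.DataFrame(np.random.rand(100, 8),
                  columns=[f'f{i}' for i in range(8)])
target = pd.Series(np.random.rand(100))

scores = parallel_df(corr_with_target, df, target)
print(scores.sort_values(ascending=False).head())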
Пример #47
0
        pass  #added
data = data_new  #added

batches = [data[i:i + 1] for i in xrange(0, len(data))]
dataset = MolTreeDataset(batches, vocab, assm=False)
loader = DataLoader(dataset,
                    batch_size=1,
                    shuffle=False,
                    num_workers=0,
                    collate_fn=lambda x: x[0])

torch.manual_seed(args.seed)


def helper(batch):
    mol_batch = batch[0]
    x_tree_vecs, _, x_mol_vecs = model.encode(batch[1], batch[2])
    assert x_tree_vecs.size(0) == x_mol_vecs.size(0)

    for k in xrange(args.num_decode):
        z_tree_vecs, z_mol_vecs = model.fuse_noise(x_tree_vecs, x_mol_vecs)
        smiles = mol_batch[0].smiles
        new_smiles = model.decode(z_tree_vecs[0].unsqueeze(0),
                                  z_mol_vecs[0].unsqueeze(0))
        if new_smiles != None:
            print smiles, new_smiles


Parallel(n_jobs=args.n_core)(delayed(helper)(batch) for batch in loader)
#for batch in loader:
Пример #48
0
    ax = fig.add_subplot(gs[1,2:4])
    ax.plot(ph*24, f, '.k', ms=5)
    print(ph.max(), ph.min())

    ax.set_xlim(-48*dur, 48*dur)
    ax.set_ylim(1-1.5*depth, 1+5e-3)

    ax.set_title(r''+ targetfile + r'   $P=%.5f$' % period + r'   SNR=%f' % snr)
    ax.set_xlabel('Hours from mid-transit')
    ax.set_ylabel('Normalized flux')

    plt.show()


else:
    from joblib import Parallel, delayed, Memory

    #memory  = Memory('./cachedir', verbose=0)
    #costoso = memory.cache(run_BLS)

    allfiles = glob.glob(folder + 'TIC*.dat')
    #results  = np.memmap('temp.npz', dtype='float32', mode='w+', shape=(len(allfiles),9))

    #results  = np.array(Parallel(n_jobs=args.ncpu, verbose=0)(delayed(costoso)(f) for f in tqdm(allfiles)))
    results  = np.array(Parallel(n_jobs=args.ncpu, verbose=0)(delayed(run_BLS)(f) for f in tqdm(allfiles)))
    order    = np.argsort(results[:,5])[::-1]
    results  = results[order]
    print(results)

    np.savetxt(args.output, results, fmt='%s')
Пример #49
0
    #         for x in range(len(f)): # For every row in the constant image
    #             for y in range(len(f[x])): # For every column in that row
    #                 try:
    #                     val_a = f[x][y]
    #                 except IndexError:
    #                     val_a = 1
    #                 try:
    #                     val_b = e[x + offset_x][y + offset_y]
    #                 except IndexError:
    #                     val_b = 1
    #                 val = min(val_a, val_b)
    #                 if smallest_row is None and val < THRESHOLD:
    #                     smallest_row = (x,y)
    #                 if val < THRESHOLD and y < smallest_col[1]:
    #                     smallest_col = (x,y)
    #                 if val < THRESHOLD:
    #                     greatest_row = (x,y)
    #                 if val < THRESHOLD and y > greatest_col[1]:
    #                     greatest_col = (x,y)
    #                 f[x][y] = val # Record the super-imposed value into f
    #         config = Configuration(smallest_row,smallest_col,greatest_row,greatest_col,f,e,j) # Creates a new configuration
    #         if configurations is None or config < configurations:
    #             configurations = config
    #         return configurations

    Parallel(n_jobs=2)(delayed(analyze)(
        c,
        skimage.color.rgb2gray(
            skimage.io.imread('images_0-3_jpg/{}'.format(img))), img)
                       for img in directory)
Пример #50
0
#nmax = 40000 #Maximum number up to which to search for primes
nmax = 400000 #Maximum number up to which to search for primes
inputs = range(0, nmax)


def isPrime(num):
    if num < 2:  # 0 and 1 are not prime
        return False
    elif num == 2:
        return True
    else:
        for i in range(2, num):
            if num % i == 0:
                return False
        return True            


starttime = time.time()

num_cores = multiprocessing.cpu_count()

#If we access shared memory structures (lists, dictionaries...), pass require='sharedmem',
#but performance drops a lot:
results = Parallel(n_jobs=num_cores)(delayed(isPrime)(i) for i in inputs)


print('primes found: %s' % results.count(True))
print('non-primes found: %s' % results.count(False))

print('That took {} seconds'.format(time.time() - starttime))
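
A sketch of a cheaper primality test that only tries divisors up to the square root; it can be dropped into the same Parallel call in place of isPrime:
def isPrimeFast(num):
    if num < 2:
        return False
    if num % 2 == 0:
        return num == 2
    for i in range(3, int(num ** 0.5) + 1, 2):
        if num % i == 0:
            return False
    return True

# drop-in replacement:
# results = Parallel(n_jobs=num_cores)(delayed(isPrimeFast)(i) for i in inputs)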
Пример #51
0
    def generate(self,
                 n_generations=100,
                 population_size=50,
                 individual_size=10,
                 monitor=None):
        log.info(
            f'Starting working. n_generations {n_generations}, population_size {population_size}, individual_size {individual_size}'
        )

        backend_ctx = None
        try:
            if population_size != 1:
                # keep joblib's parallel_backend callable un-shadowed
                backend_ctx = parallel_backend(
                    'threading', n_jobs=self.n_parallel_jobs)

            population = self.toolbox.population(
                size=population_size, individual_size=individual_size)

            # Evaluate the entire population
            #fitnesses = map(toolbox.evaluate, population)
            fitnesses = Parallel()(delayed(self.toolbox.evaluate)(individual)
                                   for individual in population)
            for ind, fit in zip(population, fitnesses):
                ind.fitness.values = fit

            for g in range(n_generations):
                log.debug(f'Running generation {g}')

                # Select the next generation individuals
                offspring = self.toolbox.select(population, len(population))
                log.debug(f'g{g} offspring')

                # Clone the selected individuals
                offspring = [self.toolbox.clone(o) for o in offspring]
                #offspring = map(toolbox.clone, offspring)
                log.debug(f'g{g} clone')

                if (self.crossover_prob > 0):
                    # Apply crossover and mutation on the offspring
                    for child1, child2 in zip(offspring[::2], offspring[1::2]):
                        if random.random() < self.crossover_prob:
                            self.toolbox.mate(child1, child2)
                            del child1.fitness.values
                            del child2.fitness.values
                    log.debug(f'g{g} crossover')

                if (self.mutation_prob > 0):
                    for mutant in offspring:
                        if random.random() < self.mutation_prob:
                            self.toolbox.mutate(mutant, self.drawing_problem)
                            del mutant.fitness.values
                    log.debug(f'g{g} mutation')

                # Evaluate the individuals with an invalid fitness
                invalid_ind = [
                    ind for ind in offspring if not ind.fitness.valid
                ]
                #fitnesses = map(toolbox.evaluate, invalid_ind)
                fitnesses = Parallel()(
                    delayed(self.toolbox.evaluate)(individual)
                    for individual in invalid_ind)
                for ind, fit in zip(invalid_ind, fitnesses):
                    ind.fitness.values = fit

                # The population is entirely replaced by the offspring
                population[:] = offspring

                log.debug(f'{g} final population')
                if monitor is not None:
                    monitor.submit(population)

            return population
        finally:
            if backend_ctx is not None:
                backend_ctx.unregister()
Пример #52
0
def setupFeeds(check_update, documents):
    for item, doc in documents.items():
        if "netset" in item or "ipset" in item:
            iUpdateHrs = update_interval / 60 / 60
            Parallel(n_jobs=multiprocessing.cpu_count(), prefer='threads')(
                delayed(GetFireHoleLists)(check_update, itm) for itm in doc)
Пример #53
0
def get_bboxes_val(annotations_dir):
	bboxes = []
	videos = os.listdir(annotations_dir)
	bboxes = sum(Parallel(n_jobs=8)(delayed(get_folder_bndbox)(annotations_dir, video, 'val') 
						for video in tqdm(videos, total=len(videos), file=sys.stdout)), [])
	return bboxes
Пример #54
0
    if bSkipFeeds:
        print("Fetching new and updated feeds... [Update older than 24 hrs]")
        setupFeeds(args.skip_update, documents)
        LoadFeeds(documents)

    if getFQDN:
        print("Note: Hostname lookups will increase processing time.")

    lstColumns = ["IP, City, Country, ASN, ASN Org, FQDN, Indicators"]

    lstResults = []

    if args.file:
        with open(args.file, "r", encoding='utf-8') as f:
            lstResults = Parallel(n_jobs=multiprocessing.cpu_count(),
                                  prefer='threads')(delayed(ipProcess)(ip)
                                                    for ip in f)
    elif args.ip:
        lstResults.append(ipProcess(args.ip.rstrip()))
    else:
        print("Provide an ip or file to process...")

    #Remove skipped lines that didn't have threat feed hits
    if (bHitsOnly):
        lstResults = [i for i in lstResults if i]

    #Output results
    print("\r\n")
    print("\r\n".join(lstColumns))
    print("\r\n".join(lstResults))
Пример #55
0
def remember(experiment, occlusion=None, bars_type=None, tolerance=0):
    """ Creates images from features.

    Uses the decoder part of the neural networks to (re)create images from features.

    Parameters
    ----------
    experiment : TYPE
        DESCRIPTION.
    occlusion : TYPE, optional
        DESCRIPTION. The default is None.
    tolerance : TYPE, optional
        DESCRIPTION. The default is 0.

    Returns
    -------
    None.

    """

    for i in range(constants.training_stages):
        testing_data_filename = constants.data_name + constants.testing_suffix
        testing_data_filename = constants.data_filename(
            testing_data_filename, i)
        testing_features_filename = constants.features_name(
            experiment, occlusion, bars_type) + constants.testing_suffix
        testing_features_filename = constants.data_filename(
            testing_features_filename, i)
        testing_labels_filename = constants.labels_name + constants.testing_suffix
        testing_labels_filename = constants.data_filename(
            testing_labels_filename, i)
        memories_filename = constants.memories_name(experiment, occlusion,
                                                    bars_type, tolerance)
        memories_filename = constants.data_filename(memories_filename, i)
        labels_filename = constants.labels_name + constants.memory_suffix
        labels_filename = constants.data_filename(labels_filename, i)
        model_filename = constants.model_filename(constants.model_name, i)

        testing_data = np.load(testing_data_filename)
        testing_features = np.load(testing_features_filename)
        testing_labels = np.load(testing_labels_filename)
        memories = np.load(memories_filename)
        labels = np.load(labels_filename)
        model = tf.keras.models.load_model(model_filename)

        # Drop the classifier.
        autoencoder = Model(model.input, model.output[1])
        autoencoder.summary()

        # Drop the encoder
        input_mem = Input(shape=(constants.domain, ))
        decoded = get_decoder(input_mem)
        decoder = Model(inputs=input_mem, outputs=decoded)
        decoder.summary()

        for dlayer, alayer in zip(decoder.layers[1:], autoencoder.layers[31:]):
            dlayer.set_weights(alayer.get_weights())

        produced_images = decoder.predict(testing_features)
        n = len(testing_labels)

        Parallel(n_jobs=constants.n_jobs, verbose=5)(delayed(store_images)(
            original, produced,
            constants.testing_directory(experiment, occlusion, bars_type), i,
            j, label) for (j, original, produced, label) in zip(
                range(n), testing_data, produced_images, testing_labels))

        total = len(memories)
        steps = len(constants.memory_fills)
        step_size = int(total / steps)

        for j in range(steps):
            print('Decoding memory size ' + str(j) + ' and stage ' + str(i))
            start = j * step_size
            end = start + step_size
            mem_data = memories[start:end]
            mem_labels = labels[start:end]
            produced_images = decoder.predict(mem_data)

            Parallel(n_jobs=constants.n_jobs, verbose=5)(
                delayed(store_memories)(label, produced, features,
                                        constants.memories_directory(
                                            experiment, occlusion, bars_type,
                                            tolerance), i, j)
                for (produced, features,
                     label) in zip(produced_images, mem_data, mem_labels))
Пример #56
0
# threshold_vec=np.arange(0,.3,0.1)
num_cores = multiprocessing.cpu_count()

# volumes=Parallel(n_jobs=num_cores)(
#         delayed(read_image)(path_fuzzy=path_fuzzy[i],
#                             path_gt=path_gt[i],i=i)
#         for i in range(len(path_fuzzy)))#
res_all = []
for cntr in range(len(path_fuzzy)):
    xsl_nm = test_path + out_dir + str.split(
        str.split(path_gt[cntr], '/')[-1], '_gtv.mha')[0] + '.xlsx'
    [logits, labels] = read_imgs(path_fuzzy[cntr], path_gt[cntr])
    res = Parallel(n_jobs=num_cores)(
        delayed(tp_tn_fp_fn)(logits=logits,
                             labels=labels,
                             threshold=threshold_vec[i],
                             path_fuzzy=path_fuzzy[cntr],
                             cntr=cntr) for i in range(len(threshold_vec)))

    df = pd.DataFrame(res,
                      columns=pd.Index(
                          ['name', 'threshold', 'TP', 'TN', 'FP', 'FN'],
                          name='Genus'))
    # Create a Pandas Excel writer using XlsxWriter as the engine.

    writer = pd.ExcelWriter(xsl_nm, engine='xlsxwriter')

    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name='Sheet1')

    # Close the Pandas Excel writer and output the Excel file.
    writer.save()
Пример #57
0
    y.sort()

    # Uncomment the next two lines if you want to sparsify the plot further.
    # y = y[0::500]
    # x = x[0::500]

    plt.scatter(x, y, marker='.')

    # plt.show()
    plt.xlabel("Kth nearest Neighbour")
    plt.ylabel("Distance")

    plt.savefig(filename + ".png")
    plt.clf()
    return


files = []
for file in glob.glob("output_twitter.txt*"):
    files.append(file)

files.sort()

print files

num_cores = 8

results = Parallel(n_jobs=num_cores)(delayed(processInput)(file)
                                     for file in files)
Пример #58
0
def Ridge_OptimalAlpha_KFold(Training_Data, Training_Score, Fold_Quantity,
                             Alpha_Range, ResultantFolder, Parallel_Quantity):

    Subjects_Quantity = len(Training_Score)
    Sorted_Index = np.argsort(Training_Score)
    Training_Data = Training_Data[Sorted_Index, :]
    Training_Score = Training_Score[Sorted_Index]

    Inner_EachFold_Size = np.int(
        np.fix(np.divide(Subjects_Quantity, Fold_Quantity)))
    MaxSize = Inner_EachFold_Size * Fold_Quantity
    EachFold_Max = np.ones(Fold_Quantity, np.int) * MaxSize
    tmp = np.arange(Fold_Quantity - 1, -1, -1)
    EachFold_Max = EachFold_Max - tmp
    Remain = np.mod(Subjects_Quantity, Fold_Quantity)
    for j in np.arange(Remain):
        EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity

    print(Alpha_Range)
    Inner_Corr = np.zeros((Fold_Quantity, len(Alpha_Range)))
    Inner_MAE_inv = np.zeros((Fold_Quantity, len(Alpha_Range)))
    Alpha_Quantity = len(Alpha_Range)
    for k in np.arange(Fold_Quantity):

        Inner_Fold_K_Index = np.arange(k, EachFold_Max[k], Fold_Quantity)
        Inner_Fold_K_Data_test = Training_Data[Inner_Fold_K_Index, :]
        Inner_Fold_K_Score_test = Training_Score[Inner_Fold_K_Index]
        Inner_Fold_K_Data_train = np.delete(Training_Data,
                                            Inner_Fold_K_Index,
                                            axis=0)
        Inner_Fold_K_Score_train = np.delete(Training_Score,
                                             Inner_Fold_K_Index)

        Parallel(n_jobs=Parallel_Quantity,
                 backend="threading")(delayed(Ridge_SubAlpha)(
                     Inner_Fold_K_Data_train, Inner_Fold_K_Score_train,
                     Inner_Fold_K_Data_test, Inner_Fold_K_Score_test,
                     Alpha_Range[l], l, ResultantFolder)
                                      for l in np.arange(len(Alpha_Range)))

        for l in np.arange(Alpha_Quantity):
            print(l)
            Alpha_l_Mat_Path = ResultantFolder + '/Alpha_' + str(l) + '.mat'
            Alpha_l_Mat = sio.loadmat(Alpha_l_Mat_Path)
            Inner_Corr[k, l] = Alpha_l_Mat['Corr'][0][0]
            Inner_MAE_inv[k, l] = Alpha_l_Mat['MAE_inv']
            os.remove(Alpha_l_Mat_Path)

        Inner_Corr = np.nan_to_num(Inner_Corr)
    Inner_Corr_Mean = np.mean(Inner_Corr, axis=0)
    Inner_Corr_Mean = (Inner_Corr_Mean -
                       np.mean(Inner_Corr_Mean)) / np.std(Inner_Corr_Mean)
    Inner_MAE_inv_Mean = np.mean(Inner_MAE_inv, axis=0)
    Inner_MAE_inv_Mean = (Inner_MAE_inv_Mean - np.mean(Inner_MAE_inv_Mean)
                          ) / np.std(Inner_MAE_inv_Mean)
    Inner_Evaluation = Inner_Corr_Mean + Inner_MAE_inv_Mean

    Inner_Evaluation_Mat = {
        'Inner_Corr': Inner_Corr,
        'Inner_MAE_inv': Inner_MAE_inv,
        'Inner_Evaluation': Inner_Evaluation
    }
    sio.savemat(ResultantFolder + '/Inner_Evaluation.mat',
                Inner_Evaluation_Mat)

    Optimal_Alpha_Index = np.argmax(Inner_Evaluation)
    Optimal_Alpha = Alpha_Range[Optimal_Alpha_Index]
    return (Optimal_Alpha, Inner_Corr, Inner_MAE_inv)
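
A sketch of the Ridge_SubAlpha worker this function expects: for one candidate alpha it must write Alpha_<l>.mat containing Corr and MAE_inv, which the loop above reloads and deletes. The ridge fit, Pearson correlation and negated MAE below are assumptions about its internals.
import numpy as np
import scipy.io as sio
from sklearn.linear_model import Ridge

def Ridge_SubAlpha(Training_Data, Training_Score, Testing_Data, Testing_Score,
                   Alpha, Alpha_ID, ResultantFolder):
    # fit one ridge model for this candidate alpha and score it on the held-out fold
    model = Ridge(alpha=Alpha)
    model.fit(Training_Data, Training_Score)
    Predicted = model.predict(Testing_Data)
    Corr = np.corrcoef(Predicted, Testing_Score)[0, 1]
    MAE_inv = -np.mean(np.abs(Predicted - Testing_Score))  # larger is better
    sio.savemat(ResultantFolder + '/Alpha_' + str(Alpha_ID) + '.mat',
                {'Corr': Corr, 'MAE_inv': MAE_inv})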
Пример #59
0
        for subset in ['images', 'instances', 'labels']:
            try:
                os.mkdir(os.path.join(data_path, image_set + '_new', subset))
            except:
                pass

    for image_set in ['training', 'validation']:
        for subset in ['images', 'instances', 'labels']:
            target_path = os.path.join(data_path, image_set, subset)
            file_names = os.listdir(target_path)
            file_names = [
                os.path.join(target_path, file_name)
                for file_name in file_names
            ]

            result = Parallel(n_jobs=8)(delayed(downscale)(r)
                                        for r in file_names)

    for image_set in ['testing']:
        for subset in ['images']:
            target_path = os.path.join(data_path, image_set, subset)
            file_names = os.listdir(target_path)
            file_names = [
                os.path.join(target_path, file_name)
                for file_name in file_names
            ]

            result = Parallel(n_jobs=8)(delayed(downscale)(r)
                                        for r in file_names)
Пример #60
0
            if is_timeseries_split:
                estimated_rewards_by_reg_model = reg_model.predict(
                    context=bandit_feedback["context_test"],
                )
            else:
                estimated_rewards_by_reg_model = reg_model.predict(
                    context=bandit_feedback["context"][~is_for_reg_model],
                )
            performance_reg_model_b = evaluate_reg_model(
                bandit_feedback=bandit_feedback,
                is_timeseries_split=is_timeseries_split,
                estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
                is_for_reg_model=is_for_reg_model,
            )

            return performance_reg_model_b

    processed = Parallel(backend="multiprocessing", n_jobs=n_jobs, verbose=50,)(
        [delayed(process)(i) for i in np.arange(n_runs)]
    )
    # save performance of the regression model in './logs' directory.
    if not is_mrdr:
        performance_reg_model = {metric: dict() for metric in ["auc", "rce"]}
        for b, performance_reg_model_b in enumerate(processed):
            for metric, metric_value in performance_reg_model_b.items():
                performance_reg_model[metric][b] = metric_value
        DataFrame(performance_reg_model).describe().T.round(6).to_csv(
            log_path / f"performance_reg_model.csv"
        )