Example #1
    def encode_by_tokens(self, graph_set):
        sentence_tokens = graph_set[0].get("tokens", [])
        sentence_encoded = [
            utils.get_idx(t, self._word2idx) for t in sentence_tokens
        ]
        edges_encoded = []
        for g in graph_set:
            first_edge = graph.get_graph_first_edge(g)
            property_label = first_edge.get('label', '')
            edge_ids = [
                utils.get_idx(t, self._word2idx)
                for t in property_label.split()
            ]
            edges_encoded.append(edge_ids)

        return sentence_encoded, edges_encoded
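
In this codebase, get_idx(t, self._word2idx) looks up a token's vocabulary index. A minimal sketch of what such a helper might look like, assuming a plain dictionary lookup with a fallback index of 0 for out-of-vocabulary tokens (both assumptions, not confirmed by the source):

# Hypothetical sketch, not the original utils.get_idx.
def get_idx(token, word2idx, unknown_idx=0):
    # dict.get falls back to unknown_idx when the token is out of vocabulary
    return word2idx.get(token, unknown_idx)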
Example #2
def get_interp_data(z_sample, maps, logfile=None, verbose=True):
    """
    Finds the two projections with redshifts that are the nearest higher and
    nearest lower redshifts and extracts the

    Parameters
    ----------
    z_sample : float
        The redshift of interest in the interpolation.

    maps : array or array-like
        The filenames of the column density maps. These have the same indexing
        as redshift_arr

    logfile :
        The file to write the logs.


    Returns
    -------
    map_high : str

    map_high : str

    """

    z_exist = np.empty(len(maps))
    for i in range(len(maps)):
        with h5py.File(maps[i], "r") as ds:
            z_exist[i] = ds["Header"].attrs["Redshift"]

    # Get index
    idx_low, idx_high = utils.get_idx(z_sample, z_exist)

    # Get redshift of maps lower/higher than z_sample
    z_low, z_high = z_exist[idx_low], z_exist[idx_high]
    dist_low, dist_high = utils.z_to_mpc(z_low), utils.z_to_mpc(z_high)
    map_low, map_high = maps[idx_low], maps[idx_high]
    #data_low, data_high = h5py.File(map_low), h5py.File(map_high)

    if logfile:
        wlog("{0:<10} {1:>10} {2:<10} {3:>10}".format(
            "idx_low", idx_low, "idx_high", idx_high), logfile, verbose)
        wlog("{0:<10} {1:>10.5} {2:<10} {3:>10.5}".format(
            "z_low", z_low, "z_high", z_high), logfile, verbose)
        wlog("{0:<10} {1:>10.5} {2:<10} {3:>10.5}".format(
            "dist_low", dist_low, "dist_high", dist_high), logfile, verbose)
        wlog("{0:<10} {1} {2:<10} {3}\n".format(
            "map_low", map_low, "map_high", map_high), logfile, verbose)

    return map_low, map_high
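
Here get_idx(z_sample, z_exist) is a different helper: it returns the indices of the array entries that bracket z_sample from below and above. A minimal sketch under that assumption (out-of-range inputs and exact ties are not handled):

# Hypothetical sketch, not the original utils.get_idx.
import numpy as np

def get_idx(value, arr):
    # index of the largest entry <= value and of the smallest entry >= value
    arr = np.asarray(arr, dtype=float)
    idx_low = int(np.where(arr <= value, arr, -np.inf).argmax())
    idx_high = int(np.where(arr >= value, arr, np.inf).argmin())
    return idx_low, idx_high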
Example #3
 def encode_by_tokens(self, graph_set):
     sentence_tokens = graph_set[0].get("tokens", [])
     sentence_encoded = [
         utils.get_idx(t, self._word2idx) for t in sentence_tokens
     ]
     graphs_encoded = []
     for g in graph_set:
         edges_encoded = []
         for edge in g.get('edgeSet', []):
             property_label = edge.get('label', '')
             edge_ids = [
                 utils.get_idx(t, self._word2idx)
                 for t in property_label.split()
             ]
             edges_encoded.append(edge_ids)
         graphs_encoded.append(edges_encoded)
     return sentence_encoded, graphs_encoded
 def encode_graphs(self, graph_set):
     graphs_encoded = []
     for g in graph_set:
         edges_encoded = []
         for edge in g.get('edgeSet', []):
             property_label = edge.get('label', '')
             edge_ids = [
                 utils.get_idx(t, self._word2idx)
                 for t in property_label.split()
             ]
             edges_encoded.append(edge_ids)
         graphs_encoded.append(edges_encoded)
     return graphs_encoded
Example #5
def feature_extraction(features, filtered, valid_labels, fs, window):
    # all feature calculations must end up the same size
    X = np.empty((len(valid_labels[window:]), 0))
    # the five frequency bands all share the same bandpower pipeline
    bands = {'delta': delta_band, 'theta': theta_band, 'alpha': alpha_band,
             'beta': beta_band, 'gamma': gamma_band}
    for i in features:
        if i == 'linelength':
            X = np.concatenate((X, linelength(filtered, window)), axis=1)
        elif i in bands:
            X = np.concatenate(
                (X, bandpower(filtered, window,
                              utils.get_idx(bands[i], window, fs), fs)),
                axis=1)

    # size of labels should be consistent with X.shape[0]
    y = valid_labels[window:]

    return X, y
Example #6
    def train(self):
        os.makedirs(self.summ_path + "model/")
        optimizer = keras.optimizers.Adam(0.00001)
        global_step = 0
        print("Training Started. Results and summary files are stored at", self.summ_path)
        for cur_epoch in trange(config.epoch):
            trn_sup_idx, trn_qry_idx, trn_lbl = utils.get_idx(lbl=self.trn_lbl)
            for cur_step in trange(0, config.iter_cnt, config.batch_size):
                cur_sup_dat = self.trn_dat[trn_sup_idx[cur_step:cur_step + config.batch_size]]
                cur_qry_dat = self.trn_dat[trn_qry_idx[cur_step:cur_step + config.batch_size]]
                cur_lbl = trn_lbl[cur_step:cur_step + config.batch_size]
                self.train_one_step(x_sup=cur_sup_dat, x_qry=cur_qry_dat, lbl=cur_lbl,
                                    model=self.all_model, optim=optimizer,
                                    vars=self.all_model.trainable_variables,
                                    step=global_step, log=global_step % 10 == 0)

                global_step += 1

            val_sup_idx, val_qry_idx, val_lbl = utils.get_idx(lbl=self.val_lbl, iter_cnt=100)
            self.logger(x_sup=self.val_dat[val_sup_idx], x_qry=self.val_dat[val_qry_idx], lbl=val_lbl,
                        model=self.all_model, step=global_step)

            self.all_model.save_weights(self.summ_path + "model/%04d.h5" % cur_epoch)
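
In this trainer, get_idx(lbl=...) appears to sample training episodes: support indices, query indices, and a per-pair label. One plausible reading, sketched below, is random pair sampling with a same-class indicator; the default iter_cnt and the 0/1 labels are assumptions:

# Hypothetical sketch, not the original utils.get_idx.
import numpy as np

def get_idx(lbl, iter_cnt=1000):
    # draw random support/query index pairs and mark whether they share a class
    lbl = np.asarray(lbl)
    sup_idx = np.random.randint(0, len(lbl), size=iter_cnt)
    qry_idx = np.random.randint(0, len(lbl), size=iter_cnt)
    pair_lbl = (lbl[sup_idx] == lbl[qry_idx]).astype(np.float32)
    return sup_idx, qry_idx, pair_lbl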
Example #7
 def get_edge_feature_vector(self, edge):
     edge_kbid = edge.get('kbID')[:-1] if 'kbID' in edge else utils.unknown_el
     # take at most `right.label` token ids from the canonical right label
     right_label_ids = [utils.get_idx(t, self._word2idx)
                        for t in edge.get('canonical_right', "").split()
                        ][:self._p.get('symbolic.features', {}).get("right.label", 0)]
     # pick the first modifier present on the edge, in priority order
     modifier = ("argmax" if "argmax" in edge
                 else "argmin" if "argmin" in edge
                 else "num" if "num" in edge
                 else "filter" if "filter" in edge
                 else utils.all_zeroes)
     feature_vector = [self._property2idx.get(edge_kbid, 0),
                       self._property2idx.get(edge['hopUp'][:-1] if 'hopUp' in edge else utils.all_zeroes, 0),
                       self._property2idx.get(edge['hopDown'][:-1] if 'hopDown' in edge else utils.all_zeroes, 0),
                       self._modifier2idx.get(modifier, 0),
                       self._type2idx.get(edge.get('type', utils.unknown_el), 0),
                       self._propertytype2idx.get(edge['kbID'][-1] if 'kbID' in edge else utils.unknown_el, 0),
                       ] + right_label_ids
     assert len(feature_vector) <= self._feature_vector_size
     return feature_vector
Example #8
def longest_increasing_seq(A):
	"""
	Longest Increasing Subsequence. Given a sequence of n real
	numbers A(1) ... A(n), determine a subsequence (not
	necessarily contiguous) of maximum length in which the
	values in the subsequence form a strictly increasing sequence.
	"""

	# D[i]: length of the longest increasing subsequence ending at i
	# T[i]: index of the previous element in that subsequence (-1 = none)
	D, T = [1] * len(A), [-1] * len(A)
	maxLen, maxIdx = 1, 0  # start at index 0 so one-element answers reconstruct
	for i in range(1, len(A)):
		for j in range(i):
			if A[i] > A[j] and D[i] < D[j] + 1:
				D[i] = D[j] + 1
				T[i] = j
		if D[i] > maxLen:
			maxLen = D[i]
			maxIdx = i
	# follow predecessor links back from maxIdx to recover the indices
	l = utils.get_idx(maxIdx, T, [])
	return maxLen, [A[i] for i in l]
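
Here get_idx(maxIdx, T, []) reconstructs the subsequence by following the predecessor links in T. A minimal sketch of such a helper, assuming tail recursion with -1 as the chain terminator:

# Hypothetical sketch, not the original utils.get_idx.
def get_idx(idx, T, acc):
    # prepend idx, then follow its predecessor link until -1 is reached
    if idx == -1:
        return acc
    return get_idx(T[idx], T, [idx] + acc)

For A = [3, 1, 2, 5] the loop produces T = [-1, -1, 1, 2] and maxIdx = 3, so get_idx(3, T, []) returns the index chain [1, 2, 3] and the function yields (3, [1, 2, 5]).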
Example #9
 def test_get_idx(self):
     self.assertEqual(utils.get_idx(['a', 'b', 'c'], 1), 'b')
     self.assertEqual(utils.get_idx(pd.Series(['a', 'b', 'c']), 1), 'b')
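
This test pins down yet another get_idx: positional element access that works for both lists and pandas Series. A sketch consistent with the test (the Series branch via .iloc is an assumption):

# Hypothetical sketch, consistent with test_get_idx above.
import pandas as pd

def get_idx(values, i):
    # use positional access for Series, plain indexing for other sequences
    if isinstance(values, pd.Series):
        return values.iloc[i]
    return values[i]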
Example #10
def print_indices(f, freq_band, window, fs):
    idxStartBin, idxEndBin = utils.get_idx(freq_band, window, fs)
    f.write("val idxStartBin = %d\n" % idxStartBin)
    f.write("val idxEndBin = %d\n" % idxEndBin)
    f.write("\n\n")
Example #11
def plot_distributions(samples,
                       output_dir,
                       bin_sizes,
                       plot_var,
                       sig_tag,
                       weight_type='None',
                       normalize=False,
                       density=True,
                       log=True,
                       file_name=''):
    if 'top' in sig_tag: tag = r'$t\bar{t}$'
    elif 'BSM' in sig_tag: tag = 'BSM'
    elif 'OoD' in sig_tag: tag = 'OoD'
    else: tag = sig_tag  # fallback so `tag` is always defined
    if 'OoD' in sig_tag:
        labels = {0: [tag, 'QCD'], 1: [tag + ' (weighted)', 'QCD (weighted)']}
    else:
        labels = {0: [tag, 'QCD'], 1: [tag + ' (cut)', 'QCD (cut)']}
    colors = ['tab:orange', 'tab:blue', 'tab:brown']
    alphas = [1, 0.5]
    xlabel = {
        'pt': '$p_t$',
        'm': '$m$',
        'rljet_n_constituents': 'Number of constituents'
    }[plot_var]
    plt.figure(figsize=(13, 8))
    pylab.grid(True)
    axes = plt.gca()
    if not isinstance(samples, list): samples = [samples]
    for m in [0, 1]:
        for n in range(len(samples)):
            sample = samples[n]
            condition = (sample['JZW'] == -1 if m == 0
                         else sample['JZW'] >= 0 if m == 1
                         else sample['JZW'] >= -2)
            if not np.any(condition): continue
            variable = np.float32(sample[plot_var][condition])
            weights = sample['weights'][condition]
            if 'flat' in weight_type:
                min_val, max_val = max(0, np.min(variable)), np.max(variable)
            else:
                min_val = max(0, np.min(sample[plot_var]))
                max_val = np.max(sample[plot_var])
            bins = get_idx(max_val,
                           bin_size=bin_sizes[plot_var],
                           min_val=min_val,
                           integer=False,
                           tuples=False)
            if normalize:
                weights *= 100 / np.sum(sample['weights'])  # or: 100 / np.sum(weights)
            if density:
                indices = np.searchsorted(bins, variable, side='right')
                weights /= np.take(np.diff(bins),
                                   np.minimum(indices, len(bins) - 1) - 1)
            pylab.hist(variable,
                       bins,
                       histtype='step',
                       weights=weights,
                       color=colors[m],
                       lw=2,
                       log=log,
                       alpha=alphas[n],
                       label=labels[n][m])
    if 'OoD' in sig_tag:
        if plot_var == 'm':
            pylab.xlim(0, 1200)
            pylab.ylim(1e0, 1e5)
        elif plot_var == 'pt':
            pylab.xlim(0, 3000)
            pylab.ylim(1e0, 1e5)
    elif 'Geneva' in sig_tag:
        if plot_var == 'm':
            pylab.xlim(0, 500)
            pylab.ylim(1e-2, 1e5)
        elif plot_var == 'pt':
            pylab.xlim(0, 2000)
            pylab.ylim(1e-2, 1e5)
    else:
        if plot_var == 'm':
            pylab.xlim(0, 500)
            pylab.ylim(1e0, 1e7)
        elif plot_var == 'pt':
            pylab.xlim(0, 2000)
            pylab.ylim(1e0, 1e7)
    axes.xaxis.set_minor_locator(ticker.AutoMinorLocator(10))
    if not log: axes.yaxis.set_minor_locator(ticker.AutoMinorLocator(10))
    plt.xlabel(xlabel + ' (GeV)', fontsize=24)
    y_label = ' density' if density else ''
    if normalize: y_label += ' (%)'
    elif sig_tag in ['top-UFO', 'BSM']:
        y_label += ' (' + r'58.5 fb$^{-1}$' + ')'
    plt.ylabel('Distribution' + y_label, fontsize=24)
    axes.tick_params(axis='both', which='major', labelsize=14)
    plt.legend(loc='upper right',
               ncol=1 if len(samples) == 1 else 2,
               fontsize=18)
    if file_name == '':
        file_name = (plot_var if plot_var == 'pt' else 'mass') + '_dist.png'
    file_name = output_dir + '/' + file_name
    print('Saving', format(plot_var, '2s'), 'distributions  to:', file_name)
    plt.savefig(file_name)
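
The get_idx called here (with integer=False, tuples=False) evidently builds histogram bin edges from min_val to max_val in steps of bin_size. A minimal sketch under that assumption; the semantics of the integer and tuples flags are guesses from their names:

# Hypothetical sketch, not the original get_idx.
import numpy as np

def get_idx(max_val, bin_size=1.0, min_val=0.0, integer=True, tuples=True):
    # edges from min_val up to (at least) max_val in bin_size steps
    edges = np.arange(min_val, max_val + bin_size, bin_size)
    if integer:
        edges = np.unique(np.round(edges).astype(int))
    if tuples:
        return list(zip(edges[:-1], edges[1:]))  # one (low, high) pair per bin
    return edges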
Example #12
               supports,
               fmt=fmt,
               delimiter=',')
    np.savetxt("generated_files/alpha_vectors.csv",
               alpha_vector,
               fmt=fmt,
               delimiter=',')
    np.savetxt("generated_files/intercepts.csv",
               intercept,
               fmt=fmt,
               delimiter=',')

    for i in features:
        if i == 'delta':
            np.savetxt("generated_files/delta_index.csv",
                       utils.get_idx(fe.delta_band, window, fs),
                       fmt='%d',
                       delimiter=',')
        if i == 'theta':
            np.savetxt("generated_files/theta_index.csv",
                       utils.get_idx(fe.theta_band, window, fs),
                       fmt='%d',
                       delimiter=',')
        if i == 'alpha':
            np.savetxt("generated_files/alpha_index.csv",
                       utils.get_idx(fe.alpha_band, window, fs),
                       fmt='%d',
                       delimiter=',')
        if i == 'beta':
            np.savetxt("generated_files/beta_index.csv",
                       utils.get_idx(fe.beta_band, window, fs),
                       fmt='%d',
                       delimiter=',')
Example #13
def get_data_for_interpolation(z_sample,
                               redshift_arr,
                               projections,
                               logfile=None):
    """
    Finds the two projections with redshifts that are the nearest higher and
    nearest lower redshifts to z_sample and extracts their data and comoving
    distances.

    Parameters
    ----------
    z_sample : float
        The redshift of interest in the interpolation.

    redshift_arr: array or array-like
        The redshifts from the projections ordered by snapshot number.

    projections : array or array-like
        The filenames of the projections. These have the same indexing as
        redshift_arr

    logfile : file-like, optional
        The file to write the logs.


    Returns
    -------
    data_low :
        The data of the projection with the nearest lower redshift to z_sample.

    dist_low :
        The comoving distance to the projection with the nearest lower 
        redshift to z_sample.

    data_high :
        The data of the projection with the nearest higher redshift to z_sample.

    dist_high : 
        The comoving distance to the projection with the nearest higher 
        redshift to z_sample.
    """

    if logfile:
        logfile.write("\n-----------------")
        logfile.write(
            "\nGetting Interpolation Data: z = {0:.5f}".format(z_sample))
        logfile.write("\n-----------------\n")

    idx_low, idx_high = utils.get_idx(z_sample, redshift_arr)
    z_low, z_high = redshift_arr[idx_low], redshift_arr[idx_high]
    dist_low, dist_high = utils.z_to_mpc(z_low), utils.z_to_mpc(z_high)
    proj_low, proj_high = projections[idx_low], projections[idx_high]
    data_low, data_high = h5py.File(proj_low, "r"), h5py.File(proj_high, "r")

    if logfile:
        logfile.write("{0:<10} {1:10}\n{2:<10} {3:10}\n".format(
            "idx_low", idx_low, "idx_high", idx_high))
        logfile.write("{0:<10} {1:10.5}\n{2:<10} {3:10.5}\n".format(
            "z_low", z_low, "z_high", z_high))
        logfile.write("{0:<10} {1:10.5}\n{2:<10} {3:10.5}\n".format(
            "dist_low", dist_low, "dist_high", dist_high))
        logfile.write("{0:<10} {1}\n{2:<10} {3}".format(
            "proj_low", proj_low, "proj_high", proj_high))

    return data_low, dist_low, data_high, dist_high
Example #14
 def test_get_idx(self):
     self.assertEqual(utils.get_idx(['a', 'b', 'c'], 1), 'b')
     self.assertEqual(utils.get_idx(pd.Series(['a', 'b', 'c']), 1), 'b')
 def encode_question(self, graph_set):
     sentence_tokens = graph_set[0].get("tokens", [])
     sentence_encoded = [utils.get_idx(t, self._word2idx) for t in sentence_tokens]
     return sentence_encoded