Example #1
def test(args):

    inference_graph = tf.Graph()
    with tf.Session(graph=inference_graph) as sess:

        if not args.restore_path or not args.vars_file:
            print(
                '\n\n\tSpecify a restore_path: --restore_path=<path_to_ckpt> and --vars_file=<vars_file_pathname>\n\n'
            )
            quit()

        trained_model_saver, w, b = restore_graph(sess, args)

        _y_ = inference_graph.get_tensor_by_name('y_:0')
        _loss = inference_graph.get_tensor_by_name('loss:0')
        _x = inference_graph.get_tensor_by_name('x:0')
        _y = inference_graph.get_tensor_by_name('y:0')

        while True:
            w, b, train_input, train_output = gen_data(int(args.batch_size),
                                                       int(args.num_inputs), w,
                                                       b)
            y_, loss = sess.run([_y_, _loss],
                                feed_dict={
                                    _x: train_input,
                                    _y: train_output
                                })
            y_acc = y_
            print('Mean Squared Error Loss: %.2f\n' % loss)
            print(train_output)
            print('\n')
            print(y_acc)
            print('\n')
            input('Press Enter to continue...')
Example #2
def test(n_rows: int, n_partitions: int):
    """对比collect_dict_1函数和collect_dict_2函数的性能。
    collect_dict_1使用DataFrame的collect方法再转为Dict,
    collect_dict_2使用Rdd的collect方法再转为Dict,
    结论:两者差异不明显
    ***************************************************************************
    运行时间
    ---------------------------------------------------------------------------
    100,000条数据,10个分区
    函数collect_dict_1运行--100次!
    平均值为--0.006 s!
    中位数为--0.006 s!
    最小值为--0.005 s!
    最大值为--0.010 s!

    函数collect_dict_2运行--100次!
    平均值为--0.007 s!
    中位数为--0.007 s!
    最小值为--0.006 s!
    最大值为--0.010 s!
    ---------------------------------------------------------------------------
    1,000,000条数据, 100个分区
    函数collect_dict_1运行--100次!
    平均值为--0.053 s!
    中位数为--0.052 s!
    最小值为--0.047 s!
    最大值为--0.139 s!

    函数collect_dict_2运行--100次!
    平均值为--0.056 s!
    中位数为--0.056 s!
    最小值为--0.051 s!
    最大值为--0.062 s!
    ---------------------------------------------------------------------------
    ***************************************************************************

    ***************************************************************************
    占用内存
    ---------------------------------------------------------------------------
    函数collect_dict_1把DataFrame全部加载到内存中再转换为Dict,
    最大内存消耗 = DataFrame + Dict + 容器引用的对象

    函数collect_dict_2把Rdd reduce为Dict,
    最大内存消耗 = Dict + 容器引用的对象
    ---------------------------------------------------------------------------
    ***************************************************************************

    Arguments:
        n_rows {int} -- 随机生成的数据行数。
        n_partitions {int} -- 随机生成的数据分区数。
    """
    print("对比collect_dict_1函数和collect_dict_2函数的性能...")
    print("共%d条数据,%d个分区!" % (n_rows, n_partitions))
    spark = SparkSession.builder.appName("gen_data_test").getOrCreate()
    data = gen_data(spark, n_rows=n_rows).repartition(n_partitions).cache()
    data.show()
    collect_dict_1(data)
    collect_dict_2(data)
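
The two helpers under test are not shown in this snippet. Below is a minimal sketch of what they might look like, assuming the generated DataFrame has one key column and one value column; the column names "key" and "value" are illustrative, not taken from the project:

# Hypothetical sketch of the two strategies compared above.
def collect_dict_1(data):
    # Pull every Row to the driver via DataFrame.collect(), then build the dict.
    return {row["key"]: row["value"] for row in data.collect()}


def collect_dict_2(data):
    # Collect (key, value) pairs through the underlying RDD, then build the dict.
    return dict(data.rdd.map(lambda row: (row["key"], row["value"])).collect())
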
Example #3
    def openFrame(self):
        self.data = gen_data.gen_data("review1")
        self.words_num = self.word_num.get()
        self.root.withdraw()
        if not self.word_num:
           messagebox.showinfo(title='提示', message='请输入正确的单词数!')
        else:
           self.reviewFrame = tk.Toplevel()
           self.reviewFrame.geometry("500x450")
           self.reviewFrame.title("review")
           self.data.start(int(self.words_num))   
           tk.Label(self.reviewFrame,text="您已掌握了",font=('楷体',12)).place(x=90,y=10,anchor=tk.NW)
           tk.Label(self.reviewFrame,textvariable=self.data.master_words,font=('Times New Roman',12)).place(x=200,y=10,anchor=tk.NW)
           tk.Label(self.reviewFrame,text="个单词",font=('楷体',12)).place(x=320,y=10,anchor=tk.NW)
           tk.Label(self.reviewFrame,text="现还剩",font=('楷体',12)).place(x=90,y=30,anchor=tk.NW)
           tk.Label(self.reviewFrame,textvariable=self.data.left_words,font=('Times New Roman',12)).place(x=200,y=30,anchor=tk.NW)
           tk.Label(self.reviewFrame,text="个单词",font=('楷体',12)).place(x=320,y=30,anchor=tk.NW)

           tk.Label(self.reviewFrame,textvariable=self.data.word_text,font=('Times New Roman',20)).place(x=200,y=60,anchor=tk.NW)
                
           checkbox = tk.Checkbutton(self.reviewFrame,text="show sentences",variable=self.data.status,command=self.data.show_sentences)
           checkbox.place(x=370,y=90,anchor=tk.NW)
           
           tk.Label(self.reviewFrame,textvariable=self.data.sentences,wraplength = 400,font=('Times New Roman',10)).place(x=50,y=120,anchor=tk.NW)
           
           self.data.gen_choices()
           button1 = tk.Button(self.reviewFrame,textvariable=self.data.answer1,height=1,width=25)
           button1.place(x=150,y=220,anchor=tk.NW)
           button2 = tk.Button(self.reviewFrame,textvariable=self.data.answer2,height=1,width=25)
           button2.place(x=150,y=270,anchor=tk.NW)
           button3 = tk.Button(self.reviewFrame,textvariable=self.data.answer3,height=1,width=25)
           button3.place(x=150,y=320,anchor=tk.NW)
           button4 = tk.Button(self.reviewFrame,textvariable=self.data.answer4,height=1,width=25)
           button4.place(x=150,y=370,anchor=tk.NW)
           tk.Label(self.reviewFrame,textvariable=self.data.response,font=('Times New Roman',12)).place(x=50,y=400,anchor=tk.NW)
           button5 = tk.Button(self.reviewFrame,text="Next")
           button5.place(x=350,y=420,anchor=tk.NW)
           button6 = tk.Button(self.reviewFrame,text="Exit")
           button6.place(x=400,y=420,anchor=tk.NW)
           self.reviewFrame.bind('<a>',self.check_answer1)
           self.reviewFrame.bind('<b>',self.check_answer2)
           self.reviewFrame.bind('<c>',self.check_answer3)
           self.reviewFrame.bind('<d>',self.check_answer4)
           self.reviewFrame.bind('<n>',self.review_next1)
           self.reviewFrame.bind('<q>',self.save_and_quit)
           button1.bind('<Button-1>',self.check_answer1)
           button2.bind('<Button-1>',self.check_answer2)
           button3.bind('<Button-1>',self.check_answer3)
           button4.bind('<Button-1>',self.check_answer4)
           button5.bind('<Button-1>',self.review_next1)
           button6.bind('<Button-1>',self.save_and_quit)
           self.buttonlist1 = [button1,button2,button3,button4]
           self.buttonlist2 = [button5]
Example #4
def test(n_rows: int, n_partitions: int):
    """对比collect_set_1函数和collect_set_2函数的性能。
    结论:函数collect_dict_1更快。
    ***************************************************************************
    运行时间
    ---------------------------------------------------------------------------
    100,000条数据,10个分区
    函数collect_dict_1运行--100次!
    平均值为--0.017 s!
    中位数为--0.016 s!
    最小值为--0.011 s!
    最大值为--0.038 s!

    函数collect_dict_2运行--100次!
    平均值为--0.034 s!
    中位数为--0.032 s!
    最小值为--0.027 s!
    最大值为--0.068 s!
    ---------------------------------------------------------------------------
    1,000,000条数据, 100个分区
    函数collect_dict_1运行--100次!
    平均值为--0.021 s!
    中位数为--0.021 s!
    最小值为--0.018 s!
    最大值为--0.029 s!

    函数collect_dict_2运行--100次!
    平均值为--0.069 s!
    中位数为--0.068 s!
    最小值为--0.062 s!
    最大值为--0.093 s!
    ---------------------------------------------------------------------------
    ***************************************************************************

    Arguments:
        n_rows {int} -- 随机生成的数据行数。
        n_partitions {int} -- 随机生成的数据分区数。
    """
    print("对比collect_dict_1函数和collect_dict_2函数的性能...")
    print("共%d条数据,%d个分区!" % (n_rows, n_partitions))
    spark = SparkSession.builder.appName("gen_data_test").getOrCreate()

    # Generate duplicated data: union the base rows onto themselves so the
    # final DataFrame contains n_duplicated copies of each generated row
    n_duplicated = 10
    base = gen_data(spark, n_rows // n_duplicated)
    data = base
    for _ in range(n_duplicated - 1):
        data = data.union(base)

    data = data.repartition(n_partitions).cache()
    data.show()
    collect_set_1(data)
    collect_set_2(data)
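
The collect_set_1 / collect_set_2 helpers are likewise not shown here. A hypothetical sketch by analogy with the dict versions above, assuming a single value column named "key" (again an illustrative name, not taken from the project):

# Hypothetical sketch, by analogy with collect_dict_1 / collect_dict_2.
def collect_set_1(data):
    # Collect the DataFrame rows on the driver, then deduplicate into a set.
    return {row["key"] for row in data.collect()}


def collect_set_2(data):
    # Collect the values through the underlying RDD, then deduplicate into a set.
    return set(data.rdd.map(lambda row: row["key"]).collect())
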
Example #5
File: frank.py  Project: memazouni/ACNet
def run(Phi, ndims, theta, N, seed):

    phi = Phi(torch.tensor(theta))
    id = get_info()
    s, log_ll = gen_data(phi, ndims, N, seed)
    print('avg_log_likelihood', torch.mean(log_ll))

    import matplotlib.pyplot as plt
    plt.scatter(s.detach().numpy()[:, 0], s.detach().numpy()[:, 1])
    plt.show()

    d = {'samples': s, 'log_ll': log_ll}
    pickle.dump(d, open('./data/frank%s.p' % id, 'wb'))
    ex.add_artifact('./data/frank%s.p' % id)
Example #6
 def openFrame2(self):
     self.data = gen_data.gen_data("review2")
     self.words_num = self.word_num.get()
     self.root.withdraw()
     if not self.word_num:
        messagebox.showinfo(title='提示', message='请输入正确的单词数!')
     else:
        self.reviewFrame = tk.Toplevel()
        self.reviewFrame.geometry("500x370")
        self.reviewFrame.title("review")
        self.data.start(int(self.words_num))   
        tk.Label(self.reviewFrame,text="您已掌握了",font=('楷体',12)).place(x=90,y=10,anchor=tk.NW)
        tk.Label(self.reviewFrame,textvariable=self.data.master_words,font=('Times New Roman',12)).place(x=200,y=10,anchor=tk.NW)
        tk.Label(self.reviewFrame,text="个单词",font=('楷体',12)).place(x=320,y=10,anchor=tk.NW)
        tk.Label(self.reviewFrame,text="现还剩",font=('楷体',12)).place(x=90,y=30,anchor=tk.NW)
        tk.Label(self.reviewFrame,textvariable=self.data.left_words,font=('Times New Roman',12)).place(x=200,y=30,anchor=tk.NW)
        tk.Label(self.reviewFrame,text="个单词",font=('楷体',12)).place(x=320,y=30,anchor=tk.NW)
        tk.Label(self.reviewFrame,textvariable=self.data.word_text,font=('Times New Roman',20)).place(x=200,y=60,anchor=tk.NW)                
        checkbox = tk.Checkbutton(self.reviewFrame,text="show sentences",variable=self.data.status,command=self.data.show_sentences)
        checkbox.place(x=370,y=90,anchor=tk.NW)           
        tk.Label(self.reviewFrame,textvariable=self.data.sentences,wraplength = 400,font=('Times New Roman',10)).place(x=50,y=120,anchor=tk.NW)
        
        self.data.gen_choices()
        self.Myanswer = tk.StringVar()
        self.myanswer = tk.Entry(self.reviewFrame,textvariable=self.Myanswer,font=('Times New Roman',12),width=50,fg='black')
        self.myanswer.place(x=30,y=220,anchor=tk.NW)
        tk.Label(self.reviewFrame,textvariable=self.data.response,font=('Times New Roman',12)).place(x=50,y=250,anchor=tk.NW)
        button1 = tk.Button(self.reviewFrame,text="Confirm")
        button1.place(x=120,y=280,anchor=tk.NW)
        button2 = tk.Button(self.reviewFrame,text="Add",command=self.add_trans)
        button2.place(x=300,y=280,anchor=tk.NW)
        button3 = tk.Button(self.reviewFrame,text="Next")
        button3.place(x=350,y=330,anchor=tk.NW)
        button4 = tk.Button(self.reviewFrame,text="Exit")
        button4.place(x=400,y=330,anchor=tk.NW)
        
        self.reviewFrame.bind('<y>',self.check_my)
        self.reviewFrame.bind('<n>',self.review_next2)
        self.reviewFrame.bind('<q>',self.save_and_quit)
        button1.bind('<Button-1>',self.check_my)
        button3.bind('<Button-1>',self.review_next2)
        button4.bind('<Button-1>',self.save_and_quit)
Example #7
def main():
    print "Building network ..."
    l_out = build_network(N_BATCH)
    read_model_data(l_out, 'lstm_iter_60000')
    print "Done building network"

    target_values = T.tensor3('target_output')
    input_values = T.tensor3('input')

    network_output = lasagne.layers.get_output(l_out, input_values)

    # categorical crossentropy loss because it's the proper way
    cost = T.mean(categorical_crossentropy(T.reshape(network_output, (N_BATCH*MAX_LENGTH, N_FEAT_DIM)), T.reshape(target_values, (N_BATCH*MAX_LENGTH, N_FEAT_DIM))))
    all_params = lasagne.layers.get_all_params(l_out)
    print "Computing updates..."
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    print "Compiling functions..."

    train = theano.function(
        [input_values, target_values], cost, updates=updates)
    compute_cost = theano.function([input_values, target_values], cost)
    train_f = open('chatlog.txt','r')
    f_data = train_f.read()
    print "Training ..."
    try:
        for n in range(N_ITERATIONS):
            X, Y = gen_data(f_data, n, N_BATCH, MAX_LENGTH)
            train(X, Y)
            if not n % CHECK_FREQUENCY:
                cost_val = compute_cost(X, Y)
                print "Iteration {} training cost = {}".format(n, cost_val)
            if n % CHECKPOINT_FREQUENCY == 0 and n > 0:
                print "Saving checkpoint..."
                fname = "lstm_iter_%d" % (n)
                write_model_data(l_out, fname)
        
    except KeyboardInterrupt:
        pass    
Example #8
def compute(file,
            fs=0,
            time_res=0,
            amp_res=0,
            fmin=0,
            fmax=0,
            fcs=[],
            nb_filters=0,
            q=0,
            n=0,
            filters=[],
            filters_fq=[],
            ax=None,
            plotd=True,
            dbfs=False,
            spec_only=False,
            spec_xlim=False,
            drc_tl=False,
            drc_th=False,
            drc_r=False,
            adc_res=16,
            formants=[]):
    """
    Runs the complete processing chain on the audio file given as a parameter.

    :param file: Name of the audio file to process, or a list of amplitudes
    :type file: string or number
    :param fs: Sampling frequency (only used when a list of amplitudes is passed as file)
    :type fs: number
    :param adc_res: Resolution of the analog-to-digital converter
    :type adc_res: number
    :param drc_tl: Lower threshold of the dynamic range compressor
    :type drc_tl: number
    :param drc_th: Upper threshold of the dynamic range compressor
    :type drc_th: number
    :param drc_r: Compression ratio of the dynamic range compressor
    :type drc_r: number
    :param fmin: Minimum frequency
    :type fmin: number
    :param fmax: Maximum frequency
    :type fmax: number
    :param fcs: List of custom center frequencies (if given, fmin, fmax and nb_filters are ignored)
    :type fcs: number[]
    :param nb_filters: Number of filters
    :type nb_filters: number
    :param q: Quality factor
    :type q: number
    :param n: Filter order
    :type n: number
    :param filters: Pre-generated filter bank (if given, the filter-generation parameters are ignored)
    :type filters: filter[]
    :param filters_fq: List of objects with "fc", "fl" and "fh" giving the characteristic frequencies of each filter
    :type filters_fq: object("fc", "fl", "fh")[]
    :param time_res: Time resolution
    :type time_res: number
    :param amp_res: Amplitude resolution
    :type amp_res: number
    :param formants: List of formants to draw on the figure ("a", "e", "i", "o", "u")
    :type formants: string[]
    :param ax: Existing drawing surface (a new figure is created if none is given)
    :type ax: figure
    :param plotd: If set, plots the spectrogram of each processed file
    :type plotd: bool
    :param spec_only: If set, plots only the custom spectrogram (in that case, pass a title)
    :type spec_only: string
    :param spec_xlim: Overrides the upper limit of the spectrogram's x axis
    :type spec_xlim: number
    :param dbfs: Plots the dBFS spectrum
    :type dbfs: bool
    :return: List of time segments, list of frequencies and list of energy sequences
    :rtype: number[], number[], number[][]
    """
    # Load the audio file and add the noise file(s), if given
    if type(file) == list:
        if (type(file[0]) == str):
            fs, y = sw.read(file[0])
            y = np.array(y)
            for i in range(1, len(file)):
                d, noise = sw.read(file[i])
                for j in range(0, min(len(y), len(noise))):
                    y[j] = y[j] + noise[j]
        else:
            y = file
    # Load the audio file
    elif type(file) == str:
        fs, y = sw.read(file)
    else:
        y = file
    N = len(y)
    t = np.linspace(0, N / fs, N)

    # Dynamic range compressor
    if drc_r != False:
        y = drc(y, tl=drc_tl, th=drc_th, ratio=drc_r)

    # Analog-to-digital converter
    if adc_res < 16:
        y = adc(y, adc_res)

    # Filtering
    if ((nb_filters > 0) or (len(fcs) > 0)):
        filters, filters_fq = gen_filters(q,
                                          n,
                                          fs,
                                          nb_filters=nb_filters,
                                          fmin=fmin,
                                          fmax=fmax,
                                          fcs=fcs)
    filtered = gen_filtered(y, fs, filters)

    # Spectrogram
    rsegs, rfreqs, rseqs = gen_data(filtered, fs, time_res, amp_res,
                                    filters_fq)

    # Remove the silence at the beginning of the sample
    rsum = np.sum(rseqs, axis=0)
    for i in range(len(rsum)):
        if rsum[i] != 0:
            break
    if spec_only:
        rsegs = np.delete(rsegs, range(len(rsegs) - i, len(rsegs)))
    else:
        rsegs = np.delete(rsegs, range(0, i))
    rseqs = np.delete(rseqs, range(0, i), 1)

    # Plotting
    if plotd:
        if spec_only:
            plot_datagram(rsegs,
                          rfreqs,
                          rseqs,
                          title=spec_only,
                          xlim=spec_xlim,
                          formants=formants)
        else:
            plot_data(y,
                      t,
                      rsegs,
                      rfreqs,
                      rseqs,
                      ax=ax,
                      xlim=spec_xlim,
                      dbfs=dbfs,
                      formants=formants)
    return rsegs, rfreqs, rseqs
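
Given the parameters documented above, a call could look like the following sketch; the file name and all numeric settings are illustrative values, not taken from the project:

# Illustrative call only; "voice.wav" and the numeric settings are made up.
segs, freqs, energies = compute("voice.wav",
                                fmin=100,
                                fmax=8000,
                                nb_filters=24,
                                q=5,
                                n=2,
                                time_res=0.01,
                                amp_res=1.0,
                                plotd=False)
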
Example #9
    epochs = 30
    X = x_train
    Y = y_train
    Z = z_train
    for e in range(epochs):
        mu, Sigma1 = E_step(beta_old, sigma_u, sigma_e, X, Y, Z)
        beta_old, sigma_u, sigma_e = M_step(mu, Sigma1, X, Y, Z)
        if e % 10 == 0:
            pred = prediction(X, beta_old, mu, Sigma1, Z)
    pred = prediction(x_test, beta_old, mu, Sigma1, z_test)
    plt.scatter(x_test[:, 1], pred, marker='x')
    print(AccuarcyCompute(pred, y_test))


if __name__ == '__main__':
    Xdata, y = gen_data(10)
    Xdata = np.concatenate((np.ones((Xdata.shape[0], 1)), Xdata),
                           axis=1).astype(float)
    for i in range(len(y)):
        EM(y[i])
'''
(0.002460019249207814, 0.03566158009767635, 4.97)
(0.002691816505975828, 0.03861964516235123, 4.6)
(0.003364610734050656, 0.04351645944560852, 5.58)
(0.002092487267656577, 0.03565267468930575, 5.53)
(0.0017462519262057027, 0.034892449525360095, 3.7)
(0.00247624675649647, 0.03753371018468406, 4.35)
(0.003737213958193634, 0.04866151945218468, 8.92)
(0.0026849614351548, 0.03899822956705998, 4.29)
(0.0013618905932646977, 0.03088042985437457, 3.09)
(0.00444138987523014, 0.05103304217010265, 4.18)
'''

import gen_data
import time
import glob

if __name__=="__main__":
    array_folder = ["training/a/*.png","training/b/*.png","training/c/*.png","training/d/*.png","training/e/*.png",
               "training/f/*.png","training/g/*.png","training/h/*.png","training/i/*.png","training/k/*.png",
               "training/l/*.png","training/m/*.png","training/n/*.png","training/o/*.png","training/p/*.png",
               "training/r/*.png","training/s/*.png","training/t/*.png","training/u/*.png","training/v/*.png",
               "training/w/*.png","training/x/*.png","training/y/*.png","training/z/*.png"]
    for item in array_folder:
        files = glob.glob(item)
        for file in files:
            gen_data.gen_data(file, ord(item.split("/")[1]))
    print "training complete"
    
import numpy as np
from gen_data import gen_data
from plot import plot
from todo import func

no_iter = 1000  # number of iteration
no_train = 70  # Your code here  # number of training data
no_test = 30  # Yourcode here  # number of testing data
no_data = 100  # number of all data
assert (no_train + no_test == no_data)

cumulative_train_err = 0
cumulative_test_err = 0

for i in range(no_iter):
    X, y, w_f = gen_data(no_data)
    X_train, X_test = X[:, :no_train], X[:, no_train:]
    y_train, y_test = y[:, :no_train], y[:, no_train:]
    print(X_train)
    print(y_train)
    w_g = func(X_train, y_train)
    # Compute training, testing error
    # Your code here
    # Answer begin
    # e.g
    train_err = 0
    test_err = 0
    train_P, train_N = X_train.shape
    test_P, test_N = X_test.shape
    Xt = X_train.T
    for i in range(train_N):
Example #12
import gen_data
import numpy as np
import model
import pandas

itol = lambda l: [l]

data = gen_data.gen_data(gen_data.get_dists(100), 100000, 10, .2, 1 / 5, .1)
train = (data[0][:70000], np.array(list(map(itol, data[1][:70000]))))
test = (data[0][70000:], np.array(list(map(itol, data[1][70000:]))))


def train_model():
    x = model.train(list(zip(*train)),
                    list(zip(*test)),
                    list(zip(*test)),
                    1024, [5, 5],
                    epochs=50)
    return x


def get_res(x):
    res = np.array(list(map(lambda l: [l], x['results'][0][0])))
    return res


def get_pandas():
    return pandas.DataFrame(train[0]).assign(score=train[1])
Example #13
def create_data(N):
  data_file = "./data/data.txt"
  query_file = "./data/query.txt"
  gen_data.gen_data(data_file, N)
  gen_data.gen_data(query_file, N)
	print("Downloading previous data from google big query...")


#	#drop the pandas index that is added when uploading to gbq
	data_gbq = data_gbq.iloc[:,1:]
	data_last30 = data[data['date'] >= last30]


	#details for data extraction
	bucket = "verifly-adjust"
	days = 30
	save_path = '/home/nick/adjust/data/verifly'


	#download and combine batched data from yesterday
	raw_data_last30 = gen_data.gen_data(bucket, days, save_path)
#	data_last30.to_csv(local_path + 'vfly_raw_11062019.csv')
	#transform batched data to metrics and save as csv
	agg_metrics, cohort_metrics = metrics.apply_metrics(raw_data_last30)

    data_last30_and_aggmetrics = data_last30.append(agg_metrics, ignore_index=True)
    
    final_data = merge_data(data_last_30_and_aggmetrics, cohort_metrics)
	#combine the two csv files. data was saved as csv as a poor solution to merging a multiindexed dataframe
#	data_gbq = pd.read_csv('/home/nick/adjust/data/verifly/deliverables/vfly-deliverables.csv')
#	data_gbq = data_gbq.iloc[:,1:]


#	print('base data ')
#	print(data_gbq.columns) 
#	data_yesterday = pd.read_csv("deliverables_" + str(yesterday) + ".csv")
Example #15
def train(args):

    tf_config = None
    tf_config_json = None
    cluster = None
    ps_hosts = []
    worker_hosts = []
    config_file = False
    job_name = None
    task_index = 0

    try:
        print(os.environ['TF_CONFIG'])
        config_file = True
    except KeyError:
        pass

    if config_file:
        tf_config = os.environ.get('TF_CONFIG', '{}')
        tf_config_json = json.loads(tf_config)
        cluster = tf_config_json.get('cluster', {})
        job_name = tf_config_json.get('task', {}).get('type', "")
        task_index = tf_config_json.get('task', {}).get('index', "")
        ps_hosts = cluster.get("ps")
        worker_hosts = cluster.get("worker")
    else:
        ps_hosts = args.ps_hosts.split(',')
        worker_hosts = args.worker_hosts.split(',')
        job_name = args.job_name
        task_index = args.task_index

    graph = tf.Graph()
    var_path = cwd + '/' + args.checkpoint_dir + '/variables/'

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

    if job_name == "ps":
        server.join()
    elif job_name == "worker":

        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % task_index,
                    cluster=cluster)):

            with graph.as_default():

                # Graph object and scope created
                # ...now define all parts of the graph here
                feed_fwd_model = FF(args, graph)
                saver = tf.train.Saver()
                init = tf.global_variables_initializer()

                # Now that the graph is defined, create a session to begin running
                with tf.Session() as sess:

                    sess.run(init)
                    # Prepare to Save model
                    i = 0
                    model = 'model%s' % i
                    try:
                        os.makedirs(cwd + '/' + args.checkpoint_dir)
                    except OSError:
                        if not os.path.isdir(cwd + '/' + args.checkpoint_dir):
                            raise
                    ckpt_file_index = Path(cwd + '/' + args.checkpoint_dir +
                                           '/' + model + '.ckpt.index')
                    ckpt_file = Path(cwd + '/' + args.checkpoint_dir + '/' +
                                     model + '.ckpt')
                    while ckpt_file_index.is_file():
                        i += 1
                        model = 'model%s' % i
                        ckpt_file_index = Path(cwd + '/' +
                                               args.checkpoint_dir + '/' +
                                               model + '.ckpt.index')
                    ckpt_file = Path(cwd + '/' + args.checkpoint_dir + '/' +
                                     model + '.ckpt')

                    num_epochs = int(args.num_epochs)
                    y_acc = np.zeros(
                        (int(args.batch_size), int(args.num_outputs)))
                    loss = None
                    y_ = None

                    w = []
                    b = []
                    if args.restore_path is not None:
                        trained_model_saver, w, b = restore_graph(sess, args)
                        print('...continuing training')

                    # guards against accidental updates to the graph which can cause graph
                    # increase and performance decay over time (with more iterations)
                    sess.graph.finalize()

                    for e in range(num_epochs):
                        w, b, train_input, train_output = gen_data(
                            int(args.batch_size), int(args.num_inputs), w, b)
                        y_, loss, _ = sess.run(feed_fwd_model.run(),
                                               feed_dict={
                                                   feed_fwd_model.x:
                                                   train_input,
                                                   feed_fwd_model.y:
                                                   train_output
                                               })
                        y_acc = y_
                        threshold = 1000
                        w_b_saved = False
                        if e % 50 == 0:
                            print('epoch: %d - loss: %.2f' % (e, loss))
                            if e > 0 and e % threshold == 0:
                                print('Writing checkpoint %d' % e)
                                print(train_output, w, b)
                                print('\n')
                                print(y_acc,
                                      sess.run(feed_fwd_model.weights)[0],
                                      sess.run(feed_fwd_model.biases)[0])
                                save_path, w_b_saved = checkpoint(
                                    sess, saver, ckpt_file, w, b, w_b_saved,
                                    var_path, model, e)
                    save_path, w_b_saved = checkpoint(
                        sess, saver, ckpt_file, w, b, w_b_saved, var_path,
                        model)  # final checkpoint
                    print('Model saved to %s' % str(save_path))
                    sess.close()
Example #16
def create_data(N):
    data_file = "./data/data.txt"
    query_file = "./data/query.txt"
    gen_data.gen_data(data_file, N)
    gen_data.gen_data(query_file, N)
Example #17
    x_train, x_test, y_train, y_test = train_test_split(X[:, :4],
                                                        y,
                                                        test_size=0.3)
    model = linreg.fit(x_train, y_train)
    n_predic_y_test = model.predict(x_test)
    plt.scatter(x_test[:, 1], n_predic_y_test, marker='x')

    #Get the Result
    var = np.mean((n_predic_y_test - y_test)**2)
    error = np.mean(abs(n_predic_y_test - y_test))
    error_percentage = round(error / np.mean(abs(y_test)) * 100, 2)
    return var, error, error_percentage


if __name__ == '__main__':
    X, y = gen_data(10)
    linreg = LinearRegression()
    for i in range(len(y)):
        print(LR(y[i]))
'''
(0.0015366263968832226, 0.029558440011940437, 4.12)
(0.007524175125879509, 0.06941106394372618, 9.41)
(0.051449122033343286, 0.191328963800147, 23.79)
(0.20463488066853824, 0.33023352718617116, 56.65)
(0.11767823522992713, 0.28898224042626236, 28.81)
(0.32396914319907705, 0.4598516734961317, 55.71)
(0.2232187293220335, 0.39778403290574266, 82.12)
(1.578167663561671, 1.0617364678404835, 94.25)
(0.639188444611845, 0.6624016711869223, 60.31)
(0.4300697392308613, 0.5422248373416173, 51.63)
Example #18
 def __init__(self, root, word_num):  # word_num is a variable
     self.root = root
     self.data = gen_data.gen_data("recite")
     self.word_num = word_num
     self.seq = 1
Example #19
File: MMNN.py  Project: JianfaLai/MMNN
        inputs_test = pt.autograd.Variable(inputs_test)
        labels_test = pt.autograd.Variable(labels_test)
        random_e_test = pt.from_numpy(random_effect_test).float()
        outputs_test = model(inputs_test.float(), random_e_test)
        error_percentage = AccuarcyCompute(outputs_test, labels_test.float())
        if error_percentage[2] == last:
            repeat += 1
        else:
            last = error_percentage[2]
        if repeat > 5:
            break
    print("Testing", error_percentage)


if __name__ == '__main__':
    Xdata, y = gen_data(5, size=1000)
    Xdata = np.concatenate((np.ones((Xdata.shape[0], 1)), Xdata),
                           axis=1).astype(float)
    for i in range(len(y)):
        EMNN(y[i])
'''
Testing (0.0015800932, 0.029433494, 4.1)
Testing (0.0029981958, 0.044504516, 4.91)
Testing (0.002603048, 0.0364661, 4.62)
Testing (0.002724695, 0.042232957, 6.26)
Testing (0.002901133, 0.038665723, 4.27)
Testing (0.0019755429, 0.036214557, 3.75)
Testing (0.002415244, 0.040518273, 6.68)
Testing (0.0023834745, 0.035962757, 3.54)
Testing (0.0021507577, 0.035938308, 3.04)
Testing (0.0031414144, 0.044444475, 3.79)
Example #20
if __name__ == '__main__':
    #import data that does not change between batches
    #conf_matrices = loadmat('conf_matrices.mat')

    #tmpCM  = []
    #tmpCM1 = []
    #for iN in range(conf_matrices['conf_matrices'].size):
    #    tmpCM.append(conf_matrices['conf_matrices'][iN]['userID'][0][0][0])
    #    tmpCM1.append(conf_matrices['conf_matrices'][iN]['conf_matrix'][0])

    #conf_matrices  = pd.DataFrame({ 'userID' : tmpCM,'conf_matrix' : tmpCM1})
    retired_images = pd.DataFrame({ 'imageID' : [], 'class' : []})
    PP_matrices    = pd.DataFrame({ 'imageID' : [],'pp_matrix' : []})

    hold,conf_matrices = gen_data.gen_data()

    #for loop to iterate over each batch
    for i in range(1,2):
        #batch_name = 'batch' + str(i) + '.mat' #batch1.mat, batch2.mat, etc
        #batch = loadmat(batch_name) #read batch file
        #tmpType         = []
        #tmpLabels       = []
        #tmpuserIDs      = []
        #tmpTruelabel    = []
        #tmpImageID      = []
        #tmpML_posterior = []
        # Subtracting 1 off the index from the mat file for the "labels" so that the indexing works in python.
        #for iN in range(batch['images'].size):
        #    tmpType.append(batch['images'][iN]['type'][0][0])
        #    tmpLabels.append(batch['images'][iN]['labels'][0][0]-1)
Example #21
# Get a guess
#lsmr_solver = LSMRsolver(data, guesses)
#embed()

# ================

#Solver1 = LBFGSsolver(data=data, guess=truth, truth=truth)
#Solver2 = LBFGSsolver(data=data, guess=guesses, truth=truth)

#prm = np.load("lsmr_solver_x.npy")
#prm[:2*lsmr_solver.Nhkl] = np.exp(prm[:2*lsmr_solver.Nhkl])
#guesses["Gprm"] = prm[2*lsmr_solver.Nhkl:]
# ..

if __name__ == "__main__":
    data = gen_data.gen_data(Nshot_max=500)
    guesses = gen_data.guess_data(data, perturbate=True)
    truth = gen_data.guess_data(data, perturbate=False)
    #prm = np.load("_temp_4.npz")
    #guesses["IAprm"] = prm["AmpA_final"]
    #guesses["IBprm"] = prm["AmpB_final"]
    #guesses["Gprm"] = prm["Gain_final"]

    #LogSolve = LogIsolver(data=data, guess=guesses, truth=truth)

    LogSolveCurve = LogIsolverCurve(use_curvatures=False,
                                    data=data,
                                    guess=guesses,
                                    truth=truth)

    embed()