Example #1
def Get_vector_of_difference_between_clusters(data_path, result_path, step,
                                              n_features, Embedding):
    #data_path=FLAGS.data_path
    #result_path=FLAGS.result_dir
    #step=FLAGS.step
    #n_features = FLAGS.n_features_embedding

    data1 = GetData(data_path + str(2 * step) + "/")
    print(data_path + str(2 * step) + "/")
    cluster1_mean = np.zeros(n_features)
    for d in data1:
        cluster1_mean += Embedding(d)
    cluster1_mean /= len(data1)

    data2 = GetData(data_path + str(2 * step + 1) + "/")
    cluster2_mean = np.zeros(n_features)
    for d in data2:
        cluster2_mean += Embedding(d)
    cluster2_mean /= len(data2)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    np.savetxt(result_path + str(2 * step) + "_" + str(2 * step + 1) + ".txt",
               cluster2_mean - cluster1_mean)
    np.savetxt(result_path + str(2 * step) + ".txt", cluster1_mean)
    np.savetxt(result_path + str(2 * step + 1) + ".txt", cluster2_mean)
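A minimal usage sketch, assuming GetData returns a list of image paths and Embedding maps a path to an n_features-dimensional NumPy vector; the toy embedding and directory names below are hypothetical:

import numpy as np

def toy_embedding(image_path):
    # stand-in for a trained encoder; always returns a fixed vector
    return np.ones(8)

Get_vector_of_difference_between_clusters(
    data_path="clusters/", result_path="directions/", step=0,
    n_features=8, Embedding=toy_embedding)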
Example #2
 def test_invest_business(self):
     r = random.random()
     phone_num = '13812345698'
     img_code = '8888'
     phone_code = '666666'
     pwd = 'admin123'
     real_name = '张三'
     card_id = '330102199003073332'
     dy_server = 'on'
     try:
         # get the image captcha
         self.reg_login.get_img_code(r)
         # get the SMS verification code
         self.reg_login.get_note_code(phone_num, img_code)
         # register
         reg = self.reg_login.register(phone_num, pwd, img_code, phone_code,
                                       dy_server)
         print(reg.json())
         # log in
         self.reg_login.login(phone_num, pwd)
         # real-name verification
         self.account.approve_real_name(real_name, card_id)
         # open an account
         r1 = self.recharge.set_account()
         # extract the third-party request
         res1 = GetData.get_html_data(r1)
         third1 = self.session.post(url=res1[0], data=res1[1])
         logging.info('Third-party response text: {}'.format(third1.text))
         self.assertIn('OK', third1.text)
         # get the recharge verification code
         self.recharge.get_charge_verify_code(r)
         # recharge
         r2 = self.recharge.recharge('chinapnrTrust', '2000', 'reForm',
                                     '8888')
         # third party
         res2 = GetData.get_html_data(r2)
         third2 = self.session.post(url=res2[0], data=res2[1])
         logging.info('Third-party response text: {}'.format(third2.text))
         self.assertIn('OK', third2.text)
         # investment product detail should succeed
         self.invest.get_invest_detail(842)
         # invest
         r3 = self.invest.invest(842, 1000)
         # third party
         res3 = GetData.get_html_data(r3)
         third3 = self.session.post(url=res3[0], data=res3[1])
         logging.info('Third-party response text: {}'.format(third3.text))
         self.assertIn('OK', third3.text)
     except Exception as e:
         logging.error(e)
         raise
Example #3
def main():
    EXPERIMENT.set_code(CodeLogger(CODE_FILES))
    EXPERIMENT.add_tag("FC")

    dataset_x, dataset_y = GetData(is_train=True)
    train_data = GetDataloader(dataset_x, dataset_y, BATCH_SIZE)

    dataset_x, dataset_y = GetData(is_train=False)
    test_data = GetDataloader(dataset_x, dataset_y, BATCH_SIZE)

    model = MyNet()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # step_size must be an int; halve the LR on each scheduler.step() call
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
    model_saver = ModelSaver(os.path.join("Models", "FC"))

    for epoch in range(EPOCHS):
        loss, accuracy, f1score, precision, recall = train(
            train_data, model, optimizer, EXPERIMENT)
        print("Training loss @ epoch", epoch, "=", loss)
        print("Training accuracy @ epoch", epoch, "=", accuracy)
        print("Training F1 score @ epoch", epoch, "=", f1score)
        print("Training precision @ epoch", epoch, "=", precision)
        print("Training recall @ epoch", epoch, "=", recall)
        with EXPERIMENT.train():
            EXPERIMENT.log_metric("Batch loss", loss, step=epoch)
            EXPERIMENT.log_metric("Batch accuracy", accuracy, step=epoch)
            EXPERIMENT.log_metric("Batch F1 score", f1score, step=epoch)
            EXPERIMENT.log_metric("Batch precision", precision, step=epoch)
            EXPERIMENT.log_metric("Batch recall", recall, step=epoch)

        loss, accuracy, f1score, precision, recall = test(
            test_data, model, EXPERIMENT)
        print("Validation loss @ epoch", epoch, "=", loss)
        print("Validation accuracy @ epoch", epoch, "=", accuracy)
        print("Validation F1 score @ epoch", epoch, "=", f1score)
        print("Validation precision @ epoch", epoch, "=", precision)
        print("Validation recall @ epoch", epoch, "=", recall)
        with EXPERIMENT.test():
            EXPERIMENT.log_metric("Batch loss", loss, step=epoch)
            EXPERIMENT.log_metric("Batch accuracy", accuracy, step=epoch)
            EXPERIMENT.log_metric("Batch F1 score", f1score, step=epoch)
            EXPERIMENT.log_metric("Batch precision", precision, step=epoch)
            EXPERIMENT.log_metric("Batch recall", recall, step=epoch)
        print("###############################################\n")
        model_saver.Save("epoch" + str(epoch) + ".pt", model, optimizer,
                         scheduler)

        if (epoch % 10 == 0):
            scheduler.step()
Example #4
def BuildPairsFromRealImages(data_path1, data_path2, result, result_dir1,
                             result_dir2, Embedding):
    if not os.path.exists(result):
        os.makedirs(result)
    os.makedirs(result_dir1)
    os.makedirs(result_dir2)
    n_clusters = 20
    data1 = GetData(data_path1)
    random.shuffle(data1)
    data2 = GetData(data_path2)
    images_indexes1, clusters1 = ClusterData(data1, FLAGS.n_features_embedding,
                                             Embedding, n_clusters,
                                             result + "0_test/", FLAGS.step)
    images_indexes2, clusters2 = ClusterData(data2, FLAGS.n_features_embedding,
                                             Embedding, n_clusters,
                                             result + "1_test/", FLAGS.step)
    print("VARIETION")
    print(np.mean(clusters1.cluster_centers_, axis=0),
          np.mean(clusters2.cluster_centers_, axis=0))
    print(
        np.linalg.norm(clusters1.cluster_centers_ -
                       np.mean(clusters1.cluster_centers_, axis=0)),
        np.linalg.norm(clusters2.cluster_centers_ -
                       np.mean(clusters2.cluster_centers_, axis=0)))

    index = 0
    for cl in range(n_clusters):
        data1_ = [data1[i] for i in images_indexes1[cl]]
        im = GetTheClosestPoint(clusters1.cluster_centers_[cl], data1_,
                                Embedding)
        for cl1 in range(n_clusters):
            data2_ = [data2[i] for i in images_indexes2[cl1]]
            im1 = GetTheClosestPoint(clusters1.cluster_centers_[cl], data2_,
                                     Embedding)
            subprocess.call(["cp", im, result_dir1 + str(index) + ".jpg"])
            subprocess.call(["cp", im1, result_dir2 + str(index) + ".jpg"])
            index += 1

    for cl in range(n_clusters):
        data2_ = [data2[i] for i in images_indexes2[cl]]
        im1 = GetTheClosestPoint(clusters2.cluster_centers_[cl], data2_,
                                 Embedding)
        for cl1 in range(n_clusters):
            data1_ = [data1[i] for i in images_indexes1[cl1]]
            im = GetTheClosestPoint(clusters2.cluster_centers_[cl], data1_,
                                    Embedding)
            subprocess.call(["cp", im, result_dir1 + str(index) + ".jpg"])
            subprocess.call(["cp", im1, result_dir2 + str(index) + ".jpg"])
            index += 1
Example #5
def AutoEncoderLatent(data_path, feature_dimension_embedding, Embedding):
    data = GetData(data_path)
    random.shuffle(data)
    latent = np.zeros([len(data), feature_dimension_embedding])
    for d in range(len(data)):
        latent[d] = Embedding(data[d])
    return data, latent
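A short usage sketch under the same assumptions (toy_embedding as in the sketch under Example #1; the path and dimension are placeholders):

data, latent = AutoEncoderLatent("images/", 8, toy_embedding)
print(latent.shape)  # (len(data), 8)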
Example #6
 def test01_set_account(self):
     r = self.recharge.set_account()
     print(r.text)
     common_assert(self, r, status_code=200, status=200, desc='form')
     h = GetData.get_html_data(r)
     r2 = self.session.post(url=h[0], data=h[1])
     common_assert(self, r2, status_code=200, status=None)
Example #7
def GIST_latent(data_path, feature_dimension_embedding):
    data = GetData(data_path)
    random.shuffle(data)
    latent = np.zeros([len(data), feature_dimension_embedding])
    for d in range(len(data)):
        latent[d] = Gist(data[d])
    return data, latent
Example #8
def CheckProbabilityLabel(meta_data, cluster, n_features):
    res = np.zeros(n_features)

    data = GetData(cluster)
    for d in data:
        res += meta_data[os.path.basename(d).split("_")[-1][:-4]]

    return res
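The filename convention assumed here is "<prefix>_<id>.<ext>": the trailing id (extension stripped) keys meta_data, and the per-image label vectors are summed over the cluster directory. A hedged sketch of a call, with illustrative names:

import numpy as np

meta = {"001": np.array([1.0, 0.0]), "002": np.array([0.0, 1.0])}  # hypothetical ids
label_sums = CheckProbabilityLabel(meta, "clusters/0/", n_features=2)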
Example #9
 def test02_invest(self):
     r = self.invest.invest(842, 1000)
     res = GetData.get_html_data(r)
     r2 = self.session.post(url=res[0], data=res[1])
     logging.info('Response data: {}'.format(r2.text))
     try:
         self.assertIn('OK', r2.text)
     except Exception as e:
         logging.error(e)
         raise
Example #10
 def __init__(self, data_path, n_bins, n_pictures_per_bin, n_features,
              Embedding, data_path_truth, Embedding_truth, im):
     self.data_path = data_path
     self.Embedding = Embedding
     self.image_search = ImageSearch(data_path, n_bins, n_pictures_per_bin,
                                     n_features, Embedding)
     data_truth_ = GetData(data_path_truth)
     self.data_truth = {}
     for d in data_truth_:
         self.data_truth[os.path.basename(d).split("_")[-1]] = \
             Embedding_truth(d)
     self.chosen_picture = im
     self.truth_value = self.data_truth[
         os.path.basename(self.chosen_picture).split("_")[-1]]
     self.image_search.show_images()
Example #11
 def test03_recharge(self):
     self.recharge.get_charge_verify_code(random.random())
     r = self.recharge.recharge('chinapnrTrust', '500', 'reForm', '8888')
     logging.info('Response: {}'.format(r.json()))
     common_assert(self, r, desc='form')
     h = GetData.get_html_data(r)
     r2 = self.session.post(url=h[0], data=h[1])  # third-party account opening
     try:
         common_assert(self, r2, status=None)
     except Exception as e:
         logging.error(e)
         raise
Example #12
def GetPicturesWithLabels(meta_data, data_path, dictionary, result_dir):
    print(dictionary)
    os.makedirs(result_dir)
    data = GetData(data_path)
    for d in data:
        basename = os.path.basename(d).split("_")
        meta_features = meta_data[basename[-1][:-4]]
        for f in dictionary.keys():
            if meta_features[dictionary[f][0]] > 0:
                subprocess.call([
                    'cp', d, result_dir + basename[0] + "_" +
                    str(dictionary[f][1]) + "_" + basename[-1]
                ])
Example #13
def main(query="you know nothing", cmd="script", top_n=5):
    if len(sys.argv) > 3:  # expects query, command, and top_n on the CLI
        query = sys.argv[1]
        cmd = sys.argv[2]
        top_n = int(sys.argv[3])
    buffer = ""

    data = GetData()
    if cmd == "scene": # search for scene
        query = QueryPreprocess(query)
        eps = GetEps()
        vec, tfidf = GetModel(data, eps)
        sceneSet = GetEpisodeSet(data, eps, vec, tfidf, query, top_n)
        for i, (x, y, z) in enumerate(sceneSet):
            buffer += "{}. {}-<{}>:\r{}\r\r".format(i+1, x, y, z)
    elif cmd == "script": # search for script
        docs = GetScriptDataByLines()
        scriptSet = GetScriptSet(docs, data, query, top_n)
        for i, (x, y, z) in enumerate(scriptSet):
            buffer += "{}. {}-<{}>:\r{}\r\r".format(i+1, x, y, z)
    print(buffer)
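Invoked from a shell, the script takes query, command, and top_n positionally; the module name below is a placeholder:

python search.py "you know nothing" script 5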
Example #14
def My_latent(data_path, svm_path, feature_dimension_embedding, result_dir):
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    data = GetData(data_path)
    svm = []
    for i in range(feature_dimension_embedding):
        svm.append(np.genfromtxt(svm_path + str(i + 1) + ".txt"))

    for i in range(feature_dimension_embedding):
        os.makedirs(result_dir + str(i) + "/")
    for d in range(len(data)):
        g = Gist(data[d])
        latent = np.array(
            [np.dot(svm[i], g) for i in range(feature_dimension_embedding)])
        basename = os.path.basename(data[d]).split("_")[-1]
        for i in range(feature_dimension_embedding):
            subprocess.call([
                'cp', data[d],
                result_dir + str(i) + "/" + str(latent[i] + 10) + "_" +
                "_".join(str(f) for f in latent) + "_" + basename
            ])
    return data
Example #15
    print(direction_path)
    AutoEncoderRankingByDirection(data, latent, direction_path,
                                  n_features_embedding, result_dir)

if FLAGS.check_interpretability:
    data_path = FLAGS.data_path
    n_bins = FLAGS.n_bins
    result_dir = FLAGS.result_dir
    step = FLAGS.step

    dir1 = result_dir + str(2 * step) + "/"
    dir2 = result_dir + str(2 * step + 1) + "/"
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    for i in range(n_bins):
        os.makedirs(result_dir + str(step) + "_" + str(i))

    bins = DevideFeatureIntoBins(data_path, n_bins, 0)
    data = GetData(data_path)
    for d in data:
        basename = os.path.basename(d)
        basename = basename.split("_")
        #print(bins, basename[0], d)
        bin = 0
        while (bin < n_bins and (float(basename[0]) > bins[bin])):
            bin += 1
        subprocess.call([
            'cp', d, result_dir + str(step) + "_" + str(bin) + "/" +
            "_".join(basename[1:])
        ])
Example #16
def main():
    # os.makedirs(os.path.join("Models", "Word2Vec"), exist_ok=True)
    data, word_frequency = GetData(TRAIN_PATH,
                                   to_lower=True,
                                   remove_proper_nouns=True)
    # Not using unk embeddings yet
    # I am doing lowercase. Think about it!

    # Load Google's embeddings
    word2vec = KeyedVectors.load_word2vec_format(os.path.join(
        "Data", "GoogleNews-vectors-negative300.bin"),
                                                 binary=True)

    # # Train word2vec
    # word2vec = BuildWord2Vec(os.path.join(
    #     "Data", "GoogleNews-vectors-negative300.bin"), data)

    # Get word_index dictionary
    word_index = BuildWord2Index(word_frequency.keys(),
                                 word2vec.wv.vocab.keys())

    train_loader, test_loader = GetDataLoader(data, word_index)

    # Set up the model
    model = MyNet(len(word_index), EMBEDDING_DIM)

    optimizer = th.optim.Adamax(model.GetTrainableParameters(),
                                lr=LEARNING_RATE)
    scheduler = th.optim.lr_scheduler.StepLR(optimizer,
                                             step_size=10,
                                             gamma=0.9)
    saver = ModelSaver(CWD)

    model.InitializeEmbeddings(word2vec, word_index, EMBEDDING_DIM)

    print("Train batches =", len(train_loader))
    print("Test batches =", len(test_loader))
    print(
        "####################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$####################\n"
    )

    with open("out.txt", "w") as file:
        for epoch in range(EPOCHS):
            print("Epoch", epoch)

            loss, real_prob, fake_prob = train(model, train_loader, optimizer)

            print("Train loss =", loss)
            print("Train real prob =", real_prob)
            print("Train fake prob =", fake_prob)
            file.write("Train loss = " + str(loss) + "\n")
            file.write("Train real prob = " + str(real_prob) + "\n")
            file.write("Train fake prob = " + str(fake_prob) + "\n")

            scheduler.step()

            if (epoch % TEST_EVERY == 0):
                loss, real_prob, fake_prob = test(model, test_loader)

                print("Test loss =", loss)
                print("Test real prob =", real_prob)
                print("Test fake prob =", fake_prob)
                file.write("Test loss = " + str(loss) + "\n")
                file.write("Test real prob = " + str(real_prob) + "\n")
                file.write("Test fake prob = " + str(fake_prob) + "\n")

                print(
                    "####################$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$####################\n"
                )

            if (epoch % 1 == 0):  # always true: checkpoint every epoch
                saver.SaveCheckpoint(model, word_index, optimizer, scheduler,
                                     str(epoch) + ".pt")
Example #17
def json_data(file_name, case_name):
    return GetData.get_json_data(file_name, case_name)
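A hedged usage example; the JSON file name and case name are placeholders for whatever the test data files define:

case_data = json_data("register.json", "test_register_ok")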
Example #18
File: setup.py Project: zhifei2/ASR
import argparse
from utils import GetData

parser = argparse.ArgumentParser(
    description='Create spectrogram magnitude files in dataset.')

parser.add_argument('--audio-dir',
                    type=str,
                    dest='audio_dir',
                    help='Directory of MedleyDB Audio files',
                    required=True)

if __name__ == '__main__':
    args = parser.parse_args()

    d = GetData(audio_dir=args.audio_dir)
    d.convert_all_wav_files()
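With parse_args wired in as above, the script would be run along these lines (the audio directory is a placeholder):

python setup.py --audio-dir /data/MedleyDB/Audio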
Example #19
def get_traits():
    get_data = GetData()
    data = get_data.getDataValues()
    trait_parser = ParseTraits(data)
    return trait_parser.getTraits()
Example #20
import sys

import numpy as np
import tensorflow as tf
import argparse
from utils import GetData
import matplotlib.pyplot as plt

NUM_FREQ_BINS = 1025
SAMPLES_PER_FRAME = 20
SAVE_ITERATIONS = 20
CONFIDENCE = 0.5
GPU_ON = True

n_input_units = NUM_FREQ_BINS * SAMPLES_PER_FRAME
n_hidden_units_h0 = NUM_FREQ_BINS * SAMPLES_PER_FRAME
n_output_units = NUM_FREQ_BINS * SAMPLES_PER_FRAME
fetch_data = GetData()
batch_size = 10
n_channels = 1
dropout = 0.0  # No dropout
test_pred_conf = 0.5  # threshold for prediction on test

if len(sys.argv) == 2:
    fetch_data = GetData(sys.argv[1])

X = tf.placeholder(tf.float32,
                   shape=(None, n_input_units),
                   name='data_placeholder')

y = tf.placeholder(tf.float32,
                   shape=(None, n_output_units),
                   name='labels_placeholder')
Example #21

"""if __name__ == "__main__":
    root = tk.Tk()
    root.geometry('+%d+%d' % (100, 100))

    ShowImages(root, "../CycleGAN_shoes/Toy/PCA_GIST/0/", 2, 5, 3,
               Embedding("3Features")).pack(fill="both", expand=True)
    #ShowImages(root, "../RankSVM_DATA/Shoes/AutoEncoder/result9/","./RankSVM_DATA/Shoes/Supervised/result/", "result/",
    #           "../RankSVM_DATA/Shoes/AutoEncoder/", 3).pack(fill="both", expand=True)
    root.mainloop()"""

if __name__ == "__main__":
    n_iterations = 7
    n_images = 1000
    data = GetData("../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/")

    data_path0 = "../CycleGAN_shoes/Toy/My_interpretability/0/"
    data_path1 = "../CycleGAN_shoes/Toy/AutoEncoderByClusterDirection/0/"
    data_path2 = "../CycleGAN_shoes/Toy/PCA_AutoEncoder/0/"
    data_path3 = "../CycleGAN_shoes/Toy/PCA_GIST/0/"

    bar = progressbar.ProgressBar(
        maxval=n_images,
        widgets=[progressbar.Bar('=', '[', ']'), ' ',
                 progressbar.Percentage()])

    data_paths = [data_path0, data_path1, data_path2, data_path3]
    simulators = []
    for method in range(len(data_paths)):
        simulators.append(
            UserSimuliator(data_paths[method], 2, 5, 3, Embedding("3Features"),
                           "../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/",
                           Embedding("LabelToyShoes"), random.choice(data)))
Example #22
    '600808', '600832', '600837', '600875', '600900', '601006', '601009',
    '601088', '601111', '601166', '601168', '601169', '601186', '601318',
    '601328', '601333', '601390', '601398', '601600', '601601', '601628',
    '601666', '601699', '601727', '601766', '601808', '601857', '601866',
    '601872', '601898', '601899', '601919', '601939', '601958', '601988',
    '601991', '601998'
]
x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []
i = 0
for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data(stock)
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]
    y_test_list += y_temp[int(len(y_temp) * 0.7):]

print('Training...')
print(np.array(x_train_list).shape)
x_train = np.array(x_train_list)[:, :, 0:12]
y_train = np.array(y_train_list)
x_test = np.array(x_test_list)[:, :, 0:12]
y_test = np.array(y_test_list)

print(x_train.shape)
print(x_test.shape)
Example #23
if FLAGS.take_pictures_from_first_bin:
    print("AAAAAAA")
    data_path = FLAGS.data_path
    result_dir = FLAGS.result_dir
    step = FLAGS.step

    n_bins = FLAGS.n_bins
    bins = DevideFeatureIntoBins(data_path, n_bins, 0)
    data = []
    for root, subdirs, files in os.walk(data_path):
        for f in files:
            if os.path.splitext(f)[1].lower() in ('.jpg', '.jpeg'):
                data += [os.path.join(root, f)]
    print(len(data))

    data1_len = len(GetData(data_path + "0_/"))
    data2_len = len(GetData(data_path + "1_/"))
    os.makedirs(result_dir)
    if data1_len < data2_len:
        for d in data:
            basename = os.path.basename(d)
            basename = basename.split("_")
            if (float(basename[0]) < bins[0]):
                subprocess.call(['cp', d, result_dir + "_".join(basename[1:])])
    else:
        for d in data:
            basename = os.path.basename(d)
            basename = basename.split("_")
            if (float(basename[0]) > bins[1]):
                subprocess.call(['cp', d, result_dir + "_".join(basename[1:])])
Example #24
        s_data = line.strip().split(":\t")
        acc.append([s_data[0], float(s_data[1])])
    result = sorted(acc, key=lambda x: x[1], reverse=True)
    print(result)
    for i in range(1):
        ID.append(result[i][0])

i = 0
x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []
for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data_rumor_tensor(stock)
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]
    y_test_list += y_temp[int(len(y_temp) * 0.7):]

print('Training...')
print(np.array(x_train_list).shape)
x_train = np.array(x_train_list)
y_train = np.array(y_train_list)
x_test = np.array(x_test_list)
y_test = np.array(y_test_list)
s_train = np.array(x_train_list)
s_test = np.array(x_test_list)

# print('Training...')
Example #25
# -*- coding: utf-8 -*-

import numpy as np

from Model import model_pre
from utils import GetData

print('Getting Data....')

ID = GetData.get_stock_ID('../Data/stock.txt')
ID.remove('600005')

x_train_list = []
y_train_list = []
x_test_list = []
y_test_list = []

i = 0

for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp = GetData.get_data_quota(stock)
    x_train_list += x_temp[0:int(len(x_temp) * 0.7)]
    x_test_list += x_temp[int(len(x_temp) * 0.7):]

for stock in ID:
    i += 1
    print(str(i) + '\t' + stock)
    x_temp, y_temp = GetData.get_data(stock)
    y_train_list += y_temp[0:int(len(y_temp) * 0.7)]