Example #1
def run_model(train_file, train_labfile, test_file=None, valid_ratio=0.1,
              batchsize=240, epoch=10, neurons=36, n_hiddenlayer=2, lr=1e-2,
              base_dir='../Data/', save_prob=False, dropout_rate=0.2):
    """Run the deep neural network with droput"""
    print("Start")
    st = datetime.now()

    data = load_data(base_dir + train_file)
    label_data, label_map = load_label(base_dir + train_labfile)

    # window size = 9, output = 48 phonemes
    n_input = data.shape[1] * 9
    n_output = 48
    N = int(data.shape[0] * (1 - valid_ratio))

    print("Done loading data. Start constructing the model...")
    functions = construct_DNN(n_input, n_output, archi=neurons,
                              n_hid_layers=n_hiddenlayer, lr=lr,
                              dropout_rate=dropout_rate)
    gradient_update, feed_forward = functions

    print("Finish constructing the model. Start Training...")
    result = train_model(N, epoch, batchsize, gradient_update,
                         feed_forward, data, label_data, n_output,
                         dropout_rate)
    obj_history, valid_accu, cache = result

    # train accuracy
    train_accu = accuracy(0, N, data, feed_forward, n_output,
                          label_data, cache, dropout_rate)
    print("Training Accuracy: %.4f %%" % (100 * train_accu))

    # validation
    valid_accu = accuracy(N, data.shape[0], data, feed_forward,
                          n_output, label_data, cache, dropout_rate)
    print("Validation Accuracy: %.4f %%" % (100 * valid_accu))

    if save_prob:
        accuracy(0, data.shape[0], data, feed_forward, n_output,
                 label_data, cache, dropout_rate,
                 save_pred=True, save_name='ytrain_prob')

    if test_file:
        test_predict(base_dir + test_file, label_map, feed_forward,
                     base_dir, dropout_rate, save_prob=save_prob)

    print("Done, Using %s." % str(datetime.now() - st))
Example #2
import pandas as pd
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.slim.nets import resnet_v1

import utils

datadirect = '../../data/DatasetA_train_20180813/'
train_txt = 'train.txt'
label_txt = 'label_list.txt'
attr_txt = 'attribute_list.txt'
lblattr_txt = 'attributes_per_class.txt'
lblemb_txt = 'class_wordembeddings.txt'

##################preparation pipelines#######################
df_lbl = utils.load_label(datadirect, label_txt)
num_classes = df_lbl.shape[0]

df_pair = utils.load_pair(datadirect, train_txt)
df_pair = pd.merge(df_pair, df_lbl, on='label_code', how='left')
imgnum = df_pair.shape[0]

df_attrname, df_lblattr = utils.load_attr(datadirect, attr_txt, lblattr_txt)
df_attr = pd.merge(df_pair[['label_code']],
                   df_lblattr,
                   on='label_code',
                   how='left')
attrnum = df_attrname.shape[0]

df_lblattr = pd.merge(df_lblattr, df_lbl, on='label_code', how='left')
adj_attrsim = utils.create_adjattr(df_lblattr, num_classes)
Example #3
def main(args):

    # load graph data
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
          'start loading...',
          flush=True)
    if args.supervised == 'True':
        train_pool, train_labels, nlabels, multi = utils.load_label(args.label)
        train_data, num_nodes, num_rels, train_indices, ntrain, node_attri = utils.load_supervised(
            args, args.link, args.node, train_pool)
    elif args.supervised == 'False':
        train_data, num_nodes, num_rels, node_attri = utils.load_unsupervised(
            args, args.link, args.node)
        nlabels = 0
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
          'finish loading...',
          flush=True)

    # check cuda
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(args.gpu)
    print('check 1', flush=True)
    # create model
    model = TrainModel(node_attri,
                       num_nodes,
                       args.n_hidden,
                       num_rels,
                       nlabels,
                       num_bases=args.n_bases,
                       num_hidden_layers=args.n_layers,
                       dropout=args.dropout,
                       use_cuda=use_cuda,
                       reg_param=args.regularization)
    print('check 2', flush=True)
    if use_cuda:
        model.cuda()
    print('check 3', flush=True)
    # build adj list and calculate degrees for sampling
    degrees = utils.get_adj_and_degrees(num_nodes, train_data)
    print('check 4', flush=True)
    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # training loop
    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
          "start training...",
          flush=True)
    for epoch in range(args.n_epochs):
        model.train()

        # perform edge neighborhood sampling to generate training graph and data
        if args.supervised == 'True':
            g, node_id, edge_type, node_norm, matched_labels, matched_index = \
            utils.generate_sampled_graph_and_labels_supervised(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, degrees, args.negative_sample, args.edge_sampler,
                train_indices, train_labels, multi, nlabels, ntrain, if_train=True, label_batch_size=args.label_batch_size)
            if multi:
                matched_labels = torch.from_numpy(matched_labels).float()
            else:
                matched_labels = torch.from_numpy(matched_labels).long()
        elif args.supervised == 'False':
            g, node_id, edge_type, node_norm, data, labels = \
            utils.generate_sampled_graph_and_labels_unsupervised(
                train_data, args.graph_batch_size, args.graph_split_size,
                num_rels, degrees, args.negative_sample,
                args.edge_sampler)
            data, labels = torch.from_numpy(data), torch.from_numpy(labels)

        # set node/edge feature
        node_id = torch.from_numpy(node_id).view(-1, 1).long()
        edge_type = torch.from_numpy(edge_type)
        edge_norm = node_norm_to_edge_norm(
            g,
            torch.from_numpy(node_norm).view(-1, 1))

        deg = g.in_degrees(range(g.number_of_nodes())).float().view(-1, 1)
        if use_cuda:
            node_id, deg, g = node_id.cuda(), deg.cuda(), g.to('cuda')
            edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()
            if args.supervised == 'True':
                matched_labels = matched_labels.cuda()
            elif args.supervised == 'False':
                data, labels = data.cuda(), labels.cuda()

        embed, pred = model(g, node_id, edge_type, edge_norm)
        if args.supervised == 'True':
            loss = model.get_supervised_loss(pred, matched_labels,
                                             matched_index, multi)
        elif args.supervised == 'False':
            loss = model.get_unsupervised_loss(g, embed, data, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.grad_norm)  # clip gradients
        optimizer.step()
        optimizer.zero_grad()

        print(
            time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
            "Epoch {:05d} | Loss {:.4f}".format(epoch, loss.item()),
            flush=True)

    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
          "training done",
          flush=True)

    print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ", time.localtime()) +
          "start output...",
          flush=True)
    model.eval()
    if args.attributed == 'True':
        np.random.shuffle(train_data)
        node_emb, node_over = np.zeros((num_nodes, args.n_hidden)), set()
        batch_total = math.ceil(len(train_data) / args.graph_batch_size)
        for batch_num in range(batch_total):

            # perform edge neighborhood sampling to generate training graph and data
            g, old_node_id, edge_type, node_norm, data, labels = \
                utils.generate_sampled_graph_and_labels_unsupervised(
                    train_data, args.graph_batch_size, args.graph_split_size,
                    num_rels, degrees, args.negative_sample,
                    args.edge_sampler)

            # set node/edge feature
            node_id = torch.from_numpy(old_node_id).view(-1, 1).long()
            edge_type = torch.from_numpy(edge_type)
            edge_norm = node_norm_to_edge_norm(
                g,
                torch.from_numpy(node_norm).view(-1, 1))
            if use_cuda:
                node_id, g = node_id.cuda(), g.to('cuda')
                edge_type, edge_norm = edge_type.cuda(), edge_norm.cuda()

            embed, _ = model(g, node_id, edge_type, edge_norm)
            node_emb[old_node_id] = embed.detach().cpu().numpy().astype(
                np.float32)

            for each in old_node_id:
                node_over.add(each)

            print(time.strftime("%a, %d %b %Y %H:%M:%S +0000: ",
                                time.localtime()) +
                  f'finish output batch number {batch_num} -> {batch_total}',
                  flush=True)

        utils.save(args, node_emb)

    elif args.attributed == 'False':
        utils.save(
            args, model.rgcn.layers[0].embedding.weight.detach().cpu().numpy())

    emb, labs = creoSpazio("./data/PubMed/emb.dat")
    new_emb = TSNEImpl(emb)
    drawImpl(new_emb, labs, "./ciao1.png")

    return
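
For context, a minimal sketch of the argument namespace this main() consumes. The flag names mirror the args.* attributes accessed above; every default value is an illustrative assumption, not taken from the source.

import argparse

def parse_args():
    # Flag names mirror the attributes used in main(); defaults are guesses.
    parser = argparse.ArgumentParser()
    parser.add_argument('--link', default='link.dat')
    parser.add_argument('--node', default='node.dat')
    parser.add_argument('--label', default='label.dat')
    parser.add_argument('--supervised', default='False', choices=['True', 'False'])
    parser.add_argument('--attributed', default='False', choices=['True', 'False'])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--lr', type=float, default=1e-2)
    parser.add_argument('--dropout', type=float, default=0.2)
    parser.add_argument('--regularization', type=float, default=0.01)
    parser.add_argument('--grad_norm', type=float, default=1.0)
    parser.add_argument('--n_hidden', type=int, default=50)
    parser.add_argument('--n_bases', type=int, default=-1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--n_epochs', type=int, default=100)
    parser.add_argument('--graph_batch_size', type=int, default=30000)
    parser.add_argument('--graph_split_size', type=float, default=0.5)
    parser.add_argument('--label_batch_size', type=int, default=512)
    parser.add_argument('--negative_sample', type=int, default=5)
    parser.add_argument('--edge_sampler', default='uniform')
    return parser.parse_args()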
Example #4
# -*- coding: utf-8 -*-
# Created by Jinkey on 2018/1/4.
__author__ = 'Jinkey'

import tensorflow as tf
import jieba as jb
import numpy as np

import utils

titles = utils.load_data(catalogue=utils.MULTI_FLAG)
target = utils.load_label(catalogue=utils.MULTI_FLAG)

max_sequence_length = 30
embedding_size = 50

# Tokenize the titles
titles = [".".join(jb.cut(t, cut_all=True)) for t in titles]

# word2vec bag-of-words vectorization
vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
    max_sequence_length, min_frequency=1)
text_processed = np.array(list(vocab_processor.fit_transform(titles)))

# Read the labels
dict = vocab_processor.vocabulary_._mapping
sorted_vocab = sorted(dict.items(), key=lambda x: x[1])

# Configure the network structure
model = utils.build_netword(catalogue=utils.MULTI_FLAG,
                            dict=dict,
Example #5
    # Hyperparameters
    batch_size = args.batch_size
    lr = args.lr
    momentum = args.momentum
    num_epoch = args.num_epoch
    data_path = args.data_path
    label_path = args.label_path
    k = args.k
    input_dim = 40*(2*k+1)
    output_dim = 138

    '''Step 1: Load Dataset'''
    print('Loading Training Data...')
    train_data, train_idx = utils.load_data(os.path.join(args.data_path, 'train.npy'), k)
    train_label = utils.load_label(os.path.join(args.label_path, 'train_labels.npy'))
    train_dataset = utils.SpeechDataset(train_data, train_label, train_idx, k)
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )

    print('Loading Validation Data...')
    val_data, val_idx = utils.load_data(os.path.join(args.data_path, 'dev.npy'), k)
    val_label = utils.load_label(os.path.join(args.label_path, 'dev_labels.npy'))
    val_dataset = utils.SpeechDataset(val_data, val_label, val_idx, k)
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size
    )
Example #6
    for ori_velo_path in tqdm.tqdm(sorted(ori_velo_paths)):
        velo_idx = get_filename(ori_velo_path, False)
        calib_path = get_file_path_by_idx(velo_idx, src_calib_dir)
        label_path = get_file_path_by_idx(velo_idx, src_label_dir)
        image_path = get_file_path_by_idx(velo_idx, src_image_dir)

        output_velo_path = os.path.join(output_disturb_dir, velo_idx + ".bin")
        output_viz_velo_ori_path = os.path.join(output_viz_original_dir,
                                                velo_idx + ".jpg")
        output_viz_velo_disturb_path = os.path.join(output_viz_disturb_dir,
                                                    velo_idx + ".jpg")

        # Load calibration
        calib = read_calib_file(calib_path)
        # Load labels
        labels = load_label(label_path)
        # Load Lidar PC
        pc_velo = load_velo_scan(ori_velo_path)[:, :3]

        proj_cam_to_velo = project_cam2_to_velo(calib)

        temp = np.asarray([[1, 1, 1]])
        delete_inds = np.asarray([0])
        for obj in labels:
            # get obj range info
            range_info = get_obj_range_info(obj)
            inds = get_obj_inds(pc_velo, range_info)
            selected = pc_velo[inds]
            selected = selected[selected[:, 2].argsort()]
            selected = get_randon_pc(selected)
            temp = np.concatenate((temp, selected), axis=0)
Example #7
def model_q2(data_path,
             save_path,
             train_data,
             train_label,
             test_data,
             test_label,
             C,
             iter_time,
             separate_type,
             print_predict=False,
             PCA_visualize=False,
             tSNE_visualize=False):

    if not os.path.exists(save_path):
        os.mkdir(save_path)

    training_data = utils.load_data(data_path, train_data)
    training_label = utils.load_label(data_path, train_label)
    testing_data = utils.load_data(data_path, test_data)
    testing_label = utils.load_label(data_path, test_label)

    if separate_type == 'random':
        trn_1_r1_data, trn_1_r1_label, trn_1_r2_data, trn_1_r2_label = utils.separate_data_random(
            data_path, train_data, train_label, mode='1r')
        trn_2_r1_data, trn_2_r1_label, trn_2_r2_data, trn_2_r2_label = utils.separate_data_random(
            data_path, train_data, train_label, mode='2r')
        trn_3_r1_data, trn_3_r1_label, trn_3_r2_data, trn_3_r2_label = utils.separate_data_random(
            data_path, train_data, train_label, mode='3r')
    elif separate_type == 'prior':
        trn_1_r1_data, trn_1_r1_label, trn_1_r2_data, trn_1_r2_label = utils.separate_data_prior(
            data_path, train_data, train_label, mode='1r')
        trn_2_r1_data, trn_2_r1_label, trn_2_r2_data, trn_2_r2_label = utils.separate_data_prior(
            data_path, train_data, train_label, mode='2r')
        trn_3_r1_data, trn_3_r1_label, trn_3_r2_data, trn_3_r2_label = utils.separate_data_prior(
            data_path, train_data, train_label, mode='3r')

    if PCA_visualize:
        utils.visual_2D_PCA(training_data, training_label, save_path)
    if tSNE_visualize:
        utils.visual_2D_tSNE(training_data, training_label, save_path)

    scaler = preprocessing.StandardScaler().fit(training_data)
    trn_1_r1_data = scaler.transform(trn_1_r1_data)
    trn_1_r2_data = scaler.transform(trn_1_r2_data)
    trn_2_r1_data = scaler.transform(trn_2_r1_data)
    trn_2_r2_data = scaler.transform(trn_2_r2_data)
    trn_3_r1_data = scaler.transform(trn_3_r1_data)
    trn_3_r2_data = scaler.transform(trn_3_r2_data)
    testing_data = scaler.transform(testing_data)

    model_1_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_1_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3_r1 = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3_r2 = SVC(C=C, max_iter=iter_time, class_weight='balanced')

    model_1_r1.fit(trn_1_r1_data, trn_1_r1_label)
    model_1_r2.fit(trn_1_r2_data, trn_1_r2_label)
    model_2_r1.fit(trn_2_r1_data, trn_2_r1_label)
    model_2_r2.fit(trn_2_r2_data, trn_2_r2_label)
    model_3_r1.fit(trn_3_r1_data, trn_3_r1_label)
    model_3_r2.fit(trn_3_r2_data, trn_3_r2_label)

    y_predict = []
    right_predict = 0.0
    for i in range(len(testing_data)):
        y1_r1 = int(model_1_r1.predict([testing_data[i]])[0])
        y1_r2 = int(model_1_r2.predict([testing_data[i]])[0])
        y2_r1 = int(model_2_r1.predict([testing_data[i]])[0])
        y2_r2 = int(model_2_r2.predict([testing_data[i]])[0])
        y3_r1 = int(model_3_r1.predict([testing_data[i]])[0])
        y3_r2 = int(model_3_r2.predict([testing_data[i]])[0])

        predict_1 = [[y1_r1, y1_r2], [y2_r1, y2_r2], [y3_r1, y3_r2]]
        predict_2 = [y1_r1 and y1_r2, y2_r1 and y2_r2, y3_r1 and y3_r2]

        y_possible = np.where(np.array(predict_2) == 1)[0]

        if len(y_possible) > 0:
            y_idx = np.random.randint(low=0, high=len(y_possible))
            y = y_possible[y_idx] + 1
        else:
            y = np.random.randint(low=1, high=4)
        y_predict.append(y)
        if y == testing_label[i][0]:
            right_predict = right_predict + 1.0

    acc = right_predict * 1.0 / len(testing_data)

    joblib.dump(model_1_r1, os.path.join(save_path, 'svm_model_1_r1.m'))
    joblib.dump(model_1_r2, os.path.join(save_path, 'svm_model_1_r2.m'))
    joblib.dump(model_2_r1, os.path.join(save_path, 'svm_model_2_r1.m'))
    joblib.dump(model_2_r2, os.path.join(save_path, 'svm_model_2_r2.m'))
    joblib.dump(model_3_r1, os.path.join(save_path, 'svm_model_3_r1.m'))
    joblib.dump(model_3_r2, os.path.join(save_path, 'svm_model_3_r2.m'))

    if print_predict:
        print(y_predict)

    f1 = open(os.path.join(save_path, 'problem_2.txt'), 'a')
    f1.write(
        'If we use our Part vs Part model by sklearn, and the separate type is %s, the classification accuracy is: %g.\n'
        % (separate_type, acc))
    f1.close()

    return acc
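
A hypothetical call for reference; all paths and file names below are placeholders, not taken from the source.

# Placeholder paths and file names for illustration only.
acc = model_q2(data_path='./data', save_path='./results_q2',
               train_data='train_data.csv', train_label='train_label.csv',
               test_data='test_data.csv', test_label='test_label.csv',
               C=1.0, iter_time=1000, separate_type='random',
               PCA_visualize=True)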
Example #8
def model_q1(data_path,
             save_path,
             train_data,
             train_label,
             test_data,
             test_label,
             C,
             iter_time,
             print_predict=False,
             PCA_visualize=False,
             tSNE_visualize=False):

    if not os.path.exists(save_path):
        os.mkdir(save_path)

    training_data = utils.load_data(data_path, train_data)
    testing_data = utils.load_data(data_path, test_data)
    training_label = utils.load_label(data_path, train_label)
    testing_label = utils.load_label(data_path, test_label)

    training_label_1 = utils.load_label(data_path, train_label, mode='1r')
    training_label_2 = utils.load_label(data_path, train_label, mode='2r')
    training_label_3 = utils.load_label(data_path, train_label, mode='3r')

    if PCA_visualize:
        utils.visual_2D_PCA(training_data, training_label, save_path)
    if tSNE_visualize:
        utils.visual_2D_tSNE(training_data, training_label, save_path)

    scaler = preprocessing.StandardScaler().fit(training_data)
    training_data = scaler.transform(training_data)
    testing_data = scaler.transform(testing_data)

    model = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_1r = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_2r = SVC(C=C, max_iter=iter_time, class_weight='balanced')
    model_3r = SVC(C=C, max_iter=iter_time, class_weight='balanced')

    model.fit(training_data, training_label)
    model_1r.fit(training_data, training_label_1)
    model_2r.fit(training_data, training_label_2)
    model_3r.fit(training_data, training_label_3)

    acc_multi = model.score(testing_data, testing_label)

    y_predict = []
    right_predict = 0.0
    for i in range(len(testing_data)):
        y1 = model_1r.predict([testing_data[i]])
        y2 = model_2r.predict([testing_data[i]])
        y3 = model_3r.predict([testing_data[i]])
        y_possible = np.where(np.array([y1, y2, y3]) == 1)[0]
        if len(y_possible) > 0:
            y_idx = np.random.randint(low=0, high=len(y_possible))
            y = y_possible[y_idx] + 1
        else:
            y = np.random.randint(low=1, high=4)
        y_predict.append(y)
        if y == testing_label[i][0]:
            right_predict = right_predict + 1.0

    acc = right_predict * 1.0 / len(testing_data)

    joblib.dump(model_1r, os.path.join(save_path, 'svm_model_1r.m'))
    joblib.dump(model_2r, os.path.join(save_path, 'svm_model_2r.m'))
    joblib.dump(model_3r, os.path.join(save_path, 'svm_model_3r.m'))

    if print_predict:
        print(y_predict)

    f1 = open(os.path.join(save_path, 'problem_1.txt'), 'w')
    f1.write(
        'If we use multi-classifying SVM in sklearn, the classification accuracy is: %g;\n'
        % (acc_multi))
    f1.write(
        'If we use our 1 vs Rest model by sklearn, the classification accuracy is: %g.'
        % (acc))
    f1.close()

    return acc_multi, acc
Example #9
def main(args):

    # First check that the dataset directories exist
    if not os.path.exists(args.dataset_train_dir) or not os.path.exists(
            args.dataset_validate_dir):
        raise NameError(
            'Dataset path "./dataset/MIR-1K/Wavfile" or "./dataset/MIR-1K/UndividedWavfile" does not exist!'
        )

    # 1. Collect the paths of the training dataset files into lists
    train_file_list = load_file(args.dataset_train_dir)
    valid_file_list = load_file(args.dataset_validate_dir)

    # Sampling rate of the dataset
    mir1k_sr = args.dataset_sr
    # Window size for the short-time Fourier transform
    n_fft = 1024
    # Hop length; the frame shift, analogous to the stride in a convolution
    hop_length = n_fft // 4

    # Model parameters
    # Learning rate
    learning_rate = args.learning_rate

    # Number of hidden units in each RNN layer
    num_hidden_units = [1024, 1024, 1024, 1024, 1024]
    # Batch size
    batch_size = args.batch_size
    # Number of frames to sample
    sample_frames = args.sample_frames
    # Number of training iterations
    iterations = args.iterations
    # dropout
    dropout_rate = args.dropout_rate

    # Model save path
    model_dir = args.model_dir
    model_filename = args.model_filename

    # Load the wav data of the training set;
    # wavs_mono_train holds the mono audio, wavs_label_train holds the labels
    label_train = load_label(args.dataset_label_dir, sr=mir1k_sr)
    label_test = load_label(args.dataset_label_dir, sr=mir1k_sr)
    wavs_mono_train, wavs_label_train = load_wavs(filenames=train_file_list,
                                                  wavs_label=label_train,
                                                  sr=mir1k_sr)
    # Convert the audio to the frequency domain with a short-time Fourier transform
    stfts_mono_train, stfts_label_train = wavs_to_specs(
        wavs_mono=wavs_mono_train,
        wavs_label=wavs_label_train,
        n_fft=n_fft,
        hop_length=hop_length)

    # Same as above, but for the validation data
    wavs_mono_valid, wavs_label_valid = load_wavs(filenames=valid_file_list,
                                                  wavs_label=label_test,
                                                  sr=mir1k_sr)
    stfts_mono_valid, stfts_label_valid = wavs_to_specs(
        wavs_mono=wavs_mono_valid,
        wavs_label=wavs_label_valid,
        n_fft=n_fft,
        hop_length=hop_length)

    # Initialize the model
    model = SVMRNN(num_features=n_fft // 2 + 1,
                   num_hidden_units=num_hidden_units)

    # Load an existing model; if there is none, initialize all variables
    startepo = model.load(file_dir=model_dir)

    print('startepo:' + str(startepo))

    # Start training
    for i in range(iterations):
        # Resume from where a previous run left off
        if i < startepo:
            continue

        # Get the next batch of data
        data_mono_batch, data_label_batch = get_next_batch(
            stfts_mono=stfts_mono_train,
            stfts_label=stfts_label_train,
            batch_size=batch_size,
            sample_frames=sample_frames)

        # Get the magnitude (frequency-domain) values
        x_mixed_src, _ = separate_magnitude_phase(data=data_mono_batch)
        y_label_src, _ = separate_magnitude_phase(data=data_label_batch)

        # Feed into the neural network and train
        train_loss = model.train(x_mixed_src=x_mixed_src,
                                 y_label_src=y_label_src,
                                 learning_rate=learning_rate,
                                 dropout_rate=dropout_rate)

        if i % 10 == 0:
            print('Step: %d Train Loss: %f' % (i, train_loss))

        if i % 200 == 0:
            # Evaluate the model accuracy here
            print('==============================================')
            data_mono_batch, data_label_batch = get_next_batch(
                stfts_mono=stfts_mono_valid,
                stfts_label=stfts_label_valid,
                batch_size=batch_size,
                sample_frames=sample_frames)

            x_mixed_src, _ = separate_magnitude_phase(data=data_mono_batch)
            y_label_src, _ = separate_magnitude_phase(data=data_label_batch)

            y_sing_src_pred, validate_loss = model.validate(
                x_mixed_src=x_mixed_src,
                y_label_src=y_label_src,
                dropout_rate=dropout_rate)
            print('Step: %d Validation Loss: %f' % (i, validate_loss))
            print('==============================================')

        if i % 200 == 0:
            model.save(directory=model_dir,
                       filename=model_filename,
                       global_step=i)
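
A minimal sketch of the argument parser this main() expects. The attribute names and the MIR-1K directory names come from the code above; every default (including which directory is the training one and the label directory path) is an assumption.

import argparse

def parse_args():
    # Attribute names taken from main() above; default values are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_train_dir', default='./dataset/MIR-1K/Wavfile')
    parser.add_argument('--dataset_validate_dir', default='./dataset/MIR-1K/UndividedWavfile')
    parser.add_argument('--dataset_label_dir', default='./dataset/MIR-1K/Label')
    parser.add_argument('--dataset_sr', type=int, default=16000)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--sample_frames', type=int, default=10)
    parser.add_argument('--iterations', type=int, default=30000)
    parser.add_argument('--dropout_rate', type=float, default=0.5)
    parser.add_argument('--model_dir', default='./model')
    parser.add_argument('--model_filename', default='svmrnn.ckpt')
    return parser.parse_args()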
Example #10
    arg = parse_args()
    print("========Call with Arguments========")
    print(arg)

    if not os.path.exists(RESULTS_PATH):
        os.mkdir(RESULTS_PATH)
        print(">>> Directory {} created.".format(RESULTS_PATH))

    if not os.path.exists(BCM_PATH):
        os.mkdir(BCM_PATH)
        print(">>> Directory {} created.".format(BCM_PATH))

    print("\n========Reading Data========")
    data, _ = load_mat(arg.data_path, False, 1, 1, ',', True, False, None,
                       None)
    label = load_label(arg.label_path, ',', '0')
    data = data["data"]
    k_means_logger = Logger(LOG_PATH,
                            "Benchmark_K_MEANS.log",
                            benchmark_logger=True)
    dbscan_logger = Logger(LOG_PATH,
                           "Benchmark_DBSCAN.log",
                           benchmark_logger=True)

    k_means_results = {}
    dbscan_results = {}

    print("\n========Benchmarking========")

    for dim in DR_DIM:
        for method in ["PCA", "TSNE"]:
Example #11
    if args.algo == 'pane':
        Xf = utils.load_emd(path_emb + ".f", n, d / 2, n - 1)
        Xb = utils.load_emd(path_emb + ".b", n, d / 2, n - 1)
        Xf = preprocessing.normalize(Xf, norm='l2', axis=1)
        Xb = preprocessing.normalize(Xb, norm='l2', axis=1)
        X = np.hstack([Xf, Xb])
        print(X.shape)
    else:
        X = utils.load_emd(path_emb, n, d, n - 1)

    path_label = settings.DATA_INFO[args.data]['path'] + 'labels.txt'

    maf1 = []
    mif1 = []
    if args.multi:
        y = utils.load_label(path_label, n)
        X, y = filter(X, y)
        y = MultiLabelBinarizer(sparse_output=True).fit_transform(y)
    else:
        y = utils.read_cluster(n, path_label)

    for ratio in [0.9, 0.7, 0.5, 0.3, 0.1]:
        print("labelled data ratio:" + str(1 - ratio))
        macro_f1_avg, micro_f1_avg = eval(X, y, ratio, args.multi, 3)
        maf1.append(macro_f1_avg)
        mif1.append(micro_f1_avg)
        print("macro-f1=%f, micro-f1=%f", macro_f1_avg, micro_f1_avg)

    print(maf1)
    print(mif1)
Example #12
    def get_label(self, i):
        assert i < self.size
        fname = os.path.join(self.label, '{:06d}.txt'.format(i))
        return utils.load_label(fname)
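
A hypothetical usage sketch; KittiDataset and its constructor arguments are made up here, only the label/size attributes and the method itself come from the example.

# Hypothetical wrapper object -- attribute names mirror the method above.
dataset = KittiDataset(label='./training/label_2', size=7481)
for i in range(dataset.size):
    objects = dataset.get_label(i)  # parsed label objects for frame i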
Example #13
import gc

import numpy as np
import scipy.sparse as sp

var = 'card2'
A = sp.load_npz(
    '/home/sh/anaconda3/fraud/ieee-fraud-detection/edge.npz')  #(N, N)
A = A.astype(np.int16)
A = A.toarray()
gc.collect()
#%%
X = np.load(
    '/home/sh/anaconda3/fraud/ieee-fraud-detection/X.npy')  # (27, N, F)
#%%
X = np.transpose(X, [1, 0, 2])  # (N, 27, F)
X = np.array(X, dtype=np.float16)
gc.collect()
#%%
Y_train, Y_val, Y_test = load_label()  # (27, n, 2)
idx_train = np.load(
    '/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_train.npy')  #(N,)
idx_val = np.load(
    '/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_val.npy')  # (N,)
idx_test = np.load(
    '/home/sh/anaconda3/fraud/ieee-fraud-detection/mask_test.npy')  #(N,)
idx_train = idx_train.astype(np.int16)
idx_val = idx_val.astype(np.int16)
idx_test = idx_test.astype(np.int16)
gc.collect()
# Parameters
t = 27
N = X.shape[0]  # Number of nodes in the graph
F = X.shape[2]  # Original feature dimension
#n_classes = 2