Example #1
def gpn_analysis():
    """ Check each component of GPN
    """
    g = utils.GraphGenerator.get_gene_proximity_network( '../data/architecture/genome.txt', 50000)

    for i, sg in enumerate(g.get_components()):
        sg.io.dump()
        conc_vec, pf_vec, pr_vec = utils.get_all_data(sg)  # vectors for this component's subgraph

        present(
            'Real-Life data PF (all) on GPN component \#%d' % i, plotter.Plotter.loglog,
            'gene concentration', conc_vec,
            'perron-frobenius eigenvector', pf_vec,
            plt_args={'alpha': 0.02}
        )

        present(
            'Real-Life data pagerank (all) on GPN component \#%d' % i, plotter.Plotter.loglog,
            'gene concentration', conc_vec,
            'pagerank', pr_vec,
            plt_args={'alpha': 0.02}
        )

        present(
            'Histogram of Real-Life Data (all) on GPN component \#%d' % i, plotter.Plotter.plot_histogram,
            'gene concentration', 'count', conc_vec
        )
Example #2
def cross_validation(k, methods, Cs_NLK, Cs_SVM, degrees, lambdas):
    """
    Apply cross-validation on NLCK algorithm. A first cross-validation is done on the values of C, d and lambda
    in NLCK, in order to find the optimal non-linear combination of kernels along with C, d, lambda. Then,
    for each triplet (and hence the corresponding weights vector), cross validation is done on the regularization
    constant of C_SVM, C.
    :param k: int, which dataset to use (k=1, 2 or 3)
    :param methods: list of string, kernel methods
    :param Cs_NLK: np.array, regularization constants in NLCK algorithm
    :param Cs_SVM: np.array, regularization constants in C_SVM algorithm
    :param degrees: np.array, degrees to explore (usually np.range(1, 5))
    :param lambdas: np.array, lambdas (corresponding to parameter 'fnorm' in NLCK) to explore
    :return: pd.DataFrame with the following columns:
            - 'methods': kernels method used
            - 'C_NLCK': regularization constants in NLCK algorithm
            - 'd': degree in NLCK algorithm
            - 'lambda': normalization parameter in NLCK algorithm
            - 'Best C CSVM': best regularization constant in CSVM after cross validation
            - 'val acc': accuracy obtained on validation set
    """
    # Load data
    data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
    data_k = [data1, data2, data3]
    # Initialize results DataFrame
    p = len(kernels)
    n_param = len(Cs_NLK) * len(degrees) * len(lambdas)
    init = np.zeros(n_param)
    results = pd.DataFrame({
        'methods': [methods] * len(init),
        'C NLCK': init,
        'd': init,
        'lambda': init,
        'Best C CSVM': init,
        'val acc': init
    })
    # Reformat
    X_train, y_train, X_val, y_val, X_test, kernels, ID = utils.reformat_data(
        data_k[k - 1], kernels, ID)
    # Start cross validation on triplet (C, d, lambda)
    for i, param in tqdm(enumerate(product(Cs_NLK, degrees, lambdas)),
                         total=n_param):
        C, d, lbda = param
        print('NLCK C={}, degree={}, lambda={}'.format(C, d, lbda))
        # Compute kernel
        Km = NLCK(X_train, y_train, ID, kernels, C=C, eps=1e-9,
                  degree=d).get_K(fnorm=lbda)
        # Cross validation on constant C of C-SVM
        C_opt, scores_tr, scores_te, mean_scores_tr, mean_scores_te = \
            utils.cross_validation(Ps=Cs_SVM,
                                   data=[X_train, y_train, X_val, y_val, X_test],
                                   algo='CSVM',
                                   kfolds=3,
                                   K=Km,
                                   ID=ID,
                                   pickleName='cv_C_SVM_NLCK_C{}_d{}_l{}_p{}_k{}.pkl'.format(C, d, lbda, p, k))
        # Save results
        results.iloc[i, 1:6] = C, d, lbda, C_opt, np.max(mean_scores_te)
    return results
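
A minimal usage sketch for this routine; the kernel names and grids below are illustrative placeholders rather than the values used in the original experiments, and the imports mirror what the function already relies on:

import numpy as np
from itertools import product

methods = ['SP_k4', 'MM_k5_m1', 'WD_d10']   # hypothetical kernel selection
Cs_NLK = np.array([0.1, 1, 10])             # placeholder NLCK regularization grid
Cs_SVM = np.sort([i * 10 ** j for (i, j) in product(range(1, 10), range(-2, 1))])
degrees = np.arange(1, 5)
lambdas = np.array([1, 5, 10])              # placeholder 'fnorm' grid

results = cross_validation(k=1, methods=methods, Cs_NLK=Cs_NLK,
                           Cs_SVM=Cs_SVM, degrees=degrees, lambdas=lambdas)
print(results.sort_values('val acc', ascending=False).head())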
Example #3
 def get(self):
     login_name, authority = get_login_name(self)
     author_secure = get_login_author_secure(self)
     res = get_all_data()
     self.render('tasks.html',
                 author_secure=author_secure,
                 loginname=login_name,
                 query_id='',
                 all_tasks=res['all_data'],
                 totalpage=res['totalpage'],
                 c_page=0)
Example #4
def aligned_kernels(methods):
    """
    Apply ALIGNF algorithm for each data set
    :param methods: list of strings, kernel methods
    :return: - data: X_train, y_train, X_val, y_val, X_test
             - data1: X_train_1, y_train_1, X_val_1, y_val_1, X_test_1
             - data2: X_train_2, y_train_2, X_val_2, y_val_2, X_test_2
             - data3: X_train_3, y_train_3, X_val_3, y_val_3, X_test_3
             - aligned_k: list of aligned kernels
             - ID: np.array, IDs
    """
    data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
    aligned_k = []
    for d in [data1, data2, data3]:
        X, y, _, _, _ = d
        aligned_k.append(ALIGNF(X, y, ID, kernels).get_K())
    return data, data1, data2, data3, aligned_k, ID
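
A short usage sketch modelled on the check_alignf branch in Example #11 below: align the kernels, then reformat the first data set around the kernel aligned for it (utils.reformat_data is assumed to be importable as in the other examples):

methods = ['MM_k3_m1', 'WD_d5', 'SS_l1_k3']
data, data1, data2, data3, aligned_k, ID = aligned_kernels(methods)
# keep the kernel aligned on the first data set and reshape that data set accordingly
X_train_1, y_train_1, X_val_1, y_val_1, X_test_1, K_1, ID_1 = utils.reformat_data(
    data1, [aligned_k[0]], ID)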
Example #5
def timeseries_plot(y_value, companies, start, end):
    data = get_all_data()
    traces = []
    for c in companies:
        trace = go.Scatter(
            name=c,
            x=data[data["company_name"] == c][start:end].index,
            y=data[data["company_name"] == c][y_value][start:end],
        )
        traces.append(trace)

    layout = go.Layout(
        title=f"Timeseries analysis of {capitalize(y_value)}",
        xaxis={"title": "Date"},
        yaxis={"title": capitalize(y_value)},
    )

    output_plot = go.Figure(data=traces, layout=layout)
    return output_plot
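
A minimal call sketch, assuming get_all_data() returns a date-indexed DataFrame with a 'company_name' column; the company names, value column and date range below are placeholders:

fig = timeseries_plot(y_value='close_value',
                      companies=['Amazon', 'Google'],
                      start='2019-01-01', end='2019-06-30')
fig.show()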
Example #6
 def post(self):
     login_name, authority = get_login_name(self)
     author_secure = get_login_author_secure(self)
     query = {}
     query_id = self.get_argument('query_id', '')
     if query_id:
         try:
             query = {'_id': ObjectId(query_id)}
         except Exception:  # not a valid ObjectId; fall back to a username lookup
             query = {'username': query_id}
         #query = {'$or':[{'username':query_id},{'_id':query_id}]}
     c_page = int(self.get_argument('c_page', 0))
     res = get_all_data(c_page, query, preload_b_e)
     self.render('everyday_tasks.html',
                 author_secure=author_secure,
                 loginname=login_name,
                 query_id=query_id,
                 all_tasks=res['all_data'],
                 totalpage=res['totalpage'],
                 c_page=c_page)
Example #7
def real_life_all():
    g = utils.GraphGenerator.get_regulatory_graph('../data/architecture/network_tf_gene.txt', '../data/architecture/genome.txt', 50000)
    conc_vec, pf_vec, pr_vec = utils.get_all_data(g)

    present(
        'Real-Life data PF (all)', plotter.Plotter.loglog,
        'gene concentration', conc_vec,
        'perron-frobenius eigenvector', pf_vec,
        plt_args={'alpha': 0.02}
    )

    present(
        'Real-Life data pagerank (all)', plotter.Plotter.loglog,
        'gene concentration', conc_vec,
        'pagerank', pr_vec,
        plt_args={'alpha': 0.02}
    )

    present(
        'Histogram of Real-Life Data (all)', plotter.Plotter.plot_histogram,
        'gene concentration', 'count', conc_vec
    )
Example #8
    print('First Step')
    task0 = [None] * len(audio_infor_0)
    for i, val in enumerate(audio_infor_0):
        task0[i] = threading.Thread(target=profane_recognizer.get_profane_time,
                                    args=(0, i, val[0], val[1], val[2]))
        task0[i].start()

    for i, _ in enumerate(audio_infor_0):
        print(str(i + 1) + ' / ' + str(len(audio_infor_0)))
        task0[i].join()

    print('Second Step')
    task1 = [None] * len(audio_infor_1)
    for i, val in enumerate(audio_infor_1):
        task1[i] = threading.Thread(target=profane_recognizer.get_profane_time,
                                    args=(1, i, val[0], val[1], val[2]))
        task1[i].start()

    for i, _ in enumerate(audio_infor_1):
        print(str(i + 1) + ' / ' + str(len(audio_infor_1)))
        task1[i].join()

    #Get result from txt files in result_detector folder
    print('Load Result')
    result = utils.get_all_data()
    print(result)
    print('Generate sound')
    utils.generate_sound(result)

    print('Combine Video and Audio')
    utils.combine_audio_and_video(input_video)

    print('Complete!')
Example #9
import os
import random
import sys
import time

import numpy as np
import tensorflow as tf

from input import DataInput
from model import Model
from utils import _eval, get_all_data

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
random.seed(1234)
np.random.seed(1234)
tf.set_random_seed(1234)

train_batch_size = 32
test_batch_size = 512
best_auc = 0.0

train_set, test_set, cate_list, user_count, item_count, cate_count = get_all_data()

print("train_set 0", train_set[0])
print("test_set 0", test_set[0])

gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

  model = Model(user_count, item_count, cate_count, cate_list)
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())

  print('test_gauc: %.4f\t test_auc: %.4f\t best_auc: %.4f' % _eval(sess, model, test_set, test_batch_size, best_auc))
  sys.stdout.flush()
  lr = 0.001
  start_time = time.time()
Example #10
def main():
    # set_rnd_seed(31)   # reproducibility

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data_dir_train',
                        type=str,
                        default='./data/brats_19/Train',
                        metavar='DATA_TRAIN',
                        help="data train directory")
    parser.add_argument('--data_dir_val',
                        type=str,
                        default='./data/brats_19/Validation',
                        metavar='DATA_VAL',
                        help="data validation directory")
    parser.add_argument('--log_dir',
                        type=str,
                        default='logs/',
                        metavar='LOGS',
                        help="logs directory")
    parser.add_argument('--models_dir',
                        type=str,
                        default='models/',
                        metavar='MODELS',
                        help="models directory")
    parser.add_argument('--batch_size',
                        type=int,
                        default=16,
                        metavar='BATCH',
                        help="batch size")
    parser.add_argument('--learning_rate',
                        type=float,
                        default=2.0e-5,
                        metavar='LR',
                        help="learning rate")
    parser.add_argument('--epochs',
                        type=int,
                        default=1000000,
                        metavar='EPOCHS',
                        help="number of epochs")
    parser.add_argument('--zdim',
                        type=int,
                        default=16,
                        metavar='ZDIM',
                        help="Number of dimensions in latent space")
    parser.add_argument('--load',
                        type=str,
                        default='',
                        metavar='LOADDIR',
                        help="time string of previous run to load from")
    # boolean switches: present means True, absent means False
    parser.add_argument('--binary_input',
                        action='store_true',
                        help="one input channel for each tumor structure")
    parser.add_argument('--use_age',
                        action='store_true',
                        help="use age in prediction")
    parser.add_argument('--use_rs',
                        action='store_true',
                        help="use resection status in prediction")

    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device in use: {}".format(device))
    torch.set_default_tensor_type(torch.cuda.FloatTensor
                                  if torch.cuda.is_available()
                                  else torch.FloatTensor)

    logdir_suffix = '-%s-zdim=%d-beta=5000-alpha=%.5f-lr=%.5f-gamma=%d-batch=%d' % (
        args.data_dir_train.replace("Train", "").replace(".", "").replace("/", ""),
        args.zdim, alpha, args.learning_rate, gamma, args.batch_size)
    if args.use_age:
        logdir_suffix += "-age"
    if args.use_rs:
        logdir_suffix += "-rs"
    if args.binary_input:
        logdir_suffix += "-binary_input"
    if args.load == "":
        date_str = str(dt.now())[:-7].replace(":", "-").replace(
            " ", "-") + logdir_suffix
    else:
        date_str = args.load
    args.models_dir = join(args.models_dir, date_str)
    args.log_dir = join(args.log_dir, date_str)
    os.makedirs(args.log_dir, exist_ok=True)
    os.makedirs(args.models_dir, exist_ok=True)
    check_args(args)
    writer = SummaryWriter(args.log_dir + '-train')

    ## Get dataset

    data = get_all_data(args.data_dir_train,
                        args.data_dir_val,
                        orig_data_shape,
                        binary_input=args.binary_input)

    x_data_train_labeled, x_data_train_unlabeled, x_data_val, y_data_train_labeled, y_data_val, y_dim = data
    if args.binary_input:
        n_labels = x_data_train_labeled.shape[1]
    else:
        n_labels = len(
            np.bincount(x_data_train_labeled[:10].astype(np.int8).flatten()))
    x_data_train_labeled = x_data_train_labeled.astype(np.int8)
    x_data_train_unlabeled = x_data_train_unlabeled.astype(np.int8)
    x_data_val = x_data_val.astype(np.int8)

    if args.use_age:
        age_std = 12.36
        age_mean = 62.2
        age_l = np.expand_dims(np.load(join(args.data_dir_train, "age_l.npy")),
                               1)
        age_u = np.expand_dims(np.load(join(args.data_dir_train, "age_u.npy")),
                               1)
        age_v = np.expand_dims(np.load(join(args.data_dir_val, "age.npy")), 1)
        age_l = (age_l - age_mean) / age_std
        age_u = (age_u - age_mean) / age_std
        age_v = (age_v - age_mean) / age_std
    else:
        age_l, age_u, age_v = [], [], []

    if args.use_rs:
        rs_l = one_hot(np.load(join(args.data_dir_train, "rs_l.npy")), 2)
        rs_u = one_hot(np.load(join(args.data_dir_train, "rs_u.npy")), 2)
        rs_v = one_hot(np.load(join(args.data_dir_val, "rs.npy")), 2)
    else:
        rs_l, rs_u, rs_v = [], [], []

    if args.use_rs and args.use_age:
        c_l = np.concatenate([age_l, rs_l], axis=1)
        c_u = np.concatenate([age_u, rs_u], axis=1)
        c_v = np.concatenate([age_v, rs_v], axis=1)
        c_dim = c_l.shape[1]
    elif args.use_rs:
        c_l, c_u, c_v = rs_l, rs_u, rs_v
        c_dim = c_l.shape[1]
    elif args.use_age:
        c_l, c_u, c_v = age_l, age_u, age_v
        c_dim = c_l.shape[1]
    else:
        c_l, c_u, c_v = np.array([]), np.array([]), np.array([])
        c_dim = 0

    y_data_val = y_data_val[:len(x_data_val)]
    print('x unlabeled data shape:', x_data_train_unlabeled.shape)
    print('x labeled data shape:', x_data_train_labeled.shape)
    print('x val data shape:', x_data_val.shape)
    assert data_shape == tuple(x_data_val.shape[2:])
    print('input labels: %d' % n_labels)

    model = SemiVAE(args.zdim,
                    y_dim,
                    c_dim,
                    n_labels=n_labels,
                    binary_input=args.binary_input).to(device)
    print_num_params(model)

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    start_epoch = 0

    if args.load != "":
        print("Loading model from %s" % args.models_dir)
        nums = [int(i.split("_")[-1]) for i in os.listdir(args.models_dir)]
        start_epoch = max(nums)
        model_path = join(args.models_dir, "model_epoch_%d" % start_epoch)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'model_global_step' in checkpoint.keys():
            model.global_step = checkpoint['model_global_step']
        start_epoch = checkpoint['epoch']
        print("Loaded model at epoch %d, total steps: %d" %
              (start_epoch, model.global_step))

    t_start = dt.now()
    for epoch in range(int(start_epoch + 1), int(args.epochs)):
        train(x_data_train_unlabeled, x_data_train_labeled,
              y_data_train_labeled, x_data_val, y_data_val, c_l, c_u, c_v,
              args.batch_size, epoch, model, optimizer, device, log_interval,
              writer, args.log_dir, n_labels)
        if (dt.now() - t_start).total_seconds() > 3600 * 2:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'model_global_step': model.global_step,
                }, join(args.models_dir, "model_epoch_%d" % epoch))
            t_start = dt.now()
        sys.stdout.flush()  # need this when redirecting to file
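
The script also relies on a project-level one_hot helper for the resection-status arrays; a minimal sketch of such a function, assuming integer class labels and a fixed number of classes:

import numpy as np

def one_hot(labels, n_classes):
    """Map an array of integer labels to a (len(labels), n_classes) one-hot matrix."""
    labels = np.asarray(labels, dtype=np.int64).ravel()
    out = np.zeros((labels.shape[0], n_classes), dtype=np.float32)
    out[np.arange(labels.shape[0]), labels] = 1.0
    return out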
Example #11
check_NLCK   = False  # Use NLCK algorithm
check_CVNLCK = False  # Use cross validation on NLCK hyperparameters
check_other  = False  # Free

if __name__ == '__main__':
    if build_kernel:
        methods = ['GP_k3_g1', 'MM_k5_m1', 'WD_d10']
        for method in methods:
            X_train, y_train, X_val, y_val, X_test, K, ID = utils.get_training_datas(method=method, replace=True)
            # Set replace=False to keep the previously saved kernels instead of overwriting them

    elif check_method:
        method = 'MM_k6_m1'
        algo = 'CSVM'
        solver = 'CVX'
        data, data1, data2, data3, K, ID = utils.get_all_data([method])
        Cs = np.sort([i * 10 ** j for (i, j) in product(range(1, 10), range(-2, 1))])
        # Perform cross validation on data set 1 (TF = 1)
        utils.cross_validation(Ps=Cs, data=data1, algo=algo, solver=solver, kfolds=3, K=K, ID=ID)

    elif check_alignf:
        methods = ['MM_k3_m1', 'WD_d5', 'SS_l1_k3']
        data, data1, data2, data3, kernels, ID = ALIGNF.aligned_kernels(methods)
        K = kernels[0]  # 0 index for first data set
        X_train_1, y_train_1, X_val_1, y_val_1, X_test_1, K_1, ID_1 = utils.reformat_data(data1, [K], ID)
        Cs = np.sort([i * 10 ** j for (i, j) in product(range(1, 10), range(-3, 2))])
        utils.cross_validation(Ps=Cs, data=data1, algo='CSVM', kfolds=5, K=K_1[0], ID=ID_1)

    elif check_NLCK:
        methods = ['SP_k6', 'SP_k5', 'SP_k4']
        data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
Example #12
def get_all_data():
    res = []
    for tup in utils.get_all_data():
        print(tup)
        res.append({"name": tup[0], "value": int(tup[1])})
    return jsonify({"data": res})
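
A minimal wiring sketch for this endpoint, assuming a Flask app and a utils module whose get_all_data() yields (name, value) tuples; the route path is a placeholder:

from flask import Flask, jsonify
import utils

app = Flask(__name__)

@app.route('/api/data')
def get_all_data():
    res = []
    for tup in utils.get_all_data():
        res.append({"name": tup[0], "value": int(tup[1])})
    return jsonify({"data": res})

if __name__ == '__main__':
    app.run(debug=True)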
Example #13
            print("第 " + str(i) + " 次" + "反向传播训练")
            train_correct, train_label_predict = self.calculate_correct(train_data, train_label)
            train_corrects.append(train_correct)
            '''
            print("训练集正确率为: " + str(train_correct))
            test_correct, test_label_predict = self.calculate_correct(word_test, res_test)
            test_corrects.append(test_correct)
            print("测试集正确率为: " + str(test_correct))            
            '''

            i += 1
        return train_corrects, test_corrects


word_train, word_test, res_train, res_test = utils.get_data()
train, train_label = utils.get_all_data()
test = utils.get_test_image_matrix()

bp = BPNetwork()

if __name__ == '__main__':
    '''
    Initialize the network architecture:
    input layer 28 * 28 = 784
    output layer 12
    '''
    hid = 100
    bp.setup(784, 12, hid)
    # Initialize the learning rate and the number of training iterations
    learn = 0.01
    times = 130
Example #14
from utils import get_all_data, reformat_data, export_predictions
from NLCKernels import NLCK
from SVM import C_SVM

# Best submission: Non-Linear Combination of 10 kernels listed below:
methods = [
    'SP_k4', 'SP_k5', 'SP_k6', 'MM_k4_m1', 'MM_k5_m1', 'MM_k6_m1', 'WD_d4',
    'WD_d5', 'WD_d10'
]

# Import data
data, data1, data2, data3, kernels, ID = get_all_data(methods)

# Use the algorithm on the first data set with the corresponding hyperparameters (see the report, table 1)
print('\n\n')
X_train_1, y_train_1, X_val_1, y_val_1, X_test_1, kernels_1, ID_1 = reformat_data(
    data1, kernels, ID)
Km1 = NLCK(X_train_1, y_train_1, ID_1, kernels_1, C=1, eps=1e-9,
           degree=3).get_K(fnorm=5, n_iter=50)
svm1 = C_SVM(Km1, ID_1, C=1.9, solver='CVX')
svm1.fit(X_train_1, y_train_1)

# Use the algorithm on the second data set with the corresponding hyperparameters (see the report, table 1)
print('\n\n')
X_train_2, y_train_2, X_val_2, y_val_2, X_test_2, kernels_2, ID_2 = reformat_data(
    data2, kernels, ID)
Km2 = NLCK(X_train_2, y_train_2, ID_2, kernels_2, C=10, eps=1e-9,
           degree=4).get_K(fnorm=5, n_iter=50)
svm2 = C_SVM(Km2, ID_2, C=2.1, solver='CVX')
svm2.fit(X_train_2, y_train_2)
Example #15
		return ('Travel between the driver\'s starting location and the shared'
				' final destination must be possible by automobile.'
				' Please try again.')
	meetup_location = get_meeting_location(
		DG, my_df, stops, start_coord, polies, trip_names)
	if meetup_location == -1:
		return ('Driver must be passing through the Greater Toronto Area.'
				' Please try again.')
	return 'Arrange to meet at: ' + meetup_location

transit_top = 43.90975
transit_left = -79.649908
transit_bottom = 43.591811
transit_right = -79.123111

my_df, stops, trip_names, DG = get_all_data()

# start_pedestrian = 'Union station toronto' 
# start_drive = 'Toronto Public LIbrary'
# end_drive = 'High Park toronto'
# start_coord, start_address = get_gmaps_coords(start_pedestrian)
# (start_address, end_address, polies, points, drive_start_coord, 
# 	end_coord) = get_gmaps_route(A=start_drive, B=end_drive)
# meetup_location = get_meeting_location(
# 	DG, my_df, stops, start_coord, polies, trip_names)
# print(meetup_location)
# exit()

FB_API_URL = 'https://graph.facebook.com/v2.6/me/messages'
ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
VERIFY_TOKEN = os.environ['VERIFY_TOKEN']
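
The snippet only defines the Messenger constants; a minimal sketch of the webhook verification handshake those tokens are typically used for, assuming a Flask app (the route and framework are assumptions, not part of the original code):

from flask import Flask, request

app = Flask(__name__)

@app.route('/webhook', methods=['GET'])
def verify_webhook():
    # Facebook sends hub.verify_token and hub.challenge when the webhook is subscribed
    if request.args.get('hub.verify_token') == VERIFY_TOKEN:
        return request.args.get('hub.challenge', '')
    return 'Invalid verification token', 403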