Example #1
 def load_classifier(self, path):
     """Loads a trained classifier from file.
     IMPORTANT: Doesn't load the associated corpus, so it can't train, just classify/test."""
     main_clf = utils.load_object(path)
     sub_clfs = utils.load_object(path + '1')
     self.clf.set_classifier(main_clf)
     self.clf.subclassifiers = sub_clfs
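Most of the examples on this page assume a pickle-based utils.load_object / save_object pair that round-trips a Python object to disk. The exact helper (and even its argument order) differs from project to project, but a minimal sketch looks roughly like this (hypothetical, not any specific project's utils module):

import pickle

def save_object(obj, path):
    # Serialize obj to path with pickle (real helpers may add protocols, compression, etc.).
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def load_object(path):
    # Load a previously pickled object back from path.
    with open(path, 'rb') as f:
        return pickle.load(f)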
Example #2
 def crawl(self, **kwargs):
     from crawler.settings import spider, pipeline
     # Treat the engine as the main function
     # Read the links from the database
     spider = load_object(spider)(**kwargs)  # needs start_urls
     pipeline = load_object(pipeline)(**kwargs)  # needs a logger
     self.engine.open_spider(spider, pipeline,
                             iter(spider.start_requests()))
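Here (and in examples #5, #29, and #34 below) load_object is instead used to resolve a dotted import path from the settings module into a class, which is then instantiated. A minimal sketch of that variant, assuming Scrapy-style 'package.module.ClassName' strings (a hypothetical helper, not the project's actual code):

from importlib import import_module

def load_object(path):
    # Split 'package.module.ClassName' into a module path and an attribute name.
    module_path, _, name = path.rpartition('.')
    module = import_module(module_path)
    return getattr(module, name)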
Example #3
def run_test(config):

    models = config['models']

    total_mse = [0 for i in range(len(models))]
    total_rmse = [0 for i in range(len(models))]
    total_r2 = [0 for i in range(len(models))]
    total_adj_r2 = [0 for i in range(len(models))]

    total_accuracy = [0 for i in range(len(models))]
    total_balanced_accuracy = [0 for i in range(len(models))]

    print("Training and testing")

    for i, model in enumerate(models):

        print(model)
        temp_config = get_config({**config, 'model': model})
        temp_config['print'] = config['print']

        data_train, labels_train = load_object(temp_config['data_path'])
        data_test, labels_test = load_object(temp_config['test_path'])

        baseline_targets_train = np.array(
            load_object('./data_processed/baseline_targets_train.pkl'),
            dtype=object)
        baseline_targets_test = np.array(
            load_object('./data_processed/baseline_targets_test.pkl'),
            dtype=object)

        fold = [data_train, labels_train, data_test, labels_test]
        fold_base = [baseline_targets_train, baseline_targets_test]

        if model == 'baseline':
            mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                temp_config, fold, fold_base)
        else:
            mse, rmse, r2, adj_r2, accuracy, balanced_accuracy = run_model(
                temp_config, fold)

        total_mse[i] += mse
        total_rmse[i] += rmse
        total_r2[i] += r2
        total_adj_r2[i] += adj_r2

        total_accuracy[i] += accuracy
        total_balanced_accuracy[i] += balanced_accuracy

    # Print the results in a table
    table = [['mse'] + total_mse,
             ['root_mse'] + total_rmse, ['r2_score'] + total_r2,
             ['adj_r2_score'] + total_adj_r2, ['accuracy'] + total_accuracy,
             ['bal_accuracy'] + total_balanced_accuracy]

    pd.DataFrame(table, columns=['metrics'] + models).to_csv("final_results.csv")
    print(tabulate(table, headers=['metrics'] + models,
                   tablefmt="fancy_grid"))  # plain
Example #4
    def load(model_root, model_name):
        path = os.path.join(model_root, model_name)

        loss = utils.load_object(os.path.join(path, model_name + "_loss"))
        params = utils.load_object(os.path.join(path, model_name + "_params"))
        vocab = utils.load_object(os.path.join(path, model_name + "_vocab"))

        model = SkipGramModel(vocab, params["embedding_dim"], params["use_cuda"])
        model.load_state_dict(torch.load(os.path.join(path, model_name)))

        return (model, loss, params)
Example #5
 def crawl(self, **kwargs):
     engine = Engine(self)  # Consider whether two spiders should share one engine; keeping it on self would use a lot of memory
     from crawler.settings import spider, pipeline
     # Treat the engine as the main function
     # Read the links from the database
     if 'spider' in kwargs:
         spider = kwargs['spider']
     spider = load_object(spider)(**kwargs)  # needs start_urls
     if 'pipeline' in kwargs:
         pipeline = kwargs['pipeline']
     pipeline = load_object(pipeline)(**kwargs)  # needs a logger
     engine.open_spider(spider, pipeline, iter(spider.start_requests()))
Example #6
    def __init__(self, path, api: ApiAdapter):
        pushed_snapshot_path = os.path.join(
            path, os.path.join(".course_git", "pushed_snapshot.pkl"))
        commited_snapshot_path = os.path.join(
            path, os.path.join(".course_git", "commited_snapshot.pkl"))

        if not os.path.exists(os.path.join(path, ".course_git")):
            raise Exception("There is no repository in this directory")
        if not os.path.exists(pushed_snapshot_path) or not os.path.exists(
                commited_snapshot_path):
            raise Exception("Invalid sync directory")

        self.pushed_snapshot = utils.load_object(pushed_snapshot_path)
        self.commited_snapshot = utils.load_object(commited_snapshot_path)
Example #7
def generate_rdm_all_gradient(nnet,
                              name,
                              blanks,
                              rdm_type=analysis.SPEARMAN,
                              save_files=True,
                              title="RDM training combined",
                              from_file=False,
                              delete_blank_states=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(
            nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(
            nnet, blanks)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(
            nnet, blanks)
        print("Both: {0}, {1}".format(accuracy_totals_both,
                                      accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari,
                                     accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev,
                                     accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)

        # Cut the hidden layer in two (left and right halves of each unit vector).
        hidden_left = []
        hidden_right = []
        for vector in hidden:
            hidden_left.append(vector[:len(vector) // 2])
            hidden_right.append(vector[len(vector) // 2:])

        rdmatrix_left = analysis.rdm_spearman(hidden_left)
        rdmatrix_right = analysis.rdm_spearman(hidden_right)
        # Save the (large) RDMs for debugging purposes, so they don't have to be regenerated every time.
        utils.save_object(name + "rdmatright", rdmatrix_right)
        utils.save_object(name + "rdmatleft", rdmatrix_left)
    else:
        rdmatrix_left = utils.load_object(name + "rdmatleft")
        rdmatrix_right = utils.load_object(name + "rdmatright")

    rdmatrix_left, labels = model2.process_matrix(rdmatrix_left,
                                                  delete_blank_states)
    rdmatrix_right, _ = model2.process_matrix(rdmatrix_right,
                                              delete_blank_states)

    return rdmatrix_left, rdmatrix_right, labels
Example #8
def generate_rdm_all(nnet,
                     name,
                     rdm_type=analysis.SPEARMAN,
                     save_files=True,
                     title="RDM training combined",
                     from_file=False,
                     delete_blank_states=True,
                     collapse_rdm=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(
            nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(
            nnet, blanks=True)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(
            nnet, blanks=True)
        print("Both: {0}, {1}".format(accuracy_totals_both,
                                      accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari,
                                     accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev,
                                     accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)
        rdmatrix = analysis.rdm_euclidian(hidden)

        utils.save_object(name + "rdmat", rdmatrix)
    else:
        rdmatrix = utils.load_object(name + "rdmat")

    return model2.process_matrix(rdmatrix, delete_blank_states)
Example #9
def make_rdm_and_mds_reg_hierarchy(name):
    model = utils.load_object(name, 1)  # eg 'noise_test2'
    hidden = accuracy_test_reg_hierarchy(model)
    # Turn a list of tensors into a list of np vectors
    for i, tensor in enumerate(hidden):
        hidden[i] = tensor.numpy().reshape(-1)

    # Now cut that in two
    left_units = [vector[:len(vector) // 2] for vector in hidden]
    right_units = [vector[len(vector) // 2:] for vector in hidden]

    # Make the labels for the rdms
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)
    for side in [[left_units, "left units"], [right_units, "right_units"]]:
        rdm = analysis.rdm_spearman(side[0])
        analysis.plot_rdm(rdm, labels, "Spearman rho matrix" + side[1])

        #for i in range(4):
        #    mdsy = analysis.mds(side[0][6*i:6*i+6])
        #    analysis.plot_mds_points(mdsy, range(len(mdsy)), labels=labels[6*i:6*i+6])

        mdsy = analysis.mds(side[0])
        for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
            analysis.plot_mds_points(mdsy[6 * i:6 * i + 6],
                                     range(6),
                                     labels=labels[6 * i:6 * i + 6],
                                     style=style)
Example #10
def plot_pd_overlap(subjects=[1],fix_pd=True,threshold=0.3):
    face_dataset = utils.load_object("data_1_50_fixPD_Label_False.pkl")
    figs = []
    for subject_idx in subjects:
        # [0,70,140,...]
        start_idx = ((subject_idx*70)-70)
        # [69,139,209,...]
        stop_idx = (subject_idx*70)-1
        
        # prepare pd_signal numpy array
        pd_signals = []
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(14, 12))
        axes.grid(True)
        for i in range(start_idx,stop_idx+1):                         
            if fix_pd:
                output, _, _ = detect_glitch(face_dataset[i]['PD_avg_filtered'],threshold=threshold)                
            else:
                output = face_dataset[i]['PD_avg_filtered']
                 
            pd_signals.append(output) 
            axes.plot(output)
        
        fig.suptitle("Testsubject: " + str(subject_idx))
        figs.append(fig)
        print(subject_idx)
        
    return figs
Example #11
File: web.py Project: alsoncahyadi/absa_ml
def results():
    if request.method == 'GET':
        data = utils.load_object("data.pkl")
    elif request.method == 'POST':
        if 'file' not in request.files:
            print('No file part')
        file = request.files['file']

        if file.filename == '':
            print('No selected file')

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            raw_reviews_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            print(raw_reviews_path)
            data = main.main(raw_reviews_path)
            utils.save_object(data, "data.pkl")

    new_data = get_processed_data(data)
    return render_template('results.html',
        bar_chart=new_data['bar_chart'],
        pie_chart=new_data['pie_chart'],
        ratings=data[5],
        stars=new_data['stars'],
        table=zip(new_data['markup_sents'], new_data['sentiment_colors']),
        categories=Const.CATEGORIES,
        tuples=new_data['pretty_tuples'],
    )
Example #12
def get_faps_np_df(pickle_file='data_1_51.pkl'):
    face_dataset = utils.load_object(pickle_file)
    faps_df = pd.DataFrame(face_dataset[:]['faceFAP'])
    faps_df.columns = ['faps']
    faps_df['ori_idx'] = [i for i in range(len(face_dataset))]
    faps_df['faps'] = faps_df['faps'].apply(lambda x: np.array(x))
    return faps_df
Example #13
def make_rdm_and_mds(name, with_goals=False):
    model = utils.load_object(name, 1)  # eg 'noise_test2'
    if with_goals:
        hidden = pnashierarchy.accuracy_test_with_goals(model)
    else:
        hidden = accuracy_test(model, noise)
    # Turn into a list of simple vectors
    for i, tensor in enumerate(hidden):
        hidden[i] = tensor.numpy().reshape(-1)
    rdmatrix = analysis.rdm_spearman(hidden)
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)
    analysis.plot_rdm(rdmatrix, labels, "Spearman rho matrix")

    for i in range(4):
        mdsy = analysis.mds(hidden[6 * i:6 * i + 6])
        analysis.plot_mds_points(mdsy,
                                 range(len(mdsy)),
                                 labels=labels[6 * i:6 * i + 6])

    mdsy = analysis.mds(hidden)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
        analysis.plot_mds_points(mdsy[6 * i:6 * i + 6],
                                 range(6),
                                 labels=labels[6 * i:6 * i + 6],
                                 style=style,
                                 show=(i == 3))
Example #14
def read_score(folder, dat_id, descr_id, clf_id):
    """Read test scores from a file and compute the average value

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dat_id : string
        Short name of a dataset.
    descr_id : string
        Short name of a descriptor.
    clf_id : string
        Short name of a classifier.
        
    Returns
    -------
    ts_avg : float
        Average of test scores.

    """
    result_path = utils.filepath(folder, dat_id, descr_id, clf_id)
    if os.path.isfile(result_path):
        result = utils.load_object(result_path)
        test_scores = [ts for _, ts in result]
        ts_avg = 100 * np.mean(test_scores)
        return ts_avg
    else:
        return None
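A hypothetical call, with placeholder folder and identifier names that are not from the original project, might look like:

avg = read_score('results', 'kth_tips', 'lbp', 'linear_svm')
if avg is not None:
    print('Average test score: {:.2f}%'.format(avg))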
Example #15
def get_user_embedding(sess, user_id):
    # If user_to_behave is already run
    path_user_to_behave = os.path.join(os.path.dirname(config.save_path),
                                       'user_to_behave.pkl')
    if os.path.isfile(path_user_to_behave):
        user_to_behave = load_object(path_user_to_behave)
    else:
        user_to_behave = get_user_behave()

    # Get embeddings
    embeddings = get_embeddings(sess)

    # Get embeddings for each event
    embed_events = []
    for event in user_to_behave[user_id]:
        embed_event = []
        for feature in range(len(event)):
            num_cat_value = config.feature_desc[feature]
            if num_cat_value == 1:
                embed_event.append(list(event[feature]))
            else:
                embed_event.append((embeddings[num_cat_value][event[feature]]))
        embed_events.append(embed_event)

    return embed_events
Example #16
    def load(model_root, model_name):
        path = os.path.join(model_root, model_name)

        loss = utils.load_object(os.path.join(path, model_name + "_loss"))
        params = utils.load_object(os.path.join(path, model_name + "_params"))
        vocab_x = utils.load_object(os.path.join(path,
                                                 model_name + "_vocab_x"))
        vocab_y = utils.load_object(os.path.join(path,
                                                 model_name + "_vocab_y"))

        model = EmbedAlignModel(vocab_x,
                                vocab_y,
                                params["embedding_dim"],
                                random_state=params["random_state"],
                                use_cuda=params["use_cuda"])
        model.load_state_dict(torch.load(os.path.join(path, model_name)))
        return (model, loss, params)
Example #17
def aggregate_participants_by_mean():
    dfs = load_object("data_processed/subdata_pr_su.pkl")[0]
    averages_df = []
    for df in dfs:
        averages_per_variable = pd.DataFrame(df).mean(axis=0)
        averages_df.append(averages_per_variable)

    return pd.DataFrame(averages_df)
Example #18
class Config:
    main_path = os.path.abspath("/home/mikey/Data/POETdataset/PascalImages/")
    class_names = [
        'dog', 'aeroplane', 'boat', 'bicycle', 'cat', 'cow', 'diningtable',
        'horse', 'motorbike', 'sofa'
    ]
    IMG_SIZE = 60 * 4 * 2
    PATCH_WIDTH = 120
    SMALLER_IMG_SIZE = 150
    T = int(IMG_SIZE / PATCH_WIDTH)**2
    new_dir = 'soft_attention_features_' + str(PATCH_WIDTH)
    new_dir_img = 'soft_attention_images_' + str(PATCH_WIDTH)
    name_to_class_dict = {
        class_name: i
        for i, class_name in enumerate(class_names)
    }
    train_ids, test_ids = utils.load_object(
        '../train_ids.pkl'), utils.load_object('../test_ids.pkl')
Example #19
def main(bo_input_directory, random_input_directory, output_directory):

    BO_iter = load_object(bo_input_directory + '/iterations.dat')
    random_iter = load_object(random_input_directory +
                              '/iterations.dat')  # 1.654
    BO_vals = load_object(bo_input_directory + '/best_vals.dat')  # 0.682
    random_vals = load_object(random_input_directory + '/best_vals.dat')

    plt.figure()
    plt.plot(
        BO_iter,
        BO_vals,
        label='Bayesian Optimisation',
    )
    plt.plot(random_iter, random_vals, label='Random Sampling')
    plt.xlabel('Number of Function Evaluations')
    plt.ylabel('Best Feasible Objective Function Value')
    plt.legend()
    pylab.savefig(output_directory + "/BO_vs_random.png")
Example #20
    def get_iterator(self, path):
        dataset = [
            {
                'sentence': torch.from_numpy(data_point['sentence']),
                'score': torch.from_numpy(data_point['score']),
            }
            for data_point in load_object(path)
        ]

        return DataLoader(dataset, batch_size=args.batch_size, num_workers=args.n_cpus)
Example #21
def run_model1_combined():
    # COMBINED #
    #num_training_steps = 100000
    #nnet = nn.ElmanGoalNet(size_hidden=15, initialization=nn.UNIFORM, size_goal1=0, size_goal2=0,
    #                       size_observation=len(task.symbols), size_action=len(task.symbols), learning_rate=0.01, algorithm=nn.ADAM)
    #nnet.L2_regularization = 0.00001
    #train_all(nnet, num_training_steps)
    #utils.save_object("cogloadtasknet", nnet)
    nnet = utils.load_object("cogloadtasknet")
    generate_rdm_all(nnet, name="cogloadtasknet")
Example #22
def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list, user_ids = [], []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[', '').replace(']"}', '').split()
            id_ = line.split(':')[0].replace('{', '').replace('"', '')
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_ids.append(id_)  
    user_list = np.array(user_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:    
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)    

        # Save TSNE objects
        print "Save user_tsne."
        save_object(user_tsne, 'save/user_tsne')
    
    # Run DBSCAN
    db = DBSCAN(eps=3, min_samples=50, algorithm='brute').fit(user_tsne)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Save clustering results
    save_object(user_ids, 'save/user_ids_db')
    save_object(labels, 'save/labels_db')
    
    # Drawing clustering
    unique_labels = set(labels)
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1: continue  
        class_member_mask = (labels == k)    
        xy = user_tsne[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6)    
        xy = user_tsne[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=3)       
    
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
Example #23
def classify(text):
    driver = Processor()
    doc = Document(raw_text=text)
    driver.process_document(doc)
    driver.clf = utils.load_object('saved_classifier-367')
    class_label = driver.clf.classify(doc)
    subclass_label = driver.clf.subclassify(doc, class_label)
    labels = [class_label, subclass_label]
    print 'Labels: '
    print labels
    return labels
Example #24
def make_rdm_and_mds(name):
    model = utils.load_object(name)  # eg 'noise_test2'
    hidden = get_model_hidden_activations(model)
    rdmatrix = analysis.rdm_spearman(hidden)
    labels = []
    for goal in tce.action_list:
        for action in tce.action_list[goal]:
            labels.append(goal + '_' + action)
    analysis.plot_rdm(rdmatrix, labels, "Spearman rho matrix")
    mdsy = analysis.mds(hidden)
    analysis.plot_mds_points(mdsy, range(len(mdsy)), labels=labels)
Example #25
def main(_):
    # Rebuild the graph
    def_graph = tf.Graph().as_default()
    auto_encoder = AutoEncoder(config)
    auto_encoder.build_encoder(config.feature_desc)

    # Create session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Load the auto encoding model
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state('save')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    # Analyse DBScan results on t-sne
    user_ids_db = np.array(load_object('save/user_ids_db'))
    labels_db = load_object('save/labels_db')

    user_ids1 = user_ids_db[(labels_db == 2)][0:30]
    user_ids2 = user_ids_db[(labels_db == 6)][0:30]

    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)

    save_object(cluster1, 'save/cluster1_db')
    save_object(cluster2, 'save/cluster2_db')

    # Analyse K-means results on reps
    user_ids_km = np.array(load_object('save/user_ids_km'))
    labels_km = load_object('save/labels_km')

    user_ids1 = user_ids_km[(labels_km == 2)][0:30]
    user_ids2 = user_ids_km[(labels_km == 6)][0:30]

    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)

    save_object(cluster1, 'save/cluster1_km')
    save_object(cluster2, 'save/cluster2_km')
Example #27
def make_rdm_noisy(name,
                   num_networks,
                   noise,
                   num_runs_per_network=10,
                   title="-",
                   save_files=True,
                   skips=[],
                   rdm_type=analysis.SPEARMAN):
    # Make one rdm for each network
    rdmatrices = []
    for i in range(num_networks + len(skips)):
        if i in skips:
            continue
        model = utils.load_object(name, i)
        hiddens = []
        for j in range(num_runs_per_network):
            hidden, _ = accuracy_test(model, name=str(i), noise=noise)
            for k, tensor in enumerate(hidden):
                hidden[k] = tensor.numpy().reshape(-1)
            hiddens.append(hidden)
        rdmatrix = analysis.rdm_noisy_mahalanobis(hiddens)
        rdmatrices.append(rdmatrix)

    # Now average over all matrices
    avg_matrix = None
    for matrix in rdmatrices:
        if avg_matrix is None:
            avg_matrix = matrix
        else:
            avg_matrix += matrix
    avg_matrix = avg_matrix / num_networks
    name = name + '_' + rdm_type
    np.savetxt(name + "_rdm_mat.txt", avg_matrix, delimiter="\t", fmt='%.2e')
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[1:]:
            labels.append(str(i) + '_' + action)
    analysis.plot_rdm(avg_matrix, labels, title + " spearman rho matrix")
    if save_files:
        plt.savefig(name + '_rdm')
    plt.clf()

    mdsy = analysis.mds(avg_matrix)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.', 'k_:']):
        analysis.plot_mds_points(mdsy[6 * i:6 * i + 6],
                                 range(6),
                                 labels=labels[6 * i:6 * i + 6],
                                 style=style)
    plt.title(title)
    if save_files:
        plt.savefig(name + '_mds')
    plt.clf()
    return avg_matrix
Example #28
def make_rdm_multiple_predictive(name, num_networks,title="-", save_files=True):
    # Make one rdm for each network
    optimal_list = []
    rdmatrices = []
    for i in range(num_networks):
        model = utils.load_object(name, i)
        hidden, optimal = accuracy_test_predictive(model, i)
        optimal_list.append(optimal)
        if optimal:
            # Turn into a list of simple vectors
            for i, tensor in enumerate(hidden):
                hidden[i] = tensor.numpy().reshape(-1)
            rdmatrix = analysis.rdm_spearman(hidden)
            rdmatrices.append(rdmatrix)
    print("{0} networks, of which {1} achieve optimal accuracy".format(num_networks, optimal_list.count(True)))
    # Now average over all matrices
    avg_matrix = None
    for matrix in rdmatrices:
        if avg_matrix is None:
            avg_matrix = matrix
        else:
            avg_matrix += matrix
    avg_matrix = avg_matrix / len(rdmatrices)
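    # Equivalent, assuming the RDMs are NumPy arrays: avg_matrix = np.mean(np.stack(rdmatrices), axis=0)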

    # delete the unwanted rows and columns:
    #avg_matrix = np.delete(avg_matrix, [0, 6, 12], 0)
    #avg_matrix = np.delete(avg_matrix, [0, 6, 12], 1)
    nps = 6  # number of elements per sequence

    if save_files:
        np.savetxt(name+".csv", avg_matrix, delimiter=",")
    labels = []
    for i, sequence in enumerate(pnas2018task.seqs):
        for action in sequence[0:-1]:
            labels.append(str(i)+'_'+action)
    analysis.plot_rdm(avg_matrix, labels, title + " spearman rho matrix")
    if save_files:
        plt.savefig(name+'_rdm')
    plt.clf()

    mdsy = analysis.mds(avg_matrix)
    for i, style in enumerate(['ro-', 'b|--', 'gx-.']):
        analysis.plot_mds_points(mdsy[nps * i:nps * i + nps], range(nps), labels=labels[nps * i:nps * i + nps], style=style)
    plt.title(title)
    if save_files:
        plt.savefig(name + '_mds')
    plt.clf()
    return avg_matrix
Example #29
 def from_settings(cls, settings, engine):
     """ Build middleware pipeline from settings """
     mwlist = cls._get_mwlist_from_settings(settings)
     middlewares = []
     for clspath in mwlist:
         try:
             mwcls = utils.load_object(clspath)
             if hasattr(mwcls, 'from_settings'):
                 mw = mwcls.from_settings(settings, engine)
             else:
                 mw = mwcls(engine)
             middlewares.append(mw)
         except NotConfigured, e:
             if e.args:
                 clsname = clspath.split('.')[-1]
                 log.msg('Disabled %s: %s' % (clsname, e.args[0]))
Example #30
def run_model1_ari():
    # ARI #
    num_training_steps = 10000
    nnet = nn.ElmanGoalNet(size_hidden=15,
                           initialization=nn.UNIFORM,
                           size_goal1=0,
                           size_goal2=0,
                           size_observation=len(task.symbols),
                           size_action=len(task.symbols),
                           learning_rate=0.01,
                           algorithm=nn.ADAM)
    nnet.L2_regularization = 0.00001
    train_ari(nnet, num_training_steps)
    utils.save_object("cogloadtasknet_ari", nnet)
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")
Example #31
def get_features(folder, dataset, descriptor):
    """Return texture features for a single dataset and descriptor.

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.

    Returns
    -------
    X : array
        Texture features. The number of rows is equal to the number of
        samples and the number of columns is equal to the dimensionality
        of the feature space. If an error occurs within the call to 
        `apply_descriptor`, returns None.
        
    """
    multiscale_features = []
    dataset_id = dataset.acronym
    for rad in descriptor.radius:
        descr_single = copy.deepcopy(descriptor)
        descr_single.radius = [rad]
        descr_single_id = descr_single.abbrev()
        feat_path = utils.filepath(folder, dataset_id, descr_single_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            print(f'Computing {dataset_id}--{descr_single_id}')

            if hasattr(descr_single, 'components'):
                X = concatenate_feats(folder, dataset, descr_single)
            else:
                X = apply_descriptor(dataset, descr_single)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        multiscale_features.append(X)
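    # The "else" below belongs to the for loop: it runs only when the loop was not broken out of,
    # i.e. features were computed for every radius, and concatenates them along the last axis.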
    else:
        X = np.concatenate(multiscale_features, axis=-1)
    return X
Example #32
def get_results(args):
    # -get param-stamp
    param_stamp = get_param_stamp_from_args(args)
    # -check whether already run, and if not do so
    if os.path.isfile("{}/dict-{}.pkl".format(args.r_dir, param_stamp)):
        print("{}: already run".format(param_stamp))
    else:
        print("{}: ...running...".format(param_stamp))
        main_cl.run(args)
    # -get average precisions
    fileName = '{}/prec-{}.txt'.format(args.r_dir, param_stamp)
    file = open(fileName)
    ave = float(file.readline())
    file.close()
    # -results-dict
    dict = utils.load_object("{}/dict-{}".format(args.r_dir, param_stamp))
    # -return tuple with the results
    return (dict, ave)
Example #34
File: engine.py Project: Frowse/openross
 def __init__(self):
     imageproc_cls = utils.load_object(settings.IMAGE_PROCESSOR)
     self.imageproc = imageproc_cls.from_settings(settings, self)
     statsd.Connection.set_defaults(host=settings.STATSD_HOST,
                                    port=settings.STATSD_PORT,
                                    sample_rate=1)
Example #35
def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list = []
    user_id_list = []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[','').replace(']"}','').split()
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_id_list.append(line.split(':')[0].replace('{','').replace('"',''))
    user_list = np.array(user_list)
    user_id_list = np.array(user_id_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:    
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)    

        # Save TSNE objects
        print "Save user_tsne."
        save_object(user_tsne, 'save/user_tsne')
    
    # Run KMeans clustering
    kmeans = KMeans(init='k-means++', n_clusters=8, n_init=10)
    km = kmeans.fit(user_list)
    
    # Get cluster labels
    labels = km.labels_
    unique_labels = set(labels)

    # Save clustering results
    save_object(user_id_list, 'save/user_ids_km')
    save_object(labels, 'save/labels_km')
        
    # Save the cluster_to_user dict
    cluster_to_user = dict()
    for k in unique_labels:
        class_member_mask = (labels == k)
        class_k = user_id_list[class_member_mask]
        cluster_to_user[k] = class_k
    save_object(cluster_to_user, 'save/cluster_to_user')
    
    # Save the user_to_cluster dict
    user_to_cluster = dict()
    for user, label in zip(user_id_list, labels):
        user_to_cluster[user] = label
    save_object(user_to_cluster, 'save/user_to_cluster')    
    
    # Plot results
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=3)    

    plt.title('KMeans Clustering')
    plt.show()
Example #36
def get_netflix_details_from_title(title):
    netflix_dict = utils.load_object('netflix_dict')
    unog_json = utils.load_object('unog_details')

    if(title in netflix_dict):
        return netflix_dict[title]
Example #37
def get_user_of_cluster(cluster_id):
    cluster_to_user = load_object("save/cluster_to_user")
    return cluster_to_user[cluster_id]
Example #38
def past_count():
    if os.path.isfile('unog_details.pkl'):
        return utils.load_object('unog_details')['COUNT']
    else:
        return 0