Example #1
def generate_rdm_all(nnet,
                     name,
                     rdm_type=analysis.SPEARMAN,
                     save_files=True,
                     title="RDM training combined",
                     from_file=False,
                     delete_blank_states=True,
                     collapse_rdm=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(
            nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(
            nnet, blanks=True)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(
            nnet, blanks=True)
        print("Both: {0}, {1}".format(accuracy_totals_both,
                                      accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari,
                                     accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev,
                                     accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)
        rdmatrix = analysis.rdm_euclidian(hidden)  # NB: the guard above enforces SPEARMAN, yet this computes a Euclidean RDM

        utils.save_object(name + "rdmat", rdmatrix)
    else:
        rdmatrix = utils.load_object(name + "rdmat")

    return model2.process_matrix(rdmatrix, delete_blank_states)
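These snippets come from different codebases, so the signature of utils.save_object varies: (name, obj) in Examples #1, #3 and #11, (obj, path) in Examples #5 and #22, and even (obj, folder, name) in Example #4. A minimal pickle-based sketch of the helper pair the examples appear to assume; the argument order and pickle protocol here are assumptions, not taken from any of these repos:

import pickle

def save_object(path, obj):
    # Hypothetical helper: serialize obj to disk with pickle.
    # Argument order follows Example #1; several repos reverse it.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_object(path):
    # Hypothetical counterpart: restore a previously pickled object.
    with open(path, 'rb') as f:
        return pickle.load(f)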
Example #2
def results():
    if request.method == 'GET':
        data = utils.load_object("data.pkl")
    elif request.method == 'POST':
        # Guard clauses: without the early returns, a missing file raised
        # KeyError below and `data` could be unbound at render time.
        if 'file' not in request.files:
            print('No file part')
            return redirect(request.url)
        file = request.files['file']

        if file.filename == '':
            print('No selected file')
            return redirect(request.url)

        if not (file and allowed_file(file.filename)):
            return redirect(request.url)

        filename = secure_filename(file.filename)
        raw_reviews_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(raw_reviews_path)
        print(raw_reviews_path)
        data = main.main(raw_reviews_path)
        utils.save_object(data, "data.pkl")

    new_data = get_processed_data(data)
    return render_template('results.html',
        bar_chart=new_data['bar_chart'],
        pie_chart=new_data['pie_chart'],
        ratings=data[5],
        stars=new_data['stars'],
        table=zip(new_data['markup_sents'], new_data['sentiment_colors']),
        categories=Const.CATEGORIES,
        tuples=new_data['pretty_tuples'],
    )
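Example #2 relies on an allowed_file helper that is not shown. A typical definition, following the file-upload pattern from the Flask documentation; the extension whitelist below is an assumption:

ALLOWED_EXTENSIONS = {'txt', 'csv', 'json'}  # assumed whitelist

def allowed_file(filename):
    # Accept only filenames that have an extension from the whitelist.
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS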
Example #3
def train_and_save(num_models, name, num_episodes):
    for i in range(num_models):
        model = nn.ElmanGoalNet()
        train_supervised_teacoffeeenv(model, num_episodes)
        utils.save_object(name, model)
        print('Trained and saved model #{0} of {1}\n'.format(
            i + 1, num_models))
Example #4
def benchmark_cost_function(data_sizes):
    polygon = pickle.load(open('utils/good_path_for_rectangle.pickle', 'rb'))

    save_folder = generate_folder_name()
    iterations = 10
    results = np.empty((len(data_sizes), iterations, 2), dtype=float)

    for i, dsize in enumerate(data_sizes):
        data = dg.generate_rectangle_set(dsize)

        print("\nRun {} with value {}".format(i+1, dsize))

        # Compile functions and warm up GPU
        acoc.cost_function_gpu(data.T, polygon)

        for j in range(iterations):
            utils.print_on_current_line('Iteration {}/{}'.format(j + 1, iterations))
            start_cpu = time.perf_counter()  # time.clock() was removed in Python 3.8
            acoc.cost_function(data.T, polygon)
            end_cpu = time.perf_counter()
            results[i][j][0] = end_cpu - start_cpu

            start_gpu = time.perf_counter()
            acoc.cost_function_gpu(data.T, polygon)
            end_gpu = time.perf_counter()
            results[i][j][1] = end_gpu - start_gpu  # last axis has size 2, so index 1 (2 was out of bounds)

    mean_results = np.mean(results, axis=1).T
    acoc_plotter.plot_bar_chart_gpu_benchmark(mean_results, data_sizes, ['CPython', 'GPU'], save_folder, 'results')

    np.set_printoptions(precision=7, suppress=False)
    print("\nResults: \n{}".format(mean_results))
    utils.save_object(mean_results, save_folder, 'results')
Example #5
def build_netflix_details_dict(unog_json):
    # TODO: get details of countries available in
    unog_dict = {}
    for movie in unog_json['ITEMS']:
        unog_dict[movie[1]] = movie[0]

    utils.save_object(unog_dict, 'netflix_dict')
Example #6
def create_cls(img_path,ae_path,out_path):
    img_frame=images.read_image_frame(img_path)
    n_cats=data.get_n_cats(img_frame)
    print(n_cats)
    cls=comp.create_extractor(n_cats,ae_path)
    #cls=nn.built_nn_cls(n_cats)
    cls=deep.learning_iter(img_frame,cls,n_epochs=1000)
    utils.save_object(cls.get_model(),out_path)
    return cls
Example #7
def create_cls(in_path,ae_path,out_path):
    imgs=data.read_image_frame(in_path)
    X=imgs['Images'].tolist()
    y=imgs['Category'].tolist()
    n_cats=max(y)+1
    cls=comp.create_extractor(n_cats,ae_path)
    deep.learning_iter_super(cls,X,y,n_epochs=1000)
    utils.save_object(cls.get_model(),out_path) 
    return cls
Example #8
 def save_estimators(self,
                     save_path=Const.CE_ROOT + 'model/ann/best_{}.model'):
     ann_sklearn_model_index = len(self.pipeline.steps) - 1
     estimators = self.pipeline.steps[ann_sklearn_model_index][
         1].estimators_
     for i, estimator in enumerate(estimators):
         estimator.model.save(save_path.format(i))
     utils.save_object(
         self.pipeline.steps[ann_sklearn_model_index][1].label_binarizer_,
         save_path.format('labelbinarizer'))
Example #9
def create_sda(in_path,out_path,cls_config_path):
    imgs=data.read_image_frame(in_path)
    X=imgs['Images'].tolist()
    y=imgs['Category'].tolist()
    n_cats=max(y)+1
    hyper_params=tools.read_hyper_params(cls_config_path)
    hyper_params['n_out']=n_cats
    cls=sda.built_sda_cls(hyper_params)
    deep.learning_iter_super(cls,X,y,n_epochs=1000)
    utils.save_object(cls.model,out_path) 
    return cls
Example #10
def save_reduction(in_path,out_path,nn_path,csv=False):
    dataset=load_data(in_path,1)
    dataset=[inst for inst in dataset]
    autoencoder=AutoEncoderReduction(nn_path)
    projected=autoencoder.transform(dataset)
    print("Save to file") 
    utils.save_object(out_path,projected)
    if(csv):
        print("Save to csv file") 
        csv_path=out_path.replace(".obj",".csv")
        utils.to_csv_file(csv_path,projected)
Example #11
def train_multiple(number, filename, from_file=False):
    # train the models
    if not from_file:
        for i in range(number):
            print(i)
            model, _ = train()
            accuracy_test(model)
            utils.save_object(filename, model)

    # make the rdms
    make_rdm_multiple(filename, number)
Example #12
def create_time_series(conf,dim=0):
    action_path=conf['action']
    cls_path=conf['cls_ts']
    cls_config=conf['cls_config']
    out_path=conf['series']
    actions=data.read_actions(action_path)
    extractor=sda.read_sda(cls_path,cls_config)
    all_t_series=[make_action_ts(extractor,action,dim) for action in actions]
    utils.make_dir(out_path)
    for action_ts in all_t_series:
        full_path=out_path+action_ts.name
        utils.save_object(action_ts,full_path)
Example #13
def generate_rdm_all_gradient(nnet,
                              name,
                              blanks,
                              rdm_type=analysis.SPEARMAN,
                              save_files=True,
                              title="RDM training combined",
                              from_file=False,
                              delete_blank_states=True):
    if not from_file:
        if rdm_type != analysis.SPEARMAN:
            raise Exception("not implemented")
        hidden_both, accuracy_totals_both, accuracy_fullseqs_both = test_network_all(
            nnet)
        hidden_ari, accuracy_totals_ari, accuracy_fullseqs_ari = test_network_ari(
            nnet, blanks)
        hidden_bev, accuracy_totals_bev, accuracy_fullseqs_bev = test_network_bev(
            nnet, blanks)
        print("Both: {0}, {1}".format(accuracy_totals_both,
                                      accuracy_fullseqs_both))
        print("Ari: {0}, {1}".format(accuracy_totals_ari,
                                     accuracy_fullseqs_ari))
        print("Bev: {0}, {1}".format(accuracy_totals_bev,
                                     accuracy_fullseqs_bev))

        hidden = utils.flatten_onelevel(hidden_bev) +\
                 utils.flatten_onelevel(hidden_ari) +\
                 utils.flatten_onelevel(hidden_both)

        hidden_left = []
        hidden_right = []
        for vector in hidden:
            hidden_left.append(vector[:len(vector) // 2])
            hidden_right.append(vector[len(vector) // 2:])

        # Compute a separate RDM for each half of the hidden layer.
        rdmatrix_left = analysis.rdm_spearman(hidden_left)
        rdmatrix_right = analysis.rdm_spearman(hidden_right)
        # Save the massive RDMs for debugging, so they don't have to be regenerated every time.
        utils.save_object(name + "rdmatright", rdmatrix_right)
        utils.save_object(name + "rdmatleft", rdmatrix_left)
    else:
        rdmatrix_left = utils.load_object(name + "rdmatleft")
        rdmatrix_right = utils.load_object(name + "rdmatright")

    rdmatrix_left, labels = model2.process_matrix(rdmatrix_left,
                                                  delete_blank_states)
    rdmatrix_right, _ = model2.process_matrix(rdmatrix_right,
                                              delete_blank_states)

    return rdmatrix_left, rdmatrix_right, labels
Example #14
def get_unog_json():
    s = requests.session()
    r = s.get(base_url)
    php_session_cookie = r.cookies['PHPSESSID']

    # Collect the total movies available "COUNT"
    r = s.get(unog_url.format(php_session_cookie=php_session_cookie, count=1), headers=headers)
    count = r.json()['COUNT']

    if int(count) > past_count():
        # re-fire request to get all available movies
        r = s.get(unog_url.format(php_session_cookie=php_session_cookie, count=count), headers=headers)

        utils.save_object(r.json(), 'unog_details')
        build_netflix_details_dict(r.json())
Example #15
def train_model(dataframe,feature_names):
    X = dataframe
    y = dataframe['y'].values.reshape(-1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=11)
    grid_search_parameters = {'max_features': [15, 20]}
    final_clf = grid_search_random_forest(X_train[feature_names].copy(), y_train,
                                          grid_search_parameters, cv=5, n_jobs=-1)
    save_object(final_clf, directory_config['root_dir'],
                directory_config['model_object_dir'],
                directory_config['model_object_name'])
    train_with_pred, test_with_pred = get_datasets_with_predictions(
        final_clf, X_train.copy(), X_test.copy(), feature_names)
    train_with_pred.to_csv(directory_config['root_dir'] +
                           directory_config['train_predictions_dir'] +
                           directory_config['model_object_name'] +
                           '_train_predictions.csv', index=False)
    test_with_pred.to_csv(directory_config['root_dir'] +
                          directory_config['test_predictions_dir'] +
                          directory_config['model_object_name'] +
                          '_test_predictions.csv', index=False)
    print("Evaluation on train")
    model_evaluation(train_with_pred)
    print("Evaluation on test")
    model_evaluation(test_with_pred)
    return final_clf,train_with_pred,test_with_pred
Example #16
    def save(self, file_name, artifact):
        if isinstance(artifact, NeuralNetwork):
            base = os.path.join(self.__root_dir, self.__models_dir)
            save_object(os.path.join(base, file_name + '.cfg'),
                        (artifact.get_input_metadata(),
                         artifact.get_output_metadata()))
            artifact.get_model().save_weights(
                os.path.join(base, file_name + '.wt'))

        elif isinstance(artifact, Experiment):
            basedir = os.path.join(self.__root_dir, self.__experiments_dir)
            save_object(os.path.join(basedir, file_name + '.exp'), artifact)

        else:
            raise Exception('Attempt to save unsupported artifact.')
Example #17
def run_model1_ari():
    # ARI #
    num_training_steps = 10000
    nnet = nn.ElmanGoalNet(size_hidden=15,
                           initialization=nn.UNIFORM,
                           size_goal1=0,
                           size_goal2=0,
                           size_observation=len(task.symbols),
                           size_action=len(task.symbols),
                           learning_rate=0.01,
                           algorithm=nn.ADAM)
    nnet.L2_regularization = 0.00001
    train_ari(nnet, num_training_steps)
    utils.save_object("cogloadtasknet_ari", nnet)
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")
Example #18
 def save_model(self, model_root, model_name, loss, params):
     path = os.path.join(model_root, model_name)
     utils.mkdir(path)
     torch.save(self.state_dict(), os.path.join(path, model_name))
     utils.save_object(self.vocab_x,
                       os.path.join(path, model_name + "_vocab_x"))
     utils.save_object(self.vocab_y,
                       os.path.join(path, model_name + "_vocab_y"))
     utils.save_object(loss, os.path.join(path, model_name + "_loss"))
     utils.save_object(params, os.path.join(path, model_name + "_params"))
Example #19
def get_features(folder, dataset, descriptor):
    """Return texture features for a single dataset and descriptor.

    Parameters
    ----------
    folder : string
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.

    Returns
    -------
    X : array
        Texture features. The number of rows is equal to the number of
        samples and the number of columns is equal to the dimensionality
        of the feature space. If an error occurs within the call to 
        `apply_descriptor`, returns None.
        
    """
    multiscale_features = []
    dataset_id = dataset.acronym
    for rad in descriptor.radius:
        descr_single = copy.deepcopy(descriptor)
        descr_single.radius = [rad]
        descr_single_id = descr_single.abbrev()
        feat_path = utils.filepath(folder, dataset_id, descr_single_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            print(f'Computing {dataset_id}--{descr_single_id}')

            if hasattr(descr_single, 'components'):
                X = concatenate_feats(folder, dataset, descr_single)
            else:
                X = apply_descriptor(dataset, descr_single)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        multiscale_features.append(X)
    else:
        X = np.concatenate(multiscale_features, axis=-1)
    return X
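Examples #19 and #23 both rely on Python's for ... else clause: the else block runs only when the loop finishes without break, so the per-radius (or per-component) features are concatenated only if every iteration produced a feature matrix. A minimal, self-contained illustration of the pattern:

def collect(values):
    results = []
    for v in values:
        if v is None:
            break  # abort; the else block is skipped
        results.append(v)
    else:
        return results  # reached only if the loop never broke
    return None

print(collect([1, 2, 3]))     # [1, 2, 3]
print(collect([1, None, 3]))  # None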
Example #20
    def __init__(self, nodes_file, edges_file, org_id):
        self.nodes_file = nodes_file
        self.edges_file = edges_file
        self.org_id = org_id
        self.file_name = 'organism-{}.bak'.format(org_id)

        node_data = utils.load_json(nodes_file)
        self.id_to_node = {ind: node for ind, node in enumerate(node_data)}
        self.node_to_id = {node: ind for ind, node in enumerate(node_data)}

        edge_data = utils.load_json(edges_file)

        # dimensions of Incidence Matrix
        self.node_count = len(node_data)
        # self.edge_count = len(edge_data)
        self.dimensions = (len(node_data), len(edge_data))

        # # incidence matrix would be too big -> ignored
        # self.incidence = np.zeros(dimensions)

        self.edges = set()
        self.adjacency = np.zeros((self.node_count, self.node_count))
        for index, edge in enumerate(edge_data):
            n1 = self.node_to_id[edge[0]]
            n2 = self.node_to_id[edge[1]]
            self.edges.add((min(n1, n2), max(n1, n2)))
            # ignore edge weights
            self.adjacency[n1][n2] = 1
            self.adjacency[n2][n1] = 1

        self.degree = sum(self.adjacency)

        # # P = D^-1 * A
        # self.transition = self.adjacency / self.degree

        message = ('{} - Organism imported successfully').format(org_id)
        utils.print_log(message)

        message = ('{} - number of nodes and edges = {}').format(
            org_id, self.dimensions)
        utils.print_log(message)

        utils.save_object(self, self.file_name)
Example #21
def save_subset(data_train, targets_train, data_test, targets_test, baseline_targets, config):

    # Now we want to save all the data
    if not os.path.exists(config['save_folder']):
        os.makedirs(config['save_folder'])

    # Now we immediately divide the data into subsets
    nmbr_columns = len(config['columns'])
    total_columns = len(data_train[0].iloc[0])

    # Some values that we need to determine until where the columns go
    nmbr_ma = nmbr_columns * 3
    nmbr_tsfp = nmbr_columns * 787  # note that this only works if you use the comprehensive tsfresh pack
    subset_names = ['m', 'pr', 'pr_su', 'pr_su_bf', 'pr_su_bf_ma']  # 'pr_su_bf_ma_tsfp' , 'pr_su_bf_ma_tsfp_tsfd'
    subset_indices = [(0, 1), (0, 4), (0, 20), (0, 43),
                      (0, 43 + nmbr_ma)]  # (0, 43 + nmbr_ma + nmbr_tsfp), (0, total_columns)

    for name, indices in zip(subset_names, subset_indices):
        subset = []

        for i in range(len(data_train)):
            subset.append(data_train[i].iloc[:, indices[0]: indices[1]])

        subset_test = []
        for j in range(len(data_test)):
            subset_test.append(data_test[j].iloc[:, indices[0]: indices[1]])

        save_data(subset, targets_train, config['save_folder'] + '/subdata_' + name + '.pkl')
        save_data(subset_test, targets_test, config['save_folder'] + '/subdata_' + name + '_test.pkl')
        save_pandas(subset, targets_train, config['save_folder'] + '/subdata_' + name + '.csv')


    save_data(data_train, targets_train, filename=config['save_folder'] + '/processed_data_basic_train.pkl')
    save_data(data_test, targets_test, filename=config['save_folder'] + '/processed_data_basic_test.pkl')

    # Saves the data to a pandas file before saving it as a pickle object in a different format
    if config['save_panda']:
        save_pandas(data_train, targets_train, filename=config['save_folder'] + '/processed_data_pandas.csv')

    # Now we do the same for the baseline targets
    targets_train, targets_test = split_test(None, baseline_targets, seed=config['seed'], split=config['test_split'])
    save_object(targets_train, filename=config['save_folder'] + '/baseline_targets_train.pkl')
    save_object(targets_test, filename=config['save_folder'] + '/baseline_targets_test.pkl')
Example #22
def get_user_behave():
    # The user_to_behave dict
    user_to_behave = dict()

    # Read data chunk by chunk
    with open(config.data_path, 'r') as data_file:
        while True:
            # Get one batch at a time
            exit = True
            lines_gen = islice(data_file, config.batch_size)
            for line in lines_gen:
                record = json.loads(line)
                # dict views are not indexable in Python 3; wrap in list()
                user_behave = list(record.values())[0]

                # Remove those users who have less than num_min_event
                if (len(user_behave) <= config.num_min_event):
                    continue

                if (len(user_behave) < config.num_event + 1):
                    zero_pad = [[0 for _ in range(len(user_behave[0]))]
                                for _ in range(config.num_event -
                                               len(user_behave) + 1)]
                    user_behave = user_behave + zero_pad
                elif (len(user_behave) > config.num_event + 1):
                    user_behave = user_behave[(len(user_behave) -
                                               config.num_event -
                                               1):len(user_behave)]

                # Add to dict
                user_id = list(record.keys())[0]
                user_to_behave[user_id] = np.array(
                    user_behave)[:,
                                 list([1, 2, 3, 7, 8, 9, 10, 11, 12, 13])]
                exit = False

            # Batch is empty then exit
            if exit: break

    # Save user_to_behave object
    save_object(
        user_to_behave,
        os.path.join(os.path.dirname(config.save_path), 'user_to_behave.pkl'))
    return user_to_behave
Example #23
def concatenate_feats(data_folder, dataset, descriptor):
    """Compute features through concatenation of texture models.

    Parameters
    ----------
    data_folder : str
        Full path of the folder where data are saved.
    dataset : texdata.TextureDataset
        Object that encapsulates data of a texture dataset.
    descriptor : hep.HEP
        Object that encapsulates data of a texture descriptor.
    
    Returns
    -------
    X : array
        Computed features. The number of rows is equal to the number of
        samples and the number of columns is equal to the sum of the 
        dimensionalities of the concatenated texture models. If an error 
        occurs in the call to `apply_descriptor`, it returns `None`.

    """
    dat_id = dataset.acronym
    params = {k: v for k, v in descriptor.__dict__.items()}
    feats = []

    for component in descriptor.components: 
        descr = component(**params)
        descr_id = descr.abbrev()
        feat_path = utils.filepath(data_folder, dat_id, descr_id)
        if os.path.isfile(feat_path):
            X = utils.load_object(feat_path)
        else:
            X = apply_descriptor(dataset, descr)
            if X is not None:
                utils.save_object(X, feat_path)
            else:
                break
        feats.append(X)
    else:
        X = np.concatenate(feats, axis=-1)

    return X
Example #24
def build_modelling_dataset(dataset, mode, total_features):
    vectorizer = build_vectorizer(mode, total_features)
    output = vectorizer.fit_transform(dataset['ITM_KEY'].tolist())
    save_object(vectorizer, directory_config['root_dir'],
                directory_config['transformer_dir'],
                directory_config['transformer_object_name'])
    output_array = output.toarray()
    output_df = pd.DataFrame(output_array)
    feature_names = vectorizer.get_feature_names()
    output_df.columns = feature_names
    save_object(feature_names, directory_config['root_dir'],
                directory_config['features_list_dir'],
                directory_config['features_list_object_name'])
    modelling_dataset = pd.concat([output_df, dataset], axis=1)
    modelling_dataset.to_csv(
        directory_config['root_dir'] + directory_config['modelling_data_dir'] +
        directory_config['filename'] + '_' +
        directory_config['features_list_object_name'] + '.csv',
        index=False)
    return modelling_dataset, feature_names
Example #25
def extract_features(data_folder, imgs_folder, args):
    """"Compute texture features.
    
    Check whether features have been already computed. If they haven't, 
    extract features from each dataset using each descriptor in
    `args` and save them to disk. If the descriptor is multi-scale, 
    a separate file is created for each single value of the radius.

    Parameters
    ----------
    data_folder : string
        Full path of the folder where data are saved.
    imgs_folder : string
        Full path of the folder where texture datasets are stored.
    args : argparse.Namespace
        Command line arguments.
        
    """
    utils.boxed_text('Extracting features...', symbol='*')

    for dat in gen_datasets(imgs_folder, args.dataset):
        dat_id = dat.acronym
        for descr in gen_descriptors(args):
            for rad in descr.radius:
                descr_rad = copy.deepcopy(descr)
                descr_rad.radius = [rad]
                descr_rad_id = descr_rad.abbrev()
                feat_path = utils.filepath(data_folder, dat_id, descr_rad_id)
                if os.path.isfile(feat_path):
                    print(f'Found {dat_id}--{descr_rad_id}', flush=True)
                else:
                    print(f'Computing {dat_id}--{descr_rad_id}', flush=True)
                    if hasattr(descr_rad, 'components'):
                        X = concatenate_feats(data_folder, dat, descr_rad)
                    else:
                        X = apply_descriptor(dat, descr_rad)
                    if X is not None:
                        utils.save_object(X, feat_path)
                        del X
Example #26
def train_model(algorithm, feats_pct, lang, output, profile_name):
    """ Prepares arguments to train and saves a NodeClassif object

	Arguments:
	----------
		algorithm:
			type: string
			info: name of the algorithm to train

		feats_pct:
			type: int
			info: percentage of features to keep

		lang:
			type: string
			info: language to perform the tokenizer process

		output:
			type: string
			info: output file name including extension

		profile_name:
			type: string
			info: name of the JSON training profile file
	"""

    if (feats_pct < 0) or (feats_pct > 100):
        exit('The specified features percentage is invalid')

    profile_data = read_json(file_name=profile_name, file_type='profile_t')

    node_classif = NodeClassif(
        algorithm=algorithm.lower(),
        feats_pct=feats_pct,
        lang=lang,
    )

    node_classif.train(profile_data)
    save_object(node_classif, output, 'model')
Example #27
def get_user_behave():
    # The user_to_behave dict
    user_to_behave = dict()
    
    # Read data chunk by chunk
    with open(config.data_path, 'r') as data_file:
        while True:
            # Get one batch at a time
            exit = True
            lines_gen = islice(data_file, config.batch_size)
            for line in lines_gen:
                record = json.loads(line)
                # dict views are not indexable in Python 3; wrap in list()
                user_behave = list(record.values())[0]
                
                # Remove those users who have less than num_min_event
                if (len(user_behave) <= config.num_min_event):
                    continue

                if (len(user_behave) < config.num_event + 1):
                    zero_pad = [[0 for _ in range(len(user_behave[0]))] 
                                for _ in range(config.num_event-len(user_behave)+1)]
                    user_behave = user_behave + zero_pad
                elif (len(user_behave) > config.num_event + 1):
                    user_behave = user_behave[(len(user_behave)-config.num_event-1) 
                                              : len(user_behave)]
                
                # Add to dict
                user_id = list(record.keys())[0]
                user_to_behave[user_id] = np.array(user_behave)[:, list([1,2,3,7,8,9,10,11,12,13])]
                exit = False
            
            # Batch is empty then exit
            if exit: break
    
    # Save user_to_behave object
    save_object(user_to_behave, os.path.join(os.path.dirname(config.save_path), 
                                             'user_to_behave.pkl'))
    return user_to_behave
Example #28
def benchmark_cost_function(data_sizes):
    polygon = pickle.load(open('utils/good_path_for_rectangle.pickle', 'rb'))

    save_folder = generate_folder_name()
    iterations = 10
    results = np.empty((len(data_sizes), iterations, 2), dtype=float)

    for i, dsize in enumerate(data_sizes):
        data = dg.generate_rectangle_set(dsize)

        print("\nRun {} with value {}".format(i + 1, dsize))

        # Compile functions and warm up GPU
        acoc.cost_function_gpu(data.T, polygon)

        for j in range(iterations):
            utils.print_on_current_line('Iteration {}/{}'.format(
                j + 1, iterations))
            start_cpu = time.perf_counter()  # time.clock() was removed in Python 3.8
            acoc.cost_function(data.T, polygon)
            end_cpu = time.perf_counter()
            results[i][j][0] = end_cpu - start_cpu

            start_gpu = time.perf_counter()
            acoc.cost_function_gpu(data.T, polygon)
            end_gpu = time.perf_counter()
            results[i][j][1] = end_gpu - start_gpu  # last axis has size 2, so index 1 (2 was out of bounds)

    mean_results = np.mean(results, axis=1).T
    acoc_plotter.plot_bar_chart_gpu_benchmark(mean_results, data_sizes,
                                              ['CPython', 'GPU'], save_folder,
                                              'results')

    np.set_printoptions(precision=7, suppress=False)
    print("\nResults: \n{}".format(mean_results))
    utils.save_object(mean_results, save_folder, 'results')
Example #29
def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list, user_ids = [], []
    with open(config.rep_path, 'r') as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(':')[1].replace('[', '').replace(']"}', '').split()
            id_ = line.split(':')[0].replace('{', '').replace('"', '')
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_ids.append(id_)  
    user_list = np.array(user_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path), 'user_tsne')
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:    
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)    

        # Save TSNE objects
        print "Save user_tsne."
        save_object(user_tsne, 'save/user_tsne')
    
    # Run DBSCAN
    db = DBSCAN(eps=3, min_samples=50, algorithm='brute').fit(user_tsne)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

    # Save clustering results
    save_object(user_ids, 'save/user_ids_db')
    save_object(labels, 'save/labels_db')
    
    # Drawing clustering
    unique_labels = set(labels)
    colors = plt.get_cmap('Spectral')(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1: continue  
        class_member_mask = (labels == k)    
        xy = user_tsne[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6)    
        xy = user_tsne[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=3)       
    
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
Example #30
def main(_):   
    # Rebuild the graph
    def_graph = tf.Graph().as_default()
    auto_encoder = AutoEncoder(config)
    auto_encoder.build_encoder(config.feature_desc)
    
    # Create session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    
    # Load the auto encoding model
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state('save')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
  
    # Analyse DBScan results on t-sne
    user_ids_db = np.array(load_object('save/user_ids_db'))
    labels_db = load_object('save/labels_db')
    
    user_ids1 = user_ids_db[(labels_db==2)][0:30]
    user_ids2 = user_ids_db[(labels_db==6)][0:30]
     
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)
    
    save_object(cluster1, 'save/cluster1_db')
    save_object(cluster2, 'save/cluster2_db')
    
    # Analyse K-means results on reps
    user_ids_km = np.array(load_object('save/user_ids_km'))
    labels_km = load_object('save/labels_km')
    
    user_ids1 = user_ids_km[(labels_km==2)][0:30]
    user_ids2 = user_ids_km[(labels_km==6)][0:30]
     
    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)    

    save_object(cluster1, 'save/cluster1_km')
    save_object(cluster2, 'save/cluster2_km')
Example #31
def main(_):
    # Rebuild the graph
    def_graph = tf.Graph().as_default()
    auto_encoder = AutoEncoder(config)
    auto_encoder.build_encoder(config.feature_desc)

    # Create session
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    # Load the auto encoding model
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state('save')
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

    # Analyse DBScan results on t-sne
    user_ids_db = np.array(load_object('save/user_ids_db'))
    labels_db = load_object('save/labels_db')

    user_ids1 = user_ids_db[(labels_db == 2)][0:30]
    user_ids2 = user_ids_db[(labels_db == 6)][0:30]

    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)

    save_object(cluster1, 'save/cluster1_db')
    save_object(cluster2, 'save/cluster2_db')

    # Analyse K-means results on reps
    user_ids_km = np.array(load_object('save/user_ids_km'))
    labels_km = load_object('save/labels_km')

    user_ids1 = user_ids_km[(labels_km == 2)][0:30]
    user_ids2 = user_ids_km[(labels_km == 6)][0:30]

    cluster1 = cluster_feature_analysis(sess, user_ids1)
    cluster2 = cluster_feature_analysis(sess, user_ids2)

    save_object(cluster1, 'save/cluster1_km')
    save_object(cluster2, 'save/cluster2_km')
Example #32
def run(args):

    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl):
        raise NotImplementedError(
            "EWC, SI, XdG and iCaRL are not supported with feedback connections."
        )
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections."
        )
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if args.xdg and (not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(
                args.replay))
        #--> problem is that applying different task-masks interferes with gradient calculation
        #    (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError(
            "BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If Task-IL scenario is chosen with single-headed output layer, set args.scenario to "domain"
    # (but note that when XdG is used, task-identity information is being used so the actual scenario is still Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only want param-stamp, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        _ = get_param_stamp_from_args(args=args)
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         verbose=True,
         exception=True if args.seed == 0 else False,
     )

    #print(train_datasets, test_datasets)
    #a = input()
    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier, if requested with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> make sure this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce,
            binaryCE_distill=args.bce_distill,
        ).to(device)

    # Define optimizer (only include parameters that "requires_grad")
    model.optim_list = [{
        'params':
        filter(lambda p: p.requires_grad, model.parameters()),
        'lr':
        args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------#
    #----- CL-STRATEGY: EXEMPLARS -----#
    #----------------------------------#

    # Store in model whether, how many and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars
                                               or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synpatic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and (args.xdg
                                                and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop *
                                                        n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative"
                         and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.g_z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params':
            filter(lambda p: p.requires_grad, generator.parameters()),
            'lr':
            args.lr_gen
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting in visdom
    # -define [precision_dict] to keep track of performance during training for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    precision_dict_exemplars = evaluate.initiate_precision_dict(
        args.tasks) if args.use_exemplars else None
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}".format(args.replay, "D" if args.distill else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if args.online else "") if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars
                               and args.bce and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if
            (args.use_exemplars or args.add_exemplars
             or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
        if args.use_exemplars:
            visdom_exemplars = {
                'env': env_name,
                'graph': "{}-EX".format(graph_name)
            }
    else:
        visdom = visdom_exemplars = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            model=model if args.feedback else generator,
            tasks=args.tasks,
            iters_per_task=args.iters if args.feedback else args.g_iters,
            replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(
            log=args.sample_log,
            visdom=visdom,
            config=config,
            test_datasets=test_datasets,
            sample_size=args.sample_n,
            iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        precision_dict=None,
        iters_per_task=args.iters,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -pdf / reporting: summary plots (i.e, only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -with exemplars (both for visdom & reporting / pdf)
    eval_cb_exemplars = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        visdom=visdom_exemplars,
        classes_per_task=classes_per_task,
        precision_dict=precision_dict_exemplars,
        scenario=scenario,
        iters_per_task=args.iters,
        with_exemplars=True,
    ) if args.use_exemplars else None
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]
    eval_cbs_exemplars = [eval_cb_exemplars]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:" + args.name)
    print("Total tasks:" + str(args.tasks_to_complete))
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        args.tasks_to_complete,
        args.name,
        model,
        train_datasets,
        test_datasets,
        replay_mode=args.replay,
        scenario=scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        eval_cbs_exemplars=eval_cbs_exemplars,
        use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(
        args.scenario))

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            with_exemplars=False,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task *
                      (i + 1))) if scenario == "task" else None)
        for i in range(args.tasks)
    ]
    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}'.format(
        args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task=i + 1,
                with_exemplars=True,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task *
                          (i + 1))) if scenario == "task" else None)
            for i in range(args.tasks)
        ]
        print("\n Precision on test-set (classification using exemplars):")
        for i in range(args.tasks):
            print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
        average_precs_ex = sum(precs) / args.tasks
        print('=> average precision over all {} tasks: {:.4f}'.format(
            args.tasks, average_precs_ex))
    print("\n")

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(
        average_precs_ex if args.use_exemplars else average_precs))
    output_file.close()
    # -precision-dict
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(
        precision_dict_exemplars if args.use_exemplars else precision_dict,
        file_name)

    # Average precision on full test set not evaluated using exemplars (i.e., using softmax on final layer)
    if args.use_exemplars:
        output_file = open(
            "{}/prec_noex-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(average_precs))
        output_file.close()
        # -precision-dict:
        file_name = "{}/dict_noex-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        pp = visual_plt.open_pdf("{}/{}.pdf".format(args.p_dir, param_stamp))

        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator,
                                  config,
                                  size=args.sample_n,
                                  pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(
                    model if args.feedback else generator,
                    test_datasets[i],
                    config,
                    pdf=pp,
                    task=i + 1)

        # -show metrics reflecting progression during training
        figure_list = []  #-> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        if args.use_exemplars:
            figure = visual_plt.plot_lines(
                precision_dict_exemplars["all_tasks"],
                x_axes=precision_dict_exemplars["x_task"],
                line_names=[
                    'task {}'.format(i + 1) for i in range(args.tasks)
                ])
            figure_list.append(figure)
        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)

        # -close pdf
        pp.close()
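Example #32 configures replay with distillation ("soft" targets) and a temperature KD_temp. A minimal PyTorch sketch of such a distillation loss; the function name and signature are illustrative, not the repo's actual Replayer internals:

import torch.nn.functional as F

def distill_loss(student_logits, teacher_logits, temp=2.0):
    # Soft-target loss between temperature-scaled distributions,
    # scaled by temp**2 as in Hinton et al. (2015).
    log_p = F.log_softmax(student_logits / temp, dim=1)
    q = F.softmax(teacher_logits / temp, dim=1)
    return F.kl_div(log_p, q, reduction='batchmean') * (temp ** 2)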
Example #33
def save_composite(cls,out_path):
    utils.save_object(cls.get_model(),out_path)
Example #34
user_email = input("Please enter the user email you want to delete: ")

users_hids = []
for participant in participants.values():

    if participant.hashed_email == encrypt(
            user_email) and not participant.unsubscribe:
        users_hids.append(participant.hashed_subject_id)

string_hids = "\n".join([x.decode("utf-8") for x in users_hids])
print(
    f'Found {len(users_hids)} ids subscribed with that email; here is the list:\n{string_hids}'
)

user_to_unsubscribe = input(
    "Type the item number corresponding to the sID (numbering starts at 0), or type any letter to do nothing: "
)

if user_to_unsubscribe.isdigit():
    indice = int(user_to_unsubscribe)
    participants[users_hids[indice]].unsubscribe = True
    participants[users_hids[indice]].unsubscribe_dt = datetime.datetime.now(
        utc)
    print(
        f"User {users_hids[indice].decode('utf-8')} has been unsubscribed, once emailing is active, the user will receive an automatic unsubscription email"
    )
else:
    print("Nothing done")

save_object(participants, DATA_PATH)
Example #35
def create_autoencoder(in_path,out_path,dim=0):
    actions=data.read_actions(in_path)  # 'action_path' was undefined; the in_path argument is meant
    imgs=data.get_projections(dim,actions)
    cls=ae.built_ae_cls()
    deep.learning_iter_unsuper(cls,imgs,n_epochs=500)
    utils.save_object(cls.model,out_path) 
Example #36
def main():
    # Load configuration
    config = Config()

    # Parse user_list representations
    user_list = []
    user_id_list = []
    with open(config.rep_path, "r") as data_file:
        lines = data_file.readlines()
        for line in lines:
            user_ = line.split(":")[1].replace("[", "").replace(']"}',
                                                                "").split()
            user = [float(u) for u in user_[1:len(user_)]]
            user_list.append(user)
            user_id_list.append(
                line.split(":")[0].replace("{", "").replace('"', ""))
    user_list = np.array(user_list)
    user_id_list = np.array(user_id_list)

    # If tsne is already run
    path_user_tsne = os.path.join(os.path.dirname(config.save_path),
                                  "user_tsne")
    if os.path.isfile(path_user_tsne):
        user_tsne = load_object(path_user_tsne)
    else:
        # Run TSNE
        model = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        user_tsne = model.fit_transform(user_list)

        # Save TSNE objects
        print("Save user_tsne.")
        save_object(user_tsne, "save/user_tsne")

    # Run KMeans clustering
    kmeans = KMeans(init="k-means++", n_clusters=8, n_init=10)
    km = kmeans.fit(user_list)

    # Get cluster labels
    labels = km.labels_
    unique_labels = set(labels)

    # Save clustering results
    save_object(user_id_list, "save/user_ids_km")
    save_object(labels, "save/labels_km")

    # Save the cluster_to_user dict
    cluster_to_user = dict()
    for k in unique_labels:
        class_member_mask = (labels == k)
        class_k = user_id_list[class_member_mask]
        cluster_to_user[k] = class_k
    save_object(cluster_to_user, "save/cluster_to_user")

    # Save the user_to_cluster dict
    user_to_cluster = dict()
    for user, label in zip(user_id_list, labels):
        user_to_cluster[user] = label
    save_object(user_to_cluster, "save/user_to_cluster")

    # Plot results
    colors = plt.get_cmap("Spectral")(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        class_member_mask = (labels == k)
        xy = user_tsne[class_member_mask]
        plt.plot(xy[:, 0],
                 xy[:, 1],
                 "o",
                 markerfacecolor=col,
                 markeredgecolor="k",
                 markersize=3)

    plt.title("KMeans Clustering")
    plt.show()
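A hedged usage sketch for the artifacts this script writes: assuming load_object is the pickle-based counterpart of save_object, the saved dictionaries can be reloaded later to inspect cluster membership (paths mirror the hard-coded save/ prefix above):

from utils import load_object  # assumed import; the script above uses bare names

cluster_to_user = load_object("save/cluster_to_user")
user_to_cluster = load_object("save/user_to_cluster")

# Report how many users ended up in each KMeans cluster.
for cluster_id, users in sorted(cluster_to_user.items()):
    print("cluster {}: {} users".format(cluster_id, len(users)))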
Example #38
0
def run(args, verbose=False):

    # Create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # If only the param-stamp is wanted, print it and exit
    if args.get_stamp:
        from param_stamp import get_param_stamp_from_args
        print(get_param_stamp_from_args(args=args))
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Report whether cuda is used
    if verbose:
        print("CUDA is {}used".format("" if cuda else "NOT(!!) "))

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    if verbose:
        print("\nPreparing the data...")
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=args.scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         normalize=True if utils.checkattr(args, "normalize") else False,
         augment=True if utils.checkattr(args, "augment") else False,
         verbose=verbose,
         exception=True if args.seed < 10 else False,
         only_test=(not args.train))

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- MAIN MODEL -----#
    #----------------------#

    # Define the main model (i.e., the classifier; with feedback connections if requested)
    if verbose and (utils.checkattr(args, "pre_convE") or utils.checkattr(args, "pre_convD")) and \
            (hasattr(args, "depth") and args.depth>0):
        print("\nDefining the model...")
    if utils.checkattr(args, 'feedback'):
        model = define.define_autoencoder(args=args,
                                          config=config,
                                          device=device)
    else:
        model = define.define_classifier(args=args,
                                         config=config,
                                         device=device)

    # Initialize / use pre-trained / freeze model-parameters
    # - initialize (pre-trained) parameters
    model = define.init_params(model, args)
    # - freeze weights of conv-layers?
    if utils.checkattr(args, "freeze_convE"):
        for param in model.convE.parameters():
            param.requires_grad = False
    if utils.checkattr(args, 'feedback') and utils.checkattr(
            args, "freeze_convD"):
        for param in model.convD.parameters():
            param.requires_grad = False

    # Define optimizer (only optimize parameters that "requires_grad")
    model.optim_list = [
        {
            'params': filter(lambda p: p.requires_grad, model.parameters()),
            'lr': args.lr
        },
    ]
    model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------------------------#
    #----- CL-STRATEGY: REGULARIZATION / ALLOCATION -----#
    #----------------------------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner) and utils.checkattr(args, 'ewc'):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.online = utils.checkattr(args, 'online')
        if model.online:
            model.gamma = args.gamma

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner) and utils.checkattr(args, 'si'):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and utils.checkattr(
            args, 'xdg') and args.xdg_prop > 0:
        model.define_XdGmask(gating_prop=args.xdg_prop, n_tasks=args.tasks)

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, ContinualLearner) and hasattr(
            args, 'replay') and not args.replay == "none":
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = (hasattr(args, 'replay') and args.replay == "generative"
                 and not utils.checkattr(args, 'feedback'))
    if train_gen:
        # Specify architecture
        generator = define.define_autoencoder(args,
                                              config,
                                              device,
                                              generator=True)

        # Initialize parameters
        generator = define.init_params(generator, args)
        # -freeze weights of conv-layers?
        if utils.checkattr(args, "freeze_convE"):
            for param in generator.convE.parameters():
                param.requires_grad = False
        if utils.checkattr(args, "freeze_convD"):
            for param in generator.convD.parameters():
                param.requires_grad = False

        # Set optimizer(s)
        generator.optim_list = [
            {
                'params': filter(lambda p: p.requires_grad,
                                 generator.parameters()),
                'lr': args.lr_gen if hasattr(args, 'lr_gen') else args.lr
            },
        ]
        generator.optimizer = optim.Adam(generator.optim_list,
                                         betas=(0.9, 0.999))
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    if verbose:
        print("\nParameter-stamp...")
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=verbose,
        replay=True if
        (hasattr(args, 'replay') and not args.replay == "none") else False,
        replay_model_name=generator.name if
        (hasattr(args, 'replay') and args.replay == "generative"
         and not utils.checkattr(args, 'feedback')) else None,
    )

    # Print some model-characteristics on the screen
    if verbose:
        # -main model
        utils.print_model_info(model, title="MAIN MODEL")
        # -generator
        if generator is not None:
            utils.print_model_info(generator, title="GENERATOR")

    # Define [precision_dict] to keep track of performance during training, for storing and for later plotting in the pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)

    # Prepare for plotting in visdom
    visdom = None
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        replay_statement = "{mode}{fb}{con}{gat}{int}{dis}{b}{u}".format(
            mode=args.replay,
            fb="Rtf" if utils.checkattr(args, "feedback") else "",
            con="Con" if (hasattr(args, "prior") and args.prior == "GMM"
                          and utils.checkattr(args, "per_class")) else "",
            gat="Gat{}".format(args.dg_prop) if
            (utils.checkattr(args, "dg_gates") and hasattr(args, "dg_prop")
             and args.dg_prop > 0) else "",
            int="Int" if utils.checkattr(args, "hidden") else "",
            dis="Dis" if args.replay == "generative" and args.distill else "",
            b="" if
            (args.batch_replay is None or args.batch_replay == args.batch) else
            "-br{}".format(args.batch_replay),
            u="" if args.g_fc_uni == args.fc_units else "-gu{}".format(
                args.g_fc_uni)) if (hasattr(args, "replay")
                                    and not args.replay == "none") else "NR"
        graph_name = "{replay}{syn}{ewc}{xdg}".format(
            replay=replay_statement,
            syn="-si{}".format(args.si_c)
            if utils.checkattr(args, 'si') else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if utils.checkattr(args, "online") else "") if utils.checkattr(
                    args, 'ewc') else "",
            xdg="" if (not utils.checkattr(args, 'xdg')) or args.xdg_prop == 0
            else "-XdG{}".format(args.xdg_prop),
        )
        visdom = {'env': env_name, 'graph': graph_name}

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    g_iters = args.g_iters if hasattr(args, 'g_iters') else args.iters

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            replay=(hasattr(args, "replay") and not args.replay == "none"),
            model=model if utils.checkattr(args, 'feedback') else generator,
            tasks=args.tasks,
            iters_per_task=args.iters
            if utils.checkattr(args, 'feedback') else g_iters)
    ] if (train_gen or utils.checkattr(args, 'feedback')) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           iters_per_task=args.iters,
                           tasks=args.tasks,
                           replay=(hasattr(args, "replay")
                                   and not args.replay == "none"))
    ] if (not utils.checkattr(args, 'feedback')) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    no_samples = (utils.checkattr(args, "no_samples")
                  or (utils.checkattr(args, "hidden")
                      and hasattr(args, 'depth') and args.depth > 0))
    sample_cbs = [
        cb._sample_cb(log=args.sample_log,
                      visdom=visdom,
                      config=config,
                      test_datasets=test_datasets,
                      sample_size=args.sample_n,
                      iters_per_task=g_iters)
    ] if ((train_gen or utils.checkattr(args, 'feedback'))
          and not no_samples) else [None]

    # Callbacks for reporting and visualizing accuracy, and visualizing representation extracted by main model
    # -visdom (i.e., after each [prec_log] iterations)
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        precision_dict=None,
        iters_per_task=args.iters,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        scenario=args.scenario,
    )
    # -pdf / reporting: summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        scenario=args.scenario,
    )
    # -visualize feature space
    latent_space_cb = cb._latent_space_cb(
        log=args.iters,
        datasets=test_datasets,
        visdom=visdom,
        iters_per_task=args.iters,
        sample_size=400,
    )
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full, latent_space_cb]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    if args.train:
        if verbose:
            print("\nTraining...")
        # Train model
        train_cl(
            model,
            train_datasets,
            replay_mode=args.replay if hasattr(args, 'replay') else "none",
            scenario=args.scenario,
            classes_per_task=classes_per_task,
            iters=args.iters,
            batch_size=args.batch,
            batch_size_replay=args.batch_replay if hasattr(
                args, 'batch_replay') else None,
            generator=generator,
            gen_iters=g_iters,
            gen_loss_cbs=generator_loss_cbs,
            feedback=utils.checkattr(args, 'feedback'),
            sample_cbs=sample_cbs,
            eval_cbs=eval_cbs,
            loss_cbs=generator_loss_cbs
            if utils.checkattr(args, 'feedback') else solver_loss_cbs,
            args=args,
            reinit=utils.checkattr(args, 'reinit'),
            only_last=utils.checkattr(args, 'only_last'))
        # Save evaluation metrics measured throughout training
        file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)
        # Save trained model(s), if requested
        if args.save:
            save_name = "mM-{}".format(param_stamp) if (
                not hasattr(args, 'full_stag')
                or args.full_stag == "none") else "{}-{}".format(
                    model.name, args.full_stag)
            utils.save_checkpoint(model,
                                  args.m_dir,
                                  name=save_name,
                                  verbose=verbose)
            if generator is not None:
                save_name = "gM-{}".format(param_stamp) if (
                    not hasattr(args, 'full_stag')
                    or args.full_stag == "none") else "{}-{}".format(
                        generator.name, args.full_stag)
                utils.save_checkpoint(generator,
                                      args.m_dir,
                                      name=save_name,
                                      verbose=verbose)

    else:
        # Load previously trained model(s) (if goal is to only evaluate previously trained model)
        if verbose:
            print("\nLoading parameters of the previously trained models...")
        load_name = "mM-{}".format(param_stamp) if (
            not hasattr(args, 'full_ltag')
            or args.full_ltag == "none") else "{}-{}".format(
                model.name, args.full_ltag)
        utils.load_checkpoint(
            model,
            args.m_dir,
            name=load_name,
            verbose=verbose,
            add_si_buffers=(isinstance(model, ContinualLearner)
                            and utils.checkattr(args, 'si')))
        if generator is not None:
            load_name = "gM-{}".format(param_stamp) if (
                not hasattr(args, 'full_ltag')
                or args.full_ltag == "none") else "{}-{}".format(
                    generator.name, args.full_ltag)
            utils.load_checkpoint(generator,
                                  args.m_dir,
                                  name=load_name,
                                  verbose=verbose)

    #-------------------------------------------------------------------------------------------------#

    #------------------------------------#
    #----- EVALUATION of CLASSIFIER -----#
    #------------------------------------#

    if verbose:
        print("\n\nEVALUATION RESULTS:")

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task *
                      (i + 1))) if args.scenario == "task" else None)
        for i in range(args.tasks)
    ]
    average_precs = sum(precs) / args.tasks
    # -print on screen
    if verbose:
        print("\n Accuracy of final model on test-set:")
        for i in range(args.tasks):
            print(" - {} {}: {:.4f}".format(
                "For classes from task"
                if args.scenario == "class" else "Task", i + 1, precs[i]))
        print('=> Average accuracy over all {} {}: {:.4f}\n'.format(
            args.tasks *
            classes_per_task if args.scenario == "class" else args.tasks,
            "classes" if args.scenario == "class" else "tasks", average_precs))
    # -write out to text file
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs))
    output_file.close()

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- EVALUATION of GENERATOR -----#
    #-----------------------------------#

    if (utils.checkattr(args, 'feedback') or train_gen
        ) and args.experiment == "CIFAR100" and args.scenario == "class":

        # Dataset and model to be used
        test_set = ConcatDataset(test_datasets)
        gen_model = model if utils.checkattr(args, 'feedback') else generator
        gen_model.eval()

        # Evaluate log-likelihood of generative model on combined test-set (with S=100 importance samples per datapoint)
        ll_per_datapoint = gen_model.estimate_loglikelihood(
            test_set, S=100, batch_size=args.batch)
        if verbose:
            print('=> Log-likelihood on test set: {:.4f} +/- {:.4f}\n'.format(
                np.mean(ll_per_datapoint), np.sqrt(np.var(ll_per_datapoint))))
        # -write out to text file
        output_file = open("{}/ll-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(np.mean(ll_per_datapoint)))
        output_file.close()

        # Evaluate reconstruction error (averaged over number of input units)
        re_per_datapoint = gen_model.calculate_recon_error(
            test_set, batch_size=args.batch, average=True)
        if verbose:
            print(
                '=> Reconstruction error (per input unit) on test set: {:.4f} +/- {:.4f}\n'
                .format(np.mean(re_per_datapoint),
                        np.sqrt(np.var(re_per_datapoint))))
        # -write out to text file
        output_file = open("{}/re-{}.txt".format(args.r_dir, param_stamp), 'w')
        output_file.write('{}\n'.format(np.mean(re_per_datapoint)))
        output_file.close()

        # Try loading the classifier (our substitute for InceptionNet) for calculating IS, FID and Recall & Precision
        # -define model
        config['classes'] = 100
        pretrained_classifier = define.define_classifier(args=args,
                                                         config=config,
                                                         device=device)
        pretrained_classifier.hidden = False
        # -load pretrained weights
        eval_tag = "" if args.eval_tag == "none" else "-{}".format(
            args.eval_tag)
        try:
            utils.load_checkpoint(pretrained_classifier,
                                  args.m_dir,
                                  verbose=True,
                                  name="{}{}".format(
                                      pretrained_classifier.name, eval_tag))
            FileFound = True
        except FileNotFoundError:
            if verbose:
                print("= Could not find model {}{} in {}".format(
                    pretrained_classifier.name, eval_tag, args.m_dir))
                print("= IS, FID and Precision & Recall not computed!")
            FileFound = False
        pretrained_classifier.eval()

        # Only continue with computing these measures if the requested classifier network (using --eval-tag) was found
        if FileFound:
            # Preparations
            total_n = len(test_set)
            n_repeats = int(np.ceil(total_n / args.batch))
            # -sample data from generator (for IS, FID and Precision & Recall)
            gen_x = gen_model.sample(size=total_n, only_x=True)
            # -generate predictions for generated data (for IS)
            gen_pred = []
            for i in range(n_repeats):
                x = gen_x[(i *
                           args.batch):int(min(((i + 1) *
                                                args.batch), total_n))]
                with torch.no_grad():
                    gen_pred.append(
                        F.softmax(pretrained_classifier.hidden_to_output(x)
                                  if args.hidden else pretrained_classifier(x),
                                  dim=1).cpu().numpy())
            gen_pred = np.concatenate(gen_pred)
            # -generate embeddings for generated data (for FID and Precision & Recall)
            gen_emb = []
            for i in range(n_repeats):
                with torch.no_grad():
                    gen_emb.append(
                        pretrained_classifier.feature_extractor(
                            gen_x[(i * args.batch
                                   ):int(min(((i + 1) *
                                              args.batch), total_n))],
                            from_hidden=args.hidden).cpu().numpy())
            gen_emb = np.concatenate(gen_emb)
            # -generate embeddings for test data (for FID and Precision & Recall)
            data_loader = utils.get_data_loader(test_set,
                                                batch_size=args.batch,
                                                cuda=cuda)
            real_emb = []
            for real_x, _ in data_loader:
                with torch.no_grad():
                    real_emb.append(
                        pretrained_classifier.feature_extractor(
                            real_x.to(device)).cpu().numpy())
            real_emb = np.concatenate(real_emb)

            # Calculate "Inception Score" (IS)
            py = gen_pred.mean(axis=0)
            is_per_datapoint = []
            for i in range(len(gen_pred)):
                pyx = gen_pred[i, :]
                is_per_datapoint.append(entropy(pyx, py))
            IS = np.exp(np.mean(is_per_datapoint))
            if verbose:
                print('=> Inception Score = {:.4f}\n'.format(IS))
            # -write out to text file
            output_file = open(
                "{}/is{}-{}.txt".format(args.r_dir, eval_tag, param_stamp),
                'w')
            output_file.write('{}\n'.format(IS))
            output_file.close()

            ## Calculate "Frechet Inception Distance" (FID)
            FID = fid.calculate_fid_from_embedding(gen_emb, real_emb)
            if verbose:
                print('=> Frechet Inception Distance = {:.4f}\n'.format(FID))
            # -write out to text file
            output_file = open(
                "{}/fid{}-{}.txt".format(args.r_dir, eval_tag, param_stamp),
                'w')
            output_file.write('{}\n'.format(FID))
            output_file.close()

            # Calculate "Precision & Recall"-curves
            precision, recall = pr.compute_prd_from_embedding(
                gen_emb, real_emb)
            # -write out to text files
            file_name = "{}/precision{}-{}.txt".format(args.r_dir, eval_tag,
                                                       param_stamp)
            with open(file_name, 'w') as f:
                for item in precision:
                    f.write("%s\n" % item)
            file_name = "{}/recall{}-{}.txt".format(args.r_dir, eval_tag,
                                                    param_stamp)
            with open(file_name, 'w') as f:
                for item in recall:
                    f.write("%s\n" % item)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        plot_name = "{}/{}.pdf".format(args.p_dir, param_stamp)
        pp = evaluate.visual.plt.open_pdf(plot_name)

        # -show metrics reflecting progression during training
        if args.train and (not utils.checkattr(args, 'only_last')):
            # -create list to store all figures to be plotted.
            figure_list = []
            # -generate figures (and store them in [figure_list])
            figure = evaluate.visual.plt.plot_lines(
                precision_dict["all_tasks"],
                x_axes=[
                    i * classes_per_task for i in precision_dict["x_task"]
                ] if args.scenario == "class" else precision_dict["x_task"],
                line_names=[
                    '{} {}'.format(
                        "episode / task"
                        if args.scenario == "class" else "task", i + 1)
                    for i in range(args.tasks)
                ],
                xlabel="# of {}s so far".format("classe" if args.scenario ==
                                                "class" else "task"),
                ylabel="Test accuracy")
            figure_list.append(figure)
            figure = evaluate.visual.plt.plot_lines(
                [precision_dict["average"]],
                x_axes=[
                    i * classes_per_task for i in precision_dict["x_task"]
                ] if args.scenario == "class" else precision_dict["x_task"],
                line_names=[
                    'Average based on all {}s so far'.format((
                        "digit" if args.experiment == "splitMNIST" else
                        "classe") if args.scenario == "class" else "task")
                ],
                xlabel="# of {}s so far".format("classe" if args.scenario ==
                                                "class" else "task"),
                ylabel="Test accuracy")
            figure_list.append(figure)
            # -add figures to pdf
            for figure in figure_list:
                pp.savefig(figure)

        gen_eval = (utils.checkattr(args, 'feedback') or train_gen)
        # -show samples (from main model or separate generator)
        if gen_eval and not no_samples:
            evaluate.show_samples(
                model if utils.checkattr(args, 'feedback') else generator,
                config,
                size=args.sample_n,
                pdf=pp,
                title="Generated samples (by final model)")

        # -plot "Precision & Recall"-curve
        if gen_eval and args.experiment == "CIFAR100" and args.scenario == "class" and FileFound:
            figure = evaluate.visual.plt.plot_pr_curves([[precision]],
                                                        [[recall]])
            pp.savefig(figure)

        # -close pdf
        pp.close()

        # -print name of generated plot on screen
        if verbose:
            print("\nGenerated plot: {}\n".format(plot_name))
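Example #38's run() expects a fully populated args object, which the original repository builds with argparse. As a hedged illustration of the smallest code path (no replay, no pdf, no visdom), the sketch below fills in only attributes that run() visibly reads; every value is made up, and a real run needs the repository's complete option list:

from argparse import Namespace

# Hypothetical invocation sketch -- attribute names mirror the reads in
# run() above; the values are illustrative, not repository defaults.
args = Namespace(
    r_dir="./results", p_dir="./plots", m_dir="./models", d_dir="./data",
    get_stamp=False, cuda=False, seed=0,
    experiment="splitMNIST", scenario="task", tasks=5,
    train=True, iters=2000, batch=128, lr=0.001,
    replay="none", visdom=False, pdf=False, save=False,
    loss_log=200, prec_log=200, prec_n=1024,
)
run(args, verbose=True)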
Example #39
0
    def mc_study(self):
        """
        perform pseudo-experiments by generating random
        data and models (using statistical bin uncertainty)

        pseudo data:
            - sum all model hists
            - then randomise
        model:
            - randomise each input hist individually
        """
        samples = self.samples

        ntrials = 10000
        if self.plot_toy_fits: ntrials = 10
        
        ## save options before mc study
        tag = self.tag 
        quiet = self.quiet
        self.quiet = True

        toy_arrays = {}
        for s in samples:
            if s not in toy_arrays: toy_arrays[s] = {}
            toy_arrays[s]['mean']  = []
            toy_arrays[s]['error'] = []
            toy_arrays[s]['pull']  = []
            toy_arrays[s]['diff']  = []
            toy_arrays[s]['mc']    = []
        data_array = []

        ## initialise toy fitter
        ## VERY important to prepare hists first
        ## before initialising, so TFractionFitter
        ## is not initialised with the real data
        self.prepare_toy_hists()
        self.init_fitter(toy=True)

        for i in range(ntrials):
            if i % 100 == 0: print('trial', i)

            #print
            #print('trial%d summary: ' % i)

            self.tag = '%s_trial%d' % (tag, i)
            self.randomise_hists()
            self.toy_fit()
            if not self.fit_status == 0: continue
            if self.plot_toy_fits: self.plot()

           
            temp_means = {}
            temp_errors = {}
            temp_pulls = {} 
            temp_diffs = {} 
            temp_mcs = {} 
            has_zero = False
            n_tot_mc = self.ntot_orig()
            #print 'n_tot_mc: %.1f, h_orig_tot.int: %.1f'% (n_tot_mc,self.h_orig_total.Integral())

            n_tot_fit = self.ntot_fit()
            #print 'n_tot_fit: %.1f, n_data: %.1f' % (n_tot_fit,self.ndata_curr())
            for s in samples: 
                n_mc   = self.nsamp_orig(s)
                n_fit  = self.nsamp_fit(s)
                en_fit = self.ensamp_fit(s)
                pull   = (n_fit-n_mc)/en_fit if en_fit else 0.0
                diff   = n_fit - n_mc
                temp_means[s] = n_fit
                temp_errors[s] = en_fit
                temp_pulls[s] = pull
                temp_diffs[s] = diff
                temp_mcs[s] = self.nsamp_curr(s)
                f_fit = self.fsamp_fit(s)
                #if f_fit == 0.: has_zero = True 
                if f_fit < 0.0000001: has_zero = True            
                f_mc = n_mc / n_tot_mc if n_tot_mc else 0.0
                f_fit = n_fit / n_tot_fit if n_tot_fit else 0.0
                #print '%s, mc: %.1f, fit: %.1f, fmc: %.4f, ffit: %.4f' % (s,n_mc,n_fit,f_mc,f_fit)

            #print 'data, mc: %.1f, fit: %.1f' % (n_tot_mc,n_tot_fit)
            ## remove cases where any component is fit to 0
            ## argument is that we would not take this 
            ## result if we got it in data
            ## probably should try to do something 
            ## better in future
            if not has_zero:      
                for s in samples: 
                    toy_arrays[s]['mean'].append(temp_means[s])
                    toy_arrays[s]['error'].append(temp_errors[s])
                    toy_arrays[s]['pull'].append(temp_pulls[s])
                    toy_arrays[s]['diff'].append(temp_diffs[s])
                    toy_arrays[s]['mc'].append(temp_mcs[s])
                    data_array.append(self.ndata_curr())
            else:
                print('ERROR - component fit to zero')


        ## restore to original state before toys 
        self.reset_hists()
        self.tag = tag
        self.quiet = quiet

        ## set corrections from toy study
        filename = 'toy_%s.root' % self.tag
        for s in samples: 
            a_mean  = toy_arrays[s]['mean']
            a_error = toy_arrays[s]['error']
            a_pull  = toy_arrays[s]['pull']
            a_diff  = toy_arrays[s]['diff']
            a_mc    = toy_arrays[s]['mc']
 
            if s not in self.toy_results: self.toy_results[s] = {}
            self.toy_results[s]['meanm']  = numpy.mean(a_mean)
            self.toy_results[s]['meane']  = numpy.std(a_mean)
            self.toy_results[s]['errorm'] = numpy.mean(a_error)
            self.toy_results[s]['errore'] = numpy.std(a_error)
            self.toy_results[s]['pullm']  = numpy.mean(a_pull)
            self.toy_results[s]['pulle']  = numpy.std(a_pull)
            
            ## create plots
            h_mean  = create_mean_hist(s)
            h_error = create_error_hist(s)
            h_pull  = create_pull_hist(s)
            h_diff  = create_diff_hist(s)
            h_mc    = create_mc_hist(s)
            for v in a_mean:  h_mean.Fill(v)
            for v in a_error: h_error.Fill(v)
            for v in a_pull:  h_pull.Fill(v)
            for v in a_diff:  h_diff.Fill(v)
            for v in a_mc:    h_mc.Fill(v)
            utils.save_object(h_mean, filename)
            utils.save_object(h_error, filename)
            utils.save_object(h_pull, filename)
            utils.save_object(h_diff, filename)
            utils.save_object(h_mc, filename)

        h_mc_data    = create_mc_hist('data')
        for v in data_array: h_mc_data.Fill(v)
        utils.save_object(h_mc_data,filename)


        for isamp in range(len(samples)):
            s1 = samples[isamp]
            for isamp2 in range(isamp):
                s2 = samples[isamp2]
                h = create_2d_mean_hist(s1, s2)
                for ns1, ns2 in zip(toy_arrays[s1]['mean'],
                                    toy_arrays[s2]['mean']):
                    h.Fill(ns1, ns2)
                utils.save_object(h, filename)
       
        for s in samples:
            h = create_2d_mean_hist('%s_mc' % s, '%s_fit' % s)
            for ns1, ns2 in zip(toy_arrays[s]['mc'],
                                toy_arrays[s]['mean']):
                h.Fill(ns1, ns2)
            utils.save_object(h, filename)



        f = utils.open_file(filename)
        f.Close()
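The create_*_hist helpers used above are not shown anywhere in this collection; since the results end up in a .root file, they are presumably thin PyROOT constructors. A minimal sketch of one of them, with purely hypothetical naming and binning:

import ROOT  # assumes PyROOT is available

def create_mean_hist(sample, nbins=100, lo=0.0, hi=10000.0):
    # Hypothetical helper: one 1D histogram per sample for the fitted yields.
    # Name, title and binning are illustrative, not from the original code.
    return ROOT.TH1F('h_mean_%s' % sample,
                     'fitted yield (%s);N fitted;toys' % sample,
                     nbins, lo, hi)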
Example #40
0
def raw_to_pcloud(in_path, out_path):
    pcloud = action.make_action(in_path)
    pcloud.standarize()
    out_path = out_path.replace(".raw", ".cloud")
    print(out_path)
    utils.save_object(pcloud, out_path)
Example #41
0
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    keep_probability_conv = tf.placeholder(tf.float32,
                                           name="keep_probability_conv")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                           name="input_image")
    annotation_labels = tf.placeholder(tf.int32,
                                       shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                       name="annotation_labels")
    annotation_objects = tf.placeholder(
        tf.int32,
        shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
        name="annotation_objects")

    # compute labels
    pred_annotation, logits = segment(image, keep_probability_conv, 1,
                                      NUM_OF_CLASSESS, "labels")
    if FLAGS.summary:
        tf.summary.image("input_image", image, max_outputs=2)
        tf.summary.image("ground_truth_labels",
                         tf.cast(annotation_labels, tf.uint8),
                         max_outputs=2)
        tf.summary.image("pred_annotation",
                         tf.cast(pred_annotation, tf.uint8),
                         max_outputs=2)
        tf.summary.image("gt_objects",
                         tf.cast(annotation_objects, tf.uint8),
                         max_outputs=2)

    loss_labels = tf.reduce_mean(
        (tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=tf.squeeze(
                                                            annotation_labels,
                                                            squeeze_dims=[3]),
                                                        name="loss_labels")))

    # compute objective energy
    combination = tf.concat([image, tf.cast(logits, tf.float32)], -1)
    pred_annotation_o, logits_o = compute_energy(combination,
                                                 keep_probability_conv,
                                                 NUM_OF_CLASSESS + 1, 5,
                                                 "objects")

    with tf.variable_scope("loss_objects"):
        # subtract one from all the nonzero parts of annotation objects
        one = tf.constant(1, dtype=tf.int32, name="const_one")
        zero_m = tf.constant(0, dtype=tf.int32, name="const_zero")

        annotation_objects_sub = tf.subtract(annotation_objects,
                                             one,
                                             name="loss_obj_sub")
        if FLAGS.summary:
            tf.summary.histogram("annotations_o_minus_one",
                                 tf.cast(annotation_objects_sub, tf.uint8))

        annotation_objects_max = tf.maximum(annotation_objects_sub,
                                            zero_m,
                                            name="loss_obj_max")
        if FLAGS.summary:
            tf.summary.histogram("annotations_o_minus_one_1",
                                 tf.cast(annotation_objects_max, tf.uint8))
        #compute objective loss - crossentropy based
        cross_ent_obj = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits_o,
            labels=tf.squeeze(annotation_objects_max, squeeze_dims=[3]),
            name="loss_objects")

        # mask out stuff not detected by segmentation
        zero = tf.constant(0, dtype=tf.int32)
        binary_mask = tf.cast(
            tf.not_equal(tf.cast(pred_annotation, tf.int32), zero), tf.float32)

        if FLAGS.summary:
            tf.summary.tensor_summary("bin_mask",
                                      tf.cast(binary_mask, tf.uint8))

        # weight corners
        one = tf.constant(1, dtype=tf.float32)
        weight_mask = tf.multiply(
            tf.add(tf.cast(annotation_objects_max, tf.float32), one), 100)

        # compute loss
        loss_objects = tf.reduce_mean(
            tf.divide(tf.multiply(cross_ent_obj, binary_mask), weight_mask))
        loss_objects = tf.multiply(loss_objects, 100)

    # # compute objective loss - mean-square error
    #
    #
    #
    # # manipulate gt
    # # subtract 1 --> reduce size of patches
    # # square gt --> make patches deeper
    # one = tf.constant(1, dtype=tf.float32)
    # annotation_objects = tf.square(tf.subtract(tf.cast(annotation_objects, tf.float32), one))
    #
    # # tf.summary.image("pred_objectness", tf.cast(pred_objects, tf.uint8), max_outputs=2)
    # #
    # #    tf.summary.image("ground_truth_objects", tf.cast(annotation_objects, tf.uint8), max_outputs=2)
    # square_diff = tf.square(tf.subtract(logits_o, annotation_objects))
    #
    # # mask out non-segmented part
    # # zero = tf.constant(0, dtype=tf.float32)
    # # binary_mask = tf.cast(tf.not_equal(tf.cast(pred_annotation,tf.float32), zero), tf.float32)
    # # square_diff = tf.multiply(square_diff, binary_mask)
    #
    # loss_objects = tf.multiply(1000.0, tf.reduce_mean(square_diff))

    if FLAGS.mode == "train_combined":
        loss = tf.add(loss_labels, loss_objects)
    else:
        loss = loss_labels

    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    # if FLAGS.debug:
    #     for var in trainable_var:
    #         utils.add_to_regularization_and_summary(var)

    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

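    # Manual switch: flip to True to cache the dataset readers to disk.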
    if False:
        # try to load cached Data
        fname_train = "train_dsreader" + str(IMAGE_SIZE)
        fname_valid = "valid_dsreader" + str(IMAGE_SIZE)
        if os.path.isfile(fname_train) and os.path.isfile(fname_valid):
            # load cached
            train_dataset_reader = util.load_object(fname_train)
            validation_dataset_reader = util.load_object(fname_valid)
        else:
            # Load Data from disk and cache it
            print("Setting up image reader...")
            #train_records, valid_records = util.scene_parsing.read_dataset(
            #    FLAGS.data_dir)
            train_records, valid_records = music_data.read_dataset(
                FLAGS.data_dir)
            print(len(train_records))
            print(len(valid_records))

            print("Setting up dataset reader")
            image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
            if "train" in FLAGS.mode:
                train_dataset_reader = dataset.BatchDatset(
                    train_records, image_options)
            validation_dataset_reader = dataset.BatchDatset(
                valid_records, image_options)

            if train_dataset_reader is not None:
                util.save_object(train_dataset_reader, fname_train)
            util.save_object(validation_dataset_reader, fname_valid)
    else:
        # just load from disk
        print("Setting up image reader...")
        #train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
        train_records, valid_records = music_data.read_dataset(
            FLAGS.music_data_dir)
        print(len(train_records))
        print(len(valid_records))

        print("Setting up dataset reader")
        image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
        if "train" in FLAGS.mode:
            train_dataset_reader = dataset.BatchDatset(train_records,
                                                       image_options)
        validation_dataset_reader = dataset.BatchDatset(
            valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')
                   [1])  # get the step from the last checkpoint
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        step = 0

    if FLAGS.mode == "train_labels":
        for itr in xrange(step, MAX_ITERATION):
            train_images, train_m_annotations, train_o_annotations = train_dataset_reader.next_batch(
                FLAGS.batch_size)
            feed_dict = {
                image: train_images,
                annotation_labels: train_m_annotations,
                keep_probability_conv: 0.85,
                keep_probability: 0.85
            }
            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                # train_loss = sess.run(loss_labels, feed_dict=feed_dict)
                # print("Step: %d, labels_loss:%g" % (itr, train_loss))
                # train_loss = sess.run(loss_objects, feed_dict=feed_dict)
                # print("Step: %d, objects_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0 and itr != 0:
                valid_images, valid_m_annotations, valid_o_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation_labels:
                                          valid_m_annotations,
                                          annotation_objects:
                                          valid_o_annotations,
                                          keep_probability_conv: 1.0,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "train_combined":
        for itr in xrange(step, MAX_ITERATION):
            train_images, train_m_annotations, train_o_annotations = train_dataset_reader.next_batch(
                FLAGS.batch_size)
            feed_dict = {
                image: train_images,
                annotation_labels: train_m_annotations,
                annotation_objects: train_o_annotations,
                keep_probability_conv: 0.85,
                keep_probability: 0.85
            }

            # print("debug")
            #
            # from PIL import Image
            # img = Image.fromarray(train_images[0].reshape((IMAGE_SIZE,IMAGE_SIZE)), 'L')
            # img.show()
            #
            # from PIL import Image
            # img = Image.fromarray(train_m_annotations[0].reshape((IMAGE_SIZE,IMAGE_SIZE)), 'L')
            # img.show()
            #
            # from PIL import Image
            # img = Image.fromarray(train_o_annotations[0].reshape((IMAGE_SIZE,IMAGE_SIZE)), 'L')
            # img.show()
            #
            #
            #
            # feed_dict = {image: train_images, annotation_labels: train_m_annotations,annotation_objects:train_o_annotations, keep_probability_conv: 0.85, keep_probability: 0.85}
            # pred_annotation_val, logits_val, logits_labels_val = sess.run([pred_annotation_o, logits_o, logits], feed_dict=feed_dict)
            #
            # feed_dict = {image: train_images, pred_annotation_o: pred_annotation_val, logits_o: logits_val, annotation_objects:train_o_annotations, keep_probability_conv: 0.85, keep_probability: 0.85}
            # cross_ent_obj_val, binary_mask_val, weight_mask_val = sess.run([cross_ent_obj, binary_mask, weight_mask], feed_dict=feed_dict)

            if FLAGS.debug_fetch:
                # run one training step and fetch intermediate tensors for
                # inspection (fetched arrays get new names so the graph
                # tensors are not overwritten by numpy results)
                (_, annotation_objects_sub_val, annotation_objects_max_val,
                 logits_o_val, pred_annotation_val, binary_mask_val) = sess.run(
                     [
                         train_op, annotation_objects_sub,
                         annotation_objects_max, logits_o, pred_annotation,
                         binary_mask
                     ],
                     feed_dict=feed_dict)

                # annotations after subtracting one
                annotation_objects_sub_val = annotation_objects_sub_val.reshape(
                    (1, IMAGE_SIZE, IMAGE_SIZE))
                plt.imshow(annotation_objects_sub_val[0])
                plt.show()

                # annotations after clamping at zero
                annotation_objects_max_val = annotation_objects_max_val.reshape(
                    (1, IMAGE_SIZE, IMAGE_SIZE))
                plt.imshow(annotation_objects_max_val[0])
                plt.show()

                # background of the predicted segmentation
                pred_annotation_val = pred_annotation_val.reshape(
                    (1, IMAGE_SIZE, IMAGE_SIZE))
                plt.imshow(pred_annotation_val[0] == 0)
                plt.show()

                # binary mask
                binary_mask_val = binary_mask_val.reshape(
                    (1, IMAGE_SIZE, IMAGE_SIZE))
                plt.imshow(binary_mask_val[0])
                plt.show()

            else:
                sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss = sess.run(loss, feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_loss = sess.run(loss_labels, feed_dict=feed_dict)
                print("Step: %d, labels_loss:%g" % (itr, train_loss))
                train_loss = sess.run(loss_objects, feed_dict=feed_dict)
                print("Step: %d, objects_loss:%g" % (itr, train_loss))

            if itr % 250 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0 and itr != 0:
                valid_images, valid_m_annotations, valid_o_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation_labels:
                                          valid_m_annotations,
                                          annotation_objects:
                                          valid_o_annotations,
                                          keep_probability_conv: 1.0,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        number_of_batches = 25
        for i in xrange(number_of_batches):
            valid_images, valid_m_annotations, valid_o_annotations = validation_dataset_reader.next_batch(
                FLAGS.batch_size)
            pred_a, pred_o = sess.run(
                [pred_annotation, pred_annotation_o],
                feed_dict={
                    image: valid_images,
                    annotation_labels: valid_m_annotations,
                    annotation_objects: valid_o_annotations,
                    keep_probability_conv: 1.0,
                    keep_probability: 1.0
                })

            valid_o_annotations = np.squeeze(valid_o_annotations, axis=3)
            valid_m_annotations = np.squeeze(valid_m_annotations, axis=3)
            pred_a = np.squeeze(pred_a, axis=3)
            pred_o = np.squeeze(pred_o, axis=3)

            #water_s = util.do_wathershed(pred_a, pred_o)

            for itr in range(FLAGS.batch_size):
                utils.save_image(
                    valid_m_annotations[itr].astype(np.uint8),
                    "../" + FLAGS.logs_dir + "trained_images/ground_truth",
                    name="gt_" + str(i * FLAGS.batch_size + itr) + "_m")
                utils.save_image(
                    valid_o_annotations[itr].astype(np.uint8),
                    "../" + FLAGS.logs_dir + "trained_images/ground_truth",
                    name="gt_" + str(i * FLAGS.batch_size + itr) + "_o")
                utils.save_image(
                    pred_a[itr].astype(np.uint8),
                    "../" + FLAGS.logs_dir + "trained_images/prediction",
                    name="pred_" + str(i * FLAGS.batch_size + itr) + "_m")
                utils.save_image(
                    pred_o[itr].astype(np.uint8),
                    "../" + FLAGS.logs_dir + "trained_images/prediction",
                    name="pred_" + str(i * FLAGS.batch_size + itr) + "_o")
                print("Saved image: %d" % (i * FLAGS.batch_size + itr))
Example #42
0
def full_hyperparameter_test():
    counter = 0
    failed_networks = 0
    report = open("report.txt", 'a')
    report.write("\nSTART\n")
    for algorithm in [neuralnet.SGD, neuralnet.RMSPROP, neuralnet.ADAM]:
        for initialization in [neuralnet.NORMAL, neuralnet.UNIFORM]:
            for hidden_units in [8, 15, 50]:
                for l1reg in [0., 0.001]:
                    for l2reg in [0., 0.001]:
                        for loss_type in [
                                pnas2018.CROSS_ENTROPY, pnas2018.MSE
                        ]:
                            learning_rate = 0.1
                            if algorithm == neuralnet.RMSPROP or algorithm == neuralnet.ADAM:
                                learning_rate *= 0.1
                            if hidden_units == 8:
                                learning_rate *= 2
                            elif hidden_units == 50:
                                learning_rate *= 0.5
                            elif loss_type == pnas2018.MSE and (
                                    algorithm != neuralnet.RMSPROP
                                    and algorithm != neuralnet.ADAM):
                                learning_rate *= 10
                            counter += 1
                            name = algorithm + initialization + str(
                                hidden_units) + str(l1reg) + str(
                                    l2reg) + loss_type + "{:.3f}".format(
                                        learning_rate)
                            print("Hyperparameter test:" + name + '(' +
                                  str(counter) + ' out of 144)')
                            num_networks = 0
                            while num_networks < 50:
                                print(num_networks)
                                model, rng_avg_sequence = pnas2018.train(
                                    iterations=5000,
                                    algorithm=algorithm,
                                    size_hidden=hidden_units,
                                    learning_rate=learning_rate,
                                    l1reg=l1reg,
                                    l2reg=l2reg,
                                    loss_type=loss_type,
                                    initialization=initialization)
                                _, accuracy_totals = pnas2018.accuracy_test(
                                    model)
                                total_iterations = 5000
                                step = 5000
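                                # Retrain in 5000-iteration increments until the accuracy criterion is met or 10000 total iterations are reached.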
                                while (rng_avg_sequence < 0.22 or not np.array_equal(accuracy_totals, np.asarray([0.5, 0.5, 1, 1, 1, 1])))\
                                      and total_iterations < 10000:
                                    model, rng_avg_sequence = pnas2018.train(
                                        model,
                                        iterations=step,
                                        algorithm=algorithm,
                                        size_hidden=hidden_units,
                                        learning_rate=learning_rate,
                                        l1reg=l1reg,
                                        l2reg=l2reg,
                                        loss_type=loss_type,
                                        initialization=initialization)
                                    _, accuracy_totals = pnas2018.accuracy_test(
                                        model)
                                    report.write(name + " extra iterations:" +
                                                 str(step) + '\n')
                                    total_iterations += step
                                    print("total iterations=" +
                                          str(total_iterations))
                                if np.array_equal(
                                        accuracy_totals,
                                        np.asarray([0.5, 0.5, 1, 1, 1, 1])):
                                    num_networks += 1
                                else:
                                    print(
                                        "-----------------------------------------!!!!!!!!!!!!!!NETWORK FAILED!!!!!!!!!!!!!!!----------------------------------------"
                                    )
                                    report.write(name + " failed\n")
                                    failed_networks += 1

                                utils.save_object(
                                    algorithm + initialization +
                                    str(hidden_units) + str(l1reg) +
                                    str(l2reg) + loss_type, model)
    print(failed_networks)
    report.close()
Example #43
def train_autoencoder(in_path, out_path, training_epochs=15,
                      learning_rate=0.1, batch_size=25):
    dataset = load_data(in_path, batch_size)
    da = learning_autoencoder(dataset, training_epochs, learning_rate, batch_size)
    utils.save_object(out_path, da)
Example #44
def img_to_final(in_path, out_path):
    print(in_path + "/")
    final = td.read_im_action(in_path + "/")
    out_path = out_path.replace(".img", ".final")
    print(out_path)
    utils.save_object(final, out_path)
Example #45
def save_data(data, targets, filename='processed_data_temporal.pkl'):

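    # Convert the pandas objects to plain numpy arrays before pickling.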
    new_data = [person.to_numpy() for person in data]
    targets = [target.to_numpy().squeeze(axis=1) for target in targets]

    save_object((new_data, targets), filename)
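
A hedged counterpart for reading the data back (a minimal sketch, assuming save_object writes a plain pickle of the (new_data, targets) tuple; the load_data name is hypothetical):

import pickle

def load_data(filename='processed_data_temporal.pkl'):
    # Assumption: save_object stored the (new_data, targets) tuple as a plain pickle.
    with open(filename, 'rb') as f:
        new_data, targets = pickle.load(f)
    return new_data, targets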
Example #46
    sum_posts += len(df)
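    # Count posts whose index label is '[deleted]'.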
    sum_deleted_posts += len(df.loc[['[deleted]']])
    # iterate through each dataframe to analyze the percentage of posts
    for j in range(len(df)):
        if not (df.iloc[j].subreddit in subreddits):
            subreddits.append(df.iloc[j].subreddit)
            subreddit_counts.append(1)
        else:
            subreddit_counts[subreddits.index(df.iloc[j].subreddit)] += 1

# sort the subreddits and counts by highest percentage
subreddit_counts, subreddits = (list(t) for t in zip(
    *sorted(zip(subreddit_counts, subreddits), reverse=True)))

# save subreddit_counts and subreddits
save_object(subreddit_counts, 'objects/subreddit_post_analysis/',
            model_name + "-subreddit_counts")
save_object(subreddits, 'objects/subreddit_post_analysis/',
            model_name + "-subreddits")

# open output file with header
csvfile = open("subreddit_post_analysis.csv", "a")
writer = csv.writer(csvfile,
                    delimiter=',',
                    quotechar='\"',
                    quoting=csv.QUOTE_MINIMAL)
writer.writerow("Pecentage of Posts in each Subreddit:")

# print the percentages of posts in each subreddit present
for subreddit in subreddits:
    writer.writerow([
        subreddit,
Example #47
for i in range(CLASSIFIER_CONFIG['runs']):
    print("\nRun {}/{}\n".format(i + 1, CLASSIFIER_CONFIG['runs']))
    for j, c in enumerate(configurations):
        results[j].append(run(**c))

mean_results = np.array(results).mean(1).tolist()


def np_list_to_csv_string(npl):
    return ",".join("{:.4f}".format(f) for f in npl)

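# Render each row of mean results as one comma-separated line.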
csv_lines = []
for arr in mean_results:
    csv_lines.append(np_list_to_csv_string(arr))

utils.save_object(mean_results, SAVE_FOLDER, 'results')
utils.save_string_to_file("\n".join(csv_lines), SAVE_FOLDER, 'results.csv')
utils.save_dict(CLASSIFIER_CONFIG, SAVE_FOLDER, 'config.json')


data = np.array(mean_results)
x = range(data.shape[1])
fig, ax = plt.subplots()

plotter.hide_top_and_right_axis(ax)
ax.yaxis.grid(color='gray')
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Best polygon solution')
ax.set_prop_cycle(cycler('color', ['c', 'm', 'y', 'k', 'r', 'g', 'b']))

lines = []
Example #48
def binary_to_raw(in_path, out_path):
    raw_action = binary.read_binary(in_path)
    out_path = out_path.replace(".bin", ".raw")
    print(out_path)
    utils.save_object(raw_action, out_path)
Example #49
    def plot(self):

        line_width = 3

        cname = 'c_%s'%self.tag
        c = ROOT.TCanvas(cname,cname,700,700)
        c.cd()
        fr = c.DrawFrame(0.,0.,20.,1.2*self.h_curr_data.GetMaximum(),';Number of Tracks;Events')
        
        ## construct legend
        leg = ROOT.TLegend(0.6,0.2,0.9,0.5)
        leg.SetBorderSize(0)
        leg.SetFillColor(0)
        leg.SetFillStyle(0)

        ## get contributions
        h_bkg = self.fit_hists['bkg']
        h_bkg.Scale(self.nsamp_fit('bkg',corr=False) / h_bkg.Integral())
        if 'anti' in self.samples:
            h_anti = self.fit_hists['anti']
            h_anti.Scale(self.nsamp_fit('anti',corr=False) / h_anti.Integral())
        if self.orig_templates['sig_1p']:
            if self.fix_r1p3p: 
                h_sig_1p = self.orig_templates['sig_1p'].Clone()
                h_sig_mp = self.orig_templates['sig_mp'].Clone()
            else:
                h_sig_1p = self.fit_hists['sig_1p'] 
                h_sig_mp = self.fit_hists['sig_mp']

            h_sig_1p.Scale(self.nsamp_fit('sig_1p',corr=False) / h_sig_1p.Integral())
            h_sig_mp.Scale(self.nsamp_fit('sig_mp',corr=False) / h_sig_mp.Integral())
        else: 
            h_sig = self.fit_hists['sig']
            h_sig.Scale(self.nsamp_fit('sig',corr=False) / h_sig.Integral())

        ## draw error on total model
        ## TODO: fix this up to use orig hists
        h_total = None
        for s in self.samples:
            if not h_total: 
                h_total = self.orig_templates[s].Clone('h_total')
                h_total.Scale( self.nsamp_fit(s) / h_total.Integral() )
            else:
                htemp = self.orig_templates[s]
                h_total.Add( htemp, self.nsamp_fit(s) / htemp.Integral() )
        
        h_total.SetFillColor(ROOT.kRed)
        h_total.SetLineColor(ROOT.kRed)
        h_total.SetLineStyle(1)
        h_total.SetLineWidth(0)
        h_total.SetMarkerSize(0)
        h_total.Draw("SAME,E2")
            
        
        fsum = 0.
        for s in self.samples: fsum += self.fsamp_fit(s)
        print('fsum:', fsum)

            
        ## total model central value from fit
        #self.fit_hists['model'].Scale(self.ndata() / self.nhist(self.fit_hists['model']))
        #self.fit_hists['model'].Scale(self.ndata_curr() / self.nhist(self.fit_hists['model']))
        self.fit_hists['model'].SetLineWidth(line_width)
        self.fit_hists['model'].Draw("SAME")
        leg.AddEntry(self.h_curr_data,'Data','PL')
        leg.AddEntry(self.fit_hists['model'],'Model','L')
        leg.AddEntry(h_total,'Model (stat.)','F')

        ## draw data
        self.h_curr_data.Draw("SAME")
        
        print('nfit:', self.ntot_fit())
        print('nfit(corr):', self.ntot_fit())
        print('h_mod:', self.fit_hists['model'].Integral())
        print('h_tot:', h_total.Integral())


        # draw bkg
        h_bkg.SetLineColor(self.color_bkg)
        h_bkg.SetLineStyle(self.style_bkg)
        h_bkg.SetLineWidth(line_width)
        h_bkg.Draw("SAME,HIST")
        leg.AddEntry(h_bkg,'Jet','L')
        if self.draw_true_hists: self.orig_templates['bkg'].Draw("SAME,HIST")


        # draw anti
        if 'anti' in self.samples:
            h_anti.SetLineColor(self.color_anti)
            h_anti.SetLineStyle(self.style_anti)
            h_anti.SetLineWidth(line_width)
            h_anti.Draw("SAME,HIST")
            leg.AddEntry(h_anti,'Lep','L')
            if self.draw_true_hists: self.orig_templates['anti'].Draw("SAME,HIST")

        ## 1p3p split signal
        if self.orig_templates['sig_1p']:
            h_sig_1p.SetLineColor(self.color_sig_1p)
            h_sig_1p.SetLineStyle(self.style_sig_1p)
            h_sig_1p.SetLineWidth(line_width)
            
            h_sig_mp.SetLineColor(self.color_sig_mp)
            h_sig_mp.SetLineStyle(self.style_sig_mp)
            h_sig_mp.SetLineWidth(line_width)
            
            h_sig_1p.Draw("SAME,HIST")
            h_sig_mp.Draw("SAME,HIST")
            leg.AddEntry(h_sig_1p,'Tau (1p)','L')
            leg.AddEntry(h_sig_mp,'Tau (mp)','L')
            if self.draw_true_hists: self.orig_templates['sig_1p'].Draw("SAME,HIST")
            if self.draw_true_hists: self.orig_templates['sig_mp'].Draw("SAME,HIST")
        else: 
            h_sig = self.fit_hists['sig']
            h_sig.SetLineColor(self.color_sig)
            h_sig.SetLineStyle(self.style_sig)
            h_sig.SetLineWidth(line_width)
            h_sig.Draw("SAME,HIST")
            leg.AddEntry(h_sig,'Tau','L')
            if self.draw_true_hists: self.orig_templates['sig'].Draw("SAME,HIST")

        
        leg.Draw()

        latex = ROOT.TLatex()
        latex.SetNDC()
        latex.SetTextFont(42)
        latex.DrawLatex(0.4,0.85,self.tag)
        latex.DrawLatex(0.4,0.75,'#chi^{2}/NDF = %.1f / %d' % (self.chi2,self.ndf) ) 

        c.Update()
        c.SaveAs('fit_%s.eps'%self.tag)

        if self.fout: utils.save_object(c,self.fout)
Example #50
def run(args):

    # Set default arguments
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if XdG is selected but not the incremental task learning scenario, give error
    if (not args.scenario == "task") and args.gating_prop > 0:
        raise ValueError(
            "'XdG' only works for the incremental task learning scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.gating_prop > 0):
        raise NotImplementedError(
            "EWC, SI and XdG are not supported with feedback connections.")
    # -if XdG is selected together with replay of any kind, give error
    if args.gating_prop > 0 and (not args.replay == "none"):
        raise NotImplementedError(
            "XdG is not supported with '{}' replay.".format(args.replay))
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=args.scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         verbose=True,
         exception=True if args.seed == 0 else False,
     )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier, if requested with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> so that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.gating_prop > 0 else False,
        ).to(device)

    # Define optimizer (only include parameters that "requires_grad")
    model.optim_list = [{
        'params':
        filter(lambda p: p.requires_grad, model.parameters()),
        'lr':
        args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    # Set loss-function for reconstruction
    if args.feedback:
        model.recon_criterion = nn.BCELoss(reduction='mean')  # size_average is deprecated; reduction='mean' is equivalent

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.gamma = args.gamma
        model.online = args.online
        model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and args.gating_prop > 0:
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
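                # Randomly pick a fraction [gating_prop] of this layer's units to gate for this task.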
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop *
                                                        n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    model.replay_targets = "soft" if args.distill else "hard"
    model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = True if (args.replay == "generative"
                         and not args.feedback) else False
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params':
            filter(lambda p: p.requires_grad, generator.parameters()),
            'lr':
            args.lr
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
        # -set reconstruction criterion
        generator.recon_criterion = nn.BCELoss(reduction='mean')  # size_average is deprecated; reduction='mean' is equivalent
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = utils.get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting
    # -open pdf
    pp = visual_plt.open_pdf("{}/{}.pdf".format(
        args.p_dir, param_stamp)) if args.pdf else None
    # -define [precision_dict] to keep track of performance during training for later plotting
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{mode}{syn}{ewc}{XdG}".format(
            fb="1M-" if args.feedback else "",
            mode=args.replay,
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if args.online else "") if args.ewc else "",
            XdG=""
            if args.gating_prop == 0 else "-XdG{}".format(args.gating_prop))
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log,
                        visdom=visdom,
                        model=model if args.feedback else generator,
                        tasks=args.tasks,
                        iters_per_task=args.g_iters,
                        replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(log=args.sample_log,
                      visdom=visdom,
                      config=config,
                      test_datasets=test_datasets,
                      sample_size=args.sample_n,
                      iters_per_task=args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        iters_per_task=args.iters,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner) and
        (args.gating_prop > 0) else False)
    # -pdf: for summary plots (i.e, only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner) and
        (args.gating_prop > 0) else False)
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model,
        train_datasets,
        replay_mode=args.replay,
        scenario=args.scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        collate_fn=utils.label_squeezing_collate_fn,
        visualize=True if args.visdom else False,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    time_file = open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w')
    time_file.write('{}\n'.format(training_time))
    time_file.close()

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print('\n\n--> Evaluation ("incremental {} learning scenario"):'.format(
        args.scenario))

    # Generation (plot in pdf)
    if (pp is not None) and train_gen:
        evaluate.show_samples(generator, config, size=args.sample_n, pdf=pp)
    if (pp is not None) and args.feedback:
        evaluate.show_samples(model, config, size=args.sample_n, pdf=pp)

    # Reconstruction (plot in pdf)
    if (pp is not None) and (train_gen or args.feedback):
        for i in range(args.tasks):
            if args.feedback:
                evaluate.show_reconstruction(model,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)
            else:
                evaluate.show_reconstruction(generator,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)

    # Classifier (print on screen & write to file)
    if args.scenario == "task":
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task_mask=True if isinstance(model, ContinualLearner)
                and args.gating_prop > 0 else False,
                task=i + 1,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task * (i + 1))))
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(model,
                              test_datasets[i],
                              verbose=False,
                              test_size=None,
                              task=i + 1) for i in range(args.tasks)
        ]
    print("\n Precision on test-set:")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}\n'.format(
        args.tasks, average_precs))

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set (no restrictions on which nodes can be predicted: "incremental" / "singlehead")
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(average_precs))
    output_file.close()

    # Precision-dictionary
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if pp is not None:
        # -create list to store all figures to be plotted.
        figure_list = []
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)

    # Close pdf
    if pp is not None:
        pp.close()
Example #51
def main():
    sets = generate_data_sets()
    utils.save_object(sets, 'data_sets')
Example #52
def sean_solution(input_info, **kwargs):
    """
    This solution is designed to be performed as follows:
    1.  Set up anything needed, such as arrays or sorting.
        Also break the input info down into components.
    2.  objective can access anything from the main body part.
        As such, pass anything extra that may be needed
        (such as hyper-params) into objective as args (a dict),
        and perform the actual solving logic there.
    3.  At the end, you can search over the hyper-parameters (the args)
        that are passed into objective.
        However, if writing a simple function, this part
        can be safely ignored.

    """
    # TODO main body part here - especially setup

    D, I, S, V, F, street_info, car_info = input_info

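    # Index Street objects by street name (the third field of each street tuple).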
    s_dict = {}
    for s in street_info:
        s_dict[s[2]] = Street(*s)

    # Find busy streets
    for c in car_info:
        paths = c[1:]
        for p in paths:
            s_dict[p].cars_using += 1

    for s in s_dict.values():
        s.calc_score()
        # print(s)

    inter_list = []
    for i in range(I):
        intersection = Intersection(i)
        intersection.find_streets_at_intersection(street_info, D)
        intersection.weight_streets(s_dict)
        intersection.find_street_times()
        inter_list.append(intersection)

    # for I in inter_list:
    #     print(I)

    def objective(args):
        """Actually write the solution part here."""
        # TODO Parse out the args if needed
        val = args.get("name", None)

        # TODO Solve the thing here
        solution = []

        for I in inter_list:
            num_lights = len(I.best_times)
            street_names = [s[2] for s in I.streets]
            line_to_append = [I.id, num_lights]
            idxs = list(range(len(street_names)))
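            # Order street indices by their best green-light times, longest first.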
            sorted_idx = [x for x, _ in sorted(zip(idxs, I.best_times), key=lambda pair: pair[1], reverse=True)]
            for i in sorted_idx:
                name = street_names[i]
                time_val = I.best_times[i]
                line_to_append.append((name, time_val))
            solution.append(line_to_append)

        score = 0

        # Return something flexible that can be used with hyperopt
        # Main point is that it has score and solution.
        return {
            "loss": -score,
            "score": score,
            "solution": solution,
            "eval_time": time.time(),
            "status": STATUS_OK,
        }

    # Ignore this bit if not searching hyper_parameters!
    if kwargs.get("search", True):
        trials = Trials()

        # TODO Setup what values the args searching over can have
        space = hp.choice(
            "args",
            [{"arg1": hp.lognormal("arg1", 1, 0.5), "arg2": hp.uniform("arg2", 1, 10)}],
        )

    # TODO If you know the best you can do, pass loss_threshold=-best
    # Do hyper-param searching - possibly pass num_evals per filename
        best = fmin(
            objective,
            space=space,
            algo=tpe.suggest,
            max_evals=kwargs.get("num_evals", 10),
            trials=trials,
        )

        # Get the best hyper-params from fmin
        print("Best hyper-parameters found were:", best)
        args = space_eval(space, best)

        # Save the trials to disk
        # These trials can be printed using print_trial_info in utils
        out_name = os.path.join(
            kwargs["output_dir"], "_" + kwargs["input_name"][:-2] + "pkl"
        )
        save_object(trials, out_name)

    else:
        # By default, this is just an empty dictionary.
        args = kwargs.get("objective_args")

    result = objective(args)
    return result["solution"], result["score"]
Example #53
def cluster_images(in_path, out_path):
    images = utils.read_array(in_path)
    clusters = clustering_mini_batch(images)
    utils.save_object(out_path, clusters)