Example #1
def main():
    args = parse_args()
    log(args, '\n' + str(args))
    if args.mode == 'train':
        comparision.train(args)
    elif args.mode == 'classify':
        classify(args)
    elif args.mode == 'dataset':
        dataset.build_dataset(args)
    elif args.mode == 'run':
        run_thread = run.MyThread(0, args)
        network_thread = run.MyThread(1, args)
        run_thread.start()
        network_thread.start()
        network_thread.join()  # wait for the network thread to finish
    elif args.mode == 'normalize':
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        if args.split_dir != 'none':
            splits = dataset.split_images(args, start_time)
            log(
                args, '{:.5f}'.format(time.perf_counter() - start_time) + 's ' +
                'Images have been split')
        if args.normalize:
            dataset.normalize(args, start_time)
            if args.split_dir != 'none':
                dataset.normalize(args, start_time, dirs=splits)
    else:
        log(args,
            'Please select a mode using the tag --mode, use --help for help.',
            True)
Example #2
    def test_normalize(self):
        x = np.array([[1, -1], [-1, 1]], dtype='float32')[None,:,:]
        y = dataset.normalize(x)
        np.testing.assert_array_equal(x, y)

        # this array: mean = -0.5, std = 2.5
        x = np.array([[2, -3], [-3, 2]], dtype='float32')[None,:,:]
        y = dataset.normalize(x)
        np.testing.assert_array_equal(y, [[[1, -1], [-1, 1]]])
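
A minimal sketch of a normalize function consistent with this test (an assumption for illustration, not necessarily the project's actual dataset.normalize): per-sample z-score scaling, i.e. subtract the mean and divide by the standard deviation.

import numpy as np

def normalize(x):
    # Hypothetical sketch: z-score each sample over its spatial axes, so
    # [[2, -3], [-3, 2]] (mean -0.5, std 2.5) maps to [[1, -1], [-1, 1]].
    mean = x.mean(axis=(1, 2), keepdims=True)
    std = x.std(axis=(1, 2), keepdims=True)
    return (x - mean) / std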
Example #3
    def test_normalize(self):
        m = np.array([[1, 4, 0.5, 9], [0, 2, 0.2, 2], [0, 1, 0.01, 8],
                      [1, 2.5, 0.3, 3]])
        norm = np.array([[1, 1, 1, 1], [0, 0.5, 0.4, 0.222],
                         [0, 0.25, 0.02, 0.888], [1, 0.625, 0.6, 0.333]])
        decimal = 3
        np.testing.assert_array_almost_equal(ds.normalize(m), norm, decimal)
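
Here ds.normalize appears to scale each column by its maximum (e.g. the second column [4, 2, 1, 2.5] becomes [1, 0.5, 0.25, 0.625]). A minimal sketch under that assumption:

import numpy as np

def normalize(m):
    # Hypothetical sketch: divide every column by its column-wise maximum.
    return m / m.max(axis=0)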
Example #4
def predict(classifier, test_files, min_value, max_value):
    """Returns a list of arrays of predicted labels."""
    predictions = []
    for test_file in test_files:
        features = np.genfromtxt(test_file, dtype=np.float32, delimiter=',')[:, :10]
        features = ds.normalize(features, min_value, max_value)
        prediction = classifier.predict(features)
        prediction = np.argmax(prediction, axis=1)
        predictions.append(prediction)
    return predictions
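
This example passes explicit bounds, which suggests min-max scaling with statistics computed elsewhere (e.g. over the training set). A minimal sketch under that assumption, with min_value and max_value taken as given:

import numpy as np

def normalize(features, min_value, max_value):
    # Hypothetical sketch: min-max scaling with externally supplied bounds.
    return (features - min_value) / (max_value - min_value)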
Example #5
    def predict(self, utterance: str) -> dict:
        if self.embedder is None or self.model is None:
            raise RuntimeError('Load or train the model before calling predict')
        sent = normalize(utterance)
        sent = self.embedder.create_sentence_embedding(sent, predict=True)
        sent = torch.unsqueeze(sent, 0)
        with torch.no_grad():
            x = self.model.forward(sent,
                                   torch.tensor([sent.shape[1]])).view(-1)
            predicted = torch.argmax(x).tolist()
            return {
                'intent': self.labels_dict[predicted],
                'confidence': x[predicted].tolist()
            }
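
A hypothetical call site for this method; the classifier instance, its name, and its prior loading or training are assumptions not shown in the example:

result = intent_classifier.predict('turn on the lights in the kitchen')  # intent_classifier is hypothetical
print(result['intent'], result['confidence'])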
Example #6
def classify(ae, train_files, valid_files, min_value, max_value, logger):

    K.clear_session()

    # Required in order to have reproducible results from a specific random seed
    os.environ['PYTHONHASHSEED'] = '0'

    # Force tf to use a single thread (required for reproducibility)
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)

    np.random.seed(13)
    rn.seed(13)
    tf.compat.v1.set_random_seed(13)

    logger.info('Starting model definition...')

    classifier = Sequential()
    for i, layer in enumerate(ae.layers[:int(len(ae.layers) / 2) + 1]):
        classifier.add(layer)
    for layer in classifier.layers:
        layer.trainable = False
    classifier.add(layers.Dense(2, 'sigmoid', name='dense_cls'))
    
    metrics_list = ['accuracy']
    
    classifier.compile('nadam', 'binary_crossentropy', metrics=metrics_list)

    logger.info('Model successfully compiled.')

    training_history = {'aramis_metric': [],
                        'val_aramis_metric': []}

    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    early_stop_counter = 0
    best_metric = 1
    best_metric_epoch = 1

    for epoch in range(1, 1001):
        logger.info('Classifier epoch {} starting...'.format(epoch))
        true_labels_train, predicted_labels_train = [], []
        true_labels_valid, predicted_labels_valid = [], []
        if epoch % 10 == 0:
            classifier.save(
                'checkpoints/classifier_checkpoint_epoch_{}.h5'.format(epoch))
            K.clear_session()
            classifier = models.load_model(
                'checkpoints/classifier_checkpoint_epoch_{}.h5'.format(epoch))
        if epoch == 50:
            # Unfreeze the last pre-trained layer and recompile so the
            # change to `trainable` takes effect
            classifier.layers[-2].trainable = True
            classifier.compile('nadam', 'binary_crossentropy', metrics=metrics_list)
        if epoch == 300:
            classifier.layers[-3].trainable = True
            classifier.compile('nadam', 'binary_crossentropy', metrics=metrics_list)
        for train_file in train_files:
            valid_file = np.random.choice(valid_files)
            logger.debug('Training file: {}'.format(train_file))
            logger.debug('Validation file: {}'.format(valid_file))
            data_train = np.genfromtxt(train_file, dtype=np.float32, delimiter=',')
            # data_train = ds.balanced_sample(data_train)
            data_train = ds.oversample_smote(data_train)
            data_valid = np.genfromtxt(valid_file, dtype=np.float32, delimiter=',')
            features_train = data_train[:, :-1]
            features_valid = data_valid[:, :-1]
            features_train = ds.normalize(features_train, min_value, max_value)
            features_valid = ds.normalize(features_valid, min_value, max_value)
            labels_train = data_train[:, -1]
            labels_train = to_categorical(labels_train, num_classes=2)
            labels_valid = data_valid[:, -1]
            labels_valid = to_categorical(labels_valid, num_classes=2)

            history = classifier.fit(x=features_train, y=labels_train, batch_size=32, epochs=1,
                                     validation_data=(features_valid, labels_valid), verbose=2)
            true_labels_train.append(labels_train)
            predicted_labels_train.append(classifier.predict(features_train))
            true_labels_valid.append(labels_valid)
            predicted_labels_valid.append(classifier.predict(features_valid))
            for key in history.history:
                if key in training_history.keys():
                    training_history[key].extend(history.history[key])
                else:
                    training_history[key] = history.history[key]
        for i in range(len(true_labels_train)):
            true_labels_train[i] = np.argmax(true_labels_train[i], axis=1)
            predicted_labels_train[i] = np.argmax(predicted_labels_train[i], axis=1)
            true_labels_valid[i] = np.argmax(true_labels_valid[i], axis=1)
            predicted_labels_valid[i] = np.argmax(predicted_labels_valid[i], axis=1)
        metric = aramis_metric(true_labels_train, predicted_labels_train)
        val_metric = aramis_metric(true_labels_valid, predicted_labels_valid)
        training_history['aramis_metric'].append(metric)
        training_history['val_aramis_metric'].append(val_metric)
        logger.info('aramis_metric = {}'.format(metric))
        logger.info('val_aramis_metric = {}'.format(val_metric))
        if (val_metric < best_metric) and (epoch > 600):
            classifier.save('checkpoints/classifier_best.h5')
            best_metric = val_metric
            best_metric_epoch = epoch
        for key in history.history:
            avg = np.average(training_history[key][-len(train_files):])  # average of each epoch
            del training_history[key][-len(train_files):]
            training_history[key].append(avg)
        if epoch > 20:
            if not training_history['val_loss'][-1] < training_history['val_loss'][-2]:
                early_stop_counter += 1
                logger.debug('Early stopping counter increased by 1.')
            else:
                if early_stop_counter > 0:
                    early_stop_counter = 0
                    logger.debug('Early stopping counter reset to 0.')
        if early_stop_counter > 10:
            logger.info('Training terminated by early stopping.')
            break
        
    classifier = models.load_model('checkpoints/classifier_best.h5')

    os.rename(r'checkpoints/classifier_best.h5',
              r'checkpoints/classifier_best_epoch_{}_metric_{}.h5'.
              format(best_metric_epoch, str(best_metric)[2:6]))

    return classifier, training_history
Example #7
def train_ae(net_params, layer_params_list, train_files, valid_files, min_value, max_value,
             logger, n_layers=4, weights=None):
    """Trains an AE with the given parameters. Returns the trained AE."""

    K.clear_session()

    # Required in order to have reproducible results from a specific random seed
    os.environ['PYTHONHASHSEED'] = '0'

    # Force tf to use a single thread (required for reproducibility)
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)

    # network parameters
    optim = net_params['optim']
    learn_rate = net_params['learn_rate']
    decay = net_params['decay']
    mom = net_params['mom']
    rand_seed = net_params['rand_seed']

    np.random.seed(rand_seed)
    rn.seed(rand_seed)
    tf.compat.v1.set_random_seed(rand_seed)

    if optim == 'adam':
        opt = optimizers.Adam(lr=learn_rate, beta_1=mom, decay=decay, clipvalue=0.3)
    elif optim == 'nadam':
        opt = optimizers.Nadam(lr=learn_rate, beta_1=mom, schedule_decay=decay, clipvalue=0.3)
    elif optim == 'rmsprop':
        opt = optimizers.RMSprop(lr=learn_rate, rho=mom, decay=decay, clipvalue=0.3)
    else:  # adadelta
        opt = optimizers.Adadelta(lr=learn_rate, rho=mom, decay=decay, clipvalue=0.3)

    logger.info('Starting model definition...')

    input_layer = layers.Input(shape=(10,))
    enc = layers.Dense(layer_params_list[-1]['n_neuron'],
                       activation=layer_params_list[-1]['act'],
                       kernel_initializer=layer_params_list[-1]['init'])(input_layer)
    for i in range(n_layers - 1):
        enc = layers.Dense(layer_params_list[-2 - i]['n_neuron'],
                           activation=layer_params_list[-2 - i]['act'],
                           kernel_initializer=layer_params_list[-2 - i]['init'])(enc)
    dec = layers.Dense(layer_params_list[1]['n_neuron'],
                       activation=layer_params_list[1]['act'],
                       kernel_initializer=layer_params_list[1]['init'])(enc)
    for i in range(n_layers - 2):
        dec = layers.Dense(layer_params_list[i + 2]['n_neuron'],
                           activation=layer_params_list[i + 2]['act'],
                           kernel_initializer=layer_params_list[i + 2]['init'])(dec)
    output_layer = layers.Dense(10,
                                activation=layer_params_list[-1]['act'],
                                kernel_initializer=layer_params_list[-1]['init'])(dec)
    # assumption: output layer has the same parameters as the final hidden layer
    ae = models.Model(input_layer, output_layer)
    ae.compile(optimizer=opt, loss='mse')
    if weights:
        ae.set_weights(weights)

    logger.info('Model successfully compiled.')
    logger.info('Network parameters: {}'.format(str(net_params)))
    for i, layer_params in enumerate(layer_params_list):
        logger.info('Layer {} parameters: {}'.format(str(i + 1), str(layer_params)))

    training_history = {}

    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    early_stop_counter = 0

    for epoch in range(1, 201):
        logger.info('AE epoch {} starting...'.format(epoch))
        if epoch % 10 == 0:
            ae.save('checkpoints/ae_checkpoint.h5')
            K.clear_session()
            ae = models.load_model('checkpoints/ae_checkpoint.h5')
        for train_file in train_files:
            valid_file = np.random.choice(valid_files)
            logger.debug('Training file: {}'.format(train_file))
            logger.debug('Validation file: {}'.format(valid_file))
            data_train = np.genfromtxt(train_file, dtype=np.float32, delimiter=',')
            data_valid = np.genfromtxt(valid_file, dtype=np.float32, delimiter=',')
            features_train = data_train[:, :-1]
            features_valid = data_valid[:, :-1]
            features_train = ds.normalize(features_train, min_value, max_value)
            features_valid = ds.normalize(features_valid, min_value, max_value)
            history = ae.fit(x=features_train, y=features_train, batch_size=32, epochs=1,
                             validation_data=(features_valid, features_valid), verbose=2)
            for key in history.history:
                if key in training_history.keys():
                    training_history[key].extend(history.history[key])
                else:
                    training_history[key] = history.history[key]
        for key in training_history:
            avg = np.average(training_history[key][-len(train_files):])
            del training_history[key][-len(train_files):]
            training_history[key].append(avg)
        if epoch > 20:
            if not training_history['val_loss'][-1] < training_history['val_loss'][-2]:
                early_stop_counter += 1
                logger.debug('Early stopping counter increased by 1.')
            else:
                if early_stop_counter > 0:
                    early_stop_counter = 0
                    logger.debug('Early stopping counter reset to 0.')
        if (early_stop_counter > 10) or (training_history['val_loss'][-1] < 1e-5):
            logger.info('Training terminated by early stopping.')
            break

    return ae, training_history
Example #8
        accuracy_list.append(
            accuracy(log, test_pth_rep, test_label, batch_size) * 100)
    print('Average Accuracy: {:.4f}, Test Time: {:.4f}'.format(
        (sum(accuracy_list) / 50),
        timer() - start_test))


if __name__ == '__main__':
    batch_size = 1
    epoch = 1000
    hid_dim = 512
    adj, feature, _, _, _, train_msk, test_msk, valid_msk, label = load_data(
        data_name='/home/kibum/recommender_system/Graph/data/ind.cora')
    feature = feature.cpu().numpy()
    n_node = feature.shape[0]
    f_size = feature.shape[1]
    adj = preprocess_adj(adj, sparse=True)
    feature = normalize(feature)
    feature = torch.FloatTensor(feature[np.newaxis]).cuda()

    model = DeepGraphInformax(f_size, hid_dim).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_function = torch.nn.BCEWithLogitsLoss()

    pos_lb = torch.ones(batch_size, n_node).cuda()
    neg_lb = torch.zeros(batch_size, n_node).cuda()
    pos_neg_lb = torch.cat((pos_lb, neg_lb), 1)
    # Train the model
    train(epoch, model, optimizer, n_node, feature, loss_function, adj,
          pos_neg_lb, batch_size)
    test(model, train_msk, test_msk, label, adj, feature, hid_dim, batch_size)
Example #9
def targetNormalize(target_data):
    data = target_data.copy()
    normalize(data)
    return data.to_numpy()
Example #10
if __name__ == '__main__':

    # Configure these
    path_to_csv = 'datasets-48149-87794-PJM_Load_hourly.csv'
    train_test_split = [0.8, 0.2]
    train_window = 365
    lr = 0.001
    loss_fn = nn.MSELoss()
    epochs = 1250

    input_set = dataset.SimpleDataset(path_to_csv)
    train_set = dataset.split_train_test(input_set, train_test_split)[0]
    test_set = dataset.split_train_test(input_set, train_test_split)[1]
    train_inout_seq = dataset.create_inout_sequences(
        dataset.normalize(train_set), train_window)

    model = LSTM()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()

    for i in range(epochs):
        for seq, labels in train_inout_seq:
            optimizer.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                                 torch.zeros(1, 1, model.hidden_layer_size))

            y_pred = model(seq)

            single_loss = loss_fn(y_pred, labels)
            single_loss.backward()  # backpropagation and parameter update
            optimizer.step()
Example #11
coea_eval_idx = np.random.randint(0, len(train_files))
while coea_train_idx == coea_eval_idx:  # Training and evaluation data should not be the same.
    coea_eval_idx = np.random.randint(0, len(train_files))

logger_main.info('The file used for CoEA training: {}'.format(
    train_files[coea_train_idx]))
logger_main.info('The file used for CoEA evaluation: {}'.format(
    train_files[coea_eval_idx]))

data_train = np.genfromtxt(train_files[coea_train_idx],
                           dtype=np.float32,
                           delimiter=',')[:, :-1]
data_eval = np.genfromtxt(train_files[coea_eval_idx],
                          dtype=np.float32,
                          delimiter=',')[:, :-1]
data_train = ds.normalize(data_train, min_value, max_value)
data_eval = ds.normalize(data_eval, min_value, max_value)

coea_start_time = datetime.datetime.now()

ca = coea.CoEA(pop_size_bits=6,
               n_layer_species=4,
               layer_weights=layer_weights,
               net_weights=net_weights,
               iters=5000,
               net_pop_size=80,
               data_train=data_train,
               data_eval=data_eval)

logger_main.info(
    'The CoEA initialized with network population size of {}, layer population size of {}, and using '