Example No. 1
def create_list_classifiers_one_level(filename):
    files = os.listdir('../training_csv_files')
    # Reduce the incoming path to its base name without extension
    filename = filename.split('/')[-1].split('.')[0]
    for file in files:
        if filename in file:
            # The numeric suffix of the CSV name identifies the classifier
            nb = file.split('_')[-1].split('.')[0]
            name_classifier = file.split('.csv')[0] + '_classifier_' + nb
            classifier.create_classifier('../training_csv_files/' + file,
                                         name_classifier, nb)
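
To make the naming convention concrete, here is a tiny standalone sketch of the string handling above, using a hypothetical file name that is not taken from the original project:

file = 'reviews_bad_3.csv'   # hypothetical training CSV

nb = file.split('_')[-1].split('.')[0]                          # '3'
name_classifier = file.split('.csv')[0] + '_classifier_' + nb   # 'reviews_bad_3_classifier_3'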
Example No. 2
def create_list_classifiers_2ls(filename):
    # 0.05: only 2 records, which is not enough to train
    files = os.listdir('../training_csv_files')
    # Reduce the incoming path to its base name without extension
    filename = filename.split('/')[-1].split('.')[0]
    print(filename)
    for file in files:
        if (filename + '_bad') in file or (filename + '_0_1') in file:
            # The numeric suffix of the CSV name identifies the classifier
            nb = file.split('_')[-1].split('.')[0]
            name_classifier = file.split('.csv')[0] + '_classifier_' + nb
            classifier.create_classifier('../training_csv_files/' + file,
                                         name_classifier, nb)
Example No. 3
def __gen_classifiers():
    # Standardise the training features
    x_scaler = StandardScaler().fit(x_train)
    x_train_scaled = x_scaler.transform(x_train)

    # Optionally reduce dimensionality with PCA before training
    if SETTINGS['classifier']['pca_size'] > 0:
        x_pca = PCA(n_components=SETTINGS['classifier']['pca_size'])
        x_pca.fit(x_train_scaled)
        x_train_pca = x_pca.transform(x_train_scaled)
    else:
        x_pca = None
        x_train_pca = x_train_scaled

    svm, dt = create_classifier(x_train_pca, x_train_scaled, y_train,
                                SETTINGS['classifier'])
    if SETTINGS['classifier']['save']:
        with open(
                path.join(SETTINGS['classifier']['path'],
                          SETTINGS['classifier']['name']), 'wb') as f:
            print("Saving classifier.")
            features = {
                'svm': svm,
                'dt': dt,
                'x_scaler': x_scaler,
                'x_pca': x_pca
            }
            pickle.dump(features, f, pickle.HIGHEST_PROTOCOL)
        with open(path.join(SETTINGS['classifier']['path'], 'settings.p'),
                  'wb') as f:
            pickle.dump(SETTINGS['classifier'], f, pickle.HIGHEST_PROTOCOL)
    return svm, dt, x_scaler, x_pca
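
A possible counterpart for loading the bundle saved above, offered as a sketch only: load_classifiers and predict are hypothetical names, and it assumes the SVM was fit on the PCA-reduced features and the decision tree on the scaled features, as the argument order of create_classifier suggests.

def load_classifiers(classifier_dir, classifier_name):
    # Restore the objects pickled by __gen_classifiers()
    with open(path.join(classifier_dir, classifier_name), 'rb') as f:
        features = pickle.load(f)

    def predict(x):
        # Apply the same preprocessing that was used at training time
        x_scaled = features['x_scaler'].transform(x)
        if features['x_pca'] is not None:
            x_reduced = features['x_pca'].transform(x_scaled)
        else:
            x_reduced = x_scaled
        return features['svm'].predict(x_reduced), features['dt'].predict(x_scaled)

    return predict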
Example No. 4
def main():

    # Instantiate the console arguments function
    args = arg_parser()

    print("GPU setting: {}".format(args.gpu))

    # Define normalization for transforms
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # Define transformations for training, validation and test sets
    data_transforms = create_transforms(30, 224, 256, normalize)

    # Load the datasets from the image folders
    datasets = image_datasets(data_transforms)

    # Define the dataloaders using the image datasets
    loaders = data_loaders(datasets, 32)

    # Instantiate a new model
    model = create_model(arch=args.arch)

    output_units = len(datasets['training'].classes)

    # Create new classifier
    model.classifier = create_classifier(model, args.hidden_layers,
                                         output_units, args.dropout)

    device = check_gpu(args.gpu)
    print(device)
    model.to(device)

    learning_rate = args.learning_rate
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    epochs = args.epochs
    print_every = args.print_every
    steps = 0
    trainloader = loaders['training']
    validloader = loaders['validation']

    # Train the model, validating along the way
    trained_model = train(model, trainloader, validloader, device, criterion,
                          optimizer, epochs, print_every, steps)

    print("Training has completed.")

    test_model(trained_model, loaders['testing'], device)

    initial_checkpoint(trained_model, args.checkpoint_dir,
                       datasets['training'])
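
The create_classifier helper itself is not shown in this example. Below is a minimal sketch of what such a helper could look like, assuming a VGG-style model whose classifier attribute is an nn.Sequential, a single integer hidden_layers, and log-probability outputs to match the NLLLoss criterion above; this is an illustration, not the author's implementation.

from torch import nn

def create_classifier(model, hidden_layers, output_units, dropout):
    # Assumes a VGG-style head: input size taken from the first Linear layer
    input_units = model.classifier[0].in_features

    # One hidden layer with ReLU and dropout, then log-probabilities for NLLLoss
    return nn.Sequential(
        nn.Linear(input_units, hidden_layers),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_layers, output_units),
        nn.LogSoftmax(dim=1)
    )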
Example No. 5
def classify():
    global CLF

    CLF = create_classifier()
    c = {0: "green", 1: "red"}

    while True:
        # Claim the next unflagged tweet and mark it as taken
        result = db.tweets.find_one_and_update({'flag': {'$exists': False}},
                                               {'$set': {'flag': True}})
        try:
            r = CLF.predict(result["tweet"])
            result["color"] = c[r[0]]
            db.sentiment.insert_one(result)
        except Exception:
            pass
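
Note that find_one_and_update returns None when no unflagged tweet remains, so result["tweet"] raises and is silently swallowed by the bare except. A sketch of the same loop body with that case handled explicitly (a hypothetical rewrite, not the original author's code):

    while True:
        # None means nothing is pending right now
        result = db.tweets.find_one_and_update({'flag': {'$exists': False}},
                                               {'$set': {'flag': True}})
        if result is None:
            continue
        r = CLF.predict(result["tweet"])
        result["color"] = c[r[0]]
        db.sentiment.insert_one(result)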
Example No. 6
def classify():
    print("Starting classification")
    with emolytics.app_context():
        CLF = create_classifier()
        c = {0: "green", 1: "red"}

        while True:
            # Fetch all tweets that have not been processed yet
            result = Tweet.query.filter(Tweet.flag == False).all()
            try:
                for t in result:
                    r = CLF.predict(t.tweet.encode('utf-8'))
                    t.color = c[int(r)]
                db.session.commit()
            except IntegrityError:
                db.session.rollback()
            except Exception:
                pass
Example No. 7
def learn(train, dev, test, args, sargs_str):

    # Read strategy-specific args
    sargs = util.parse(parser, sargs_str.split())

    # Clean out the sandbox
    util.mkdir(sargs['sandbox'], clean=True)

    # Feature columns describe how to use the input
    my_feature_columns = []
    for key in train[0].keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Calculate epoch length
    steps_per_epoch = math.ceil(len(train[0]) / sargs['batch'])
    total_steps = sargs['epochs'] * steps_per_epoch

    # Train a classifier
    extra_args = {
        'classes': CLASSES,
        'columns': my_feature_columns,
        'steps_per_epoch': steps_per_epoch,
        'learning_rate': sargs['lr'],
        'model_dir': sargs['sandbox'],
        'warm_start_dir': None
    }
    merged_args = {**args, **sargs, **extra_args}

    # Create a new classifier instance
    classifier = cl.create_classifier(merged_args)

    # Train the model for the requested number of epochs
    classifier.train(
        input_fn=lambda: pandas2tf.train_input_fn(train, sargs['batch']),
        steps=total_steps)

    # Evaluate the model
    train_result = classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(train, sargs['batch']))
    dev_result = classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(dev, sargs['batch']))
    test_result = classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(test, sargs['batch']))
    return train_result, dev_result, test_result, classifier
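
The pandas2tf module is not shown in these examples. Assuming train, dev and test are (features DataFrame, labels) pairs, which the train[0].keys() loop above suggests, its input functions could follow the standard tf.data pattern; the sketch below is an assumption, not the project's actual code.

import tensorflow as tf

def train_input_fn(data, batch_size):
    # data is assumed to be a (features DataFrame, labels Series) pair
    features, labels = data
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    return dataset.shuffle(1000).repeat().batch(batch_size)

def eval_input_fn(data, batch_size):
    features, labels = data
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    return dataset.batch(batch_size)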
Example No. 8
def learn():
    ds_x, ds_y = dataset.load_dataset()
    clsfr = classifier.create_classifier(verbose=True, layer_sizes=(100, 25))
    # Train on the first 9000 samples, score on the remainder
    classifier.train(clsfr, ds_x[:9000], ds_y[:9000])
    print(classifier.rate(clsfr, ds_x[9000:], ds_y[9000:]))
    classifier.dump_classifier(clsfr, '100x25')
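
The classifier module used here is opaque; one plausible reading, offered purely as a guess, is a thin wrapper around scikit-learn's MLPClassifier, with the wrapper function names mirroring the calls above.

from sklearn.neural_network import MLPClassifier
import pickle

def create_classifier(verbose=False, layer_sizes=(100,)):
    # Multi-layer perceptron with the requested hidden layer sizes
    return MLPClassifier(hidden_layer_sizes=layer_sizes, verbose=verbose)

def train(clf, x, y):
    clf.fit(x, y)

def rate(clf, x, y):
    # Mean accuracy on the held-out slice
    return clf.score(x, y)

def dump_classifier(clf, name):
    with open(name + '.p', 'wb') as f:
        pickle.dump(clf, f)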
Example No. 9
def learn(train, dev, test, args, sargs_str):

    # Read strategy-specific args
    sargs = util.parse(parser, sargs_str.split())
   
    # Clean out the sandbox
    util.mkdir(sargs['sandbox'], clean=True)

    # Feature columns describe how to use the input
    my_feature_columns = []
    for key in train[0].keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Calculate epoch length
    steps_per_epoch = math.ceil(len(train[0]) / sargs['batch'])

    # Train a classifier
    # Repeat until the model consecutively "misses" a set number of times
    rounds = 1
    misses = miss_streak = 0
    best_result = {'fmes': -1}
    best_model_dir = None
    best_classifier = None
    while miss_streak < sargs['max_misses']:

        model_dir = os.path.join(sargs['sandbox'], 'run_' + str(rounds) + '_' + str(miss_streak))

        extra_args = {
            'classes': CLASSES,
            'columns': my_feature_columns,
            'steps_per_epoch': steps_per_epoch,
            'learning_rate': sargs['lr'] / (2 ** misses),
            'model_dir': model_dir,
            'warm_start_dir': best_model_dir
        }
        merged_args = {**args, **sargs, **extra_args}

        # Create a new classifier instance
        classifier = cl.create_classifier(merged_args)

        # Train the model for exactly 1 epoch
        classifier.train(
            input_fn=lambda: pandas2tf.train_input_fn(train, sargs['batch']),
            steps=steps_per_epoch)

        # Evaluate on the dev set and compare against the best score so far
        eval_result = classifier.evaluate(
            input_fn=lambda: pandas2tf.eval_input_fn(dev, sargs['batch']))
        log('Round ' + str(rounds) + '_' + str(miss_streak) + ', Fmes: ' +
            str(best_result['fmes']) + ' --> ' + str(eval_result['fmes']))
        if eval_result['fmes'] > best_result['fmes']:
            best_result = eval_result
            best_model_dir = model_dir
            best_classifier = classifier
            miss_streak = 0
            rounds += 1
            log('Improvement, go on...')
        else:
            miss_streak += 1
            misses += 1
            log('Miss #' + str(misses) + ', (streak = ' + str(miss_streak) + ')')
        
        # Clean up the sandbox so old model directories do not exhaust disk space
        for m_dir in os.listdir(sargs['sandbox']):
            abs_m_dir = os.path.join(sargs['sandbox'], m_dir)
            if best_model_dir != abs_m_dir and model_dir != abs_m_dir:
                tf.summary.FileWriterCache.clear()
                shutil.rmtree(abs_m_dir)

    # Evaluate the best model on all three splits before returning
    final_result_train = best_classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(train, sargs['batch']))
    final_result_dev = best_classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(dev, sargs['batch']))
    final_result_test = best_classifier.evaluate(
        input_fn=lambda: pandas2tf.eval_input_fn(test, sargs['batch']))
    return final_result_train, final_result_dev, final_result_test, best_classifier
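
As a closing usage note: each miss halves the learning rate for the next attempt (sargs['lr'] / (2 ** misses)) while warm-starting from the best checkpoint directory, so the loop behaves like early stopping with learning-rate decay. A hypothetical invocation follows; the flag names are guessed from the sargs keys used above and are assumed to be defined by the strategy's parser.

results = learn(train, dev, test, args,
                '--sandbox /tmp/sandbox --batch 64 --lr 0.01 --max_misses 3')
train_result, dev_result, test_result, best_model = results
print('Best dev F-measure:', dev_result['fmes'])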