Exemplo n.º 1
0
def preprocess(lower_threshold, numlines_train, numlines_test, only_trainingset):
    """Preprocess the training data and optionally a separate test data file.

    Creates features and extracts targets. The work is delegated to
    ``create_names`` (which supplies the seven names unpacked below) and to a
    ``pp`` instance (CSV import, then creation of the new feature/target CSVs).

    Args:
        lower_threshold: forwarded unchanged to ``create_names`` and ``pp``.
        numlines_train: forwarded unchanged to ``create_names`` and ``pp``.
        numlines_test: forwarded unchanged to ``create_names`` and ``pp``.
        only_trainingset: forwarded unchanged to ``create_names`` and ``pp``.

    Note:
        ``upper_threshold`` is not a parameter; it is looked up in the
        enclosing (module) scope at call time.
        NOTE(review): confirm it is defined there before this function runs.
    """
    # The trailing 1 is passed through as-is; its meaning is defined by
    # create_names.
    (csv_train, storage_train, features_train, targets_train,
     csv_test, storage_test, features_test) = create_names(
        lower_threshold, upper_threshold, numlines_train, numlines_test,
        only_trainingset, 1)

    prep = pp(lower_threshold, upper_threshold, numlines_train, numlines_test,
              only_trainingset)

    # Import data from csv files:
    prep.import_csv(csv_train, csv_test, storage_train, storage_test)
    # Create features and save to new csv files:
    prep.create_new_csvs(features_train, targets_train, features_test,
                         storage_train, storage_test)
Exemplo n.º 2
0
'''

from NaiveBayes import NaiveBayes
from create_filenames import create_names
import numpy as np
from slice_merge import slice_csv, merge_csvs

# Run configuration. The first four values are handed to create_names below.
lower_threshold = 7
upper_threshold = 100
numlines_train = 5000  # 0 means: take all lines
numlines_test = 0  # 0 means: take all lines
slice_size = 1000
use_only_trainingset_and_split = True

# create_names returns the four names unpacked here; the two trailing zeros
# are passed through as-is.
(features_file_train, targets_file_train,
 features_file_test, predictions_file_test) = create_names(
    lower_threshold, upper_threshold, numlines_train, numlines_test, 0, 0)

def main():
    """Load the training features, the training targets and the test features.

    Input paths are read from the module-level names ``features_file_train``,
    ``targets_file_train`` and ``features_file_test``. If an ``IOError`` is
    raised while loading, a message is printed and the function returns with
    whatever has not been loaded yet still set to ``None``.
    """
    features_train = None
    IDs_train = None
    all_targets_train = None
    # The loaded test data gets its own local names. Assigning to
    # ``features_file_test`` inside this function would make that name local
    # and hide the module-level path, so load_features would receive None.
    features_test = None
    IDs_test = None
    try:
        print('load training features...')
        features_train, IDs_train = load_features(features_file_train, 1)
        print('load training targets...')
        all_targets_train = load_targets(targets_file_train)
        print('load test features...')
        # Same call shape as for the training features above, which yields two
        # values, so the result is unpacked the same way.
        features_test, IDs_test = load_features(features_file_test, 1)
    except IOError:
        print("The corresponding files have not been created yet.")
'''

from NaiveBayes import NaiveBayes
from create_filenames import create_names
import numpy as np
from slice_merge import slice_csv, merge_csvs

# Run configuration. The first three values are handed to create_names below.
lower_threshold = 5
upper_threshold = 100
numlines_train = 100  # 0 means: take all lines

split_at = 0.7


# create_names returns the five names unpacked here; the trailing 0, 1, 0
# arguments are passed through as-is.
(features_file_train, targets_file_train, features_file_test,
 probabilities_filename, predictions_filename) = create_names(
    lower_threshold, upper_threshold, numlines_train, 0, 1, 0)

def main():
    # Loads the training features and training targets from the module-level
    # paths features_file_train and targets_file_train.
    # NOTE(review): only the loading stage is visible here; the loaded values
    # are not used within these lines.

    # Placeholders, overwritten by a successful load below.
    features = None
    IDs = None
    all_targets = None
    try:
        print 'load training features...'
        # load_features yields two values, bound here to features and IDs.
        features, IDs = load_features(features_file_train, 1)
        print 'load training targets...'
        all_targets = load_targets(targets_file_train)
    except IOError:
        # An IOError during loading is reported as "files not created yet" and
        # the program is terminated.
        print "The corresponding files have not been created yet."
        print "Please run preprocessing with the same parameters and try again."
        # NOTE(review): exit status 0 reports success to the calling process
        # even though loading failed -- confirm this is intended.
        raise SystemExit(0)