def marginalise_demo2():
    """Open the marginalisation demo GUI on a small joint distribution.

    The CSV text held in the module-level ``cancerdat`` is parsed into a
    ``CompactFactor``; the factor over Smoker, Cancer and Bronchitis is
    taken from it, normalised, and passed to ``marginalise_gui``.
    """
    from IO import read_csv
    from Data import CompactFactor
    from Demos import marginalise_gui
    from StringIO import StringIO

    csv_stream = StringIO(cancerdat)
    compact = CompactFactor(read_csv(csv_stream))

    # Indexing the compact factor by variable names creates a normal factor.
    factor = compact['Smoker', 'Cancer', 'Bronchitis']
    normalised_factor = factor.normalised()
    marginalise_gui(normalised_factor)
def florida_demo():
    """Print the Florida death penalty data, raw and normalised.

    The CSV text held in the module-level ``floridadat`` is parsed into a
    ``CompactFactor``.  The factor over Murderer, Sentence and Victim is
    printed, followed by its ``z()`` value (reported as the number of
    observations) and finally its normalised form.

    Side effect: ``Parameters.precision`` is set to 6 before the normalised
    table is printed and is not restored afterwards.
    """
    from IO import read_csv
    from Data import CompactFactor
    import Parameters
    from StringIO import StringIO

    csv_stream = StringIO(floridadat)
    compact = CompactFactor(read_csv(csv_stream))

    # Indexing the compact factor by variable names creates a normal factor.
    counts = compact['Murderer', 'Sentence', 'Victim']

    # A single parenthesised argument prints exactly as the bare print
    # statement does.
    print(counts)
    observations = counts.z()
    print('Number of observations is %d' % observations)

    # presumably controls how many digits are shown when printing - confirm
    Parameters.precision = 6
    print(counts.normalised())
def cancer_table():
    """Print a small contingency table and its normalised version.

    The CSV text held in the module-level ``cancerdat`` is parsed into a
    ``CompactFactor``.  The factor over Smoker, Cancer and Bronchitis is
    printed, followed by its ``z()`` value (reported as the number of
    observations) and finally its normalised form.

    Side effect: ``Parameters.precision`` is set to 6 before the normalised
    table is printed and is not restored afterwards.
    """
    from IO import read_csv
    from Data import CompactFactor
    import Parameters
    from StringIO import StringIO

    csv_stream = StringIO(cancerdat)
    compact = CompactFactor(read_csv(csv_stream))

    # Indexing the compact factor by variable names creates a normal factor.
    contingency = compact['Smoker', 'Cancer', 'Bronchitis']

    # A single parenthesised argument prints exactly as the bare print
    # statement does.
    print(contingency)
    observations = contingency.z()
    print('Number of observations is %d' % observations)

    # presumably controls how many digits are shown when printing - confirm
    Parameters.precision = 6
    print(contingency.normalised())
from random import sample


def random_undersample(dataset, sample_size=-1, sample_percent=-1):
    """Randomly undersample a dataset without replacement.

    The number of items drawn is decided in this order:

    * if ``0 <= sample_size <= len(dataset)``, ``int(sample_size)`` items
      are drawn;
    * otherwise, if ``0 <= sample_percent <= 100``, that percentage of
      ``len(dataset)`` (truncated to an integer) is drawn;
    * otherwise nothing is drawn.

    Args:
        dataset: Sequence of items to sample from.
        sample_size: Absolute number of items wanted.  The default of -1
            means "not specified".
        sample_percent: Percentage (0-100) of the dataset wanted.  Only
            consulted when ``sample_size`` is out of range.  The default
            of -1 means "not specified".

    Returns:
        A new list of randomly chosen items from ``dataset``.  An empty
        list is returned when neither size argument is in range, or when
        the arguments cannot be interpreted (for example ``dataset`` has
        no length).
    """
    # determine sample size
    try:
        if 0 <= sample_size <= len(dataset):
            s_size = int(sample_size)
        elif 0 <= sample_percent <= 100:
            s_size = int(len(dataset) * sample_percent / 100)
        else:
            s_size = 0
    except (TypeError, ValueError):
        # Best effort: unusable arguments yield an empty sample.  Only the
        # errors bad arguments can produce are caught, so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        return []

    # return sample
    return sample(dataset, s_size)


if __name__ == '__main__':
    from IO import read_csv, write_csv, read_header

    file_path = '../data/train.csv'
    out_file_path = '../data_preprocessed/train_undersampled.csv'

    # read dataset
    dataset = read_csv(file_path, has_header=True)
    header = read_header(file_path)

    # undersample; the result must not be called `sample`, or it would
    # rebind the module-level random.sample that random_undersample uses
    undersampled = random_undersample(dataset, sample_percent=5)

    # write to file
    write_csv(out_file_path, undersampled, header=header)