# Experiment setup: load the raw CSV into the project's DataSet wrapper, mark
# 'communityname' as the class column, then standardize and fix missing values.
#
# drop_columns() is called with an empty list because every candidate column
# is currently disabled. Candidates that have been toggled here:
#   communityname, countyCode, communityCode, fold,
#   murders, murdPerPop, rapes, rapesPerPop, robberies, robbbPerPop,
#   assaults, assaultPerPop, burglaries, burglPerPop, larcenies, larcPerPop,
#   autoTheft, autoTheftPerPop, arsons, arsonsPerPop,
#   ViolentCrimesPerPop, nonViolPerPop
#
# NOTE(review): fix_missing(drop_attributes=True) presumably discards the
# attributes (columns) that contain missing values -- confirm in DataSet.
dataset = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],
        string_columns=['state'],
    )
    .set_class_column('communityname')
    .drop_columns([])
    .standardize()
    .fix_missing(drop_attributes=True)
)
# Experiment setup: load the raw CSV with the identifier-like columns declared
# as nominals (they are not dropped; the earlier drop() calls for the string
# columns 'state'/'communityname' and the nominal columns
# 'countyCode'/'communityCode' are disabled).
#
# Columns intentionally left out of the drop list, so they stay in the data:
#   assaults, assaultPerPop, arsons, arsonsPerPop, nonViolPerPop
crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'],
).drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    'ViolentCrimesPerPop',
])

# Diagnostic output: shows which container type backs the data matrix.
print(type(crime.X))

# Normalize first, then keep only the two columns of interest.
crime = crime.normalize().take_columns(['nonViolPerPop', 'arsons'])
# NOTE(review): wildcard import kept as-is; code further down the file may
# depend on other names re-exported by Framework.DataSet.
from Framework.DataSet import *

# Experiment setup: load the raw CSV with 'state' as the class column, remove
# the fold marker and every crime count / per-population crime column, and
# normalize what is left.
crime = DataSet(
    datafile='../data/raw.csv',
    na_values=['?'],
    string_columns=['communityname', 'state'],
    class_column='state',
).drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    'assaults', 'assaultPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    'arsons', 'arsonsPerPop',
    'ViolentCrimesPerPop',
    'nonViolPerPop',
]).normalize()

# `crime` stays bound to the normalized data set; `data` is the version after
# missing-value handling.
# NOTE(review): drop_objects=True presumably removes the objects (rows) that
# contain missing values -- confirm in DataSet.
data = crime.fix_missing(drop_objects=True)

# Data matrix used from here on. A disabled alternative loaded it from
# '../Data/synth1.mat' via loadmat and np.matrix(mat_data['X']).
X = data.X