Пример #1
0
# Build the working dataset as a single pipeline. Every step returns the
# dataset that the next step operates on, so the order below is significant.
dataset = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],  # presumably the file's missing-value marker -- confirm in DataSet
        string_columns=['state'],
    )
    .set_class_column('communityname')
    # Nothing is dropped in this configuration; the call is kept with an
    # empty list. Candidate columns that are currently disabled:
    #   communityname, countyCode, communityCode, fold,
    #   murders, murdPerPop, rapes, rapesPerPop, robberies, robbbPerPop,
    #   assaults, assaultPerPop, burglaries, burglPerPop,
    #   larcenies, larcPerPop, autoTheft, autoTheftPerPop,
    #   arsons, arsonsPerPop, ViolentCrimesPerPop, nonViolPerPop
    .drop_columns([])
    .standardize()
    # NOTE(review): missing values are handled only after standardizing;
    # drop_attributes=True presumably removes incomplete columns -- confirm.
    .fix_missing(drop_attributes=True)
)


Пример #2
0
# Load the raw data and immediately remove the fold marker and most crime
# count / per-population columns.
# Not dropped: 'assaults', 'assaultPerPop', 'arsonsPerPop', and the two columns
# selected at the end of this script ('arsons', 'nonViolPerPop').
# The string/nominal columns are also kept (the earlier drop calls for them
# are disabled).
crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'],
).drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    'ViolentCrimesPerPop',
])

# Debug output: report the type of the underlying data container.
print(type(crime.X))

# Normalize first, then keep only the two columns of interest.
crime = crime.normalize().take_columns(['nonViolPerPop', 'arsons'])
from Framework.DataSet import *

# Load the raw data with 'state' as the class column, remove the fold marker
# and every crime count / per-population column, then normalize the rest.
crime = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],  # presumably the file's missing-value marker -- confirm in DataSet
        string_columns=['communityname', 'state'],
        class_column='state',
    )
    .drop_columns([
        'fold',
        'murders',
        'murdPerPop',
        'rapes',
        'rapesPerPop',
        'robberies',
        'robbbPerPop',
        'assaults',
        'assaultPerPop',
        'burglaries',
        'burglPerPop',
        'larcenies',
        'larcPerPop',
        'autoTheft',
        'autoTheftPerPop',
        'arsons',
        'arsonsPerPop',
        'ViolentCrimesPerPop',
        'nonViolPerPop',
    ])
    .normalize()
)

# `crime` stays bound to the normalized dataset; `data` is the result of
# fix_missing on it (drop_objects=True presumably removes incomplete rows --
# confirm against DataSet.fix_missing).
data = crime.fix_missing(drop_objects=True)

# X is taken from the cleaned dataset. A disabled alternative loaded it from
# the synthetic file '../Data/synth1.mat' instead.
X = data.X