import pylab as pl
from Framework.DataSet import *
from Tools import writeapriorifile

# Load the data file. '?' is passed as the missing-value marker and the two
# listed columns are declared as string columns.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

# Preprocess in one pipeline: fix_missing(drop_objects=True) followed by
# binarize(). Presumably this removes incomplete rows and converts the
# attributes to 0/1 values -- confirm against Framework.DataSet.
dataset = dataset.fix_missing(drop_objects=True).binarize()

# Thresholds for the rule mining below. Presumably minimum support, minimum
# confidence and maximum rule size; units are not visible here -- TODO confirm.
minSup, minConf, maxRule = 40, 90, 4

# BEGIN APRIORI
# Write the data matrix to the file that the Apriori step works from.
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)
# 'murders', 'murdPerPop', # 'rapes', 'rapesPerPop', # 'robberies', 'robbbPerPop', # 'assaults', 'assaultPerPop', # 'burglaries', 'burglPerPop', # 'larcenies', 'larcPerPop', # 'autoTheft', 'autoTheftPerPop', # 'arsons', 'arsonsPerPop', # 'ViolentCrimesPerPop', # 'nonViolPerPop', ]) #dataset = dataset.standardize() dataset = dataset.standardize(); dataset = dataset.fix_missing(drop_attributes=True) outer_n = 5 inner_n = 3 for outer_i in range(outer_n): X = dataset.X M = dataset.M N = dataset.N displayN = 10
#'countyCode', 'communityCode', #'fold', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies', 'larcPerPop', 'autoTheft', 'autoTheftPerPop', 'arsons', 'arsonsPerPop', #'ViolentCrimesPerPop', 'nonViolPerPop', ]) crime = crime.normalize() crime = crime.fix_missing(fill_mean=True) crime = crime.discretize('ViolentCrimesPerPop',2) crime = crime.classIn('ViolentCrimesPerPop') print(type(crime.X)) print(crime.y) X=crime.X dataset=crime y = crime.y X = dataset.X y = dataset.y N = dataset.N M = dataset.M
import pylab as pl
from Framework.DataSet import *
from Tools import writeapriorifile

# Build the data set from the CSV file; '?' is given as the missing-value
# marker, and 'state' / 'communityname' are declared as string columns.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

# Chain the two preprocessing steps; each call's result replaces `dataset`.
# NOTE(review): presumably drops incomplete objects and then binarizes the
# attributes -- verify against Framework.DataSet.
dataset = dataset.fix_missing(drop_objects=True).binarize()

# Mining thresholds (presumably min. support, min. confidence and max. rule
# length -- TODO confirm meaning and units where they are consumed).
minSup, minConf, maxRule = 40, 90, 4

# BEGIN APRIORI
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

# These imports stay after the star import above so that the names they
# bind (np, subprocess, call, re, os) keep taking precedence.
import numpy as np
import subprocess
from subprocess import call
import re
import os

# Run Apriori Algorithm
print('Mining for frequent itemsets by the Apriori algorithm')
#     'ViolentCrimesPerPop',
#     'nonViolPerPop',
# ])

# Preprocessing pipeline for `crime`, written as a single fluent chain.
# Each step's result feeds the next, exactly as the step-by-step
# reassignment did. Disabled alternatives are kept as comments.
crime = (
    crime
    .take_columns([
        'racePctHisp',
        'racePctWhite',
        #'racepctblack',
        #'racePctAsian',
        'medIncome',
        'NumStreet',
        'NumImmig',
        'PctEmploy',
        'PctPopUnderPov',
        'pctUrban',
    ])
    .fix_missing(fill_mean=True)
    .standardize()
    # .normalize()        (disabled alternative to standardize)
    # .drop_nominals()    (disabled)
    .take_first_n_rows(200)
    # 'racePctWhite' is discretized with argument 2 and then used as the
    # class column (presumably 2 = number of bins -- TODO confirm).
    .discretize('racePctWhite', 2)
    .set_class_column('racePctWhite')
)
#crime = DataSet(dataframe=crime.df[:200])

#print(crime.df.assaults)
print(crime.y)

#col = crime.one_of_k('pctUrban', 2)
#print(col)

#dataset = crime.discretize('pctUrban', 2)
#dataset = crime.set_class_column('pctUrban')