import pylab as pl


from Framework.DataSet import *
from Tools import writeapriorifile

# Load the CSV (na_values=['?'], with 'state' and 'communityname' declared as
# string columns), resolve missing values with the drop_objects strategy and
# binarize the result.
dataset = (
    DataSet(
        datafile='../data/normalized.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .binarize()
)

# Thresholds for the rule mining step.
# NOTE(review): presumably minimum support / minimum confidence (in percent)
# and maximum rule length -- confirm against the code that consumes them.
minSup = 40
minConf = 90
maxRule = 4

# BEGIN APRIORI
# Write the binarized data (dataset.X) to the file handed to WriteAprioriFile.
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

# Example #2
0
#	'murders', 'murdPerPop',
#	'rapes', 'rapesPerPop',
#	'robberies', 'robbbPerPop',
#	'assaults', 'assaultPerPop',
#	'burglaries', 'burglPerPop',
#	'larcenies', 'larcPerPop',
#	'autoTheft', 'autoTheftPerPop',
#	'arsons', 'arsonsPerPop',
#	'ViolentCrimesPerPop',
#	'nonViolPerPop',
])
# Standardize the data set, then resolve missing values with the
# drop_attributes strategy of fix_missing.
dataset = dataset.standardize()
dataset = dataset.fix_missing(drop_attributes=True)



# Iteration counts for an outer and an inner loop.
# NOTE(review): the names suggest nested cross-validation, but the splitting
# code is not visible in this fragment -- confirm.
outer_n = 5
inner_n = 3  # not referenced in the visible part of the loop
for outer_i in range(outer_n):
	
	
	# Re-read X, M and N from the data set on every outer iteration.
	# NOTE(review): presumably the data matrix and its dimensions -- confirm
	# against Framework.DataSet.
	X = dataset.X
	M = dataset.M
	N = dataset.N
	
	
	# NOTE(review): purpose not visible here; the rest of the loop body
	# appears to be missing from this fragment.
	displayN = 10
	
	#'countyCode', 'communityCode',
	#'fold',
	'murders', 'murdPerPop',
	'rapes', 'rapesPerPop',
	'robberies', 'robbbPerPop',
	'assaults', 'assaultPerPop',
	'burglaries', 'burglPerPop',
	'larcenies', 'larcPerPop',
	'autoTheft', 'autoTheftPerPop',
	'arsons', 'arsonsPerPop',
	#'ViolentCrimesPerPop',
	'nonViolPerPop',
])

# Preprocess the crime data: normalize, resolve missing values with the
# fill_mean strategy, discretize 'ViolentCrimesPerPop' and pass the same
# column to classIn.
# NOTE(review): the 2 is presumably the number of discretization bins, and
# classIn presumably marks the column as the class attribute -- confirm
# against Framework.DataSet.
crime = (
    crime.normalize()
    .fix_missing(fill_mean=True)
    .discretize('ViolentCrimesPerPop', 2)
    .classIn('ViolentCrimesPerPop')
)

# Debug output: the type of crime.X and the contents of crime.y.
print(type(crime.X))
print(crime.y)

# Bind the prepared data set and its X, y, N and M attributes to module-level
# names. `dataset` is an alias of `crime`, so each attribute is read once
# instead of assigning X and y twice from the same object.
dataset = crime
X = dataset.X
y = dataset.y
N = dataset.N
M = dataset.M
# Example #4
0
import pylab as pl

from Framework.DataSet import *
from Tools import writeapriorifile

# Build the data set from the CSV (na_values=['?'], 'state' and
# 'communityname' declared as string columns), resolve missing values with
# the drop_objects strategy and binarize it, all in one pipeline.
dataset = (
    DataSet(
        datafile='../data/normalized.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .binarize()
)

# Settings for the rule mining step.
# NOTE(review): presumably minimum support / minimum confidence (in percent)
# and maximum rule length -- confirm against the code that consumes them.
minSup = 40
minConf = 90
maxRule = 4

# BEGIN APRIORI
# Write dataset.X to the file handed to WriteAprioriFile.
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

import numpy as np
import subprocess
from subprocess import call
import re
import os

# Announce the mining step; the command that runs it is not part of this
# fragment.
print('Mining for frequent itemsets by the Apriori algorithm')
# Example #5
0
# 	'ViolentCrimesPerPop',
# 	'nonViolPerPop',
# ])
# Reduce the crime data to a handful of attributes ('racepctblack' and
# 'racePctAsian' are currently excluded), resolve missing values with the
# fill_mean strategy, standardize, keep the first 200 rows, then discretize
# 'racePctWhite' and make it the class column.
# Alternatives that were tried and disabled: normalize() instead of
# standardize(), drop_nominals(), and building the subset via
# DataSet(dataframe=crime.df[:200]).
crime = (
    crime.take_columns([
        'racePctHisp',
        'racePctWhite',
        'medIncome',
        'NumStreet',
        'NumImmig',
        'PctEmploy',
        'PctPopUnderPov',
        'pctUrban',
    ])
    .fix_missing(fill_mean=True)
    .standardize()
    .take_first_n_rows(200)
    .discretize('racePctWhite', 2)
    .set_class_column('racePctWhite')
)

# Show crime.y after the class column has been set.
print(crime.y)
#col = crime.one_of_k('pctUrban', 2)
#print(col)
#dataset = crime.discretize('pctUrban', 2)
#dataset = crime.set_class_column('pctUrban')