from Framework.DataSet import *

from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

crime = DataSet(datafile='../data/normalized.csv')

#crime = crime.drop(['state', 'communityname']) 	  # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals
# crime = crime.drop_columns([
# 	'fold',
# 	'murders', 'murdPerPop',
# 	'rapes', 'rapesPerPop',
# 	'robberies', 'robbbPerPop',
# #	'assaults', 'assaultPerPop',
# 	'burglaries', 'burglPerPop',
# 	'larcenies', 'larcPerPop',
# 	'autoTheft', 'autoTheftPerPop',
# 	'arsons', 'arsonsPerPop',
# 	'ViolentCrimesPerPop',
# 	'nonViolPerPop',
# ])
crime = crime.take_columns([
	'racePctHisp', 
	'racePctWhite',
	#'racepctblack',
	#'racePctAsian',
	'medIncome', 'NumStreet', 'NumImmig',
	'PctEmploy', "PctPopUnderPov", 'pctUrban'
import pylab as pl


from Framework.DataSet import *
from Tools import writeapriorifile

# Settings for the Apriori run. They are only assigned in this snippet;
# presumably support/confidence are percentages -- TODO confirm with the
# code that consumes them.
minSup, minConf, maxRule = 40, 90, 4

# Load the normalized data ('?' is passed as the NA marker, 'state' and
# 'communityname' as string columns), then apply fix_missing with
# drop_objects=True followed by binarize().
dataset = (
    DataSet(
        datafile='../data/normalized.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .binarize()
)

# BEGIN APRIORI
# Hand dataset.X to the project helper, which is given the output path.
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

示例#3
0
import pylab as pl

from Framework.DataSet import *

# Load the raw data with four columns declared as nominals and remove the
# listed columns in the same expression. 'assaults', 'assaultPerPop',
# 'arsons', 'arsonsPerPop' and 'nonViolPerPop' are not part of the drop list
# (they were commented out of it).
crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'],
).drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    'ViolentCrimesPerPop',
])

# Show which type crime.X has after the columns were removed.
print(type(crime.X))
示例#4
0
import pylab as pl

from Framework.DataSet import *
from Tools import writeapriorifile

# Build the data set in one expression: load the normalized CSV, run
# fix_missing(drop_objects=True), then binarize().
dataset = (
    DataSet(
        datafile='../data/normalized.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .binarize()
)

# Apriori parameters; assigned here and not read again in the visible code.
minSup, minConf, maxRule = 40, 90, 4

# BEGIN APRIORI
# The project helper receives dataset.X and the target file name.
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

import numpy as np
import subprocess
from subprocess import call
import re
import os

# Run Apriori Algorithm
# Only the progress message is emitted here; the mining step itself is not
# part of the visible code.
print('Mining for frequent itemsets by the Apriori algorithm')
示例#5
0
from Framework.DataSet import *
from Tools import writeapriorifile
import pylab as pl

# Load the raw data, use 'communityname' as the class column, then
# standardize. The steps are applied in the order written.
dataset = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],
        string_columns=['state'],
    )
    .set_class_column('communityname')
    # Every candidate column (communityname, countyCode, communityCode, fold
    # and the per-crime count / per-population columns) was commented out of
    # the list, so drop_columns is still called, but with an empty list.
    .drop_columns([])
    .standardize()
)
示例#6
0
import pylab as pl

from Framework.DataSet import *

# Preprocessing pipeline on the raw data, applied in this order:
# fix_missing(drop_objects=True) -> standardize() -> discretize 'arsons'
# with argument 2 -> set 'arsons' as the class column (nodelete=True).
dataset = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .standardize()
    .discretize('arsons', 2)
    .set_class_column('arsons', nodelete=True)
)

# Print dataset.y as it stands after the class column was set.
print(dataset.y)

#dataset = dataset.normalize()
import pylab as pl


from Framework.DataSet import *

# Load the raw data and run the whole preparation as a single chain.
# 'countyCode', 'communityCode', 'fold' and 'ViolentCrimesPerPop' are not in
# the drop list (they were commented out of it); 'ViolentCrimesPerPop' is
# used by the discretize/classIn steps below.
# NOTE(review): other snippets in this file select the class column with
# set_class_column(); confirm that DataSet also provides classIn().
crime = (
    DataSet(
        datafile='../data/raw.csv',
        nominals=['state', 'communityname', 'countyCode', 'communityCode'],
    )
    .drop_columns([
        'state', 'communityname',
        'murders', 'murdPerPop',
        'rapes', 'rapesPerPop',
        'robberies', 'robbbPerPop',
        'assaults', 'assaultPerPop',
        'burglaries', 'burglPerPop',
        'larcenies', 'larcPerPop',
        'autoTheft', 'autoTheftPerPop',
        'arsons', 'arsonsPerPop',
        'nonViolPerPop',
    ])
    .normalize()
    .fix_missing(fill_mean=True)
    .discretize('ViolentCrimesPerPop', 2)
    .classIn('ViolentCrimesPerPop')
)

# Inspect the result: the type of crime.X and the contents of crime.y.
print(type(crime.X))
print(crime.y)
from toolbox_02450 import clusterplot

from sklearn.mixture import GMM
from pylab import *
from scipy.io import loadmat
from sklearn.mixture import GMM
from sklearn import cross_validation


# Load Matlab data file and extract variables of interest
from Framework.DataSet import *

crime = DataSet(
	datafile ='../data/raw.csv',
	na_values=['?'],
	string_columns =['communityname','state'],
	class_column = 'state'
)

crime = crime.drop_columns([
	'fold',
	'murders', 'murdPerPop',
	'rapes', 'rapesPerPop',
	'robberies', 'robbbPerPop',
	'assaults', 'assaultPerPop',
	'burglaries', 'burglPerPop',
	'larcenies', 'larcPerPop',
	'autoTheft', 'autoTheftPerPop',
	'arsons', 'arsonsPerPop',
	'ViolentCrimesPerPop',
	'nonViolPerPop',
示例#9
0
from Framework.DataSet import *

from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from sklearn.mixture import GMM
from sklearn import cross_validation

crime = DataSet(datafile='../data/normalized.csv')

#crime = crime.drop(['state', 'communityname']) 	  # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals
# crime = crime.drop_columns([
# 	'fold',
# 	'murders', 'murdPerPop',
# 	'rapes', 'rapesPerPop',
# 	'robberies', 'robbbPerPop',
# #	'assaults', 'assaultPerPop',
# 	'burglaries', 'burglPerPop',
# 	'larcenies', 'larcPerPop',
# 	'autoTheft', 'autoTheftPerPop',
# 	'arsons', 'arsonsPerPop',
# 	'ViolentCrimesPerPop',
# 	'nonViolPerPop',
# ])
crime = crime.take_columns([
    'racePctHisp',
    'racePctWhite',
    #'racepctblack',
    #'racePctAsian',
    'medIncome',
from Framework.DataSet import *

from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from sklearn.mixture import GMM
from sklearn import cross_validation

crime = DataSet(datafile='../data/normalized.csv')

#crime = crime.drop(['state', 'communityname']) 	  # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals
# crime = crime.drop_columns([
# 	'fold',
# 	'murders', 'murdPerPop',
# 	'rapes', 'rapesPerPop',
# 	'robberies', 'robbbPerPop',
# #	'assaults', 'assaultPerPop',
# 	'burglaries', 'burglPerPop',
# 	'larcenies', 'larcPerPop',
# 	'autoTheft', 'autoTheftPerPop',
# 	'arsons', 'arsonsPerPop',
# 	'ViolentCrimesPerPop',
# 	'nonViolPerPop',
# ])
crime = crime.take_columns([
	'racePctHisp', 
	'racePctWhite',
	#'racepctblack',
	#'racePctAsian',
	'medIncome', 'NumStreet', 'NumImmig',
import pylab as pl


from Framework.DataSet import *
from Framework.PCA import *
from Tools import writeapriorifile

# Load the normalized data and run fix_missing with drop_attributes=True
# before handing the result to PCA.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
).fix_missing(drop_attributes=True)

# Build the project's PCA object from the data set and plot it, passing
# 'medIncome' as the color argument.
pca = PCA(dataset)
pca.plot(color='medIncome')
示例#12
0
from Framework.DataSet import *

from pylab import *
from scipy.io import loadmat
from toolbox_02450 import clusterplot
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram

crime = DataSet(datafile='../data/normalized.csv')

#crime = crime.drop(['state', 'communityname']) 	  # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals
# crime = crime.drop_columns([
# 	'fold',
# 	'murders', 'murdPerPop',
# 	'rapes', 'rapesPerPop',
# 	'robberies', 'robbbPerPop',
# #	'assaults', 'assaultPerPop',
# 	'burglaries', 'burglPerPop',
# 	'larcenies', 'larcPerPop',
# 	'autoTheft', 'autoTheftPerPop',
# 	'arsons', 'arsonsPerPop',
# 	'ViolentCrimesPerPop',
# 	'nonViolPerPop',
# ])
crime = crime.take_columns([
    'racePctHisp',
    'racePctWhite',
    #'racepctblack',
    #'racePctAsian',
    'medIncome',
    'NumStreet',
import csv
import numpy as np
from Framework.DataSet import *

from pylab import *
from scipy.io import loadmat

# Data directory prefix. NOTE(review): the loads below spell the path out
# literally instead of using this constant.
DATADIR = '../data/'

# First load: the normalized data.
# NOTE(review): this object is replaced by the raw-data load further down
# without being used in the visible code -- confirm it is still needed.
crime = DataSet(datafile='../data/normalized.csv')

# Column names collected for removal ('autoTheftPerPop' was commented out
# of the list).
# NOTE(review): the list is not referenced in the visible code, and 'State'
# and 'violentPerPop' differ from the 'state' / 'ViolentCrimesPerPop'
# spellings used by other snippets -- verify against the CSV header.
drop_columns = [
    'State',
    'countyCode',
    'communityCode',
    'communityname',
    'murders',
    'murdPerPop',
    'rapes',
    'rapesPerPop',
    'robberies',
    'robbbPerPop',
    'assaults',
    'assaultPerPop',
    'burglaries',
    'burglPerPop',
    'larcenies',
    'larcPerPop',
    'autoTheft',
    'arsons',
    'arsonsPerPop',
    'violentPerPop',
    'nonViolPerPop',
]

# Second load: the raw data, with '?' passed as the NA marker and two
# string columns declared. This rebinds `crime`.
crime = DataSet(
    datafile='../data/raw.csv',
    na_values=["?"],
    string_columns=['state', 'communityname'],
)

crime = crime.drop_columns([
  'fold',
import pylab as pl


from Framework.DataSet import *

# Raw data -> fix_missing(drop_objects=True) -> standardize() ->
# discretize 'arsons' (argument 2) -> 'arsons' as class column, with
# nodelete=True passed to set_class_column.
dataset = (
    DataSet(
        datafile='../data/raw.csv',
        na_values=['?'],
        string_columns=['state', 'communityname'],
    )
    .fix_missing(drop_objects=True)
    .standardize()
    .discretize('arsons', 2)
    .set_class_column('arsons', nodelete=True)
)

# Output dataset.y once the pipeline has finished.
print(dataset.y)


#dataset = dataset.normalize()

import pylab as pl

from Framework.DataSet import *

crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'])

crime = crime.drop_columns([
    'state',
    'communityname',
    #'countyCode', 'communityCode',
    #'fold',
    'murders',
    'murdPerPop',
    'rapes',
    'rapesPerPop',
    'robberies',
    'robbbPerPop',
    'assaults',
    'assaultPerPop',
    'burglaries',
    'burglPerPop',
    'larcenies',
    'larcPerPop',
    'autoTheft',
    'autoTheftPerPop',
    'arsons',
    'arsonsPerPop',
    #'ViolentCrimesPerPop',
    'nonViolPerPop',
示例#16
0
from scipy.io import loadmat

from toolbox_02450 import clusterplot

from sklearn.mixture import GMM
from pylab import *
from scipy.io import loadmat
from sklearn.mixture import GMM
from sklearn import cross_validation

# Load Matlab data file and extract variables of interest
from Framework.DataSet import *

crime = DataSet(datafile='../data/raw.csv',
                na_values=['?'],
                string_columns=['communityname', 'state'],
                class_column='state')

crime = crime.drop_columns([
    'fold',
    'murders',
    'murdPerPop',
    'rapes',
    'rapesPerPop',
    'robberies',
    'robbbPerPop',
    'assaults',
    'assaultPerPop',
    'burglaries',
    'burglPerPop',
    'larcenies',