from Framework.DataSet import * from pylab import * from scipy.io import loadmat from toolbox_02450 import clusterplot from scipy.cluster.hierarchy import linkage, fcluster, dendrogram crime = DataSet(datafile='../data/normalized.csv') #crime = crime.drop(['state', 'communityname']) # Drop strings #crime = crime.drop(['countyCode','communityCode']) # Drop nominals # crime = crime.drop_columns([ # 'fold', # 'murders', 'murdPerPop', # 'rapes', 'rapesPerPop', # 'robberies', 'robbbPerPop', # # 'assaults', 'assaultPerPop', # 'burglaries', 'burglPerPop', # 'larcenies', 'larcPerPop', # 'autoTheft', 'autoTheftPerPop', # 'arsons', 'arsonsPerPop', # 'ViolentCrimesPerPop', # 'nonViolPerPop', # ]) crime = crime.take_columns([ 'racePctHisp', 'racePctWhite', #'racepctblack', #'racePctAsian', 'medIncome', 'NumStreet', 'NumImmig', 'PctEmploy', "PctPopUnderPov", 'pctUrban'
"""Load ../data/normalized.csv, binarize it and export it for Apriori."""
import pylab as pl
from Framework.DataSet import *
from Tools import writeapriorifile

# '?' is passed as the NA marker; 'state' and 'communityname' are declared
# as string columns.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

# Each call returns the DataSet that is rebound to `dataset`.
dataset = dataset.fix_missing(drop_objects=True)
dataset = dataset.binarize()

# Not used in the visible part of the script.
# NOTE(review): presumably minimum support / minimum confidence (percent)
# and maximum rule size for the Apriori run -- confirm against the consumer.
minSup = 40
minConf = 90
maxRule = 4

# BEGIN APRIORI
filename = '../tmp/apriori.txt'
# Hands the binarized matrix to the project's writer together with the
# target path (presumably writes the Apriori input file -- see
# Tools.writeapriorifile).
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)
"""Load ../data/raw.csv and drop 'fold' plus most crime-count columns."""
import pylab as pl
from Framework.DataSet import *

crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'],
)

# Disabled drop() calls kept from the original script.
#crime = crime.drop(['state', 'communityname'])  # Drop strings
#crime = crime.drop(['countyCode','communityCode'])  # Drop nominals

# Entries that are commented out below are NOT dropped, i.e. the
# assaults, arsons and nonViolPerPop columns stay in the data set.
crime = crime.drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    # 'assaults', 'assaultPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    #'arsons', 'arsonsPerPop',
    'ViolentCrimesPerPop',
    #'nonViolPerPop',
])

# Debug output: shows the container type of the attribute matrix.
print(type(crime.X))
"""Export the binarized ../data/normalized.csv and start the Apriori step."""
import pylab as pl
from Framework.DataSet import *
from Tools import writeapriorifile

# '?' is passed as the NA marker; 'state' and 'communityname' are declared
# as string columns.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)
dataset = dataset.fix_missing(drop_objects=True)
dataset = dataset.binarize()

# Not used in the visible part of the script.
# NOTE(review): presumably minimum support / minimum confidence (percent)
# and maximum rule size for the Apriori run -- confirm against the consumer.
minSup = 40
minConf = 90
maxRule = 4

# BEGIN APRIORI
filename = '../tmp/apriori.txt'
writeapriorifile.WriteAprioriFile(dataset.X, filename=filename)

# These imports run after the wildcard import above, so these explicit
# bindings take precedence over any same-named wildcard-imported names.
import numpy as np
import subprocess
from subprocess import call
import re
import os

# Run Apriori Algorithm
print('Mining for frequent itemsets by the Apriori algorithm')
"""Load ../data/raw.csv, use 'communityname' as class column, standardize."""
from Framework.DataSet import *
from Tools import writeapriorifile
import pylab as pl

# '?' is passed as the NA marker; only 'state' is declared a string column.
dataset = DataSet(
    datafile='../data/raw.csv',
    na_values=['?'],
    string_columns=['state'],
)
dataset = dataset.set_class_column('communityname')

# Every candidate below is commented out, so drop_columns() is called
# with an empty list.
dataset = dataset.drop_columns([
    # 'communityname',
    # 'countyCode',
    ## 'communityCode',
    # 'fold',
    # 'murders', 'murdPerPop',
    # 'rapes', 'rapesPerPop',
    # 'robberies', 'robbbPerPop',
    # 'assaults', 'assaultPerPop',
    # 'burglaries', 'burglPerPop',
    # 'larcenies', 'larcPerPop',
    # 'autoTheft', 'autoTheftPerPop',
    # 'arsons', 'arsonsPerPop',
    # 'ViolentCrimesPerPop',
    # 'nonViolPerPop',
])

#dataset = dataset.standardize()
dataset = dataset.standardize()
"""Load ../data/raw.csv and use the discretized 'arsons' column as class."""
import pylab as pl
from Framework.DataSet import *

# '?' is passed as the NA marker; 'state' and 'communityname' are declared
# as string columns.
dataset = DataSet(
    datafile='../data/raw.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

# Each step returns the DataSet that is rebound to `dataset`.
dataset = dataset.fix_missing(drop_objects=True)
dataset = dataset.standardize()
# NOTE(review): the 2 is presumably the number of bins -- confirm in
# DataSet.discretize.
dataset = dataset.discretize('arsons', 2)
# nodelete=True is passed so that, presumably, 'arsons' also stays among
# the attributes -- confirm in DataSet.set_class_column.
dataset = dataset.set_class_column('arsons', nodelete=True)

print(dataset.y)
#dataset = dataset.normalize()
"""Load ../data/raw.csv and build a 'ViolentCrimesPerPop' class column."""
import pylab as pl
from Framework.DataSet import *

crime = DataSet(
    datafile='../data/raw.csv',
    nominals=['state', 'communityname', 'countyCode', 'communityCode'],
)

# Entries that are commented out below are NOT dropped.
# NOTE(review): the source text does not show whether 'communityCode'
# shared the commented-out line with 'countyCode'; it is treated as an
# active entry here -- confirm against the original script.
crime = crime.drop_columns([
    'state',
    'communityname',
    #'countyCode',
    'communityCode',
    #'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    'assaults', 'assaultPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    'arsons', 'arsonsPerPop',
    #'ViolentCrimesPerPop',
    'nonViolPerPop',
])

# Each step returns the DataSet that is rebound to `crime`.
crime = crime.normalize()
crime = crime.fix_missing(fill_mean=True)
# NOTE(review): the 2 is presumably the number of bins -- confirm in
# DataSet.discretize.
crime = crime.discretize('ViolentCrimesPerPop', 2)
crime = crime.classIn('ViolentCrimesPerPop')

# Debug output: container type of the attribute matrix, then the class
# vector.
print(type(crime.X))
print(crime.y)
from toolbox_02450 import clusterplot from sklearn.mixture import GMM from pylab import * from scipy.io import loadmat from sklearn.mixture import GMM from sklearn import cross_validation # Load Matlab data file and extract variables of interest from Framework.DataSet import * crime = DataSet( datafile ='../data/raw.csv', na_values=['?'], string_columns =['communityname','state'], class_column = 'state' ) crime = crime.drop_columns([ 'fold', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies', 'larcPerPop', 'autoTheft', 'autoTheftPerPop', 'arsons', 'arsonsPerPop', 'ViolentCrimesPerPop', 'nonViolPerPop',
from Framework.DataSet import * from pylab import * from scipy.io import loadmat from toolbox_02450 import clusterplot from sklearn.mixture import GMM from sklearn import cross_validation crime = DataSet(datafile='../data/normalized.csv') #crime = crime.drop(['state', 'communityname']) # Drop strings #crime = crime.drop(['countyCode','communityCode']) # Drop nominals # crime = crime.drop_columns([ # 'fold', # 'murders', 'murdPerPop', # 'rapes', 'rapesPerPop', # 'robberies', 'robbbPerPop', # # 'assaults', 'assaultPerPop', # 'burglaries', 'burglPerPop', # 'larcenies', 'larcPerPop', # 'autoTheft', 'autoTheftPerPop', # 'arsons', 'arsonsPerPop', # 'ViolentCrimesPerPop', # 'nonViolPerPop', # ]) crime = crime.take_columns([ 'racePctHisp', 'racePctWhite', #'racepctblack', #'racePctAsian', 'medIncome',
from Framework.DataSet import * from pylab import * from scipy.io import loadmat from toolbox_02450 import clusterplot from sklearn.mixture import GMM from sklearn import cross_validation crime = DataSet(datafile='../data/normalized.csv') #crime = crime.drop(['state', 'communityname']) # Drop strings #crime = crime.drop(['countyCode','communityCode']) # Drop nominals # crime = crime.drop_columns([ # 'fold', # 'murders', 'murdPerPop', # 'rapes', 'rapesPerPop', # 'robberies', 'robbbPerPop', # # 'assaults', 'assaultPerPop', # 'burglaries', 'burglPerPop', # 'larcenies', 'larcPerPop', # 'autoTheft', 'autoTheftPerPop', # 'arsons', 'arsonsPerPop', # 'ViolentCrimesPerPop', # 'nonViolPerPop', # ]) crime = crime.take_columns([ 'racePctHisp', 'racePctWhite', #'racepctblack', #'racePctAsian', 'medIncome', 'NumStreet', 'NumImmig',
"""Run the project's PCA on ../data/normalized.csv and plot it."""
import pylab as pl
from Framework.DataSet import *
from Framework.PCA import *
from Tools import writeapriorifile

# '?' is passed as the NA marker; 'state' and 'communityname' are declared
# as string columns.
dataset = DataSet(
    datafile='../data/normalized.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

#dataset = dataset.drop_rows([21])
#dataset = dataset.standardize()
dataset = dataset.fix_missing(drop_attributes=True)
#print(dataset.df)

# PCA comes from the Framework.PCA wildcard import; the plot is asked to
# colour by 'medIncome'.
pca = PCA(dataset)
pca.plot(color='medIncome')
from Framework.DataSet import * from pylab import * from scipy.io import loadmat from toolbox_02450 import clusterplot from scipy.cluster.hierarchy import linkage, fcluster, dendrogram crime = DataSet(datafile='../data/normalized.csv') #crime = crime.drop(['state', 'communityname']) # Drop strings #crime = crime.drop(['countyCode','communityCode']) # Drop nominals # crime = crime.drop_columns([ # 'fold', # 'murders', 'murdPerPop', # 'rapes', 'rapesPerPop', # 'robberies', 'robbbPerPop', # # 'assaults', 'assaultPerPop', # 'burglaries', 'burglPerPop', # 'larcenies', 'larcPerPop', # 'autoTheft', 'autoTheftPerPop', # 'arsons', 'arsonsPerPop', # 'ViolentCrimesPerPop', # 'nonViolPerPop', # ]) crime = crime.take_columns([ 'racePctHisp', 'racePctWhite', #'racepctblack', #'racePctAsian', 'medIncome', 'NumStreet',
import csv import numpy as np from Framework.DataSet import * from pylab import * from scipy.io import loadmat DATADIR = '../data/' crime = DataSet(datafile='../data/normalized.csv') drop_columns = [ 'State', 'countyCode', 'communityCode', 'communityname', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies', 'larcPerPop', 'autoTheft', # 'autoTheftPerPop', 'arsons', 'arsonsPerPop', 'violentPerPop', 'nonViolPerPop', ] crime = DataSet(datafile='../data/raw.csv', na_values=["?"], string_columns=['state', 'communityname']) crime = crime.drop_columns([ 'fold',
"""Build a data set from ../data/raw.csv with 'arsons' as class column."""
import pylab as pl
from Framework.DataSet import *

# '?' is passed as the NA marker; 'state' and 'communityname' are declared
# as string columns.
dataset = DataSet(
    datafile='../data/raw.csv',
    na_values=['?'],
    string_columns=['state', 'communityname'],
)

# Preprocessing pipeline; every call returns the DataSet rebound below.
dataset = dataset.fix_missing(drop_objects=True)
dataset = dataset.standardize()
# NOTE(review): second argument is presumably the bin count -- confirm in
# DataSet.discretize.
dataset = dataset.discretize('arsons', 2)
dataset = dataset.set_class_column('arsons', nodelete=True)

print(dataset.y)
#dataset = dataset.normalize()
import pylab as pl from Framework.DataSet import * crime = DataSet( datafile='../data/raw.csv', nominals=['state', 'communityname', 'countyCode', 'communityCode']) crime = crime.drop_columns([ 'state', 'communityname', #'countyCode', 'communityCode', #'fold', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies', 'larcPerPop', 'autoTheft', 'autoTheftPerPop', 'arsons', 'arsonsPerPop', #'ViolentCrimesPerPop', 'nonViolPerPop',
from scipy.io import loadmat from toolbox_02450 import clusterplot from sklearn.mixture import GMM from pylab import * from scipy.io import loadmat from sklearn.mixture import GMM from sklearn import cross_validation # Load Matlab data file and extract variables of interest from Framework.DataSet import * crime = DataSet(datafile='../data/raw.csv', na_values=['?'], string_columns=['communityname', 'state'], class_column='state') crime = crime.drop_columns([ 'fold', 'murders', 'murdPerPop', 'rapes', 'rapesPerPop', 'robberies', 'robbbPerPop', 'assaults', 'assaultPerPop', 'burglaries', 'burglPerPop', 'larcenies',