import collections
from operator import itemgetter

import numpy as np

# NOTE: the helper module `mh` (complete_dataset, gen_train_labels, partition)
# and the functions get_MH_sampled_IDs and write_to_csv are assumed to be
# defined/imported elsewhere in this package.

def main():
    #nr = int(input('Enter the dimension N of the square lattice -->'))
    nr = 4
    reps = nr * nr  # number of lattice sites

    # generate complete dataset
    (training_data, training_H, training_M) = mh.complete_dataset(reps)
    train_labels = mh.gen_train_labels(training_data, training_H, training_M)

    # compute class frequencies
    counter = collections.Counter(train_labels)
    val = list(counter.values())
    freq = np.array(val) / float(len(train_labels))

    # partition into training (90%) and testing (10%) datasets
    [train, test] = mh.partition(training_data, train_labels)
    train_set = train[0]
    train_y = train[1]
    test_set = test[0]
    test_y = test[1]
    test_classes = np.unique(test_y)
    train_classes = np.unique(train_y)

    # append the class label as a final column to every sample
    training = []
    testing = []
    for i in range(len(train_set)):
        training.append(np.hstack((train_set[i], train_y[i])))
    for i in range(len(test_set)):
        testing.append(np.hstack((test_set[i], test_y[i])))

    # reshape to (n_samples, reps + 1); np.reshape returns a new array,
    # so the result must be assigned back
    testing = np.reshape(testing, (len(testing), reps + 1))
    training = np.reshape(training, (len(training), reps + 1))

    # sort samples by their label (last column)
    train2 = sorted(training, key=itemgetter(-1))
    test2 = sorted(testing, key=itemgetter(-1))

    print("Initial number of random seeds is set to 10")
    print("It yields a training dataset of 10,000 samples per temperature")
    print("The temperature range is set to [0.1, 40.0] with step size 0.5")
    expr = input("Would you like to increase the training dataset size? 'y'/'n' ")
    if expr == 'y':
        r = int(input("Enter a factor (integer) by which to increase the sample size "))
    else:
        r = 1

    train_res = get_MH_sampled_IDs(train2, train_classes, freq, r)
    test_res = get_MH_sampled_IDs(test2, test_classes, freq, 1)
    write_to_csv(train_res, "training")
    write_to_csv(test_res, "testing")
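
# --- Hedged sketch: order-safe class frequencies ----------------------------
# The Counter-based frequencies in main() come out in Counter's insertion
# order, while train_classes/test_classes come from np.unique, which returns
# sorted classes. The minimal, self-contained sketch below (toy labels, no
# mh dependency) shows an alternative that keeps classes and frequencies
# aligned by construction; it is an illustration, not part of the pipeline.

def _frequency_example():
    import numpy as np  # local import so the sketch stands alone
    labels = np.array([2, 0, 1, 1, 2, 2, 0, 1, 1, 2])  # toy stand-in for train_labels
    # np.unique returns the classes in sorted order together with their counts
    classes, counts = np.unique(labels, return_counts=True)
    freq = counts / float(len(labels))
    print(classes)  # [0 1 2]
    print(freq)     # [0.2 0.4 0.4]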
import ising_model_princeton2 as I
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier

# NOTE: `mh` (complete_dataset, gen_train_labels, partition) is assumed to be
# imported elsewhere in this module.

nr = int(input('Enter the dimension N of the square lattice -->'))
reps = nr * nr  # number of lattice sites

# generate complete dataset
(training_data, training_H, training_M) = mh.complete_dataset(reps)
train_labels = mh.gen_train_labels(training_data, training_H, training_M)

# partition into training (90%) and testing (10%) datasets
[train, test] = mh.partition(training_data, train_labels)
train_set = train[0]
train_y = train[1]
test_set = test[0]
test_y = test[1]

# append the class label as a final column to every sample
training = []
testing = []
for i in range(len(train_set)):
    training.append(np.hstack((train_set[i], train_y[i])))
for i in range(len(test_set)):
    testing.append(np.hstack((test_set[i], test_y[i])))

# reshape to (n_samples, reps + 1); the hard-coded width of 17 only held for
# nr = 4, and np.reshape returns a new array, so assign the result back
testing = np.reshape(testing, (len(testing), reps + 1))
training = np.reshape(training, (len(training), reps + 1))
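
# --- Hedged sketch: label-stacking pattern -----------------------------------
# Self-contained toy example of the np.hstack / np.reshape pattern used above,
# on a 2x2 lattice (reps = 4); the data and names here are illustrative only.

def _stacking_example():
    import numpy as np  # local import so the sketch stands alone
    reps = 4
    configs = [np.array([1, -1, 1, 1]), np.array([-1, -1, 1, -1])]  # toy spins
    labels = [1, 0]
    # append each label as a final column to its flattened configuration
    rows = [np.hstack((c, y)) for c, y in zip(configs, labels)]
    # each row: configuration plus label -> shape (2, reps + 1)
    data = np.reshape(rows, (len(rows), reps + 1))
    print(data)
    # [[ 1 -1  1  1  1]
    #  [-1 -1  1 -1  0]]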