"""K-fold cross-validation of a linear SVM on the 3k training features."""
import numpy as np
import read_data as rd
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20. On newer
# releases the equivalent is sklearn.model_selection.KFold(n_splits=...) with
# its .split(X) method.
from sklearn.cross_validation import KFold
from sklearn.svm import LinearSVC

# Labels and names for the training set, loaded through the project's
# read_data helpers.
train_label, picname_3k, labelname_3k = rd.read_train_3k(rd.train_3k)
test_name = rd.read_test_3k(rd.test_3k)
attr_name = rd.read_attributes_list(rd.attr_list)

# Feature matrices; the unused test-set loads are left commented out.
train_data = rd.read_attributes(rd.attr_train)
# test_data = rd.read_attributes(rd.attr_test)
train_data_alex = rd.read_npy(rd.alexnet_train)
test_data_alex = rd.read_npy(rd.alexnet_test)
train_data_siftbow = rd.read_npy(rd.siftbow_train)
# test_data_siftbow = rd.read_npy(rd.siftbow_test)


def cross_validate(train_data, n_folds=5):
    """Return the mean held-out score of ``svm`` over ``n_folds`` folds.

    Args:
        train_data: Feature matrix; it is indexed with the index arrays
            produced by ``KFold``, so it must support array indexing.
        n_folds: Number of folds to split ``train_data`` into.

    Returns:
        The average of the per-fold scores as a float.

    The labels are taken from the module-level ``train_label``, which must
    line up row-for-row with ``train_data``. Each fold's score is printed as
    it is computed.
    """
    kf = KFold(len(train_data), n_folds=n_folds)
    ret = 0.0
    for train_index, test_index in kf:
        X_train, X_test = train_data[train_index], train_data[test_index]
        y_train, y_test = train_label[train_index], train_label[test_index]
        temp = svm(X_train, y_train, X_test, y_test)
        # Single-argument call form prints identically on Python 2 and 3.
        print(temp)
        ret += temp
    return ret / n_folds


def svm(X_train, y_train, X_test, y_test):
    """Fit a linear SVM on the training split and score it on the test split.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.

    Returns:
        The value of ``LinearSVC.score`` on the held-out split.
    """
    # Alternative classifier that was tried earlier:
    # clf = SVC(C=1.0, kernel='sigmoid')
    clf = LinearSVC(dual=False)
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    # The score must be returned: cross_validate accumulates it.
    return score
def mode(data, n_samples=1000, n_classes=200):
    """One-hot encode a sequence of class indices into a vote matrix.

    Args:
        data: Sequence of integer class indices, one per sample.
        n_samples: Number of rows in the result (default 1000, the size the
            script has always used). Rows beyond ``len(data)`` stay zero.
        n_classes: Number of columns in the result (default 200).

    Returns:
        A float array of shape ``(n_samples, n_classes)`` where row ``i``
        has a 1 in column ``data[i]`` and zeros elsewhere.

    Raises:
        IndexError: If ``data`` has more than ``n_samples`` entries or holds
            an index outside ``[-n_classes, n_classes)``.
    """
    result = np.zeros((n_samples, n_classes))
    for row, label in enumerate(data):
        result[row][label] = 1
    return result


if __name__ == "__main__":
    piclabel_3k, picname_3k, labels_3k = read_data.read_train_3k(read_data.train_3k)
    attributes_train = read_data.read_attributes(read_data.attr_train)
    attributes_test = read_data.read_attributes(read_data.attr_test)
    alexnet_train = read_data.read_npy(read_data.alexnet_train)
    alexnet_test = read_data.read_npy(read_data.alexnet_test)

    # Shift the AlexNet features by the training minimum so the training
    # matrix is non-negative before it goes to multinomialNB.
    # NOTE(review): test values below the training minimum remain negative
    # after this shift -- confirm multinomialNB tolerates that.
    alexmin = np.amin(alexnet_train)
    alexnet_train_ = alexnet_train - alexmin
    alexnet_test_ = alexnet_test - alexmin

    # Six classifiers: three naive Bayes variants on each feature set.
    prediction1 = gaussianNB(attributes_train, piclabel_3k, attributes_test)
    prediction2 = multinomialNB(attributes_train, piclabel_3k, attributes_test)
    prediction3 = bernoulliNB(attributes_train, piclabel_3k, attributes_test)
    prediction4 = gaussianNB(alexnet_train, piclabel_3k, alexnet_test)
    prediction5 = multinomialNB(alexnet_train_, piclabel_3k, alexnet_test_)
    prediction6 = bernoulliNB(alexnet_train, piclabel_3k, alexnet_test)

    # Majority vote: add the one-hot votes and pick the most-voted class.
    # np.argmax returns the first maximum, so ties go to the lowest index.
    prediction = (mode(prediction1) + mode(prediction2) + mode(prediction3)
                  + mode(prediction4) + mode(prediction5) + mode(prediction6))
    prediction = np.argmax(prediction, axis=1)

    test_imgname = read_data.read_test_3k(read_data.test_3k)

    # 'wb' is the Python 2 convention for csv output; Python 3 would need
    # mode 'w' with newline=''. The with-block closes the file even if a
    # write fails.
    with open('combine_naive_noprob.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["ID", "Category"])
        for img_name, label_idx in zip(test_imgname, prediction):
            writer.writerow([img_name, labels_3k[label_idx]])
"""K-fold cross-validation of a linear SVM on the 3k training features."""
import numpy as np
import read_data as rd
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20. On newer
# releases the equivalent is sklearn.model_selection.KFold(n_splits=...) with
# its .split(X) method.
from sklearn.cross_validation import KFold
from sklearn.svm import LinearSVC

# Labels and names for the training set, loaded through the project's
# read_data helpers.
train_label, picname_3k, labelname_3k = rd.read_train_3k(rd.train_3k)
test_name = rd.read_test_3k(rd.test_3k)
attr_name = rd.read_attributes_list(rd.attr_list)

# Feature matrices; the unused test-set loads are left commented out.
train_data = rd.read_attributes(rd.attr_train)
# test_data = rd.read_attributes(rd.attr_test)
train_data_alex = rd.read_npy(rd.alexnet_train)
test_data_alex = rd.read_npy(rd.alexnet_test)
train_data_siftbow = rd.read_npy(rd.siftbow_train)
# test_data_siftbow = rd.read_npy(rd.siftbow_test)


def cross_validate(train_data, n_folds=5):
    """Average the held-out score of ``svm`` across ``n_folds`` folds.

    Args:
        train_data: Feature matrix; it is indexed with the index arrays
            produced by ``KFold``, so it must support array indexing.
        n_folds: Number of folds to split ``train_data`` into.

    Returns:
        The mean of the per-fold scores as a float.

    Labels are read from the module-level ``train_label``, which must line
    up row-for-row with ``train_data``. Each fold's score is printed as it
    is computed.
    """
    kf = KFold(len(train_data), n_folds=n_folds)
    ret = 0.0
    for train_index, test_index in kf:
        X_train, X_test = train_data[train_index], train_data[test_index]
        y_train, y_test = train_label[train_index], train_label[test_index]
        temp = svm(X_train, y_train, X_test, y_test)
        # Single-argument call form prints identically on Python 2 and 3.
        print(temp)
        ret += temp
    return ret / n_folds


def svm(X_train, y_train, X_test, y_test):
    """Train a linear SVM on one split and score it on the other.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.

    Returns:
        The value of ``LinearSVC.score`` on the held-out split.
    """
    # Alternative classifier that was tried earlier:
    # clf = SVC(C=1.0, kernel='sigmoid')
    clf = LinearSVC(dual=False)
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    # The score must be returned: cross_validate accumulates it.
    return score