import datetime
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.pipeline import FeatureUnion

# Project-local modules used below: i_p (image loading), f (feature
# construction), cl / classify (sampling and training helpers), plus the
# module-level names pre, clf_dict, convert_testdata, zero_one, and
# SUBMISSION_DIR, which are defined elsewhere in this repository.


def dump_train():
    # Load the grayscale training/test images and their labels.
    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    # Build the feature matrix; FeatureUnion names come back as
    # "<transformer>__<feature>", so keep only the feature part.
    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    feature_name_list = [s.split("__")[1] for s in fu.get_feature_names()]
    feature_name_list.append("target")

    train_X = fu.fit_transform(train_df)
    train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))

    # Downsample to 20% to keep the dumped CSV manageable.
    train_X, train_y = cl.downsampling_data(train_X, train_y, 0.2)

    train_dump = pd.DataFrame(np.c_[train_X, train_y], columns=feature_name_list)
    dump_path = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/train_dump"
    train_dump.to_csv(dump_path + "/train_dump.csv", index=False)
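# Sketch (not part of the original pipeline): reloading the dump written by
# dump_train() for a later fit. load_train_dump is a hypothetical helper; it
# only assumes the CSV layout produced above, i.e. feature columns followed
# by a final "target" column.
def load_train_dump():
    dump_csv = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/train_dump/train_dump.csv"
    df = pd.read_csv(dump_csv)
    train_X = df.drop("target", axis=1).values
    train_y = df["target"].values
    return train_X, train_y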
def prediction(clf_name):
    print("****************classifier****************")
    print(clf_dict[clf_name]["clf"])
    clf = clf_dict[clf_name]["clf"]

    _, _, _, train_gray_data, test_gray_data, _, labels = i_p.load_data()
    # list() so the keys can be indexed below (dict views are not indexable).
    train_keys = list(train_gray_data.keys())
    test_keys = list(test_gray_data.keys())

    train_df = f.make_data_df(train_gray_data, labels)
    test_df = f.make_test_df(test_gray_data)

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ["pngname", "input", "label"]
    test_df.columns = ["pngname", "input"]

    if clf_name == "SGDB":
        # Incremental learning: fit one image at a time with partial_fit.
        # operation check
        # train_df, train_keys, test_df, test_keys = pre.make_checkdata(mode="df")
        # train_df, train_keys, _, _ = pre.make_checkdata(mode="df")
        for i in range(len(train_keys)):
            train_X, train_y = classify.set_traindata(train_df, train_keys[i])
            clf.partial_fit(train_X, train_y)
    else:
        # Batch learning: build the full feature matrix, downsample, then fit.
        # operation check
        # train_df, train_keys, _, _ = pre.make_checkdata(mode="df")
        fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
        train_X = fu.fit_transform(train_df)
        train_y = np.concatenate(train_df["label"].apply(lambda x: x.flatten()))
        train_X, train_y = classify.downsampling_data(train_X, train_y, 0.2)
        clf.fit(train_X, train_y)

    # Persist the fitted classifier; pickle needs a binary file handle.
    clf_dir = os.path.abspath(os.path.dirname(__file__)) +\
        "/../tmp/fit_instance/"
    now = datetime.datetime.now()
    savefile = clf_dir + clf_name + now.strftime("%Y_%m_%d_%H_%M_%S") + ".pickle"
    with open(savefile, "wb") as fi:
        pickle.dump(clf, fi)

    for i in range(len(test_keys)):
        # .values replaces the removed DataFrame.as_matrix().
        test_img = test_df[test_df["pngname"] == test_keys[i]]["input"].values[0]
        imgname = test_keys[i]
        shape = test_img.shape
        test_img = {test_keys[i]: test_img}

        X_test = convert_testdata(test_img)
        output = clf.predict(X_test)
        output = np.asarray(output)

        # Binarize the prediction and restore the image shape.
        zo = np.vectorize(zero_one)
        output = zo(output).reshape(shape)

        # One submission row per pixel: "<pngname>_<row>_<column>", value.
        tmp = []
        for row in range(len(output)):
            for column in range(len(output[row])):
                id_ = imgname + "_" + str(row + 1) + "_" + str(column + 1)
                value = output[row][column]
                pix = [id_, value]
                tmp.append(pix)

        if i == 0:
            predict_df = pd.DataFrame(tmp)
        else:
            tmp_df = pd.DataFrame(tmp)
            predict_df = pd.concat([predict_df, tmp_df])

    predict_df.columns = ["id", "value"]
    now = datetime.datetime.now()
    submission_path = SUBMISSION_DIR + "/submission_" +\
        now.strftime("%Y_%m_%d_%H_%M_%S") + ".csv"
    predict_df.to_csv(submission_path, header=True, index=False)
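# Usage sketch (assumed entry point, not confirmed by the original source):
# dump the training features once, then train and predict with a classifier
# registered in clf_dict; the "SGDB" key handled above is used as an example.
if __name__ == "__main__":
    dump_train()
    prediction("SGDB")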