Пример #1
0
    # Hyperparameter values noted for earlier runs (kept for reference).
    # For 1000 train
    # C= 1.0
    # gamma = 0.1

    # For 1m train
    # C = 0.10000000000000001
    # gamma = 0.0001
    logging.info("Train SVC with C: %f, gamma: %f" % (C, gamma))
    # probability=True is needed because predict_proba() is called on the
    # fitted model below.
    svc = SVC(C=C, gamma=gamma, probability=True, verbose=True).fit(X_train, y_train)

    # The test set is read in five parts (part = 1..5); each part's path is
    # built from test_filepattern.
    field = ["id", "click"]  # column names handed to csv.DictWriter
    for part in range(1, 6):
        test_filepath = test_filepattern % part
        logging.info("Loading test set [%s]..." % test_filepath)
        X_test, ids_test = p.load_test_data(test_filepath)
        logging.info("Shape X = %r, ids =%r" % (X_test.shape, ids_test.shape))
        logging.info("example X = %s\nids =%r" % (X_test[0], ids_test[0]))
        svc_probs = svc.predict_proba(X_test)
        # Each row of svc_probs is [prob of 0, prob of 1].
        logging.info("prob of test: %s" % svc_probs[:10])

        # The output name is derived from the input path plus SAMPLE, C and gamma.
        out_filepath = "%s-svc-t1M-s%d-c%f-g%f.csv" % (test_filepath, SAMPLE, C, gamma)
        logging.info("Writing out file %s" % out_filepath)
        # Only write when there is exactly one probability row per test id;
        # otherwise log an error and move on to the next part.
        if len(ids_test) != len(svc_probs):
            logging.error("Test case count don:t match")
        else:
            # Append mode: an existing file from a previous run is extended,
            # not overwritten.
            with open(out_filepath, "a") as ofile:
                writer = csv.DictWriter(ofile, field)
                # NOTE(review): out_filepath differs for every part, so only
                # the part-1 file receives a header row -- confirm this is
                # intended (e.g. the files are concatenated afterwards).
                if part == 1:
                    writer.writeheader()
Пример #2
0
    #XXX skip test
    #exit()

    #Load test data for 5 times
    test_filepattern = 'data/test_%d_M.out'
    field = ['id', 'click']
    for part in range(1, 6):
        test_filepath = test_filepattern % part
        logging.info("Loading test set [%s]..." % test_filepath)
        #X_test, ids_test= p.load_test_data(test_filepath)
        #Load data with category

        width = 10000
        for a_slice in range(100):
            logging.info("+++Doing slice %d+++" %(a_slice))
            X_test, ids_test= p.load_test_data(test_filepath, enc = enc, map_dict = map_dict, start_line_no = a_slice*width)
            if X_test == None:
                break
            logging.info("Shape X = %r, ids =%r" %(X_test.shape, ids_test.shape ))
            logging.info("example X = %s\nids =%r" %(X_test[0], ids_test[0]))
            learner_probs = gs_learner.predict_proba(X_test)
            #[prob of 0, prob of 1]
            logging.info("prob of test: %s" % learner_probs[:10])
            out_filepath = "%s-knn-s%d-n%d-w-%s-a-%s.csv" %(test_filepath, n_subsamples, gs_learner.best_params_['n_neighbors'], gs_learner.best_params_['weights'], gs_learner.best_params_['algorithm'])
            logging.info("Writing out file %s" % out_filepath)
            if len(ids_test) != len(learner_probs):
                logging.error("Test case count don:t match")
            else :
                with open(out_filepath, 'a') as ofile:
                    writer = csv.DictWriter(ofile, field)
                    if part == 1 and a_slice == 0: