Example #1
def process_blocklists(db_file):
    """ prompt for and process blocklists """
    source = inquirer.ask_blocklist()

    import_list = []

    if source in blockLists:
        url_source = blockLists[source]
        resp = requests.get(url_source["url"])
        import_list = utils.process_lines(resp.text, url_source["comment"])

    if source == constants.FILE:
        fname = inquirer.ask_import_file()
        with open(fname) as import_file:
            import_list = utils.process_lines(import_file.read(), f"File: {fname}")

    if source == constants.PASTE:
        import_list = inquirer.ask_paste()
        import_list = utils.process_lines(import_list, "Pasted content")

    if len(import_list) == 0:
        utils.die("No valid urls found, try again")

    if not inquirer.confirm(
            f"Add {len(import_list)} block lists to {db_file}?"):
        utils.warn("Nothing changed. Bye!")
        sys.exit(0)

    conn = sqlite3.connect(db_file)
    sqldb = conn.cursor()
    added = 0
    exists = 0
    for item in import_list:
        sqldb.execute("SELECT COUNT(*) FROM adlist WHERE address = ?",
                      (item["url"], ))

        cnt = sqldb.fetchone()

        if cnt[0] > 0:
            exists += 1
        else:
            added += 1
            vals = (item["url"], item["comment"])
            sqldb.execute(
                "INSERT OR IGNORE INTO adlist (address, comment) VALUES (?,?)",
                vals)
            conn.commit()

    sqldb.close()
    conn.close()

    utils.success(f"{added} block lists added! {exists} already existed.")
Example #2
    def test_process_lines_full_url(self):
        comment = "MyComment"
        new_list = utils.process_lines(
            """
http://google.com
invalid
http://github.com
""",
            comment,
            True,
        )
        assert len(new_list) == 2

        assert new_list[1]["url"] == "http://github.com"
        assert new_list[1]["comment"] == comment
Example #3
    def test_process_lines_any(self):
        comment = "MyComment"
        new_list = utils.process_lines(
            """
github
github.com
http://github.com
http://github.com/test
http://github.com/test?f08s
""",
            comment,
            True,
        )
        assert len(new_list) == 3

        # assert new_list[1]["url"] == "http://github.com"
        assert new_list[1]["comment"] == comment
Example #4
def process_whitelists(db_file):
    """ prompt for and process blacklists """
    source = inquirer.ask_whitelist()

    import_list = []

    if source in whiteLists:
        url_source = whiteLists[source]
        resp = requests.get(url_source['url'])
        import_list = utils.process_lines(resp.text, url_source['comment'], False)
        # This breaks if we add a new whitelist setup
        if source != ANUDEEP_WHITELIST:
            resp = requests.get(ANUDEEP_WHITELIST)
            import_list += utils.process_lines(resp.text, url_source['comment'], False)

    if source == constants.FILE:
        fname = inquirer.ask_import_file()
        with open(fname) as import_file:
            import_list = utils.process_lines(import_file.read(), f'File: {fname}', False)

    if source == constants.PASTE:
        import_list = inquirer.ask_paste()
        import_list = utils.process_lines(import_list, 'Pasted content', utils.validate_host)

    if len(import_list) == 0:
        utils.die('No valid urls found, try again')

    if not inquirer.confirm(f'Add {len(import_list)} white lists to {db_file}?'):
        utils.warn('Nothing changed. Bye!')
        sys.exit(0)

    conn = sqlite3.connect(db_file)
    sqldb = conn.cursor()
    added = 0
    exists = 0
    for item in import_list:
        sqldb.execute(
            "SELECT COUNT(*) FROM domainlist WHERE domain = ?",
            (item['url'],))

        cnt = sqldb.fetchone()

        if cnt[0] > 0:
            exists += 1
        else:
            # 0 = exact whitelist
            # 2 = regex whitelist
            domain_type = 0
            if item['type'] == constants.REGEX:
                domain_type = 2

            vals = (item['url'], domain_type, item['comment'])
            sqldb.execute(
                'INSERT OR IGNORE INTO domainlist (domain, type, comment) VALUES (?,?,?)', vals)
            conn.commit()
            added += 1

    sqldb.close()
    conn.close()

    utils.success(f'{added} whitelists added! {exists} already existed.')
Example #5
    def test_process_lines_empty(self):
        new_list = utils.process_lines("", "", True)
        assert len(new_list) == 0
Example #6
def train(model, name):
    history_score = []
    start_time = time.time()
    print 'epochs\tloss\ttrain-auc\teval-auc\ttime'
    sys.stdout.flush()
    for i in range(num_round):
        fetches = [model.optimizer, model.loss]
        if batch_size > 0:
            ls = []
            f = open(train_file, 'r')
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)]), 0, -1)
                    _, l = model.run(fetches, X_i, y_i)
                    ls.append(l)
        elif batch_size == -1:
            pass
            """
            X_i, y_i = utils.slice(train_data)
            _, l = model.run(fetches, X_i, y_i)
            ls = [l]
            """
        lst_train_pred = []
        lst_test_pred = []
        if batch_size > 0:
            f = open(train_file, 'r')
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)]), 0, -1)
                    _train_preds = model.run(model.y_prob, X_i)
                    lst_train_pred.append(_train_preds)
            """
            for j in range(train_size / batch_size + 1):
                X_i, y_i = utils.slice(train_data, j * batch_size, batch_size)
                #X_i = utils.libsvm_2_coo(X_i, (len(X_i), input_dim)).tocsr()
                _train_preds = model.run(model.y_prob, X_i)
                lst_train_pred.append(_train_preds)
            """
            f = open(test_file, 'r')
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)]), 0, -1)
                    _test_preds = model.run(model.y_prob, X_i)
                    lst_test_pred.append(_test_preds)
            """
            for j in range(test_size / batch_size + 1):
                X_i, y_i = utils.slice(test_data, j * batch_size, batch_size)
                #X_i = utils.libsvm_2_coo(X_i, (len(X_i), input_dim)).tocsr()
                _test_preds = model.run(model.y_prob, X_i)
                lst_test_pred.append(_test_preds)
            """
        train_preds = np.concatenate(lst_train_pred)
        test_preds = np.concatenate(lst_test_pred)
        train_score = roc_auc_score(train_label, train_preds)
        test_score = roc_auc_score(test_label, test_preds)
        print '%d\t%f\t%f\t%f\t%f\t%s' % (i, np.mean(ls), train_score, test_score, time.time() - start_time, strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        path_model = 'model/' + str(name) + '_epoch_' + str(i)
        path_label_score = 'model/label_score_' + str(name) + '_epoch_' + str(i)
        #model.dump(path_model)
        d_label_score = {}
        d_label_score['label'] = test_label
        d_label_score['score'] = test_preds
        #pkl.dump(d_label_score, open(path_label_score, 'wb'))
        sys.stdout.flush()
        history_score.append(test_score)
        if i > min_round and i > early_stop_round:
            #if np.argmax(history_score) == i - early_stop_round and history_score[-1] - history_score[
            #            -1 * early_stop_round] < 1e-5:
            i_max = np.argmax(history_score)
            if i - i_max >= early_stop_round:
                print 'early stop\nbest iteration:\n[%d]\teval-auc: %f' % (
                    np.argmax(history_score), np.max(history_score))
                sys.stdout.flush()
                break
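
The early-stop check at the bottom of this loop only fires once the best evaluation score is at least early_stop_round epochs old. The same rule as a standalone helper (should_early_stop is a hypothetical name, not part of the original script):

import numpy as np

def should_early_stop(history_score, i, min_round, early_stop_round):
    """True when the best score is at least early_stop_round epochs behind epoch i."""
    if i <= min_round or i <= early_stop_round:
        return False
    return i - int(np.argmax(history_score)) >= early_stop_round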
Example #7
def train(model, name, in_memory=True, flag_MTL=False):
    #builder = tf.saved_model.builder.SavedModelBuilder('model')
    global batch_size, time_run, time_read, time_process
    history_score = []
    best_score = -1
    best_epoch = -1
    start_time = time.time()
    print 'epochs\tloss\ttrain-auc\teval-auc\ttime'
    sys.stdout.flush()
    if in_memory:
        train_data = utils.read_data(path_train, INPUT_DIM)
        validation_data = utils.read_data(path_validation, INPUT_DIM)
        test_data = utils.read_data(path_test, INPUT_DIM)
        model_name = name.split('_')[0]
        if model_name in set(['lr', 'fm']):
            train_data_tmp = utils.split_data(train_data, FIELD_OFFSETS)
            validation_data_tmp = utils.split_data(validation_data, FIELD_OFFSETS)
            test_data_tmp = utils.split_data(test_data, FIELD_OFFSETS)
        else:
            train_data = utils.split_data(train_data, FIELD_OFFSETS)
            validation_data = utils.split_data(validation_data, FIELD_OFFSETS)
            test_data = utils.split_data(test_data, FIELD_OFFSETS)
    for i in range(num_round):
        fetches = [model.optimizer, model.loss]
        if batch_size > 0:
            ls = []
            if in_memory:
                for j in range(train_size / batch_size + 1):
                    X_i, y_i = utils.slice(train_data, j * batch_size, batch_size)
                    _, l = model.run(fetches, X_i, y_i)
                    ls.append(l)
            else:
                f = open(path_train, 'r')
                lst_lines = []
                for line in f:
                    if len(lst_lines) < batch_size:
                        lst_lines.append(line)
                    else:
                        X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1) # type of X_i, X_i[0], X_i[0][0] is list, tuple and np.ndarray respectively.
                        _, l = model.run(fetches, X_i, y_i)
                        ls.append(l)
                        lst_lines = [line]
                f.close()
                if len(lst_lines) > 0:
                    X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1)
                    _, l = model.run(fetches, X_i, y_i)
                    ls.append(l)
        elif batch_size == -1:
            pass
        model.dump('model/' + name + '_epoch_' + str(i))
        if in_memory:
            lst_train_preds = []
            lst_validation_preds = []
            lst_test_preds = []
            for j in range(train_size / batch_size + 1):
                X_i, y_i = utils.slice(train_data, j * batch_size, batch_size)
                p = model.run(model.y_prob, X_i, y_i)
                lst_train_preds.append(p)
            for j in range(validation_size / batch_size + 1):
                X_i, y_i = utils.slice(validation_data, j * batch_size, batch_size)
                p = model.run(model.y_prob, X_i, y_i)
                lst_validation_preds.append(p)
            for j in range(test_size / batch_size + 1):
                X_i, y_i = utils.slice(test_data, j * batch_size, batch_size)
                p = model.run(model.y_prob, X_i, y_i)
                lst_test_preds.append(p)
            train_preds = np.concatenate(lst_train_preds)
            validation_preds = np.concatenate(lst_validation_preds)
            test_preds = np.concatenate(lst_test_preds)
            #train_preds = model.run(model.y_prob, utils.slice(train_data)[0])
            #test_preds = model.run(model.y_prob, utils.slice(test_data)[0])
            train_score = roc_auc_score(train_data[1], train_preds)
            validation_score = roc_auc_score(validation_data[1], validation_preds)
            test_score = roc_auc_score(test_data[1], test_preds)
            train_score_sum = 0
            train_score_weight = 0
            validation_score_sum = 0
            validation_score_weight = 0
            test_score_sum = 0
            test_score_weight = 0
            #print '[%d]\tloss:%f\ttrain-auc: %f\teval-auc: %f' % (i, np.mean(ls), train_score, test_score)
            print '%d\t%f\t%f\t%f\t%f\t%f\t%s' % (i, np.mean(ls), train_score, validation_score, test_score, time.time() - start_time, strftime("%Y-%m-%d %H:%M:%S", gmtime()))
            if flag_MTL:
                d_index_task_label_pred_train = {}
                d_index_task_label_pred_validation = {}
                d_index_task_label_pred_test = {}
                if model_name in set(['lr', 'fm']):
                    index_task_train = train_data_tmp[0][-1].indices
                    index_task_validation = validation_data_tmp[0][-1].indices
                    index_task_test = test_data_tmp[0][-1].indices
                else:
                    index_task_train = train_data[0][-1].indices
                    index_task_validation = validation_data[0][-1].indices
                    index_task_test = test_data[0][-1].indices
                for index_tmp in range(len(index_task_train)):
                    index_task = index_task_train[index_tmp]
                    d_index_task_label_pred_train.setdefault(index_task, [[],[]])
                    d_index_task_label_pred_train[index_task][0].append(train_data[1][index_tmp])
                    d_index_task_label_pred_train[index_task][1].append(train_preds[index_tmp])
                for index_task in sorted(list(set(index_task_train))):
                    auc = roc_auc_score(d_index_task_label_pred_train[index_task][0], d_index_task_label_pred_train[index_task][1])
                    num_samples = len(d_index_task_label_pred_train[index_task][0])
                    train_score_sum += auc * num_samples
                    train_score_weight += num_samples
                    print 'train, index_type: %d, number of samples: %d, AUC: %f' % (index_task, len(d_index_task_label_pred_train[index_task][0]), auc)
                for index_tmp in range(len(index_task_validation)):
                    index_task = index_task_validation[index_tmp]
                    d_index_task_label_pred_validation.setdefault(index_task, [[],[]])
                    d_index_task_label_pred_validation[index_task][0].append(validation_data[1][index_tmp])
                    d_index_task_label_pred_validation[index_task][1].append(validation_preds[index_tmp])
                for index_task in sorted(list(set(index_task_validation))):
                    auc = roc_auc_score(d_index_task_label_pred_validation[index_task][0], d_index_task_label_pred_validation[index_task][1])
                    num_samples = len(d_index_task_label_pred_validation[index_task][0])
                    validation_score_sum += auc * num_samples
                    validation_score_weight += num_samples
                    print 'validation, index_type: %d, number of samples: %d, AUC: %f' % (index_task, num_samples, auc)
                for index_tmp in range(len(index_task_test)):
                    index_task = index_task_test[index_tmp]
                    d_index_task_label_pred_test.setdefault(index_task, [[],[]])
                    d_index_task_label_pred_test[index_task][0].append(test_data[1][index_tmp])
                    d_index_task_label_pred_test[index_task][1].append(test_preds[index_tmp])
                for index_task in sorted(list(set(index_task_test))):
                    auc = roc_auc_score(d_index_task_label_pred_test[index_task][0], d_index_task_label_pred_test[index_task][1])
                    num_samples = len(d_index_task_label_pred_test[index_task][0])
                    test_score_sum += auc * num_samples
                    test_score_weight += num_samples
                    print 'test, index_type: %d, number of samples: %d, AUC: %f' % (index_task, len(d_index_task_label_pred_test[index_task][0]), auc)
            # Note: the weighted scores below are only populated when flag_MTL is True;
            # with flag_MTL=False the *_score_weight counters stay 0 and the division fails.
            weighted_train_score = train_score_sum / train_score_weight
            print 'weighted_train_score', weighted_train_score
            weighted_validation_score = validation_score_sum / validation_score_weight
            print 'weighted_validation_score', weighted_validation_score
            weighted_test_score = test_score_sum / test_score_weight
            print 'weighted_test_score', weighted_test_score
            history_score.append(weighted_validation_score)
            if weighted_validation_score < best_score and (i - best_epoch) >= 3:
                break
            if weighted_validation_score > best_score:
                best_score = weighted_validation_score
                best_epoch = i
            sys.stdout.flush()
        else:
            lst_train_pred = []
            lst_test_pred = []
            if batch_size > 0:
                f = open(path_train, 'r')
                lst_lines = []
                for line in f:
                    if len(lst_lines) < batch_size:
                        lst_lines.append(line)
                    else:
                        X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1)
                        _train_preds = model.run(model.y_prob, X_i)
                        lst_train_pred.append(_train_preds)
                        lst_lines = [line]
                f.close()
                if len(lst_lines) > 0:
                    X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1)
                    _train_preds = model.run(model.y_prob, X_i)
                    lst_train_pred.append(_train_preds)
                f = open(path_test, 'r')
                lst_lines = []
                for line in f:
                    if len(lst_lines) < batch_size:
                        lst_lines.append(line)
                    else:
                        X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1)
                        _test_preds = model.run(model.y_prob, X_i)
                        lst_test_pred.append(_test_preds)
                        lst_lines = [line]
                f.close()
                if len(lst_lines) > 0:
                    X_i, y_i = utils.slice(utils.process_lines(lst_lines, name, INPUT_DIM, FIELD_OFFSETS), 0, -1)
                    _test_preds = model.run(model.y_prob, X_i)
                    lst_test_pred.append(_test_preds)
            train_preds = np.concatenate(lst_train_pred)
            test_preds = np.concatenate(lst_test_pred)
            print 'np.shape(train_preds)', np.shape(train_preds)
            train_score = roc_auc_score(train_label, train_preds)
            test_score = roc_auc_score(test_label, test_preds)
            print '%d\t%f\t%f\t%f\t%f\t%s' % (i, np.mean(ls), train_score, test_score, time.time() - start_time, strftime("%Y-%m-%d %H:%M:%S", gmtime()))
            sys.stdout.flush()
Example #8
def do_inference(hostport, test_data, concurrency, num_tests, batch_size):
    """Tests PredictionService with concurrent-batched requests.

    Args:
      hostport: Host:port address of the PredictionService.
      test_data: The full path to the test data set.
      concurrency: Maximum number of concurrent requests.
      num_tests: Number of test tensors to use.
      batch_size: Number of tests to include in each query

    Returns:
      The results of the queries

    Raises:
      IOError: An error occurred processing test data set.
    """
    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
    result_counter = _ResultCounter(concurrency)
    num_field = 15

    with open(test_data, 'r') as f:
        data = f.read().split('\n')
        requests = []
        t0 = time.time()

        # Create batches of requests. num_batches is presumably defined at module
        # level (e.g. derived from num_tests and batch_size); it is not set here.
        for i in range(num_batches):
            data_batch = data[:batch_size]
            data = data[batch_size:]

            index_list = [[] for _ in range(num_field)]
            values_list = [[] for _ in range(num_field)]
            for j in range(batch_size):
                X, y = utils.slice(
                    utils.process_lines([data_batch[j]], 'fwfm', INPUT_DIM,
                                        FIELD_OFFSETS), 0, -1)
                for idx in range(num_field):
                    index_list[idx].append(X[idx][0].tolist())
                    values_list[idx].append(1)

            requests.append(predict_pb2.PredictRequest())
            requests[i].model_spec.name = 'serve'
            requests[i].model_spec.signature_name = 'model'
            requests[i].output_filter.append('outputs')
            for idx in range(num_field):
                requests[i].inputs["field_" + str(idx) + "_values"].CopyFrom(
                    tf.contrib.util.make_tensor_proto(
                        values_list[idx],
                        shape=[len(values_list[idx])],
                        dtype=tf.int64))
                requests[i].inputs["field_" + str(idx) + "_indices"].CopyFrom(
                    tf.contrib.util.make_tensor_proto(
                        index_list[idx],
                        shape=[len(index_list[idx]), 2],
                        dtype=tf.float32))
                requests[i].inputs["field_" + str(idx) +
                                   "_dense_shape"].CopyFrom(
                                       tf.contrib.util.make_tensor_proto(
                                           [batch_size, total_features],
                                           shape=[2],
                                           dtype=tf.int64))
        t1 = time.time()

        #Query server
        for i in range(num_batches):
            result_counter.throttle()
            result = stub.Predict.future(requests[i],
                                         100.0)  # 100 secs timeout
            result.add_done_callback(_create_rpc_callback(i, result_counter))
        t2 = time.time()

        # Synchronize on completed queries
        result_counter.get_complete()
        t3 = time.time()
        full_results = []
        # "results" is presumably a module-level container filled by the RPC callbacks.
        for values in results:
            full_results.extend(values)

        print("Elapsed time for ", num_tests, " request creations: ",
              (t1 - t0))
        print("Elapsed time for ", num_batches, " batch submissions: ",
              (t2 - t1))
        print("Elapsed time for ", num_tests, " inferences: ", (t3 - t1))
        return full_results
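
A call to this client might look like the following sketch; the host, file path and sizes are placeholders, since the original script presumably takes them from command-line flags:

# Hypothetical invocation with placeholder values.
predictions = do_inference('localhost:9000', 'data/test.txt', 4, 1024, 128)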
Example #9
def train(model, name):
    global batch_size
    global time_run
    global time_process
    history_score = []
    start_time = time.time()
    print 'epochs\tloss\ttrain-auc\teval-auc\ttime'
    sys.stdout.flush()
    for i in range(num_round):
        fetches = [model.optimizer, model.loss]
        if batch_size > 0:
            ls = []
            for j in range(train_size / batch_size + 1):
                start_process = time.time()
                X_i, y_i = utils.slice(train_data, j * batch_size, batch_size)
                time_process += time.time() - start_process
                start_run = time.time()
                _, l = model.run(fetches, X_i, y_i)
                time_run += time.time() - start_run
                ls.append(l)
            '''
            f = open(train_file, 'r')
            lst_lines = []
            for line in f:
                if len(lst_lines) < batch_size:
                    lst_lines.append(line)
                else:
                    X_i, y_i = utils.slice(utils.process_lines(lst_lines, name), 0, -1)
                    print 'type(X_i)', type(X_i)
                    print 'type(X_i[0])', type(X_i[0])
                    _, l = model.run(fetches, X_i, y_i)
                    ls.append(l)
                    lst_lines = [line]
            f.close()
            if len(lst_lines) > 0:
                X_i, y_i = utils.slice(utils.process_lines(lst_lines, name), 0, -1)
                _, l = model.run(fetches, X_i, y_i)
                ls.append(l)
            '''
            '''
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)], name), 0, -1)
                    _, l = model.run(fetches, X_i, y_i)
                    ls.append(l)
            '''
        elif batch_size == -1:
            pass
            """
            X_i, y_i = utils.slice(train_data)
            _, l = model.run(fetches, X_i, y_i)
            ls = [l]
            """
        lst_train_pred = []
        lst_test_pred = []
        if batch_size > 0:
            f = open(train_file, 'r')
            lst_lines = []
            for line in f:
                if len(lst_lines) < batch_size:
                    lst_lines.append(line)
                else:
                    X_i, y_i = utils.slice(
                        utils.process_lines(lst_lines, name), 0, -1)
                    _train_preds = model.run(model.y_prob, X_i)
                    lst_train_pred.append(_train_preds)
                    lst_lines = [line]
            f.close()
            if len(lst_lines) > 0:
                X_i, y_i = utils.slice(utils.process_lines(lst_lines, name), 0,
                                       -1)
                _train_preds = model.run(model.y_prob, X_i)
                lst_train_pred.append(_train_preds)
            '''
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)], name), 0, -1)
                    _train_preds = model.run(model.y_prob, X_i)
                    lst_train_pred.append(_train_preds)
            '''
            """
            for j in range(train_size / batch_size + 1):
                X_i, y_i = utils.slice(train_data, j * batch_size, batch_size)
                #X_i = utils.libsvm_2_coo(X_i, (len(X_i), input_dim)).tocsr()
                _train_preds = model.run(model.y_prob, X_i)
                lst_train_pred.append(_train_preds)
            """
            f = open(test_file, 'r')
            lst_lines = []
            for line in f:
                if len(lst_lines) < batch_size:
                    lst_lines.append(line)
                else:
                    X_i, y_i = utils.slice(
                        utils.process_lines(lst_lines, name), 0, -1)
                    _test_preds = model.run(model.y_prob, X_i)
                    lst_test_pred.append(_test_preds)
                    lst_lines = [line]
            f.close()
            if len(lst_lines) > 0:
                X_i, y_i = utils.slice(utils.process_lines(lst_lines, name), 0,
                                       -1)
                _test_preds = model.run(model.y_prob, X_i)
                lst_test_pred.append(_test_preds)
            '''
            while True:
                lines_gen = list(islice(f, batch_size * bb))
                if not lines_gen:
                    break
                for ib in range(bb):
                    X_i, y_i = utils.slice(utils.process_lines(lines_gen[batch_size * ib : batch_size * (ib+1)], name), 0, -1)
                    _test_preds = model.run(model.y_prob, X_i)
                    lst_test_pred.append(_test_preds)
            '''
            """
            for j in range(test_size / batch_size + 1):
                X_i, y_i = utils.slice(test_data, j * batch_size, batch_size)
                #X_i = utils.libsvm_2_coo(X_i, (len(X_i), input_dim)).tocsr()
                _test_preds = model.run(model.y_prob, X_i)
                lst_test_pred.append(_test_preds)
            """
        train_preds = np.concatenate(lst_train_pred)
        test_preds = np.concatenate(lst_test_pred)
        train_score = roc_auc_score(train_label, train_preds)
        test_score = roc_auc_score(test_label, test_preds)
        print '%d\t%f\t%f\t%f\t%f\t%s' % (
            i, np.mean(ls), train_score, test_score, time.time() - start_time,
            strftime("%Y-%m-%d %H:%M:%S", gmtime()))
        print 'time_run', time_run
        print 'time_process', time_process
        # Save the model to local files
        '''
        path_model = 'model/' + str(name) + '_epoch_' + str(i)
        model.dump(path_model)
        d_label_score = {}
        d_label_score['label'] = test_label
        d_label_score['score'] = test_preds
        #path_label_score = 'model/label_score_' + str(name) + '_epoch_' + str(i)
        #pkl.dump(d_label_score, open(path_label_score, 'wb'))
        sys.stdout.flush()
        '''
        history_score.append(test_score)
        if i > min_round and i > early_stop_round:
            i_max = np.argmax(history_score)
            # Early stop
            if i - i_max >= early_stop_round:
                print 'early stop\nbest iteration:\n[%d]\teval-auc: %f' % (
                    np.argmax(history_score), np.max(history_score))
                sys.stdout.flush()
                break