Example #1
def main(_):
    # 1. load data (X: list of int, y: int).
    #if os.path.exists(FLAGS.cache_path):  # if the cache file exists, load the (vocabulary-indexed) data from it
    #    with open(FLAGS.cache_path, 'r') as data_f:
    #        trainX, trainY, testX, testY, vocabulary_index2word=pickle.load(data_f)
    #        vocab_size=len(vocabulary_index2word)
    #else:
    if True:  # stand-in for the cached-data branch commented out above
        trainX, trainY, testX, testY = None, None, None, None
        vocabulary_word2index, vocabulary_index2word = create_vocabulary(
            word2vec_model_path=FLAGS.word2vec_model_path,
            name_scope="transformer_classification")
        vocab_size = len(vocabulary_word2index)
        print("transformer.vocab_size:", vocab_size)
        train, test, _ = load_data_multilabel_new(
            vocabulary_word2index, training_data_path=FLAGS.training_data_path)

        compare_train_data = WikiQA(word2vec=Word2Vec(),
                                    max_len=FLAGS.max_len_compare)
        compare_train_data.open_file(mode="train")
        compare_test_data = WikiQA(word2vec=Word2Vec(),
                                   max_len=FLAGS.max_len_compare)
        compare_test_data.open_file(mode="valid")

        trainX, trainY = train
        testX, testY = test

        trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
        testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Transformer(FLAGS.num_classes,
                            FLAGS.learning_rate,
                            FLAGS.batch_size,
                            FLAGS.decay_steps,
                            FLAGS.decay_rate,
                            FLAGS.sequence_length,
                            vocab_size,
                            FLAGS.embed_size,
                            FLAGS.d_model,
                            FLAGS.d_k,
                            FLAGS.d_v,
                            FLAGS.h,
                            FLAGS.num_layer,
                            FLAGS.is_training,
                            compare_train_data.num_features,
                            di=50,
                            s=compare_train_data.max_len,
                            w=4,
                            l2_reg=0.0004,
                            l2_lambda=FLAGS.l2_lambda)
        print("=" * 50)
        print("List of Variables:")
        for v in tf.trainable_variables():
            print(v.name)
        print("=" * 50)
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print('Initializing Variables')
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_embedding:  #load pre-trained word embedding
                assign_pretrained_word_embedding(
                    sess,
                    vocabulary_index2word,
                    vocab_size,
                    model,
                    word2vec_model_path=FLAGS.word2vec_model_path)
        curr_epoch = sess.run(model.epoch_step)
        number_of_training_data = len(trainX)
        print("number_of_training_data:", number_of_training_data)

        previous_eval_loss = 10000
        best_eval_loss = 10000
        batch_size = FLAGS.batch_size
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss, acc, counter = 0.0, 0.0, 0
            compare_train_data.reset_index()
            for start, end in zip(
                    range(0, number_of_training_data, batch_size),
                    range(batch_size, number_of_training_data, batch_size)):
                if epoch == 0 and counter == 0:
                    print("trainX[start:end]:", trainX[start:end])
                batch_x1, batch_x2, _, batch_features = compare_train_data.next_batch(
                    batch_size=end - start)
                feed_dict = {
                    model.input_x: trainX[start:end],
                    model.dropout_keep_prob: 0.9,
                    model.x1: batch_x1,
                    model.x2: batch_x2,
                    model.features: batch_features
                }
                feed_dict[model.input_y_label] = trainY[start:end]
                curr_loss, curr_acc, _ = sess.run(
                    [model.loss_val, model.accuracy, model.train_op],
                    feed_dict)  # curr_acc comes from model.accuracy
                loss, counter, acc = loss + curr_loss, counter + 1, acc + curr_acc
                if counter % 50 == 0:
                    print(
                        "transformer.classification==>Epoch %d\tBatch %d\tTrain Loss:%.3f\tTrain Accuracy:%.3f"
                        % (epoch, counter, loss / float(counter),
                           acc / float(counter))
                    )
                # ---------- validation ----------
                if FLAGS.batch_size != 0 and (
                        start % (FLAGS.validate_step * FLAGS.batch_size) == 0):
                    eval_loss, eval_acc = do_eval(sess, model, testX, testY,
                                                  compare_test_data,
                                                  batch_size)
                    print(
                        "transformer.classification.validation.part. previous_eval_loss:",
                        previous_eval_loss, ";current_eval_loss:", eval_loss)
                    if eval_loss > previous_eval_loss:  #if loss is not decreasing
                        # reduce the learning rate by a factor of 0.5
                        print(
                            "transformer.classification.==>validation.part.going to reduce the learning rate."
                        )
                        learning_rate1 = sess.run(model.learning_rate)
                        sess.run(model.learning_rate_decay_half_op)
                        learning_rate2 = sess.run(model.learning_rate)
                        print(
                            "transformer.classification==>validation.part.learning_rate1:",
                            learning_rate1, " ;learning_rate2:",
                            learning_rate2)
                    #print("HierAtten==>Epoch %d Validation Loss:%.3f\tValidation Accuracy: %.3f" % (epoch, eval_loss, eval_acc))
                    else:  # loss is decreasing
                        if eval_loss < best_eval_loss:
                            print(
                                "transformer.classification==>going to save the model.eval_loss:",
                                eval_loss, ";best_eval_loss:", best_eval_loss)
                            # save model to checkpoint
                            save_path = FLAGS.ckpt_dir + "model.ckpt"
                            saver.save(sess, save_path, global_step=epoch)
                            best_eval_loss = eval_loss
                    previous_eval_loss = eval_loss
                    compare_test_data.reset_index()
                # ---------- end validation ----------

            #epoch increment
            print("going to increment epoch counter....")
            sess.run(model.epoch_increment)
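
Note: the Transformer class itself is not shown in this example, so the mechanics behind model.learning_rate_decay_half_op are an assumption. A minimal TF1-style sketch of how such a halve-on-plateau op can be wired (hypothetical names):

# Sketch (assumption): an op like model.learning_rate_decay_half_op in TF1-style code;
# running it once multiplies the stored learning rate by 0.5.
import tensorflow as tf

learning_rate = tf.Variable(0.001, trainable=False, name="learning_rate")
learning_rate_decay_half_op = tf.assign(learning_rate, learning_rate * 0.5)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(learning_rate))   # 0.001
    sess.run(learning_rate_decay_half_op)
    print(sess.run(learning_rate))   # 0.0005
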
Example #2
def main(_):
    '''
    Main function: preprocess the data, then iterate over it to train the model.
    '''
    print("Data preprocessing stage: ......")
    trainX,trainY,testX,testY = None, None, None, None
 
    # load the word-to-index mapping from the word2vec model
    vocabulary_word2index, vocabulary_index2word = create_vocabulary(
                word2vec_model_path=FLAGS.word2vec_model_path, name_scope="cnn")

    vocab_size = len(vocabulary_word2index)
    # label-to-index mapping
    vocabulary_word2index_label, vocabulary_index2word_label = create_vocabulary_label(
               vocabulary_label=FLAGS.training_data_path, name_scope="cnn")

    # convert the text to index vectors and split into training and test sets
    train, test, _ = load_data_multilabel_new(vocabulary_word2index, 
                   vocabulary_word2index_label,training_data_path=FLAGS.training_data_path)

    trainX, trainY = train
    testX, testY = test

    # pad short sequences with 0
    trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0)
    testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0)
    print("数据预处理部分完成.....")

    print("创建session 对话.......")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True

    with tf.Session(config=config) as sess:
        textCNN = TextCNN(filter_size, FLAGS.num_filters, FLAGS.num_classes, FLAGS.learning_rate,
                          FLAGS.batch_size, FLAGS.sequence_length, vocab_size, FLAGS.embed_size,
                          FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.is_decay, FLAGS.is_dropout, FLAGS.is_l2)

        # merge summaries for TensorBoard
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(FLAGS.tensorboard_dir, sess.graph)

        # set up model saving
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):         #判断模型是否存在
            print("从模型中恢复变量")
#            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))   #自动获取最近一次的模型变量
        else:
            print("初始化变量")
            sess.run(tf.global_variables_initializer())     #初始化所有变量
            if FLAGS.use_embedding:        #加载预训练词向量
                assign_pretrained_word_embedding(sess, vocabulary_index2word,
                           vocab_size, textCNN, word2vec_model_path=FLAGS.word2vec_model_path)

        curr_epoch = sess.run(textCNN.epoch_step)
   
        # split the training data into batches
        num_train_data = len(trainX)
        batch_size = FLAGS.batch_size

        index = 0
        # build the epoch-increment op once, outside the loop, so the graph does not grow every epoch
        epoch_increment = tf.assign(textCNN.epoch_step, tf.add(textCNN.epoch_step, tf.constant(1)))
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss, acc, counter = 0.0, 0.0, 0
            for start, end in zip(range(0, num_train_data, batch_size), range(batch_size, num_train_data, batch_size)):
                feed_dict = {textCNN.input_x:trainX[start:end], textCNN.input_y:trainY[start:end], 
                                   textCNN.dropout_keep_prob:0.9}
                curr_loss, curr_acc, logits, _ = sess.run([textCNN.loss_val, textCNN.accuracy, 
                                                textCNN.logits, textCNN.train_op], feed_dict)
     
                index += 1
                loss, counter, acc = loss+curr_loss, counter+1, acc+curr_acc
   
                if counter % 100 == 0:
                    rs = sess.run(merged, feed_dict)       # record the summaries
                    writer.add_summary(rs, index)
                    print("Epoch %d\tBatch %d\tTrain Loss:%.3f\tTrain Accuracy:%.3f\tGlobal Step %d"
                          %(epoch,counter,loss/float(counter),acc/float(counter),sess.run(textCNN.global_step)))
 #                   print("Train Logits{}".format(logits))
    
            # increment the epoch counter
            sess.run(epoch_increment)

            # validation
            print("Epoch: {}".format(epoch))
            if epoch % FLAGS.validate_every == 0:
                eval_loss, eval_acc = do_eval(sess,textCNN,testX,testY,batch_size)
                print("迭代次数:{}\t验证损失值:{}\t准确率:{}".format(epoch, eval_loss, eval_acc))

                #save the model
        #        save_path = FLAGS.ckpt_dir+"model.ckpt"
        #        saver.save(sess, save_path, global_step=epoch)

        print("验证集上进行损失,准确率计算.....")
        test_loss, test_acc = do_eval(sess, textCNN, testX, testY, batch_size)
        print("测试集中损失值:{}\t准确率:{}".format(test_loss, test_acc))
Example #3
def main(_):
    #os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if FLAGS.dataset == "bibsonomy-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_bib
        traning_data_path = FLAGS.training_data_path_bib
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.59

    elif FLAGS.dataset == "zhihu-sample":
        word2vec_model_path = FLAGS.word2vec_model_path_zhihu
        traning_data_path = FLAGS.training_data_path_zhihu
        FLAGS.sequence_length = 100
        FLAGS.ave_labels_per_doc = 2.45

    elif FLAGS.dataset == "citeulike-a-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cua
        traning_data_path = FLAGS.training_data_path_cua
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.6

    elif FLAGS.dataset == "citeulike-t-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cut
        traning_data_path = FLAGS.training_data_path_cut
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 7.68

    # 1. create trainlist, validlist and testlist
    trainX, trainY, testX, testY = None, None, None, None
    vocabulary_word2index, vocabulary_index2word = create_voabulary(
        word2vec_model_path,
        name_scope=FLAGS.dataset + "-lda")  #simple='simple'
    vocabulary_word2index_label, vocabulary_index2word_label = create_voabulary_label(
        voabulary_label=traning_data_path, name_scope=FLAGS.dataset + "-lda")
    num_classes = len(vocabulary_word2index_label)
    print(vocabulary_index2word_label[0], vocabulary_index2word_label[1])

    vocab_size = len(vocabulary_word2index)
    print("vocab_size:", vocab_size)

    # choosing whether to use k-fold cross-validation or hold-out validation
    if FLAGS.kfold == -1:  # hold-out
        train, valid, test = load_data_multilabel_new(
            vocabulary_word2index,
            vocabulary_word2index_label,
            keep_label_percent=FLAGS.keep_label_percent,
            valid_portion=FLAGS.valid_portion,
            test_portion=FLAGS.test_portion,
            multi_label_flag=FLAGS.multi_label_flag,
            traning_data_path=traning_data_path)
        # here train, test are tuples; turn train into trainlist.
        trainlist, validlist, testlist = list(), list(), list()
        trainlist.append(train)
        validlist.append(valid)
        testlist.append(test)
    else:  # k-fold
        trainlist, validlist, testlist = load_data_multilabel_new_k_fold(
            vocabulary_word2index,
            vocabulary_word2index_label,
            keep_label_percent=FLAGS.keep_label_percent,
            kfold=FLAGS.kfold,
            test_portion=FLAGS.test_portion,
            multi_label_flag=FLAGS.multi_label_flag,
            traning_data_path=traning_data_path)
        # here trainlist, testlist are list of tuples.
    # get and pad the testing data: there is only one testing set, but kfold training and validation sets
    assert len(testlist) == 1
    testX, testY = testlist[0]
    testX = pad_sequences(testX, maxlen=FLAGS.sequence_length,
                          value=0.)  # padding to max length

    # 3. transform trainlist to the required format: x_train, x_test are feature matrices of size (n_samples, n_features)
    #print(len(trainlist))
    #trainX,trainY = trainlist[0]
    #trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
    #print(len(trainX))
    #print(len(trainX[0]))
    #print(trainX[0])
    #print(len(trainY))
    #print(len(trainY[0]))
    #print(trainY[0])
    #print(np.asarray(trainY).shape)

    num_runs = len(trainlist)
    #validation results variables
    valid_acc_th, valid_prec_th, valid_rec_th, valid_fmeasure_th, valid_hamming_loss_th = \
        [0] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs  # initialise the result lists
    final_valid_acc_th, final_valid_prec_th, final_valid_rec_th, final_valid_fmeasure_th, final_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    min_valid_acc_th, min_valid_prec_th, min_valid_rec_th, min_valid_fmeasure_th, min_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    max_valid_acc_th, max_valid_prec_th, max_valid_rec_th, max_valid_fmeasure_th, max_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    std_valid_acc_th, std_valid_prec_th, std_valid_rec_th, std_valid_fmeasure_th, std_valid_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    #testing results variables
    test_acc_th, test_prec_th, test_rec_th, test_fmeasure_th, test_hamming_loss_th = \
        [0] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs, [0] * num_runs  # initialise the testing result lists
    final_test_acc_th, final_test_prec_th, final_test_rec_th, final_test_fmeasure_th, final_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    min_test_acc_th, min_test_prec_th, min_test_rec_th, min_test_fmeasure_th, min_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    max_test_acc_th, max_test_prec_th, max_test_rec_th, max_test_fmeasure_th, max_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    std_test_acc_th, std_test_prec_th, std_test_rec_th, std_test_fmeasure_th, std_test_hamming_loss_th = 0.0, 0.0, 0.0, 0.0, 0.0
    #output variables
    output_valid = ""
    output_test = ""
    output_csv_valid = "fold,hamming_loss,acc,prec,rec,f1"
    output_csv_test = "fold,hamming_loss,acc,prec,rec,f1"

    time_train = [0] * num_runs  # time spent in training for each fold
    num_run = 0

    mallet_path = FLAGS.mallet_path
    num_topics = FLAGS.num_topics
    alpha = 50 / num_topics
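    # note: alpha = 50/T is the classic symmetric Dirichlet prior heuristic for LDA
    # (Griffiths & Steyvers, 2004)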
    iterations = FLAGS.iterations
    k_num_doc = FLAGS.k_num_doc

    remove_pad_id = True
    remove_dot = True
    docs_test = generateLDAdocFromIndex(testX,
                                        vocabulary_index2word,
                                        remove_pad_id=remove_pad_id,
                                        remove_dot=remove_dot)

    for trainfold in trainlist:
        # get training and validation data
        trainX, trainY = trainfold
        trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
        # generate training data for gensim MALLET wrapper for LDA
        docs = generateLDAdocFromIndex(trainX,
                                       vocabulary_index2word,
                                       remove_pad_id=remove_pad_id,
                                       remove_dot=remove_dot)
        #print(docs[10])
        id2word = corpora.Dictionary(docs)
        corpus = [id2word.doc2bow(text) for text in docs]
        #print(corpus[10])
        # generate validation data for gensim MALLET wrapper for LDA
        validX, validY = validlist[num_run]
        validX = pad_sequences(validX, maxlen=FLAGS.sequence_length, value=0.)
        docs_valid = generateLDAdocFromIndex(validX,
                                             vocabulary_index2word,
                                             remove_pad_id=remove_pad_id,
                                             remove_dot=remove_dot)
        corpus_valid = [id2word.doc2bow(text) for text in docs_valid]
        # generate testing data for gensim MALLET wrapper for LDA
        corpus_test = [id2word.doc2bow(text) for text in docs_test]

        # training
        start_time_train = time.time()
        print('start training fold', str(num_run))

        model = gensim.models.wrappers.LdaMallet(mallet_path,
                                                 corpus=corpus,
                                                 num_topics=num_topics,
                                                 alpha=alpha,
                                                 id2word=id2word,
                                                 iterations=iterations)
        pprint(model.show_topics(formatted=False))

        print('num_run', str(num_run), 'train done.')

        time_train[num_run] = time.time() - start_time_train
        print("--- training of fold %s took %s seconds ---" %
              (num_run, time_train[num_run]))

        # represent each document as a topic vector
        #mat_train = np.array(model[corpus]) # this will cause an Error with large num_topics, e.g. 1000 or higher.
        #Thus, we turn the MALLET LDA model to a native Gensim LDA model
        model = gensim.models.wrappers.ldamallet.malletmodel2ldamodel(model)
        mat_train = np.array(
            model.get_document_topics(corpus, minimum_probability=0.0))
        #print(len(model[corpus[0]]))
        #print(len(model[corpus[1]]))
        #print(len(model[corpus[2]]))
        #print(mat_train.shape)
        mat_train = mat_train[:, :, 1]  # documents in the training set as a matrix of topic probabilities

        # evaluate on training data
        #if num_run == 0 and FLAGS.kfold != -1: # do this only for the first fold in k-fold cross-validation to save time
        #    acc, prec, rec, f_measure, hamming_loss = do_eval_lda(model, k_num_doc, mat_train, trainY, corpus, trainY, vocabulary_index2word_label, hamming_q=FLAGS.ave_labels_per_doc)
        #    print('training:', acc, prec, rec, f_measure, hamming_loss)

        # validation
        (valid_acc_th[num_run], valid_prec_th[num_run], valid_rec_th[num_run],
         valid_fmeasure_th[num_run], valid_hamming_loss_th[num_run]) = do_eval_lda(
             model, k_num_doc, mat_train, trainY, corpus_valid, validY,
             vocabulary_index2word_label, hamming_q=FLAGS.ave_labels_per_doc)
        print(
            "LDA==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f"
            % (num_run, valid_acc_th[num_run], valid_hamming_loss_th[num_run],
               valid_prec_th[num_run], valid_rec_th[num_run],
               valid_fmeasure_th[num_run]))
        output_valid = output_valid + "\n" + "LDA==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f" % (
            num_run, valid_acc_th[num_run], valid_hamming_loss_th[num_run],
            valid_prec_th[num_run], valid_rec_th[num_run], valid_fmeasure_th[
                num_run]) + "\n"  # also output the results of each run.
        output_csv_valid = output_csv_valid + "\n" + str(num_run) + "," + str(
            valid_hamming_loss_th[num_run]) + "," + str(
                valid_acc_th[num_run]) + "," + str(
                    valid_prec_th[num_run]) + "," + str(
                        valid_rec_th[num_run]) + "," + str(
                            valid_fmeasure_th[num_run])

        start_time_test = time.time()
        # evaluate on testing data
        (test_acc_th[num_run], test_prec_th[num_run], test_rec_th[num_run],
         test_fmeasure_th[num_run], test_hamming_loss_th[num_run]) = do_eval_lda(
             model, k_num_doc, mat_train, trainY, corpus_test, testY,
             vocabulary_index2word_label, hamming_q=FLAGS.ave_labels_per_doc)
        print(
            "LDA==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f"
            % (num_run, test_acc_th[num_run], test_hamming_loss_th[num_run],
               test_prec_th[num_run], test_rec_th[num_run],
               test_fmeasure_th[num_run]))
        output_test = output_test + "\n" + "LDA==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f" % (
            num_run, test_acc_th[num_run], test_hamming_loss_th[num_run],
            test_prec_th[num_run], test_rec_th[num_run], test_fmeasure_th[
                num_run]) + "\n"  # also output the results of each run.
        output_csv_test = output_csv_test + "\n" + str(num_run) + "," + str(
            test_hamming_loss_th[num_run]) + "," + str(
                test_acc_th[num_run]) + "," + str(
                    test_prec_th[num_run]) + "," + str(
                        test_rec_th[num_run]) + "," + str(
                            test_fmeasure_th[num_run])

        print("--- testing of fold %s took %s seconds ---" %
              (num_run, time.time() - start_time_test))

        prediction_str = ""
        # output final predictions for qualitative analysis
        if FLAGS.report_rand_pred:
            prediction_str = display_for_qualitative_evaluation(
                model,
                k_num_doc,
                mat_train,
                trainY,
                corpus_test,
                testX,
                testY,
                vocabulary_index2word,
                vocabulary_index2word_label,
                hamming_q=FLAGS.ave_labels_per_doc)
        # update the num_run
        num_run = num_run + 1

    print('\n--Final Results--\n')
    #print('C', FLAGS.C, 'gamma', FLAGS.gamma)

    # report min, max, std, average for the validation results
    min_valid_acc_th = min(valid_acc_th)
    min_valid_prec_th = min(valid_prec_th)
    min_valid_rec_th = min(valid_rec_th)
    min_valid_fmeasure_th = min(valid_fmeasure_th)
    min_valid_hamming_loss_th = min(valid_hamming_loss_th)

    max_valid_acc_th = max(valid_acc_th)
    max_valid_prec_th = max(valid_prec_th)
    max_valid_rec_th = max(valid_rec_th)
    max_valid_fmeasure_th = max(valid_fmeasure_th)
    max_valid_hamming_loss_th = max(valid_hamming_loss_th)

    if FLAGS.kfold != -1:
        std_valid_acc_th = statistics.stdev(valid_acc_th)  # to change
        std_valid_prec_th = statistics.stdev(valid_prec_th)
        std_valid_rec_th = statistics.stdev(valid_rec_th)
        std_valid_fmeasure_th = statistics.stdev(valid_fmeasure_th)
        std_valid_hamming_loss_th = statistics.stdev(valid_hamming_loss_th)

    final_valid_acc_th = sum(valid_acc_th) / num_runs
    final_valid_prec_th = sum(valid_prec_th) / num_runs
    final_valid_rec_th = sum(valid_rec_th) / num_runs
    final_valid_fmeasure_th = sum(valid_fmeasure_th) / num_runs
    final_valid_hamming_loss_th = sum(valid_hamming_loss_th) / num_runs

    print(
        "LDA==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)"
        % (final_valid_acc_th, std_valid_acc_th, min_valid_acc_th,
           max_valid_acc_th, final_valid_hamming_loss_th,
           std_valid_hamming_loss_th, min_valid_hamming_loss_th,
           max_valid_hamming_loss_th, final_valid_prec_th, std_valid_prec_th,
           min_valid_prec_th, max_valid_prec_th, final_valid_rec_th,
           std_valid_rec_th, min_valid_rec_th, max_valid_rec_th,
           final_valid_fmeasure_th, std_valid_fmeasure_th,
           min_valid_fmeasure_th, max_valid_fmeasure_th))
    #output the result to a file
    output_valid = output_valid + "\n" + "LDA==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)" % (
        final_valid_acc_th, std_valid_acc_th, min_valid_acc_th,
        max_valid_acc_th, final_valid_hamming_loss_th,
        std_valid_hamming_loss_th, min_valid_hamming_loss_th,
        max_valid_hamming_loss_th, final_valid_prec_th, std_valid_prec_th,
        min_valid_prec_th, max_valid_prec_th, final_valid_rec_th,
        std_valid_rec_th, min_valid_rec_th, max_valid_rec_th,
        final_valid_fmeasure_th, std_valid_fmeasure_th, min_valid_fmeasure_th,
        max_valid_fmeasure_th) + "\n"
    output_csv_valid = output_csv_valid + "\n" + "average" + "," + str(
        round(final_valid_hamming_loss_th, 3)) + "±" + str(
            round(std_valid_hamming_loss_th, 3)
        ) + "," + str(round(final_valid_acc_th, 3)) + "±" + str(
            round(std_valid_acc_th, 3)) + "," + str(
                round(final_valid_prec_th, 3)) + "±" + str(
                    round(std_valid_prec_th, 3)) + "," + str(
                        round(final_valid_rec_th, 3)) + "±" + str(
                            round(std_valid_rec_th, 3)) + "," + str(
                                round(final_valid_fmeasure_th, 3)) + "±" + str(
                                    round(std_valid_fmeasure_th, 3))

    # report min, max, std, average for the testing results
    min_test_acc_th = min(test_acc_th)
    min_test_prec_th = min(test_prec_th)
    min_test_rec_th = min(test_rec_th)
    min_test_fmeasure_th = min(test_fmeasure_th)
    min_test_hamming_loss_th = min(test_hamming_loss_th)

    max_test_acc_th = max(test_acc_th)
    max_test_prec_th = max(test_prec_th)
    max_test_rec_th = max(test_rec_th)
    max_test_fmeasure_th = max(test_fmeasure_th)
    max_test_hamming_loss_th = max(test_hamming_loss_th)

    if FLAGS.kfold != -1:
        std_test_acc_th = statistics.stdev(test_acc_th)  # to change
        std_test_prec_th = statistics.stdev(test_prec_th)
        std_test_rec_th = statistics.stdev(test_rec_th)
        std_test_fmeasure_th = statistics.stdev(test_fmeasure_th)
        std_test_hamming_loss_th = statistics.stdev(test_hamming_loss_th)

    final_test_acc_th = sum(test_acc_th) / num_runs
    final_test_prec_th = sum(test_prec_th) / num_runs
    final_test_rec_th = sum(test_rec_th) / num_runs
    final_test_fmeasure_th = sum(test_fmeasure_th) / num_runs
    final_test_hamming_loss_th = sum(test_hamming_loss_th) / num_runs

    print(
        "LDA==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)"
        %
        (final_test_acc_th, std_test_acc_th, min_test_acc_th, max_test_acc_th,
         final_test_hamming_loss_th, std_test_hamming_loss_th,
         min_test_hamming_loss_th, max_test_hamming_loss_th,
         final_test_prec_th, std_test_prec_th, min_test_prec_th,
         max_test_prec_th, final_test_rec_th, std_test_rec_th, min_test_rec_th,
         max_test_rec_th, final_test_fmeasure_th, std_test_fmeasure_th,
         min_test_fmeasure_th, max_test_fmeasure_th))
    #output the result to a file
    output_test = output_test + "\n" + "LDA==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)" % (
        final_test_acc_th, std_test_acc_th, min_test_acc_th, max_test_acc_th,
        final_test_hamming_loss_th, std_test_hamming_loss_th,
        min_test_hamming_loss_th, max_test_hamming_loss_th, final_test_prec_th,
        std_test_prec_th, min_test_prec_th, max_test_prec_th,
        final_test_rec_th, std_test_rec_th, min_test_rec_th, max_test_rec_th,
        final_test_fmeasure_th, std_test_fmeasure_th, min_test_fmeasure_th,
        max_test_fmeasure_th) + "\n"
    output_csv_test = output_csv_test + "\n" + "average" + "," + str(
        round(final_test_hamming_loss_th, 3)) + "±" + str(
            round(std_test_hamming_loss_th, 3)) + "," + str(
                round(final_test_acc_th, 3)
            ) + "±" + str(round(std_test_acc_th, 3)) + "," + str(
                round(final_test_prec_th, 3)) + "±" + str(
                    round(std_test_prec_th, 3)) + "," + str(
                        round(final_test_rec_th, 3)) + "±" + str(
                            round(std_test_rec_th, 3)) + "," + str(
                                round(final_test_fmeasure_th, 3)) + "±" + str(
                                    round(std_test_fmeasure_th, 3))

    setting = "dataset:" + str(FLAGS.dataset) + "\nT: " + str(
        FLAGS.num_topics) + "\nk: " + str(FLAGS.k_num_doc) + ' \ni: ' + str(
            FLAGS.iterations)
    print("--- The whole program took %s seconds ---" %
          (time.time() - start_time))
    time_used = "--- The whole program took %s seconds ---" % (time.time() -
                                                               start_time)
    if FLAGS.kfold != -1:
        print("--- The average training took %s ± %s seconds ---" %
              (sum(time_train) / num_runs, statistics.stdev(time_train)))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (
            sum(time_train) / num_runs, statistics.stdev(time_train))
    else:
        print("--- The average training took %s ± %s seconds ---" %
              (sum(time_train) / num_runs, 0))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (
            sum(time_train) / num_runs, 0)

    # output setting configuration, results, prediction and time used
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + '.txt',
        setting + '\n' + output_valid + '\n' + output_test + '\n' +
        prediction_str + '\n' + time_used + '\n' + average_time_train)
    # output structured evaluation results
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + ' valid.csv', output_csv_valid)
    output_to_file(
        'lda ' + str(FLAGS.dataset) + " T" + str(FLAGS.num_topics) + ' k' +
        str(FLAGS.k_num_doc) + ' i' + str(FLAGS.iterations) + ' gp_id' +
        str(FLAGS.marking_id) + ' test.csv', output_csv_test)
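
A note on the mat_train[:, :, 1] step in the example above: with minimum_probability=0.0, get_document_topics returns a (topic_id, probability) pair for every topic and every document, so the result converts to a rectangular array, and slicing index 1 of the last axis keeps just the probabilities. A tiny self-contained illustration with a native Gensim LdaModel (toy corpus, not the real data):

# Sketch: building a dense (n_docs, n_topics) matrix of topic probabilities,
# mirroring the mat_train[:, :, 1] step above, on a toy corpus.
import numpy as np
from gensim import corpora
from gensim.models import LdaModel

docs = [["apple", "banana", "apple"], ["car", "bus", "car"], ["apple", "bus"]]
id2word = corpora.Dictionary(docs)
corpus = [id2word.doc2bow(text) for text in docs]

model = LdaModel(corpus=corpus, id2word=id2word, num_topics=2, iterations=50)
# minimum_probability=0.0 forces every topic to be reported for every document,
# so every row has the same length and the array is rectangular
mat = np.array([model.get_document_topics(bow, minimum_probability=0.0) for bow in corpus])
mat = mat[:, :, 1]  # drop the topic ids, keep the probabilities
print(mat.shape)    # (3, 2)
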
Example #4
def main(_):
    #os.environ['CUDA_VISIBLE_DEVICES'] = ''
    
    if FLAGS.dataset == "bibsonomy-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_bib
        traning_data_path = FLAGS.training_data_path_bib
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.59
        
    elif FLAGS.dataset == "zhihu-sample":
        word2vec_model_path = FLAGS.word2vec_model_path_zhihu
        traning_data_path = FLAGS.training_data_path_zhihu
        FLAGS.sequence_length = 100
        FLAGS.ave_labels_per_doc = 2.45
        
    elif FLAGS.dataset == "citeulike-a-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cua
        traning_data_path = FLAGS.training_data_path_cua
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 11.6
        
    elif FLAGS.dataset == "citeulike-t-clean":
        word2vec_model_path = FLAGS.word2vec_model_path_cut
        traning_data_path = FLAGS.training_data_path_cut
        FLAGS.sequence_length = 300
        FLAGS.ave_labels_per_doc = 7.68
        
    # 1. create trainlist, validlist and testlist 
    trainX, trainY, testX, testY = None, None, None, None
    vocabulary_word2index, vocabulary_index2word = create_voabulary(word2vec_model_path,name_scope=FLAGS.dataset + "-svm") #simple='simple'
    vocabulary_word2index_label,vocabulary_index2word_label = create_voabulary_label(voabulary_label=traning_data_path, name_scope=FLAGS.dataset + "-svm")
    num_classes=len(vocabulary_word2index_label)
    print(vocabulary_index2word_label[0],vocabulary_index2word_label[1])

    vocab_size = len(vocabulary_word2index)
    print("vocab_size:",vocab_size)

    # choosing whether to use k-fold cross-validation or hold-out validation
    if FLAGS.kfold == -1: # hold-out
        train, valid, test = load_data_multilabel_new(vocabulary_word2index, vocabulary_word2index_label,keep_label_percent=FLAGS.keep_label_percent,valid_portion=FLAGS.valid_portion,test_portion=FLAGS.test_portion,multi_label_flag=FLAGS.multi_label_flag,traning_data_path=traning_data_path) 
        # here train, test are tuples; turn train into trainlist.
        trainlist, validlist, testlist = list(), list(), list()
        trainlist.append(train)
        validlist.append(valid)
        testlist.append(test)
    else: # k-fold
        trainlist, validlist, testlist = load_data_multilabel_new_k_fold(vocabulary_word2index, vocabulary_word2index_label,keep_label_percent=FLAGS.keep_label_percent,kfold=FLAGS.kfold,test_portion=FLAGS.test_portion,multi_label_flag=FLAGS.multi_label_flag,traning_data_path=traning_data_path)
        # here trainlist, testlist are list of tuples.
    # get and pad the testing data: there is only one testing set, but kfold training and validation sets
    assert len(testlist) == 1
    testX, testY = testlist[0]
    testX = pad_sequences(testX, maxlen=FLAGS.sequence_length, value=0.)  # padding to max length

    # 2. get word_embedding matrix: shape (21425,100)
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = {}
    for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors):
        word2vec_dict[word] = vector
    word_embedding_2dlist = [[]] * vocab_size  # create an empty word_embedding list: a list of lists, where each inner list is one word's embedding vector.
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # assign empty for first word:'PAD'
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for random variables.
    count_exist = 0
    count_not_exist = 0
    for i in range(1, vocab_size):  # loop each word
        word = vocabulary_index2word[i]  # get a word
        embedding = None
        try:
            embedding = word2vec_dict[word]  # try to get its vector: an array.
        except Exception:
            embedding = None
        if embedding is not None:  # this word has a pre-trained embedding
            word_embedding_2dlist[i] = embedding  # assign the array to this word.
            count_exist = count_exist + 1
        else:  # no embedding for this word
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size)  # initialise with a random vector.
            count_not_exist = count_not_exist + 1
    word_embedding_final = np.array(word_embedding_2dlist)  # convert to a 2d array.
    print('embedding per word:',word_embedding_final)
    print('embedding per word, shape:',word_embedding_final.shape)

    # 3. transform trainlist to the required format: x_train, x_test are feature matrices of size (n_samples, n_features)
    #print(len(trainlist))
    #trainX,trainY = trainlist[0]
    #trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
    #print(len(trainX))
    #print(len(trainX[0]))
    #print(trainX[0])
    #print(len(trainY))
    #print(len(trainY[0]))
    #print(trainY[0])
    #print(np.asarray(trainY).shape)
    
    num_runs = len(trainlist)
    #validation results variables
    valid_acc_th,valid_prec_th,valid_rec_th,valid_fmeasure_th,valid_hamming_loss_th =[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs # initialise the result lists
    final_valid_acc_th,final_valid_prec_th,final_valid_rec_th,final_valid_fmeasure_th,final_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    min_valid_acc_th,min_valid_prec_th,min_valid_rec_th,min_valid_fmeasure_th,min_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    max_valid_acc_th,max_valid_prec_th,max_valid_rec_th,max_valid_fmeasure_th,max_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    std_valid_acc_th,std_valid_prec_th,std_valid_rec_th,std_valid_fmeasure_th,std_valid_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    #testing results variables
    test_acc_th,test_prec_th,test_rec_th,test_fmeasure_th,test_hamming_loss_th = [0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs,[0]*num_runs # initialise the testing result lists
    final_test_acc_th,final_test_prec_th,final_test_rec_th,final_test_fmeasure_th,final_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    min_test_acc_th,min_test_prec_th,min_test_rec_th,min_test_fmeasure_th,min_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    max_test_acc_th,max_test_prec_th,max_test_rec_th,max_test_fmeasure_th,max_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    std_test_acc_th,std_test_prec_th,std_test_rec_th,std_test_fmeasure_th,std_test_hamming_loss_th = 0.0,0.0,0.0,0.0,0.0
    #output variables
    output_valid = ""
    output_test = ""
    output_csv_valid = "fold,hamming_loss,acc,prec,rec,f1"
    output_csv_test = "fold,hamming_loss,acc,prec,rec,f1"
        
    time_train = [0]*num_runs # time spent in training for each fold
    num_run = 0
    testX_embedded = get_embedded_words(testX,word_embedding_final,vocab_size)
    print('testX_embedded:',testX_embedded)
    print('testX_embedded:',testX_embedded.shape)
    
    for trainfold in trainlist:
        # get training and validation data
        trainX,trainY=trainfold
        trainX = pad_sequences(trainX, maxlen=FLAGS.sequence_length, value=0.)
        trainX_embedded = get_embedded_words(trainX,word_embedding_final,vocab_size)
        print('trainX_embedded:',trainX_embedded)
        print('trainX_embedded:',trainX_embedded.shape)
        # code for debugging with less training data
        # debugging_num=1000
        # print('trainX_embedded_for_debugging:',trainX_embedded[1:debugging_num].shape) # for quick debugging
        # trainX_embedded = trainX_embedded[1:debugging_num] # for quick debugging
        # trainY = trainY[1:debugging_num] # for quick debugging
        
        validX,validY=validlist[num_run]
        validX = pad_sequences(validX, maxlen=FLAGS.sequence_length, value=0.)
        validX_embedded = get_embedded_words(validX,word_embedding_final,vocab_size)
        print('validX_embedded:',validX_embedded)
        print('validX_embedded:',validX_embedded.shape)
        
        # ** training **
        start_time_train = time.time()
        print('start training fold',str(num_run))
        
        trainY_int = np.asarray(trainY).astype(int)
        #print(type(trainY_int)) # <class 'numpy.ndarray'>
        #print(np.asarray(trainY).astype(int) == 1)
        
        #check trainY and remove labels that are False for all training instances
        one_class_label_list = list() # the list of labels that are not associated with any training instances.
        #print(trainY_int.shape)
        #print(sum(trainY_int[:,2]))
        for k in range(num_classes):
            if sum(trainY_int[:,k]) == 0:
                #print(k)
                one_class_label_list.append(k)
        # delete the labels not associated with any instances in the training data
        trainY_int_pruned = np.delete(trainY_int, one_class_label_list, 1)
        print(trainY_int_pruned.shape)
        #print(len(one_class_label_list),one_class_label_list)
        # base_lr = LogisticRegression()
        # #base_svm = SVC(kernel='rbf',C=FLAGS.C,gamma=FLAGS.gamma,probability=False)
        # #model = train_svm(trainX_embedded,np.asarray(trainY).astype(int))
        # chains = [ClassifierChain(base_lr, order='random', random_state=i) for i in range(3)]
        # count_chain=0
        # for chain in chains:
            # chain.fit(trainX_embedded,trainY_int_pruned == 1)
            # print('chain',count_chain,'out of',3,'done')
            # count_chain=count_chain+1
        # print('num_run',str(num_run),'train done.')
        chains = train_cc(trainX_embedded,trainY_int_pruned,num_chains=FLAGS.num_chains)
        time_train[num_run] = time.time() - start_time_train
        print("--- training of fold %s took %s seconds ---" % (num_run,time_train[num_run]))
        
        # evaluate on training data
        #acc, prec, rec, f_measure, hamming_loss = do_eval(model,trainX_embedded,np.asarray(trainY),hamming_q=FLAGS.ave_labels_per_doc)
        acc, prec, rec, f_measure, hamming_loss = do_eval_chains(chains,one_class_label_list,trainX_embedded,np.asarray(trainY),hamming_q=FLAGS.ave_labels_per_doc)
        #print('training:', acc, prec, rec, f_measure, hamming_loss)
        #pp = model.predict_proba(trainX_embedded)
        #print('pp',pp)
        #print('pp:',pp.shape)
        #print('pp_sum',np.sum(pp,0))
        #print('pp_sum',np.sum(pp,1))
        
        # evaluate on validation data
        #valid_acc_th[num_run],valid_prec_th[num_run],valid_rec_th[num_run],valid_fmeasure_th[num_run],valid_hamming_loss_th[num_run] = do_eval(model,validX_embedded,validY,hamming_q=FLAGS.ave_labels_per_doc)
        valid_acc_th[num_run],valid_prec_th[num_run],valid_rec_th[num_run],valid_fmeasure_th[num_run],valid_hamming_loss_th[num_run] = do_eval_chains(chains,one_class_label_list,validX_embedded,validY,hamming_q=FLAGS.ave_labels_per_doc)
        #print('validation:', acc, prec, rec, f_measure, hamming_loss)
        print("CC==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f" % (num_run,valid_acc_th[num_run],valid_hamming_loss_th[num_run],valid_prec_th[num_run],valid_rec_th[num_run],valid_fmeasure_th[num_run]))
        output_valid = output_valid + "\n" + "CC==>Run %d Validation Accuracy: %.3f\tValidation Hamming Loss: %.3f\tValidation Precision: %.3f\tValidation Recall: %.3f\tValidation F-measure: %.3f" % (num_run,valid_acc_th[num_run],valid_hamming_loss_th[num_run],valid_prec_th[num_run],valid_rec_th[num_run],valid_fmeasure_th[num_run]) + "\n" # also output the results of each run.
        output_csv_valid = output_csv_valid + "\n" + str(num_run) + "," + str(valid_hamming_loss_th[num_run]) + "," + str(valid_acc_th[num_run]) + "," + str(valid_prec_th[num_run]) + "," + str(valid_rec_th[num_run]) + "," + str(valid_fmeasure_th[num_run])
        
        start_time_test = time.time()
        # evaluate on testing data
        #test_acc_th[num_run],test_prec_th[num_run],test_rec_th[num_run],test_fmeasure_th[num_run],test_hamming_loss_th[num_run] = do_eval(model,testX_embedded,testY,hamming_q=FLAGS.ave_labels_per_doc)
        test_acc_th[num_run],test_prec_th[num_run],test_rec_th[num_run],test_fmeasure_th[num_run],test_hamming_loss_th[num_run] = do_eval_chains(chains,one_class_label_list,testX_embedded,testY,hamming_q=FLAGS.ave_labels_per_doc)
        #print('testing:', acc, prec, rec, f_measure, hamming_loss)
        print("CC==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f" % (num_run,test_acc_th[num_run],test_hamming_loss_th[num_run],test_prec_th[num_run],test_rec_th[num_run],test_fmeasure_th[num_run]))
        output_test = output_test + "\n" + "CC==>Run %d Test Accuracy: %.3f\tTest Hamming Loss: %.3f\tTest Precision: %.3f\tTest Recall: %.3f\tTest F-measure: %.3f" % (num_run,test_acc_th[num_run],test_hamming_loss_th[num_run],test_prec_th[num_run],test_rec_th[num_run],test_fmeasure_th[num_run]) + "\n" # also output the results of each run.
        output_csv_test = output_csv_test + "\n" + str(num_run) + "," + str(test_hamming_loss_th[num_run]) + "," + str(test_acc_th[num_run]) + "," + str(test_prec_th[num_run]) + "," + str(test_rec_th[num_run]) + "," + str(test_fmeasure_th[num_run])
        
        print("--- testing of fold %s took %s seconds ---" % (num_run, time.time() - start_time_test))
        
        prediction_str = ""
        # output final predictions for qualitative analysis
        if FLAGS.report_rand_pred:
            #prediction_str = display_for_qualitative_evaluation(model, testX_embedded,testX,testY,vocabulary_index2word,vocabulary_index2word_label)
            prediction_str = display_for_qualitative_evaluation_chains(chains, one_class_label_list,testX_embedded,testX,testY,vocabulary_index2word,vocabulary_index2word_label)
        # update the num_run
        num_run = num_run + 1
    
    print('\n--Final Results--\n')
    #print('C', FLAGS.C, 'gamma', FLAGS.gamma)
    
    # report min, max, std, average for the validation results
    min_valid_acc_th = min(valid_acc_th)
    min_valid_prec_th = min(valid_prec_th)
    min_valid_rec_th = min(valid_rec_th)
    min_valid_fmeasure_th = min(valid_fmeasure_th)
    min_valid_hamming_loss_th = min(valid_hamming_loss_th)
    
    max_valid_acc_th = max(valid_acc_th)
    max_valid_prec_th = max(valid_prec_th)
    max_valid_rec_th = max(valid_rec_th)
    max_valid_fmeasure_th = max(valid_fmeasure_th)
    max_valid_hamming_loss_th = max(valid_hamming_loss_th)
    
    if FLAGS.kfold != -1:
        std_valid_acc_th = statistics.stdev(valid_acc_th) # to change
        std_valid_prec_th = statistics.stdev(valid_prec_th)
        std_valid_rec_th = statistics.stdev(valid_rec_th)
        std_valid_fmeasure_th = statistics.stdev(valid_fmeasure_th)
        std_valid_hamming_loss_th = statistics.stdev(valid_hamming_loss_th)
    
    final_valid_acc_th = sum(valid_acc_th)/num_runs
    final_valid_prec_th = sum(valid_prec_th)/num_runs
    final_valid_rec_th = sum(valid_rec_th)/num_runs
    final_valid_fmeasure_th = sum(valid_fmeasure_th)/num_runs
    final_valid_hamming_loss_th = sum(valid_hamming_loss_th)/num_runs
    
    print("CC==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)" % (final_valid_acc_th,std_valid_acc_th,min_valid_acc_th,max_valid_acc_th,final_valid_hamming_loss_th,std_valid_hamming_loss_th,min_valid_hamming_loss_th,max_valid_hamming_loss_th,final_valid_prec_th,std_valid_prec_th,min_valid_prec_th,max_valid_prec_th,final_valid_rec_th,std_valid_rec_th,min_valid_rec_th,max_valid_rec_th,final_valid_fmeasure_th,std_valid_fmeasure_th,min_valid_fmeasure_th,max_valid_fmeasure_th))
    #output the result to a file
    output_valid = output_valid + "\n" + "CC==>Final Validation results Validation Accuracy: %.3f ± %.3f (%.3f - %.3f)\tValidation Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tValidation Precision: %.3f ± %.3f (%.3f - %.3f)\tValidation Recall: %.3f ± %.3f (%.3f - %.3f)\tValidation F-measure: %.3f ± %.3f (%.3f - %.3f)" % (final_valid_acc_th,std_valid_acc_th,min_valid_acc_th,max_valid_acc_th,final_valid_hamming_loss_th,std_valid_hamming_loss_th,min_valid_hamming_loss_th,max_valid_hamming_loss_th,final_valid_prec_th,std_valid_prec_th,min_valid_prec_th,max_valid_prec_th,final_valid_rec_th,std_valid_rec_th,min_valid_rec_th,max_valid_rec_th,final_valid_fmeasure_th,std_valid_fmeasure_th,min_valid_fmeasure_th,max_valid_fmeasure_th) + "\n"
    output_csv_valid = output_csv_valid + "\n" + "average" + "," + str(round(final_valid_hamming_loss_th,3)) + "±" + str(round(std_valid_hamming_loss_th,3)) + "," + str(round(final_valid_acc_th,3)) + "±" + str(round(std_valid_acc_th,3)) + "," + str(round(final_valid_prec_th,3)) + "±" + str(round(std_valid_prec_th,3)) + "," + str(round(final_valid_rec_th,3)) + "±" + str(round(std_valid_rec_th,3)) + "," + str(round(final_valid_fmeasure_th,3)) + "±" + str(round(std_valid_fmeasure_th,3))
    
    # report min, max, std, average for the testing results
    min_test_acc_th = min(test_acc_th)
    min_test_prec_th = min(test_prec_th)
    min_test_rec_th = min(test_rec_th)
    min_test_fmeasure_th = min(test_fmeasure_th)
    min_test_hamming_loss_th = min(test_hamming_loss_th)
    
    max_test_acc_th = max(test_acc_th)
    max_test_prec_th = max(test_prec_th)
    max_test_rec_th = max(test_rec_th)
    max_test_fmeasure_th = max(test_fmeasure_th)
    max_test_hamming_loss_th = max(test_hamming_loss_th)
    
    if FLAGS.kfold != -1:
        std_test_acc_th = statistics.stdev(test_acc_th) # to change
        std_test_prec_th = statistics.stdev(test_prec_th)
        std_test_rec_th = statistics.stdev(test_rec_th)
        std_test_fmeasure_th = statistics.stdev(test_fmeasure_th)
        std_test_hamming_loss_th = statistics.stdev(test_hamming_loss_th)
    
    final_test_acc_th = sum(test_acc_th)/num_runs
    final_test_prec_th = sum(test_prec_th)/num_runs
    final_test_rec_th = sum(test_rec_th)/num_runs
    final_test_fmeasure_th = sum(test_fmeasure_th)/num_runs
    final_test_hamming_loss_th = sum(test_hamming_loss_th)/num_runs
    
    print("SVM==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)" % (final_test_acc_th,std_test_acc_th,min_test_acc_th,max_test_acc_th,final_test_hamming_loss_th,std_test_hamming_loss_th,min_test_hamming_loss_th,max_test_hamming_loss_th,final_test_prec_th,std_test_prec_th,min_test_prec_th,max_test_prec_th,final_test_rec_th,std_test_rec_th,min_test_rec_th,max_test_rec_th,final_test_fmeasure_th,std_test_fmeasure_th,min_test_fmeasure_th,max_test_fmeasure_th))
    #output the result to a file
    output_test = output_test + "\n" + "SVM==>Final Test results Test Accuracy: %.3f ± %.3f (%.3f - %.3f)\tTest Hamming Loss: %.3f ± %.3f (%.3f - %.3f)\tTest Precision: %.3f ± %.3f (%.3f - %.3f)\tTest Recall: %.3f ± %.3f (%.3f - %.3f)\tTest F-measure: %.3f ± %.3f (%.3f - %.3f)" % (final_test_acc_th,std_test_acc_th,min_test_acc_th,max_test_acc_th,final_test_hamming_loss_th,std_test_hamming_loss_th,min_test_hamming_loss_th,max_test_hamming_loss_th,final_test_prec_th,std_test_prec_th,min_test_prec_th,max_test_prec_th,final_test_rec_th,std_test_rec_th,min_test_rec_th,max_test_rec_th,final_test_fmeasure_th,std_test_fmeasure_th,min_test_fmeasure_th,max_test_fmeasure_th) + "\n"
    output_csv_test = output_csv_test + "\n" + "average" + "," + str(round(final_test_hamming_loss_th,3)) + "±" + str(round(std_test_hamming_loss_th,3)) + "," + str(round(final_test_acc_th,3)) + "±" + str(round(std_test_acc_th,3)) + "," + str(round(final_test_prec_th,3)) + "±" + str(round(std_test_prec_th,3)) + "," + str(round(final_test_rec_th,3)) + "±" + str(round(std_test_rec_th,3)) + "," + str(round(final_test_fmeasure_th,3)) + "±" + str(round(std_test_fmeasure_th,3))
    
    setting = "dataset:" + str(FLAGS.dataset) + "\nC: " + str(FLAGS.C) + "\ngamma: " + str(FLAGS.gamma)
    print("--- The whole program took %s seconds ---" % (time.time() - start_time))
    time_used = "--- The whole program took %s seconds ---" % (time.time() - start_time)
    if FLAGS.kfold != -1:
        print("--- The average training took %s ± %s seconds ---" % (sum(time_train)/num_runs,statistics.stdev(time_train)))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (sum(time_train)/num_runs,statistics.stdev(time_train))
    else:
        print("--- The average training took %s ± %s seconds ---" % (sum(time_train)/num_runs,0))
        average_time_train = "--- The average training took %s ± %s seconds ---" % (sum(time_train)/num_runs,0)

    # output setting configuration, results, prediction and time used
    output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + '.txt',setting + '\n' + output_valid + '\n' + output_test + '\n' + prediction_str + '\n' + time_used + '\n' + average_time_train)
    # output structured evaluation results
    output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + ' valid.csv',output_csv_valid)
    output_to_file('svm ' + str(FLAGS.dataset) + " C " + str(FLAGS.C) + ' gamma' + str(FLAGS.gamma) + ' gp_id' + str(FLAGS.marking_id) + ' test.csv',output_csv_test)
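
The train_cc helper used above is defined elsewhere; the commented-out block inside the fold loop suggests what it does. A plausible sketch consistent with that block, an ensemble of logistic-regression classifier chains with random label orders (the exact signature is an assumption):

# Sketch (assumption): a train_cc consistent with the commented-out block above.
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import ClassifierChain

def train_cc(X, Y_int, num_chains=3):
    base_lr = LogisticRegression()
    # each chain gets a different random label order; ClassifierChain clones base_lr internally.
    # Labels constant across all training instances must be removed first
    # (see trainY_int_pruned above), or the per-label classifiers cannot be fit.
    chains = [ClassifierChain(base_lr, order='random', random_state=i)
              for i in range(num_chains)]
    for count_chain, chain in enumerate(chains):
        chain.fit(X, Y_int == 1)  # fit on the boolean label matrix, as in the commented code
        print('chain', count_chain, 'out of', num_chains, 'done')
    return chains
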