def __init__(self):
     """
     inhereit from HyperParameters Class
     :return:
     """
     # super(MultiEmbedding,self).__init__()
     HyperParameters.__init__(self)
Пример #2
0
 def __init__(self):
     """
     Most of hyperparameters come from Hyperparameters class
     :return:
     """
     # state here again to make sure this class can inhereite parameters from partent class
     HyperParameters.__init__(self)
Пример #3
0
    def __init__(self):
        """

        """
        HyperParameters.__init__(self)
        self.history_classify_df = pd.DataFrame(columns=[
            'loss', 'accuracy', 'val_loss', 'val_accuracy', 'epoch',
            'model_features'
        ])
Пример #4
0
 def __init__(self):
     """
     All the hyperparameters need initialize in this section
     """
     HyperParameters.__init__(self)
Пример #5
0
 def __init__(self):
     """
     """
     # using distribution to decide this parameters
     # self.MAX_SEQ_LENGTH = 400
     HyperParameters.__init__(self)
 def __init__(self):
     HyperParameters.__init__(self)
 def __init__(self):
     # we need state here again to make sure this class can inhereite parameters from partent class
     HyperParameters.__init__(self)
Пример #8
0
 def __init__(self, history):
     HyperParameters.__init__(self)
     # self.shape = None
     self.history = history
Пример #9
0
def main():
    """
    We use this function to call process one by one.
    """

    # ***********************import******************************
    import_class = ImportData()
    # df_train_raw = (6079,41) raw data, which just read from csv file without changing anything
    # X_q_train_df = (6079, 6) only contain ['qa_id', 'question_title', 'question_body', 'category', 'host','question']
    # X_a_train_df = (6079,4) contain ['qa_id', 'answer', 'category', 'host']
    # y_q_train_df = (6079,21) numerical labels
    # y_a_train_df = (6079,9) numerical labels
    (df_train_raw, X_q_train_df, X_a_train_df, y_q_train_df,
     y_a_train_df) = import_class.import_data("03_data/02_train.csv")
    df_test_raw, X_q_test_df, X_a_test_df, y_q_test_df, y_a_test_df = import_class.import_data("03_data/03_test.csv")

    # ***********************clean******************************
    clean_class = CleanData()
    q_train_cleaned_df = clean_class.clean_process(X_q_train_df, column_1='question')
    a_train_cleaned_df = clean_class.clean_process(X_a_train_df, column_1='answer')
    q_test_cleaned_df = clean_class.clean_process(X_q_test_df, column_1 ='question')
    a_test_cleaned_df = clean_class.clean_process(X_a_test_df, column_1='answer')

    # ***********************tokenize*****************************
    token_class = TokenizeData()
    # do not use '|', insteand we can use comma to next line and bracket to state they are together
    # input q_train_cleaned_dataframe and output still dataframe with new colunm[padded]
    # question_train part_padded  = (6078, 400)
    (q_train_padded, q_train_cleaned_df, a_train_padded, word_index,
     q_index_word) = token_class.tokenize_data(q_train_cleaned_df, a_train_cleaned_df)


    # ********Using manuually categorical*************
    #***********From this part, we only consider classification version************
    # y_q_train_df
    label_class = LabelProcess(y_q_train_df, y_a_train_df)
    y_q_label_df, y_a_label_df, q_feature_col, a_feature_col = label_class.num_label()
    y_q_classify_list, y_q_classify_dict, y_a_classify_list, y_a_classify_dict = label_class.classify_label()


    # ********************EDA******************************
    # eda_class = EdaData()
    # eda_class.question_plot(y_question_df)
    # eda_class.answer_plot(y_answer_df)
    # q_train_padded have shape (6079,100) can be used in fewer embedding

    # #*********************Embedding****THIS PART ONLY WORD AS API*******************

    glove_class = GloveVect()
    embedding_matrix, embedding_index = glove_class.glove_vect(word_index)
    embed_class = MultiEmbedding()
    #first transform q_train_padded
    # q_glove_output_array, q_glove_embedding_layer= embed_class.glove_embedding(word_index,
    #                                                                            q_train_padded,
    #                                                                            embedding_matrix,
    #                                                                            part = 'q')
    # a_glove_output_array, a_glove_embedding_layer= embed_class.glove_embedding(word_index,
    #                                                                            a_train_padded,
    #                                                                            embedding_matrix,
    #                                                                            part = 'a')
    # q_random_output, q_random_embedding_layer = embed_class.random_embedding(word_index,
    #                                                                          q_train_padded,
    #                                                                           part = 'q')
    # a_random_output, a_random_embedding_layer = embed_class.random_embedding(word_index,
    #                                                                          a_train_padded,
    #                                                                          part = 'a')
    #

    #******************Models*******************************

    model_class = BuildModels()
    compile_class = SplitAndCompile()
    save_class = SaveModelHistory()
    para_class = HyperParameters()
    # predict_class = PredictCallback()
    #***************Random Embedding Normal Neural Network****************
    nn_model = model_class.nn_model(word_index, pretrain_matrix=embedding_matrix)
    history, model = compile_class.compile_fit(nn_model,
                                               q_train_padded, a_train_padded, y_q_label_df, y_a_label_df,
                                               y_q_classify_list, y_q_classify_dict,
                                               y_a_classify_list, y_a_classify_dict,
                                               epoch_num=para_class.EPOCH)
    history_classify_df = save_class.write_csv(history, model)

    # # ***************Pretrain Glove Normal Neural Network****************(each model should have its own model part)
    # pretrain_nn = model_class.nn_model(word_index, pretrain_matrix=embedding_matrix)
    # history, model = compile_class.compile_fit(pretrain_nn,
    #                                            q_train_padded, a_train_padded, y_q_label_df, y_a_label_df,
    #                                            y_q_classify_list, y_q_classify_dict,
    #                                            y_a_classify_list, y_a_classify_dict,
    #                                            epoch_num=3)
    # history_classify_df = save_class.write_csv(history, model)

    # ***************Question Pretrain Gloave Normal CNN Classify*******************
    # cnn_class= CNNModel()
    # cnn_model_1 = cnn_class.normal_cnn(word_index, pretrain_matrix=embedding_matrix)
    # history, model = compile_class.compile_fit(cnn_model_1,
    #                                            X_q_train, X_q_val, y_q_train, y_q_val,
    #                                            loss_fun='categorical_crossentropy',
    #                                            epoch_num=10)
    # history_classify_df = save_class.write_csv(history, model)

    # cnn_class = CNNModel()
    # cnn_model_2 = cnn_class.n_gram_cnn(word_index, pretrain_matrix=embedding_matrix)
    # history, model = compile_class.compile_fit(cnn_model_2, q_train_padded, a_train_padded, y_q_label_df, y_a_label_df,
    #                                            y_q_classify_list, y_q_classify_dict,
    #                                            y_a_classify_list, y_a_classify_dict,
    #                                            epoch_num=30)
    # history_classify_df = save_class.write_csv(history, model)

    rnn_class = RNNModel()
    lstm_model_1 = rnn_class.lstm(word_index, pretrain_matrix=embedding_matrix)
    history, model = compile_class.compile_fit(lstm_model_1, q_train_padded, a_train_padded, y_q_label_df, y_a_label_df,
                                               y_q_classify_list, y_q_classify_dict,
                                               y_a_classify_list, y_a_classify_dict,
                                               epoch_num=1)
    history_classify_df = save_class.write_csv(history, model)

    #************************test part*****************************
    # 1.tokenize
    # 2.fit model
    # 3.get plot and analysis result
    #*************************END***************************



    return (df_train_raw, X_q_train_df, X_a_train_df, y_q_train_df, y_a_train_df, a_train_cleaned_df,
            q_train_padded, q_train_cleaned_df, a_train_padded, word_index, q_index_word,
            y_q_label_df, y_a_label_df, q_feature_col, a_feature_col,
            y_q_classify_list, y_q_classify_dict, y_a_classify_list, y_a_classify_dict,
            history, model)