def main():
    """Train a classifier on labelled data, then refine it by self-training.

    Phase 1 (supervised): split the labelled data, tokenize it, convert the
    tokens to bag-of-words features, fit the model and evaluate it on the
    held-out split.

    Phase 2 (semi-supervised self-training): for up to ``semi_train_amount``
    rounds, predict on the remaining unlabelled pool, turn every prediction
    whose top probability exceeds ``train_threshold`` into a one-hot
    pseudo-label, fit on those pseudo-labelled samples, re-fit on the labelled
    training data, evaluate, and drop the consumed samples from the pool.
    """
    data_reader = DataReader()
    df = data_reader.get_all_data()

    # random split of data
    train_x_raw, train_y_raw, test_x_raw, test_y_raw = get_train_test_split(df)

    # set up train data
    train_tokens, train_y_raw = tokenize(train_x_raw, train_y_raw, save_missing_feature_as_string=False,
                                         remove_empty=True)
    train_x, train_y, feature_names = tokens_to_bagofwords(train_tokens, train_y_raw)

    # train model
    model = _get_nn_model_bag_of_words_simple_v2(train_x, train_y, data_reader.get_region_labels()['Code'],
                                                 epochs=50, batch_size=64)

    # set up test data, reusing the feature names produced from the train data
    test_tokens, test_y_raw = tokenize(test_x_raw, test_y_raw, save_missing_feature_as_string=False, remove_empty=True)
    test_x, test_y, _ = tokens_to_bagofwords(test_tokens, test_y_raw, feature_names=feature_names)

    # evaluate model
    evaluate_model_nn(model, test_x, test_y, plot_roc=False)

    # ABOVE IS BASIC SUPERVISED LEARNING TO GENERATE MODEL
    #################################################
    # BELOW IS SEMI-SUPERVISED SELF-TRAINING TO FURTHER TRAIN MODEL

    # read unlabelled data and format it to be the same as labelled data
    unlabelled_df = data_reader.get_east_dir()
    unlabelled_df = normalize_east_dir_df(unlabelled_df)

    # set up unlabelled data as semi-supervised data
    # NOTE(review): `_` is used as a placeholder for the (non-existent) labels
    # argument, but at this point it is still bound to the third value returned
    # by the test-set tokens_to_bagofwords() call above. Confirm tokenize()
    # tolerates that value before replacing it with something explicit.
    tokens, _ = tokenize(unlabelled_df, _, save_missing_feature_as_string=False, remove_empty=True)
    semi_x_base, _, _ = tokens_to_bagofwords(tokens, _, feature_names=feature_names)

    # Confidence threshold to train on
    train_threshold = 0.8
    # Maximum number of self-training rounds
    semi_train_amount = 30

    # SELF TRAIN MANY TIMES
    for _round in range(semi_train_amount):
        # nothing left in the unlabelled pool: further rounds cannot
        # self-train, and predicting on an empty pool is pointless
        if semi_x_base.shape[0] == 0:
            break

        # get predictions on unlabelled data
        pred = model.model.predict(semi_x_base)
        # rows whose highest class probability is above the threshold;
        # computed once and reused for x, y and the pool update below
        confident = pred.max(axis=1) > train_threshold

        # only fit on pseudo-labels when at least one sample qualifies;
        # fitting on an empty batch is meaningless (and Keras-style fit()
        # implementations reject it)
        if confident.any():
            # convert probabilities to 1 hot encoded output
            semi_y = np.zeros_like(pred)
            semi_y[np.arange(len(pred)), pred.argmax(1)] = 1
            # keep only the predictions above train_threshold
            semi_y = semi_y[confident]
            semi_x = semi_x_base[confident]

            # train on semi supervised data
            model.model.fit(semi_x, semi_y, batch_size=64, epochs=100)

        # retrain on original train data
        model.model.fit(train_x, model.encoder.transform(train_y), batch_size=32, epochs=10)

        # evaluate model
        evaluate_model_nn(model, test_x, test_y, plot_roc=False)

        # remove semi data used in this iteration from future iterations
        semi_x_base = semi_x_base[~confident]