Example #1
def valid(mdl: FeedforwardNetwork, data_iter: utils.BatchIterator, LANG, args):
    with torch.no_grad():
        preds = []
        golds = []
        for i, batch in enumerate(
                tqdm.tqdm(data_iter, total=len(data_iter))):
            pred = run_iter(mdl, batch, None, False, None)
            preds.extend(LANG.decode(pred))
            golds.extend(LANG.decode(batch['lang']))

    acc = accuracy_score(golds, preds) * 100
    f1 = f1_score(golds, preds, average='macro') * 100
    precision = precision_score(golds, preds, average='macro') * 100
    recall = recall_score(golds, preds, average='macro') * 100
    res = {
        'acc': round(acc, 2),
        'f1': round(f1, 2),
        'precision': round(precision, 2),
        'recall': round(recall, 2)
    }
    report = classification_report(golds, preds, digits=4)
    utils.save_txt(
        report,
        os.path.join(
            args.mdir, f'report-acc{acc:.2f}-'
            f'f1{f1:.2f}-'
            f'p{precision:.2f}-'
            f'r{recall:.2f}.txt'))
    return res
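The utils.save_txt helper itself is not shown on this page, and each project ships its own version. A minimal sketch of a helper compatible with the calls in Examples #1, #2 and #7, assuming it takes the text (a single string or a list of lines) followed by the destination path; the real implementations may differ:

import os

def save_txt(content, path):
    """Write a string, or an iterable of lines, to a UTF-8 text file."""
    # make sure the target directory exists (assumption: callers may pass nested paths)
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    if not isinstance(content, str):
        content = "\n".join(str(line) for line in content)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content + "\n")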
Example #2
def create_training_labels(type_,
                           path=os.path.join(path_train, "semcor+omsti.json")):
    ###############################################################################
    # This function creates a txt file with sentences with a specific label
    # and a vocabulary with all the seen labels.
    #
    # Input:
    #   type_: it is a label used to choose the type of label
    #   path: path of the json file
    #
    # Output:
    #   None
    ###############################################################################

    # create a list with sentences of the considered labels for all the training set
    dictionary = create_labels_words()
    data = utils.load_json(path)
    data = list(
        map(partial(sentence_from_dictionaries, training_sentence=False),
            data))

    sentences = []
    labels = set()

    for sentence in data:

        single_sentence = []

        for word in sentence.split():

            # insert the current word
            if type(dictionary.get(word, word)) != list:
                single_sentence.append(word)

            # insert the corresponding label for the current word
            else:
                single_sentence.append(str(dictionary.get(word, word)[type_]))
                labels.add(str(dictionary.get(word, word)[type_]))

        sentences.append(single_sentence)

    # create the vocabulary of seen labels, adding the ids for the padding, the unseen labels and the unlabelled words
    vocabulary = {
        value: key
        for key, value in dict(enumerate(labels, 3)).items()
    }
    vocabulary["<PAD>"] = "0"
    vocabulary["<UNSEEN>"] = "1"
    vocabulary["<WORD>"] = "2"

    # exchange strings with ids
    sentences = list(
        map(
            lambda sentence: ' '.join(
                str(vocabulary.get(word, word)) for word in sentence),
            sentences))

    utils.save_txt(sentences, path_train + name_training_file[type_][0])
    utils.save_pickle(vocabulary,
                      "../resources/" + name_training_file[type_][1])
Example #3
def predict_wordnet_domains(input_path: str, output_path: str,
                            resources_path: str) -> None:
    """
    DO NOT MODIFY THE SIGNATURE!
    This is the skeleton of the prediction function.
    The predict function will build your model, load the weights from the checkpoint and write a new file (output_path)
    with your predictions in the "<id> <wordnetDomain>" format (e.g. "d000.s000.t000 sport").

    The resources folder should contain everything you need to make the predictions. It is the "resources" folder in your submission.

    N.B. DO NOT HARD CODE PATHS IN HERE. Use resources_path instead, otherwise we will not be able to run the code.
    If you don't know what HARD CODING means see: https://en.wikipedia.org/wiki/Hard_coding

    :param input_path: the path of the input file to predict in the same format as Raganato's framework (XML files you downloaded).
    :param output_path: the path of the output file (where you save your predictions)
    :param resources_path: the path of the resources folder containing your model and stuff you might need.
    :return: None
    """

    path_json = os.path.join(resources_path, "Test", 'test.json')
    path_model = os.path.join(resources_path, 'model', 'model_coarsegrained')

    # parse the xml file
    parsing.parsing_datasets(input_path, path_json)
    # creates candidates and input file for the model
    input_, candidates, ids_candidates = create_batched_inputs(path_json)
    ids = take_id_istances(resources_path)
    final_prediction = []

    with tf.Session() as sess:

        # load the model
        output_wndomain, _, inputs_, input_int, keep_prob = load_model_coarsegrained(
            sess, path_model)
        for batch_num in range(len(input_[0])):

            # do the prediction
            preds_wndomain = sess.run(output_wndomain,
                                      feed_dict={
                                          inputs_: input_[0][batch_num],
                                          input_int: input_[1][batch_num],
                                          keep_prob: 1.
                                      })
            final_prediction += calculate_prediction(input_[1][batch_num],
                                                     preds_wndomain,
                                                     candidates[1][batch_num],
                                                     ids_candidates[1],
                                                     vocab_wndomain)

        combined = list(zip(ids, final_prediction))

        # save the prediction
        utils.save_txt(
            list(map(lambda word: word[0] + " " + word[1], combined)),
            output_path)

    tf.reset_default_graph()
    pass
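load_model_coarsegrained is defined elsewhere in this project. A minimal sketch of how such a loader could restore a TF 1.x checkpoint and fetch the tensors used above; the tensor names are placeholders chosen for illustration, not the project's actual graph names:

import tensorflow as tf  # TF 1.x API, matching the tf.Session usage above

def load_model_coarsegrained(sess, path_model):
    """Restore the graph and weights saved under path_model and return the needed tensors."""
    saver = tf.train.import_meta_graph(path_model + ".meta")
    saver.restore(sess, path_model)
    graph = tf.get_default_graph()
    # hypothetical tensor names; the real graph defines its own
    output_wndomain = graph.get_tensor_by_name("output_wndomain:0")
    output_lexname = graph.get_tensor_by_name("output_lexname:0")
    inputs_ = graph.get_tensor_by_name("inputs:0")
    input_int = graph.get_tensor_by_name("input_int:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    return output_wndomain, output_lexname, inputs_, input_int, keep_prob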
Example #4
def all_operations(in_dir, number_of_images, out_dir):
    def shoggoth(img_num):
        image = get_image(img_num, in_dir)
        # Detect the corners of the sheet, correct the perspective and crop the page.
        image_perspective_operations, sPoints, tPoints = perspective_operations(
            image)
        # Remove the grid lines from the page so that only the words remain visible
        image_remove_lines_operations = removeLines(
            image_perspective_operations)
        image_checkered_operations = checkered_operations(
            image_remove_lines_operations)
        # Detect words
        image_detect_words = detect_words_operations(
            image_checkered_operations)
        # Detect the index numbers
        image_crop_numbers, cropped_numbers = crop_numbers(
            image_detect_words, image_perspective_operations)
        # Split into individual digits
        image_crop_digits = crop_digits(cropped_numbers, image)

        cropped_rectangles = crop_small_rectangles(image_detect_words)
        mask = paint_lines(cropped_rectangles)
        mask = prepare_mask(mask)
        new_img = move_to_original_coords(mask, sPoints, tPoints,
                                          image.shape[:2])
        save_image(img_num, new_img, "png", out_dir)

        return image_crop_digits

    image_range = range(1, number_of_images + 1)

    image_crop_digits = []
    for image_num in tqdm(image_range):
        image_crop_digits.append(shoggoth(image_num))

    index_array = []
    model = tf.keras.models.load_model(pretict_model_path)

    for page_number, page in tqdm(enumerate(image_crop_digits)):
        index_array.append([])
        for index_number, image_number in enumerate(page):
            index_array[page_number].append('')
            for index_digit, image_digit in enumerate(image_number):
                predicted_digit = predict_image(image_digit, model)
                index_array[page_number][index_number] += str(predicted_digit)
        index_array[page_number].reverse()

    for image_num, page in enumerate(index_array):
        save_txt((image_num + 1), page, out_dir)
        print('page: {}'.format(page))
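predict_image and the image-processing helpers used above come from the rest of this project. A minimal sketch of what the digit predictor could look like, assuming an MNIST-style Keras classifier with 28x28 grayscale inputs (the project's real helper may preprocess differently):

import cv2
import numpy as np

def predict_image(image_digit, model):
    """Return the digit class predicted for a single cropped digit image."""
    digit = cv2.resize(image_digit, (28, 28))
    if digit.ndim == 3:
        # collapse a colour crop to a single grayscale channel
        digit = cv2.cvtColor(digit, cv2.COLOR_BGR2GRAY)
    digit = digit.astype("float32") / 255.0
    digit = digit.reshape(1, 28, 28, 1)
    probabilities = model.predict(digit)
    return int(np.argmax(probabilities))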
Example #5
def main():
    if (EXPERIMENT_NAME not in os.listdir()):
        os.mkdir(EXPERIMENT_NAME)

    for feature in FEATURES:
        try:
            data = np.load(feature + "_stats.npy", allow_pickle=True).item()
            pca = joblib.load("pca_" + feature + "_stats")
            train_data, train_data_classes = make_train_data(data, True)
            test_data, test_data_classes = make_test_data(data)
            train_data_pca = np.array(pca.transform(train_data))
            test_data_pca = np.array(pca.transform(test_data))

            for c in C_VALUES:
                if (feature, c) in SKIP_COMBINATIONS:
                    print("Skipping " + feature + " SVM-linear C = " + str(c))
                    continue

                print("Computing " + feature + " SVM-linear C = " + str(c))
                res, model = experiment_svm_linear(train_data_pca,
                                                   train_data_classes,
                                                   test_data_pca,
                                                   test_data_classes, c,
                                                   TRAIN_VERBOSE)

                if res != None:
                    if SAVE_RESULTS:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + str(
                            c) + "_results"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(res, path)

                    if SAVE_RESULTS_TXT:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + str(
                            c) + "_results.txt"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        save_txt(res, path)

                    if SAVE_MODEL:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + str(
                            c) + "_model"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(model, path)

        except Exception as e:
            print("Error during " + EXPERIMENT_NAME + " " + feature +
                  " SVM-linear C = " + str(c))
            print(e)
            pass
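experiment_svm_linear, make_train_data and the SAVE_* flags are defined elsewhere in this project. A minimal sketch of what the SVM experiment helper might look like, assuming scikit-learn; the 'results' field mirrors how res['results'] is read in Example #11, but the exact keys are an assumption:

from sklearn.svm import SVC
from sklearn.metrics import classification_report

def experiment_svm_linear(train_x, train_y, test_x, test_y, c, verbose=False):
    """Train a linear-kernel SVM with the given C and evaluate it on the test split."""
    model = SVC(kernel="linear", C=c, verbose=verbose)
    model.fit(train_x, train_y)
    predicted = model.predict(test_x)
    res = {
        "results": predicted,  # assumed field name, matching res['results'] in Example #11
        "report": classification_report(test_y, predicted),
    }
    return res, model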
Example #6
File: main.py Project: ChicoOu/dl
    def _recons(self, sess, result_file, recons_file):
        x = self.test_x[0:self.batch_size]
        y = self.test_y[0:self.batch_size]
        feed_dict = {
            self.capsNet.x: x,
            self.capsNet.labels: y
        } if self.capsNet.recon_with_y else {
            self.capsNet.x: x
        }

        masked_v, decoded = sess.run(
            [self.capsNet.recons_input, self.capsNet.decoded],
            feed_dict=feed_dict)
        save_txt(masked_v, recons_file_name=recons_file)
        save_images(decoded, result_file_name=result_file, height_number=8)
        pass
Example #7
def create_training_sentence(path=os.path.join(path_train,
                                               "semcor+omsti.json")):
    ###############################################################################
    # This function creates a txt file with all the training sentences as strings,
    # a txt file with all the training sentences as integer ids, and two vocabularies,
    # one with all the seen lemmas and the other with all the seen words
    # in the training set.
    #
    # Input:
    #   path: path of the json file
    #
    # Output:
    #   None
    ###############################################################################
    data = utils.load_json(path)

    # create the dictionaries of lemmas and words
    dictionary_train = create_training_dictionary(data)
    dictionary_lemmas = create_dictionary_lemmas(data)

    data = [
        sentence for sentence in list(
            map(partial(sentence_from_dictionaries, training_sentence=True),
                data))
    ]

    print(max([len(sentence) for sentence in data]))
    # replace the words in the training set with their ids, using 1 for OOV words
    data_w_ids = list(
        map(
            lambda sentence: " ".join(
                [dictionary_train.get(word, "1") for word in sentence]), data))

    utils.save_txt([" ".join(sentence) for sentence in data],
                   path_train + "semcor_omsti_string.txt")
    utils.save_txt(data_w_ids, path_train + "semcor_omsti.txt")

    utils.save_pickle(dictionary_train, "../resources/vocabulary.pickle")
    utils.save_pickle(dictionary_lemmas,
                      "../resources/vocabulary_lemmas.pickle")
Example #8
def create_training_POS_labels(
        path=os.path.join(path_train, "semcor+omsti.json"), type_=3):
    ###############################################################################
    # This function creates a txt file with sentences with POS labels
    # and a vocabulary with all the seen POS labels.
    #
    # Input:
    #   path: path of the json file
    #   type_: it is a label used to select the type of the output
    #
    # Output:
    #   None
    ###############################################################################

    # create a list with sentences of POS labels for all the training set
    data = utils.load_json(path)
    data = [sentence.split() for sentence in list(map(label_POS, data))]

    # create the vocabulary of seen POS labels, adding the ids for the padding and the unseen labels
    dictionary_POS = {
        value: str(key)
        for key, value in dict(
            enumerate(
                set(itertools.chain.from_iterable(data)) -
                {"<UNSEEN>"}, 2)).items()
    }
    dictionary_POS["<PAD>"] = "0"
    dictionary_POS["<UNSEEN>"] = "1"

    # exchange strings with their ids
    data = list(
        map(
            lambda sentence: " ".join(
                [dictionary_POS.get(word, word) for word in sentence]), data))

    utils.save_txt(data, path_train + name_training_file[type_][0])
    utils.save_pickle(dictionary_POS,
                      "../resources/" + name_training_file[type_][1])
Example #9
def create_labels_words(path_input, save_gt=False):
    ###############################################################################
    # This function, given the gold file, creates a dictionary of labels for each
    # ambiguous word (babelnet id, Wndomain, Lexname) and, if save_gt is True,
    # it also saves the ground truth
    #
    # Input:
    #   path_input: path of the gold file
    #   save_gt: if True, the ground truth is saved, otherwise no
    #
    # Output:
    #   dict_sensekey: dictionary of labels
    ###############################################################################

    sense_keys = [sensekey.split() for sensekey in utils.load_txt(path_input)]
    dict_sensekey = {}

    for list_info in sense_keys:

        # take the synset from the sense key
        synset = wn.lemma_from_key(list_info[1]).synset()
        # take the wordnet id from the sense key
        wn_id = "wn:" + str(synset.offset()).zfill(8) + synset.pos()
        bn_id = Wordnet_match[wn_id]

        try:
            dict_sensekey[list_info[0]] = [
                bn_id, Wndomain_match[bn_id], lexname_match[bn_id]
            ]

        # add the factotum label to all the words which don't have a wndomain label
        except KeyError:
            dict_sensekey[list_info[0]] = [
                bn_id, "factotum", lexname_match[bn_id]
            ]

    # save the ground truth
    if save_gt:

        name_words = list(dict_sensekey.keys())
        gt_words = list(dict_sensekey.values())
        combined = list(zip(name_words, gt_words))

        utils.save_txt(
            list(map(lambda word: word[0] + " " + word[1][0], combined)),
            "../resources/Test/fine_grained_gt.txt")
        utils.save_txt(
            list(map(lambda word: word[0] + " " + word[1][1], combined)),
            "../resources/Test/wndomain_gt.txt")
        utils.save_txt(
            list(map(lambda word: word[0] + " " + word[1][2], combined)),
            "../resources/Test/lexname_gt.txt")

        return None

    return dict_sensekey
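The sense-key-to-WordNet-id conversion used above can be checked in isolation with NLTK; a small usage example (Wordnet_match, Wndomain_match and lexname_match are project resources and are not reproduced here):

from nltk.corpus import wordnet as wn

synset = wn.lemma_from_key("dog%1:05:00::").synset()
wn_id = "wn:" + str(synset.offset()).zfill(8) + synset.pos()
print(wn_id)  # e.g. "wn:02084071n" with WordNet 3.0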
Example #10
def main():
    ADS_PATH = sys.argv[1]
    TV_PATH = sys.argv[2]

    if config.TIME_FLAG:
        start_time = time.time()

    # GET DESCRIPTORS

    if not config.READ_DESCRIPTORS:
        # get all the paths of the commercial (ad) videos
        ADS = utils.getMPEGfiles(ADS_PATH)

        # Initialize the EHD class to compute the descriptors
        # for the commercials and the TV video
        EHD = utils.EHD_DESCRIPTOR()
        ADS_DESCRIPTORS, N_FRAMES_ADS = EHD.getDescriptors(
            ADS, title='ADS_DESCRIPTORS.dat')
        #ADS DESCRIPTOR = VIDEOS x FRAMES x DESCRIPTOR
        TV_DESCRIPTORS, N_FRAMES_TV = EHD.getDescriptors(
            [TV_PATH], title='TV_DESCRIPTORS.dat')
        # get the video number, basename and duration of each ad
        ADS_INFO = utils.ads_information(ADS, N_FRAMES_ADS)

        # Save the descriptors
        if config.SAVE_DESCRIPTORS:
            np.save('ADS_DESCRIPTORS.npy', ADS_DESCRIPTORS)
            np.save('TV_DESCRIPTORS.npy', TV_DESCRIPTORS)
            np.save('N_FRAMES_ADS.npy', N_FRAMES_ADS)
            np.save('N_FRAMES_TV', N_FRAMES_TV)
            print("SAVED SUCCEED")

    else:
        # Read the descriptors
        ADS = utils.getMPEGfiles(ADS_PATH)
        ADS_DESCRIPTORS = np.load('ADS_DESCRIPTORS.npy')
        TV_DESCRIPTORS = np.load('TV_DESCRIPTORS.npy')
        N_FRAMES_ADS = np.load('N_FRAMES_ADS.npy')
        ADS_INFO = utils.ads_information(ADS, N_FRAMES_ADS)

    # GET NEIGHBORS
    # Get the k (3) nearest neighbors for each frame of the TV video and save the results
    if not config.READ_DESCRIPTORS:
        k_nearest_neighbors = []
        GET_NEIGHBORS = utils.K_NEIGHBORS()
        for tv_video in TV_DESCRIPTORS:
            for frame_descriptor in tv_video:
                k_nearest_neighbors.append(
                    GET_NEIGHBORS.get_k_nearest_neighbors(
                        frame_descriptor, ADS_DESCRIPTORS))

        k_nearest_neighbors = np.array(k_nearest_neighbors)
        if config.SAVE_DESCRIPTORS:
            np.save('vecinos.npy', k_nearest_neighbors)
    else:
        # Read the results
        k_nearest_neighbors = np.load('vecinos.npy')

    # Split the results into separate lists for easier handling
    tiempo_array = []
    videos_array = []
    frames_array = []
    for i, element in enumerate(k_nearest_neighbors):
        #print(i,element, ADS[element[0][0]])
        tiempo_array.append(i)
        videos_array.append(element[0][0])
        frames_array.append(element[0][1])

    tiempo_array = np.array(tiempo_array)
    frames_array = np.array(frames_array)
    videos_array = np.array(videos_array)
    #utils.pendiente(tiempo_array, frames_array)

    # Generate the detections and save the results
    DETECCIONES = utils.algoritmo_deteccion(tiempo_array, frames_array,
                                            videos_array, ADS_INFO)
    utils.save_txt(DETECCIONES, ADS_INFO, TV_PATH)

    if config.DRAW:
        for i in range(21):
            t = tiempo_array[videos_array == i]
            f = frames_array[videos_array == i]
            if f.shape[0] > 0:
                fig = plt.figure()
                ax = fig.add_subplot(111)
                ax.scatter(t, f, c='r', marker='o')
                ax.set_xlabel('TIEMPO')
                ax.set_ylabel('FRAMES')
                ax.set_title('COMERCIAL N: {}'.format(i + 1))
                plt.show()

    if config.TIME_FLAG:
        print("Total time: {}".format((time.time() - start_time) / 60))
Example #11
def main():
    for feature, c in COMBINATIONS:
        try:
            print("Computing " + feature + " SVM-linear C = " + str(c) +
                  " cross validation")
            data = np.load(feature + "_stats.npy", allow_pickle=True).item()

            meditation = []
            music_video = []
            logic_game = []

            for key in data.keys():
                for item in data[key]['meditation']:
                    meditation.append(item)
                for item in data[key]['logic_game']:
                    logic_game.append(item)
                for item in data[key]['music_video']:
                    music_video.append(item)

            if len(meditation) != len(music_video) or len(music_video) != len(
                    logic_game):
                raise Exception("Classes have unequal number of samples")
            len1 = len(meditation)

            x = meditation + logic_game + music_video

            y = (len1 * ['meditation']) + (len1 * ['logic_game']) + (
                len1 * ['music_video'])
            if len(y) != len(x):
                raise Exception(
                    "Unequal number of input samples and output samples")

            kfold = KFold(n_splits=10, shuffle=True)
            splits = list(kfold.split(x, y))

            if EXPERIMENT_NAME not in os.listdir():
                os.mkdir(EXPERIMENT_NAME)

            if SAVE_SPLITS:
                np.save(
                    os.path.join(
                        EXPERIMENT_NAME,
                        EXPERIMENT_NAME + "_" + feature + "_splits.npy"),
                    splits)

            predicted_list = np.ndarray(0)
            test_data_classes_list = np.ndarray(0)

            for i in range(len(splits)):
                print("Computing " + numeral_str(i + 1) + " fold")

                train_data, train_data_classes, test_data, test_data_classes = make_crossvalidation_data(
                    splits[i], x, y)

                pca = PCA(n_components=0.95)
                pca.fit(train_data)
                pca_list.append(pca)
                train_data_pca = np.array(pca.transform(train_data))
                test_data_pca = np.array(pca.transform(test_data))

                res, model = experiment_svm_linear(train_data_pca,
                                                   train_data_classes,
                                                   test_data_pca,
                                                   test_data_classes, c,
                                                   TRAIN_VERBOSE)
                results_list.append(res)
                models_list.append(model)

                test_data_classes_list = np.append(test_data_classes_list,
                                                   test_data_classes)
                predicted_list = np.append(predicted_list, res['results'])

                if res != None:
                    if SAVE_RESULTS:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                            str(c)) + "_results_" + str(i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(res, path)

                    if SAVE_RESULTS_TXT:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                            str(c)) + "_results_" + str(i + 1) + ".txt"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        save_txt(res, path)

                    if SAVE_MODEL:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                            str(c)) + "_model_" + str(i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(model, path)

                    if SAVE_PCA:
                        filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                            str(c)) + "_pca_" + str(i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(pca, path)

            if SAVE_RESULTS or SAVE_RESULTS_TXT:
                average_report = make_report(test_data_classes_list,
                                             predicted_list)

            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                    str(c)) + "_results_averaged"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(average_report, path)

            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_" + feature + " svm_lin_c_" + dots_to_underscores(
                    str(c)) + "_results_averaged.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(average_report, path)

        except Exception as e:
            print("Error during " + EXPERIMENT_NAME + " " + feature +
                  " SVM-linear C = " + dots_to_underscores(str(c)))
            print(traceback.format_exc())
            # print(e)
            pass
Example #12
def main():
    for feature in FEATURES:
        try:
            print(
                "Computing " + feature +
                " multiple hidden layer neural network (specification 1) cross validation"
            )
            data = np.load(feature + "_stats.npy", allow_pickle=True).item()

            meditation = []
            music_video = []
            logic_game = []

            for key in data.keys():
                for item in data[key]['meditation']:
                    meditation.append(item)
                for item in data[key]['logic_game']:
                    logic_game.append(item)
                for item in data[key]['music_video']:
                    music_video.append(item)

            if len(meditation) != len(music_video) or len(music_video) != len(
                    logic_game):
                raise Exception("Classes have unequal number of samples")
            len1 = len(meditation)

            x = meditation + logic_game + music_video

            y = (len1 * ['meditation']) + (len1 * ['logic_game']) + (
                len1 * ['music_video'])
            if len(y) != len(x):
                raise Exception(
                    "Unequal number of input samples and output samples")

            kfold = KFold(n_splits=10, shuffle=True)
            splits = list(kfold.split(x, y))

            encoder = LabelEncoder()
            encoder.fit(y)

            if EXPERIMENT_NAME not in os.listdir():
                os.mkdir(EXPERIMENT_NAME)

            if SAVE_SPLITS:
                np.save(
                    os.path.join(
                        EXPERIMENT_NAME,
                        EXPERIMENT_NAME + "_" + feature + "_splits.npy"),
                    splits)

            if SAVE_ENCODER:
                joblib.dump(
                    encoder,
                    os.path.join(EXPERIMENT_NAME,
                                 EXPERIMENT_NAME + "_" + feature + "_encoder"))

            predicted_list = np.ndarray(0)
            test_data_classes_list = np.ndarray(0)

            for i in range(len(splits)):
                print("Computing " + numeral_str(i + 1) + " fold")

                initial_train_data, initial_train_data_classes, test_data, test_data_classes = make_crossvalidation_data(
                    splits[i], x, y)

                l = len(initial_train_data)
                i1 = int(0.8889 * l)
                indices = [i for i in range(l)]

                random.shuffle(indices)
                train_val_indices = [indices[:i1], indices[i1:]]

                train_data, train_data_classes, val_data, val_data_classes = make_crossvalidation_data(
                    train_val_indices, initial_train_data,
                    initial_train_data_classes)

                if (SAVE_SPLITS):
                    np.save(
                        os.path.join(
                            EXPERIMENT_NAME, EXPERIMENT_NAME + "_" + feature +
                            "_test_val_splits_" + str(i) + ".npy"),
                        train_val_indices)

                pca = PCA(n_components=0.95)
                pca.fit(train_data)
                pca_list.append(pca)

                train_data_pca = np.array(pca.transform(train_data))
                val_data_pca = np.array(pca.transform(val_data))
                test_data_pca = np.array(pca.transform(test_data))

                res, model = experiment_mlp_1a(train_data_pca,
                                               train_data_classes,
                                               val_data_pca, val_data_classes,
                                               test_data_pca,
                                               test_data_classes, encoder,
                                               TRAIN_VERBOSE)
                results_list.append(res)
                models_list.append(model)

                test_data_classes_list = np.append(test_data_classes_list,
                                                   test_data_classes)
                predicted_list = np.append(predicted_list,
                                           res['results_decoded'])

                if res != None:
                    if SAVE_RESULTS:
                        filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_results_" + str(
                            i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(res, path)

                    if SAVE_RESULTS_TXT:
                        filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_results_" + str(
                            i + 1) + ".txt"
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        save_txt(res, path)

                    if SAVE_MODEL:
                        filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_" + str(
                            i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        save_mlp(model, path)

                    if SAVE_PCA:
                        filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_pca_" + str(
                            i + 1)
                        path = os.path.join(EXPERIMENT_NAME, filename)
                        joblib.dump(pca, path)

            if SAVE_RESULTS or SAVE_RESULTS_TXT:
                average_report = make_report(test_data_classes_list,
                                             predicted_list)

            if SAVE_RESULTS:
                filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_results_averaged"
                path = os.path.join(EXPERIMENT_NAME, filename)
                joblib.dump(average_report, path)

            if SAVE_RESULTS_TXT:
                filename = EXPERIMENT_NAME + "_" + feature + "_mlp_multi_layer_spec_1_results_averaged.txt"
                path = os.path.join(EXPERIMENT_NAME, filename)
                save_txt(average_report, path)

        except Exception as e:
            print("Error during " + EXPERIMENT_NAME + " " + feature +
                  "_mlp_multi_layer_spec_1")
            print(traceback.format_exc())
            pass
Example #13
    def save(self, filename):
        buffer = np.concatenate(([self.p1, self.m, self.n], self.prior.alpha,
                                 self.prior.a, self.prior.b))
        utils.save_txt(filename, buffer)
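Note that this project passes the filename first and the data second, unlike most of the other examples on this page. A minimal sketch of a helper compatible with that call, assuming buffer is a 1-D numeric array (the project's real utils.save_txt may format the values differently):

import numpy as np

def save_txt(filename, buffer):
    """Save a 1-D numeric array as whitespace-separated text."""
    np.savetxt(filename, np.asarray(buffer))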
Example #14
def main():
	if(EXPERIMENT_NAME not in os.listdir()):
		os.mkdir(EXPERIMENT_NAME)

	# neural networks with single hidden layer
	for feature in FEATURES:
		try:
			data = np.load(feature + "_stats.npy",allow_pickle=True).item()
			pca = joblib.load("pca_" + feature + "_stats_noval")

			train_data, train_data_classes = make_train_data(data,False)
			test_data, test_data_classes = make_test_data(data)
			val_data, val_data_classes = make_val_data(data)

			train_data_pca = np.array(pca.transform(train_data))
			test_data_pca = np.array(pca.transform(test_data))
			val_data_pca = np.array(pca.transform(val_data))

			train_data_classes = np.array(train_data_classes)
			val_data_classes = np.array(val_data_classes)
			test_data_classes = np.array(test_data_classes)

			print("Computing " + feature  + " single hidden layer neural network")
			res,model = experiment_mlp_singlelayer(train_data_pca,train_data_classes,val_data_pca,val_data_classes,test_data_pca,test_data_classes,MLP_VERBOSE)

			if res != None:
				if SAVE_RESULTS:
					filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer_results"
					path = os.path.join(EXPERIMENT_NAME,filename)
					joblib.dump(res,path)

				if SAVE_RESULTS_TXT:
					filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer_results.txt"
					path = os.path.join(EXPERIMENT_NAME,filename)
					save_txt(res,path)

				if SAVE_MODEL:
					filename = EXPERIMENT_NAME + "_" + feature + "_mlp_single_layer"
					path = os.path.join(EXPERIMENT_NAME,filename)
					save_mlp(model,path)

		except Exception as e:
			print("Error during " + EXPERIMENT_NAME + " " + feature  + " single hidden layer neural network")
			print(e)
			pass

	# neural networks with multiple hidden layers
	# welch32 only
	data = np.load("welch_32_stats.npy",allow_pickle=True).item()
	pca = joblib.load("pca_welch_32_stats_noval")

	train_data, train_data_classes = make_train_data(data,False)
	test_data, test_data_classes = make_test_data(data)
	val_data, val_data_classes = make_val_data(data)

	train_data_pca = np.array(pca.transform(train_data))
	test_data_pca = np.array(pca.transform(test_data))
	val_data_pca = np.array(pca.transform(val_data))

	train_data_classes = np.array(train_data_classes)
	val_data_classes = np.array(val_data_classes)
	test_data_classes = np.array(test_data_classes)

	print("Computing welch_32 multiple hidden layer neural network, specification 1:")
	print("3 hidden layers, LReLU activation (a = 0.02), learning rate = 0.01")
	print("decay = 1e-6, momentum = 0.9, patience = 50, max epochs = 2000")
	try:
		res,model = experiment_mlp_1(train_data_pca,train_data_classes,val_data_pca,val_data_classes,test_data_pca,test_data_classes,MLP_VERBOSE)
		if res != None:
			if SAVE_RESULTS:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1_results"
				path = os.path.join(EXPERIMENT_NAME,filename)
				joblib.dump(res,path)

			if SAVE_RESULTS_TXT:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1_results.txt"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_txt(res,path)

			if SAVE_MODEL:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_1"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_mlp(model,path)

	except Exception as e:
		print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 1")
		print(e)
		pass

	print("Computing welch_32 multiple hidden layer neural network, specification 2:")
	print("4 hidden layers, tanh + LReLU activation (a = 0.02), learning rate = 0.005")
	print("decay = 1e-6, momentum = 0.9, patience = 250, max epochs = 3000")
	try:
		res,model = experiment_mlp_2(train_data_pca,train_data_classes,val_data_pca,val_data_classes,test_data_pca,test_data_classes,MLP_VERBOSE)
		if res != None:
			if SAVE_RESULTS:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2_results"
				path = os.path.join(EXPERIMENT_NAME,filename)
				joblib.dump(res,path)

			if SAVE_RESULTS_TXT:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2_results.txt"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_txt(res,path)

			if SAVE_MODEL:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_2"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_mlp(model,path)

	except Exception as e:
		print("Some problem during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 2")
		print(e)
		pass

	print("Computing welch_32 multiple hidden layer neural network, specification 3:")
	print("6 hidden layers, ReLU activation, learning rate = 0.01")
	print("decay = 1e-6, momentum = 0.9, patience = 70, max epochs = 2000")
	try:
		res,model = experiment_mlp_3(train_data_pca,train_data_classes,val_data_pca,val_data_classes,test_data_pca,test_data_classes,MLP_VERBOSE)
		if res != None:
			if SAVE_RESULTS:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3_results"
				path = os.path.join(EXPERIMENT_NAME,filename)
				joblib.dump(res,path)

			if SAVE_RESULTS_TXT:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3_results.txt"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_txt(res,path)

			if SAVE_MODEL:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_3"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_mlp(model,path)

	except Exception as e:
		print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 3")
		print(e)
		pass

	print("Computing welch_32 multiple hidden layer neural network, specification 4:")
	print("3 hidden layers, tanh activation, learning rate = 0.01")
	print("decay = 1e-6, momentum = 0.9, patience = 250, max epochs = 3000")
	try:
		res,model = experiment_mlp_4(train_data_pca,train_data_classes,val_data_pca,val_data_classes,test_data_pca,test_data_classes,MLP_VERBOSE)
		if res != None:
			if SAVE_RESULTS:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4_results"
				path = os.path.join(EXPERIMENT_NAME,filename)
				joblib.dump(res,path)

			if SAVE_RESULTS_TXT:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4_results.txt"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_txt(res,path)

			if SAVE_MODEL:
				filename = EXPERIMENT_NAME + "_welch_32_mlp_multi_layer_spec_4"
				path = os.path.join(EXPERIMENT_NAME,filename)
				save_mlp(model,path)

	except Exception as e:
		print("Error during " + EXPERIMENT_NAME + " welch_32 multiple hidden layer neural network, specification 4")
		print(e)
		pass
Example #15
if __name__ == "__main__":
    # Check Dir
    BASE_DIR = './datas/'
    if not os.path.exists(BASE_DIR): os.mkdir(BASE_DIR)

    # Load Data
    originUrl = read_txt('origin.txt')
    naverUrl = read_txt('naver.txt')
    assert len(originUrl) == len(naverUrl)

    # Pre-processing
    newsDict : Dict[str, int]  = {}
    for url in originUrl:
        '''
        split('/') => ['https:', '', 'news.joins.com', 'article', 'olink', '23309016']
        idx 2 => the news outlet's host (URI) part
        '''
        try: newsDict[url.split('/')[2]] += 1
        except: newsDict[url.split('/')[2]] = 1
    sortedNewsList = sorted(newsDict.items(),
                     key=(lambda v : v[1]),
                     reverse=True)  # sort by count, descending

    # Save
    for idx, (k, v) in tqdm(enumerate(sortedNewsList)):
        #print("{} : {}, {}".format(idx,v, k)) # 전체 뉴스가 -> 몇개씩 기사를 가지고있는지
        #print("|{}|{}|{}|".format(idx, k, v)) # for README
        save_txt(BASE_DIR + "{}.txt".format(idx), extract_n(k))
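The try/except tally above is equivalent to a collections.Counter over the host part of each URL. A small self-contained sketch of the same counting step (the sample URLs are made up for illustration):

from collections import Counter

originUrl = [
    "https://news.joins.com/article/olink/23309016",
    "https://news.joins.com/article/123",
    "https://example.com/news/1",
]
newsDict = Counter(url.split('/')[2] for url in originUrl)
sortedNewsList = newsDict.most_common()  # already sorted by count, descending
print(sortedNewsList)  # [('news.joins.com', 2), ('example.com', 1)]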