Example #1
                                    leak_rate=rc_leak_rate,
                                    input_sparsity=rc_input_sparsity,
                                    converter=converter,
                                    spectral_radius=rc_spectral_radius,
                                    w_sparsity=rc_w_sparsity,
                                    use_sparse_matrix=args.sparse)

    # Add examples
    document_index = 0
    for author_id in np.arange(1, args.n_authors + 1):
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in range(args.n_documents):
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            logger.info(u"Adding document {} as {}".format(
                file_path, document_index))
            classifier.train(io.open(file_path, 'r').read(), document_index)
            document_index += 1
        # end for
    # end for

    # Finalize model training
    classifier.finalize(verbose=args.verbose)

    # Get documents embeddings
    document_embeddings = classifier.get_embeddings()
    logger.info(u"Document embeddings shape : {}".format(
        document_embeddings.shape))

    # Display similar doc for the first document of each author with each distance measure
    for distance_measure in ["euclidian", "cosine", "cosine_abs"]:
        print(u"###################### {} ######################".format(
Example #2
    training_set_indexes = indexes
    training_set_indexes = np.delete(training_set_indexes, args.k, axis=0)
    training_set_indexes.shape = (100 - n_fold_samples)

    # Classifier
    classifier = EchoWordClassifier(classes=[0, 1], size=rc_size, input_scaling=rc_input_scaling,
                                    leak_rate=rc_leak_rate,
                                    input_sparsity=rc_input_sparsity, converter=wv_converter,
                                    spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity)

    # Add examples
    for author_index, author_id in enumerate((args.author1, args.author2)):
        author_path = os.path.join(args.dataset, "total", author_id)
        for file_index in training_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            classifier.train(io.open(file_path, 'r').read(), author_index)
        # end for
    # end for

    # Finalize model training
    classifier.finalize(verbose=True)

    # Init test epoch
    test_set = list()

    # Get text
    for author_index, author_id in enumerate((args.author1, args.author2)):
        author_path = os.path.join(args.dataset, "total", str(author_id))
        for file_index in test_set_indexes:
            file_path = os.path.join(author_path, str(file_index) + ".txt")
            test_set.append((io.open(file_path, 'r').read(), author_index))
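Example #2 relies on indexes, n_fold_samples and test_set_indexes, which are defined earlier in the original script and are not shown here. A minimal numpy sketch of how such a k-fold split over the 100 documents per author could be prepared (the fold size of 10 and the variable names below are assumptions):

# Sketch only (assumption): a possible k-fold split behind Example #2's index variables.
import numpy as np

n_documents = 100          # documents per author (from the reshape in the example)
n_fold_samples = 10        # assumed fold size
k = 0                      # held-out fold (args.k in the example)

indexes = np.arange(n_documents)
np.random.shuffle(indexes)
indexes.shape = (n_documents // n_fold_samples, n_fold_samples)

test_set_indexes = indexes[k]                          # fold used for testing
training_set_indexes = np.delete(indexes, k, axis=0)   # remaining folds for training
training_set_indexes.shape = (n_documents - n_fold_samples,)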
Example #3
            print("Adding negative example %s" % text_path)
            classifier.add_example(text_path, 1)
            author_index += 1
            n_negative_samples += 1
            if author_index >= len(negative_authors):
                author_index = 0
                text_index += 1
                if text_index >= len(training_set_indexes):
                    break
                # end if
            # end if
        # end while

        # >> 8. Train model
        print("Training model...")
        classifier.train()

        # >> 9. Test model performance
        print("Testing model performances with text files from %s..." % os.path.join(args.dataset, "total"))
        print(test_set_indexes)
        success = 0.0
        count = 0.0
        # For each authors
        for author_id in np.arange(1, 51, 1):
            author_path = os.path.join(args.dataset, "total", str(author_id))
            print("Testing model performances with %d text files for author from %s..." % (test_set_indexes.shape[0],
                                                                                           author_path))
            test_count = 0
            for file_index in test_set_indexes:
                author_pred = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"), True)
                if author_id == args.author and author_pred == 0:
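                    # Sketch only (assumption, not in the original snippet): a plausible
                    # continuation of the accuracy tally, where class 0 is the target
                    # author (args.author) and class 1 the negative authors.
                    success += 1.0
                elif author_id != args.author and author_pred == 1:
                    success += 1.0
                # end if
                count += 1.0
                test_count += 1
            # end for
        # end for
        print("Success rate : %f" % (success / count * 100.0))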