leak_rate=rc_leak_rate, input_sparsity=rc_input_sparsity, converter=converter, spectral_radius=rc_spectral_radius, w_sparsity=rc_w_sparsity, use_sparse_matrix=args.sparse) # Add examples document_index = 0 for author_id in np.arange(1, args.n_authors + 1): author_path = os.path.join(args.dataset, "total", str(author_id)) for file_index in range(args.n_documents): file_path = os.path.join(author_path, str(file_index) + ".txt") logger.info(u"Adding document {} as {}".format( file_path, document_index)) classifier.train(io.open(file_path, 'r').read(), document_index) document_index += 1 # end for # end for # Finalize model training classifier.finalize(verbose=args.verbose) # Get documents embeddings document_embeddings = classifier.get_embeddings() logger.info(u"Document embeddings shape : {}".format( document_embeddings.shape)) # Display similar doc for the first document of each author with each distance measure for distance_measure in ["euclidian", "cosine", "cosine_abs"]: print(u"###################### {} ######################".format(
# Build the training index set for fold `args.k`: drop the k-th group of
# sample indexes from the full index array.
training_set_indexes = indexes
training_set_indexes = np.delete(training_set_indexes, args.k, axis=0)
# Flatten to a 1-D array of the remaining sample indexes.
# NOTE(review): the dataset size 100 is hard-coded here -- presumably
# 100 documents per author; confirm against the data-loading code.
training_set_indexes.shape = (100 - n_fold_samples)

# Classifier: one echo-state-network word classifier over the two classes
# (author1 -> 0, author2 -> 1), parameterised by the reservoir settings.
classifier = EchoWordClassifier(classes=[0, 1], size=rc_size,
                                input_scaling=rc_input_scaling,
                                leak_rate=rc_leak_rate,
                                input_sparsity=rc_input_sparsity,
                                converter=wv_converter,
                                spectral_radius=rc_spectral_radius,
                                w_sparsity=rc_w_sparsity)

# Add examples
for author_index, author_id in enumerate((args.author1, args.author2)):
    # str() for consistency with the test loop below; also avoids a
    # TypeError when the author id is parsed as an int.
    author_path = os.path.join(args.dataset, "total", str(author_id))
    for file_index in training_set_indexes:
        file_path = os.path.join(author_path, str(file_index) + ".txt")
        # Context manager closes the handle (original leaked it open).
        with io.open(file_path, 'r') as f:
            classifier.train(f.read(), author_index)
    # end for
# end for

# Finalize model training
classifier.finalize(verbose=True)

# Init test epoch
test_set = list()

# Get text: collect (document text, expected class) pairs for the held-out
# fold of both authors.
for author_index, author_id in enumerate((args.author1, args.author2)):
    author_path = os.path.join(args.dataset, "total", str(author_id))
    for file_index in test_set_indexes:
        file_path = os.path.join(author_path, str(file_index) + ".txt")
        with io.open(file_path, 'r') as f:
            test_set.append((f.read(), author_index))
print("Adding negative example %s" % text_path) classifier.add_example(text_path, 1) author_index += 1 n_negative_samples += 1 if author_index >= len(negative_authors): author_index = 0 text_index += 1 if text_index >= len(training_set_indexes): break # end if # end if # end while # >> 8. Train model print("Training model...") classifier.train() # >> 9. Test model performance print("Testing model performances with text files from %s..." % os.path.join(args.dataset, "total")) print(test_set_indexes) success = 0.0 count = 0.0 # For each authors for author_id in np.arange(1, 51, 1): author_path = os.path.join(args.dataset, "total", str(author_id)) print("Testing model performances with %d text files for author from %s..." % (test_set_indexes.shape[0], author_path)) test_count = 0 for file_index in test_set_indexes: author_pred = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"), True) if author_id == args.author and author_pred == 0: