Example #1
        # Measure performances 2
        remembering_rate2, final_output2 = RCNLPMetrics.remembering_rate(predicted2, observed, threshold=rc_threshold)
        remembering_rate_set2 = np.append(remembering_rate_set2, remembering_rate2)
        lucidity_set2 = np.append(lucidity_set2, RCNLPMetrics.lucidity(predicted2, observed, threshold=rc_threshold))

        # Add to final outputs
        final_outputs1 += [final_output1]
        final_outputs2 += [final_output2]

        # Next sample
        sample_pos += 1

    # endfor

    # Average performance
    logging.save_results("Remembering rate 1", np.average(remembering_rate_set1), display=True)
    logging.save_results("Lucidity 1", np.average(lucidity_set1), display=True)
    logging.save_results("Remembering rate 2", np.average(remembering_rate_set2), display=True)
    logging.save_results("Lucidity 2", np.average(lucidity_set2), display=True)

    # Plot results
    pos = 0
    for prediction1 in predictions1:

        # The observation
        observation = test_out[pos]

        # Prediction 2
        prediction2 = predictions2[pos]

        # The input
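Example #1 is cut off above, but its accumulation pattern (one np.append per sample, then np.average over the whole set) is easy to reproduce in isolation. A minimal sketch, where toy_remembering_rate is a stand-in and not the RCNLPMetrics implementation:

import numpy as np

# Stand-in metric: fraction of positions where prediction and observation fall
# on the same side of the threshold. Only the accumulation pattern matters here.
def toy_remembering_rate(predicted, observed, threshold=0.5):
    return float(np.mean((predicted > threshold) == (observed > threshold)))

remembering_rate_set = np.array([])
rng = np.random.default_rng(0)
for _ in range(10):
    predicted = rng.random(100)
    observed = rng.random(100)
    remembering_rate_set = np.append(remembering_rate_set,
                                     toy_remembering_rate(predicted, observed))
# end for

print("Average remembering rate:", np.average(remembering_rate_set))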
Example #2
        converter = RCNLPWordVectorConverter(resize=args.in_components,
                                             pca_model=pca_model)
    # end if

    # >> 3. Array for results
    doc_success_rate_avg = np.array([])
    sen_success_rate_avg = np.array([])
    doc_success_rate_std = np.array([])
    sen_success_rate_std = np.array([])

    # Training set sizes
    training_set_sizes = np.arange(1, 96, args.step)

    # For each training size
    for training_size in training_set_sizes:
        logging.save_results("Training size ", training_size, display=True)

        # Average success rate for this training size
        training_size_average_doc_success_rate = np.array([])
        training_size_average_sen_success_rate = np.array([])

        # >> 4. Try n time
        for s in range(0, args.samples):

            # >> 5. Prepare training and test set.
            training_set_indexes = np.arange(0, training_size, 1)
            test_set_indexes = np.arange(training_size, 100, 1)

            # >> 6. Create Echo Word Classifier
            classifier = RCNLPEchoWordClassifier(
                size=rc_size,
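Example #2 is truncated inside the RCNLPEchoWordClassifier constructor, but the train/test index split it sets up can be checked on its own. A small sketch with a fixed training_size (an illustrative value, not taken from the example):

import numpy as np

training_size = 10
training_set_indexes = np.arange(0, training_size, 1)  # files 0..9 for training
test_set_indexes = np.arange(training_size, 100, 1)    # files 10..99 for testing

# The two index sets always cover the 100 documents per author exactly once.
assert len(training_set_indexes) + len(test_set_indexes) == 100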
Example #3
                # >> 8. Train model
                classifier.train()

                # >> 9. Test model performance
                success = 0.0
                count = 0.0
                for author_index, author_id in enumerate((args.author1, args.author2)):
                    author_path = os.path.join(args.dataset, "total", author_id)
                    for file_index in test_set_indexes:
                        author_pred, _, _ = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"))
                        if author_pred == author_index:
                            success += 1.0
                        # end if
                        count += 1.0
                    # end for
                # end for

                # >> 11. Save results
                average_success_rate = np.append(average_success_rate, [(success / count) * 100.0])

                # Delete variables
                del classifier
            # end for

            # Log results
            logging.save_results("Average success rate ", np.average(average_success_rate), display=True)
            logging.save_results("Success rate std ", np.std(average_success_rate), display=True)
        # end for
    # end for

# end if
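The document-level scoring loop in Example #3 only needs a predictor that returns an author index; the success/count bookkeeping is independent of the model. A self-contained sketch where predict_author is a dummy stand-in for classifier.pred:

import numpy as np

# Dummy predictor standing in for classifier.pred: returns an author index.
def predict_author(file_index):
    return file_index % 2

success = 0.0
count = 0.0
test_set_indexes = np.arange(10, 100, 1)
for author_index in (0, 1):
    for file_index in test_set_indexes:
        if predict_author(file_index) == author_index:
            success += 1.0
        # end if
        count += 1.0
    # end for
# end for
print("Success rate: %.1f%%" % ((success / count) * 100.0))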
Example #4
                # Load text file
                nlp = spacy.load(args.lang)
                doc = nlp(io.open(os.path.join(author_path, str(file_index) + ".txt"), 'r').read())

                # Iterate over sentences
                for sentence in doc.sents:
                    sentence_pred = classifier.pred_text(sentence.text)
                    if sentence_pred == author_index:
                        success += 1.0
                    # end if
                    count += 1.0
                # end for
        # end for

        # >> 10. Log success
        logging.save_results("Number of sentences in test set ", count, display=True)
        logging.save_results("Number of successes ", success, display=True)
        logging.save_results("Success rate ", (success / count) * 100.0, display=True)

        # >> 11. Save results
        average_success_rate = np.append(average_success_rate, [(success / count) * 100.0])

        # Delete variables
        del classifier
        # end for

        # Log results
    logging.save_results("Average success rate ", np.average(average_success_rate), display=True)
    logging.save_results("Success rate std ", np.std(average_success_rate), display=True)

# end if
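The sentence-level evaluation above only relies on spaCy's sentence iterator. A minimal sketch, assuming the 'en_core_web_sm' pipeline and a dummy classify_sentence in place of classifier.pred_text; note that the pipeline is loaded once here, whereas the example reloads it for every file:

import spacy

# Assumed model name; any pipeline with a parser or sentencizer exposes doc.sents.
nlp = spacy.load("en_core_web_sm")

# Dummy stand-in for classifier.pred_text: returns an author index.
def classify_sentence(text):
    return 0 if len(text) < 40 else 1

success = 0.0
count = 0.0
doc = nlp(u"Call me Ishmael. Some years ago I went to sea. It was a cold day.")
for sentence in doc.sents:
    if classify_sentence(sentence.text) == 0:
        success += 1.0
    # end if
    count += 1.0
# end for
print("Sentence success rate: %.1f%%" % ((success / count) * 100.0))

Loading the spaCy model once outside the file loop avoids repeating the expensive model initialization for every document.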
Example #5
    for file_index in test_set_indexes:
        author_path = os.path.join(args.dataset, "total",
                                   np.random.choice(negative_authors, 1)[0])
        file_path = os.path.join(author_path, str(file_index) + ".txt")

        # Document success rate
        if not args.sentence:
            author_pred, _ = classifier.pred(file_path)
            if author_pred == 1:
                success += 1.0
            # end if
            count += 1.0
        else:
            # Sentence success rate
            nlp = spacy.load(args.lang)
            doc = nlp(io.open(file_path, 'r').read())
            for sentence in doc.sents:
                sentence_pred, _ = classifier.pred_text(sentence.text)
                if sentence_pred == 1:
                    success += 1.0
                # end if
                count += 1.0
            # end for
        # end if
    # end for

    # >> 10. Log success
    logging.save_results("Success rate ", (success / count) * 100.0,
                         display=True)

# end if
Example #6
            # Log results
            #logging.save_results("V state score " + str(i), state_score, display=True)
            #logging.save_results("V doc score " + str(i), doc_score, display=True)

            # Save results
            state_results = np.append(state_results, state_score)
            doc_results = np.append(doc_results, doc_score)
        # end for

        # Log overall state results
        print(
            "#############################################################################"
        )
        logging.save_results("Overall state score",
                             np.average(state_results),
                             display=True)
        logging.save_results("Overall state score std",
                             np.std(state_results),
                             display=True)
        logging.save_results("State score T-test",
                             ttest_1samp(state_results, 0).pvalue * 100.0,
                             display=True)

        # Log overall doc results
        logging.save_results("Overall doc score",
                             np.average(doc_results),
                             display=True)
        logging.save_results("Overall doc score std",
                             np.std(doc_results),
                             display=True)
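The "T-test" lines above use SciPy's one-sample test and report the p-value as a percentage. In isolation the call looks like this (random toy scores, only to show ttest_1samp and the .pvalue field used above):

import numpy as np
from scipy.stats import ttest_1samp

rng = np.random.default_rng(42)
state_results = rng.normal(loc=0.1, scale=0.5, size=30)  # toy scores

# Two-sided test of the null hypothesis that the mean score is 0.
result = ttest_1samp(state_results, 0)
print("t = %.3f, p-value = %.2f%%" % (result.statistic, result.pvalue * 100.0))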
Example #7
                author_pred = classifier.pred(os.path.join(author_path, str(file_index) + ".txt"), True)
                if author_id == args.author and author_pred == 0:
                    success += 1.0
                elif author_id != args.author and author_pred == 1:
                    success += 1.0
                # end if
                count += 1.0
                test_count += 1
                if test_count >= args.test_size:
                    break
                # end if
            # end for
        # end for

        # >> 10. Log success
        logging.save_results("Number of file in test set ", count, display=True)
        logging.save_results("Number of success ", success, display=True)
        logging.save_results("Success rate ", (success / count) * 100.0, display=True)

        # >> 11. Save results
        average_success_rate = np.append(average_success_rate, [(success / count) * 100.0])

        # Delete variables
        del classifier
    # end for

    # Log results
    logging.save_results("Average success rate ", np.average(average_success_rate), display=True)
    logging.save_results("Success rate std ", np.std(average_success_rate), display=True)
    logging.save_results("Baseline ", float(len(negative_authors)) / 50.0 * 100.0, display=True)
Example #8
                        # Original size
                        if in_size1 == -1 and in_size2 is None:
                            print("Original size")
                            original_size_perf = average_success_rate
                        # end if

                        # No additional representation
                        if in_size2 is None:
                            print("None size")
                            none_size_perf = average_success_rate
                        # end if

                        # Log results
                        logging.save_results("Average success rate ",
                                             np.average(average_success_rate),
                                             display=True)
                        logging.save_results("Success rate std ",
                                             np.std(average_success_rate),
                                             display=True)
                        if not (original_size_perf
                                == average_success_rate).all():
                            logging.save_results(
                                "Paired t-test again original size ",
                                stats.ttest_rel(original_size_perf,
                                                average_success_rate).pvalue *
                                100,
                                display=True)
                        # end if
                        if not (none_size_perf == average_success_rate).all():
                            logging.save_results(
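Example #8 is cut off mid-call above, but the guard it applies before stats.ttest_rel is worth noting: a paired t-test of an array against itself has zero variance in the differences and returns a NaN p-value, so the comparison is skipped when both runs produced identical scores. A self-contained illustration with toy numbers:

import numpy as np
from scipy import stats

original_size_perf = np.array([62.0, 65.0, 61.0, 70.0, 66.0])    # toy scores
average_success_rate = np.array([64.0, 68.0, 63.0, 71.0, 69.0])  # toy scores

# Paired t-test against the original-size baseline, skipped when the two runs
# produced exactly the same values (the (a == b).all() guard from the example).
if not (original_size_perf == average_success_rate).all():
    p_value = stats.ttest_rel(original_size_perf, average_success_rate).pvalue
    print("Paired t-test against original size: %.2f%%" % (p_value * 100.0))
# end if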
Example #9
            print("[Diff] Same : %r, Prob : %f, std : %f" %
                  (same, prob, prob_std))
            show_graph = False

            # Test
            if not same:
                success += 1.0
            # end if

            different_probs.append(prob)
            count += 1.0
        # end for
    # end for

    # Result
    logging.save_results("Same mu", np.average(same_probs), display=True)
    logging.save_results("Diff mu", np.average(different_probs), display=True)
    logging.save_results("TTest same mu = 0.5",
                         ttest_1samp(same_probs, 0.5).pvalue * 100.0,
                         display=True)
    logging.save_results("TTest diff mu = 0.5",
                         ttest_1samp(different_probs, 0.5).pvalue * 100.0,
                         display=True)
    logging.save_results("TTest diff mu = same mu",
                         ttest_ind(different_probs, same_probs).pvalue * 100.0,
                         display=True)
    logging.save_results("Success rate", success / count, display=True)

    # Plot histogram
    print("Plotting histogram")
    bins = np.linspace(0, 1.0, 100)
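Example #9 stops right after defining the histogram bins. With matplotlib (the project elsewhere uses its own RCNLPPlotGenerator, so this is only a sketch), the two probability distributions could be plotted along these lines:

import numpy as np
import matplotlib.pyplot as plt

# Toy stand-ins for the collected same/different probabilities.
rng = np.random.default_rng(0)
same_probs = rng.beta(5, 2, size=200)
different_probs = rng.beta(2, 5, size=200)

bins = np.linspace(0, 1.0, 100)
plt.hist(same_probs, bins=bins, alpha=0.5, label="same")
plt.hist(different_probs, bins=bins, alpha=0.5, label="different")
plt.xlabel("Probability")
plt.ylabel("Count")
plt.legend()
plt.show()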
Example #10
                            for sentence in doc.sents:
                                sentence_pred, _ = classifier.pred_text(
                                    sentence.text)
                                if sentence_pred == author_index:
                                    success += 1.0
                                # end if
                                count += 1.0
                            # end for
                        # end if

                    # end for
                # end for

                # >> 10. Log success
                logging.save_results("Success rate ",
                                     (success / count) * 100.0,
                                     display=True)

                # >> 11. Save results
                reservoir_size_average_success_rate = np.append(
                    reservoir_size_average_success_rate,
                    [(success / count) * 100.0])

                # Delete variables
                del classifier
            except:
                pass
            # end try
        # end for

        # >> 10. Log success
Example #11
                    # Save result
                    success_rates[k, s] = success_rate

                    # Reset classifier
                    classifier.reset()
                # end for
                print(u"\t\tAll - Success rate : {}".format(
                    np.average(success_rates[:, s])))
            # end for

            # Average results
            model['results'] = np.average(success_rates, axis=1)

            # Log success
            logging.save_results(u"\tSuccess rate ",
                                 np.average(model['results']),
                                 display=True)
            logging.save_results(u"\tSuccess rate std ",
                                 np.std(model['results']),
                                 display=True)
        # end if
    # end for

    # Compare results
    for model1 in models:
        print(model1['name'])
        for model2 in models:
            if model1['name'] == model2['name']:
                logging.save_results(u"\t{} : {}".format(
                    model2['name'],
                    stats.ttest_rel(model1['results'],
Example #12
            # Save results for this sample
            sample_remembering_rate += [np.average(remembering_rate_set)]
            sample_lucidity += [np.average(lucidity_set)]

        # Average for this value
        parameter_remembering_rates += [np.average(sample_remembering_rate)]
        parameter_lucidity += [np.average(sample_lucidity)]

        # Error for this value
        parameter_remembering_rates_std += [np.std(sample_remembering_rate)]
        parameter_lucidity_std += [np.std(sample_lucidity)]

    # Log results
    logging.save_results("Remembering rates",
                         parameter_remembering_rates,
                         display=False)
    logging.save_results("Lucidity", parameter_lucidity, display=False)

    # Plot perfs
    plot = RCNLPPlotGenerator(title=ex_name, n_plots=1)

    # First subplot
    plot.add_sub_plot(title=ex_instance + ", sparsity 0.05 to 1.0, 2 dim.",
                      x_label="Inputs sparsity",
                      y_label="Percentage",
                      ylim=[-10, 120])
    plot.plot(y=parameter_remembering_rates,
              x=rc_sparsity,
              yerr=parameter_remembering_rates_std,
              label="Remembering rate",
Example #13
                args=args,
                texts1=texts1,
                texts2=texts2,
                ex_name=ex_name,
                ex_instance=ex_instance,
                input_scaling=rc_input_scaling,
                input_sparsity=rc_input_sparsity,
                leak_rate=leak_rate,
                logging=logging,
                size=rc_size,
                spectral_radius=rc_spectral_radius,
                w_sparsity=rc_w_sparsity,
                save_graph=(i == 0),
                pca_model=pca_model)
            logging.save_results("Precision 1 round " + str(i),
                                 average_precision[0],
                                 display=True)
            logging.save_results("Precision 2 round " + str(i),
                                 average_precision[1],
                                 display=True)
            results = np.append(results, average_precision[0])
            results = np.append(results, average_precision[1])
        # end for

        # Log
        logging.save_results("Overall average precision",
                             np.average(results),
                             display=True)
        logging.save_results("Overall std precision",
                             np.std(results),
                             display=True)
Example #14
                        nlp = spacy.load(args.lang)
                        doc = nlp(io.open(file_path, 'r').read())
                        for sentence in doc.sents:
                            sentence_pred, _, _ = classifier.pred_text(
                                sentence.text)
                            if sentence_pred == author_index:
                                success += 1.0
                            # end if
                            count += 1.0
                        # end for
                    # end if
                # end for
            # end for

            # >> 10. Log success
            logging.save_results("Success rate ", (success / count) * 100.0,
                                 display=True)

            # >> 11. Save results
            leaky_rate_average_success_rate = np.append(
                leaky_rate_average_success_rate, [(success / count) * 100.0])

            # Delete variables
            del classifier
        # end for

        # >> 10. Log success
        logging.save_results("Leaky rate ", leaky_rate, display=True)
        logging.save_results("Success rate ",
                             np.average(leaky_rate_average_success_rate),
                             display=True)
        logging.save_results("Success rate std ",
Example #15
                doc = nlp(io.open(file_path, 'r').read())
                for sentence in doc.sents:
                    author_pred, same_prob, diff_prob = classifier.pred_text(sentence.text)
                    if same_prob > diff_prob and same_prob > args.threshold:
                        pass
                    else:
                        success += 1.0
                    # end if
                    count += 1.0
                    # end for
                # end for
            # end if
        # end for

        # >> 11. Save results
        logging.save_results("Text length ", n_token, display=True)
        logging.save_results("Success rate ", (success / count) * 100.0, display=True)
        logging.save_results("Max prob ", max_prob * 100, display=True)
        logging.save_results("Max cat ", max_cat, display=True)
        success_rates = np.append(success_rates, [(success / count) * 100.0])

        # Nearest success rate
        if max_cat == 'same':
            nearest_success_rate = np.append(nearest_success_rate, [1.0])
        else:
            nearest_success_rate = np.append(nearest_success_rate, [0.0])

        # Reset learning
        del classifier
    # end for
Example #16
                # >> 11. Save results
                print((success / count) * 100.0)
                training_size_average_success_rate = np.append(
                    training_size_average_success_rate,
                    [(success / count) * 100.0])

                # Delete variables
                del classifier
            except:
                pass
            # end try
        # end for

        # >> 10. Log success
        logging.save_results("Training size ", training_size, display=True)
        logging.save_results("Tokens ", n_token, display=True)
        logging.save_results("Success rate ",
                             np.average(training_size_average_success_rate),
                             display=True)
        logging.save_results("Success rate std ",
                             np.std(training_size_average_success_rate),
                             display=True)

        # Save results
        success_rate_avg = np.append(
            success_rate_avg, np.average(training_size_average_success_rate))
        success_rate_std = np.append(
            success_rate_std, np.std(training_size_average_success_rate))
        n_tokens = np.append(n_tokens, [n_token])
    # end for
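The success_rate_avg / success_rate_std arrays built in Example #16 lend themselves to an error-bar plot against the training-set sizes. A sketch with matplotlib and toy values (the project itself draws its figures with RCNLPPlotGenerator):

import numpy as np
import matplotlib.pyplot as plt

# Toy values standing in for the arrays collected in the loop above.
training_set_sizes = np.arange(1, 96, 10)
success_rate_avg = np.linspace(55.0, 90.0, len(training_set_sizes))
success_rate_std = np.full(len(training_set_sizes), 4.0)

plt.errorbar(training_set_sizes, success_rate_avg, yerr=success_rate_std,
             label="Success rate")
plt.xlabel("Training set size")
plt.ylabel("Success rate (%)")
plt.ylim(-10, 120)
plt.legend()
plt.show()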