示例#1
0
def clustering_states(args,
                      texts1,
                      texts2,
                      ex_name,
                      ex_instance,
                      size,
                      input_scaling,
                      leak_rate,
                      spectral_radius,
                      input_sparsity,
                      w_sparsity,
                      logging,
                      save_graph=False,
                      pca_model=None,
                      flow=None):
    """Cluster the reservoir states of two text sets into two groups and score them.

    The texts are turned into inputs by a converter chosen from
    ``args.converter``, run through a reservoir (``flow``), and the resulting
    states are split into two clusters with k-means. The cluster assignments
    are then handed to ``get_v_measure_score``.

    :param args: Options object. Attributes read here: ``converter``,
        ``in_components``, ``homogene``, ``out_components``, ``pca_images``
        (only when ``out_components != -1``) and ``show_states`` (only when
        ``save_graph`` is true). It is also forwarded to
        ``generate_documents_states``.
    :param texts1: Text source of the first author, forwarded to
        ``generate_documents_states``.
    :param texts2: Text source of the second author, forwarded to
        ``generate_documents_states``.
    :param ex_name: Experiment name, forwarded to ``generate_plot``.
    :param ex_instance: Experiment instance, forwarded to ``generate_plot``.
    :param size: Forwarded to ``create_reservoir`` when ``flow`` is None.
    :param input_scaling: Forwarded to ``create_reservoir`` when ``flow`` is
        None.
    :param leak_rate: Forwarded to ``create_reservoir`` when ``flow`` is None.
    :param spectral_radius: Forwarded to ``create_reservoir`` when ``flow`` is
        None.
    :param input_sparsity: Forwarded to ``create_reservoir`` when ``flow`` is
        None.
    :param w_sparsity: Forwarded to ``create_reservoir`` when ``flow`` is None.
    :param logging: Result logger; must provide ``save_results`` and is passed
        to ``generate_plot`` / ``save_pca_image``.
    :param save_graph: When true, plots of the states are generated.
    :param pca_model: Forwarded to the converter constructor.
    :param flow: Existing reservoir to reuse; a new one is created when None.
    :return: The value returned by ``get_v_measure_score`` for the two
        authors' cluster assignments.
    """

    def _plot(title, states):
        # All state plots share the same axes labels and rendering options.
        generate_plot(logging, ex_name, ex_instance, title, "Time", "Neurons",
                      states, transpose=True, cmap='Greys')
    # end _plot

    # >> 1. Pick the converter class, then build it once
    if args.converter == "pos":
        converter_class = RCNLPPosConverter
    elif args.converter == "tag":
        converter_class = RCNLPTagConverter
    elif args.converter == "fw":
        converter_class = RCNLPFuncWordConverter
    else:
        # Any other value falls back to word vectors
        converter_class = RCNLPWordVectorConverter
    # end if
    converter = converter_class(resize=args.in_components,
                                pca_model=pca_model)

    # >> 2. Build a reservoir unless the caller supplied one
    if flow is None:
        flow = create_reservoir(converter.get_n_inputs(), size, input_scaling,
                                leak_rate, spectral_radius, input_sparsity,
                                w_sparsity)
    # end if

    # >> 3. Reservoir states of each author
    a1_states, a1_index, a1_n_samples = generate_documents_states(
        converter, flow, texts1, args)
    if save_graph:
        _plot("Temporal representations for Author 1",
              a1_states[:args.show_states])
    # end if

    # The sample count of the second author is not needed below
    a2_states, a2_index, _ = generate_documents_states(
        converter, flow, texts2, args)
    if save_graph:
        _plot("Temporal representations for Author 2",
              a2_states[:args.show_states])
    # end if

    # >> 4. Every state of both authors, stacked before any truncation
    complete_states = np.vstack((a1_states, a2_states))
    if save_graph:
        _plot("Complete joined Reservoir states", complete_states)
    # end if

    # Global statistics of the activations
    logging.save_results("Average neural activations",
                         np.average(complete_states))
    logging.save_results("Std dev of neural activations",
                         np.std(complete_states))

    # Give both authors the same number of states if requested
    if args.homogene:
        common_length = min(a1_states.shape[0], a2_states.shape[0])
        a1_states = a1_states[:common_length]
        a2_states = a2_states[:common_length]
    # end if

    # >> 5. States used to fit PCA and the centroids (possibly truncated)
    join_states = np.vstack((a1_states, a2_states))
    if save_graph:
        _plot("Joined Reservoir states", join_states)
    # end if

    # >> 6. Clustering, optionally in a PCA-reduced space
    if args.out_components != -1:
        pca = PCA(n_components=args.out_components)
        pca.fit(join_states)

        # Images of consecutive principal component pairs
        if args.pca_images and save_graph:
            a1_projected = pca.transform(a1_states)
            a2_projected = pca.transform(a2_states)
            # NOTE(review): the pairs (0, 1) .. (7, 8) are fixed regardless of
            # args.out_components -- confirm save_pca_image copes with fewer
            # than 9 components.
            for component in np.arange(8):
                save_pca_image(logging, a1_projected, a2_projected,
                               component, component + 1)
            # end for
        # end if

        project = pca.transform
    else:
        # No reduction: cluster directly on the raw states
        def project(states):
            return states
        # end project
    # end if

    # Centroids come from the joined states, labels from the complete ones
    centroids, _ = kmeans(project(join_states), 2)
    labels, _ = vq(project(complete_states), centroids)

    # The first a1_n_samples labels belong to author 1, the rest to author 2
    return get_v_measure_score(labels[:a1_n_samples], labels[a1_n_samples:],
                               a1_index, a2_index)
示例#2
0
                                      pca_model=pca_model)
    elif args.converter == "fw":
        converter = RCNLPFuncWordConverter(resize=args.in_components,
                                           pca_model=pca_model)
    else:
        converter = RCNLPWordVectorConverter(resize=args.in_components,
                                             pca_model=pca_model)
        # end if

    # Whatever
    max_score = 0.0

    # Iterate over reservoirs
    for r in range(args.nreservoir):
        # Create a reservoir
        flow = cf.create_reservoir(converter.get_n_inputs(), rc_size,
                                   rc_input_scaling, rc_leak_rate,
                                   rc_spectral_radius, rc_input_sparsity,
                                   rc_w_sparsity)

        # Iterate
        for i in np.arange(0, args.samples):
            authors_id = np.random.choice(49, 2, replace=False) + 1
            texts1 = os.path.join(args.texts, str(authors_id[0]))
            texts2 = os.path.join(args.texts, str(authors_id[1]))
            print("Round %d with author %s and %s" % (i, texts1, texts2))
            state_score, doc_score = cf.clustering_states(
                args=args,
                texts1=texts1,
                texts2=texts2,
                ex_name=ex_name,