Пример #1
0
    parser.add_option('--KNN', dest='KNN', type="int", action='store', default=10)
    parser.add_option('--normalize', dest='normalize', type="int", action='store', default=1)
    (options, args) = parser.parse_args()
    return options

def makeGraphFilename(filename_wordsX, KNN=0):
    """Return the output path for the pickled word graph.

    "_WG" is inserted just before the file extension of *filename_wordsX*;
    when *KNN* is positive, "_KNN<KNN>" is inserted after it as well
    (e.g. "words.pkl" with KNN=10 -> "words_WG_KNN10.pkl").

    Only the final extension is considered, so dots in directory names
    ("./data/words.pkl") are left alone, and a path without any extension
    still gets the suffix appended instead of being returned unchanged
    (which would make the output overwrite the input file).
    """
    import os.path  # local import: the file-level import block is not visible here

    root, ext = os.path.splitext(filename_wordsX)
    suffix = "_WG"
    if KNN > 0:
        suffix += "_KNN" + str(KNN)
    return root + suffix + ext


if __name__ == '__main__':
    # Usage: <script> WORDS_PICKLE [options]
    # The first command-line argument is the pickled words file; the
    # remaining flags (--KNN, --normalize, ...) are handled by parseOptions().
    filename_wordsX = sys.argv[1]

    # read input
    wordsX = IO.readPickledWords(filename_wordsX)
    options = parseOptions()

    # make graph (converted to a dense matrix before normalization)
    G = makeGraph(wordsX, options)
    G = G.todense()

    # --normalize selects the norm: 1 -> 'l1', 2 -> 'l2'.
    # Any other value leaves G un-normalized.
    norm_by_mode = {1: 'l1', 2: 'l2'}
    if options.normalize in norm_by_mode:
        G = toSymmetricStochastic(G,
                                  sym=(options.sym == 1),
                                  stochastic=(options.stochastic == 1),
                                  norm=norm_by_mode[options.normalize])

    msk = MSK(None, wordsX.words, wordsX.words)
    # save the matrix.
    # This is hacky, since we're trusting that G is generated with
    # rows/columns that match the order of wordsX.words
    msk.M = G

    graphFilename = makeGraphFilename(filename_wordsX, options.KNN)
    IO.pickle(graphFilename, msk)
Пример #2
0
    hammings = np.zeros(R)

    for (i, noise) in enumerate(noise_levels):
        for r in xrange(R):
            np.random.seed(r)
            # read files
            wordsX, wordsY, seedsX, seedsY = mcca.readInput(options, filename_wordsX, filename_wordsY, filename_seedX, filename_seedY)
            seed_length = len(seedsX.words)
            pi = perm.randperm(xrange(len(wordsY.words)))
            wordsY.permuteFirstWords(pi)
            if options.K > 0:
                GX = words_to_graph.makeGraph(wordsX, seedsX, options.graph_type, options.K)
                GY = words_to_graph.makeGraph(wordsY, seedsY, options.graph_type, options.K)
                GY = graphs.permute(GY, pi)
                
                GX = graphs.toSymmetricStochastic(GX)
                GY = graphs.toSymmetricStochastic(GY)
                print 'Graph norms', norm(GX), norm(GY)
            else:
                GX = None
                GY = None
                print 'no graphs provided'
            # add random noise to the features
            if noise > 0:
                if not options.pickled:
                    wordsX.features += noise * randn(wordsX.features.shape)
                    wordsY.features += noise * randn(wordsY.features.shape)
                    seedsX.features += noise * randn(seedsX.features.shape)
                    seedsY.features += noise * randn(seedsY.features.shape)
                else:
                    wordsX.addReprNoise(noise)