def do_work(corpus, features, svd_dims):
    """Link the pre-built observed-vectors file into the experiment's output directory.

    Both paths live under a fixed FeatureExtractionToolkit directory and are
    derived purely from the arguments; nothing is read or computed here.

    :param corpus: numeric corpus id; 11 selects the name 'wiki', any other
        value selects 'gigaw'. Also embedded in the directory/file names.
    :param features: integer embedded in the experiment directory names.
    :param svd_dims: integer number of SVD dimensions; 0 means the output file
        name carries no '-<dims>' suffix (the input name always includes it).
    """
    toolkit_root = '/lustre/scratch/inf/mmb28/FeatureExtractionToolkit'

    if corpus == 11:
        corpus_name = 'wiki'
    else:
        corpus_name = 'gigaw'

    # the output name only mentions the dimensionality when SVD was applied
    if svd_dims == 0:
        dims_suffix = ''
    else:
        dims_suffix = '-%d' % svd_dims

    # destination: where the output is expected to appear
    target_dir = 'exp%d-%d-composed-ngrams-ppmi-svd' % (corpus, features)
    target_name = 'AN_NN_%s%s_Observed.events.filtered.strings' % (corpus_name, dims_suffix)
    target_path = os.path.join(toolkit_root, target_dir, target_name)

    # origin: SVD-reduced N, J and NP observed vectors, built by another script
    origin_path = '%s/exp%d-%db/exp%d-with-obs-phrases-SVD%d.events.filtered.strings' % \
                  (toolkit_root, corpus, features, corpus, svd_dims)

    # NOTE(review): force_symlink is defined elsewhere; presumably it takes
    # (existing file, link to create) and replaces any existing link -- confirm.
    force_symlink(origin_path, target_path)
# Example no. 2
# 0
def run_socher_code():
    """Run the phrase2Vector.sh script from within ``socher_base_dir``.

    Takes no arguments and returns nothing; it relies on the module-level
    names ``phrases_to_compose``, ``socher_input_file`` and ``socher_base_dir``.
    """
    # Socher's code expects its input at a fixed location, so symlink that
    # location to the list of phrases we are interested in
    force_symlink(phrases_to_compose, socher_input_file)

    composer_script = './phrase2Vector.sh'
    # the script path is relative, hence the temporary change of directory
    with temp_chdir(socher_base_dir):
        run_and_log_output(composer_script)  # this takes a while