Python PeripheralSpace示例，composes.semantic_space.peripheral_space.PeripheralSpace Python示例

示例#1

0

显示文件

    def test_init_svd(self):
        """Check PeripheralSpace construction on SVD-reduced core spaces.

        The core space is reduced with ``Svd(2)`` and then, cumulatively,
        with ``Svd(1)``.  After each reduction a peripheral space is built
        from the same raw data and compared against the expected matrix
        (to 2 decimals), the core space's row index, an empty column index
        and the number of operations applied so far.
        """
        scenarios = [(self.space2, self.us, self.us2, self.x, self.row3)]
        reductions = (Svd(2), Svd(1))

        for core, first_expected, second_expected, raw, row_names in scenarios:
            expected_by_stage = (first_expected, second_expected)

            # Reductions are cumulative: the second stage reduces the already
            # reduced core space, so the operation count grows by one per
            # stage (1, then 2).
            for stage, (reduction, expected) in enumerate(
                    zip(reductions, expected_by_stage), 1):
                core = core.apply(reduction)
                peripheral = PeripheralSpace(core, DenseMatrix(raw),
                                             row_names)

                np.testing.assert_array_almost_equal(
                    expected, peripheral.cooccurrence_matrix.mat, 2)
                self.assertListEqual(peripheral.id2row, core.id2row)
                self.assertListEqual(peripheral.id2column, [])
                self.assertDictEqual(peripheral.row2id, core.row2id)
                self.assertDictEqual(peripheral.column2id, {})
                self.assertEqual(stage, len(peripheral.operations))

示例#2

0

显示文件

文件： build_peripheral_space.py 项目： Rygbee/dissect

def transform_raw_per_space(raw_per_space, in_file_prefix, out_dir, out_format, core_space_file):
    """Build a peripheral space on top of a stored core space and save it.

    The core space is loaded from ``core_space_file`` and a
    ``PeripheralSpace`` is created from the matrix and row indices of
    ``raw_per_space``.  The result is pickled to
    ``<out_dir>/PER_SS.<input name>.<core name>.pkl`` and, when
    ``out_format`` is given, additionally exported under the same prefix.

    :param raw_per_space: object providing ``cooccurrence_matrix``,
        ``id2row`` and ``row2id``.
    :param in_file_prefix: "/"-separated path prefix; only its last
        component is used to name the output.
    :param out_dir: directory prefix of the output files.
    :param out_format: format passed to ``space.export``; ``None`` skips
        the export step.
    :param core_space_file: "/"-separated path of the stored core space.
    """
    in_file_descr = "PER_SS." + in_file_prefix.split("/")[-1]
    core_space = io_utils.load(core_space_file, Space)
    # File name of the core space without its last extension.
    core_descr = ".".join(core_space_file.split("/")[-1].split(".")[0:-1])

    space = PeripheralSpace(core_space, raw_per_space.cooccurrence_matrix, raw_per_space.id2row, raw_per_space.row2id)

    # print() with a single argument behaves the same on Python 2 and 3.
    print("Printing...")
    out_file_prefix = "%s/%s.%s" % (out_dir, in_file_descr, core_descr)
    io_utils.save(space, out_file_prefix + ".pkl")
    if out_format is not None:
        space.export(out_file_prefix, format=out_format)

示例#3

0

显示文件

    def test_init(self):
        """Check PeripheralSpace construction on raw and weighted core spaces.

        A peripheral space is built on the unweighted core space, then on the
        core space after ``EpmiWeighting`` and after an additional
        ``PlogWeighting``.  Each time the peripheral matrix, column index,
        row list and operation list are compared with the expectations, and
        the original core matrix is checked to be left untouched.
        """
        scenarios = [(self.space1, self.m2, self.row2,
                      np.array([[2, 0.5, 1]]),
                      np.array([[0.69314718, 0, 0]]))]

        epmi = EpmiWeighting()
        plog = PlogWeighting()

        for core, raw, row_names, expected_epmi, expected_plog in scenarios:
            raw_snapshot = raw.copy()
            core_matrix_before = core.cooccurrence_matrix.mat

            def check_weighted(weighted_core, expected, n_ops):
                # Build a peripheral space on a weighted core and verify it.
                per = PeripheralSpace(weighted_core, DenseMatrix(raw),
                                      row_names)
                np.testing.assert_array_almost_equal(
                    per.cooccurrence_matrix.mat, expected)
                self.assert_column_identical(per, weighted_core)
                self.assertListEqual(per.id2row, row_names)
                self.assertListEqual(per.operations,
                                     weighted_core.operations)
                self.assertEqual(len(per.operations), n_ops)

            # Unweighted core: the peripheral matrix equals the raw input.
            plain = PeripheralSpace(core, DenseMatrix(raw), row_names)
            np.testing.assert_array_equal(plain.cooccurrence_matrix.mat,
                                          raw_snapshot)
            self.assert_column_identical(plain, core)
            self.assertListEqual(plain.id2row, row_names)
            self.assertListEqual(plain.operations, core.operations)

            # One weighting applied to the core.
            core_epmi = core.apply(epmi)
            check_weighted(core_epmi, expected_epmi, 1)

            # Two weightings applied to the core.
            core_plog = core_epmi.apply(plog)
            check_weighted(core_plog, expected_plog, 2)
            np.testing.assert_array_equal(core_matrix_before,
                                          core.cooccurrence_matrix.mat)

            # A second peripheral space on the same weighted core must give
            # the same result.
            check_weighted(core_plog, expected_plog, 2)
            np.testing.assert_array_equal(core_matrix_before,
                                          core.cooccurrence_matrix.mat)

示例#4

0

显示文件

def transform_raw_per_space(raw_per_space, in_file_prefix, out_dir, out_format,
                            core_space_file):
    """Create a peripheral space from raw data plus a stored core space.

    Loads the core space from ``core_space_file``, builds a
    ``PeripheralSpace`` from ``raw_per_space`` (its co-occurrence matrix and
    row indices), pickles it under ``out_dir`` and, unless ``out_format`` is
    ``None``, also exports it in that format.
    """
    per_name = "PER_SS." + in_file_prefix.split("/")[-1]
    core_space = io_utils.load(core_space_file, Space)

    # Core file name with its last extension dropped.
    core_file_name = core_space_file.split("/")[-1]
    core_name = ".".join(core_file_name.split(".")[0:-1])

    peripheral = PeripheralSpace(core_space,
                                 raw_per_space.cooccurrence_matrix,
                                 raw_per_space.id2row,
                                 raw_per_space.row2id)

    print("Printing...")
    target_prefix = "%s/%s.%s" % (out_dir, per_name, core_name)
    io_utils.save(peripheral, target_prefix + ".pkl")

    if out_format is None:
        return
    peripheral.export(target_prefix, format=out_format)

示例#5

0

显示文件

文件： semantic_space.py 项目： anupama-gupta/AN_Composition

def build_bigram_space():
    """Build the bigram peripheral space and save it as "bigrams_space".

    Reads the module-level ``unigrams_space`` (used as the core space) and
    ``args.function`` (items 3 and 1 supply the data and column files).
    """
    peripheral = PeripheralSpace.build(
        unigrams_space,
        data=args.function[3],
        cols=args.function[1],
        format="sm",
    )

    save_space(peripheral, "bigrams_space")

示例#6

0

显示文件

文件： peripheral_space_test.py 项目： Aliases/dissect

    def test_add_rows_svd(self):
        """Check ``add_rows`` on a peripheral space over an SVD-reduced core.

        The core space is reduced with ``Svd(2)`` followed by ``Svd(1)``.
        A peripheral space is created with row "e" and then extended with
        row "f" using the same raw data, so both rows are expected to equal
        ``self.us2[0]``.
        """
        scenarios = [(self.space2, np.vstack([self.us2[0], self.us2[0]]),
                      self.m1, ["e"], ["f"], {"e": 0, "f": 1})]
        reductions = [Svd(2), Svd(1)]

        for (core, expected, raw, first_rows, added_rows,
             expected_row2id) in scenarios:
            for reduction in reductions:
                core = core.apply(reduction)

            peripheral = PeripheralSpace(core, DenseMatrix(raw), first_rows)
            peripheral.add_rows(DenseMatrix(raw), added_rows)

            np.testing.assert_array_almost_equal(
                expected, peripheral.cooccurrence_matrix.mat, 2)
            self.assertListEqual(peripheral.id2row, first_rows + added_rows)
            self.assertListEqual(peripheral.id2column, [])
            self.assertDictEqual(peripheral.row2id, expected_row2id)
            self.assertDictEqual(peripheral.column2id, {})
            self.assertEqual(2, len(peripheral.operations))

示例#7

0

显示文件

    def test_add_rows_svd(self):
        """Verify that rows can be appended to an SVD-based peripheral space.

        After two chained SVD reductions of the core space, a peripheral
        space holding row "e" is extended with row "f".  The matrix, the row
        indices, the (empty) column indices and the operation count are then
        compared with the expected values.
        """
        test_cases = [(self.space2, np.vstack([self.us2[0], self.us2[0]]),
                       self.m1, ["e"], ["f"], {"e": 0, "f": 1})]
        first_svd = Svd(2)
        second_svd = Svd(1)

        for case in test_cases:
            core, expected, raw, initial_rows, extra_rows, row_index = case

            reduced = core.apply(first_svd).apply(second_svd)
            peripheral = PeripheralSpace(reduced, DenseMatrix(raw),
                                         initial_rows)
            peripheral.add_rows(DenseMatrix(raw), extra_rows)

            actual = peripheral.cooccurrence_matrix.mat
            np.testing.assert_array_almost_equal(expected, actual, 2)

            # Row bookkeeping reflects both insertions, in order.
            self.assertListEqual(peripheral.id2row,
                                 initial_rows + extra_rows)
            self.assertDictEqual(peripheral.row2id, row_index)

            # No column index is expected on the reduced space.
            self.assertListEqual(peripheral.id2column, [])
            self.assertDictEqual(peripheral.column2id, {})

            self.assertEqual(2, len(peripheral.operations))

示例#8

0

显示文件

    def test_add_rows(self):
        """Check ``PeripheralSpace.add_rows`` on raw and PPMI-weighted cores.

        Rows are added to a peripheral space built on the plain core space
        and on the core space after ``PpmiWeighting``; matrices and indices
        are compared with the expectations.  Finally, three further
        ``add_rows`` calls are each expected to raise ``ValueError``.
        """
        scenarios = [(self.space1, self.m2, self.row2,
                      np.array([[4, 2, 6]]), ["c"],
                      np.array([[4, 2, 6], [4, 2, 6]]),
                      np.array([[0.69314718, 0, 0], [0.69314718, 0, 0]]),
                      {"b": 0, "c": 1},
                      ["b", "c"])]

        for scenario in scenarios:
            (core, first_mat, first_rows, second_mat, second_rows,
             expected_raw, expected_ppmi, expected_row2id,
             expected_id2row) = scenario

            # Plain core space.
            peripheral = PeripheralSpace(core, DenseMatrix(first_mat),
                                         first_rows)
            peripheral.add_rows(DenseMatrix(second_mat), second_rows)
            np.testing.assert_array_almost_equal(
                peripheral.cooccurrence_matrix.mat, expected_raw, 7)

            self.assertDictEqual(peripheral.row2id, expected_row2id)
            self.assertListEqual(peripheral.id2row, expected_id2row)

            self.assertDictEqual(peripheral.column2id, core.column2id)
            self.assertListEqual(peripheral.id2column, core.id2column)

            # PPMI-weighted core space.
            weighted_core = core.apply(PpmiWeighting())
            weighted_per = PeripheralSpace(weighted_core,
                                           DenseMatrix(first_mat),
                                           first_rows)
            weighted_per.add_rows(DenseMatrix(second_mat), second_rows)
            np.testing.assert_array_almost_equal(
                weighted_per.cooccurrence_matrix.mat, expected_ppmi, 7)

            # The first two row lists were already added above; the third
            # names two rows for the single-row ``second_mat``.
            # NOTE(review): the exact reason for each ValueError is
            # presumed from the data, confirm against add_rows.
            for rejected_rows in (first_rows, second_rows, ["d", "e"]):
                self.assertRaises(ValueError, weighted_per.add_rows,
                                  DenseMatrix(second_mat), rejected_rows)

示例#9

0

显示文件

文件： peripheral_space_test.py 项目： Aliases/dissect

    def test_add_rows(self):
        """Exercise ``add_rows`` and its error handling.

        Builds a peripheral space with one row, appends a second one and
        checks matrix, row indices and column indices, first on the plain
        core space and then on a ``PpmiWeighting``-transformed one.  Further
        ``add_rows`` calls on the filled space must raise ``ValueError``.
        """
        test_cases = [(self.space1, self.m2, self.row2,
                       np.array([[4, 2, 6]]), ["c"],
                       np.array([[4, 2, 6], [4, 2, 6]]),
                       np.array([[0.69314718, 0, 0], [0.69314718, 0, 0]]),
                       {"b": 0, "c": 1},
                       ["b", "c"])]

        for (core_sp, mat_a, rows_a, mat_b, rows_b, want_plain, want_ppmi,
             want_row2id, want_id2row) in test_cases:

            plain = PeripheralSpace(core_sp, DenseMatrix(mat_a), rows_a)
            plain.add_rows(DenseMatrix(mat_b), rows_b)
            np.testing.assert_array_almost_equal(
                plain.cooccurrence_matrix.mat, want_plain, 7)

            self.assertDictEqual(plain.row2id, want_row2id)
            self.assertListEqual(plain.id2row, want_id2row)

            self.assertDictEqual(plain.column2id, core_sp.column2id)
            self.assertListEqual(plain.id2column, core_sp.id2column)

            ppmi_core = core_sp.apply(PpmiWeighting())
            weighted = PeripheralSpace(ppmi_core, DenseMatrix(mat_a), rows_a)
            weighted.add_rows(DenseMatrix(mat_b), rows_b)
            np.testing.assert_array_almost_equal(
                weighted.cooccurrence_matrix.mat, want_ppmi, 7)

            # The matrix is created outside the ``with`` block so that only
            # add_rows itself is checked for raising ValueError.
            extra = DenseMatrix(mat_b)
            with self.assertRaises(ValueError):
                weighted.add_rows(extra, rows_a)

            extra = DenseMatrix(mat_b)
            with self.assertRaises(ValueError):
                weighted.add_rows(extra, rows_b)

            extra = DenseMatrix(mat_b)
            with self.assertRaises(ValueError):
                weighted.add_rows(extra, ["d", "e"])

示例#10

0

显示文件

    def test_per_space_top_feat_selection(self):
        """Check peripheral spaces built on feature-selected core spaces.

        For each requested number of dimensions the core space is reduced
        with ``TopFeatureSelection`` and a peripheral space over ``self.a``
        is verified in four settings: built in one go, built row by row via
        ``add_rows``, and with ``PlogWeighting`` applied to the core before
        or after the feature selection.

        ``np.asmatrix`` is used instead of the ``np.mat`` alias, which was
        removed in NumPy 2.0; both produce the same ``np.matrix`` objects.
        """
        # The last case requests 4 features while only 3 columns are
        # expected in the result.
        test_cases = [
            (self.space_d, 1, ["f3"], {"f3": 0},
             np.asmatrix([[3], [5]])),
            (self.space_d, 2, ["f3", "f1"], {"f3": 0, "f1": 1},
             np.asmatrix([[3, 1], [5, 4]])),
            (self.space_d, 4, ["f3", "f1", "f2"],
             {"f3": 0, "f1": 1, "f2": 2},
             np.asmatrix([[3, 1, 2], [5, 4, 0]])),
        ]

        def assert_indices(per_sp, id2col, col2id):
            # Row and column bookkeeping shared by all four settings.
            self.assertListEqual(per_sp.id2row, ["c", "d"])
            self.assertListEqual(per_sp.id2column, id2col)
            self.assertDictEqual(per_sp.column2id, col2id)

        for space_d, no_dim, id2col, col2id, mat in test_cases:

            trans = TopFeatureSelection(no_dim)
            new_space = space_d.apply(trans)

            # peripheral test simple test
            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a),
                                     ["c", "d"])
            assert_indices(per_sp, id2col, col2id)
            np.testing.assert_array_equal(per_sp.cooccurrence_matrix.mat, mat)

            # peripheral test with add rows
            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a[0, :]),
                                     ["c"])
            per_sp.add_rows(DenseMatrix(self.a[1, :]), ["d"])
            assert_indices(per_sp, id2col, col2id)
            np.testing.assert_array_equal(per_sp.cooccurrence_matrix.mat, mat)

            # peripheral test, with plog applied to core BEFORE feat selection
            # Zeros are replaced by 1 so that their logarithm is 0.
            plogmat = mat.copy()
            plogmat[plogmat == 0] = 1
            plogmat = np.log(plogmat)

            new_space = space_d.apply(PlogWeighting())
            trans = TopFeatureSelection(no_dim)
            new_space = new_space.apply(trans)

            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a),
                                     ["c", "d"])
            assert_indices(per_sp, id2col, col2id)
            np.testing.assert_array_almost_equal(
                per_sp.cooccurrence_matrix.mat, plogmat, 7)

            # peripheral test, with plog applied to core AFTER feat selection
            trans = TopFeatureSelection(no_dim)
            new_space = space_d.apply(trans)
            new_space = new_space.apply(PlogWeighting())

            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a),
                                     ["c", "d"])
            assert_indices(per_sp, id2col, col2id)
            np.testing.assert_array_almost_equal(
                per_sp.cooccurrence_matrix.mat, plogmat, 7)

示例#11

0

显示文件

文件： full_example.py 项目： totonac/dissect

# Load the pickled core space.
space_file = data_path + "CORE_SS.verbnoun.core.pkl"
space = io_utils.load(space_file)

# print() with a single argument behaves the same on Python 2 and 3,
# whereas the print statement is a syntax error on Python 3.
print("Applying PPMI...")
space = space.apply(PpmiWeighting())

print("Applying feature selection...")
space = space.apply(TopFeatureSelection(2000))

print("Applying SVD...")
space = space.apply(Svd(100))

# The peripheral space is built on top of the transformed core space.
print("Creating peripheral space..")
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# reading in train data
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

示例#12

0

显示文件

#ex05.py
#-------
from composes.utils import io_utils
from composes.semantic_space.peripheral_space import PeripheralSpace
from composes.transformation.scaling.ppmi_weighting import PpmiWeighting


# Load a previously saved space and re-weight it with PPMI.
my_space = io_utils.load("./data/out/ex01.pkl").apply(PpmiWeighting())

print(my_space.cooccurrence_matrix)
print(my_space.id2row)

# Build a peripheral space on top of the weighted space from the
# sparse-matrix ("sm") data file and its column file.
my_per_space = PeripheralSpace.build(
    my_space,
    data="./data/in/ex05.sm",
    cols="./data/in/ex05.cols",
    format="sm",
)

print(my_per_space.cooccurrence_matrix)
print(my_per_space.id2row)

# Persist the peripheral space as a pickle.
io_utils.save(my_per_space, "./data/out/PER_SS.ex05.pkl")

示例#13

0

显示文件

文件： full_example.py 项目： georgiana-dinu/dissect

# Core space: load it, then weight, select features and reduce it.
space_file = data_path + "CORE_SS.verbnoun.core.pkl"
space = io_utils.load(space_file)

# The print statement is replaced by print() calls so the script also
# parses on Python 3; single-argument calls print the same on Python 2.
print("Applying PPMI...")
space = space.apply(PpmiWeighting())

print("Applying feature selection...")
space = space.apply(TopFeatureSelection(2000))

print("Applying SVD...")
space = space.apply(Svd(100))

print("Creating peripheral space..")
per_space = PeripheralSpace.build(space,
                                  data=data_path + "per.raw.SV.sm",
                                  cols=data_path + "per.raw.SV.cols",
                                  format="sm")

# reading in train data
train_data_file = data_path + "ML08_SV_train.txt"
train_data = io_utils.read_tuple_list(train_data_file, fields=[0, 1, 2])

# The model is trained with the core space and the peripheral space.
print("Training Lexical Function composition model...")
comp_model = LexicalFunction(learner=RidgeRegressionLearner(param=2))
comp_model.train(train_data, space, per_space)

print("Composing phrases...")
test_phrases_file = data_path + "ML08nvs_test.txt"
test_phrases = io_utils.read_tuple_list(test_phrases_file, fields=[0, 1, 2])
composed_space = comp_model.compose(test_phrases, space)

示例#14

0

显示文件

文件： ex05.py 项目： georgiana-dinu/dissect

#ex05.py
#-------
from composes.utils import io_utils
from composes.semantic_space.peripheral_space import PeripheralSpace
from composes.transformation.scaling.ppmi_weighting import PpmiWeighting


# load a space and apply ppmi on it
my_space = io_utils.load("./data/out/ex01.pkl")
my_space = my_space.apply(PpmiWeighting())

# print() calls replace the Python 2 print statement; with a single
# argument the output is identical on Python 2 and the code parses on
# Python 3.
print(my_space.cooccurrence_matrix)
print(my_space.id2row)

# create a peripheral space on top of the weighted space
my_per_space = PeripheralSpace.build(my_space,
                                     data="./data/in/ex05.sm",
                                     cols="./data/in/ex05.cols",
                                     format="sm")

print(my_per_space.cooccurrence_matrix)
print(my_per_space.id2row)

# save the space
io_utils.save(my_per_space, "./data/out/PER_SS.ex05.pkl")

示例#15

0

显示文件

文件： baroni_group.py 项目： mbatchkarov/vector_builder

def train_baroni_guevara_composers(all_vectors,
                                   ROOT_DIR,
                                   baroni_output_path, guevara_output_path,
                                   baroni_threshold=10):
    """
    Train a LexicalFunction ("baroni") and a FullAdditive ("guevara")
    composer with Dissect and save both with ``io_utils.save``.

    Noun unigram vectors form the core space and AN/NN phrase vectors the
    peripheral space; both are first written to temporary files in ROOT_DIR.

    :param all_vectors: path to vectors file containing both N and observed
        AN vectors
    :type all_vectors: str
    :param ROOT_DIR: where to write temp files
    :type ROOT_DIR: str
    :param baroni_output_path: where to write pickled baroni composer
    :type baroni_output_path: str
    :param guevara_output_path: where to write the guevara composer
    :type guevara_output_path: str
    :param baroni_threshold: passed as ``min_samples`` to LexicalFunction
    :type baroni_threshold: int
    """
    svd_dims = 100  # within this function only used to label the temp files
    phrase_types = {'AN', 'NN'}  # what kind of NPs to train Baroni composer for

    def is_noun_unigram(entry):
        return entry.type == '1-GRAM' and entry.tokens[0].pos == 'N'

    def is_training_phrase(entry):
        return entry.type in phrase_types

    def underscore_joined(entry):
        return str(entry).replace(' ', '_')

    # prepare the input files to be fed into Dissect
    mkdirs_if_not_exists(ROOT_DIR)

    vectors_name = basename(all_vectors)
    noun_events_file = join(ROOT_DIR,
                            '%s-onlyN-SVD%d.tmp' % (vectors_name, svd_dims))
    NPs_events_file = join(ROOT_DIR,
                           '%s-onlyPhrases-SVD%d.tmp' % (vectors_name, svd_dims))

    thes = Vectors.from_tsv(all_vectors, lowercasing=False)

    thes.to_tsv(noun_events_file, entry_filter=is_noun_unigram)
    # presumably writes the .sm/.rows/.cols files read below -- TODO confirm
    _translate_byblo_to_dissect(noun_events_file)

    thes.to_tsv(NPs_events_file,
                entry_filter=is_training_phrase,
                row_transform=underscore_joined)
    _translate_byblo_to_dissect(NPs_events_file)

    my_space = Space.build(data="{}.sm".format(noun_events_file),
                           rows="{}.rows".format(noun_events_file),
                           cols="{}.cols".format(noun_events_file),
                           format="sm")
    logging.info('Each unigram vector has dimensionality %r', my_space.element_shape)

    # create a peripheral space
    # The columns of the peripheral space have to be identical to those
    # in the core space (including their order)!
    my_per_space = PeripheralSpace.build(my_space,
                                         data="{}.sm".format(NPs_events_file),
                                         rows="{}.rows".format(NPs_events_file),
                                         cols="{}.cols".format(NPs_events_file),
                                         format="sm")
    logging.info('Each phrase vector has dimensionality %r', my_per_space.element_shape)

    # collect (modifier, head, phrase) training tuples from the phrase rows
    training_tuples = []
    for phrase in my_per_space._row2id:
        feature = DocumentFeature.from_string(phrase.replace(' ', '_'))
        # make sure there are only NPs here
        if feature.type not in phrase_types:
            continue
        adj, noun = phrase.split('_')
        training_tuples.append((adj, noun, '%s_%s' % (adj, noun)))

    # train a composition model on the data and save it
    composers = (
        (LexicalFunction(min_samples=baroni_threshold,
                         learner=RidgeRegressionLearner()),
         baroni_output_path),
        (FullAdditive(learner=RidgeRegressionLearner()),
         guevara_output_path),
    )
    for composer, out_path in composers:
        composer.train(training_tuples, my_space, my_per_space)
        io_utils.save(composer, out_path)
        logging.info('Saved trained composer to %s', out_path)

示例#16

0

显示文件

文件： feat_selection_test.py 项目： Aliases/dissect

    def test_per_space_top_feat_selection(self):
        """Test peripheral spaces on top of ``TopFeatureSelection`` cores.

        Each test case gives the number of features to select together with
        the expected column list, column index and peripheral matrix.  The
        peripheral space over ``self.a`` is checked when built at once, when
        built via ``add_rows``, and with ``PlogWeighting`` applied to the
        core before or after the feature selection.

        ``np.asmatrix`` replaces the ``np.mat`` alias, which no longer
        exists in NumPy 2.0; the created matrices are the same.
        """
        test_cases = [(self.space_d, 1, ["f3"], {"f3": 0},
                       np.asmatrix([[3], [5]])),
                      (self.space_d, 2, ["f3", "f1"], {"f3": 0, "f1": 1},
                       np.asmatrix([[3, 1], [5, 4]])),
                      (self.space_d, 4, ["f3", "f1", "f2"],
                       {"f3": 0, "f1": 1, "f2": 2},
                       np.asmatrix([[3, 1, 2], [5, 4, 0]]))
                      ]

        for space_d, no_dim, id2col, col2id, mat in test_cases:

            trans = TopFeatureSelection(no_dim)
            new_space = space_d.apply(trans)

            # peripheral test simple test
            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a), ["c", "d"])

            self.assertListEqual(per_sp.id2row, ["c", "d"])
            self.assertListEqual(per_sp.id2column, id2col)
            self.assertDictEqual(per_sp.column2id, col2id)

            np.testing.assert_array_equal(per_sp.cooccurrence_matrix.mat,
                                          mat)

            # peripheral test with add rows
            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a[0, :]), ["c"])
            per_sp.add_rows(DenseMatrix(self.a[1, :]), ["d"])

            self.assertListEqual(per_sp.id2row, ["c", "d"])
            self.assertListEqual(per_sp.id2column, id2col)
            self.assertDictEqual(per_sp.column2id, col2id)

            np.testing.assert_array_equal(per_sp.cooccurrence_matrix.mat,
                                          mat)

            # peripheral test, with plog applied to core BEFORE feat selection
            # Expected values: zeros are set to 1 first so their log is 0.
            plogmat = mat.copy()
            plogmat[plogmat == 0] = 1
            plogmat = np.log(plogmat)

            new_space = space_d.apply(PlogWeighting())
            trans = TopFeatureSelection(no_dim)
            new_space = new_space.apply(trans)

            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a), ["c", "d"])

            self.assertListEqual(per_sp.id2row, ["c", "d"])
            self.assertListEqual(per_sp.id2column, id2col)
            self.assertDictEqual(per_sp.column2id, col2id)

            np.testing.assert_array_almost_equal(per_sp.cooccurrence_matrix.mat,
                                                 plogmat, 7)

            # peripheral test, with plog applied to core AFTER feat selection
            trans = TopFeatureSelection(no_dim)
            new_space = space_d.apply(trans)
            new_space = new_space.apply(PlogWeighting())

            per_sp = PeripheralSpace(new_space, DenseMatrix(self.a), ["c", "d"])

            self.assertListEqual(per_sp.id2row, ["c", "d"])
            self.assertListEqual(per_sp.id2column, id2col)
            self.assertDictEqual(per_sp.column2id, col2id)

            np.testing.assert_array_almost_equal(per_sp.cooccurrence_matrix.mat,
                                                 plogmat, 7)

示例#17

0

显示文件

文件： importSpace.py 项目： gastrovec/gastrovec-esslli2015

sys.stderr.flush()

# Build the core space from the corpus files and keep a raw copy.
gastrovec = Space.build(
    data="../corpus_collection/corpus.sm",
    rows="../corpus_collection/corpus.rows",
    cols="../corpus_collection/corpus.cols",
    format="sm",
)
print("done.", file=sys.stderr)

io_utils.save(gastrovec, "gastrovec.pkl")

# Progress messages without a trailing newline are written directly to
# stderr and flushed so they show up before the long-running step.
sys.stderr.write("Applying PPMI... ")
sys.stderr.flush()
gastrovec = gastrovec.apply(PpmiWeighting())

sys.stderr.write("Applying SVD (20)... ")
sys.stderr.flush()
gastrovec = gastrovec.apply(Svd(20))
print("done.", file=sys.stderr)

io_utils.save(gastrovec, "gastrovec.ppmi.svd20.pkl")

# The recipes are loaded as a peripheral space on top of the
# transformed core space.
sys.stderr.write("Loading recipe peripheral space...")
sys.stderr.flush()
recipes = PeripheralSpace.build(
    gastrovec,
    data="../corpus_collection/recipes.sm",
    rows="../corpus_collection/recipes.rows",
    cols="../corpus_collection/recipes.cols",
    format="sm",
)
print("done.", file=sys.stderr)

io_utils.save(recipes, "recipes.ppmi.svd20.pkl")