def test_load_patents_dataset(self):
    """Load the patents dataset and pickle a reduced copy of it.

    Reads the dataset from the relative directory ``test/test_set`` and
    prints the entry stored under key ``'08714876'``, the length of that
    entry's first element and the total number of entries.  A reduced
    dictionary built from the first 100 keys (in ``dataset.keys()`` order)
    is then written to ``test_vector2.pic`` in the current working
    directory.
    """
    # NOTE(review): the dataset path is relative, so this presumably only
    # works when run from the directory that contains ``test/`` -- confirm.
    max_entries = 100   # number of keys copied into the reduced dataset
    vector_length = 25  # leading items kept from each entry's first element
    label_pool = ['abc', 'def', 'gh']

    dataset = fmatrix.load_patents_dataset('test/test_set')
    sample = dataset['08714876']
    # A parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print(sample)
    print(len(sample[0]))
    print(len(dataset))

    # Each reduced entry pairs the truncated first element with two strings
    # drawn at random from ``label_pool``.
    selected_keys = list(dataset.keys())[:max_entries]
    reduced = {
        key: [
            dataset[key][0][:vector_length],
            np.random.choice(label_pool, 2).tolist(),
        ]
        for key in selected_keys
    }

    # ``pickle_file`` rather than ``file`` so the builtin is not shadowed.
    with open('test_vector2.pic', 'wb') as pickle_file:
        cPickle.dump(reduced, pickle_file, cPickle.HIGHEST_PROTOCOL)
def test_load_patents_dataset2(self):
    """Load the patents dataset from the packaged ``test_set`` resource.

    Resolves ``test_set`` inside ``patent_parsing_tools.utils.tests`` via
    ``resource_filename``, loads it with ``fmatrix.load_patents_dataset``
    and prints the entry stored under key ``'08714876'``, the length of
    that entry's first element and the total number of entries.
    """
    fixture_path = resource_filename(
        "patent_parsing_tools.utils.tests", "test_set")
    patents = fmatrix.load_patents_dataset(fixture_path)

    sample = patents['08714876']
    # A parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print(sample)
    print(len(sample[0]))
    print(len(patents))