def test_load_patents_dataset(self):
        """Load the patents dataset and pickle a small derived sample.

        Loads from the relative path 'test/test_set' (resolved against the
        current working directory), prints the entry stored under key
        '08714876', the length of that entry's first element and the number
        of entries, then writes a reduced copy to 'test_vector2.pic'.

        The reduced copy holds at most 100 entries; each keeps the first 25
        items of the entry's first element and pairs them with two labels
        picked by np.random.choice from ['abc', 'def', 'gh'].

        NOTE: makes no assertions -- it only fails if one of the steps raises.
        """
        dataset = fmatrix.load_patents_dataset('test/test_set')
        sample = dataset['08714876']
        # Single-argument parenthesised print behaves the same as the print
        # statement on Python 2 and also parses on Python 3.
        print(sample)
        print(len(sample[0]))
        print(len(dataset))

        # Iterate the mapping directly (no intermediate key list) and let
        # enumerate do the counting instead of a hand-maintained index.
        # NOTE(review): assumes the loaded dataset is a dict-like object whose
        # iteration yields its keys -- confirm against load_patents_dataset.
        subset = {}
        for position, key in enumerate(dataset):
            if position >= 100:
                break
            labels = np.random.choice(['abc', 'def', 'gh'], 2).tolist()
            subset[key] = [dataset[key][0][:25], labels]

        # 'out_file' rather than 'file' so the Python 2 builtin is not shadowed.
        with open('test_vector2.pic', 'wb') as out_file:
            cPickle.dump(subset, out_file, cPickle.HIGHEST_PROTOCOL)
 def test_load_patents_dataset2(self):
     """Load the patents dataset from the packaged 'test_set' resource.

     Resolves 'test_set' inside the 'patent_parsing_tools.utils.tests'
     package via resource_filename, loads it, and prints the entry stored
     under key '08714876', the length of that entry's first element and the
     total number of entries.

     NOTE: makes no assertions -- it only fails if one of the steps raises.
     """
     fixture_path = resource_filename(
         "patent_parsing_tools.utils.tests", "test_set")
     patents = fmatrix.load_patents_dataset(fixture_path)
     sample = patents['08714876']
     print(sample)
     print(len(sample[0]))
     print(len(patents))