for i in range(n): try: segments.append(iterator.next()) except StopIteration: break return segments if __name__ == '__main__': parser = argparse.ArgumentParser(description='Cluster segments') parser.add_argument('clustermodel', type=unicode, help='The clusterer model to use.') args = parser.parse_args() setstorage = MongoSettingsStorage() docstorage = MongoDocumentStorage() segstorage = MongoSegmentStorage() logger.info('Loading clusterer model') settings = setstorage.load(encode_name(args.clustermodel)) dictionary = Dictionary.load(os.path.join(DICTIONARY_PATH, settings[DICTIONARY])) ngram_size = len(dictionary[0]) transformer = NgramTransformer(ngram_size) ldamodel = LdaModel.load(os.path.join(LDA_PATH, settings[LDA_MODEL])) logger.info('Clusterer model loaded!') kwargs = {'dictionary': dictionary, 'ngramtransformer': transformer, 'ldamodel': ldamodel, 'method': 'LDA'}
def emptystorage(self):
    """Build a ``MongoSegmentStorage`` for ``'test_db'`` and clear it.

    The storage is constructed with the literal name ``'test_db'``, its
    ``delete()`` method is invoked once, and the same object is handed back
    to the caller.

    Returns:
        The ``MongoSegmentStorage`` instance after ``delete()`` was called.
    """
    # NOTE(review): delete() presumably wipes every stored segment so the
    # caller starts from an empty store -- confirm against MongoSegmentStorage.
    fresh_storage = MongoSegmentStorage('test_db')
    fresh_storage.delete()
    return fresh_storage