def preprocess(profiles, network_name):
    """Build, prune and pickle one bigram language model per label.

    For every key of ``profiles`` a fresh ``BigramLanguageModel`` is fed the
    lower-cased ``description`` of each profile stored under that key, is
    pruned, and is then dumped with ``cPickle`` to the file named
    ``network_name + 'BigramLM-' + label``.

    ``profiles`` must provide ``iterkeys()`` and item access (presumably a
    Python 2 dict of label -> iterable of profiles; confirm against callers).
    ``network_name`` is used as a plain string prefix: no path separator is
    inserted between it and the rest of the file name.
    """
    for label in profiles.iterkeys():
        language_model = BigramLanguageModel()
        for entry in profiles[label]:
            lowered = entry.description.lower()
            language_model.add(lowered)
        language_model.prune()

        # One output file per label, opened in binary mode for the pickle.
        target_path = network_name + 'BigramLM-' + label
        with open(target_path, 'wb') as sink:
            cPickle.dump(language_model, sink)
def preprocess(profiles, network_name):
    """Build, prune and pickle one post-based bigram model per label.

    For every key of ``profiles`` a fresh ``BigramLanguageModel`` is fed the
    lower-cased ``content`` of every post of every profile stored under that
    key, is pruned, and is then dumped with ``cPickle`` to the file named
    ``network_name + 'PostBigramLM-' + label``.

    ``profiles`` must provide ``iterkeys()`` and item access (presumably a
    Python 2 dict of label -> iterable of profiles; confirm against callers).
    ``network_name`` is used as a plain string prefix: no path separator is
    inserted between it and the rest of the file name.
    """
    for label in profiles.iterkeys():
        language_model = BigramLanguageModel()
        for entry in profiles[label]:
            # Posts are lowered and added one at a time, in iteration order.
            for item in entry.posts:
                language_model.add(item.content.lower())
        language_model.prune()

        # One output file per label, opened in binary mode for the pickle.
        target_path = network_name + 'PostBigramLM-' + label
        with open(target_path, 'wb') as sink:
            cPickle.dump(language_model, sink)