def __init__(self, output=None, corpus=None, attribute="categories",
             categories=None, mode="combined"):
    """
    Create a CategoryToCorpus module, which loads a corpus with tagged
    documents.

    Args:
        output: Stored unchanged on the instance as ``self.output``.
        corpus: Existing corpus to add to. When None, a new ``Corpus()``
            is created.
        attribute: Stored unchanged as ``self.attribute``. Presumably the
            document attribute that carries the category tags -- confirm
            against the code that reads it.
        categories: Stored unchanged as ``self.categories``.
        mode: The corpus loading method to use. If set to "combined", all
            documents in a category are concatenated to a single document.
            Otherwise each document is loaded separately.
    """
    self.output = output
    self.corpora = {}

    # combined mode has a single corpus
    # Identity test: `== None` would dispatch to the corpus object's own
    # __eq__, which may not treat None the way a plain None check should.
    if corpus is None:
        self.corpus = Corpus()
    else:
        self.corpus = corpus

    self.module_type = enumModuleType(enumModuleType.Document)
    self.module_processing_type = enumModuleProcessingType(
        enumModuleProcessingType.PostProcess)

    self.attribute = attribute
    self.categories = categories
    self.mode = mode
    self.pp = pprint.PrettyPrinter(indent=4)
def __init__(self, output=None, corpus=None):
    """
    Initialize the module with an output target and a corpus.

    Args:
        output: Stored unchanged on the instance as ``self.output``.
        corpus: Corpus to use. When None, a new ``Corpus()`` is created.
    """
    self.output = output

    # Identity test: `== None` would dispatch to the corpus object's own
    # __eq__, which may not treat None the way a plain None check should.
    if corpus is None:
        corpus = Corpus()
    self.corpus = corpus

    self.module_type = enumModuleType(enumModuleType.Document)
    self.module_processing_type = enumModuleProcessingType(
        enumModuleProcessingType.PostProcess)
def __init__(self, output=None):
    """
    Initialize the module with an output target and a newly constructed
    ``Graph`` stored as ``self.cooccur_graph``.

    Args:
        output: Stored unchanged on the instance as ``self.output``.
    """
    self.output = output
    self.cooccur_graph = Graph()

    # Tag the module as a Document-type module with PostProcess handling.
    document_kind = enumModuleType.Document
    self.module_type = enumModuleType(document_kind)
    post_process_kind = enumModuleProcessingType.PostProcess
    self.module_processing_type = enumModuleProcessingType(post_process_kind)
def __init__(self):
    """
    Initialize the module with its type tags and an empty ``index`` dict.
    """
    # Tag the module as a Document-type module with PostProcess handling.
    document_kind = enumModuleType.Document
    self.module_type = enumModuleType(document_kind)
    post_process_kind = enumModuleProcessingType.PostProcess
    self.module_processing_type = enumModuleProcessingType(post_process_kind)

    # Starts empty; this constructor adds no entries.
    self.index = {}