def __init__(self, entityparams=ee.ALL):
    """Create the collaborator objects this instance keeps as attributes.

    Args:
        entityparams: Stored as ``self.params`` and handed to
            ``CitationEntityExtractor``. Defaults to ``ee.ALL``.
    """
    self.params = entityparams

    # Extractors. Only the citation parser receives the entity parameters.
    self.entity_extractor = ee.EntityExtractor()
    self.meta_extractor = MetaExtractor()
    self.cita_parser = CitationEntityExtractor(self.params)

    # Document containers.
    self.document_wrapper = DocumentWrapper()
    self.textual_document = TextualDocument()

    self.email_extractor = ee.EntityExtractor.EmailExtractor()
    self.document_info = DocumentInfo()

    # Text utilities.
    self.cleaner = TextCleaner()
    self.lang_identifier = LanguageIdentifier()
def get_language(self):
    """Identify the language of this object's text content.

    Returns:
        Whatever ``identify`` returns for ``self.text_content()``; the exact
        type depends on ``LanguageIdentifier`` (not visible here).
    """
    # Reuse the identifier that __init__ stores on the instance instead of
    # constructing a new one on every call.
    # NOTE(review): assumes identify() does not depend on per-call fresh
    # state of the identifier -- confirm against LanguageIdentifier.
    identifier = getattr(self, "lang_identifier", None)
    if identifier is None:
        # Instance was built without running __init__; keep the old
        # behaviour of creating an identifier on demand.
        identifier = LanguageIdentifier()
    return identifier.identify(self.text_content())