def __init__(self, ngram, dir):
    """Create the helper objects and load persisted state from ``dir``.

    Args:
        ngram: Stored unchanged on ``self.ngram``.
        dir: Directory that contains ``docid.pickle`` and
            ``content.pickle``. A trailing path separator is optional.
            The name shadows the ``dir`` builtin; it is kept so that
            existing keyword callers continue to work.
    """
    # Function-scope import: the file's top-level imports are not visible
    # from this block, so the dependency is brought in locally.
    import os

    self.docID = DocID()
    # NOTE(review): the meaning of "ma" is defined by Tokenizer -- confirm.
    self.tokenizer = Tokenizer("ma")
    self.content = Content()
    self.ngram = ngram

    # os.path.join adds the separator only when it is missing, so "data"
    # and "data/" both resolve to "data/docid.pickle". Plain string
    # concatenation produced "datadocid.pickle" for the former.
    # NOTE(review): the ".pickle" file names suggest load() unpickles
    # these files -- only point ``dir`` at trusted locations; confirm.
    self.docID.load(os.path.join(dir, "docid.pickle"))
    self.content.load(os.path.join(dir, "content.pickle"))

    self.stopwords = self._load_stopwords(STOPWORDS_FILE)
def __init__(self, ngram):
    """Initialise an instance with fresh, empty helper objects.

    Args:
        ngram: Stored unchanged on ``self.ngram``.
    """
    self.ngram = ngram

    # Helper objects are created in the same order as before; nothing is
    # loaded from disk in this constructor.
    self.tokenizer = Tokenizer("ma")
    self.docID = DocID()
    self.content = Content()