def __init__(self):
    """Create the helper objects and load the data this instance works on.

    In order: builds the ``Group`` helper and the ``FileInOut`` reader,
    loads whatever ``readClasses("KNN")`` returns, loads the document
    vectors together with their ids, and builds the ``FormWords`` helper.
    """
    grouper = Group()
    reader = FileInOut()
    self.g = grouper
    self.input = reader

    # A commented-out alternative in the original passed "NB" here instead.
    self.classes = reader.readClasses("KNN")

    # readDocsVector() yields a pair: (vectors, ids of those vectors).
    doc_vectors, doc_ids = reader.readDocsVector()
    self.docVectorList = doc_vectors
    self.vectorsIds = doc_ids

    self.wordFormer = FormWords()
def set_tokens(self):
    """Return the pair ``(T, M)``.

    ``M`` is the number of entries in the dictionary loaded through
    ``FileInOut.readDic()``; it is also printed. ``T`` is the fixed
    value 755440.

    Returns:
        tuple: ``(T, M)`` as described above.
    """
    reader = FileInOut()
    vocabulary_size = len(reader.readDic())
    print("M :" + str(vocabulary_size))
    # NOTE(review): 755440 is hard-coded; presumably a precomputed total
    # token count for the collection -- confirm against the data set.
    return 755440, vocabulary_size
def __init__(self):
    """Create the helper objects and the empty index structures.

    Starts with an empty ``dictionary``, a NumPy array of 150000
    independent empty dicts as ``posting_list``, and the counters
    ``dicIndex``, ``docIndex`` and ``c`` all at zero.
    """
    self.input = FileInOut()
    self.wordFormer = FormWords()
    self.constants = ConstantVars()

    self.dictionary = {}
    # Each slot gets its own dict object, so slots never share state.
    empty_slots = [{} for _ in range(150000)]
    self.posting_list = np.array(empty_slots)

    self.dicIndex = self.docIndex = self.c = 0
def set_cf_dictionary(self):
    """Populate ``self.cfDic`` with per-posting totals, largest first.

    Reads the posting list through ``FileInOut.readPostingList()``. For
    each visited index ``i`` the total is the sum of
    ``len(postings[i][j])`` over the visited ``j``. The result is a list
    of ``(i, total)`` tuples sorted by total in descending order.
    """
    postings = FileInOut().readPostingList()
    totals = {}

    # NOTE(review): both ranges stop one short of the end (``len(...) - 1``),
    # so the last posting and the last entry of every posting are skipped.
    # Kept exactly as in the original -- confirm whether this is intended.
    for posting_idx in range(len(postings) - 1):
        entry = postings[posting_idx]
        totals[posting_idx] = sum(
            len(entry[k]) for k in range(len(entry) - 1)
        )

    self.cfDic = sorted(
        totals.items(), key=lambda pair: pair[1], reverse=True
    )
    # The sorted list holds its own tuples; empty the scratch dict.
    totals.clear()
def __init__(self):
    """Load the stored index files and reset the relevance bookkeeping.

    Reads the dictionary, the doc-id file and the posting list through
    ``FileInOut``, creates the ``FormWords`` and ``ConstantVars`` helpers,
    and starts with empty related / not-related lists and a zero
    not-related counter.
    """
    reader = FileInOut()
    self.input = reader
    self.Dic = reader.readDic()
    self.DocID_file = reader.readDocID()
    self.posting_file = reader.readPostingList()

    self.wordFormer = FormWords()
    self.constants = ConstantVars()

    # Four separate list objects -- they must not alias each other.
    self.relatedDocs, self.notRelatedDocs = [], []
    self.relatedDocsPos, self.notRelatedDocsPos = [], []
    self.notRelatedCounts = 0
def __init__(self):
    """Build ``self.df``, a DataFrame with one row per document vector.

    Columns are the string forms of the integers 1..38728. A cell holds
    the vector's value for that integer key, or 0 when the vector has no
    such key. The frame's index is set to the ids returned alongside the
    vectors by ``FileInOut.readDocsVector()``.
    """
    self.inOut = FileInOut()
    self.df = {}
    vectors, doc_ids = self.inOut.readDocsVector()

    columns = self.df
    # NOTE(review): 38729 is hard-coded; presumably vocabulary size + 1 --
    # confirm against the stored dictionary.
    for term_id in range(1, 38729):
        label = str(term_id)
        for vector in vectors:
            weight = vector[term_id] if term_id in vector else 0
            # setdefault inside the inner loop: with no vectors at all,
            # no column is ever created (same as the original).
            columns.setdefault(label, []).append(weight)

    self.df = pd.DataFrame(columns)
    self.df.index = doc_ids
    print('phase 1 completed')
def __init__(self, algorithm):
    """Load training and document vectors, then run ``self.KNN()``.

    Progress markers are printed between the loading steps. The result
    of ``self.KNN()`` is stored in ``self.classes``.

    Args:
        algorithm: accepted but not read anywhere in this constructor.
    """
    self.train_data = Train_data()
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!")

    reader = FileInOut()
    self.input = reader
    self.k = 5

    doc_vectors, doc_ids = reader.readDocsVector()
    self.docVectorList = doc_vectors
    self.vectorsIds = doc_ids
    print("222222222222222222222")

    train_vectors, train_ids = reader.readTrainDocsVector()
    self.trainVectorList = train_vectors
    self.trainvectorsIds = train_ids
    print("33333333333333333333333333333")

    self.num_ov_results = 100
    self.gp = Group([7745])
    print("ta ghable knn umaaaaaaad")

    # Must stay last: KNN() runs after every attribute above is assigned.
    self.classes = self.KNN()
def __init__(self):
    """Load the stored clusters and document vectors and create helpers.

    ``self.v`` and ``self.d`` receive the two values returned by
    ``FileInOut.readDocsVector()`` (vectors and their ids, judging by how
    the same call is unpacked elsewhere in this file).
    """
    reader = FileInOut()
    self.inOut = reader
    self.clusters = reader.readClusters()

    self.g = Group()
    self.similarity = Similiarity()

    vectors, ids = reader.readDocsVector()
    self.v = vectors
    self.d = ids
def __init__(self):
    """Create the ``FileInOut`` reader and copy its ``N`` onto this object."""
    reader = FileInOut()
    self.input = reader
    self.N = reader.N