def trainClassifiers(self, xml_file, type):
    """Train four classifiers from ``xml_file`` and write each one to disk.

    The vector models are first (re)built through ``prepareModels``. The
    stored TF-IDF data is then loaded, every row is passed through
    ``expand``, and one ``SC`` classifier is trained per classifier kind
    (1 through 4). Each trained classifier is persisted with
    ``write_data_to_disk``.

    Args:
        xml_file: Input forwarded unchanged to ``prepareModels`` and
            ``procesar``.
        type: Mode selector. ``1`` uses the ``tfidfModel`` / ``SVM``,
            ``NB``, ``ME``, ``DT`` storage constants; any other value uses
            the ``p``-suffixed counterparts. The name shadows the builtin
            but is kept so existing keyword callers keep working.
    """
    self.prepareModels(xml_file, type)
    # NOTE(review): prepareModels already calls procesar with the same
    # arguments, so the input is processed twice per training run.
    comentarios = self.procesar(xml_file, type)

    # The two modes run the same pipeline; only the storage constants differ.
    if type == 1:
        model_path = tfidfModel
        classifier_paths = [SVM, NB, ME, DT]
    else:
        model_path = tfidfModelp
        classifier_paths = [SVMp, NBp, MEp, DTp]

    data_expanded = [expand(row) for row in load_data_from_disk(model_path)]
    # The second element of every processed entry is used as its label.
    labels = [comentario[1] for comentario in comentarios]

    # SC takes a 1-based classifier kind; the position in classifier_paths
    # decides which kind is stored at which location.
    for kind, path in enumerate(classifier_paths, start=1):
        fClass = SC(data_expanded, labels, kind).train()
        write_data_to_disk(path, fClass)
def prepareModels(self, xml_file, type):
    """Build the vector models for ``xml_file`` and write them to disk.

    The first element of every entry returned by ``procesar`` is collected
    and handed to ``VM``. The first three items of the ``prepare_models()``
    result are then stored with ``write_data_to_disk``.

    Args:
        xml_file: Input forwarded unchanged to ``procesar``.
        type: Mode selector. ``1`` writes to ``simpleVectorizer``,
            ``tfidfVectorizer`` and ``tfidfModel``; any other value writes
            to the ``p``-suffixed counterparts.
    """
    corpus = [entry[0] for entry in self.procesar(xml_file, type)]
    prepared = VM(corpus).prepare_models()

    # Presumably (vectorizer, TF-IDF vectorizer, TF-IDF model), judging by
    # the storage constants they are written to -- confirm against VM.
    vectorizer = prepared[0]
    vectorizer_tfidf = prepared[1]
    model_tfidf = prepared[2]

    if type == 1:
        write_data_to_disk(simpleVectorizer, vectorizer)
        write_data_to_disk(tfidfVectorizer, vectorizer_tfidf)
        write_data_to_disk(tfidfModel, model_tfidf)
        return

    write_data_to_disk(simpleVectorizerp, vectorizer)
    write_data_to_disk(tfidfVectorizerp, vectorizer_tfidf)
    write_data_to_disk(tfidfModelp, model_tfidf)