def pyfasttext_sample():
    """Print the predictions of a saved cooking model for the validation file.

    Loads the compressed model ``output/model_cooking_5.ftz``, runs
    ``predict_file`` on ``data/cooking/pre_cooking.valid`` with ``2`` as the
    second argument (presumably the number of labels per line -- confirm
    against the pyfasttext docs) and prints each result with its index.

    Library reference: https://pypi.org/project/pyfasttext/
    """
    classifier = FastText()
    # model.load_model('output/model_cooking_6.bin')
    classifier.load_model('output/model_cooking_5.ftz')

    predictions = classifier.predict_file('data/cooking/pre_cooking.valid', 2)
    for index, predicted in enumerate(predictions):
        print(index, predicted)
def runExperiment(trainFileName, testFileName):
    """Train a fastText classifier and count correct test predictions.

    A supervised model is trained on ``trainFileName`` using the module-level
    ``DIM`` and ``MINCOUNT`` settings and written to ``TMPFILENAME``.  For
    every test line the first predicted label is compared with the gold class
    from ``readData(testFileName)["classes"]`` after the ``__label__`` marker
    has been removed from it.  The temporary ``.bin`` and ``.vec`` model files
    are deleted before returning, also when prediction or scoring raises.

    Args:
        trainFileName: Path of the training file handed to ``supervised``.
        testFileName: Path of the test file handed to ``predict_file`` and
            ``readData``.

    Returns:
        dict: ``"correct"`` holds the number of lines whose first predicted
        label equals the gold class; ``"labels"`` holds the unmodified
        ``predict_file`` result.
    """
    # The module-level settings are only read, so no ``global`` is required.
    model = FastText()
    model.supervised(input=trainFileName, output=TMPFILENAME, dim=DIM,
                     minCount=MINCOUNT, verbose=0)
    try:
        labels = model.predict_file(testFileName)
        classes = readData(testFileName)["classes"]
        correct = 0
        for i, predicted in enumerate(labels):
            # Strip the marker in place so the stored gold classes end up
            # normalised exactly as they were before this rewrite.
            classes[i] = classes[i].replace("__label__", "")
            # A line without any predicted label counts as a miss instead of
            # raising IndexError.
            if predicted and predicted[0] == classes[i]:
                correct += 1
    finally:
        # Always remove the model files written by ``supervised`` so a
        # failure during prediction or scoring does not leave them behind.
        for suffix in (".bin", ".vec"):
            os.unlink(TMPFILENAME + suffix)
    return {"correct": correct, "labels": labels}
def train(self, params, test_path):
    """Train a fastText model for one parameter set and return its loss.

    The parameters are translated by ``generate_fastText_hyperparams``, a
    supervised model is trained on ``self.TRAIN_PATH`` (written to
    ``self.MODEL_PATH``), and the first predicted label of every line in
    ``test_path`` is scored with ``self.metrics_fun``.

    Args:
        params: Parameter set passed to ``generate_fastText_hyperparams``.
        test_path: File handed to ``predict_file`` for evaluation.

    Returns:
        ``1.0 - metric`` on success, or ``1.0`` when any step raises.
    """
    self.message('parameters:', params)
    try:
        hyperparams = self.generate_fastText_hyperparams(params)

        self.message('fastText parameters:')
        for name, value in hyperparams.items():
            self.message(' ', name, ':', value)

        classifier = FastText()
        classifier.supervised(
            input=self.TRAIN_PATH,
            output=self.MODEL_PATH,
            **hyperparams
        )

        # Keep only the top-ranked label of each prediction.
        top_labels = []
        for prediction in classifier.predict_file(test_path):
            top_labels.append(prediction[0])

        score = self.metrics_fun(top_labels)
        self.message('metrics:', score)
        return 1.0 - score
    except Exception as error:
        # Any failure is reported and mapped to the worst possible loss.
        self.message('failed to train!')
        self.message(error)
        return 1.0
with open(TRAIN_FOLDER+str(k),"w") as train_file: for train in train_data: train_file.write(train+"\n") for train in glob.glob(TRAIN_FOLDER+"*"): print("Processing of "+basename(train)) print("Processing of the model") #creating model with fastText model=FastText() classifier = model.supervised(input=train,output=MODEL_PATH+basename(train),lr=0.02,epoch=50) print("Testing the model") #testing the model print(TEST_FOLDER+basename(train)) result=model.predict_file(TEST_FOLDER+basename(train), k=1) #opening test file to do confusion matrix with open(TEST_FOLDER+basename(train)) as f: test_dataset=f.read().splitlines() pred_labels_list=[]# list to save predicted labels with each model test_labels=[]#list to save the real labels test_labels_normalized=[]#list to save real labels in forme label1\nlabel2\nlabel3 #opening test files to save labels pred_labels=[] #list of list of labels estimated by the model with open(TEST_FOLDER+basename(train)) as f: for row in f: test_labels.append(row.split()[0]) #doing a list with real labels for the confusion matrix