def train(training_list, model_path, format, logfile=None):
    """Train a ClinerModel on the given documents and pickle it to disk.

    Args:
        training_list: Iterable of ``(txt, con)`` pairs; each pair is passed
            to ``Document(txt, con)``.
        model_path: Path of the file the trained model is pickled to.
        format: Not used by this function; kept so callers that pass it
            positionally keep working.
        logfile: Forwarded as the first argument of ``model.log``.
            NOTE(review): the default ``None`` is forwarded unchanged --
            confirm ``model.log`` accepts it.

    Raises:
        SystemExit: With status 1 when ``training_list`` yields no documents.
    """
    # Read the data into Document objects
    docs = [Document(txt, con) for txt, con in training_list]

    if not docs:
        print('Error: Cannot train on 0 files. Terminating train.')
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    # Create a Machine Learning model
    model = ClinerModel()

    # Train the model using the Documents' data
    model.fit_from_documents(docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)

    # Write the model log to the log file and echo it on stdout
    model.log(logfile, model_file=model_path)
    model.log(sys.stdout, model_file=model_path)
def train(training_list, model_path, format, use_lstm, logfile=None):
    """Train a ClinerModel on the given documents and pickle it to disk.

    Args:
        training_list: Iterable of ``(txt, con)`` pairs; each pair is passed
            to ``Document(txt, con)``.
        model_path: Path of the file the trained model is pickled to.
        format: Not used by this function; kept so callers that pass it
            positionally keep working.
        use_lstm: Passed straight to the ``ClinerModel`` constructor.
        logfile: Forwarded as the first argument of ``model.log``.
            NOTE(review): the default ``None`` is forwarded unchanged --
            confirm ``model.log`` accepts it.

    Returns:
        ``1`` when ``training_list`` yields no documents (nothing is trained
        or written); ``None`` after a successful run.
    """
    # Read the data into Document objects
    docs = [Document(txt, con) for txt, con in training_list]

    if not docs:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Error: Cannot train on 0 files. Terminating train.')
        return 1

    # Create a Machine Learning model
    model = ClinerModel(use_lstm)

    # Train the model using the Documents' data
    model.train(docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, "wb") as m_file:
        pickle.dump(model, m_file)

    # Write the model log to the log file and echo it on stdout
    model.log(logfile, model_file=model_path)
    model.log(sys.stdout, model_file=model_path)
def train(training_list, model_path, format, use_lstm, logfile=None,
          val=None, test=None):
    """Train a ClinerModel on the given documents and pickle it to disk.

    Args:
        training_list: Iterable of ``(txt, con)`` pairs, e.g.
            ``[('data/examples/ex_doc.txt', 'data/examples/ex_doc.con')]``;
            each pair is passed to ``Document(txt, con)``.
        model_path: Path of the file the trained model is pickled to.
        format: Not used by this function; kept so callers that pass it
            positionally keep working.
        use_lstm: Passed straight to the ``ClinerModel`` constructor.
        logfile: Forwarded as the first argument of ``model.log``.
            NOTE(review): the default ``None`` is forwarded unchanged --
            confirm ``model.log`` accepts it.
        val: Optional iterable of ``(txt, con)`` pairs loaded the same way
            and handed to ``model.train`` as ``val``. ``None`` (the default)
            means no validation documents.
        test: Optional iterable of ``(txt, con)`` pairs loaded the same way
            and handed to ``model.train`` as ``test``. ``None`` (the default)
            means no test documents.

    Returns:
        ``1`` when ``training_list`` yields no documents (nothing is trained
        or written); ``None`` after a successful run.
    """
    # None sentinels replace the former mutable ``[]`` defaults, which were
    # shared between calls.
    val = [] if val is None else val
    test = [] if test is None else test

    # Read the data into Document objects
    train_docs = [Document(txt, con) for txt, con in training_list]
    val_docs = [Document(txt, con) for txt, con in val]
    test_docs = [Document(txt, con) for txt, con in test]

    if not train_docs:
        print('Error: Cannot train on 0 files. Terminating train.')
        return 1

    # Create a Machine Learning model
    model = ClinerModel(use_lstm)

    # Train the model using the Documents' data
    model.train(train_docs, val=val_docs, test=test_docs)

    # Pickle dump
    print('\nserializing model to %s\n' % model_path)
    with open(model_path, "wb") as m_file:
        pickle.dump(model, m_file)

    # Write the model log to the log file and echo it on stdout
    model.log(logfile, model_file=model_path)
    model.log(sys.stdout, model_file=model_path)