class NLC(object):
    """Convenience facade over the Watson NaturalLanguageClassifier SDK.

    Credentials are loaded from a JSON file holding "url", "username" and
    "password"; when no path is supplied, DEFAULT_CREDENTIAL_PATH (with "~"
    expanded) is used.
    """

    def __init__(self, credential_file_path=None):
        self.__nlc = None  # underlying SDK client, set by __initialize()
        self.__initialize(credential_file_path)

    def __initialize(self, credential_file_path):
        # Read the credential JSON and construct the SDK client.
        if not credential_file_path:
            credential_file_path = os.path.expanduser(DEFAULT_CREDENTIAL_PATH)
        with open(credential_file_path, 'r') as credential_file:
            credential = json.load(credential_file)
        self.__nlc = NaturalLanguageClassifier(url=credential['url'],
                                               username=credential['username'],
                                               password=credential['password'])

    def create(self, traning_data, name=None, language='en'):
        """
        :param traning_data: An open csv file object or a file path holding
            the training data (parameter name kept for backward compatibility)
        :param name: The optional descriptive name for the classifier
        :param language: The language of the input data
        :return: A CreateResult instance with the classifier_id of the newly
            created classifier, still in training
        """
        create_result = None
        # BUG FIX: the original first tested isinstance(traning_data, file);
        # the "file" builtin was removed in Python 3, and this method already
        # requires Python 3 (open(..., newline=..., encoding=...)), so every
        # call raised NameError.  IOBase alone covers open file objects.
        if isinstance(traning_data, IOBase):
            # traning_data is an open file object
            create_result = self.__nlc.create(traning_data, name=name,
                                              language=language)
        elif isinstance(traning_data, str):
            # traning_data is a file path
            with open(traning_data, newline=None, mode='r',
                      encoding='utf-8') as csv_file:
                # Only submit the data when the record count is acceptable.
                if is_valid_recode_num(csv_file):
                    create_result = self.__nlc.create(csv_file, name=name,
                                                      language=language)
        return CreateResult(create_result)

    def classifiers(self):
        """Return a Classifiers collection of every classifier on the service."""
        classifiers_raw = self.__nlc.list()
        classifiers_ = [Classifier(c) for c in classifiers_raw['classifiers']]
        return Classifiers(classifiers_)

    def status(self, classifier_id):
        """Return a Status object describing the given classifier."""
        return Status(self.__nlc.status(classifier_id))

    def classify(self, classifier_id, text):
        """Classify *text* with the given classifier and wrap the response."""
        return ClassifyResult(self.__nlc.classify(classifier_id, text))

    def remove(self, classifier_id):
        """
        :param classifier_id: Unique identifier for the classifier
        :return: empty dict object
        :raise watson_developer_cloud.watson_developer_cloud_service.WatsonException: Not found
        """
        return self.__nlc.remove(classifier_id)

    def remove_all(self):
        """Remove every classifier; return the list of removal results."""
        # NOTE(review): assumes the Classifiers object is iterable and its
        # items expose .classifier_id -- confirm against their definitions.
        classifiers_ = self.classifiers()
        return [self.remove(c.classifier_id) for c in classifiers_]
def train_nlc(url, username, password, truth, name):
    """Create an NLC classifier named *name* from the *truth* data frame.

    Writes the (question, answer id) pairs to a temporary CSV, submits it to
    the service, and returns the id of the new (still training) classifier.
    """
    logger.info("Train model %s with %d instances" % (name, len(truth)))
    with tempfile.TemporaryFile() as csv_buffer:
        # NLC cannot handle newlines.
        truth[QUESTION] = truth[QUESTION].str.replace("\n", " ")
        to_csv(csv_buffer, truth[[QUESTION, ANSWER_ID]], header=False, index=False)
        csv_buffer.seek(0)
        service = NaturalLanguageClassifier(url=url, username=username,
                                            password=password)
        response = service.create(training_data=csv_buffer, name=name)
        logger.info(pretty_print_json(response))
        return response["classifier_id"]
class Watson_api():
    """Train and evaluate a Watson NLC classifier over labelled text data."""

    def __init__(self):
        self.fname = ""           # input data path, assigned in predict()
        self.modelSearchList = ModelSearchList()
        self.text_data = []       # texts read from the input file
        self.target_label = []    # gold label ids aligned with text_data
        self.watson_crediantial = watson_key()
        # Other classifier ids were tried previously (twitter / twitter hash /
        # unbalance keyword / priority variants); the category classifier is
        # the one currently in use.
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(
            username=self.watson_crediantial.username,
            password=self.watson_crediantial.password)

    def parse_args(self):
        """Parse the single positional command-line argument: the data file."""
        p = ArgumentParser(description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()
        return args

    def train(self):
        # Create a classifier from the bundled weather training CSV.
        with open('../resources/weather_data_train.csv', 'rb') as training_data:
            print(json.dumps(self.natural_language_classifier.create(
                training_data=training_data, name='weather2'), indent=2))

    def __read_data(self):
        # Populate text_data / target_label from self.fname (CSV: text,label).
        # BUG FIX: the file was opened without ever being closed; use a
        # context manager so the handle is released deterministically.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(
                    self.modelSearchList.search_category_dictionary[split_line[1].strip()])

    def predict(self, args):
        """Classify each input text with the configured classifier and print macro F1."""
        # Status call kept for its side effect of touching the service.
        status = self.natural_language_classifier.status(self.watson_classifier)
        self.fname = args.data
        self.__read_data()
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(self.watson_classifier, text)
            # The original applied the identical .replace("\"", "") twice;
            # the second call was a no-op and has been dropped.
            class_id = self.modelSearchList.search_category_dictionary[
                classes["classes"][0]["class_name"].replace("\"", "")]
            predict_id.append(class_id)
        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label, predict_id, average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
def nlc_router_train(url, username, password, oracle_out, path, all_correct):
    """
    NLC Training on the oracle experiment output to determine which system
    (NLC or Solr) should answer a particular question.

    1. Splits the oracle experiment output into NLC_ROUTER_FOLDS equal
       training/testing records (k-fold cross validation); all files are
       stored under *path*.
    2. Trains NLC on every training set and writes the resulting name->id
       mapping to classifier.json under *path*.

    :param url: URL of NLC instance
    :param username: NLC Username
    :param password: NLC password
    :param oracle_out: file created by oracle experiment
    :param path: directory path to save intermediate results
    :param all_correct: optional boolean parameter to train with only correct QA pairs
    :return: list of classifier ids by NLC training
    """
    ensure_directory_exists(path)
    sys_name = oracle_out[SYSTEM][0]
    # NLC cannot handle newlines in question text.
    oracle_out[QUESTION] = oracle_out[QUESTION].str.replace("\n", " ")
    kfold_split(oracle_out, path, NLC_ROUTER_FOLDS, True)
    classifier_list = []
    # BUG FIX: this accumulator was named "list", shadowing the builtin.
    name_to_id = []
    for fold in range(0, NLC_ROUTER_FOLDS):
        train = pandas.read_csv(os.path.join(path, "Train{0}.csv".format(str(fold))))
        if all_correct:
            logger.info("Training only on CORRECT examples.")
            # Ignore records from training which are not correct
            train = train[train[CORRECT]]
            train = train[train[IN_PURVIEW]]
        train = train[[QUESTION, ANSWERING_SYSTEM]]
        logger.info("Training set size = {0}".format(str(len(train))))
        with tempfile.TemporaryFile() as training_file:
            to_csv(training_file, train[[QUESTION, ANSWERING_SYSTEM]], header=False, index=False)
            training_file.seek(0)
            nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
            classifier_id = nlc.create(training_data=training_file,
                                       name="{0}_fold_{1}".format(str(sys_name), str(fold)))
            # NOTE(review): the .encode("utf-8") calls (bytes keys/values) and
            # the binary-mode json.dump below are Python-2 idioms -- confirm
            # the target interpreter before modernizing further.
            classifier_list.append(classifier_id["classifier_id"].encode("utf-8"))
            name_to_id.append({classifier_id["name"].encode("utf-8"):
                               classifier_id["classifier_id"].encode("utf-8")})
            logger.info(pretty_print_json(classifier_id))
            # Removed a duplicate bare pretty_print_json(classifier_id) call
            # whose return value was discarded.
    with open(os.path.join(path, 'classifier.json'), 'wb') as f:
        json.dump(name_to_id, f)
    return classifier_list
class NLClassifier(object):
    """Wrapper that finds-or-creates a named Watson NLC classifier and
    classifies text with it, waiting out the initial training phase."""

    def __init__(self, username, password, classifier):
        # Setup Watson SDK
        self.natural_language_classifier = NLC(username=username, password=password)
        # Classifier information
        self.classifier = {}
        self.classifier['name'] = classifier['name']
        self.classifier['training_file'] = classifier['training_file']
        c = self.natural_language_classifier.list_classifiers()
        if any(d['name'] == self.classifier['name'] for d in c['classifiers']):
            # Reuse the existing classifier with the configured name.
            self.classifier['id'] = [d['classifier_id'] for d in c['classifiers']
                                     if d['name'] == self.classifier['name']][0]
            print('Found classifier id %s ' % self.classifier['id'])
            self.classifier['status'] = self.natural_language_classifier.status(self.classifier['id'])['status']
        else:
            print('No classifier found, creating new from training set')
            self.classifier['id'] = self.create_classifier()
            print('New classifier id: %s ' % self.classifier['id'])

    ### Method to train the Watson Natural Language Classifier
    # The training set is delivered as a CSV file as specified in the Developer Guide
    # https://www.ibm.com/watson/developercloud/doc/nl-classifier/data_format.shtml
    def create_classifier(self):
        """Create a classifier from the configured training CSV.

        Returns the new classifier id, or the string "Error" when the service
        does not report status "Training".
        """
        # BUG FIX: the training file was opened without ever being closed;
        # a with-block releases the handle once the request is sent.
        with open(self.classifier['training_file'], 'rb') as training_data:
            training_result = self.natural_language_classifier.create(
                training_data=training_data,
                name=self.classifier['name']
            )
        if training_result['status'] == "Training":
            self.classifier['status'] = "Training"
            return training_result['classifier_id']
        else:
            print(training_result)
            return "Error"

    def classify(self, text):
        """Classify *text*; while the classifier is still training, return an
        error dict instead of calling the service.

        Typically in production Watson NLC is fully trained and verified before
        the system is exposed; because this demo trains at deployment time we
        must re-check the training status on each call until it is Available.
        """
        if self.classifier['status'] == "Training":
            r = self.natural_language_classifier.status(self.classifier['id'])
            if r['status'] == "Training":
                return {"error": "Classifier still in training. Please try again in a few minutes."}
            elif r['status'] == "Available":
                self.classifier['status'] = 'Available'
            else:
                return {"error": "Unknown status for classifier", "message": r['status']}
        return self.natural_language_classifier.classify(self.classifier['id'], text)
#! /usr/bin/python
from watson_developer_cloud import NaturalLanguageClassifierV1 as NLC
import json

# Read the service credentials from the local credential file.
with open('credential.json') as f_cred:
    cred = json.load(f_cred)

# Build the client and submit the weather training CSV as a new classifier.
nlc = NLC(username=cred['username'], password=cred['password'])
with open('weather_data_train.csv') as f_train:
    clsfier = nlc.create(
        training_data=f_train,
        name='python classfier',
        language='en')

# Persist the creation response for later lookups, then echo it.
with open('classifier_info.json', 'w') as f_cls:
    json.dump(clsfier, f_cls, indent=2)
print(json.dumps(clsfier, indent=2))
class Watson_api():
    """Watson NLC helper: trains a classifier and scores predictions with F1."""

    def __init__(self):
        self.fname = ""           # path of the data file, set in predict()
        self.modelSearchList = ModelSearchList()
        self.text_data = []       # input texts
        self.target_label = []    # gold label ids per text
        self.watson_crediantial = watson_key()
        # Several alternative classifiers exist on the account (twitter,
        # twitter hash, unbalance keyword, priority); the category classifier
        # is the one selected here.
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(
            username=self.watson_crediantial.username,
            password=self.watson_crediantial.password)

    def parse_args(self):
        """Return parsed command-line arguments (one positional data file)."""
        p = ArgumentParser(
            description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()
        return args

    def train(self):
        # Create a classifier from the weather training CSV.
        with open('../resources/weather_data_train.csv', 'rb') as training_data:
            print(
                json.dumps(self.natural_language_classifier.create(
                    training_data=training_data, name='weather2'), indent=2))

    def __read_data(self):
        # Read "text,label" rows from self.fname into the instance lists.
        # BUG FIX: close the input file via a context manager instead of
        # leaking the handle opened inline in the for statement.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(
                    self.modelSearchList.search_category_dictionary[
                        split_line[1].strip()])

    def predict(self, args):
        """Classify every input text and print the macro-averaged F1 score."""
        # Status request kept from the original (result intentionally unused).
        status = self.natural_language_classifier.status(
            self.watson_classifier)
        self.fname = args.data
        self.__read_data()
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(
                self.watson_classifier, text)
            # Dropped the second, identical .replace("\"", "") -- it was a no-op.
            class_id = self.modelSearchList.search_category_dictionary[
                classes["classes"][0]["class_name"].replace("\"", "")]
            predict_id.append(class_id)
        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label, predict_id,
                                    average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
import sys
import operator
import requests
import json
import twitter
from watson_developer_cloud import NaturalLanguageClassifierV1 as NaturalLanguageClassifier

# The IBM Bluemix credentials (redacted).
nlc_username = '******'
nlc_password = '******'

# Client for the Natural Language Classifier service.
natural_language_classifier = NaturalLanguageClassifier(
    username=nlc_username,
    password=nlc_password)

# Submit the weather training CSV as a new classifier and echo the response.
with open('../resources/weather_data_train.csv', 'rb') as training_data:
    classifier = natural_language_classifier.create(
        training_data=training_data,
        name='MySampleClassifierPython',
        language='en'
    )

print(json.dumps(classifier, indent=2))
# NOTE(review): this span begins mid-script -- the sys.exit(2) below almost
# certainly terminates a getopt error handler whose try/except header lies
# before this excerpt; confirm against the full file.
sys.exit(2)
# Walk the parsed (option, value) pairs; "opts" is produced earlier, outside
# this excerpt (presumably by getopt.getopt()).
for opt, arg in opts:
    if opt == '-h':
        usage()
        sys.exit()
    elif opt in ("-t", "---trainingdata"):
        # NOTE(review): "---trainingdata" (three dashes) can never equal a
        # getopt long option, which is reported as "--trainingdata" -- verify.
        trainingdata_filepath = arg
    elif opt in ("-n", "---name"):
        name = arg
    elif opt in ("-l", "---language"):
        language = arg
    elif opt == '-d':
        DEBUG = True
# All three values are mandatory; abort with usage help when any is missing.
if not trainingdata_filepath or not name or not language:
    print('Required argument missing.')
    usage()
    sys.exit(2)
try:
    # create classifiers with the training data
    natural_language_classifier = NaturalLanguageClassifier(url=nlcConstants.getUrl(), username=nlcConstants.getUsername(), password=nlcConstants.getPassword())
    with open(trainingdata_filepath, 'rb') as training_data:
        res = natural_language_classifier.create(training_data, name, language)
        sys.stdout.write('Response: \n%s\n' % json.dumps(res, indent=2))
except Exception as e:
    # Best-effort reporting: print the error and exit non-zero.
    sys.stdout.write(str(e))
    exit(1)
# NOTE(review): this span begins inside a loop whose "for opt, arg in opts:"
# header lies outside this excerpt -- the option chain below handles one
# (option, value) pair per iteration; confirm against the full file.
if opt == '-h':
    usage()
    sys.exit()
elif opt in ("-t", "---trainingdata"):
    # NOTE(review): "---trainingdata" (three dashes) can never equal a getopt
    # long option, which is reported as "--trainingdata" -- verify.
    trainingdata_filepath = arg
elif opt in ("-n", "---name"):
    name = arg
elif opt in ("-l", "---language"):
    language = arg
elif opt == '-d':
    DEBUG = True
# All three values are mandatory; abort with usage help when any is missing.
if not trainingdata_filepath or not name or not language:
    print('Required argument missing.')
    usage()
    sys.exit(2)
try:
    # create classifiers with the training data
    natural_language_classifier = NaturalLanguageClassifier(
        url=nlcConstants.getUrl(),
        username=nlcConstants.getUsername(),
        password=nlcConstants.getPassword())
    with open(trainingdata_filepath, 'rb') as training_data:
        res = natural_language_classifier.create(training_data, name, language)
        sys.stdout.write('Response: \n%s\n' % json.dumps(res, indent=2))
except Exception as e:
    # Best-effort reporting: print the error and exit non-zero.
    sys.stdout.write(str(e))
    exit(1)