class NLClassifier(object):
    """Wrapper around a single, named Watson Natural Language Classifier.

    On construction the service is asked for its classifiers. The first one
    whose name equals ``classifier['name']`` is reused; when none matches, a
    new classifier is created from ``classifier['training_file']``.
    """

    def __init__(self, username, password, classifier):
        """Find the named classifier or create it from the training file.

        :param username: username handed to the Watson SDK client
        :param password: password handed to the Watson SDK client
        :param classifier: mapping providing the keys ``'name'`` and
            ``'training_file'``
        """
        # Setup Watson SDK
        self.natural_language_classifier = NLC(username=username,
                                               password=password)

        # Classifier information
        self.classifier = {}
        self.classifier['name'] = classifier['name']
        self.classifier['training_file'] = classifier['training_file']

        c = self.natural_language_classifier.list_classifiers()
        # One pass over the listing: first entry with a matching name, or None.
        existing = next(
            (d for d in c['classifiers']
             if d['name'] == self.classifier['name']),
            None)

        if existing is not None:
            self.classifier['id'] = existing['classifier_id']
            print('Found classifier id %s ' % self.classifier['id'])
            self.classifier['status'] = self.natural_language_classifier.status(
                self.classifier['id'])['status']
        else:
            print('No classifier found, creating new from training set')
            self.classifier['id'] = self.create_classifier()
            print('New classifier id: %s ' % self.classifier['id'])

    ### Method to train the Watson Natural Language Classifier
    # The training set is delivered as a CSV file as specified in the Developer Guide
    # https://www.ibm.com/watson/developercloud/doc/nl-classifier/data_format.shtml
    def create_classifier(self):
        """Create a classifier from the configured training file.

        The status reported by the service is stored in
        ``self.classifier['status']`` whether or not creation succeeded, so
        later calls can always read it.

        :return: the new classifier id when the service reports the status
            ``"Training"``; otherwise the string ``"Error"`` (the service
            response is printed in that case)
        """
        # The context manager closes the training file even if the call raises.
        with open(self.classifier['training_file'], 'rb') as training_data:
            training_result = self.natural_language_classifier.create(
                training_data=training_data,
                name=self.classifier['name']
            )

        status = training_result['status']
        self.classifier['status'] = status
        if status == "Training":
            return training_result['classifier_id']

        print(training_result)
        return "Error"

    def classify(self, text):
        """Classify ``text`` with the wrapped classifier.

        :param text: the phrase to classify
        :return: the service's classification response, or a dict with an
            ``"error"`` key when the classifier could not be created, is
            still training, or reports an unexpected status
        """
        # create_classifier() returns the sentinel "Error" when creation
        # failed; there is nothing to query in that case.
        if self.classifier['id'] == "Error":
            return {"error": "Classifier could not be created",
                    "message": self.classifier.get('status')}

        # Typically in a production system Watson NLC will be fully trained and verified by a data scientist before the system is ever
        # exposed in production. However because this is a demo application where Watson NLC is trained at application deployment time,
        # we will need to have a check to verify that the training is completed.
        if self.classifier['status'] == "Training":
            r = self.natural_language_classifier.status(self.classifier['id'])
            if r['status'] == "Training":
                return {"error": "Classifier still in training. Please try again in a few minutes."}
            elif r['status'] == "Available":
                # Cache the new state so the status call is skipped next time.
                self.classifier['status'] = 'Available'
            else:
                return {"error": "Unknown status for classifier", "message": r['status']}

        return self.natural_language_classifier.classify(self.classifier['id'], text)
def classifier_status(url, username, password, classifier_ids):
    """Print one ``<status>: <status_description>`` line per classifier id.

    A single ``NaturalLanguageClassifier`` client is built from the given
    url and credentials, and its ``status`` method is called once for every
    entry of ``classifier_ids``, in iteration order.

    :param url: service url passed to the client
    :param username: username passed to the client
    :param password: password passed to the client
    :param classifier_ids: iterable of classifier ids to query
    """
    client = NaturalLanguageClassifier(url=url, username=username,
                                       password=password)
    for cid in classifier_ids:
        report = client.status(cid)
        fields = (report["status"], report["status_description"])
        print("%s: %s" % fields)
class NLC(object):
    """Wrapper around ``NaturalLanguageClassifier`` configured from a JSON file.

    The credential file must contain the keys ``url``, ``username`` and
    ``password``. Service responses are wrapped in the result classes
    ``CreateResult``, ``Classifiers``, ``Status`` and ``ClassifyResult``.
    """

    def __init__(self, credential_file_path=None):
        """
        :param credential_file_path: Path of the JSON credential file; when
            falsy, ``DEFAULT_CREDENTIAL_PATH`` (user-expanded) is used
        """
        self.__nlc = None
        self.__initialize(credential_file_path)

    def __initialize(self, credential_file_path):
        """Load the credentials and build the underlying service client."""
        if not credential_file_path:
            credential_file_path = os.path.expanduser(DEFAULT_CREDENTIAL_PATH)

        with open(credential_file_path, 'r') as credential_file:
            credential = json.load(credential_file)

        self.__nlc = NaturalLanguageClassifier(url=credential['url'],
                                               username=credential['username'],
                                               password=credential['password'])

    @staticmethod
    def _is_file_like(candidate):
        """Return True when ``candidate`` exposes a callable ``read``.

        Duck typing replaces the former ``isinstance(..., file)`` test: the
        ``file`` builtin does not exist on Python 3, so that test raised
        ``NameError`` before the path branch could be reached.
        """
        return callable(getattr(candidate, 'read', None))

    def create(self, traning_data, name=None, language='en'):
        """
        :param traning_data: A csv file or file path representing the training data
        :param name: The optional descriptive name for the classifier
        :param language: The language of the input data
        :return: An instance object with the classifier_id of the newly created classifier, still in training
        """
        create_result = None

        if self._is_file_like(traning_data):
            # traning_data is a file object
            create_result = self.__nlc.create(traning_data, name=name,
                                              language=language)
        elif isinstance(traning_data, str):
            # traning_data is a file path
            with open(traning_data, newline=None, mode='r',
                      encoding='utf-8') as csv_file:
                if is_valid_recode_num(csv_file):
                    # NOTE(review): the validator presumably reads the
                    # stream; rewind so the full file is uploaded. This is a
                    # no-op if the position is already 0.
                    csv_file.seek(0)
                    create_result = self.__nlc.create(csv_file, name=name,
                                                      language=language)

        # create_result stays None when the input was neither file-like nor
        # a str path, or when validation failed.
        return CreateResult(create_result)

    def classifiers(self):
        """Return the service's classifiers wrapped in a ``Classifiers`` object."""
        classifiers_raw = self.__nlc.list()
        classifiers_ = [Classifier(c) for c in classifiers_raw['classifiers']]
        return Classifiers(classifiers_)

    def status(self, classifier_id):
        """Return the ``Status`` wrapper for ``classifier_id``."""
        return Status(self.__nlc.status(classifier_id))

    def classify(self, classifier_id, text):
        """Classify ``text`` and return the ``ClassifyResult`` wrapper."""
        return ClassifyResult(self.__nlc.classify(classifier_id, text))

    def remove(self, classifier_id):
        """
        :param classifier_id: Unique identifier for the classifier
        :return: empty dict object
        :raise watson_developer_cloud.watson_developer_cloud_service.WatsonException: Not found
        """
        return self.__nlc.remove(classifier_id)

    def remove_all(self):
        """Remove every classifier and return the list of ``remove`` results."""
        classifiers_ = self.classifiers()
        return [self.remove(c.classifier_id) for c in classifiers_]
class Watson_api():
    """Evaluate a Watson Natural Language Classifier against labelled data.

    ``predict`` reads ``text,label`` lines from a file, classifies every
    text with the configured classifier and prints the macro-averaged F1
    score of the predicted category ids against the labelled ones.
    """

    def __init__(self):
        """Load credentials, pick the classifier id and build the client."""
        self.fname = ""
        self.modelSearchList = ModelSearchList()
        self.text_data = []
        self.target_label = []
        self.watson_crediantial = watson_key()
        # Alternative classifier ids, kept for reference:
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_classfier
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_hash_classfier
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_unblance_keyword_classfier
        #self.watson_classifier = self.watson_crediantial.twitter_priority_classfier
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(
            username=self.watson_crediantial.username,
            password=self.watson_crediantial.password)

    def parse_args(self):
        """Parse the command line; the single positional argument is ``data``."""
        p = ArgumentParser(description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()
        return args

    def train(self):
        """Create the ``weather2`` classifier and print the service response."""
        # create a classifier
        with open('../resources/weather_data_train.csv', 'rb') as training_data:
            print(json.dumps(
                self.natural_language_classifier.create(
                    training_data=training_data, name='weather2'),
                indent=2))

    def __read_data(self):
        """Append the texts and category ids found in ``self.fname``.

        Each non-blank line is split on commas: field 0 is the text and
        field 1 the label, which is mapped to an id through
        ``self.modelSearchList.search_category_dictionary``.

        :raises KeyError: when a label is missing from the dictionary
        :raises IndexError: when a non-blank line has no comma
        """
        category_ids = self.modelSearchList.search_category_dictionary
        # The context manager closes the file; the bare open() leaked it.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(category_ids[split_line[1].strip()])

    def predict(self, args):
        """Classify every text in ``args.data`` and print the macro F1 score.

        :param args: object whose ``data`` attribute is the input file path
        """
        # replace 47C164-nlc-243 with your classifier id
        # The response is not used. NOTE(review): the call is kept because
        # it presumably surfaces an invalid classifier id early - confirm.
        self.natural_language_classifier.status(self.watson_classifier)

        self.fname = args.data
        self.__read_data()

        category_ids = self.modelSearchList.search_category_dictionary
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(
                self.watson_classifier, text)
            # Only the first returned class is used; class names may be
            # wrapped in double quotes, which the dictionary keys lack.
            top_class = classes["classes"][0]["class_name"].replace("\"", "")
            predict_id.append(category_ids[top_class])

        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label, predict_id, average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
# Demo script: list the account's classifiers, then query the status of one
# classifier and classify a sample sentence with it. Every service response
# is printed as indented JSON.
import json
from os.path import join, dirname
from watson_developer_cloud import NaturalLanguageClassifierV1 as NaturalLanguageClassifier


def _show(payload):
    # Pretty-print one service response.
    print(json.dumps(payload, indent=2))


natural_language_classifier = NaturalLanguageClassifier(
    username='******',
    password='******')

classifiers = natural_language_classifier.list()
_show(classifiers)

# create a classifier
# with open('../resources/weather_data_train.csv', 'rb') as training_data:
#     print(json.dumps(natural_language_classifier.create(training_data=training_data, name='weather2'), indent=2))

# replace 47C164-nlc-243 with your classifier id
status = natural_language_classifier.status('47C164-nlc-243')
_show(status)

classes = natural_language_classifier.classify('47C164-nlc-243', 'How hot will it be tomorrow?')
_show(classes)
# Demo script: print the account's classifiers, then the status of one
# classifier and its classification of a sample sentence, each as indented
# JSON.
import json
from os.path import join, dirname
from watson_developer_cloud import NaturalLanguageClassifierV1 as NaturalLanguageClassifier

natural_language_classifier = NaturalLanguageClassifier(
    username='******',
    password='******',
)

listing = natural_language_classifier.list()
print(json.dumps(listing, indent=2))

# create a classifier
# with open('../resources/weather_data_train.csv', 'rb') as training_data:
#     print(json.dumps(natural_language_classifier.create(training_data=training_data, name='weather2'), indent=2))

# replace 47C164-nlc-243 with your classifier id
classifier_id = '47C164-nlc-243'

status = natural_language_classifier.status(classifier_id)
print(json.dumps(status, indent=2))

classes = natural_language_classifier.classify(
    classifier_id,
    'How hot will it be tomorrow?',
)
print(json.dumps(classes, indent=2))
def classifier_status(url, username, password, classifier_ids):
    """Print name, id, status and description for each classifier id.

    A single ``NaturalLanguageClassifier`` client is built from the given
    url and credentials, and its ``status`` method is called once for every
    entry of ``classifier_ids``, in iteration order.

    :param url: service url passed to the client
    :param username: username passed to the client
    :param password: password passed to the client
    :param classifier_ids: iterable of classifier ids to query
    """
    # Response keys, in the order the message template consumes them.
    wanted = ("name", "classifier_id", "status", "status_description")
    client = NaturalLanguageClassifier(url=url, username=username,
                                       password=password)
    for cid in classifier_ids:
        report = client.status(cid)
        values = tuple(report[key] for key in wanted)
        print(" Instance name: %s with classifier id %s is %s; Description: %s" % values)
class Watson_api():
    """Evaluate a Watson Natural Language Classifier against labelled data.

    ``predict`` reads ``text,label`` lines from a file, classifies every
    text with the configured classifier and prints the macro-averaged F1
    score of the predicted category ids against the labelled ones.
    """

    def __init__(self):
        """Load credentials, pick the classifier id and build the client."""
        self.fname = ""
        self.modelSearchList = ModelSearchList()
        self.text_data = []
        self.target_label = []
        self.watson_crediantial = watson_key()
        # Alternative classifier ids, kept for reference:
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_classfier
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_hash_classfier
        #self.watson_classifier = self.watson_crediantial.classifier_twitter_unblance_keyword_classfier
        #self.watson_classifier = self.watson_crediantial.twitter_priority_classfier
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(
            username=self.watson_crediantial.username,
            password=self.watson_crediantial.password)

    def parse_args(self):
        """Parse the command line; the single positional argument is ``data``."""
        p = ArgumentParser(
            description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()
        return args

    def train(self):
        """Create the ``weather2`` classifier and print the service response."""
        # create a classifier
        with open('../resources/weather_data_train.csv', 'rb') as training_data:
            print(json.dumps(
                self.natural_language_classifier.create(
                    training_data=training_data, name='weather2'),
                indent=2))

    def __read_data(self):
        """Append the texts and category ids found in ``self.fname``.

        Each non-blank line is split on commas: field 0 is the text and
        field 1 the label, which is mapped to an id through
        ``self.modelSearchList.search_category_dictionary``.

        :raises KeyError: when a label is missing from the dictionary
        :raises IndexError: when a non-blank line has no comma
        """
        category_ids = self.modelSearchList.search_category_dictionary
        # The context manager closes the file; the bare open() leaked it.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                if not line.strip():
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(category_ids[split_line[1].strip()])

    def predict(self, args):
        """Classify every text in ``args.data`` and print the macro F1 score.

        :param args: object whose ``data`` attribute is the input file path
        """
        # replace 47C164-nlc-243 with your classifier id
        # The response is not used. NOTE(review): the call is kept because
        # it presumably surfaces an invalid classifier id early - confirm.
        self.natural_language_classifier.status(self.watson_classifier)

        self.fname = args.data
        self.__read_data()

        category_ids = self.modelSearchList.search_category_dictionary
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(
                self.watson_classifier, text)
            # Only the first returned class is used; class names may be
            # wrapped in double quotes, which the dictionary keys lack.
            top_class = classes["classes"][0]["class_name"].replace("\"", "")
            predict_id.append(category_ids[top_class])

        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label,
                                    predict_id,
                                    average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
sys.exit(2) for opt, arg in opts: if opt == '-h': usage() sys.exit() elif opt in ("-c", "---classifier_id"): classifier_id = arg elif opt == '-d': DEBUG = True if not classifier_id: print('Required argument missing.') usage() sys.exit(2) try: # create classifiers with the training data natural_language_classifier = NaturalLanguageClassifier( url=nlcConstants.getUrl(), username=nlcConstants.getUsername(), password=nlcConstants.getPassword()) # get classifier information res = natural_language_classifier.status(classifier_id) sys.stdout.write(json.dumps(res, indent=2)) except Exception as e: sys.stdout.write(str(e)) exit(1)
#! /usr/bin/python
# Refresh classifier_info.json: read the classifier id stored in it, ask the
# service for that classifier's status, write the status back to the same
# file and print it as indented JSON.
from watson_developer_cloud import NaturalLanguageClassifierV1 as NLC
import json


def _load_json(path):
    # Parse one JSON document from the file at path.
    with open(path) as handle:
        return json.load(handle)


cred = _load_json('credential.json')
cls_id = _load_json('classifier_info.json')['classifier_id']

nlc = NLC(username=cred['username'], password=cred['password'])
status = nlc.status(cls_id)

# Overwrite the info file with the status that was just fetched.
with open('classifier_info.json', 'w') as f_cls:
    json.dump(status, f_cls, indent=2)

print(json.dumps(status, indent=2))
# Script: query the status of one fixed classifier and print it as indented
# JSON.
import sys
import operator
import requests
import json
import twitter
from watson_developer_cloud import NaturalLanguageClassifierV1 as NaturalLanguageClassifier

#The IBM Bluemix credentials
nlc_username = '******'
nlc_password = '******'

natural_language_classifier = NaturalLanguageClassifier(
    username=nlc_username,
    password=nlc_password,
)

classifierId = 'e4be4cx148-nlc-16'
status = natural_language_classifier.status(classifierId)

report = json.dumps(status, indent=2)
print(report)
#! /usr/bin/python
# Refresh classifier_info.json: look up the classifier id stored in it,
# fetch that classifier's status from the service, store the status in the
# same file and print it as indented JSON.
from watson_developer_cloud import NaturalLanguageClassifierV1 as NLC
import json

with open('credential.json') as f_cred:
    cred = json.load(f_cred)

with open('classifier_info.json') as f_cls:
    info = json.load(f_cls)
cls_id = info['classifier_id']

nlc = NLC(username=cred['username'],
          password=cred['password'])
status = nlc.status(cls_id)

# The info file is replaced by the freshly fetched status document.
with open('classifier_info.json', 'w') as f_cls:
    json.dump(status, f_cls, indent=2)

rendered = json.dumps(status, indent=2)
print(rendered)