示例#1
0
class NLC(object):
    """Convenience wrapper around a ``NaturalLanguageClassifier`` client.

    The client is configured from a JSON credential file, and each service
    response is wrapped in one of the result types used below
    (``CreateResult``, ``Classifiers``, ``Status``, ``ClassifyResult``).
    """

    def __init__(self, credential_file_path=None):
        """
        :param credential_file_path: Path to a JSON file holding the ``url``,
            ``username`` and ``password`` keys. When falsy, the user-expanded
            ``DEFAULT_CREDENTIAL_PATH`` is used instead.
        """
        self.__nlc = None
        self.__initialize(credential_file_path)

    def __initialize(self, credential_file_path):
        """Load the credential file and build the underlying client."""
        if not credential_file_path:
            credential_file_path = os.path.expanduser(DEFAULT_CREDENTIAL_PATH)

        with open(credential_file_path, 'r') as credential_file:
            credential = json.load(credential_file)

        self.__nlc = NaturalLanguageClassifier(
            url=credential['url'],
            username=credential['username'],
            password=credential['password'])

    def create(self, traning_data, name=None, language='en'):
        """
        Create (start training) a classifier.

        :param traning_data: An open CSV file object, or the path of a CSV
            file, containing the training data
        :param name: The optional descriptive name for the classifier
        :param language: The language of the input data
        :return: A ``CreateResult`` wrapping the service response. It wraps
            ``None`` when ``traning_data`` is neither a file object nor a
            ``str``, or when the CSV is rejected by ``is_valid_recode_num``.
        """
        create_result = None

        # The builtin ``file`` type only exists on Python 2. Referencing it
        # unguarded raises NameError on Python 3 before IOBase is checked.
        try:
            file_types = (file, IOBase)  # noqa: F821 - Python 2 only
        except NameError:
            file_types = (IOBase,)

        if isinstance(traning_data, file_types):
            # traning_data is an already opened file object
            create_result = self.__nlc.create(
                traning_data, name=name, language=language)
        elif isinstance(traning_data, str):
            # traning_data is a file path
            with open(traning_data, newline=None, mode='r',
                      encoding='utf-8') as csv_file:
                if is_valid_recode_num(csv_file):
                    # The validator may have read from the stream; rewind so
                    # the whole file is submitted either way.
                    csv_file.seek(0)
                    create_result = self.__nlc.create(
                        csv_file, name=name, language=language)

        return CreateResult(create_result)

    def classifiers(self):
        """Return a ``Classifiers`` collection built from the service listing."""
        classifiers_raw = self.__nlc.list()
        classifiers_ = [Classifier(c) for c in classifiers_raw['classifiers']]
        return Classifiers(classifiers_)

    def status(self, classifier_id):
        """Return the ``Status`` of the classifier with the given id."""
        return Status(self.__nlc.status(classifier_id))

    def classify(self, classifier_id, text):
        """Classify ``text`` with the given classifier; returns a ``ClassifyResult``."""
        return ClassifyResult(self.__nlc.classify(classifier_id, text))

    def remove(self, classifier_id):
        """
        :param classifier_id: Unique identifier for the classifier
        :return: empty dict object
        :raise watson_developer_cloud.watson_developer_cloud_service.WatsonException: Not found
        """
        return self.__nlc.remove(classifier_id)

    def remove_all(self):
        """Remove every listed classifier and return the per-classifier results."""
        classifiers_ = self.classifiers()
        return [self.remove(c.classifier_id) for c in classifiers_]
示例#2
0
def train_nlc(url, username, password, truth, name):
    """
    Train an NLC classifier on question/answer-id pairs.

    :param url: URL of the NLC instance
    :param username: NLC username
    :param password: NLC password
    :param truth: table holding the QUESTION and ANSWER_ID columns
        (presumably a pandas DataFrame - it is indexed by a column list and
        uses the ``.str`` accessor); it is not modified
    :param name: name given to the new classifier
    :return: the ``classifier_id`` field of the service response
    """
    logger.info("Train model %s with %d instances", name, len(truth))
    # NLC cannot handle newlines. Clean a copy of the two training columns so
    # the caller's table is left untouched.
    training_frame = truth[[QUESTION, ANSWER_ID]].copy()
    training_frame[QUESTION] = training_frame[QUESTION].str.replace("\n", " ")
    with tempfile.TemporaryFile() as training_file:
        to_csv(training_file, training_frame, header=False, index=False)
        # Rewind so the upload starts at the first record.
        training_file.seek(0)
        nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
        r = nlc.create(training_data=training_file, name=name)
    logger.info(pretty_print_json(r))
    return r["classifier_id"]
示例#3
0
def train_nlc(url, username, password, truth, name):
    """
    Train an NLC classifier on question/answer-id pairs.

    :param url: URL of the NLC instance
    :param username: NLC username
    :param password: NLC password
    :param truth: table holding the QUESTION and ANSWER_ID columns
        (presumably a pandas DataFrame - it is indexed by a column list and
        uses the ``.str`` accessor); it is not modified
    :param name: name given to the new classifier
    :return: the ``classifier_id`` field of the service response
    """
    logger.info("Train model %s with %d instances", name, len(truth))
    # NLC cannot handle newlines. Clean a copy of the two training columns so
    # the caller's table is left untouched.
    training_frame = truth[[QUESTION, ANSWER_ID]].copy()
    training_frame[QUESTION] = training_frame[QUESTION].str.replace("\n", " ")
    with tempfile.TemporaryFile() as training_file:
        to_csv(training_file, training_frame, header=False, index=False)
        # Rewind so the upload starts at the first record.
        training_file.seek(0)
        nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
        r = nlc.create(training_data=training_file, name=name)
    logger.info(pretty_print_json(r))
    return r["classifier_id"]
class Watson_api():
    """Train a Watson NLC classifier and score its predictions on a CSV file.

    The evaluation data is a comma-separated file whose first field is the
    text and whose second field is the expected category name.
    """

    def __init__(self):
        self.fname = ""
        self.modelSearchList = ModelSearchList()
        self.text_data = []      # texts read from the evaluation file
        self.target_label = []   # expected category ids, parallel to text_data
        self.watson_crediantial = watson_key()
        # Classifier id used by predict(). Earlier revisions selected other
        # attributes of the credential object here (classifier_twitter_classfier,
        # classifier_twitter_hash_classfier,
        # classifier_twitter_unblance_keyword_classfier, twitter_priority_classfier).
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(username=self.watson_crediantial.username,
                                                                     password=self.watson_crediantial.password)

    def parse_args(self):
        """Parse the command line; the single positional argument is the data file."""
        p = ArgumentParser(description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()

        return args

    def train(self):
        """Create a classifier named 'weather2' from the bundled CSV and print the response."""
        with open('../resources/weather_data_train.csv', 'rb') as training_data:
            print(json.dumps(self.natural_language_classifier.create(training_data=training_data, name='weather2'), indent=2))

    def __read_data(self):
        """Append the texts and expected category ids found in ``self.fname``.

        Blank lines are skipped. Raises KeyError when a category name is
        missing from the search dictionary.
        """
        categories = self.modelSearchList.search_category_dictionary
        # The context manager closes the file even if a lookup fails mid-way.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                if not line.strip():
                    continue  # a blank (e.g. trailing) line has no label field
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(categories[split_line[1].strip()])

    def predict(self, args):
        """Classify every text in ``args.data`` and print the macro F1 score.

        NOTE(review): text_data/target_label are appended to, never reset, so
        calling predict() twice on one instance accumulates both files.
        """
        # replace 47C164-nlc-243 with your classifier id
        # The response is not used; the call is kept so the status request is
        # still issued before any text is classified.
        self.natural_language_classifier.status(self.watson_classifier)
        self.fname = args.data
        self.__read_data()
        categories = self.modelSearchList.search_category_dictionary
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(self.watson_classifier, text)
            # The top class comes first; strip quote characters from its name.
            class_name = classes["classes"][0]["class_name"].replace("\"", "")
            predict_id.append(categories[class_name])
        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label, predict_id, average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
示例#5
0
def nlc_router_train(url, username, password, oracle_out, path, all_correct):

    """
    NLC Training on the oracle experiment output to determine which system(NLC or Solr) should
    answer particular question.

    1. Split the oracle experiment output into NLC_ROUTER_FOLDS train/test sets (k-fold cross
    validation). The fold files are written by ``kfold_split`` under "path"; the training sets are
    read back from ``Train<fold>.csv``.

    2. Train one NLC classifier per training set and write the list of ``{name: classifier_id}``
    mappings to ``classifier.json`` under "path".

    :param url: URL of NLC instance
    :param username: NLC Username
    :param password: NLC password
    :param oracle_out: table created by the oracle experiment (not modified)
    :param path: directory path to save intermediate results
    :param all_correct: boolean flag; when true, train only on rows flagged CORRECT and IN_PURVIEW
    :return: list of classifier ids by NLC training, in fold order
    """
    ensure_directory_exists(path)

    # Clean a copy so the caller's table keeps its original question text.
    oracle_out = oracle_out.copy()
    sys_name = oracle_out[SYSTEM][0]
    # NLC cannot handle newlines.
    oracle_out[QUESTION] = oracle_out[QUESTION].str.replace("\n", " ")
    kfold_split(oracle_out, path, NLC_ROUTER_FOLDS, True)

    # One client is enough for every fold.
    nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
    classifier_list = []
    name_to_id = []

    for fold in range(NLC_ROUTER_FOLDS):
        train = pandas.read_csv(os.path.join(path, "Train{0}.csv".format(fold)))
        if all_correct:
            logger.info("Training only on CORRECT examples.")
            # Ignore records from training which are not correct
            train = train[train[CORRECT]]
            train = train[train[IN_PURVIEW]]
        train = train[[QUESTION, ANSWERING_SYSTEM]]
        logger.info("Training set size = {0}".format(len(train)))
        with tempfile.TemporaryFile() as training_file:
            to_csv(training_file, train, header=False, index=False)
            # Rewind so the upload starts at the first record.
            training_file.seek(0)
            response = nlc.create(training_data=training_file,
                                  name="{0}_fold_{1}".format(str(sys_name), fold))
        # Keep the ids as text: encoded byte strings are not JSON-serializable
        # on Python 3, and json.dump writes text rather than bytes.
        classifier_list.append(response["classifier_id"])
        name_to_id.append({response["name"]: response["classifier_id"]})
        logger.info(pretty_print_json(response))

    with open(os.path.join(path, 'classifier.json'), 'w') as f:
        json.dump(name_to_id, f)
    return classifier_list
示例#6
0
class NLClassifier(object):
  """Look up a Watson NLC classifier by name, creating it from a CSV when absent."""

  def __init__(self, username, password, classifier):
    """
    :param username: service username
    :param password: service password
    :param classifier: mapping with the keys 'name' (classifier name to look
        up or create) and 'training_file' (path of the training CSV)
    """
    # Setup Watson SDK
    self.natural_language_classifier = NLC(username=username,password=password)

    # Classifier information
    self.classifier = {}
    self.classifier['name'] = classifier['name']
    self.classifier['training_file'] = classifier['training_file']

    listing = self.natural_language_classifier.list_classifiers()
    # First listed classifier carrying the requested name, if any.
    existing = next(
      (d for d in listing['classifiers'] if d['name'] == self.classifier['name']),
      None)
    if existing is not None:
      self.classifier['id'] = existing['classifier_id']
      print('Found classifier id %s ' % self.classifier['id'])
      self.classifier['status'] = self.natural_language_classifier.status(self.classifier['id'])['status']
    else:
      print('No classifier found, creating new from training set')
      self.classifier['id']  = self.create_classifier()
      print('New classifier id: %s ' % self.classifier['id'])

  ### Method to train the Watson Natural Language Classifier
  # The training set is delivered as a CSV file as specified in the Developer Guide
  # https://www.ibm.com/watson/developercloud/doc/nl-classifier/data_format.shtml
  def create_classifier(self):
    """Submit the training file and return the new classifier id.

    Returns the string "Error" (after printing the response) when the
    service does not report the "Training" status.
    NOTE(review): on that path self.classifier['status'] is left unset, so a
    later classify() raises KeyError - confirm the intended handling.
    """
    # The context manager closes the training file once the request returns.
    with open(self.classifier['training_file'], 'rb') as training_data:
      training_result = self.natural_language_classifier.create( training_data=training_data, name=self.classifier['name'] )
    if training_result['status'] == "Training":
      self.classifier['status'] = "Training"
      return training_result['classifier_id']
    print(training_result)
    return "Error"

  def classify(self,text):
    """Classify ``text``, or return an error dict while the classifier is not ready."""
    # Typically in a production system Watson NLC will be fully trained and verified by a data scientist before the system is ever
    # exposed in production. However because this is a demo application where Watson NLC is trained at application deployment time,
    # we will need to have a check to verify that the training is completed.
    if self.classifier['status'] == "Training":
      r = self.natural_language_classifier.status(self.classifier['id'])
      if r['status'] == "Training":
        return {"error": "Classifier still in training. Please try again in a few minutes."}
      if r['status'] != "Available":
        return {"error": "Unknown status for classifier", "message": r['status']}
      # Remember the new state so later calls skip the status request.
      self.classifier['status'] = 'Available'

    return self.natural_language_classifier.classify(self.classifier['id'], text)
示例#7
0
#! /usr/bin/python
from watson_developer_cloud import NaturalLanguageClassifierV1 as NLC
import json

# Read the service credentials; the file must provide "username" and "password".
with open('credential.json') as f_cred:
    cred = json.load(f_cred)

nlc = NLC(username=cred['username'], password=cred['password'])

# Submit the training CSV; the service response is kept in `clsfier`.
with open('weather_data_train.csv') as f_train:
    clsfier = nlc.create(training_data=f_train,
                         name='python classfier',
                         language='en')

# Serialize the response once, then store it on disk and echo it to stdout.
with open('classifier_info.json', 'w') as f_cls:
    clsfier_json = json.dumps(clsfier, indent=2)
    f_cls.write(clsfier_json)

print(clsfier_json)
示例#8
0
class Watson_api():
    """Train a Watson NLC classifier and score its predictions on a CSV file.

    The evaluation data is a comma-separated file whose first field is the
    text and whose second field is the expected category name.
    """

    def __init__(self):
        self.fname = ""
        self.modelSearchList = ModelSearchList()
        self.text_data = []  # texts read from the evaluation file
        self.target_label = []  # expected category ids, parallel to text_data
        self.watson_crediantial = watson_key()
        # Classifier id used by predict(). Earlier revisions selected other
        # attributes of the credential object here
        # (classifier_twitter_classfier, classifier_twitter_hash_classfier,
        # classifier_twitter_unblance_keyword_classfier,
        # twitter_priority_classfier).
        self.watson_classifier = self.watson_crediantial.twitter_category_classfier
        self.natural_language_classifier = NaturalLanguageClassifier(
            username=self.watson_crediantial.username,
            password=self.watson_crediantial.password)

    def parse_args(self):
        """Parse the command line; the single positional argument is the data file."""
        p = ArgumentParser(
            description='Encoder-decoder neural machine trainslation')
        p.add_argument('data', help='[in] data')
        args = p.parse_args()

        return args

    def train(self):
        """Create a classifier named 'weather2' from the bundled CSV and print the response."""
        with open('../resources/weather_data_train.csv',
                  'rb') as training_data:
            print(
                json.dumps(self.natural_language_classifier.create(
                    training_data=training_data, name='weather2'),
                           indent=2))

    def __read_data(self):
        """Append the texts and expected category ids found in ``self.fname``.

        Blank lines are skipped. Raises KeyError when a category name is
        missing from the search dictionary.
        """
        categories = self.modelSearchList.search_category_dictionary
        # The context manager closes the file even if a lookup fails mid-way.
        with open(self.fname, "r") as data_file:
            for line in data_file:
                if not line.strip():
                    continue  # a blank (e.g. trailing) line has no label field
                split_line = line.split(",")
                self.text_data.append(split_line[0].strip())
                self.target_label.append(categories[split_line[1].strip()])

    def predict(self, args):
        """Classify every text in ``args.data`` and print the macro F1 score.

        NOTE(review): text_data/target_label are appended to, never reset, so
        calling predict() twice on one instance accumulates both files.
        """
        # replace 47C164-nlc-243 with your classifier id
        # The response is not used; the call is kept so the status request is
        # still issued before any text is classified.
        self.natural_language_classifier.status(self.watson_classifier)
        self.fname = args.data
        self.__read_data()
        categories = self.modelSearchList.search_category_dictionary
        predict_id = []
        for text in self.text_data:
            classes = self.natural_language_classifier.classify(
                self.watson_classifier, text)
            # The top class comes first; strip quote characters from its name.
            class_name = classes["classes"][0]["class_name"].replace("\"", "")
            predict_id.append(categories[class_name])
        print(self.target_label)
        print(predict_id)
        f1_score_twitter = f1_score(self.target_label,
                                    predict_id,
                                    average='macro')
        print("----F measure-----")
        print(f1_score_twitter)
示例#9
0
import sys
import operator
import requests
import json
import twitter

from watson_developer_cloud import NaturalLanguageClassifierV1 as NaturalLanguageClassifier

# IBM Bluemix service credentials (placeholder values).
nlc_username = '******'
nlc_password = '******'

natural_language_classifier = NaturalLanguageClassifier(username=nlc_username,
                                                        password=nlc_password)

# Submit the sample training set; the service response is kept in `classifier`.
with open('../resources/weather_data_train.csv', 'rb') as training_data:
    classifier = natural_language_classifier.create(
        training_data=training_data,
        name='MySampleClassifierPython',
        language='en')

# Show the service response as indented JSON.
classifier_json = json.dumps(classifier, indent=2)
print(classifier_json)
    sys.exit(2)
    
# Map each parsed command-line option onto its setting.
# NOTE(review): `opts` is built outside this view. If it comes from getopt,
# long options arrive with a "--" prefix, so the original three-dash
# spellings could never match; both spellings are accepted here.
for opt, arg in opts:
    if opt == '-h':
        usage()
        sys.exit()
    elif opt in ("-t", "--trainingdata", "---trainingdata"):
        trainingdata_filepath = arg
    elif opt in ("-n", "--name", "---name"):
        name = arg
    elif opt in ("-l", "--language", "---language"):
        language = arg
    elif opt == '-d':
        DEBUG = True

# All three settings are mandatory.
if not trainingdata_filepath or not name or not language:
    print('Required argument missing.')
    usage()
    sys.exit(2)

try:
    # create classifiers with the training data
    natural_language_classifier = NaturalLanguageClassifier(url=nlcConstants.getUrl(), username=nlcConstants.getUsername(), password=nlcConstants.getPassword())
    with open(trainingdata_filepath, 'rb') as training_data:
        res = natural_language_classifier.create(training_data, name, language)
        sys.stdout.write('Response: \n%s\n' % json.dumps(res, indent=2))

except Exception as e:
    # Report the failure and stop with a non-zero status. sys.exit is used,
    # as in the rest of this script, because the bare exit() helper is
    # installed by the site module and is not always available.
    sys.stdout.write(str(e))
    sys.exit(1)
    if opt == '-h':
        usage()
        sys.exit()
    elif opt in ("-t", "---trainingdata"):
        trainingdata_filepath = arg
    elif opt in ("-n", "---name"):
        name = arg
    elif opt in ("-l", "---language"):
        language = arg
    elif opt == '-d':
        DEBUG = True

# All three settings are mandatory.
if not trainingdata_filepath or not name or not language:
    print('Required argument missing.')
    usage()
    sys.exit(2)

try:
    # create classifiers with the training data
    natural_language_classifier = NaturalLanguageClassifier(
        url=nlcConstants.getUrl(),
        username=nlcConstants.getUsername(),
        password=nlcConstants.getPassword())
    with open(trainingdata_filepath, 'rb') as training_data:
        res = natural_language_classifier.create(training_data, name, language)
        sys.stdout.write('Response: \n%s\n' % json.dumps(res, indent=2))

except Exception as e:
    # Report the failure and stop with a non-zero status. sys.exit is used,
    # as in the check above, because the bare exit() helper is installed by
    # the site module and is not always available.
    sys.stdout.write(str(e))
    sys.exit(1)