Example #1
async def test_create_train_data_no_history(default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = await training.load_data(DEFAULT_STORIES_FILE,
                                                 default_domain,
                                                 augmentation_factor=0)

    assert len(training_trackers) == 3
    (decoded, _) = featurizer.training_states_and_actions(
        training_trackers, default_domain)

    # decoded needs to be sorted
    hashed = []
    for states in decoded:
        hashed.append(json.dumps(states, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    assert hashed == [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]'
    ]
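The hashing idiom used here (and in the examples below) makes the comparison order-independent: each state dict is serialized with sort_keys=True, then the serialized states are sorted. A minimal self-contained illustration of why both steps are needed (the state dicts are made up for the demonstration):

import json

states_a = [{"intent_greet": 1.0, "prev_action_listen": 1.0}, {}]
states_b = [{}, {"prev_action_listen": 1.0, "intent_greet": 1.0}]

def canonical(states):
    # sort_keys normalizes key order inside each dict;
    # sorted() normalizes the order of the states themselves
    return sorted(json.dumps(s, sort_keys=True) for s in states)

assert canonical(states_a) == canonical(states_b)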
Example #2
def test_create_train_data_no_history(default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = training.load_data(
            DEFAULT_STORIES_FILE,
            default_domain,
            augmentation_factor=0
    )
    assert len(training_trackers) == 3
    (decoded, _) = featurizer.training_states_and_actions(
            training_trackers, default_domain)

    # decoded needs to be sorted
    hashed = []
    for states in decoded:
        hashed.append(json.dumps(states, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    assert hashed == [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]'
    ]
Example #3
def test_load_multi_file_training_data(default_domain):
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain,
                                  augmentation_factor=0)
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain)
    hashed = []
    for sts, acts in zip(tr_as_sts, tr_as_acts):
        hashed.append(json.dumps(sts + acts, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                 max_history=2)
    trackers_mul = training.load_data("data/test_multifile_stories",
                                      default_domain,
                                      augmentation_factor=0)
    (tr_as_sts_mul, tr_as_acts_mul) = featurizer.training_states_and_actions(
        trackers_mul, default_domain)
    hashed_mul = []
    for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul):
        hashed_mul.append(json.dumps(sts_mul + acts_mul, sort_keys=True))
    hashed_mul = sorted(hashed_mul, reverse=True)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # note: ndarray.sort() sorts in place and returns None, so the
    # comparison must use np.sort, which returns a sorted copy
    assert np.all(np.sort(data.X, axis=0) == np.sort(data_mul.X, axis=0))
    assert np.all(np.sort(data.y, axis=0) == np.sort(data_mul.y, axis=0))
Example #4
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    # create a hidden file

    open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close()
    # create a normal file
    normal_file = os.path.join(tmpdir.strpath, "normal_file")
    open(normal_file, 'a').close()

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data(tmpdir.strpath, default_domain)
    data = featurizer.featurize_trackers(trackers, default_domain)

    assert len(data.X) == 0
    assert len(data.y) == 0
Example #5
def train_bot():
    logging.basicConfig(level='INFO')

    training_data_file = './data/stories'
    model_path = './models/dialogue'

    fallback = FallbackPolicy(fallback_action_name="utter_not_understood",
                              core_threshold=0.3,
                              nlu_threshold=0.6)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent('./data/domain.yml',
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(featurizer), fallback
                  ])

    training_data = agent.load_data(training_data_file)
    agent.train(training_data,
                augmentation_factor=50,
                epochs=500,
                batch_size=10,
                validation_split=0.2)

    agent.persist(model_path)
Example #6
def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    fallback = FallbackPolicy(fallback_action_name="action_default_fallback",
                              core_threshold=0.3,
                              nlu_threshold=0.3)
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(featurizer), fallback
                  ])

    agent.train(
        training_data_file,
        #max_history = 3,
        epochs=300,
        batch_size=50,
        validation_split=0.2,
        augmentation_factor=50)

    agent.persist(model_path)
    return agent
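A hypothetical follow-up showing how the model persisted above might be reloaded, assuming old-style rasa_core's Agent.load (the path echoes the default above):

from rasa_core.agent import Agent

# reload the dialogue model persisted by train_dialogue()
agent = Agent.load('./models/dialogue')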
Example #7
def train_bot():
    training_data_file = './data/stories'
    model_path = './models/dialogue'
    domain_file = './data/domain.yml'

    # core_threshold: min confidence needed to accept an action predicted by Rasa Core
    # nlu_threshold: min confidence needed to accept an intent predicted by the interpreter (NLU)
    fallback = FallbackPolicy(fallback_action_name="action_not_understood",
                              core_threshold=0.5,
                              nlu_threshold=0.35)

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=3)
    agent = Agent(domain=domain_file,
                  policies=[
                      MemoizationPolicy(max_history=2),
                      KerasPolicy(featurizer), fallback
                  ])

    training_data = agent.load_data(training_data_file)
    agent.train(training_data,
                augmentation_factor=50,
                epochs=400,
                batch_size=50,
                validation_split=0.2)

    agent.persist(model_path)
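To make the two threshold comments above concrete, here is a schematic sketch of the decision they describe. It illustrates the semantics only, not rasa_core's actual FallbackPolicy implementation:

# Schematic only: mirrors the threshold semantics described in the
# comments above, not rasa_core's internal code.
def should_fall_back(nlu_confidence, core_confidence,
                     nlu_threshold=0.35, core_threshold=0.5):
    # fall back if NLU is unsure about the intent or
    # Core is unsure about the next action
    return (nlu_confidence < nlu_threshold
            or core_confidence < core_threshold)

assert should_fall_back(0.2, 0.9)        # low NLU confidence -> fallback
assert not should_fall_back(0.8, 0.7)    # both confident -> normal prediction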
Example #8
    def train_dialogue(self, domain_file, model_path, training_data_file):
        fallback = FallbackPolicy(fallback_action_name="utter_default",
                                  core_threshold=0.2, nlu_threshold=0.5)
        featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                 max_history=10)
        self.agent = Agent(domain_file,
                           policies=[MemoizationPolicy(max_history=10),
                                     KerasPolicy(epochs=90, batch_size=20,
                                                 validation_split=0.1),
                                     fallback])
        data = self.agent.load_data(training_data_file)
        self.agent.train(data)
        self.agent.persist(model_path)
Example #9
def train_core(domain_file="robot/config/domain.yml",
               model_path="robot/models/dialogue",
               training_data_file="robot/config/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    # fallback = FallbackPolicy(fallback_action_name="action_default_fallback",
    #                           core_threshold=0.9,
    #                           nlu_threshold=0.9)
    fallback = FallbackPolicy(fallback_action_name="action_default_custom",
                              core_threshold=0.8,
                              nlu_threshold=0.8)

    agent = Agent(
        domain_file,
        policies=[
            MemoizationPolicy(max_history=5),
            KerasPolicy(
                MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                            max_history=5)), fallback
        ])
    training_data = agent.load_data(training_data_file)
    # train the agent's policies
    agent.train(training_data, epochs=500)
    agent.persist(model_path)
    return agent
Example #10
    @classmethod
    def _standard_featurizer(cls, max_history=None):
        max_history = max_history or cls.MAX_HISTORY_DEFAULT
        # Memoization policy always uses MaxHistoryTrackerFeaturizer
        # without state_featurizer
        return MaxHistoryTrackerFeaturizer(state_featurizer=None,
                                           max_history=max_history,
                                           use_intent_probabilities=False)
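A hypothetical usage sketch for the classmethod above, assuming it lives on MemoizationPolicy (as its comment suggests) and that MAX_HISTORY_DEFAULT is a class attribute; both are assumptions:

# Both the attachment to MemoizationPolicy and MAX_HISTORY_DEFAULT
# are assumptions based on the comment in the snippet above.
from rasa_core.policies.memoization import MemoizationPolicy

featurizer = MemoizationPolicy._standard_featurizer(max_history=3)
assert featurizer.max_history == 3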
Example #11
def run_bot_cli(input_channel, interpreter,
                domain_file="./data/student_info_domain.yml",
                training_data_file='./data/stories.md'):

    # Featurizer generation
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)

    # Build the agent from the domain, dialogue policies and NLU interpreter
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)],
                  interpreter=interpreter)

    # This is where our training data file is loaded in for training
    training_data = agent.load_data(training_data_file)

    # training_data - the training data object created in the line above
    # input_channel - how the trainer receives its input
    # batch_size - number of samples per gradient update
    # epochs - number of training passes
    # validation_split - fraction of the training data to be used as validation data
    # augmentation_factor - how many of the dialogue stories are randomly glued together;
    #     the more stories you have, the higher the augmentation factor you want
    agent.train_online(training_data,
                       input_channel=input_channel,
                       batch_size=35,
                       epochs=400,
                       max_training_samples=200,
                       validation_split=0.2,
                       augmentation_factor=20)

    return agent
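A hypothetical invocation of run_bot_cli, assuming old-style rasa_core console channels and an NLU model directory (the path below is illustrative):

from rasa_core.channels.console import ConsoleInputChannel
from rasa_core.interpreter import RasaNLUInterpreter

# the NLU model path is illustrative
interpreter = RasaNLUInterpreter("./models/nlu/default/current")
agent = run_bot_cli(ConsoleInputChannel(), interpreter)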
Example #12
def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)])

    data = agent.load_data(training_data_file,
                           augmentation_factor=50)

    agent.train(data,
                epochs=500,
                batch_size=30,
                validation_split=0.2)

    # agent.train(training_data_file,
    #             # max_history=3,
    #             epochs=300,
    #             batch_size=50,
    #             validation_split=0.2,
    #             augmentation_factor=50)

    agent.persist(model_path)
    return agent
Example #13
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0)

    training_data = featurizer.featurize_trackers(training_trackers,
                                                  default_domain)
    y = training_data.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    assert len(training_trackers) == 3 or len(training_trackers) == 4

    # if we have 4 trackers, there is going to be one example more for label 2
    num_twos = len(training_trackers) - 1
    assert Counter(y) == {0: 6, 1: 2, 2: num_twos, 3: 1, 4: 3}
Example #14
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    # create a hidden file

    open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close()
    # create a normal file
    normal_file = os.path.join(tmpdir.strpath, "normal_file")
    open(normal_file, 'a').close()

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data(
        tmpdir.strpath,
        default_domain
    )
    data = featurizer.featurize_trackers(trackers,
                                         default_domain)

    assert len(data.X) == 0
    assert len(data.y) == 0
Example #15
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0
    )

    training_data = featurizer.featurize_trackers(training_trackers,
                                                  default_domain)
    y = training_data.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    assert len(training_trackers) == 3 or len(training_trackers) == 4

    # if we have 4 trackers, there is going to be one example more for label 3
    num_threes = len(training_trackers) - 1
    assert Counter(y) == {0: 6, 1: 2, 3: num_threes, 4: 1, 5: 3}
Example #16
def train_core(domain_file="config/domain.yml",
                   model_path="models/dialogue",
                   training_data_file="config/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=6),
                            KerasPolicy(MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(), max_history=6))])
    training_data = agent.load_data(training_data_file)
    # train the agent's policies
    agent.train(training_data, epochs=800)
    agent.persist(model_path)
    return agent
Example #17
def graph(domain_file="robot/config/domain.yml",
          training_data_file="robot/config/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=6),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(), max_history=6))
                  ])

    agent.visualize(training_data_file, output_file='graph.png', max_history=6)
Example #18
def train_dialogue(domain_file, model_path, training_folder):

    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(MaxHistoryTrackerFeaturizer(
                          BinarySingleStateFeaturizer(), max_history=5),
                                  epochs=300), fallback
                  ])

    training_data = agent.load_data(training_folder)

    agent.train(training_data)
    agent.persist(model_path)
Example #19
def train_dialogue(domain_file="mobile_domain.yml",
                   model_path="models/dialogue",
                   training_data_file="data/mobile_story.md"):
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=6),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(), max_history=6))
                  ])
    training_data = agent.load_data(training_data_file)
    agent.train(training_data, epochs=100)
    agent.persist(model_path)
    return agent
Example #20
def train_dialogue(domain_file, dia_data_file, nlu_model_dir, dia_model_dir):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer), policy.fallback],
                  interpreter=RasaNLUInterpreter(nlu_model_dir))
    training_data = agent.load_data(dia_data_file)
    agent.train(training_data,
                epochs=400,
                batch_size=100,
                # max_history=5,
                # augmentation_factor=50,
                validation_split=0.2)
    agent.persist(dia_model_dir)
    return agent
Example #21
def train_dialogue(domain_file, model_path, training_folder):

    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=6),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(), max_history=6)),
                      FallbackPolicy(nlu_threshold=0.8, core_threshold=0.3)
                  ])

    training_data = agent.load_data(training_folder)

    agent.train(training_data, epochs=100)
    agent.persist(model_path)
Example #22
def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path="models/dialogue",
                   training_data_file='data/babi_stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=5),
                  KerasPolicy(featurizer)])
    training_data = agent.load_data(training_data_file)
    agent.train(training_data,
                epochs=400,
                batch_size=100,
                validation_split=0.2)
    agent.persist(model_path)
    return agent
Example #23
def train_dialog(domain_file, training_data_file, model_dir, interpreter):
    _agent = Agent(domain_file,
                   policies=[
                       MemoizationPolicy(max_history=6),
                       KerasPolicy(MaxHistoryTrackerFeaturizer(
                           BinarySingleStateFeaturizer(), max_history=6),
                                   augmentation_factor=50,
                                   epochs=300,
                                   batch_size=50,
                                   validation_split=0.2)
                   ],
                   interpreter=interpreter)
    _training_data = _agent.load_data(training_data_file)
    _agent.train(_training_data)
    _agent.persist(model_dir)
    return _agent
Example #24
def train_dialogue_model(domain_file, stories_file, output_path,
                         nlu_model_path=None,
                         endpoints=None,
                         max_history=None,
                         dump_flattened_stories=False,
                         kwargs=None):
    if not kwargs:
        kwargs = {}

    action_endpoint = utils.read_endpoint_config(endpoints, "action_endpoint")

    fallback_args, kwargs = utils.extract_args(kwargs,
                                               {"nlu_threshold",
                                                "core_threshold",
                                                "fallback_action_name"})

    policies = [
        FallbackPolicy(
                fallback_args.get("nlu_threshold",
                                  DEFAULT_NLU_FALLBACK_THRESHOLD),
                fallback_args.get("core_threshold",
                                  DEFAULT_CORE_FALLBACK_THRESHOLD),
                fallback_args.get("fallback_action_name",
                                  DEFAULT_FALLBACK_ACTION)),
        MemoizationPolicy(
                max_history=max_history),
        KerasPolicy(
                MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                            max_history=max_history))]

    agent = Agent(domain_file,
                  action_endpoint=action_endpoint,
                  interpreter=nlu_model_path,
                  policies=policies)

    data_load_args, kwargs = utils.extract_args(kwargs,
                                                {"use_story_concatenation",
                                                 "unique_last_num_states",
                                                 "augmentation_factor",
                                                 "remove_duplicates",
                                                 "debug_plots"})

    training_data = agent.load_data(stories_file, **data_load_args)
    agent.train(training_data, **kwargs)
    agent.persist(output_path, dump_flattened_stories)

    return agent
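A hypothetical call sketch for the helper above, showing how the mixed kwargs dict is split by utils.extract_args between the fallback policy, data loading, and training (all argument values are illustrative):

agent = train_dialogue_model(
    "domain.yml", "data/stories.md", "models/dialogue",
    kwargs={
        "nlu_threshold": 0.4,        # extracted for FallbackPolicy
        "augmentation_factor": 20,   # extracted for agent.load_data
        "epochs": 200,               # whatever remains goes to agent.train
    })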
Example #25
def train_dialogue(domain_file='foodie.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)])

    agent.train(training_data_file,
                # max_history=3,
                epochs=300,
                batch_size=50,
                validation_split=0.2,
                augmentation_factor=50)

    agent.persist(model_path)
    return agent
Example #26
    def default_policies(cls, fallback_args, max_history):
        # type: (Dict[Text, Any], int) -> List[Policy]
        """Load the default policy setup consisting of
        FallbackPolicy, MemoizationPolicy and KerasPolicy."""

        return [
            FallbackPolicy(
                fallback_args.get("nlu_threshold",
                                  DEFAULT_NLU_FALLBACK_THRESHOLD),
                fallback_args.get("core_threshold",
                                  DEFAULT_CORE_FALLBACK_THRESHOLD),
                fallback_args.get("fallback_action_name",
                                  DEFAULT_FALLBACK_ACTION)),
            MemoizationPolicy(max_history=max_history),
            KerasPolicy(
                MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                            max_history=max_history))
        ]
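A hypothetical usage of default_policies; the enclosing class name (PolicyEnsemble below) is an assumption, and the argument values are illustrative:

# PolicyEnsemble is an assumed name for the class that defines
# default_policies; the threshold values are illustrative.
policies = PolicyEnsemble.default_policies(
    {"nlu_threshold": 0.4,
     "fallback_action_name": "action_default_fallback"},
    max_history=5)
assert len(policies) == 3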
Example #27
    def from_dict(cls, dictionary):
        # type: (Dict[Text, Any]) -> List[Policy]

        policies = []

        for policy in dictionary.get('policies', []):

            policy_name = policy.pop('name')

            if policy_name == 'KerasPolicy':
                policy_object = KerasPolicy(MaxHistoryTrackerFeaturizer(
                                BinarySingleStateFeaturizer(),
                                max_history=policy.get('max_history', 3)))
            else:
                constr_func = utils.class_from_module_path(policy_name)
                policy_object = constr_func(**policy)
            policies.append(policy_object)

        return policies
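A hypothetical input for from_dict, matching the structure the loop expects (non-Keras policy names must resolve via utils.class_from_module_path; the dict below is illustrative):

# Illustrative config dict; PolicyEnsemble is an assumed class name.
policies = PolicyEnsemble.from_dict({
    "policies": [
        {"name": "KerasPolicy", "max_history": 3},
        {"name": "MemoizationPolicy", "max_history": 5},
    ]
})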
Example #28
def trainingBot(to_bot_queue, to_human_queue, base_model, output_model,
                nlu_model, training_data):

    utils.configure_colored_logging(loglevel="INFO")

    max_history = None
    interactive_learning_on = True

    channel = TrainingInputChannel(to_bot_queue, to_human_queue)
    preloaded_model = True

    if preloaded_model:
        agent = CustomAgent.load(base_model,
                                 NaturalLanguageInterpreter.create(nlu_model))
        training_data = agent.load_data(training_data)

        agent.train_online_preloaded_model(training_data,
                                           input_channel=channel,
                                           model_path=output_model)
    else:
        agent = CustomAgent(
            "domain.yml",
            policies=[
                MemoizationPolicy(max_history=max_history),
                KerasPolicy(
                    MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                max_history=max_history)),
                FallbackPolicy(fallback_action_name="utter_fallback",
                               nlu_threshold=0.3)
            ])

        training_data = agent.load_data(training_data)
        agent.interpreter = NaturalLanguageInterpreter.create(nlu_model)
        agent.train_online(training_data,
                           input_channel=channel,
                           model_path=output_model,
                           augmentation_factor=50,
                           epochs=250,
                           batch_size=10,
                           validation_split=0.2)

    agent.persist(output_model)
Example #29
    def trainRasaCore(self):
        try:
            training_data_file = "./" + self.config.get('inputData', 'stories')
            domain_yml = "./" + self.config.get('inputData', 'coreyml')

            logger.info(
                "Building RASA Core model with stories : %s, domain_yml : %s" %
                (training_data_file, domain_yml))
            model_name = "model_" + datetime.now().strftime("%Y%m%dT%H%M%S")
            model_location = "./models/ourgroup/dialogue/" + model_name

            featurizer = MaxHistoryTrackerFeaturizer(
                BinarySingleStateFeaturizer(), max_history=5)
            agent = Agent(domain_yml,
                          policies=[
                              MemoizationPolicy(max_history=4),
                              KerasPolicy(featurizer)
                          ])

            agent.train(
                training_data_file,
                augmentation_factor=50,
                #max_history = 4,
                epochs=500,
                batch_size=30,
                validation_split=0.2)
            agent.persist(model_location)

            model_location = os.path.realpath(model_location)
            logger.info("RASA Core model_location : %s" %
                        (str(model_location)))

            self.config.set('coreModel',
                            'model_location',
                            value=model_location)
            with open("./etc/config.ini", "w+") as f:
                self.config.write(f)
            return ("RASA core model training completed, see details above")
        except Exception as e:
            logger.error("unable to train rasa core model, exception : %s" %
                         (str(e)))
            raise (e)
Example #30
File: bot.py  Project: yutong01/rasa_bot
def train_dialogue(domain_file="data/domain.yml",
                   model_path="models/dialogue",
                   training_data_file="data/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=5),
                  KerasPolicy(featurizer)])

    agent.train(training_data_file,
                epochs=200,
                batch_size=16,
                augmentation_factor=50,
                validation_split=0.2)

    agent.persist(model_path)
    return agent
Example #31
File: bot.py  Project: yutong01/rasa_bot
def run_ivrbot_online(input_channel=ConsoleInputChannel(),
                      interpreter=RasaNLUInterpreter("models/ivr/demo"),
                      domain_file="data/domain.yml",
                      training_data_file="data/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=5),
                  KerasPolicy(featurizer)],
        interpreter=interpreter)

    agent.train_online(training_data_file,
                       input_channel=input_channel,
                       batch_size=50,
                       epochs=200,
                       max_training_samples=300)

    return agent
Example #32
def run_foodie_online(input_channel,
                      interpreter,
                      domain_file="foodie_domain.yml",
                      training_data_file='data/stories.md'):

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=10)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=10),
                  KerasPolicy(featurizer)],
        interpreter=interpreter)

    agent.train_online(training_data_file,
                       input_channel=input_channel,
                       max_history=2,
                       batch_size=50,
                       epochs=200,
                       max_training_samples=300)

    return agent
Example #33
def train_dialogue_model(domain_file,
                         stories_file,
                         output_path,
                         use_online_learning=False,
                         nlu_model_path=None,
                         max_history=None,
                         dump_flattened_stories=False,
                         kwargs=None):
    if not kwargs:
        kwargs = {}

    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=max_history),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(),
                              max_history=max_history))
                  ])

    data_load_args, kwargs = utils.extract_args(
        kwargs, {
            "use_story_concatenation", "unique_last_num_states",
            "augmentation_factor", "remove_duplicates", "debug_plots"
        })
    training_data = agent.load_data(stories_file, **data_load_args)

    if use_online_learning:
        if nlu_model_path:
            agent.interpreter = RasaNLUInterpreter(nlu_model_path)
        else:
            agent.interpreter = RegexInterpreter()
        agent.train_online(training_data,
                           input_channel=ConsoleInputChannel(),
                           model_path=output_path,
                           **kwargs)
    else:
        agent.train(training_data, **kwargs)

    agent.persist(output_path, dump_flattened_stories)
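A hypothetical invocation of the online branch above (the NLU model path is illustrative, and the epochs value is passed through kwargs to train_online):

train_dialogue_model("domain.yml", "data/stories.md", "models/dialogue",
                     use_online_learning=True,
                     nlu_model_path="models/nlu/default/current",
                     max_history=3,
                     kwargs={"epochs": 300})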
Example #34
def test_load_multi_file_training_data(default_domain):
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers = training.load_data(
        "data/test_stories/stories.md",
        default_domain,
        augmentation_factor=0
    )
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
                                        trackers, default_domain)
    hashed = []
    for sts, acts in zip(tr_as_sts, tr_as_acts):
        hashed.append(json.dumps(sts + acts, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    data = featurizer.featurize_trackers(trackers,
                                         default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers_mul = training.load_data(
        "data/test_multifile_stories",
        default_domain,
        augmentation_factor=0
    )
    (tr_as_sts_mul, tr_as_acts_mul) = featurizer.training_states_and_actions(
                                        trackers_mul, default_domain)
    hashed_mul = []
    for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul):
        hashed_mul.append(json.dumps(sts_mul + acts_mul, sort_keys=True))
    hashed_mul = sorted(hashed_mul, reverse=True)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul,
                                                 default_domain)

    assert hashed == hashed_mul

    # note: ndarray.sort() sorts in place and returns None, so the
    # comparison must use np.sort, which returns a sorted copy
    assert np.all(np.sort(data.X, axis=0) == np.sort(data_mul.X, axis=0))
    assert np.all(np.sort(data.y, axis=0) == np.sort(data_mul.y, axis=0))