def _finish_example(text, entities, cur_entity, label_line):
    """Build the Example for one completed sentence.

    Args:
        text: The sentence text, i.e. the sentence's words joined by spaces.
        entities: Entities already closed for this sentence. The list is
            extended in place when ``cur_entity`` is still pending.
        cur_entity: The entity being assembled when the sentence ended; it is
            kept only when its ``to_add`` flag is set.
        label_line: The raw line read from the label file for this sentence.
            Everything from the first newline onwards is discarded before the
            label is passed to ``transform_intent``.

    Returns:
        An ``Example`` whose ``text``, ``intent`` and ``entities`` attributes
        have been filled in.
    """
    # An entity that runs up to the end of the sentence has not been closed
    # by an "O" or "B" tag yet, so it is collected here.
    if cur_entity.to_add:
        entities.append(cur_entity)

    ex = Example(None, None, [])
    ex.text = text
    ex.intent = transform_intent(label_line.split("\n")[0])
    for ent in entities:
        # presumably locates the entity value inside the sentence text and
        # stores its offsets on the entity -- confirm against Entity
        ent.find_start_end(ex.text)
        ex.entities.append(ent)
    return ex


def run(train_file, label_file, flag):
    """Convert a tagged token file plus a label file into a Rasa NLU JSON file.

    ``train_file`` is read line by line. Every non-blank line must contain
    exactly one tab separating a word from its tag; a line consisting only of
    a newline ends the current sentence. Tags are handled as follows:

    * ``"O"`` closes the entity currently being built, if any.
    * a tag starting with ``"B"`` starts a new entity whose type is the tag
      with its first two characters removed.
    * a tag starting with ``"I"`` appends the word to the current entity when
      the tag's type matches it, and is ignored otherwise.

    For every sentence one line is read from ``label_file`` and turned into
    the intent through ``transform_intent``.

    The resulting examples, followed by the entries loaded from
    ``basic_intents.json``, are written under
    ``rasa_nlu_data.common_examples``.

    Args:
        train_file: Path of the tab-separated word/tag file.
        label_file: Path of the file holding one label line per sentence.
        flag: Index selecting the output file: ``0`` writes ``exact.json``,
            ``1`` writes ``test.json``.

    Raises:
        ValueError: If a non-blank line of ``train_file`` does not contain
            exactly one tab.
        IndexError: If ``flag`` is not a valid index into the output names.
        OSError: If one of the files cannot be opened.
    """
    examples = []
    cur_entity = Entity(0, 0, None, None)
    text = ""
    entities = []

    with open(train_file, "r") as tr, open(label_file, "r") as lb:
        # Iterate the file lazily instead of loading every line up front.
        for line in tr:
            if line == "\n":
                # Blank line: the sentence is complete, consume its label.
                examples.append(
                    _finish_example(text, entities, cur_entity, lb.readline())
                )
                text = ""
                entities = []
                cur_entity = Entity(0, 0, None, None)
                continue

            word, tag = line.split("\t")
            text = word if text == "" else text + " " + word
            tag = tag.split("\n")[0]

            if tag == "O":
                # Outside any entity: close the one in progress, if any.
                if cur_entity.entity is not None:
                    entities.append(cur_entity)
                    cur_entity = Entity(0, 0, None, None)
            elif tag.startswith("B"):
                # tag[2:] drops the two-character prefix (e.g. "B-").
                if cur_entity.entity is not None:
                    # A new entity starts right after another one.
                    entities.append(cur_entity)
                    cur_entity = Entity(0, 0, word, tag[2:])
                else:
                    cur_entity.entity = tag[2:]
                    cur_entity.value = word
                cur_entity.to_add = True
            elif tag.startswith("I") and cur_entity.entity == tag[2:]:
                # Continuation of the current entity; a continuation tag whose
                # type does not match the current entity is ignored.
                cur_entity.value = cur_entity.value + " " + word
                cur_entity.to_add = True

        # The last sentence is otherwise lost when the file does not end with
        # a blank line, because examples are only emitted on blank lines.
        if text:
            examples.append(
                _finish_example(text, entities, cur_entity, lb.readline())
            )

    final_json = {
        "rasa_nlu_data": {
            "common_examples": [example.get_json() for example in examples],
            "entity_examples": [],
            "intent_examples": [],
        }
    }

    # Append the hand-written examples after the converted ones.
    with open("basic_intents.json") as f:
        data = json.load(f)
    final_json["rasa_nlu_data"]["common_examples"].extend(data)

    file_names = ["exact.json", "test.json"]
    with open(file_names[flag], "w") as j:
        json.dump(final_json, j)