def parseXLSXIntoDataBlocks(self, filename):
    """ Read Excel spreadsheet in T2C format.

    Every sheet of the workbook is split into blocks of consecutive valid
    rows (only the first four columns are read) and each block is passed to
    __createBlock together with the domain name (derived from the file name)
    and the prefix (the sheet title).

    A row is valid if at least one of its four cells is not empty and is not
    a comment (text starting with '//'). Three slashes in the first cell turn
    the whole row into a comment. Invalid rows separate the blocks.

    Returns an empty dict when the file does not exist or is not a valid
    spreadsheet; otherwise nothing is returned.
    """
    columnCount = 4

    def cellText(row, index):
        """ Return the raw text of a cell, or None when the cell is empty. """
        cell = row[index] if index < len(row) else None
        value = cell.value if cell else None
        if not value:
            return None
        # Numeric/date cells come back as non-string objects; convert them to
        # text so the comment checks and strip() below cannot crash.
        if not hasattr(value, 'startswith'):
            value = u'%s' % (value,)
        return value

    def isData(text):
        """ Tell whether the cell text is real data (not empty, not a comment). """
        return text is not None and not text.startswith('//')

    printf('Processing xlsx file: %s\n', filename)
    if not os.path.exists(filename):
        eprintf('Error: File does not exist: %s\n', filename)
        return {}
    try:
        domainName = unicode(
            toIntentName(NAME_POLICY, None, os.path.splitext(os.path.split(filename)[1])[0]), 'utf-8')
        workbook = load_workbook(filename=filename, read_only=True)
    except (IOError, BadZipfile):
        eprintf('Error: File does not seem to be a valid Excel spreadsheet: %s\n', filename)
        return {}

    try:
        # Process all the tabs of the file
        for sheet in workbook.worksheets:
            printf(' Sheet: %s\n', sheet.title)
            prefix = unicode(sheet.title, 'utf-8')
            currentBlock = []
            # Separate all data blocks in the sheet
            for row in sheet.iter_rows(max_col=columnCount):
                texts = [cellText(row, index) for index in range(columnCount)]
                # Three slashes in the first cell cause whole rest of the line
                # to be treated as comment
                lineCommented = texts[0] is not None and texts[0].startswith('///')
                validRow = not lineCommented and any(isData(text) for text in texts)
                if validRow:
                    # Comment cells inside a valid row are stored as None
                    currentBlock.append(
                        tuple(text.strip() if isData(text) else None for text in texts))
                elif currentBlock:
                    # Empty or commented line closes the block collected so far
                    self.__createBlock(domainName, prefix, currentBlock)
                    currentBlock = []
            # Do not lose the block that reaches the end of the sheet
            if currentBlock:
                self.__createBlock(domainName, prefix, currentBlock)
    finally:
        # Read-only workbooks keep the source file open until closed explicitly
        workbook.close()
def convertBlocksToDialogData(self):
    """ Walk over all parsed raw blocks and dispatch each one to the handler
    matching its type (condition block or intent block).

    Blocks whose first cell carries no data are reported and skipped.
    """
    for domain, prefix, intent, block in self._dataBlocks:
        hasLeadingData = block and isinstance(block[0], tuple) and block[0][0]
        if not hasLeadingData:
            printf('Warning: First cell of the data block does not contain any data. (domain=%s, prefix=%s, intent=%s)\n', domain, prefix, intent)
            continue

        # The first cell decides which handler processes the block
        if self.__isConditionBlock(block[0][0]):
            handler = self.__handleConditionBlock
        else:
            handler = self.__handleIntentBlock
        handler(intent, block, domain)
def __createBlock(self, domain, prefix, block):
    """ Classify one raw block of rows and register it.

    domain -- domain name the block belongs to
    prefix -- prefix (sheet title) the block belongs to
    block  -- list of row tuples; a leading label row is removed in place

    An optional first row starting with ':' is a label for the block. A block
    starting with '@' is passed to the entity handler. Any other block gets an
    intent name derived from its first cell and is appended to the list of
    data blocks as (domain, prefix, fullIntentName, block).
    """
    emptyBlockWarning = 'Warning: First cell of the data block does not contain any data. (domain=%s, prefix=%s)\n'
    if not block or not block[0][0]:
        printf(emptyBlockWarning, domain, prefix)
        return

    # Check if there's a label
    label = None
    firstCell = block[0][0]
    if firstCell.startswith(u':') and len(block) > 1:
        label = firstCell[1:]
        if label in self._labelsMap:
            printf('Warning: Found a label that has already been assigned to an intent and will be overwritten. Label: %s\n', label)
        del block[0]
        firstCell = block[0][0]
        # The row following the label may have an empty first cell
        if not firstCell:
            printf(emptyBlockWarning, domain, prefix)
            return

    # If it's entity block, load the entity
    if firstCell.startswith(u'@'):
        self.__handleEntityBlock(block)
        return

    # Check the intent name
    intentName = firstCell
    if self.__isConditionBlock(firstCell):
        # The placeholder is filled from the first cell of the second row,
        # which exists only when the block has more than one row
        if Dialog.X_PLACEHOLDER in firstCell and len(block) > 1 and block[1][0]:
            intentName = re.sub(Dialog.X_PLACEHOLDER, block[1][0], firstCell)
    elif firstCell.startswith(u'#'):
        intentName = firstCell[1:]
    else:
        # Create intent name from first sentence by replacing all spaces with
        # underscores and removing accents, commas and slashes.
        # str.replace is used because the fourth positional argument of
        # re.sub is 'count', not 'flags'.
        intentName = re.sub("[/,?']", '', unidecode.unidecode(intentName).replace(' ', '_'))

    # check intent name
    fullIntentName = toIntentName(NAME_POLICY, None, domain, prefix, intentName)
    # Return value is unused here; presumably this registers the intent in
    # the dialog data -- confirm against getIntentData
    self._dialogData.getIntentData(fullIntentName, domain)
    self._dataBlocks.append((domain, prefix, fullIntentName, block))
    if label:
        self._labelsMap[label] = fullIntentName.decode('utf-8')
required=False, help='verbosity', action='store_true') args = parser.parse_args(sys.argv[1:]) VERBOSE = args.verbose if args.soft: NAME_POLICY = 'soft' else: NAME_POLICY = 'hard' with open(args.intents, 'r') as intentsFile: intentsJSON = json.load(intentsFile) # process all intents for intentJSON in intentsJSON: examples = [] # process all example sentences for exampleJSON in intentJSON["examples"]: examples.append(exampleJSON["text"].strip().lower()) # new intent file intentFileName = os.path.join( args.intentsDir, toIntentName(NAME_POLICY, args.common_intents_nameCheck, intentJSON["intent"]) + ".csv") with open(intentFileName, "w") as intentFile: for example in examples: intentFile.write((example + "\n").encode('utf8')) if VERBOSE: printf("Intents from file '%s' were successfully extracted\n", args.intents)
def areSame(expectedOutputJson, receivedOutputJson, failureData, parentPath):
    """ Recursively check that the received JSON matches the expected JSON.

    expectedOutputJson -- expected value (string, int, float, list or dict)
    receivedOutputJson -- value to be checked against the expected one
    failureData        -- dict filled with 'message', 'expectedElement' and
                          'receivedElement' describing the first difference
    parentPath         -- textual path to the compared element, used in
                          failure messages

    Dict keys whose expected value is None are not checked. Keys present only
    in the received dict are ignored. Lists must have the same length and
    equal items at the same positions.

    Returns True when the values match, False otherwise.
    """
    pathSuffix = " (" + parentPath + ")"

    def reportTypeMismatch(expectedDescription):
        """ Record a type difference in failureData and return False. """
        failureData['message'] = 'Received output differs in type from expected output.' + pathSuffix
        failureData['expectedElement'] = expectedDescription
        failureData['receivedElement'] = "Element of the type " + receivedOutputJson.__class__.__name__
        printf("Different type: %s and %s\n", expectedOutputJson, receivedOutputJson)
        return False

    printf("ARE SAME: %s and %s\n", expectedOutputJson, receivedOutputJson)

    # Scalars: (type, name used in failure messages, name used in debug output)
    scalarTypes = (
        (basestring, 'string', 'basestring'),
        (int, 'int', 'int'),
        (float, 'float', 'float'),
    )
    for scalarType, typeName, sameLabel in scalarTypes:
        if not isinstance(expectedOutputJson, scalarType):
            continue
        # Strings are reported as they are; str() could fail on non-ascii text
        isText = scalarType is basestring
        if not isinstance(receivedOutputJson, scalarType):
            shownExpected = expectedOutputJson if isText else str(expectedOutputJson)
            return reportTypeMismatch("Element of the type " + typeName + " (" + shownExpected + ")")
        if expectedOutputJson != receivedOutputJson:
            failureData['message'] = 'Received output differs from expected output.' + pathSuffix
            failureData['expectedElement'] = expectedOutputJson if isText else str(expectedOutputJson)
            failureData['receivedElement'] = receivedOutputJson if isText else str(receivedOutputJson)
            printf("NOT SAME: %s and %s\n", expectedOutputJson, receivedOutputJson)
            return False
        printf('SAME: ' + sameLabel + ' %s and %s are same\n', expectedOutputJson, receivedOutputJson)
        return True

    if isinstance(expectedOutputJson, list):
        if not isinstance(receivedOutputJson, list):
            return reportTypeMismatch("Element of the type list")
        if len(expectedOutputJson) != len(receivedOutputJson):
            failureData['message'] = 'List in received output differs in length from list in expected output.' + pathSuffix
            failureData['expectedElement'] = "List of the length " + str(len(expectedOutputJson))
            failureData['receivedElement'] = "List of the length " + str(len(receivedOutputJson))
            printf('ERROR: Different list length!\n')
            printf('expected %s\n', expectedOutputJson)
            printf('received %s\n', receivedOutputJson)
            return False
        for i, (expectedItem, receivedItem) in enumerate(zip(expectedOutputJson, receivedOutputJson)):
            printf("STEP: Item %d\n", i)
            if not areSame(expectedItem, receivedItem, failureData, parentPath + " - " + str(i) + "th item in list"):
                printf('ERROR: Different list items in positon %d!\n', i)
                return False
        return True

    if isinstance(expectedOutputJson, dict):
        if not isinstance(receivedOutputJson, dict):
            return reportTypeMismatch("Element of the type dict")
        for elementKey in expectedOutputJson:
            printf("STEP: Element key %s\n", elementKey)
            # None in expected output means the value is not checked
            if expectedOutputJson[elementKey] is None:
                printf("NONE: Element with key %s is none\n", elementKey)
                continue
            if elementKey not in receivedOutputJson or receivedOutputJson[elementKey] is None:
                failureData['message'] = 'Received output has no key ' + elementKey + '.' + pathSuffix
                failureData['expectedElement'] = "Dict with key " + elementKey
                failureData['receivedElement'] = "None"
                printf('ERROR: Missing key in received json!\n')
                return False
            if not areSame(expectedOutputJson[elementKey], receivedOutputJson[elementKey], failureData, parentPath + " - " + elementKey):
                printf('ERROR: Different dict items for key %s!\n', elementKey)
                return False
        return True

    # Anything else (e.g. None outside of a dict) cannot be compared; fill
    # failureData as well so the caller can report the reason
    eprintf('ERROR: Unsupported type of element %s, type %s!\n', str(expectedOutputJson), expectedOutputJson.__class__.__name__)
    failureData['message'] = 'Unsupported type of element in expected output.' + pathSuffix
    failureData['expectedElement'] = "Element of the type " + expectedOutputJson.__class__.__name__
    failureData['receivedElement'] = "Element of the type " + receivedOutputJson.__class__.__name__
    return False
# expected JSON with open(args.expectedFileName, "r") as expectedJsonFile: # received JSON with open(args.receivedFileName, "r") as receivedJsonFile: # init whole test nDialogs = 0 nDialogsFailed = 0 firstFailedLine = None timeStart = time.time() # XML (whole test) outputXml = LET.Element('testsuites') # print (whole test) printf('--------------------------------------------------------------------------------\n') printf('-- TEST: ' + testName + '\n') printf('--------------------------------------------------------------------------------\n') # XML (new dialouge) dialogXml = LET.Element('testsuite') expectedJsonLine = expectedJsonFile.readline() receivedJsonLine = receivedJsonFile.readline() line = 0 dialogId = 0 # for every line while expectedJsonLine: line += 1; if not receivedJsonLine: # no more received line
http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import os, json, sys, argparse from cfgCommons import Cfg from wawCommons import printf, eprintf if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser( description= 'Concatenate intents, entities and dialogue jsons to Watson Conversation Service workspace .json format', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-c', '--common_configFilePaths', help='configuaration file', action='append') parser.add_argument('-oc', '--common_output_config', help='output configuration file') parser.add_argument( '-of', '--common_outputs_directory', required=False,
nodeNameMap[element.tag] = [ element ] for name in nodeNameMap: # structure=listItem attribute results in generating array rather then object #if len(nodeNameMap[name]) == 1 and nodeNameMap[name][0].get('structure') != 'listItem' and name!='values': if len(nodeNameMap[name]) == 1 and nodeNameMap[name][0].get('structure') != 'listItem' : convertAll(upperNodeJson[key], nodeNameMap[name][0]) else: upperNodeJson[key][name] = [] for element in nodeNameMap[name]: upperNodeJson[key][name].append(None) # just to get index convertAll(upperNodeJson[key][name], element) if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser(description='Converts dialog nodes from .xml format to Bluemix conversation service workspace .json format', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-dm','--common_dialog_main', required=False, help='main dialog file with dialogue nodes in xml format') parser.add_argument('-c','--common_configFilePaths', help='configuaration file', action='append') parser.add_argument('-oc', '--common_output_config', help='output configuration file') parser.add_argument('-s', '--common_schema', required=False, help='schema file') parser.add_argument('-of', '--common_outputs_directory', required=False, help='directory where the otputs will be stored (outputs is default)') parser.add_argument('-od', '--common_outputs_dialogs', required=False, help='name of generated file (dialogs.xml is the default)') #CF parameters are specific to Cloud Functions Credentials placement from config file and will be replaced in the future by a separate script parser.add_argument('-cfn','--cloudfunctions_namespace', required=False, help='cloud functions namespace') parser.add_argument('-cfu','--cloudfunctions_username', required=False, help='cloud functions username') parser.add_argument('-cfp','--cloudfunctions_password', required=False, help='cloud functions password') 
parser.add_argument('-cfa','--cloudfunctions_package', required=False, help='cloud functions package') parser.add_argument('-v','--common_verbose', required=False, help='verbosity', action='store_true') args = parser.parse_args(sys.argv[1:]) config = Cfg(args);
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import sys, argparse, os import wawCommons from cfgCommons import Cfg from wawCommons import printf, eprintf import shutil if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser(description='Clean generated directories.',formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-c', '--common_configFilePaths', help='configuaration file', action='append') parser.add_argument('-oc', '--common_output_config', help='output configuration file') parser.add_argument('-od', '--common_outputs_directory', required=False, help='directory where the otputs will be stored (outputs is default)') parser.add_argument('-oi', '--common_outputs_intents', help='file with output json with all the intents') parser.add_argument('-oe', '--common_outputs_entities', help='file with output json with all the entities') parser.add_argument('-v','--common_verbose', required=False, help='verbosity', action='store_true') parser.add_argument('-s', '--common_soft', required=False, help='soft name policy - change intents and entities names without error.', action='store_true', default="") args = parser.parse_args(sys.argv[1:]) config=Cfg(args); VERBOSE = hasattr(config, 'common_verbose') if os.path.exists(config.common_generated_dialogs[0]): shutil.rmtree(config.common_generated_dialogs[0]) if VERBOSE:printf('%s does not exist.',config.common_generated_dialogs[0])
http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import json, sys, argparse, os from cfgCommons import Cfg from wawCommons import printf, eprintf, toEntityName, getFilesAtPath if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser( description='Conversion entity csv files to .json.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-c', '--common_configFilePaths', help='configuaration file', action='append') parser.add_argument( '-oc', '--common_output_config', help= 'output configuration fil, the optional name of file where configuration is stored.' ) parser.add_argument( '-ie',
print('Created new directory ' + getattr(config, 'common_generated_entities')[0]) entities = dialogData.getAllEntities() for entity in entities: with open( os.path.join( getattr(config, 'common_generated_entities')[0], entity.encode('ascii', 'ignore') + '.csv'), 'w') as entityFile: for entityList in entities[entity]: entityFile.write(entityList.encode('utf8') + '\n') if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser( description='Creates dialog nodes with answers to intents .', formatter_class=argparse.ArgumentDefaultsHelpFormatter) # optional arguments parser.add_argument('-x', '--common_xls', required=False, help='file with MSExcel formated dialog', action='append') parser.add_argument('-gd', '--common_generated_dialogs', nargs='?', help='generated dialog file') parser.add_argument('-gi', '--common_generated_intents',
intentName = toIntentName(NAME_POLICY, args.common_intents_nameCheck, PREFIX, os.path.splitext(intentFileName)[0]) if intentName not in intentNames: intentNames.append(intentName) with open(os.path.join(args.intentsDir, intentFileName), "r") as intentFile: for line in intentFile.readlines(): # remove comments line = line.split('#')[0] if args.entityDir: line = tagEntities(line, entities) if line: outputFile.write("1\t" + intentName + "\t" + line) if VERBOSE: printf("Intents file '%s' was successfully created\n", args.output) if args.list: with open(args.list, 'w') as intentsListFile: for intentName in intentNames: intentsListFile.write(intentName + "\n") if VERBOSE: printf("Intents list '%s' was successfully created\n", args.list) if args.map: domIntMap = {} for intentName in intentNames: intentSplit = intentName.split("_", 1) domainPart = intentSplit[0] intentPart = intentSplit[1] if domainPart in domIntMap:
# process all entity values for valueJSON in entityJSON["values"]: value = [] value.append(valueJSON["value"].strip()) # add all synonyms if 'synonyms' in valueJSON: for synonym in valueJSON['synonyms']: value.append(synonym.strip()) values.append(value) # new entity file entityFileName = os.path.join( args.entitiesDir, toEntityName(NAME_POLICY, args.common_entities_nameCheck, entityJSON["entity"])) + ".csv" with open(entityFileName, "w") as entityFile: for value in values: entityFile.write(';'.join(value) + "\n") # write file with system entities with open(os.path.join(args.entitiesDir, "system_entities.csv"), 'w') as systemEntitiesFile: systemEntitiesFile.write( "# a special list for the system entities - only one value at each line\n" ) for systemEntity in systemEntities: systemEntitiesFile.write(systemEntity + "\n") if VERBOSE: printf("Entities from file '%s' were successfully extracted\n", args.entities)
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import os, json, sys, argparse, requests from requests.packages.urllib3.exceptions import InsecureRequestWarning from cfgCommons import Cfg from wawCommons import printf, eprintf, getFilesAtPath import urllib3 if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser( description= 'Concatenate intents, entities and dialogue jsons to Watson Conversation Service workspace .json format', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-c', '--common_configFilePaths', help='configuaration file', action='append') parser.add_argument('-oc', '--common_output_config', help='output configuration file') parser.add_argument('-offnc', '--functions', required=False, help='directory where the cloud functions are located')
http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import json, sys, argparse, os, glob, codecs from wawCommons import printf, eprintf, getFilesAtPath, toIntentName from cfgCommons import Cfg if __name__ == '__main__': printf('\nSTARTING: ' + os.path.basename(__file__) + '\n') parser = argparse.ArgumentParser( description= 'Converts intent csv files to .json format of Watson Conversation Service', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('-c', '--common_configFilePaths', help='configuaration file', action='append') parser.add_argument('-oc', '--common_output_config', help='output configuration file') parser.add_argument( '-ii', '--common_intents', help=
ensure_ascii=False).encode('utf8')) if args.entities: with open(args.entities, 'w') as entitiesFile: entitiesFile.write( json.dumps(workspaceJSON['entities'], indent=4, ensure_ascii=False).encode('utf8')) if args.dialog: with open(args.dialog, 'w') as dialogFile: dialogFile.write( json.dumps(workspaceJSON['dialog_nodes'], indent=4, ensure_ascii=False).encode('utf8')) if args.counterexamples: with open(args.counterexamples, 'w') as counterexamplesFile: counterexamplesJSON = [] counterexampleIntentJSON = {} counterexampleIntentJSON['intent'] = "IRRELEVANT" counterexampleIntentJSON['examples'] = workspaceJSON[ 'counterexamples'] counterexamplesJSON.append(counterexampleIntentJSON) counterexamplesFile.write( json.dumps(counterexamplesJSON, indent=4, ensure_ascii=False).encode('utf8')) if VERBOSE: printf("Workspace %s was successfully decomposed\n", args.workspace)
for tagXPath in args.tagsXPath: tagsToReplace.extend(dialogsXML.xpath(tagXPath)) # LOAD EXISTING RESOURCE FILE (TRANSLATIONS) if args.join: with open(args.resource, 'r') as resourceFile: translations = json.load(resourceFile) else: translations = {} counter = 0 # REPLACE ALL TEXTS WITH CODES for tagToReplace in tagsToReplace: text = tagToReplace.text if VERBOSE: printf("%s: %s\n", tagToReplace.tag, tagToReplace.text) # if this tag text is not in translations dictionary (it has not a code), # create new code for it and add it to dictionary if not text in translations.values(): translations[toCode(NAME_POLICY, PREFIX + str(counter))] = text counter += 1 # replace tag text by its code code = translations.keys()[translations.values().index( text)] # returns key (code) for this value (text) tagToReplace.text = '%%' + code if VERBOSE: printf("-> encoded as %s\n", code) # OUTPUT NEW DIALOG if args.output is not None: with open(args.output, 'w') as outputFile: outputFile.write(
# create/update workspace response = requests.post(workspacesUrl, auth=(username, password), headers={'Content-Type': 'application/json'}, data=json.dumps(workspace, indent=4)) responseJson = response.json() # check errors during upload if 'error' in responseJson: eprintf('Cannot upload conversation workspace\nERROR: %s\n', responseJson['error']) if VERBOSE: eprintf("INFO: RESPONSE: %s\n", responseJson) # if VERBOSE: eprintf("INFO: WORKSPACE: %s\n", json.dumps(workspace, indent=4)) sys.exit(1) else: printf('Workspace successfully uploaded\n') if VERBOSE: printf("%s", responseJson) if not hasattr(config, 'conversation_workspace_id') or not getattr( config, 'conversation_workspace_id'): setattr(config, 'conversation_workspace_id', responseJson['workspace_id']) printf('WCS WORKSPACE_ID: %s\n', responseJson['workspace_id']) if hasattr(config, 'common_output_config'): config.saveConfiguration(getattr(config, 'common_output_config')) if hasattr(config, 'context_client_name'): # Assembling uri of the client clientv2URL = 'https://clientv2-latest.mybluemix.net/#defaultMinMode=true' clientv2URL += '&prefered_workspace_id=' + getattr(
required=False, help='verbosity', action='store_true') args = parser.parse_args(sys.argv[1:]) VERBOSE = args.verbose # load config file conversationSection = 'conversation' try: config = configparser.ConfigParser() config.read(args.config) workspacesUrl = config.get(conversationSection, 'url') version = config.get(conversationSection, 'version') username = config.get(conversationSection, 'username') printf('WCS USERNAME: %s\n', username) password = config.get(conversationSection, 'password') printf('WCS PASSWORD: %s\n', password) workspaceId = config.get(conversationSection, 'workspace_id', fallback=None) if workspaceId: printf('WCS WORKSPACE_ID: %s\n', workspaceId) workspacesUrl += '/' + workspaceId except IOError: eprintf('ERROR: Cannot load config file %s\n', args.config) sys.exit(1) # wait until workspace is done with training checkWorkspaceTime = 0 url = workspacesUrl + '?version=' + version
# load dialog from XML dialogXML = LET.parse(args.dialog) # find all tags with codes to replace tagsToReplace = [] for tagXPath in args.tagsXPath: tagsToReplace.extend(dialogXML.xpath(tagXPath)) # LOAD RESOURCE FILE (TRANSLATIONS) with open(args.resource, 'r') as resourceFile: translations = json.load(resourceFile) # REPLACE ALL CODES WITH TEXTS for tagToReplace in tagsToReplace: if tagToReplace.text is None: continue if VERBOSE: printf("%s: code '%s'\n", tagToReplace.tag, tagToReplace.text) textParts = tagToReplace.text.split() for textPart in textParts: if not textPart.startswith('%%'): continue # it is not a code code = toCode(NAME_POLICY, textPart[2:]) # if this tag code is not in translations dictionary -> error if not code in translations: eprintf("ERROR: code '%s' not in resource file!\n", code) else: # replace code (introduced with double %% and followed by white character or by the end) with its translation newText = re.sub(r"%%"+code+"(?=\s|$)", translations[code], tagToReplace.text) tagToReplace.text = newText if VERBOSE: printf("-> translated as %s\n", tagToReplace.text) # OUTPUT NEW DIALOG if args.output is not None:
intentText = line.split("\t")[2] intentSplit = intentName.split("_",1) domainPart = intentSplit[0] intentPart = intentSplit[1] for entity in re.findall('<([^>]+)>[^<]+<\/[^>]+>', intentText): domEntMap[domainPart][entity] = 1 intEntMap[intentPart][entity] = 1 if args.domEnt: with open(args.domEnt, 'w') as domEntFile: for domain in sorted(domEntMap.keys()): entities="NONE;" for entity in sorted(domEntMap[domain].keys()): entities += entity + ";" domEntFile.write(domain + ";" + entities + "\n") if VERBOSE: printf("Domain-entity map '%s' was successfully created\n", args.domEnt) if args.domEnt: with open(args.intEnt, 'w') as intEntFile: for intent in sorted(intEntMap.keys()): entities="NONE;" for entity in sorted(intEntMap[intent].keys()): entities += entity + ";" intEntFile.write(intent + ";" + entities + "\n") if VERBOSE: printf("Intent-entity map '%s' was successfully created\n", args.domEnt) if args.list: with open(args.list, 'w') as listFile: # process entities entityNames = [] for entityFileName in os.listdir(args.entitiesDir):