Exemplo n.º 1
0
def main(argv):
    """Split a conversation-service intents .json file into per-intent .csv files.

    For every intent found in the input JSON, the example sentences are
    stripped, lower-cased and written one per line into
    ``<intentsDir>/<checked intent name>.csv``.

    :param argv: list of command line arguments handed to the argument parser
    """
    argParser = argparse.ArgumentParser(
        description='Decompose Bluemix conversation service intents in .json format to intent files in .csv format',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # positional arguments
    argParser.add_argument('intents', help='file with intents in .json format')
    argParser.add_argument('intentsDir', help='directory with intents files')
    # optional arguments
    argParser.add_argument(
        '-ni', '--common_intents_nameCheck',
        action='append',
        nargs=2,
        help="regex and replacement for intent name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase")
    argParser.add_argument(
        '-s', '--soft',
        required=False,
        help='soft name policy - change intents and entities names without error.',
        action='store_true',
        default="")
    argParser.add_argument(
        '-v', '--verbose',
        required=False,
        help='verbosity',
        action='store_true')
    argParser.add_argument(
        '--log',
        type=str.upper,
        default=None,
        choices=list(logging._levelToName.values()))
    args = argParser.parse_args(argv)

    # the logger is configured only when this module runs as a script
    if __name__ == '__main__':
        setLoggerConfig(args.log, args.verbose)

    if args.soft:
        namePolicy = 'soft'
    else:
        namePolicy = 'hard'

    with openFile(args.intents, 'r') as sourceFile:
        intentsJSON = json.load(sourceFile)

    # one output file per intent
    for intentEntry in intentsJSON:
        # collect the normalized example sentences first
        sentences = [
            item["text"].strip().lower() for item in intentEntry["examples"]
        ]
        checkedName = toIntentName(namePolicy, args.common_intents_nameCheck,
                                   intentEntry["intent"])
        targetPath = os.path.join(args.intentsDir, checkedName + ".csv")
        with openFile(targetPath, "w") as targetFile:
            for sentence in sentences:
                targetFile.write(sentence + "\n")

    logger.verbose("Intents from file '%s' were successfully extracted\n", args.intents)
Exemplo n.º 2
0
    def convertDialogData(self, dialogData, intents):
        """ Build the 'nodes' XML tree for the given intents and return its root element.

        Intents whose data reports a falsy generateNodes() are left out.
        Every remaining intent becomes one 'node' element holding its condition,
        its output and, when present, its context and goto elements.
        """
        rootXml = XML.Element('nodes')
        for intent in intents:
            data = dialogData.getIntentData(intent)
            if data.generateNodes():
                checkedName = toIntentName('soft', None, intent)
                intentXml = XML.Element('node',
                                        name=checkedName.decode('utf-8'))

                # condition is the intent name, prefixed with '#' unless it already starts with one
                condition = XML.Element('condition')
                if intent.decode('utf-8').startswith(u'#'):
                    condition.text = intent.decode('utf-8')
                else:
                    condition.text = u'#' + intent.decode('utf-8')
                intentXml.append(condition)

                outputXml = self._createOutputElement(
                    data.getChannelOutputs(), data.getButtons())
                intentXml.append(outputXml)

                # context is emitted only if the intent defines variables
                if data.getVariables():
                    contextXml = self._createContextElement(
                        data.getVariables())
                    intentXml.append(contextXml)

                # goto needs both the target and the selector
                if data.getJumpToTarget() and data.getJumpToSelector():
                    gotoXml = self._createGotoElement(
                        data.getJumpToTarget(), data.getJumpToSelector())
                    intentXml.append(gotoXml)

                rootXml.append(intentXml)

        return rootXml
    def parseXLSXIntoDataBlocks(self, filename):
        """ Load an Excel spreadsheet (T2C format) and cut every sheet into data blocks.

        A block is a run of consecutive valid rows. Each finished block is handed
        to __createBlock together with the domain (derived from the file name) and
        the prefix (the sheet title). An empty dict is returned when the file does
        not exist or cannot be opened as a spreadsheet.
        """

        def carriesData(cell):
            # a cell counts only if it holds a value that is not a '//' comment
            return bool(cell and cell.value
                        and not cell.value.startswith('//'))

        def cleanedText(cell):
            # stripped cell value; None for empty cells and '//' comments
            if cell.value and not cell.value.startswith('//'):
                return cell.value.strip()
            return None

        printf('Processing xlsx file: %s\n', filename)
        if not os.path.exists(filename):
            eprintf('Error: File does not exist: %s\n', filename)
            return {}

        try:
            fileBaseName = os.path.splitext(os.path.split(filename)[1])[0]
            domainName = unicode(
                toIntentName(NAME_POLICY, None, fileBaseName), 'utf-8')
            workbook = load_workbook(filename=filename, read_only=True)
        except (IOError, BadZipfile):
            eprintf(
                'Error: File does not seem to be a valid Excel spreadsheet: %s\n',
                filename)
            return {}

        # Every tab of the workbook is processed on its own
        for sheet in workbook.worksheets:
            printf(' Sheet: %s\n', sheet.title)
            prefix = unicode(sheet.title, 'utf-8')
            pendingBlock = []

            # Only the first four columns of each row are taken into account
            for row in sheet.iter_rows(max_col=4):
                # The row is valid if at least one of its cells carries data
                # (the list makes sure all four cells are inspected)
                dataFlags = [carriesData(row[column]) for column in range(0, 4)]
                rowIsValid = any(dataFlags)
                # Three slashes in the first cell turn the whole row into a comment
                if row[0].value and row[0].value.startswith('///'):
                    rowIsValid = False

                if rowIsValid:
                    # valid rows are collected into the block being built
                    pendingBlock.append(
                        tuple(cleanedText(row[column])
                              for column in range(0, 4)))
                    continue

                # an invalid row closes the block collected so far (if any)
                if pendingBlock:
                    self.__createBlock(domainName, prefix, pendingBlock)
                pendingBlock = []

            # the last block of the sheet is not followed by an invalid row
            if pendingBlock:
                self.__createBlock(domainName, prefix, pendingBlock)
    def __createBlock(self, domain, prefix, block):
        """ Classify one raw data block and register it.

        block is a list of row tuples; the first cell of its first row decides
        what happens:
          - ':label' followed by more rows - the label is remembered and the
            label row is removed before the block is processed further
          - '@...' - the block is passed to __handleEntityBlock
          - condition block (see __isConditionBlock) - only the name is derived
          - anything else - the intent name is derived, checked by toIntentName,
            and the block is appended to _dataBlocks (a pending label is mapped
            to the full intent name)

        Note: the label row is deleted from the passed list in place.
        """
        if not block or not block[0][0]:
            printf(
                'Warning: First cell of the data block does not contain any data. (domain=%s, prefix=%s)\n',
                domain, prefix)
            return

        # Check if there's a label
        label = None
        firstCell = block[0][0]
        if firstCell.startswith(u':') and len(block) > 1:
            label = firstCell[1:]
            if label in self._labelsMap:
                printf(
                    'Warning: Found a label that has already been assigned to an intent and will be overwritten. Label: %s\n',
                    label)
            del block[0]
            if not block or not block[0][0]:
                printf(
                    'WARNING: First cell of the goto block does not contain any data. (domain=%s, prefix=%s, label=%s)\n',
                    domain, prefix, label)
                return
            firstCell = block[0][0]

        # If it's entity block, load the entity
        if firstCell.startswith(u'@'):
            self.__handleEntityBlock(block)
            return

        # Check the intent name
        conditionHasX = Dialog.X_PLACEHOLDER in firstCell
        intentName = firstCell

        if self.__isConditionBlock(firstCell):
            # The placeholder is filled from the second row; a one-row block has
            # no second row, so guard the lookup instead of raising IndexError
            if conditionHasX and len(block) > 1 and block[1][0]:
                intentName = re.sub(Dialog.X_PLACEHOLDER, block[1][0],
                                    firstCell)
            # NOTE(review): nothing is stored for condition blocks here (and a
            # label found above is dropped) - confirm this is intended
        else:
            if firstCell.startswith(u'#'):
                intentName = firstCell[1:]
            else:
                # Create intent name from first sentence: accents are removed,
                # all spaces become underscores and the characters / , ? '
                # are dropped. str.replace is used because the fourth
                # positional argument of re.sub is 'count', not 'flags' -
                # passing re.UNICODE there limited the number of replaced spaces
                intentName = re.sub(
                    "[/,?']", '',
                    unidecode.unidecode(intentName).replace(' ', '_'))

            # check intent name
            fullIntentName = toIntentName(NAME_POLICY, None, domain, prefix,
                                          intentName)

            self._dialogData.getIntentData(fullIntentName, domain)
            self._dataBlocks.append((domain, prefix, fullIntentName, block))
            if label:
                self._labelsMap[label] = fullIntentName.decode('utf-8')
    def createUniqueIntentName(self, intent_name):
        """
            Derives an intent name that is not yet present in self._intents

            The given string is first normalized by toIntentName. If the
            normalized name is already taken, the numbers 0..9999 are appended
            one after another until a free name is found.

            :returns the unique name, or None if every candidate is taken
        """
        base_name = toIntentName(self._NAME_POLICY, [['$special', '\\A']],
                                 intent_name)
        if base_name not in self._intents:
            return base_name

        # lazily generate the numbered variants and take the first free one
        numbered_names = (base_name + repr(number)
                          for number in range(0, 10000))
        return next(
            (name for name in numbered_names if name not in self._intents),
            None)
    def parseXLSXIntoDataBlocks(self, filename):
        """ First pass over a single input file: loads an Excel spreadsheet
            (T2C format) and cuts every sheet into blocks of consecutive valid rows.
            Each block is handed to __createBlock together with the domain
            (derived from the file name) and the prefix (the sheet title).

            :returns empty dict if the file is missing or is not a readable spreadsheet
        """

        def carriesData(cell):
            # a cell counts only if it holds a value that is not a '//' comment
            return bool(cell and cell.value
                        and not cell.value.startswith('//'))

        def escapedText(cell):
            # stripped and escaped cell value; None for empty cells and '//' comments
            if cell.value and not cell.value.startswith('//'):
                return escape(cell.value.strip())
            return None

        logger.info('Processing xlsx file: %s', filename)
        if not os.path.exists(filename):
            logger.error('File does not exist: %s', filename)
            return {}

        # The domain name comes from the file name and goes through the same name check as intents
        try:
            fileBaseName = os.path.splitext(os.path.split(filename)[1])[0]
            domainName = toIntentName(self._NAME_POLICY, None, fileBaseName)
            try:
                domainName = unicode(domainName, 'utf-8')  # Python 2
            except NameError:
                domainName = str(domainName)  # Python 3
            workbook = load_workbook(filename=filename, read_only=True)
        except (IOError, BadZipfile):
            logger.error(
                'File does not seem to be a valid Excel spreadsheet: %s',
                filename)
            return {}

        # Every tab of the workbook is processed on its own
        for sheet in workbook.worksheets:
            # the sheet title serves as the prefix
            logger.info(' Sheet: %s', sheet.title)
            try:
                prefix = unicode(sheet.title, 'utf-8')  # Python 2
            except NameError:
                prefix = str(sheet.title)  # Python 3

            pendingBlock = []  # blocks never span sheets
            # Only the first four columns of each row are taken into account
            for row in sheet.iter_rows(max_col=4):
                # The row is valid if at least one of its cells carries data
                # (the list makes sure all four cells are inspected)
                dataFlags = [carriesData(row[column]) for column in range(0, 4)]
                rowIsValid = any(dataFlags)
                # Three slashes in the first cell turn the whole row into a comment
                if row[0].value and row[0].value.startswith('///'):
                    rowIsValid = False

                if rowIsValid:
                    # valid rows are collected into the block being built
                    pendingBlock.append(
                        tuple(escapedText(row[column])
                              for column in range(0, 4)))
                    continue

                # an invalid row closes the block collected so far (if any)
                if pendingBlock:
                    self.__createBlock(domainName, prefix, pendingBlock)
                pendingBlock = []

            # the last block of the sheet is not followed by an invalid row
            if pendingBlock:
                self.__createBlock(domainName, prefix, pendingBlock)
def main(argv):
    """Merge all intent files of a directory into one file in NLU .tsv format.

    Each intent file is read line by line; everything from the first '#' on is
    treated as a comment and removed. Every remaining non-empty example is
    written as ``1<TAB>intent name<TAB>example``. Optionally entities are
    tagged in the examples (--entityDir), and a list of intent names (--list)
    and a domain-to-intents map (--map) are generated.

    :param argv: list of command line arguments handed to the argument parser
    """
    parser = argparse.ArgumentParser(description='Converts intents files to one file in NLU tsv format', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # positional arguments
    parser.add_argument('intentsDir', help='directory with intents files - all of them will be included in output file')
    parser.add_argument('output', help='file with output intents in NLU data .tsv format')
    # optional arguments
    parser.add_argument('-e', '--entityDir', required=False, help='directory with lists of entities in csv files (file names = entity names), used to tag those entities in output')
    parser.add_argument('-l', '--list', required=False, help='file with list of all intents (if it should be generated)')
    parser.add_argument('-m', '--map', required=False, help='file with domain to intents map (if it should be generated)')
    parser.add_argument('-p', '--prefix', required=False, help='prefix for all generated intents (if it should be added)')
    # '\\L' spells the same text as before without relying on an invalid escape sequence
    parser.add_argument('-ni', '--common_intents_nameCheck', action='append', nargs=2, help="regex and replacement for intent name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase")
    parser.add_argument('-ne', '--common_entities_nameCheck', action='append', nargs=2, help="regex and replacement for entity name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase")
    parser.add_argument('-s', '--soft', required=False, help='soft name policy - change intents and entities names without error.', action='store_true', default="")
    parser.add_argument('-v', '--verbose', required=False, help='verbosity', action='store_true', default="")
    parser.add_argument('--log', type=str.upper, default=None, choices=list(logging._levelToName.values()))
    args = parser.parse_args(argv)

    # the logger is configured only when this module runs as a script
    if __name__ == '__main__':
        setLoggerConfig(args.log, args.verbose)

    NAME_POLICY = 'soft' if args.soft else 'hard'
    PREFIX = toIntentName(NAME_POLICY, args.common_intents_nameCheck, args.prefix)

    # 'entities' exists only when --entityDir was given; it is used under the same condition below
    if args.entityDir:
        entities = getEntities(args.entityDir, args.common_entities_nameCheck, NAME_POLICY)

    with openFile(args.output, 'w') as outputFile:
        # process intents
        intentNames = []
        for intentFileName in os.listdir(args.intentsDir):
            intentName = toIntentName(NAME_POLICY, args.common_intents_nameCheck, PREFIX, os.path.splitext(intentFileName)[0])
            if intentName not in intentNames:
                intentNames.append(intentName)
            with open(os.path.join(args.intentsDir, intentFileName), "r") as intentFile:
                for line in intentFile:
                    # remove comments
                    line = line.split('#')[0]
                    if args.entityDir:
                        line = tagEntities(line, entities)
                    # Cutting a comment also cuts the line break, which used to glue
                    # the next record onto the same output line. Normalize the line
                    # ending so that every record sits on its own line and blank
                    # lines are not emitted as empty examples.
                    line = line.rstrip()
                    if line:
                        outputFile.write("1\t" + intentName + "\t" + line + "\n")
    logger.verbose("Intents file '%s' was successfully created", args.output)

    if args.list:
        with openFile(args.list, 'w') as intentsListFile:
            for intentName in intentNames:
                intentsListFile.write(intentName + "\n")
        # report success only if the list was actually requested and written
        logger.verbose("Intents list '%s' was successfully created", args.list)

    if args.map:
        domIntMap = {}
        for intentName in intentNames:
            # the part before the first underscore is the domain, the rest is the intent
            # NOTE(review): raises IndexError for an intent name without '_' -
            # confirm that names always carry a domain part
            intentSplit = intentName.split("_", 1)
            domainPart = intentSplit[0]
            intentPart = intentSplit[1]
            domIntMap[domainPart] = domIntMap.get(domainPart, "") + ";" + intentPart
        with openFile(args.map, 'w') as intentsMapFile:
            for domainPart, intentParts in domIntMap.items():
                intentsMapFile.write(domainPart + intentParts + "\n")
        # name the map file here, not the intents output file
        logger.verbose("Domain-intent map '%s' was successfully created", args.map)
Exemplo n.º 8
0
        print('generated_intents parameter is not defined, ignoring')
    if not hasattr(config, 'common_outputs_intents'):
        print(
            'Outputs_intents parameter is not defined, output will be generated to console.'
        )

    intents = []

    pathList = getattr(config, 'common_intents')
    if hasattr(config, 'common_generated_intents'):
        pathList = pathList + getattr(config, 'common_generated_intents')

    filesAtPath = getFilesAtPath(pathList)
    for intentFileName in filesAtPath:
        intentName = toIntentName(
            NAME_POLICY, args.common_intents_nameCheck,
            os.path.splitext(os.path.basename(intentFileName))[0])
        with codecs.open(intentFileName, encoding='utf8') as intentFile:
            intent = {}
            intent['intent'] = intentName
            examples = []
            for line in intentFile:
                # remove comments
                line = line.split('#')[0]
                line = line.rstrip().lower()
                if line and not line in examples:
                    examples.append(line)
                elif line in examples:
                    printf(
                        'Example used twice for the intent %s, omitting:%s /n',
                        intentName, line)
Exemplo n.º 9
0
                        required=False,
                        help='verbosity',
                        action='store_true')
    args = parser.parse_args(sys.argv[1:])

    VERBOSE = args.verbose
    if args.soft: NAME_POLICY = 'soft'
    else: NAME_POLICY = 'hard'

    with open(args.intents, 'r') as intentsFile:
        intentsJSON = json.load(intentsFile)

    # process all intents
    for intentJSON in intentsJSON:
        examples = []
        # process all example sentences
        for exampleJSON in intentJSON["examples"]:
            examples.append(exampleJSON["text"].strip().lower())
        # new intent file
        intentFileName = os.path.join(
            args.intentsDir,
            toIntentName(NAME_POLICY, args.common_intents_nameCheck,
                         intentJSON["intent"]) + ".csv")
        with open(intentFileName, "w") as intentFile:
            for example in examples:
                intentFile.write((example + "\n").encode('utf8'))

    if VERBOSE:
        printf("Intents from file '%s' were successfully extracted\n",
               args.intents)
def main(argv):
    """Convert intent csv files into one .json file in Watson Conversation Service format.

    All files found under the configured intent directories (common_intents and
    common_generated_intents) are read in sorted order. The file name (without
    extension) gives the intent name, every non-empty line - with '#' comments
    removed, right-stripped and lower-cased - is passed to processExample and
    the returned examples are collected. The result is written to
    <common_outputs_directory>/<common_outputs_intents> if both are configured,
    otherwise it is printed to the console.

    :param argv: list of command line arguments handed to the argument parser
    """
    parser = argparse.ArgumentParser(
        description=
        'Converts intent csv files to .json format of Watson Conversation Service',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-c',
                        '--common_configFilePaths',
                        help='configuaration file',
                        action='append')
    parser.add_argument('-oc',
                        '--common_output_config',
                        help='output configuration file')
    parser.add_argument(
        '-ii',
        '--common_intents',
        help=
        'directory with intent csv files to be processed (all of them will be included in output json)',
        action='append')  # -gi is functionally equivalent to -ii
    parser.add_argument(
        '-gi',
        '--common_generated_intents',
        help=
        'directory with generated intent csv files to be processed (all of them will be included in output json)',
        action='append')
    parser.add_argument(
        '-od',
        '--common_outputs_directory',
        required=False,
        help='directory where the otputs will be stored (outputs is default)')
    parser.add_argument('-oi',
                        '--common_outputs_intents',
                        help='file with output json with all the intents')
    parser.add_argument(
        '-ni',
        '--common_intents_nameCheck',
        action='append',
        nargs=2,
        help=
        "regex and replacement for intent name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase"
    )
    parser.add_argument(
        '-s',
        '--soft',
        required=False,
        help=
        'soft name policy - change intents and entities names without error.',
        action='store_true',
        default="")
    parser.add_argument('-v',
                        '--verbose',
                        required=False,
                        help='verbosity',
                        action='store_true')
    parser.add_argument('--log',
                        type=str.upper,
                        default=None,
                        choices=list(logging._levelToName.values()))
    args = parser.parse_args(argv)

    # the logger is configured only when this module runs as a script
    if __name__ == '__main__':
        setLoggerConfig(args.log, args.verbose)

    config = Cfg(args)

    NAME_POLICY = 'soft' if args.soft else 'hard'
    logger.info('STARTING: ' + os.path.basename(__file__))

    if not hasattr(config, 'common_intents'):
        logger.info('intents parameter is not defined.')
    if not hasattr(config, 'common_generated_intents'):
        logger.info('generated_intents parameter is not defined, ignoring')
    if not hasattr(config, 'common_outputs_intents'):
        logger.info(
            'Outputs_intents parameter is not defined, output will be generated to console.'
        )

    intents = []

    # A missing common_intents parameter is only reported above, so it must not
    # raise AttributeError here - fall back to an empty list of paths
    pathList = getattr(config, 'common_intents', [])
    if hasattr(config, 'common_generated_intents'):
        pathList = pathList + getattr(config, 'common_generated_intents')

    filesAtPath = getFilesAtPath(pathList)
    for intentFileName in sorted(filesAtPath):
        # the intent is named after its file (extension dropped, name checked)
        intentName = toIntentName(
            NAME_POLICY, args.common_intents_nameCheck,
            os.path.splitext(os.path.basename(intentFileName))[0])
        with openFile(intentFileName, 'r', encoding='utf8') as intentFile:
            intent = {}
            intent['intent'] = intentName
            examples = []
            for line in intentFile:
                # remove comments
                line = line.split('#')[0]
                line = line.rstrip().lower()
                if line:
                    # processExample gets the examples collected so far;
                    # only truthy results are added
                    example = processExample(line, intentName, examples)
                    if example:
                        examples.append(example)

            intent['examples'] = examples
            intents.append(intent)

    if hasattr(config, 'common_outputs_directory') and hasattr(
            config, 'common_outputs_intents'):
        if not os.path.exists(getattr(config, 'common_outputs_directory')):
            os.makedirs(getattr(config, 'common_outputs_directory'))
            logger.info('Created new output directory ' +
                        getattr(config, 'common_outputs_directory'))
        with codecs.open(os.path.join(
                getattr(config, 'common_outputs_directory'),
                getattr(config, 'common_outputs_intents')),
                         'w',
                         encoding='utf8') as outputFile:
            outputFile.write(json.dumps(intents, indent=4, ensure_ascii=False))
    else:
        # without a complete output location the json goes to the console
        print(json.dumps(intents, indent=4, ensure_ascii=False))

    logger.info('FINISHING: ' + os.path.basename(__file__))
Exemplo n.º 11
0
    parser.add_argument('-v', '--verbose', required=False, help='verbosity', action='store_true')
    args = parser.parse_args(sys.argv[1:])

    VERBOSE = args.verbose
    if args.soft: NAME_POLICY = 'soft'
    else: NAME_POLICY = 'hard'

    domEntMap = defaultdict(dict)
    intEntMap = defaultdict(dict)

    if args.sentences:
        with open(args.sentences, "r") as sentencesFile:
            for line in sentencesFile.readlines():
                line = line.rstrip()
                if not line: continue
                intentName = toIntentName(NAME_POLICY, args.common_intents_nameCheck, line.split("\t")[1])
                intentText = line.split("\t")[2]
                intentSplit = intentName.split("_",1)
                domainPart = intentSplit[0]
                intentPart = intentSplit[1]
                for entity in re.findall('<([^>]+)>[^<]+<\/[^>]+>', intentText):
                    domEntMap[domainPart][entity] = 1
                    intEntMap[intentPart][entity] = 1

    if args.domEnt:
        with open(args.domEnt, 'w') as domEntFile:
            for domain in sorted(domEntMap.keys()):
                entities="NONE;"
                for entity in sorted(domEntMap[domain].keys()):
                    entities += entity + ";"
                domEntFile.write(domain + ";" + entities + "\n")
Exemplo n.º 12
0
def main(argv):
    """Convert an NLU .tsv file into domain-entity and intent-entity mappings.

    The sentences file (--sentences) is read line by line: the second column
    holds the intent name, the third the example sentence with tagged entities
    (<entity>text</entity>). The intent name is split at its first underscore
    into a domain part and an intent part, and every tagged entity is recorded
    for both. Depending on the options the domain-entity map (--domEnt), the
    intent-entity map (--intEnt) and the list of entities found in entitiesDir
    (--list) are written.

    :param argv: list of command line arguments handed to the argument parser
    """
    parser = argparse.ArgumentParser(
        description=
        'convert NLU tsv files into domain-entity and intent-entity mappings.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # positional arguments
    parser.add_argument(
        'entitiesDir',
        help=
        'directory with entities files - all of them will be included in output list if specified'
    )
    # optional arguments
    parser.add_argument(
        '-is',
        '--sentences',
        help=
        '.tsv file in NLU format with tagged entities in example sentences in third column and intent names in second column'
    )
    parser.add_argument(
        '-l',
        '--list',
        required=False,
        help='output file with list of all entities (if it should be generated)'
    )
    parser.add_argument(
        '-d',
        '--domEnt',
        required=False,
        help=
        'output file with domain-entity mapping (if it should be generated)')
    parser.add_argument(
        '-i',
        '--intEnt',
        required=False,
        help=
        'output file with intent-entity mapping (if it should be generated)')
    # '\\L' spells the same text as before without relying on an invalid escape sequence
    parser.add_argument(
        '-ni',
        '--common_intents_nameCheck',
        action='append',
        nargs=2,
        help=
        "regex and replacement for intent name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase"
    )
    parser.add_argument(
        '-ne',
        '--common_entities_nameCheck',
        action='append',
        nargs=2,
        help=
        "regex and replacement for entity name check, e.g. '-' '_' for to replace hyphens for underscores or '$special' '\\L' for lowercase"
    )
    parser.add_argument(
        '-s',
        '--soft',
        required=False,
        help=
        'soft name policy - change intents and entities names without error.',
        action='store_true',
        default="")
    parser.add_argument('-v',
                        '--verbose',
                        required=False,
                        help='verbosity',
                        action='store_true')
    parser.add_argument('--log',
                        type=str.upper,
                        default=None,
                        choices=list(logging._levelToName.values()))
    args = parser.parse_args(argv)

    # the logger is configured only when this module runs as a script
    if __name__ == '__main__':
        setLoggerConfig(args.log, args.verbose)

    NAME_POLICY = 'soft' if args.soft else 'hard'

    # both maps are used as sets of entity names: map[key][entity] = 1
    domEntMap = defaultdict(dict)
    intEntMap = defaultdict(dict)

    if args.sentences:
        # raw string: same pattern as before, without the invalid '\/' escape
        # in a regular string literal
        taggedEntity = re.compile(r'<([^>]+)>[^<]+<\/[^>]+>')
        with openFile(args.sentences, "r") as sentencesFile:
            for line in sentencesFile:
                line = line.rstrip()
                if not line:
                    continue
                columns = line.split("\t")
                intentName = toIntentName(NAME_POLICY,
                                          args.common_intents_nameCheck,
                                          columns[1])
                intentText = columns[2]
                # the part before the first underscore is the domain, the rest is the intent
                # NOTE(review): raises IndexError for an intent name without '_' -
                # confirm that names always carry a domain part
                intentSplit = intentName.split("_", 1)
                domainPart = intentSplit[0]
                intentPart = intentSplit[1]
                for entity in taggedEntity.findall(intentText):
                    domEntMap[domainPart][entity] = 1
                    intEntMap[intentPart][entity] = 1

    if args.domEnt:
        with openFile(args.domEnt, 'w') as domEntFile:
            for domain in sorted(domEntMap.keys()):
                entities = "NONE;"
                for entity in sorted(domEntMap[domain].keys()):
                    entities += entity + ";"
                domEntFile.write(domain + ";" + entities + "\n")
        logger.debug("Domain-entity map '%s' was successfully created",
                     args.domEnt)

    # The intent-entity map depends on --intEnt; checking --domEnt here either
    # tried to open a missing file name or never wrote the requested map
    if args.intEnt:
        with openFile(args.intEnt, 'w') as intEntFile:
            for intent in sorted(intEntMap.keys()):
                entities = "NONE;"
                for entity in sorted(intEntMap[intent].keys()):
                    entities += entity + ";"
                intEntFile.write(intent + ";" + entities + "\n")
        logger.debug("Intent-entity map '%s' was successfully created",
                     args.intEnt)

    if args.list:
        with openFile(args.list, 'w') as listFile:
            # process entities
            entityNames = []
            for entityFileName in os.listdir(args.entitiesDir):
                entityName = toEntityName(NAME_POLICY,
                                          args.common_entities_nameCheck,
                                          os.path.splitext(entityFileName)[0])
                # different file names may be checked into the same entity name
                if entityName not in entityNames:
                    entityNames.append(entityName)
            for entityName in entityNames:
                listFile.write(entityName + ";\n")
        logger.debug("Entities list '%s' was successfully created", args.list)
Exemplo n.º 13
0
        required=False,
        help=
        'soft name policy - change intents and entities names without error.',
        action='store_true',
        default="")
    parser.add_argument('-v',
                        '--verbose',
                        required=False,
                        help='verbosity',
                        action='store_true',
                        default="")
    args = parser.parse_args(sys.argv[1:])

    VERBOSE = args.verbose
    NAME_POLICY = 'soft' if args.soft else 'hard'
    PREFIX = toIntentName(NAME_POLICY, args.common_intents_nameCheck,
                          args.prefix)

    if args.entityDir:
        entities = getEntities(args.entityDir, NAME_POLICY)

    with open(args.output, 'w') as outputFile:
        # process intents
        intentNames = []
        for intentFileName in os.listdir(args.intentsDir):
            intentName = toIntentName(NAME_POLICY,
                                      args.common_intents_nameCheck, PREFIX,
                                      os.path.splitext(intentFileName)[0])
            if intentName not in intentNames:
                intentNames.append(intentName)
            with open(os.path.join(args.intentsDir, intentFileName),
                      "r") as intentFile: