Exemplo n.º 1
0
Arquivo: da.py Projeto: oplatek/alex
    def parse(self, dai_str):
        """
        Parses the dialogue act item in text format into a structured form.

        The text has the shape ``dat(name=value)``; the value, or both the
        slot name and the value, may be left out (``dat(name)``, ``dat()``).
        A value starting with a single or double quote is stored without its
        first and last character.

        Only the attributes present in the text are assigned: ``_name`` and
        ``_value`` are left untouched when the text does not contain them.
        ``_dat`` is always assigned and ``_str`` is reset to None.

        Arguments:
            dai_str -- the textual form of the dialogue act item; surrounding
                       whitespace is ignored

        Returns this object, so that calls can be chained.

        Raises DialogueActItemException if an opening or the closing
        parenthesis is missing, or if the text between the parentheses splits
        on '=' into more than two parts.

        """
        dai_str = dai_str.strip()

        try:
            first_par_idx = dai_str.index('(')
        except ValueError:
            raise DialogueActItemException(
                'Parsing error in: "{dai}". Missing opening parenthesis.'
                .format(dai=dai_str))

        # The slice below drops the last character unconditionally, so make
        # sure that character really is the closing parenthesis.  Otherwise
        # a truncated input such as 'inform(food' would silently lose the
        # last character of the slot name.
        if not dai_str.endswith(')'):
            raise DialogueActItemException(
                'Parsing error in: "{dai}". Missing closing parenthesis.'
                .format(dai=dai_str))

        self._dat = dai_str[:first_par_idx]

        # Remove the parentheses, parse slot name and value.
        dai_nv = dai_str[first_par_idx + 1:-1]
        if dai_nv:
            name_val = split_by(dai_nv, splitter='=', quotes='"')
            if len(name_val) not in (1, 2):
                raise DialogueActItemException(
                    "Parsing error in: {dai_str}: {atval}".format(
                        dai_str=dai_str, atval=name_val))

            # The slot name is always the first part.
            self._name = name_val[0]
            if len(name_val) == 2:
                # There is also a slot value; strip the quotes around it.
                self._value = name_val[1]
                if self._value and self._value[0] in ["'", '"']:
                    self._value = self._value[1:-1]

        self._str = None
        return self
Exemplo n.º 2
0
    def parse(self, dai_str):
        """
        Parses the dialogue act item in text format into a structured form.

        Accepted shapes are ``dat()``, ``dat(name)`` and ``dat(name=value)``.
        The part before the first '(' is stored in ``_dat``; the slot name and
        value (when present) are stored in ``_name`` and ``_value``.  A value
        that starts with a single or double quote has its first and last
        character removed.  ``_str`` is reset to None.

        Attributes which are not present in the text are not modified.

        Arguments:
            dai_str -- text of the dialogue act item (it is stripped first)

        Returns this object.

        Raises DialogueActItemException when the text has no opening
        parenthesis, does not end with a closing parenthesis, or when its
        content consists of more than two '='-separated parts.

        """
        text = dai_str.strip()

        open_idx = text.find('(')
        if open_idx < 0:
            raise DialogueActItemException(
                'Parsing error in: "{dai}". Missing opening parenthesis.'.
                format(dai=text))

        # The content is cut out with a [..:-1] slice, which would silently
        # eat the last character of a text that is not properly closed
        # (e.g. 'inform(food' would yield the slot name 'foo').
        if text[-1] != ')':
            raise DialogueActItemException(
                'Parsing error in: "{dai}". Missing closing parenthesis.'.
                format(dai=text))

        self._dat = text[:open_idx]

        # Everything between the parentheses: slot name and optional value.
        content = text[open_idx + 1:-1]
        if content:
            parts = split_by(content, splitter='=', quotes='"')
            n_parts = len(parts)
            if n_parts == 1:
                # Slot name only.
                self._name = parts[0]
            elif n_parts == 2:
                # Slot name and value; the value may be quoted.
                self._name, self._value = parts[0], parts[1]
                if self._value and self._value[0] in ["'", '"']:
                    self._value = self._value[1:-1]
            else:
                raise DialogueActItemException(
                    "Parsing error in: {dai_str}: {atval}".format(
                        dai_str=text, atval=parts))

        self._str = None
        return self
Exemplo n.º 3
0
Arquivo: da.py Projeto: oplatek/alex
    def parse(self, da_str):
        """
        Rebuilds this dialogue act from its textual form.

        The text is cut into pieces by `split_by' using '&' as the splitter
        (parentheses and double quotes are passed to it as grouping
        characters) and one DialogueActItem is created from each piece.

        DAIs stored in this DA before the call are discarded, and the DA is
        marked as not sorted.

        Arguments:
            da_str -- the textual form of the dialogue act

        """
        # Drop the previously stored DAIs in place.
        if self._dais:
            del self._dais[:]

        dai_strs = split_by(da_str, splitter='&', opening_parentheses='(',
                            closing_parentheses=')', quotes='"')

        # Items are added one by one, as they are constructed.
        target = self._dais
        for dai_str in dai_strs:
            target.append(DialogueActItem(dai=dai_str))

        self._dais_sorted = False
Exemplo n.º 4
0
Arquivo: da.py Projeto: tkraut/alex
    def parse(self, da_str):
        """
        Parses the dialogue act from text, replacing the current content.

        `da_str' is handed to `split_by' with '&' as the splitter, '(' and
        ')' as parentheses and '"' as the quote character; every resulting
        piece is turned into a DialogueActItem and stored in this DA.
        Afterwards the DA is flagged as unsorted.

        Arguments:
            da_str -- the dialogue act in its textual form

        """
        if self._dais:
            # Empty the existing list in place rather than rebinding it.
            self._dais[:] = []

        pieces = split_by(da_str, splitter='&', opening_parentheses='(',
                          closing_parentheses=')', quotes='"')

        add_dai = self._dais.append
        for piece in pieces:
            add_dai(DialogueActItem(dai=piece))

        self._dais_sorted = False
Exemplo n.º 5
0
def load_semantics(file_name):
    """Loads a mapping from keys to split semantics from a UTF-8 text file.

    Every non-blank line is split on "=>".  The stripped text before the
    first "=>" is the key; the stripped text between the first and the second
    "=>" (or up to the end of the line) is passed to `split_by' with '&' as
    the splitter, and the result is stored under the key.  A key occurring on
    several lines keeps the value from the last of them.

    Arguments:
        file_name -- path of the file to read

    Returns a defaultdict(list) mapping each key to the result of
    `split_by'.

    Raises IndexError if a non-blank line does not contain "=>".

    """
    semantics = defaultdict(list)

    # The `with' block closes the file even if a line fails to parse.
    with codecs.open(file_name, encoding='UTF-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            parts = line.split("=>")

            key = parts[0].strip()
            sem = parts[1].strip()

            semantics[key] = split_by(sem, '&', '(', ')', '"')

    return semantics
Exemplo n.º 6
0
def load_semantics(file_name):
    """Reads "key => semantics" lines from a UTF-8 file into a dictionary.

    Blank lines are skipped.  Each remaining line is split on "=>": the first
    field (stripped) is the key, the second field (stripped) is split by
    `split_by' on '&' with '(' / ')' as parentheses and '"' as the quote
    character.  Fields after the second one are ignored, and a later line
    with the same key overwrites an earlier one.

    Arguments:
        file_name -- path of the file with the semantics

    Returns a defaultdict(list) with the `split_by' result for every key.

    Raises IndexError for a non-blank line without "=>".

    """
    semantics = defaultdict(list)

    # Use a context manager so that the file is closed on errors as well.
    with codecs.open(file_name, encoding='UTF-8') as sem_file:
        for raw_line in sem_file:
            stripped = raw_line.strip()
            if stripped:
                fields = stripped.split("=>")
                key = fields[0].strip()
                sem_text = fields[1].strip()
                semantics[key] = split_by(sem_text, '&', '(', ')', '"')

    return semantics
Exemplo n.º 7
0
    def parse(self, da_str):
        """
        Parses a CUED dialogue act string and stores it as a list of DAIs.

        The text before the first '(' is the dialogue act type (stored in
        `self.dat'); the lowercased text between the parentheses is split on
        ',' into slots.  Slots for which CUEDSlot raises ValueError are
        skipped.  The slots are then translated to DialogueActItems depending
        on the dialogue act type and appended to `self._dais'.

        When there is nothing between the parentheses, `self._dais' is set to
        an empty list regardless of the dialogue act type.
        NOTE(review): this means that e.g. 'bye()' yields no DAI at all --
        confirm that this is intended.

        Arguments:
            da_str -- the CUED dialogue act in its textual form

        Raises:
            ValueError -- if `da_str' contains no '(' or if `split_by_comma'
                does not return exactly one item for it
            CuedDialogueActError -- if the dialogue act type is unknown
            IndexError -- for 'select' with fewer than two parsed slots and
                for 'deny' with no parsed slot

        """
        # Get the dialogue act type.
        first_par_idx = da_str.index("(")
        self.dat = da_str[:first_par_idx]

        if len(split_by_comma(da_str)) != 1:
            raise ValueError('Too many (or none -- too few) DAs in CUED DA '
                             'representation.')

        slots_str = da_str[first_par_idx:].lower()[1:-1]
        if not slots_str:
            # no slots to process
            self._dais = list()
        else:
            # split slots_str
            slotstr_list = split_by(slots_str, splitter=',', quotes='"')

            slots = list()
            for slot_str in slotstr_list:
                try:
                    slots.append(CUEDSlot(slot_str))
                except ValueError:
                    # Skip slots we cannot parse.
                    pass

            if self.dat == 'inform':
                # Negated slots are denied, the other ones are informed.
                for slot in slots:
                    self._dais.append(
                        DialogueActItem('deny' if slot.negated else 'inform',
                                        slot.name, slot.value))

            elif self.dat == 'request':
                for slot in slots:
                    if slot.value:
                        # A slot with a value is not a request.
                        self._dais.append(
                            DialogueActItem(
                                'deny' if slot.negated else 'inform',
                                slot.name, slot.value))
                    else:
                        self._dais.append(
                            DialogueActItem('request', slot.name, slot.value))

            elif self.dat == 'confirm':
                for slot in slots:
                    # The 'name' slot is informed rather than confirmed.
                    dat = 'inform' if slot.name == 'name' else 'confirm'
                    self._dais.append(
                        DialogueActItem(dat, slot.name, slot.value))

            elif self.dat == 'select':
                # XXX We cannot represent DAIS with multiple slots as of now.
                # Therefore, the select DAT is split into two DAIs here.
                self._dais.append(
                    DialogueActItem('select', slots[0].name, slots[0].value))
                self._dais.append(
                    DialogueActItem('select', slots[1].name, slots[1].value))

            elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup',
                              'repeat', 'help', 'restart', 'null'):
                # The slots are ignored for these dialogue act types.
                self._dais.append(DialogueActItem(self.dat))

            elif self.dat in ('hello', 'affirm', 'negate', 'reqalts',
                              'reqmore'):
                self._dais.append(DialogueActItem(self.dat))
                # Iterate over the parsed slots.  Iterating over self._dais
                # here would walk the very list that is being appended to,
                # and its items are DialogueActItems, not slots.
                for slot in slots:
                    self._dais.append(
                        DialogueActItem('deny' if slot.negated else 'inform',
                                        slot.name, slot.value))

            elif self.dat == 'deny':
                # The first slot is always denied.
                self._dais.append(
                    DialogueActItem('deny', slots[0].name, slots[0].value))
                for slot in slots[1:]:
                    self._dais.append(
                        DialogueActItem('deny' if slot.negated else 'inform',
                                        slot.name, slot.value))

            else:
                raise CuedDialogueActError(
                    'Unknown CUED DA type "{dat}" when parsing "{da_str}".'.
                    format(dat=self.dat, da_str=da_str))

        self._dais_sorted = False
Exemplo n.º 8
0
            slts = da.get_slots_and_values()
            for slt in slts:
                slots[slt].update(slts[slt])

        fo = open(
            os.path.join(outdir,
                         os.path.basename(fn).replace('.sem', '.grp')), 'w+')
        for key in sorted(da_clustered):
            fo.write(key)
            fo.write(' <=> ')
            fo.write(str(sorted(list(da_clustered[key]))) + '\n')
        fo.close()

        dai_unique = set()
        for da in sorted(da_clustered):
            dais = split_by(da, '&', '(', ')', '"')
            for dai in dais:
                dai_unique.add(dai)

        fo = open(
            os.path.join(outdir,
                         os.path.basename(fn).replace('.sem', '.grp.dais')),
            'w+')
        for dai in sorted(dai_unique):
            fo.write(dai)
            fo.write('\n')
        fo.close()

        da_reclustered = collections.defaultdict(set)
        for key in da_clustered:
            sem_reduced = re.sub(r'([a-z_0-9]+)(="[a-zA-Z0-9_\'! ]+")', r'\1',
Exemplo n.º 9
0
            slts = da.get_slots_and_values()
            for slt in slts:
                slots[slt].update(slts[slt])

        fo = open(os.path.join(
            outdir, os.path.basename(fn).replace('.sem', '.grp')), 'w+')
        for key in sorted(da_clustered):
            fo.write(key)
            fo.write(' <=> ')
            fo.write(str(sorted(list(da_clustered[key]))) + '\n')
        fo.close()

        dai_unique = set()
        for da in sorted(da_clustered):
            dais = split_by(da, '&', '(', ')', '"')
            for dai in dais:
                dai_unique.add(dai)

        fo = open(os.path.join(
            outdir, os.path.basename(fn).replace('.sem', '.grp.dais')), 'w+')
        for dai in sorted(dai_unique):
            fo.write(dai)
            fo.write('\n')
        fo.close()

        da_reclustered = collections.defaultdict(set)
        for key in da_clustered:
            sem_reduced = re.sub(
                r'([a-z_0-9]+)(="[a-zA-Z0-9_\'! ]+")', r'\1', key)
            da_reclustered[sem_reduced].update(da_clustered[key])
Exemplo n.º 10
0
Arquivo: cued_da.py Projeto: AoJ/alex
    def parse(self, da_str):
        """
        Parses the CUED dialogue act from text into DialogueActItems.

        `self.dat' is set to the text preceding the first '('.  The rest is
        lowercased, stripped of its first and last character and split on ','
        into slot strings, from which CUEDSlot objects are built (slot
        strings for which CUEDSlot raises ValueError are ignored).  The DAIs
        derived from the slots are appended to `self._dais'; which DAIs are
        created depends on the dialogue act type.

        If there is no text between the parentheses, `self._dais' is replaced
        with an empty list and the dialogue act type is not looked at.
        NOTE(review): slot-less acts such as 'hello()' therefore produce no
        DAI -- verify against the callers whether that is wanted.

        Arguments:
            da_str -- textual form of the CUED dialogue act

        Raises:
            ValueError -- when `da_str' has no '(' or `split_by_comma' does
                not split it into exactly one item
            CuedDialogueActError -- when the dialogue act type is not known
            IndexError -- when 'select' has fewer than two usable slots or
                'deny' has none

        """
        # Get the dialogue act type.
        first_par_idx = da_str.index("(")
        self.dat = da_str[:first_par_idx]

        if len(split_by_comma(da_str)) != 1:
            raise ValueError('Too many (or none -- too few) DAs in CUED DA '
                             'representation.')

        def inform_or_deny(slot):
            # A negated slot gives a 'deny' DAI, any other an 'inform' DAI.
            dat = 'deny' if slot.negated else 'inform'
            return DialogueActItem(dat, slot.name, slot.value)

        slots_str = da_str[first_par_idx:].lower()[1:-1]
        if not slots_str:
            # no slots to process
            self._dais = list()
        else:
            # split slots_str
            slotstr_list = split_by(slots_str, splitter=',', quotes='"')

            slots = list()
            for slot_str in slotstr_list:
                try:
                    slots.append(CUEDSlot(slot_str))
                except ValueError:
                    # Skip slots we cannot parse.
                    pass

            if self.dat == 'inform':
                for slot in slots:
                    self._dais.append(inform_or_deny(slot))

            elif self.dat == 'request':
                for slot in slots:
                    if slot.value:
                        # Slots carrying a value are informed or denied.
                        self._dais.append(inform_or_deny(slot))
                    else:
                        self._dais.append(DialogueActItem(
                            'request', slot.name, slot.value))

            elif self.dat == 'confirm':
                for slot in slots:
                    if slot.name == 'name':
                        # The 'name' slot is informed, not confirmed.
                        self._dais.append(DialogueActItem(
                            'inform', slot.name, slot.value))
                    else:
                        self._dais.append(DialogueActItem(
                            'confirm', slot.name, slot.value))

            elif self.dat == 'select':
                # XXX We cannot represent DAIS with multiple slots as of now.
                # Therefore, the select DAT is split into two DAIs here.
                self._dais.append(DialogueActItem(
                    'select', slots[0].name, slots[0].value))
                self._dais.append(DialogueActItem(
                    'select', slots[1].name, slots[1].value))

            elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup',
                              'repeat', 'help', 'restart', 'null'):
                # Slots are disregarded for these types.
                self._dais.append(DialogueActItem(self.dat))

            elif self.dat in ('hello', 'affirm', 'negate', 'reqalts',
                              'reqmore'):
                self._dais.append(DialogueActItem(self.dat))
                # The slots have to be taken from `slots'; looping over
                # self._dais would iterate the list being extended, whose
                # items are DialogueActItems rather than slots.
                for slot in slots:
                    self._dais.append(inform_or_deny(slot))

            elif self.dat == 'deny':
                # The first slot is denied whether or not it is negated.
                self._dais.append(DialogueActItem(
                    'deny', slots[0].name, slots[0].value))
                for slot in slots[1:]:
                    self._dais.append(inform_or_deny(slot))

            else:
                raise CuedDialogueActError(
                    'Unknown CUED DA type "{dat}" when parsing "{da_str}".'
                    .format(dat=self.dat, da_str=da_str))

        self._dais_sorted = False