Пример #1
0
    def serializeFields(data):
        """Serialize every attribute of a Mambu object into strings.

        Recurses into lists and dicts, converting each element; the base
        case is a MambuStruct, which delegates to its serializeStruct()
        method. Anything else is coerced with unicode().

        Note: tuples and sets are not recursed into; they fall through
        to the final unicode() call and are stringified whole (same as
        the original implementation).
        """
        if isinstance(data, MambuStruct):
            return data.serializeStruct()
        # isinstance checks replace the original fragile comparison of
        # iterator types (type(it) == type(iter([]))), which broke for
        # objects whose __iter__ returns a borrowed list/dict iterator.
        if isinstance(data, list):
            return [MambuStruct.serializeFields(item) for item in data]
        if isinstance(data, dict):
            return dict((key, MambuStruct.serializeFields(val))
                        for key, val in data.items())
        # non-container (or unsupported container): stringify as-is
        return unicode(data)
Пример #2
0
 def define_viable(self, value):
     """Parse *value* (a number or an 'NN%' string) into self._viable."""
     text = unicode(value)
     try:
         # a trailing '%' marks a percentage: strip it and scale to a fraction
         viable = float(text[:-1]) / 100 if text[-1] == u'%' else float(value)
     except ValueError:
         print("Invalid viable value '%s'" % value)
     else:
         self._viable = viable
Пример #3
0
 def __new__(cls, s, *args, **kwargs):
     """Build a (possibly translated) string, keeping the source text."""
     if isinstance(s, _):
         # unwrap an already-wrapped instance back to its source text
         s = unicode(s.untranslated)
     # translate only when a module-level translator is configured
     payload = translator(s, *args, **kwargs) if translator else s
     obj = super(_, cls).__new__(cls, payload, *args, **kwargs)
     obj.untranslated = unicode(s)
     obj._additionals = []
     return obj
Пример #4
0
 def __new__(cls, s, *args, **kwargs):
     """Create a (possibly translated) string object.

     If *s* is already an instance of this class, its original
     untranslated text is used as the source. When a module-level
     ``translator`` is configured, the stored string value is the
     translated text; the untranslated source is always kept on
     ``obj.untranslated``.
     """
     if isinstance(s, _):
         s = unicode(s.untranslated)
     if translator:
         trans = translator(s, *args, **kwargs)
         obj = super(_, cls).__new__(cls, trans, *args, **kwargs)
     else:
         obj = super(_, cls).__new__(cls, s, *args, **kwargs)
     obj.untranslated = unicode(s)
     # extra data attached later by other methods — not visible in this chunk
     obj._additionals = []
     return obj
Пример #5
0
 def define_variance(self, seq, value):
     """Store the coefficient of variation for sequence *seq*."""
     seq = unicode(seq)  # measurement keys are stored as strings
     text = unicode(value)
     try:
         # a trailing '%' marks a percentage; scale it down to a fraction
         cv = float(text[:-1]) / 100 if text[-1] == u'%' else float(value)
     except ValueError:
         print("Invalid cv value '%s'" % value)
     else:
         self._measure_data[seq]['variance'] = cv
Пример #6
0
def create_categories(connection):
    """Insert a Category row for every entry in category_table_data."""
    for name, row in category_table_data.items():
        # row[0] is the category id, row[1] its optional parent id
        fields = dict(id=unicode(row[0]), name=name, balance=0)
        if row[1] is not None:
            fields['parent'] = unicode(row[1])
        connection.add(Category(**fields))
Пример #7
0
 def feed_ctags(self, tagsfile_obj):
     """Load a ctags file into the 'function' table.

     Each non-comment line is tab-separated:
     name, file, search pattern, [type], [extension fields].
     A ``class:`` extension field links the entry to its class id;
     everything else is stored with class id 0 (a plain function).
     """
     c = self.cursor()  # one cursor is enough for the whole batch
     for line in tagsfile_obj:
         if not isPython3:
             line = builtins.unicode(line, 'utf8', 'replace')
         if line.startswith('!'):
             # ctags metadata/comment line
             continue
         fields = line.split('\t')
         name = fields[0]
         fname = fields[1]
         pattern = fields[2]
         # Default: class id 0 = plain function.  The original code left
         # idd unset when an extension field was present but was not a
         # 'class:' field, yielding a stale (or undefined) class id.
         idd = 0
         try:
             ext = fields[4]
             if ext and ext.startswith('class:'):
                 klass = ext.split(':', 1)[1].strip()
                 idd = self.class_id(klass)
         except IndexError:
             pass
         fid = self.file_id(fname)
         c.execute('insert into function(class, name, searchpattern, file) values (?, ?, ?, ?)',
                   [idd, name, pattern, fid])
     self.dbconn.commit()
Пример #8
0
    def __call__(self, text):
        """Convert Persian *text* into a flat list of phoneme strings.

        The text is normalized and tokenized with hazm; each word is
        looked up in the tihu dictionary and falls back to
        self.predict() for out-of-vocabulary words.  Words containing
        none of self.graphemes pass through unchanged.  A single-space
        separator follows every word; the trailing one is dropped.
        """
        # preprocessing
        text = unicode(text)
        text = normalize_numbers(text)
        # text = ''.join(char for char in unicodedata.normalize('NFD', text)
        #                if unicodedata.category(char) != 'Mn')  # Strip accents
        # text = re.sub("[^ a-z'.,?!\-]", "", text)

        normalizer = hazm.Normalizer()
        text = normalizer.normalize(text)
        # tokenization
        words = hazm.word_tokenize(text)
        # tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        prons = []
        for word in words:
            # token contains no known grapheme: keep it verbatim
            if not any(letter in word for letter in self.graphemes):
                pron = [word]

            # elif word in self.homograph2features:  # Check homograph
            #     pron1, pron2, pos1 = self.homograph2features[word]
            #     if pos.startswith(pos1):
            #         pron = pron1
            #     else:
            #         pron = pron2
            elif word in self.tihu:  # lookup tihu dict
                pron = self.tihu[word]
            else: # predict for oov
                pron = self.predict(word)

            prons.extend(pron)
            prons.extend([" "])

        return prons[:-1]
Пример #9
0
 def __valid_ip(self, value):
     """Return *value* if it parses as a globally routable IP, else None."""
     try:
         # ipaddress needs text (unicode on Python 2)
         if not ipaddress.ip_address(unicode(value)).is_global:
             return None
     except ValueError:
         # not a parseable IPv4/IPv6 address (the original bare `except:`
         # hid unrelated errors; ip_address raises ValueError, and
         # UnicodeDecodeError from unicode() is a ValueError subclass)
         return None
     return value
Пример #10
0
 def feed_ctags(self, tagsfile_obj):
     """Load a ctags file into the 'function' table.

     Each non-comment line is tab-separated:
     name, file, search pattern, [type], [extension fields].
     A ``class:`` extension field links the entry to its class id;
     everything else is stored with class id 0 (a plain function).
     """
     c = self.cursor()  # one cursor is enough for the whole batch
     for line in tagsfile_obj:
         if not isPython3:
             line = builtins.unicode(line, 'utf8', 'replace')
         if line.startswith('!'):
             # ctags metadata/comment line
             continue
         fields = line.split('\t')
         name = fields[0]
         fname = fields[1]
         pattern = fields[2]
         # Default: class id 0 = plain function.  The original code left
         # idd unset when an extension field was present but was not a
         # 'class:' field, yielding a stale (or undefined) class id.
         idd = 0
         try:
             ext = fields[4]
             if ext and ext.startswith('class:'):
                 klass = ext.split(':', 1)[1].strip()
                 idd = self.class_id(klass)
         except IndexError:
             pass
         fid = self.file_id(fname)
         c.execute('insert into function(class, name, searchpattern, file) values (?, ?, ?, ?)',
                   [idd, name, pattern, fid])
     self.dbconn.commit()
Пример #11
0
Файл: g2p.py Проект: Koomook/g2p
def predict(words, sess):
    '''
    Returns predicted pronunciation of `words` which do NOT exist in the dictionary.
    :param words: A list of words.
    :return: pron: A list of phonemes
    '''
    # Process at most hp.batch_size words per call; recurse on the rest
    # and append those results afterwards.
    if len(words) > hp.batch_size:
        after = predict(words[hp.batch_size:], sess)
        words = words[:hp.batch_size]
    else:
        after = []
    # Encode graphemes to integer ids, with "E" appended as an end marker.
    x = np.zeros((len(words), hp.maxlen), np.int32)  # 0: <PAD>
    for i, w in enumerate(words):
        for j, g in enumerate((w + "E")[:hp.maxlen]):
            x[i][j] = g2idx.get(g, 2)  # 2:<UNK>

    ## Autoregressive inference
    # Feed the partial prediction back in, fixing one output position
    # per step until maxlen positions are filled.
    preds = np.zeros((len(x), hp.maxlen), np.int32)
    for j in range(hp.maxlen):
        _preds = sess.run(graph.preds, {graph.x: x, graph.y: preds})
        preds[:, j] = _preds[:, j]

    # convert to string
    pron = []
    for i in range(len(preds)):
        p = [u"%s" % unicode(idx2p[idx])
             for idx in preds[i]]  # Make p into unicode.
        # truncate at the end-of-sequence marker, if the model emitted one
        if "<EOS>" in p:
            eos = p.index("<EOS>")
            p = p[:eos]
        pron.append(p)

    return pron + after
Пример #12
0
def _text_preprocessing(text):
    """Lowercase *text*, strip accents, and drop disallowed characters."""
    decomposed = unicodedata.normalize('NFD', unicode(text))
    # dropping combining marks (category 'Mn') strips the accents that
    # NFD decomposition split off the base letters
    text = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
    text = text.lower()
    return re.sub("[^ a-z'\".,?!()\[\]:;\-]", "", text)
Пример #13
0
 def define_deviation(self, seq, value):
     """Record the standard deviation for sequence *seq*."""
     key = unicode(seq)  # measurement keys are stored as strings
     try:
         deviation = float(value)
     except ValueError:
         print("Invalid deviation value")
     else:
         self._measure_data[key]['deviation'] = deviation
Пример #14
0
def english_text_preprocessing(text, lower=True):
    """Strip accents, map look-alike glyphs to ASCII, optionally lowercase."""
    decomposed = unicodedata.normalize('NFD', unicode(text))
    # remove combining marks left over from NFD decomposition
    text = ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')
    # translate synoglyphs (look-alike characters) to their ASCII forms
    text = ''.join(SYNOGLYPH2ASCII.get(c, c) for c in text)
    return text.lower() if lower else text
Пример #15
0
def JSONResponse(obj, start_response):
    """Serialize *obj* to JSON (when needed) and emit a 200 response."""
    if isstring(obj):
        return httpResponse(obj, "200 OK", start_response)
    if isinstance(obj, (dict, list)):
        # containers are serialized to a JSON text body
        return httpResponse(unicode(json.dumps(obj)), "200 OK", start_response)
    # anything else is handed through untouched
    return httpResponse(obj, "200 OK", start_response)
Пример #16
0
 def define_measurement(self, seq, ptype, value):
     """Store a measurement (its type and numeric average) for *seq*."""
     key = unicode(seq)  # measurement keys are stored as strings
     try:
         average = float(value)
     except ValueError:
         print("Invalid average value")
         return
     self._measure_data[key]['ptype'] = ptype
     self._measure_data[key]['value'] = average
Пример #17
0
def create_accounts(connection):
    """Insert an Account row for every entry in acount_table_data."""
    for name, row in acount_table_data.items():
        # row: (id, optional parent id, account-type key, currency key)
        fields = dict(
            id=unicode(row[0]),
            name=name,
            id_account_type=unicode(Acounttype().get_one(row[2]).id),
            id_currency=unicode(Currency().get_one(row[3]).id),
            balance=0,
        )
        if row[1] is not None:
            fields['parent'] = unicode(row[1])
        connection.add(Account(**fields))
Пример #18
0
def process_django_model(app, what, name, obj, options, lines):
    """Sphinx autodoc hook: append ``:param:``/``:type:`` entries for
    every field of a Django model to its docstring *lines*.

    Objects that are not Django model classes are returned untouched.
    """
    # This causes import errors if left outside the function
    from django.db import models

    # Only look at objects that inherit from Django's base model class
    if inspect.isclass(obj) and issubclass(obj, models.Model):
        # Grab the field list from the meta class
        fields = obj._meta.fields

        for field in fields:
            # Decode and strip any html out of the field's help text
            help_text = strip_tags(unicode(field.help_text))

            # Decode and capitalize the verbose name, for use if there isn't
            # any help text
            verbose_name = unicode(field.verbose_name).capitalize()

            if help_text:
                # Add the model field to the end of the docstring as a param
                # using the help text as the description
                lines.append(":param {}: {}".format(field.attname, help_text))
            else:
                # Add the model field to the end of the docstring as a param
                # using the verbose name as the description
                lines.append(":param {}: {}".format(field.attname,
                                                    verbose_name))

            # Add the field's type to the docstring
            if isinstance(field, (models.ForeignKey, models.OneToOneField,
                                  models.ManyToManyField)):
                # relation fields point at the related model's class path
                lines.append(":type %s: %s to :class:`%s.%s`" % (
                    field.attname,
                    type(field).__name__,
                    field.related_model.__module__,
                    field.related_model.__name__,
                ))
            else:
                lines.append(":type {}: {}".format(field.attname,
                                                   type(field).__name__))
    # Return the extended docstring
    return lines
Пример #19
0
def SQL_EXEC(sql, args):
    """Run *sql* with key=value parameters parsed from *args*.

    Returns the JSON-encoded response text, or 0 when execution fails
    (the exception is handed to manage()).
    """
    try:
        # args is a 'key=value key=value' string; mapify turns it into a dict
        params = mapify(args, sep=' ', kvsep='=', strip_char=' ', glue='"')
        result = SqliteDB.ExecuteP(sql, params, outputmode='response', verbose=False)
        return unicode(json.dumps(result))
    except Exception as err:
        manage(err)
        return 0
Пример #20
0
 def loadCss(self):
     """Read the plugin's CSS file; return u'' if it cannot be read."""
     #log("*** loadCss function ***")
     try:
         # 'with' guarantees the handle is closed even if read() raises;
         # the original leaked the file object on any exception.
         with open(self.cssFileInPlugin, 'r') as f:
             css = unicode(f.read())
     except Exception as e:
         log(e)
         css = u''
     return css
Пример #21
0
def normalize(sentence):
    """Normalize English text for G2P: expand numbers, strip accents,
    lowercase, reduce punctuation, and expand common abbreviations."""
    sentence = normalize_numbers(unicode(sentence))
    # NFD splits accented letters; dropping 'Mn' marks strips the accents
    sentence = ''.join(ch for ch in unicodedata.normalize('NFD', sentence)
                       if unicodedata.category(ch) != 'Mn')
    sentence = re.sub(r"[^ a-z'.,?!\-]", "", sentence.lower())
    # expand common Latin abbreviations
    for abbr, expansion in (("i.e.", "that is"), ("e.g.", "for example")):
        sentence = sentence.replace(abbr, expansion)
    return sentence
Пример #22
0
    def postprocess(self):
        """Postprocessing.

        Just in case some elements on the addresses was converted to
        anything but string, it gets converted back to only string
        (unicode). Things on addresses are not useful but by what they
        say, not what they are.

.. todo:: do the same thing to the 'address' field created on
          preprocessing.
        """
        try:
            # stringify each field of the first address, mirroring the
            # change into the flat 'address' dict
            for name, item in self['addresses'][0].items():
                try:
                    # bookkeeping key, not address data — leave untouched
                    if name == "indexInList": continue
                    self['addresses'][0][name] = unicode(self['addresses'][0][name])
                    self['address'][name] = unicode(self['address'][name])
                except AttributeError:
                    pass
        except (KeyError, IndexError):
            # no addresses present — nothing to normalize
            pass

        super(MambuClient,self).postprocess()
Пример #23
0
def stringify(blob):
  """Recursively flatten *blob* (lists, dicts, str, bytes, unicode)
  into a single unicode string.

  NOTE(review): any falsy input — including 0 and False, not just
  None/empty containers — collapses to '' via the first check; confirm
  that is intended by callers.
  """
  retstr = ''
  if not blob:
    return '' # we were passed nothing, so return nothing
  elif isinstance(blob, list):
    for e in blob:
      retstr += stringify(e)
  elif isinstance(blob, dict):
    # keys and values are flattened in iteration order and concatenated
    for k,v in blob.items():
      retstr += stringify(unicode(k))
      #print(type(retstr), type(v), v)
      retstr += stringify(unicode(v))
  elif isinstance(blob, str):
    retstr += unicode(blob)
  elif isinstance(blob, bytes):
    retstr += unicode(blob)
  elif isinstance(blob, unicode):
    retstr += blob
  else:
    raise Exception("unknown type: %s" % str(type(blob)))

  #print(retstr)
  return retstr
Пример #24
0
def process_django_model(app, what, name, obj, options, lines):
    """Sphinx autodoc hook: append ``:param:``/``:type:`` entries for
    every field of a Django model to its docstring *lines*.

    NOTE(review): uses ``field.rel.to`` — the pre-Django-2.0 relation
    API; confirm against the project's Django version.
    """
    # This causes import errors if left outside the function
    from django.db import models

    # Only look at objects that inherit from Django's base model class
    if inspect.isclass(obj) and issubclass(obj, models.Model):
        # Grab the field list from the meta class
        fields = obj._meta.fields

        for field in fields:
            # Decode and strip any html out of the field's help text
            help_text = strip_tags(unicode(field.help_text))

            # Decode and capitalize the verbose name, for use if there isn't
            # any help text
            verbose_name = unicode(field.verbose_name).capitalize()

            if help_text:
                # Add the model field to the end of the docstring as a param
                # using the help text as the description
                lines.append(u':param %s: %s' % (field.attname, help_text))
            else:
                # Add the model field to the end of the docstring as a param
                # using the verbose name as the description
                lines.append(u':param %s: %s' % (field.attname, verbose_name))

            # Add the field's type to the docstring
            if isinstance(field, (models.ForeignKey, models.OneToOneField, models.ManyToManyField)):
                lines.append(u':type %s: %s to :class:`%s.%s`' % (field.attname,
                                                                  type(field).__name__,
                                                                  field.rel.to.__module__,
                                                                  field.rel.to.__name__))
            else:
                lines.append(u':type %s: %s' % (field.attname, type(field).__name__))
    # Return the extended docstring
    return lines
Пример #25
0
    def __call__(self, text):
        """Return the phoneme transcription of English *text* as one
        space-separated string (one joined phoneme chunk per word).

        Words are POS-tagged; homographs are disambiguated by tag,
        known words come from the CMU dictionary, and OOV words go
        through self.predict().
        """
        # preprocessing
        text = unicode(text)
        text = normalize_numbers(text)
        text = ''.join(char for char in unicodedata.normalize('NFD', text)
                       if unicodedata.category(char) != 'Mn')  # Strip accents
        text = text.lower()
        text = re.sub("[^ a-z'.,?!\-]", "", text)
        text = text.replace("i.e.", "that is")
        text = text.replace("e.g.", "for example")

        # tokenization
        words = word_tokenize(text)
        tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        #prons = []
        # YJS added
        prons2 = ""
        for word, pos in tokens:
            # token without any letters (pure punctuation): keep verbatim
            if re.search("[a-z]", word) is None:
                pron = [word]

            elif word in self.homograph2features:  # Check homograph
                pron1, pron2, pos1 = self.homograph2features[word]
                if pos.startswith(pos1):
                    pron = pron1
                else:
                    pron = pron2
            elif word in self.cmu:  # lookup CMU dict
                pron = self.cmu[word][0]
            else:  # predict for oov
                pron = self.predict(word)

            #prons.extend(pron)
            #prons.extend([" "])

            #YJS CHANGED
            # concatenate this word's phoneme list into a single chunk
            new_word = ''
            for i in np.arange(np.size(pron)):
                new_word += pron[i]

            prons2 += new_word
            prons2 += ' '


#        return prons[:-1_old_2]
        return prons2
Пример #26
0
def g2p(text):
    '''
    Returns the pronunciation of text.
    :param text: A string. A sequence of words.
    :return: A list of phonemes.
    '''
    # normalization
    text = unicode(text)
    #text = normalize_numbers(text)
    text = ''.join(char for char in unicodedata.normalize('NFD', text)
                   if unicodedata.category(char) != 'Mn')  # Strip accents
    text = text.lower()
    #text = re.sub("[^ a-z'.,?!\-]", "", text)
    #text = text.replace("i.e.", "that is")
    #text = text.replace("e.g.", "for example")

    # tokenization
    words = tokenize(text)
    tokens = (words)
    #tokens = pos_tag(words) # tuples of (word, tag)

    # g2p
    # Known tokens are transcribed directly; OOV tokens are collected
    # together with their insertion offsets and predicted in one batch.
    oovs, u_loc = [], []
    ret = []
    for token in tokens:
        pron = token2pron(token)  # list of phonemes
        if pron == []:  # oov
            oovs.append(token[0])
            u_loc.append(len(ret))
        ret.extend(pron)
        ret.extend([" "])

    if len(oovs) > 0:
        global g_sess
        if g_sess is not None:  # check global session
            prons = predict(oovs, g_sess)
            # splice predictions back in reverse so earlier offsets stay valid
            for i in range(len(oovs) - 1, -1, -1):
                ret = ret[:u_loc[i]] + prons[i] + ret[u_loc[i]:]
        else:  # If global session is not defined, make new one as local.
            with tf.Session(graph=g, config=config) as sess:
                saver.restore(
                    sess,
                    tf.train.latest_checkpoint(os.path.join(
                        dirname, hp.logdir)))
                prons = predict(oovs, sess)
                for i in range(len(oovs) - 1, -1, -1):
                    ret = ret[:u_loc[i]] + prons[i] + ret[u_loc[i]:]
    # drop the trailing space separator
    return ret[:-1]
Пример #27
0
def JSONResponse(obj, start_response):
    """Serialize *obj* to JSON when needed and return it as a single
    UTF-8 encoded WSGI response body, sending the 200 headers through
    *start_response* when one is provided.
    """
    if isstring(obj):
        text = obj
    elif isinstance(obj, (dict, list)):
        text = unicode(json.dumps(obj))
    else:
        text = obj

    body = text.encode('utf-8')
    # Content-Length must count encoded bytes, not characters: the
    # original len(text) undercounted for any non-ASCII payload.
    response_headers = [('Content-type', 'application/json'),
                        ('Content-Length', str(len(body)))]
    if start_response:
        start_response("200 OK", response_headers)
    return [body]
Пример #28
0
    def __call__(self, text):
        """Transcribe English *text* and return a list of
        (word, phoneme_list) pairs, one per token.

        Homographs are disambiguated by POS tag, known words come from
        the CMU dictionary, and OOV words go through self.predict().
        """
        # preprocessing
        text = unicode(text)
        text = normalize_numbers(text)
        text = ''.join(char for char in unicodedata.normalize('NFD', text)
                       if unicodedata.category(char) != 'Mn')  # Strip accents
        text = text.lower()
        #text = re.sub("[^ a-z'.,?!\-]", "", text)
        text = re.sub("[^ a-z'.,?!\-;:\"]", "", text)  # mdda
        #text = re.sub("([a-z])\-([a-z])", r"\1 - \2", text)   # mdda 'hot-shot' -> 'hot - shot'
        text = re.sub("([a-z])\-([a-z])", r"\1 \2",
                      text)  # mdda    'hot-shot' -> 'hot shot'
        text = text.replace("i.e.", "that is")
        text = text.replace("e.g.", "for example")

        # tokenization
        #words1 = word_tokenize(text)
        #print( words1 )
        words2 = kaldi_tokenize(text)
        #print( words2 )

        tokens = pos_tag(words2)  # tuples of (word, tag)

        # steps
        prons = []
        for word, pos in tokens:
            # token without any letters (pure punctuation): keep verbatim
            if re.search("[a-z]", word) is None:
                pron = [word]

            elif word in self.homograph2features:  # Check homograph
                pron1, pron2, pos1 = self.homograph2features[word]
                if pos.startswith(pos1):
                    pron = pron1
                else:
                    pron = pron2
            elif word in self.cmu:  # lookup CMU dict
                pron = self.cmu[word][0]
            else:  # predict for oov
                pron = self.predict(word)

            #prons.extend(pron)  #mdda
            #prons.extend([" "]) #mdda
            prons.append((word, pron))  #mdda

        #return prons[:-1]   #mdda
        return prons  #mdda
Пример #29
0
 def _get(self, config, section, field, default):
     """Read *section.field* from *config* into self[field], coercing by
     the type of *default*; fall back to *default* on parse errors.

     A non-empty password value that is not 64 characters long is
     treated as plain text and replaced by its SHA-256 hex digest.
     'database' and 'assetdir' values are resolved under self.home.
     """
     try:
         if isinstance(default, bool):
             self[field] = config.getboolean(section, field)
         elif isinstance(default, int):
             self[field] = config.getint(section, field)
         else:
             self[field] = config.get(section, field)
             if field == 'password' and self[field] != '' and len(
                     self[field]) != 64:  # likely not a hashed password.
                 self[field] = hashlib.sha256(
                     self[field]).hexdigest()  # hash the original password.
     except ConfigParser.Error as e:
         logging.debug(
             "Could not parse setting '%s.%s': %s. Using default value: '%s'."
             % (section, field, unicode(e), default))
         self[field] = default
     if field in ['database', 'assetdir']:
         self[field] = str(path.join(self.home, self[field]))
Пример #30
0
 def create_index_page(self):
     '''
     If there is no local html containing links to files, create one.

     Returns the index page contents, reading the cached copy when one
     exists and downloading (then caching) it otherwise.
     '''
     if os.path.isfile(self.index_page):
         print('>>> Reading cached index page')
         # 'with' closes the handle even if read() raises; the original
         # leaked the file object on any error
         with open(self.index_page, 'r') as index_file:
             index_contents = index_file.read()
     else:
         print('>>> Downloading index page')
         fp = urllib.urlopen(self.kgs_url)
         try:
             index_contents = unicode(fp.read())
         finally:
             fp.close()
         # cache the downloaded page for next time
         with open(self.index_page, 'w') as index_file:
             index_file.write(index_contents)
     return index_contents
Пример #31
0
    def __call__(self, text, tidy=False, secret=False):
        """Convert Persian *text* to a single phoneme string.

        Words are normalized/tokenized with hazm, looked up in the tihu
        dictionary (format depends on *secret*), and OOV words go
        through self.predict().  When *tidy* is true, the result is
        post-processed by convert_from_native_to_good().
        """
        # preprocessing
        text = unicode(text)
        text = normalize_numbers(text)
        # text = ''.join(char for char in unicodedata.normalize('NFD', text)
        #                if unicodedata.category(char) != 'Mn')  # Strip accents
        # text = re.sub("[^ a-z'.,?!\-]", "", text)

        normalizer = hazm.Normalizer()
        text = normalizer.normalize(text)
        # tokenization
        words = hazm.word_tokenize(text)
        # tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        prons = []
        for word in words:

            # token contains no known grapheme: keep it verbatim
            if not any(letter in word for letter in self.graphemes):
                pron = [word]

            # elif word in self.homograph2features:  # Check homograph
            #     pron1, pron2, pos1 = self.homograph2features[word]
            #     if pos.startswith(pos1):
            #         pron = pron1
            #     else:
            #         pron = pron2
            elif word in self.tihu:  # lookup tihu dict
                # secret mode strips spaces; normal mode pads with spaces
                pron = [self.tihu[word].replace(' ', '')
                        ] if secret else [' ', self.tihu[word], ' ']
            else:  # predict for oov
                pron = self.predict(word)

            prons.extend(pron)
            prons.extend([" "])

        result = ''.join(prons[:-1])

        if tidy:
            return Persian_g2p_converter.convert_from_native_to_good(result)

        return result
Пример #32
0
def sanitize(text, kana=True, wildcards=False):
    """Keep only Japanese characters (or only kanji when *kana* is
    False), optionally converting glob wildcards to SQL LIKE ones."""
    checker = isJapanese if kana else isKanji

    overrides = list()
    if wildcards:
        # map glob-style wildcards to SQL LIKE wildcards and allow them
        # through the character filter below
        text = re.sub(u'[\**]', u'%', text)
        text = re.sub(u'[\??]', u'_', text)
        overrides = [u'%', u'_']

    # keep only characters the checker accepts (plus allowed wildcards)
    return unicode().join(c for c in text if checker(c) or c in overrides)
Пример #33
0
    def feed_scintilla(self, apifile_obj):
        """ handle scintilla api files

        Syntax is like:

        qt.QApplication.style?4() -> QStyle

        Each line is split on '?': the left part is a dotted symbol
        whose last two components are class and function; the right
        part (if present) is a description.
        """
        for l in apifile_obj:
            if not isPython3:
                l = builtins.unicode(l, 'utf8', 'replace')
            parts = l.split('?')
            fullsym = parts[0].rsplit('.', 1)
            # NOTE(review): a line without any '.' makes this unpack fail —
            # presumably api files always contain dotted symbols; confirm.
            klass, func = fullsym
            if len(parts) == 2:
                desc = parts[1]
            else:
                desc = ''
            # now our class is like qt.QApplication. We do the dirty trick and
            # remove all but actual class name
            shortclass = klass.rsplit('.', 1)[-1]
            #print func, klass, desc
            self.feed_function(func.strip(), shortclass.strip(), '', desc.strip())
        self.dbconn.commit()
Пример #34
0
    def feed_scintilla(self, apifile_obj):
        """ handle scintilla api files

        Syntax is like:

        qt.QApplication.style?4() -> QStyle

        Each line is split on '?': the left part is a dotted symbol
        whose last two components are class and function; the right
        part (if present) is a description.
        """
        for l in apifile_obj:
            if not isPython3:
                l = builtins.unicode(l, 'utf8', 'replace')
            parts = l.split('?')
            fullsym = parts[0].rsplit('.', 1)
            # NOTE(review): a line without any '.' makes this unpack fail —
            # presumably api files always contain dotted symbols; confirm.
            klass, func = fullsym
            if len(parts) == 2:
                desc = parts[1]
            else:
                desc = ''
            # now our class is like qt.QApplication. We do the dirty trick and
            # remove all but actual class name
            shortclass = klass.rsplit('.', 1)[-1]
            #print func, klass, desc
            self.feed_function(func.strip(), shortclass.strip(), '', desc.strip())
        self.dbconn.commit()
Пример #35
0
    def __call__(self, text):
        """Transcribe English *text* to a flat phoneme list, recording
        each word's joined pronunciation in self.word_map.

        Homographs are disambiguated by POS tag, known words come from
        the CMU dictionary, and OOV words go through self.predict().
        Space separators are inserted between words; the trailing one
        is dropped.
        """
        # preprocessing
        text = unicode(text)
        text = normalize_numbers(text)
        text = ''.join(char for char in unicodedata.normalize('NFD', text)
                       if unicodedata.category(char) != 'Mn')  # Strip accents
        text = text.lower()
        text = re.sub("[^ a-z'.,?!\-#~\r\t_\"\']", "", text)
        text = text.replace("i.e.", "that is")
        text = text.replace("e.g.", "for example")

        # tokenization
        words = word_tokenize(text)
        tokens = pos_tag(words)  # tuples of (word, tag)

        # steps
        prons = []
        for word, pos in tokens:
            # token without any letters (pure punctuation): keep verbatim
            if re.search("[a-z]", word) is None:
                pron = [word]

            elif word in self.homograph2features:  # Check homograph
                pron1, pron2, pos1 = self.homograph2features[word]
                if pos.startswith(pos1):
                    pron = pron1
                else:
                    pron = pron2
            elif word in self.cmu:  # lookup CMU dict
                pron = self.cmu[word][0]
            else: # predict for oov
                pron = self.predict(word)

            prons.extend(pron)
            prons.extend([" "])
            # remember which word produced this phoneme chunk
            self.word_map["".join(pron)] = word
        return prons[:-1]
Пример #36
0
 def u(s):
     """Coerce *s* to text using the Python 2 ``unicode`` builtin."""
     return builtins.unicode(s)
Пример #37
0
 def _str(s, encoding="UTF-8"):
     return unicode(s, encoding=encoding)
Пример #38
0
 def ue(s, encoding):
     """Decode the byte string *s* to text using *encoding* (Python 2)."""
     return builtins.unicode(s, encoding)
Пример #39
0
 def _str(s, encoding="UTF-8"):
     s = unicode(s, encoding=encoding)
     return unichr_escape.sub(lambda x:
                                  x.group(0).decode('unicode-escape'),
                              s)
Пример #40
0
 def _str(s, encoding="UTF-8"):
     return unicode(s, encoding=encoding)
Пример #41
0
 def toUnicode(self, s):
     """Coerce *s* to text via the Python 2 ``unicode`` builtin."""
     # pylint: disable=no-member
     return builtins.unicode(s)
Пример #42
0
 def toUnicode(self, s):
     """Return *s* as text: str on Python 3, unicode on Python 2."""
     # pylint: disable=no-member
     if g.isPython3:
         return str(s)
     return builtins.unicode(s)
Пример #43
0
 def toUnicode(self, s):
     """Return *s* as text: str on Python 3, unicode on Python 2."""
     return str(s) if g.isPython3 else builtins.unicode(s)