Python normalizarTexto 예제들, usig_normalizador_amba.commons.normalizarTexto Python 예제들

예제 #1

0

파일 보기

파일: Callejero.py 프로젝트: valentinopfarherr/normalizador-amba

    def cargarCallejero(self):
        '''
        Download the street index for self.partido and store it on the instance.

        The request URL is built from self.config: 'callejero_caba_server'
        when the partido code is 'caba', 'callejero_amba_server' otherwise.
        The JSON response is stored in self.data; every row gets the set of
        words of its normalized name (row[1]) appended, the rows are sorted
        and self.osm_ids is filled with the first element of each row.

        @raise urllib.error.HTTPError: re-raised with an extra 'detalle'
            attribute holding a user-facing message.
        '''
        es_caba = self.partido.codigo == 'caba'
        if es_caba:
            server = '{0}?full=1&encoding=utf-8'.format(
                self.config['callejero_caba_server'])
        else:
            server = '{0}callejero/?partido={1}'.format(
                self.config['callejero_amba_server'], self.partido.codigo)

        try:
            # The context manager closes the HTTP response even if read() fails.
            with urllib.request.urlopen(server) as response:
                data = response.read()
        except urllib.error.HTTPError as e:
            e.detalle = 'Se produjo un error al intentar cargar la información de calles.'
            # Bare raise re-raises the same exception object (now carrying
            # 'detalle'); any other exception simply propagates untouched.
            raise

        self.data = json.loads(data)
        for d in self.data:
            if es_caba:
                # CABA rows get a literal 'CABA' appended before the word set.
                # NOTE(review): presumably this fills a field that the AMBA
                # rows already carry - confirm against the service response.
                d.append('CABA')
            d.append(
                set(
                    normalizarTexto(d[1], separador=' ',
                                    lower=False).split(' ')))
        self.data.sort()  # Sort by id
        self.osm_ids = [k[0] for k in self.data]  # Build the list of osm_ids

예제 #2

0

파일 보기

 def cargarCallejero(self):
     '''
     Download the street index for self.partido through urllib3.

     The request URL is built from self.config: 'callejero_caba_server'
     when the partido code is 'caba', 'callejero_amba_server' otherwise.
     The response body is decoded with the encoding selected for that
     branch ('latin-1' for 'caba', 'utf8' otherwise) and parsed as JSON
     into self.data. Every row gets the set of words of its normalized
     name (row[1]) appended, the rows are sorted and self.osm_ids is
     filled with the first element of each row.

     @raise urllib3.exceptions.HTTPError: re-raised with an extra
         'detalle' attribute holding a user-facing message.
     '''
     try:
         es_caba = self.partido.codigo == 'caba'
         if es_caba:
             server = '{0}?full=1'.format(
                 self.config['callejero_caba_server'])
             encoding = 'latin-1'
         else:
             server = '{0}callejero/?partido={1}'.format(
                 self.config['callejero_amba_server'], self.partido.codigo)
             encoding = 'utf8'
         http = urllib3.PoolManager()
         # Decode with the encoding chosen above rather than always latin-1,
         # and hand json.loads a str: its optional parameters are
         # keyword-only on Python 3.6+, so a second positional argument
         # raises TypeError.
         data = http.request('GET', server).data.decode(encoding)
         self.data = json.loads(data)
         for d in self.data:
             if es_caba:
                 # CABA rows get a literal 'CABA' appended before the word set.
                 d.append('CABA')
             d.append(
                 set(
                     normalizarTexto(d[1], separador=' ',
                                     lower=False).split(' ')))
         self.data.sort()  # Sort by id
         self.osm_ids = [k[0] for k in self.data]  # Build the list of osm_ids
     except urllib3.exceptions.HTTPError as e:
         e.detalle = 'Se produjo un error al intentar cargar la información de calles.'
         # Bare raise re-raises the same exception object; every other
         # exception type propagates untouched.
         raise

예제 #3

0

파일 보기

 def test_normalizarTexto_acentos(self):
     '''Check that accented and diaeresis letters come back as plain lowercase letters.'''
     con_acentos = u'ábçdéfǵhíjḱĺḿńñóṕqŕśtúvẃxýźÁBÇDÉFǴHÍJḰĹḾŃÑÓṔQŔŚTÚVẂXÝŹäëïöüÄËÏÖÜ'
     esperado = u'abcdefghijklmnnopqrstuvwxyzabcdefghijklmnnopqrstuvwxyzaeiouaeiou'
     self.assertEqual(normalizarTexto(con_acentos), esperado)

예제 #4

0

파일 보기

def cargarCallejeroEstatico(c):
    """Fill a callejero object from a local JSON file instead of a remote service.

    The file read is 'callejeros/<codigo>.callejero', where <codigo> is
    c.partido.codigo. Each row gets the set of words of its normalized name
    (row[1]) appended; the sorted rows are stored in c.data and the first
    element of every row is collected into c.osm_ids.
    """
    codigo = c.partido.codigo
    with open('callejeros/{0}.callejero'.format(codigo)) as archivo:
        filas = json.load(archivo)

    for fila in filas:
        nombre_normalizado = normalizarTexto(fila[1], separador=' ', lower=False)
        fila.append(set(nombre_normalizado.split(' ')))

    c.data = filas
    c.data.sort()
    c.osm_ids = [fila[0] for fila in c.data]

예제 #5

0

파일 보기

파일: Callejero.py 프로젝트: valentinopfarherr/normalizador-amba

    def buscarCalle(self, calle, limit=0):
        '''
        Find the streets whose name corresponds to calle.

        Results are ordered by match quality: exact name matches first, then
        names made of exactly the same words, then names containing every
        searched word, and finally rows whose field 2 matches every searched
        word at the start of the text or right after a space.

        @param calle: String to match
        @type calle: String
        @param limit: Maximum number of results to return. Zero means no limit.
        @type limit: Integer
        @return: List with the Calle instances that matched calle
        @rtype: List of Calle
        '''
        # Single-entry cache: answer straight away if the last query is repeated.
        if self.minicache[0] == calle:
            cacheadas = self.minicache[1]
            if limit == 0:
                return cacheadas
            return cacheadas[:limit]

        def nueva_calle(fila):
            # Build the Calle instance for one row of self.data.
            return Calle(fila[0], fila[1], fila[3], fila[4], self.partido,
                         fila[5])

        exactas, permutadas, incluidas, por_keywords = [], [], [], []

        normalizada = normalizarTexto(calle, separador=' ', lower=False)
        palabras = set(normalizada.split(' '))
        # Longest words first, one compiled pattern per distinct word.
        patrones = [
            re.compile(r'^{0}| {1}'.format(re.escape(palabra),
                                           re.escape(palabra)))
            for palabra in sorted(palabras, key=len, reverse=True)
        ]

        # commons.matcheaTexto is not used here for performance reasons.
        # The scan is not cut short at limit so that the best matches are found.
        for fila in self.data:
            if calle == fila[1]:  # Exact match with the name
                exactas.append(nueva_calle(fila))
            elif palabras == fila[6]:  # Same words in any order
                permutadas.append(nueva_calle(fila))
            elif palabras <= fila[6]:  # Every searched word is in the name
                incluidas.append(nueva_calle(fila))
            elif all(
                    patron.search(fila[2]) is not None
                    for patron in patrones):  # Match against the keywords
                por_keywords.append(nueva_calle(fila))

        encontradas = exactas + permutadas + incluidas + por_keywords
        self.minicache = [calle, encontradas]

        if limit == 0:
            return encontradas
        return encontradas[:limit]

예제 #6

0

파일 보기

 def _buscarPartidoLocalidad(self, texto, partido, localidad):
     '''
     Check the leading words of texto against partido and localidad.

     The normalized text is split on whitespace and ever longer prefixes
     (first word, first two words, ...) are tested with matcheaTexto
     against partido and localidad. The scan stops at the first prefix
     that matches neither.

     @param texto: Text to inspect
     @param partido: Value the prefixes are matched against
     @param localidad: Alternative value the prefixes are matched against
     @return: True if at least one prefix matched before the scan stopped
     @rtype: Boolean
     '''
     retval = False
     # Raw string: '\s' in a plain literal is an invalid escape sequence.
     palabras = re.split(r'\s', normalizarTexto(texto))
     # NOTE(review): once the first prefix matches, the result stays True
     # even when a longer prefix fails - confirm this is the intent.
     for fin in range(1, len(palabras) + 1):
         texto_cortado = ' '.join(palabras[:fin])
         if matcheaTexto(texto_cortado, partido) or matcheaTexto(texto_cortado, localidad):
             retval = True
         else:
             break
     return retval

예제 #7

0

파일 보기

파일: Callejero.py 프로젝트: hogasa/normalizador-amba

    def buscarCalle(self, calle, limit=0):
        '''
        Find the streets whose name corresponds to calle and return a list
        with every Calle instance found, best matches first.

        @param calle: String to match
        @type calle: String
        @param limit: Maximum number of results to return. Zero means no limit.
        @type limit: Integer
        @return: List with the Calle instances that matched calle
        @rtype: List of Calle
        '''

        # Single-entry cache holding the last query and its full result list.
        if self.minicache[0] == calle:
            return self.minicache[1] if limit == 0 else self.minicache[1][:limit]

        # One bucket per match quality: exact, permuted, included, keywords.
        res = [[], [], [], []]
        calleNorm1 = normalizarTexto(calle, separador=' ', lower=False)
        words1 = list(set(calleNorm1.split(' ')))
        words1.sort(key=len, reverse=True)
        # A real list rather than map(): the patterns are iterated again for
        # every row, which a one-shot Python 3 map object would not allow.
        # The u'' literal contains no backslashes, so it is equivalent to
        # the former ur'' literal while staying valid syntax on Python 3.
        regexps1 = [re.compile(u'^{0}| {1}'.format(re.escape(x), re.escape(x))) for x in words1]

        words1 = set(words1)
        # commons.matcheaTexto is not used here for performance reasons.
        # The scan is not cut short at limit so that the best matches are found.
        for data in self.data:
            if calle == data[1]:  # Exact match with the name
                res[0].append(Calle(data[0], data[1], data[3], data[4], self.partido, data[5]))
            else:  # Permuted match with the name
                if (words1 == data[6]):
                    res[1].append(Calle(data[0], data[1], data[3], data[4], self.partido, data[5]))
                elif (words1 == words1 & data[6]):  # Searched words included in the name
                    res[2].append(Calle(data[0], data[1], data[3], data[4], self.partido, data[5]))
                else:  # Match against the street keywords
                    match = True
                    for regexp in regexps1:
                        if regexp.search(data[2]) is None:
                            match = False
                            break
                    if match:
                        res[3].append(Calle(data[0], data[1], data[3], data[4], self.partido, data[5]))

        res = res[0] + res[1] + res[2] + res[3]
        self.minicache = [calle, res]

        return res if limit == 0 else res[:limit]

예제 #8

0

파일 보기

파일: Callejero.py 프로젝트: hogasa/normalizador-amba

    def cargarCallejero(self):
        '''
        Download the street index for self.partido through urllib2.

        The request URL is built from self.config: 'callejero_caba_server'
        when the partido code is 'caba', 'callejero_amba_server' otherwise.
        The response is parsed as JSON into self.data using the encoding
        selected for that branch. Every row gets the set of words of its
        normalized name (row[1]) appended, the rows are sorted and
        self.osm_ids is filled with the first element of each row.

        @raise urllib2.HTTPError: re-raised with an extra 'detalle'
            attribute holding a user-facing message.
        '''
        try:
            if self.partido.codigo == 'caba':
                server = '{0}?full=1'.format(self.config['callejero_caba_server'])
                encoding = 'latin-1'
            else:
                server = '{0}callejero/?partido={1}'.format(self.config['callejero_amba_server'], self.partido.codigo)
                encoding = 'utf8'

            response = urllib2.urlopen(server)
            try:
                data = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            # Positional 'encoding' is the Python 2 json.loads signature,
            # which matches the urllib2 module used by this method.
            self.data = json.loads(data, encoding)
            for d in self.data:
                if self.partido.codigo == 'caba':
                    # CABA rows get a literal 'CABA' appended before the word set.
                    d.append('CABA')
                d.append(set(normalizarTexto(d[1], separador=' ', lower=False).split(' ')))
            self.data.sort()  # Sort by id
            self.osm_ids = [k[0] for k in self.data]  # Build the list of osm_ids
        except urllib2.HTTPError as e:
            e.detalle = 'Se produjo un error al intentar cargar la información de calles.'
            # Bare raise keeps the original traceback of the HTTP error.
            raise

예제 #9

0

파일 보기

파일: StringDireccion.py 프로젝트: usig/normalizador-amba

 def __init__(self, strInput):
     '''
     Keep the raw input, store its normalized form and run the candidate search.

     @param strInput: Text as received from the caller
     '''
     self.strOriginal = strInput
     texto_normalizado = normalizarTexto(strInput, lower=False, separador=' ')
     self.strNormalizado = texto_normalizado
     self.buscarCandidatos()

예제 #10

0

파일 보기

파일: CommonsTestCase.py 프로젝트: usig/normalizador-amba

 def test_normalizarTexto_espacios(self):
     '''Check that outer spaces are dropped and inner runs collapse to one space.'''
     con_espacios = '   hola    chau         '
     self.assertEqual(normalizarTexto(con_espacios), 'hola chau')

예제 #11

0

파일 보기

파일: CommonsTestCase.py 프로젝트: valentinopfarherr/normalizador-amba

 def test_normalizarTexto_separador(self):
     '''Check that the words of the result are joined with the given separador.'''
     entrada = '   hola  á   chau         '
     esperado = 'hola_a_chau'
     self.assertEqual(normalizarTexto(entrada, separador='_'), esperado)

예제 #12

0

파일 보기

파일: CommonsTestCase.py 프로젝트: valentinopfarherr/normalizador-amba

 def test_normalizarTexto_simbolos(self):
     '''Check that a run of punctuation symbols between two words is removed.'''
     con_simbolos = 'hola !#$%&/()=?¡@"\\\' chau'
     obtenido = normalizarTexto(con_simbolos)
     self.assertEqual(obtenido, 'hola chau')

예제 #13

0

파일 보기

파일: CommonsTestCase.py 프로젝트: valentinopfarherr/normalizador-amba

 def test_normalizarTexto_espacios(self):
     '''Leading, trailing and repeated spaces must not survive normalization.'''
     esperado = 'hola chau'
     obtenido = normalizarTexto('   hola    chau         ')
     self.assertEqual(obtenido, esperado)

예제 #14

0

파일 보기

파일: CommonsTestCase.py 프로젝트: valentinopfarherr/normalizador-amba

 def test_normalizarTexto_lower(self):
     '''Check that lower=False yields an upper-case, accent-free, single-spaced result.'''
     entrada = '   hola  á   chau         '
     self.assertEqual(normalizarTexto(entrada, lower=False), 'HOLA A CHAU')

예제 #15

0

파일 보기

파일: CommonsTestCase.py 프로젝트: usig/normalizador-amba

 def test_normalizarTexto_lower(self):
     '''With lower=False the normalized text is expected in upper case.'''
     esperado = 'HOLA A CHAU'
     obtenido = normalizarTexto('   hola  á   chau         ', lower=False)
     self.assertEqual(obtenido, esperado)

예제 #16

0

파일 보기

파일: CommonsTestCase.py 프로젝트: usig/normalizador-amba

 def test_normalizarTexto_separador(self):
     '''A custom separador replaces the single space between normalized words.'''
     obtenido = normalizarTexto('   hola  á   chau         ', separador='_')
     esperado = 'hola_a_chau'
     self.assertEqual(obtenido, esperado)

예제 #17

0

파일 보기

파일: CommonsTestCase.py 프로젝트: usig/normalizador-amba

 def test_normalizarTexto_acentos(self):
     '''Accented and diaeresis letters are expected back as plain lowercase letters.'''
     esperado = 'abcdefghijklmnnopqrstuvwxyzabcdefghijklmnnopqrstuvwxyzaeiouaeiou'
     obtenido = normalizarTexto(
         'ábçdéfǵhíjḱĺḿńñóṕqŕśtúvẃxýźÁBÇDÉFǴHÍJḰĹḾŃÑÓṔQŔŚTÚVẂXÝŹäëïöüÄËÏÖÜ')
     self.assertEqual(obtenido, esperado)

예제 #18

0

파일 보기

파일: CommonsTestCase.py 프로젝트: usig/normalizador-amba

 def test_normalizarTexto_simbolos(self):
     '''Punctuation symbols between two words must be dropped from the result.'''
     esperado = 'hola chau'
     self.assertEqual(normalizarTexto('hola !#$%&/()=?¡@"\\\' chau'), esperado)

예제 #19

0

파일 보기

 def __init__(self, strInput):
     '''
     Store the original text and its normalized form, then search for candidates.

     @param strInput: Text as received from the caller
     '''
     self.strOriginal = strInput
     opciones = {'separador': ' ', 'lower': False}
     self.strNormalizado = normalizarTexto(strInput, **opciones)
     self.buscarCandidatos()