Python remove_accents 예제들, utility.remove_accents Python 예제들

예제 #1

0

파일 보기

파일: htmlscraping.py 프로젝트: elvinny/HousePricing

def getPropertyFloorArea(houseProperty):
	logger.debug('#############################################START GET PROPERTY FLOOR AREA#############################################')
			
	
	propertyInfo = houseProperty.find("div","searchPropertyInfo")
	
	previousTag = ''
	
	for divTags in propertyInfo.find_all('div'):
		for pTags in divTags.find_all('p'):
			logger.debug('PTags : %s', pTags.contents[0])		
					
			##PREVIOUS TAG is one of the ones we need to fetch, than let's pick the info
			if previousTag == 'estado':
				logger.debug('PreviousTag is %s',previousTag)
			elif previousTag == 'areautil':				
				logger.debug('PreviousTag is %s',previousTag)
			elif previousTag == 'areabruta':
				logger.debug('PreviousTag is %s',previousTag)
									
			previousTag = utility.remove_accents(pTags.contents[0])
			logger.debug('New previous Tag is : %s',previousTag)
									
		logger.debug('Search Property Info %s', divTags)		
		result = divTags.Text		
	logger.debug('#############################################END GET PROPERTY FLOOR AREA#############################################')
	
	
	return result

예제 #2

0

파일 보기

파일: file_server.py 프로젝트: Ninja-1/Circle2

def build_directory(path, mtime, flags, server):
    dir = Directory()
    dir.mtime = mtime
    dir.is_dir = 1
    dir.names = [ ]
    basename = os.path.basename(path)

    dir.info = {
        'type'     : 'directory',
        'filename' : basename,
        'length'   : 0L,
        'path'     : string.split(path,'/')
    }
    #print dir.info['path']
    names = os.listdir(path)
    
    #for i in range(len(names)):
    #    names[i] = utility.force_unicode(names[i])

    # Option to limit number of files published
    # TODO: make it work with subdirectories
    if flags['max'] != None:
        names = names[:flags['max']]
        

    if flags.get('name'):
        if server.cache.has_key((path,mtime)):
            dir.hash, dir.length = server.cache[(path,mtime)]
        else:
            dir.length = 0
            dir.hash = hash.hash_of('basename')
            server.cache[(path,mtime)] = (dir.hash, dir.length)
            
        dir.info['name'] = dir.hash
        dir.names.append(dir.hash)



    if not flags.get('name'):
        dir.info['local_path'] = path

    str = utility.remove_accents(string.lower(basename))
    keywords = [ ]    
    if flags.get('filename'):
        keywords.append(str)
    if flags.get('keywords'):
        for char in '+-_.,?!()[]':
            str = string.replace(str,char," ")
        keywords.extend(string.split(str))         

    dir.info['keywords'] = [ ]

    dir.files = [ ]
    for item in names:
        if item[0] != '.':
            dir.files.append(os.path.join(path,item))

    #for the moment do not publish directories
    return dir

    for word in keywords:
        word=utility.force_string(word)
        if len(word) >= min_search_keyword_len and word not in dir.info['keywords']:
            dir.info['keywords'].append(word)
            if flags.get('name'):
                dir.names.append(hash.hash_of(word))

    # publish directory...
    # todo: publish after all files have been hashed,
    # generate name from their hash
    if flags.get('name'):
        if not server.entries.has_key(path):
            for name in dir.names:
                server.node.publish(name, dir.info)
        elif server.entries[path].mtime != mtime:
            #first unpublish outdated info
            #print "unpublishing outdated dir"
            server.node.unpublish(dir.info)
            for name in dir.names:
                server.node.publish(name, dir.info)

        server.entries[path]    = dir
        server.paths[dir.hash] = (path, dir.mtime)
        server.names[path]      = dir.hash

    return dir

예제 #3

0

파일 보기

파일: file_server.py 프로젝트: Ninja-1/Circle2

def build_file(path, mtime,flags,server):
    
    file = File()
    file.is_dir = 0
    file.mtime = mtime

    # basename = utility.force_unicode(os.path.basename(path))
    # do not convert to unicode, because published data should not
    # depend on the terminal encoding of the client
    basename = os.path.basename(path)
    
    file.length = os.stat(path)[6]
    file.names = [ ]
    file.info = {
        'type'     : 'file',
        'filename' : basename,
        'length'   : file.length,
    }

    if flags.get('name'):
        if server.cache.has_key((path,mtime)):
            file.hash, file.length = server.cache[(path,mtime)]
        else:
            try:
                f = open(path,'rb')
                m = md5.new()
                file.length = 0L
                while 1:
                    str = f.read(1<<20)
                    if str == '': break
                    m.update(str)
                    file.length = file.length + len(str)
                f.close()
                file.hash = m.digest()
            except IOError:
                raise Error('bad file')
            server.cache[(path,mtime)] = (file.hash, file.length)
            
        file.info['name'] = file.hash
        file.names.append(file.hash)

    if flags.get('local'):
        file.info['local_path'] = path
    
    str = utility.remove_accents(string.lower(basename))
    keywords = [ ]
    
    if flags.get('filename'):
        keywords.append(str)

    if flags.get('keywords'):
        for char in '+-_.,?!()[]':
            str = string.replace(str,char," ")

        keywords.extend(string.split(str))         

    if flags.get('mime'):
        list = {}
        if string.lower(path[-4:]) =='.mp3':
            list = mp3.mp3_info(path)
        elif string.lower(path[-4:]) =='.ogg':
            list = mp3.ogg_info(path)
        if list:
            for (k,v) in list.items():
                file.info[k] = v

        if file.info.get('music_title'):
            keywords.extend(string.split(
                utility.remove_accents(string.lower(file.info['music_title']))))
        if file.info.get('music_artist'):
            keywords.extend(string.split(
                utility.remove_accents(string.lower(file.info['music_artist']))))

    file.info['keywords'] = [ ]

    if flags.get('mime'):
        import classify
        try:        
            information = classify.classifier.information(path)
            for key in information.keys():
                if information[key] == None:
                    #print "[Harmless warning] Can not classify : ", path
                    continue
    
                if len(information[key]) >= min_search_keyword_len:
                    file.info[key] = information[key]
        except:
            sys.stderr.write("Exception caught while classifying file.\n")

        
    for word in keywords:
        word=utility.force_string(word)
        if len(word) >= min_search_keyword_len and word not in file.info['keywords']:
            file.info['keywords'].append(word)
            if flags.get('name'):
                file.names.append(hash.hash_of(word))


    # publish immediately...
    if flags.get('name'):
        if not server.entries.has_key(path):
            for name in file.names:
                server.node.publish(name, file.info)
        elif server.entries[path].mtime != mtime:
            #first unpublish outdated info
            print "unpublishing outdated:",path
            server.node.unpublish(file.info)
            for name in file.names:
                server.node.publish(name, file.info)

        server.entries[path]    = file
        server.paths[file.hash] = (path, file.mtime)
        server.names[path]      = file.hash




    return file