def upload_file():
    """Handle an uploaded image: store it, pass it to ``detect`` and redirect.

    On a POST request the ``file`` part is validated (present, non-empty
    name, accepted by ``allowed_file``), saved under
    ``app.config['UPLOAD_FOLDER']`` with a sanitised name, converted to RGB
    and handed to ``detect``. Every rejected upload flashes a message and
    redirects back to the requesting URL.

    Returns:
        A redirect response for POST requests.

    NOTE(review): for non-POST requests the visible code falls through and
    returns None -- confirm whether a template render is missing here.
    """
    if request.method == 'POST':
        # The POST request must carry a file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)

        file = request.files['file']
        # No file was selected for upload.
        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)

        # A file was selected and its name is accepted by allowed_file().
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            # Build the destination once so saving and re-opening always
            # refer to exactly the same path.
            img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(img_path)
            flash('Datoteka se je uspesno nalozila')

            # convert() loads the pixel data and returns a new image, so the
            # underlying file handle can be released right away.
            with Image.open(img_path, mode='r') as uploaded:
                original_image = uploaded.convert('RGB')
            detect(original_image, img_path,
                   min_score=0.2, max_overlap=0.5, top_k=200)
            return redirect('/response?file=' + filename)
        else:
            flash('Allowed file types are txt, pdf, png, jpg, jpeg, gif')
            return redirect(request.url)
def loadAllHeroURL(self):
    """Fetch the hero list and fill ``self.heroIdList`` with display names.

    The payload returned by ``self.requestHelper(self.allHeroURL)`` is a
    JavaScript assignment. Everything up to the second ``=`` and the
    trailing ``;`` are removed, and the remainder is evaluated as a Python
    literal. ``self.version`` and ``self.updated`` are taken from the
    payload, and for every entry of ``data['keys']`` the hero's name and
    title are extracted from the stringified ``data['data']`` and stored
    as ``"<name>-<title>"`` in ``self.heroIdList``.

    Raises:
        IndexError: if the payload has fewer than two ``=`` signs or no
            name/title pair is found for a key.
    """
    from pip._vendor import chardet

    content = self.requestHelper(self.allHeroURL)

    # Detect the character set of the payload, then decode it. chardet may
    # report None when it has no guess; fall back to UTF-8 in that case.
    encoding = chardet.detect(content)['encoding']
    print(encoding)
    content = content.decode(encoding=encoding or 'utf-8')

    # Keep everything after the second '=' (maxsplit stops a later '='
    # inside the data from truncating it) and drop the closing ';'.
    chapionListData = content.split('=', 2)[2].strip()
    if chapionListData.endswith(';'):
        chapionListData = chapionListData[:-1]

    # SECURITY NOTE(review): eval() runs on data fetched from the network.
    # Kept to preserve behaviour; consider json.loads/ast.literal_eval if
    # the payload format allows it.
    data = eval(chapionListData)
    heroNameList = data['keys']
    heroDescri = str(data['data'])  # detailed hero names and images
    self.version = data['version']
    self.updated = data['updated']

    for key, value in heroNameList.items():
        # Pull the name and the title that follow this key in the dump.
        # The key is escaped so it is always matched literally.
        pattern = re.compile(
            re.escape(key)
            + r'[\s\S]*?\'name\': \'(.*?)\'[\s\S]*?\'title\': \'(.*?)\'',
            re.S)
        match = pattern.findall(heroDescri)
        name = match[0][0] + "-" + match[0][1]
        self.heroIdList[value] = name
def load_data():
    """Load tab-separated records from the files listed by ``get_vsvs()``.

    For each file the encoding is guessed by chardet from the first 70
    bytes. The first field of the first CSV row, split on tabs, gives the
    column titles. The first field of every following row is split the
    same way and stored as a ``{title: value}`` dict, keyed by the value
    of the first column.

    Blank lines are skipped and values beyond the last title are ignored
    instead of raising ``IndexError``.

    Returns:
        tuple: ``(titles, row_dict)`` for the last file processed, or
        ``([], OrderedDict())`` when there were no files.

    NOTE(review): when several files are returned by ``get_vsvs()`` only
    the last one's data is kept -- confirm this is the intended contract.
    """
    titles = []
    row_dict = OrderedDict()
    for csv_name in get_vsvs():
        # Guess the encoding from a short prefix of the raw bytes.
        with open(csv_name, 'rb') as fin:
            encoding_type = chardet.detect(fin.read(70))['encoding']

        row_dict = OrderedDict()
        titles = []
        with open(csv_name, newline='', encoding=encoding_type) as f:
            reader = csv.reader(f)

            # The header is the first row; an empty file has none.
            header = next(reader, None)
            if header:
                titles = header[0].split('\t')
            print('titles=', titles)
            if not titles:
                continue

            for row in reader:
                print('row==', row)
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                title_row = row[0]
                print('split row ', row)
                row = title_row.split('\t')
                print('22split row ', row)
                d = {}
                # zip() stops at the shorter sequence, so surplus values
                # without a matching title are dropped.
                for index, (title, value) in enumerate(zip(titles, row)):
                    print(index)
                    d[title] = value
                row_dict[d.get(titles[0])] = d
            print(row_dict)
    return titles, row_dict
def get_tokens(self, text, unfiltered=False):
    """
    Tokenize `text` and return an iterable of (tokentype, value) pairs.

    Input that is not a `str` is decoded first, according to
    `self.encoding`: ``'guess'`` delegates to `guess_decode`,
    ``'chardet'`` honours a BOM from `_encoding_map` and otherwise asks
    chardet about the first 1KB, and any other value is used as the codec
    name. The text is then normalised: line endings become ``'\\n'``,
    it is stripped according to `stripall` / `stripnl`, tabs are expanded
    when `tabsize` is positive, and a final newline is added when
    `ensurenl` is set.

    The registered filters are applied to the resulting stream unless
    `unfiltered` is true.
    """
    bom_char = '\ufeff'

    if isinstance(text, str):
        if text.startswith(bom_char):
            text = text[len(bom_char):]
    elif self.encoding == 'guess':
        text, _ = guess_decode(text)
    elif self.encoding == 'chardet':
        try:
            from pip._vendor import chardet
        except ImportError as e:
            raise ImportError(
                'To enable chardet encoding guessing, '
                'please install the chardet library '
                'from http://chardet.feedparser.org/') from e
        # A byte order mark takes precedence over any guess.
        for bom, encoding in _encoding_map:
            if text.startswith(bom):
                text = text[len(bom):].decode(encoding, 'replace')
                break
        else:
            # No BOM matched: let chardet look at the first 1KB.
            guess = chardet.detect(text[:1024])
            text = text.decode(guess.get('encoding') or 'utf-8', 'replace')
    else:
        text = text.decode(self.encoding)
        if text.startswith(bom_char):
            text = text[len(bom_char):]

    # From here on `text` is a unicode string.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    if self.stripall:
        text = text.strip()
    elif self.stripnl:
        text = text.strip('\n')

    if self.tabsize > 0:
        text = text.expandtabs(self.tabsize)

    if self.ensurenl and not text.endswith('\n'):
        text += '\n'

    def token_pairs():
        # Drop the position component of every unprocessed token.
        for _, ttype, value in self.get_tokens_unprocessed(text):
            yield ttype, value

    if unfiltered:
        return token_pairs()
    return apply_filters(token_pairs(), self.filters, self)
def autoTransformEncoding(self, x):
    """Re-encode ``x`` as UTF-8 using the encoding guessed by chardet.

    Args:
        x: Encoded data to convert; ``None`` and ``''`` are accepted.

    Returns:
        ``''`` when ``x`` is ``None`` or ``''``; otherwise ``x`` decoded
        with the detected encoding (undecodable bytes are ignored) and
        encoded again as UTF-8. The interpreter's default encoding is used
        when chardet reports no encoding.
    """
    if x is None or x == '':
        return ''
    defaultEncoding = sys.getdefaultencoding()
    # The 'encoding' key can be present with a None value, so a plain
    # dict default is not enough; `or` covers both cases.
    infoencode = chardet.detect(x).get('encoding') or defaultEncoding
    return x.decode(infoencode, 'ignore').encode('utf-8')
def upload_image():
    """Store a base64-encoded image from the form, pass it to ``detect`` and return the file.

    Form fields read:
        base64: the image content as a base64 string.
        ImageName: the name under which the image is stored.

    The decoded bytes are written into ``app.config['UPLOAD_FOLDER']``,
    the stored file is opened, converted to RGB and handed to ``detect``
    together with its path, and the file at that path is sent back.

    Returns:
        The response produced by ``send_file`` for the stored image path.
    """
    encoded_img = request.form['base64']
    # The name comes from the client: keep only its final path component so
    # it cannot point outside the upload folder (e.g. "../../x").
    filename = os.path.basename(request.form['ImageName'])

    # Decode the base64 string back into the raw image bytes.
    img_data = base64.b64decode(encoded_img)

    image_path_name = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    with open(image_path_name, 'wb') as f:
        f.write(img_data)

    # convert() loads the pixel data and returns a new image, so the file
    # handle can be closed before detection runs.
    with Image.open(image_path_name, mode='r') as stored:
        original_image = stored.convert('RGB')
    detect(original_image, image_path_name,
           min_score=0.2, max_overlap=0.5, top_k=200)
    return send_file(image_path_name)
def test_get_artist(app, authorization_header):
    """GET artist 1 and compare the decoded JSON body with ``single_artist.json``.

    The response body is decoded with the encoding chardet detects for it
    before being parsed as JSON.
    """
    response = app.get(
        '/music-archive/api/v1/artists/1',
        headers=authorization_header,
        follow_redirects=True,
    )

    raw_body = response.data
    detected_encoding = chardet.detect(raw_body)["encoding"]
    json_data = json.loads(raw_body.decode(detected_encoding))

    expected_json = load_json('single_artist.json')

    assert response.status_code == 200
    assert json_data == expected_json
def test_detail_page(self):
    """Exercise a few script-detection regexes and a chardet-based re-encode.

    NOTE(review): the ``ur''`` literals and ``unicode()`` are Python 2-only
    syntax; this function does not compile under Python 3.
    """
    # Remove a string containing characters in U+4E00-U+9FA5; the result of
    # the substitution is discarded.
    re.sub(ur'.*[\u4E00-\u9FA5]+.*', '', unicode('中文'))
    # The four patterns below are compiled but not used again in this
    # function. Going by the names and ranges they presumably target
    # English, Chinese, Japanese and Korean text -- confirm with the author.
    epre = re.compile(r"[\s\w]+")
    chre = re.compile(ur".*[\u4E00-\u9FA5]+.*")
    jpre = re.compile(ur".*[\u3040-\u30FF\u31F0-\u31FF]+.*")
    hgre = re.compile(ur".*[\u1100-\u11FF\u3130-\u318F\uAC00-\uD7AF]+.*")
    x = '中文'
    defaultEncoding = sys.getdefaultencoding()
    # Use the encoding chardet reports for x; the interpreter default is
    # only used when the 'encoding' key is missing from the result.
    infoencode = chardet.detect(x).get('encoding', defaultEncoding)
    # Decode with the detected encoding (ignoring undecodable bytes) and
    # print the UTF-8 re-encoding.
    print(x.decode(infoencode, 'ignore').encode('utf-8'))
def turn(file):
    """Rewrite ``file`` in place as UTF-8 with LF line endings when needed.

    The file is read as bytes and decoded with the encoding chardet
    detects. It is rewritten only when that encoding is neither ``utf-8``
    nor ``ascii``, or when the text contains CRLF line endings. A message
    is printed after each rewrite.

    Args:
        file: Path of the file to convert.

    Returns:
        None. Files whose encoding chardet cannot detect are left untouched.
    """
    with open(file, 'rb') as f:
        data = f.read()

    encoding = chardet.detect(data)['encoding']
    if encoding is None:
        # chardet has no guess (e.g. for an empty file); decoding with None
        # would raise TypeError, so skip the file instead.
        print(f"{file}: encoding not detected, skipped")
        return

    data_str = data.decode(encoding)

    tp = 'LF'
    if '\r\n' in data_str:
        tp = 'CRLF'
        data_str = data_str.replace('\r\n', '\n')

    if encoding not in ['utf-8', 'ascii'] or tp == 'CRLF':
        # newline='\n' stops Python from translating line endings on write.
        with open(file, 'w', newline='\n', encoding='utf-8') as f:
            f.write(data_str)
        print(f"{file}: ({tp},{encoding}) trun to (LF,utf-8) success!")
from urllib import request
import re
from pip._vendor import chardet

msg = "master msg"

# Fetch the page and decode it with the encoding chardet detects, falling
# back to UTF-8 when chardet has no guess (it reports None in that case).
with request.urlopen('https://coding.imooc.com/') as req:
    html = req.read()
encode_type = chardet.detect(html)
html = html.decode(encode_type['encoding'] or 'utf-8')

# NOTE(review): `.+` is greedy, so a line holding several .jpg references
# produces a single match spanning all of them -- confirm this is intended.
img_urls = re.findall(r'src=.+\.jpg', html)

i = 0
for url in img_urls:
    url = str(url).replace("src=\"", "http:")
    # Download first, then write: a failed request no longer leaves an
    # empty file behind, and both handles are closed deterministically.
    with request.urlopen(url) as req:
        img_file = req.read()
    # Images are binary data, so the file is opened in binary mode.
    with open(str(i) + ".jpg", 'wb+') as f:
        f.write(img_file)
    i += 1

a = "master_1_3"
def check_encoding_of_file(name):
    """Return the encoding chardet detects for the file ``<name>.csv``.

    Args:
        name: Path of the CSV file without its ``.csv`` extension.

    Returns:
        The ``'encoding'`` entry of chardet's detection result for the
        complete file content.
    """
    csv_path = name + ".csv"
    with open(csv_path, "rb") as handle:
        raw_bytes = handle.read()
        guess = chardet.detect(raw_bytes)
    return guess['encoding']
# 5. Ping the web resources yandex.ru and youtube.com and convert the results
# from the bytes type to the string type (Cyrillic output).
import subprocess
from pip._vendor import chardet

# args = ["ping","yandex.ru"]
# sub_ping = subprocess.Popen(args,stdout=subprocess.PIPE)
# for line in sub_ping.stdout:
#     result = chardet.detect(line)
#     line = line.decode(result["encoding"]).encode("utf-8")
#     print(line.decode("utf-8"))

args = ["ping", "youtube.com"]
# The context manager closes the pipe and waits for the child process, so no
# handle or unreaped process is left behind.
with subprocess.Popen(args, stdout=subprocess.PIPE) as sub_ping:
    for line in sub_ping.stdout:
        result = chardet.detect(line)
        # chardet reports None when it has no guess; fall back to UTF-8 and
        # replace undecodable bytes instead of crashing mid-stream.
        line = line.decode(result["encoding"] or "utf-8", "replace").encode("utf-8")
        print(line.decode("utf-8"))
#-*-coding:utf-8-*-
# `import urllib` alone does not load the `request` submodule; importing it
# explicitly makes `urllib.request.urlopen` available.
import urllib.request
from pip._vendor import chardet

if __name__ == '__main__':
    url = "http://www.baidu.com"
    # Read the whole page and release the connection.
    with urllib.request.urlopen(url) as req:
        html = req.read()
    cs = chardet.detect(html)
    print(type(cs))
    # The 'encoding' key exists even when chardet has no guess (value None),
    # so a dict default is not enough; `or` covers both cases.
    html = html.decode(cs.get("encoding") or "utf-8")
    print(html)
def file_name_is_legal(name):
    """Tell whether ``name`` is an acceptable file name.

    A name is accepted when chardet classifies its UTF-8 bytes as
    ``'ascii'``, or when it ends with ``.py``.

    Args:
        name: File name to check, as a string.

    Returns:
        bool: True when the name is accepted, otherwise False.
    """
    guessed_encoding = chardet.detect(name.encode('utf-8'))['encoding']
    if guessed_encoding == 'ascii':
        return True
    return name.endswith('.py')
def get_encoding(file):
    """Return the encoding chardet detects for the content of ``file``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of chardet's detection result.
    """
    # Read in binary mode so chardet gets the raw bytes to classify.
    with open(file, 'rb') as stream:
        payload = stream.read()
        detection = chardet.detect(payload)
        return detection['encoding']