Python guess示例

编程语言: Python

命名空间/包名称: nkf

方法/功能: guess

hotexamples.com的示例: 6

Python guess - 共找到 6 个示例。以下是从开源项目中提取的、评分最高的 nkf.guess 真实 Python 用法示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

def make_train_text(model, use_wakatigaki):
    """Build the training text file ``input.txt`` for *model*.

    Every file returned by
    ``ds_utils.find_all_files(model.dataset.dataset_path)`` is read, its
    encoding is guessed with ``nkf.guess``, and files reported as
    ``'BINARY'`` are skipped.  The remaining files are decoded (undecodable
    bytes are ignored), stripped of carriage returns, re-encoded as UTF-8
    and appended to ``input.txt`` under ``model.prepared_file_path``.

    Args:
        model: Object providing ``prepared_file_path`` and
            ``dataset.dataset_path``.
        use_wakatigaki: When true, each line is passed through a MeCab
            tagger created with ``"-Owakati"`` and the tagger output is
            written instead of the raw line.

    Returns:
        The path of the written ``input.txt`` file.
    """
    output_path = os.path.join(model.prepared_file_path, 'input.txt')
    # Everything written below is already an encoded byte string, so binary
    # mode keeps platform newline translation from re-adding the '\r' that
    # is stripped on purpose.  The context manager closes the file even when
    # a read, decode or tagger call raises.
    with open(output_path, 'wb') as input_text:
        tagger = None
        if use_wakatigaki:
            logger.info('Use wakatigaki option.')
            import MeCab
            tagger = MeCab.Tagger("-Owakati")
        for f in ds_utils.find_all_files(model.dataset.dataset_path):
            # Read raw bytes: encoding detection must see the file unmodified.
            with open(f, 'rb') as source:
                raw_text = source.read()
            encoding = nkf.guess(raw_text)
            if encoding == 'BINARY':
                continue
            text = raw_text.decode(encoding, 'ignore').replace('\r', '')
            encoded_text = text.encode('UTF-8')
            if tagger is None:
                input_text.write(encoded_text)
                input_text.flush()
                continue
            # NOTE(review): the tagger is fed UTF-8 byte strings, as in the
            # original Python 2 code -- confirm before running on Python 3.
            for line in encoded_text.splitlines():
                result = tagger.parse(line)
                if result is None:
                    # The tagger produced nothing for this line; skip it.
                    continue
                input_text.write(result)
                input_text.flush()
    return output_path

示例#2

显示文件

 def save_uploaded_file_to_category(self, uploaded_file, category):
     """Validate an uploaded file and store it under *category*.

     The extension check depends on ``self.type``: ``'image'`` accepts
     .jpg/.jpeg/.png/.gif and ``'text'`` accepts .txt only.  Text uploads
     are additionally rejected when ``nkf.guess`` reports binary content.
     After saving, ``self.file_num`` is incremented and
     ``self.update_and_commit()`` is called.

     Args:
         uploaded_file: Upload object providing ``filename``, ``save()``
             and ``stream.read()``.
         category: Sub-directory of ``self.dataset_path`` to store into.

     Raises:
         ValueError: If the extension is not allowed for ``self.type`` or
             a text upload is detected as binary.
     """
     filename = uploaded_file.filename
     ext = os.path.splitext(filename)[1].lower()
     if self.type == 'image':
         if ext not in ('.jpg', '.jpeg', '.png', '.gif'):
             raise ValueError('Invalid file type.')
     elif self.type == 'text':
         if ext not in ('.txt', ):
             raise ValueError('Invalid file type.')
     new_filename = os.path.join(
         self.dataset_path, category,
         ds_util.get_timestamp() + '_' + secure_filename(filename))
     if self.type == 'image':
         uploaded_file.save(new_filename)
     elif self.type == 'text':
         text = uploaded_file.stream.read()
         # nkf.guess reports binary content as upper-case 'BINARY'; the
         # previous lower-case-only comparison could never match it.  The
         # lower-case spelling is still accepted for backward compatibility.
         if nkf.guess(text) in ('BINARY', 'binary'):
             raise ValueError(
                 'Invalid file type. File must be a text file.')
         # Store the upload byte-for-byte and close the handle even if the
         # write fails.
         with open(new_filename, 'wb') as f:
             f.write(text)
     self.file_num += 1
     self.update_and_commit()

示例#3

显示文件

def get_text_sample(path, character_num=-1):
    """Return the decoded text of the file at *path*.

    The file's encoding is guessed with ``nkf.guess`` and used to decode
    its contents.

    Args:
        path: Path of the file to read.
        character_num: Maximum number of characters to return.  Any value
            below zero (the default) returns the whole text.

    Returns:
        The decoded text, truncated to ``character_num`` characters when
        that value is non-negative.

    Raises:
        LookupError: If the guessed encoding name is not a codec Python
            knows (presumably including nkf's 'BINARY' result -- verify).
        UnicodeDecodeError: If the content cannot be decoded with the
            guessed encoding.
    """
    # Read raw bytes so detection sees the file unmodified, and close the
    # handle deterministically instead of leaking it.
    with open(path, 'rb') as sample_file:
        raw_text = sample_file.read()
    encoding = nkf.guess(raw_text)
    text = raw_text.decode(encoding)
    if character_num > -1:
        return text[:character_num]
    return text

示例#4

显示文件

文件： mycodecs.py 项目： pombredanne/atango

def decode(text, encoding=None, *args):
    """Decode *text*, guessing the encoding when none usable is given.

    When *encoding* is empty or names ISO-8859-1, the encoding is guessed
    with ``nkf.guess``; a guess of 'BINARY' or 'ISO-8859-1' is replaced by
    'utf8'.  The name is then passed through ``normalize_encoding``.  If
    the normalized name is not in ``all_encodings``, the text is converted
    with ``nkf.nkf('-w', ...)`` and decoded as UTF-8; otherwise it is
    decoded directly, forwarding any extra positional arguments to
    ``text.decode``.
    """
    needs_guess = not encoding or encoding in ('ISO-8859-1', 'iso-8859-1')
    if needs_guess:
        guessed = nkf.guess(text)
        encoding = 'utf8' if guessed in ('BINARY', 'ISO-8859-1') else guessed
    codec_name = normalize_encoding(encoding)
    if codec_name in all_encodings:
        return text.decode(codec_name, *args)
    # Unknown codec name: let nkf do the conversion to UTF-8 instead.
    return nkf.nkf('-w', text).decode('utf8')

示例#5

显示文件

文件： core.py 项目： kirin123kirin/cmd

 def getencoding(dat:bytes):
     """Return a lower-cased encoding name for *dat*, or None for binary data.

     Data containing a NUL byte is reported as None without consulting nkf.
     Otherwise the result of ``nkf.guess`` is lower-cased; "shift_jis" is
     reported as "cp932", "binary" as None, and any other name unchanged.
     """
     if b"\0" in dat:
         return None
     guessed = nkf.guess(dat).lower()
     # Names that need translating; everything else passes through as-is.
     overrides = {"shift_jis": "cp932", "binary": None}
     return overrides.get(guessed, guessed)

示例#6

显示文件

文件： HtmlParseAndList.py 项目： syakesaba/python-scripts-useful

#!/usr/bin/env python3
# encoding: utf-8
#
# Fetch a URL (the first command-line argument, or a built-in default),
# guess its character encoding with nkf, and print the prettified HTML.

(print) # This is a Python 3 script, not Python 2 (a bare `print` is a syntax error there).

import nkf # NKF wrapper for Python 3. See: http://sourceforge.jp/projects/nkf/scm/git/nkf/tree/master

try: # BeautifulSoup, an HTML parser, is available to Python 3 after 2to3.
    from bs4 import BeautifulSoup as BSoup
except ImportError:
    # Only a missing bs4 should trigger the fallback; other errors propagate.
    from BeautifulSoup import BeautifulSoup as BSoup
import urllib.request
from sys import argv

AURI="http://osu.ppy.sh/pages/include/profile-general.php?u=1679287"
if len(argv) <= 1:
    URI=AURI
else:
    URI=argv[1]
bHtml = urllib.request.urlopen(URI).read()
charset = nkf.guess(bHtml)
sHtml = bHtml.decode(charset)
pretty = BSoup(sHtml).prettify()
# prettify() may hand back either text or encoded bytes depending on the
# BeautifulSoup flavour imported above; decode only when bytes came back,
# since calling .decode() on str raises AttributeError.
if isinstance(pretty, bytes):
    pretty = pretty.decode()
print(pretty)