Exemplo n.º 1
0
def do(filename, name):
    """Filter the conversations of a YAML corpus and write them to ``<name>.yml``.

    Reads the ``conversations`` list from *filename* and keeps only the
    conversations in which no utterance makes ``has_name`` return a true
    value and no utterance's string form is shorter than two characters.
    The survivors are written below a ``categories``/``conversations``
    header to ``name + '.yml'`` in the current directory.

    Args:
        filename: Path of the YAML file to read (opened through ``opentext``).
        name: Category name written into the header; also the stem of the
            output file.

    Raises:
        KeyError: If the input document has no ``conversations`` key.
    """
    with opentext(filename, 'r') as f:
        # safe_load rather than yaml.load: calling yaml.load without an
        # explicit Loader raises TypeError on PyYAML 6+, and on older
        # versions it may construct arbitrary Python objects.
        y = yaml.safe_load(f)
    data = y['conversations']

    from isname import has_name

    def scan(conversations):
        """Yield the conversations whose every utterance passes both checks."""
        for conversation in conversations:
            for utterance in conversation:
                text = str(utterance)
                if has_name(text) or len(text) < 2:
                    break
            else:
                # The inner loop finished without rejecting any utterance.
                yield conversation

    def to_yml(conversations):
        """Yield the YAML lines of a nested list, one utterance per line.

        Utterances are written verbatim; no YAML quoting is applied.
        """
        for conversation in conversations:
            for index, utterance in enumerate(conversation):
                prefix = '- - ' if index == 0 else '  - '
                yield prefix + str(utterance) + '\n'

    head = 'categories:\n- %s\nconversations:\n' % name
    with open(name + '.yml', 'w', encoding='utf-8') as out:
        out.write(head)
        # An empty ``conversations:`` key parses as None, so test truthiness
        # instead of calling len() on it.
        if data:
            out.writelines(to_yml(scan(data)))
Exemplo n.º 2
0
def do(name):
    """Merge the conversations of every ``.yml`` file in the current directory.

    Each file whose name ends in ``.yml`` (case-insensitively) is parsed and
    its ``conversations`` list is appended to one combined list. A file that
    cannot be read or parsed is reported with ``'<filename> fail'`` and
    skipped. The combined list is written below a
    ``categories``/``conversations`` header to ``name + '.yml'``.

    Args:
        name: Category name written into the header; also the stem of the
            output file.
    """
    filenames = [s for s in os.listdir('.') if s[-4:].lower() == '.yml']
    data = []
    for filename in progressbar(filenames):
        try:
            with opentext(filename, 'r') as f:
                # safe_load rather than yaml.load: calling yaml.load without
                # an explicit Loader raises TypeError on PyYAML 6+.
                l = yaml.safe_load(f)
            # An empty ``conversations:`` key parses as None; treat it as
            # "nothing to merge" rather than as a failure.
            data.extend(l['conversations'] or [])
        except Exception:
            # Best effort: report the file and carry on with the others.
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print('%s fail' % filename)

    def to_yml(conversations):
        """Yield the YAML lines of a nested list, one utterance per line.

        Utterances are written verbatim; no YAML quoting is applied.
        """
        for conversation in conversations:
            for index, utterance in enumerate(conversation):
                prefix = '- - ' if index == 0 else '  - '
                # str(): YAML scalars such as numbers or booleans are not
                # parsed as strings and cannot be concatenated directly.
                yield prefix + str(utterance) + '\n'

    head = 'categories:\n- %s\nconversations:\n' % name
    with open(name + '.yml', 'w', encoding='utf-8') as out:
        out.write(head)
        out.writelines(to_yml(data))
Exemplo n.º 3
0
def do():
    """Convert every ``.txt`` file in the current directory into a ``.yml`` file.

    For each file whose name ends in ``.txt`` (case-insensitively):

    1. The text is rewritten so that each ``“…”`` quotation sits on its own
       line prefixed with ``--``, other lines are blanked, groups of
       ``--`` lines are separated by ``===`` lines, and groups that consist
       of a single line or contain a long line are removed.
    2. The rewritten text replaces the original file (UTF-8).
    3. ``deletenames.do(filename)`` is run on the file.
    4. The file is converted to ``<filename>.yml``; each ``===`` line starts
       a new conversation.
    5. If the resulting document has no conversations, the ``.yml`` file is
       deleted again.

    Any failure while handling a file is reported with ``'<filename> fail'``
    and processing continues with the next file.
    """
    import re

    # Compiled once here instead of once per file.
    m_new = re.compile(r'\n\s*\n')      # run of blank lines
    m_msg = re.compile(r'“(.*?)”')      # quoted utterance
    m_noc = re.compile(r'\n[^-]{2}.*')  # line start followed by two non-dash characters
    # A ``--`` line followed by at least three blank lines: group boundary.
    m_con = re.compile(r'(--.*\s*)\n(\s*?\n){3,}')
    # A ``===``-delimited group holding exactly one ``--`` line.
    m_sin = re.compile(r'===\s*(--.*\s*\n)\s*===')
    # A ``===``-delimited group with a ``--`` line of 26+ further characters.
    m_lon = re.compile(
        r'===\s*(--.*\s*\n)*?(--.{26,}\s*\n)+(--.*\s*\n)*?\s*===')

    def to_yml(lines):
        """Yield YAML list lines for ``--`` lines grouped by ``===`` markers."""
        new = True
        for line in lines:
            stripped = line.strip()
            if stripped == '':
                continue
            if stripped == '===':
                new = True
                continue
            # line[2:] drops the leading ``--`` and keeps the line ending.
            yield ('- - ' if new else '  - ') + line[2:]
            new = False

    filenames = [s for s in os.listdir('.') if s[-4:].lower() == '.txt']
    for filename in progressbar(filenames):
        try:
            with opentext(filename, 'r') as f:
                text = '\n' + f.read()
            text = m_new.sub('\n', text)
            text = m_msg.sub(r'\n--\1\n', text)
            text = m_noc.sub('\n', text)
            text = m_con.sub(r'\1\n===\n', text)
            text = m_new.sub('\n', text)
            # One substitution pass cannot match groups that share a ``===``
            # delimiter, so repeat until nothing matches any more.
            while m_sin.search(text):
                text = m_sin.sub('===', text)
            while m_lon.search(text):
                text = m_lon.sub('===', text)

            with open(filename, 'w', encoding='utf-8') as f:
                f.write(text)

            import deletenames
            deletenames.do(filename)

            name = os.path.split(filename)[1]
            head = 'categories:\n- %s\nconversations:\n' % name
            with open(filename, 'r', encoding='utf-8') as f:
                with open(name + '.yml', 'w', encoding='utf-8') as out:
                    out.write(head)
                    out.writelines(to_yml(f))

            import yaml
            with open(name + '.yml', 'r', encoding='utf-8') as f:
                # safe_load rather than yaml.load: calling yaml.load without
                # an explicit Loader raises TypeError on PyYAML 6+.
                l = yaml.safe_load(f)
            if l['conversations'] is None:
                # Nothing survived the filtering; do not keep an empty corpus.
                os.remove(name + '.yml')
        except Exception:
            # Best effort: report the file and carry on with the others.
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print('%s fail' % filename)
Exemplo n.º 4
0
def do(filename):
  """Remove from *filename* every ``===``-delimited group that mentions a name.

  The names are the entries of the module-level ``stopwords`` list after it
  has been extended with ``gen(filename)``. For each non-empty name, every
  group of ``--`` lines between two ``===`` markers that has at least one
  line containing the name is collapsed to a single ``===``. The result is
  written back to *filename* as UTF-8.

  Args:
    filename: Path of the text file to rewrite in place.
  """
  import re
  from extractnames import gen
  with opentext(filename, 'r') as f:
    text = f.read()
  # NOTE(review): this extends the shared ``stopwords`` list in place, so
  # names found in one file also apply to every later call, and repeated
  # calls add duplicates. Kept as in the original; confirm it is intended.
  stopwords.extend(gen(filename))
  for name in stopwords:
    if name == '':
      continue
    # re.escape: the name is matched as literal text, so characters such as
    # '.', '(' or '*' inside it must not be read as regex syntax.
    match_name = re.compile(
        r'===\s*(--.*\s*\n)*?(--.*%s.*\s*\n)+(--.*\s*\n)*?\s*===' % re.escape(name))
    # One substitution pass cannot match groups that share a ``===``
    # delimiter, so repeat until nothing matches any more.
    while match_name.search(text):
      text = match_name.sub('===', text)
  with open(filename, 'w', encoding='utf-8') as f:
    f.write(text)
Exemplo n.º 5
0
# Command-line script: rewrite the text file named by the first argument so
# that quoted utterances become ``--`` lines grouped by ``===`` markers.
import re
import sys

_M_NEW = re.compile(r'\n\s*\n')      # run of blank lines
_M_MSG = re.compile(r'“(.*?)”')      # quoted utterance
_M_NOC = re.compile(r'\n[^-]{2}.*')  # line start followed by two non-dash characters
# A ``--`` line followed by at least three blank lines: group boundary.
_M_CON = re.compile(r'(--.*\s*)\n(\s*?\n){3,}')
# A ``===``-delimited group holding exactly one ``--`` line.
_M_SIN = re.compile(r'===\s*(--.*\s*\n)\s*===')
# A ``===``-delimited group with a ``--`` line of 26+ further characters.
_M_LON = re.compile(r'===\s*(--.*\s*\n)*?(--.{26,}\s*\n)+(--.*\s*\n)*?\s*===')


def clean_text(raw):
  """Return *raw* rewritten as ``--`` utterance lines grouped by ``===``.

  A newline is prepended, each ``“…”`` quotation is moved to its own line
  prefixed with ``--``, other lines are blanked, a ``===`` line is inserted
  after a ``--`` line that is followed by at least three blank lines, and
  groups that hold a single line or contain a long line are removed.
  """
  text = '\n' + raw
  text = _M_NEW.sub('\n', text)
  text = _M_MSG.sub(r'\n--\1\n', text)
  text = _M_NOC.sub('\n', text)
  text = _M_CON.sub(r'\1\n===\n', text)
  text = _M_NEW.sub('\n', text)
  # One substitution pass cannot match groups that share a ``===``
  # delimiter, so repeat until nothing matches any more.
  while _M_SIN.search(text):
    text = _M_SIN.sub('===', text)
  while _M_LON.search(text):
    text = _M_LON.sub('===', text)
  return text


def main(argv=None):
  """Clean the file named by ``argv[1]`` in place and return an exit status.

  Args:
    argv: Argument vector; defaults to ``sys.argv``.

  Returns:
    0 on success, 1 when no file name was supplied.
  """
  argv = sys.argv if argv is None else argv
  if len(argv) <= 1:
    # The message asks the user to supply a file name.
    print('请输入文件名!')
    return 1

  filename = argv[1]
  from opentext import opentext
  with opentext(filename, 'r') as f:
    text = clean_text(f.read())

  with open(filename, 'w', encoding='utf-8') as f:
    f.write(text)
  return 0


# Run only when executed as a script; importing this module must not
# terminate the importing process.
if __name__ == '__main__':
  sys.exit(main())
Exemplo n.º 6
0
def gen(filename):
    """Return the set of names that ``getNames`` finds in *filename*.

    The file is opened through ``opentext``, passed through ``cut``, and the
    result of ``getNames`` on that output is collected into a set while the
    file is still open.
    """
    with opentext(filename, 'r') as source:
        return set(getNames(cut(source)))