Python make_sure_path_exists示例

编程语言: Python

命名空间/包名称: utility

方法/功能: make_sure_path_exists

hotexamples.com的示例: 6

Python make_sure_path_exists - 共找到6个示例。这些是从开源项目中提取的、最受好评的 utility.make_sure_path_exists 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： ctb.py 项目： zhengzx-nlp/distance-parser

def convert(ctb_root, out_root):
    """Extract sentence lines from bracketed CTB files into ``.fid`` files.

    Every regular file directly under ``<ctb_root>/bracketed`` whose name
    ends in ``.nw``, ``.mz`` or ``.wb`` is read line by line. Lines located
    between an opening ``<S ID=`` / ``<seg id=`` marker and the matching
    ``</S>`` / ``</seg>`` marker are copied to ``<out_root>/<stem>.fid``,
    where ``<stem>`` is the part of the input file name before its first dot.
    Other lines starting with ``<`` and near-empty lines are dropped.

    Args:
        ctb_root: Directory that contains the ``bracketed`` sub-directory.
        out_root: Output directory; handed to ``make_sure_path_exists``
            before any file is written.
    """
    ctb_root = join(ctb_root, 'bracketed')
    # A single tuple-based endswith() keeps the isfile() check attached to
    # all three suffixes. The earlier `isfile and a or b or c` chain bound as
    # `(isfile and a) or b or c`, letting directories named *.mz / *.wb
    # through and crashing later in open().
    fids = [
        f for f in listdir(ctb_root)
        if isfile(join(ctb_root, f)) and f.endswith(('.nw', '.mz', '.wb'))
    ]
    make_sure_path_exists(out_root)

    for f in fids:
        # NOTE(review): files are opened with the platform default encoding;
        # the original code carried a commented-out `encoding='GB2312'` hint
        # -- confirm which encoding the corpus actually uses.
        with open(join(ctb_root, f), 'r') as src, \
             open(join(out_root, f.split('.')[0] + '.fid'), 'w') as out:
            in_s_tag = False
            try:
                for line in src:
                    if line.startswith(('<S ID=', '<seg id=')):
                        in_s_tag = True
                    elif line.startswith(('</S>', '</seg>')):
                        in_s_tag = False
                    elif line.startswith('<'):
                        # Any other markup line is skipped.
                        continue
                    elif in_s_tag and len(line) > 1:
                        # len(line) > 1 filters out lines that hold only a
                        # newline character.
                        out.write(line)
            except UnicodeDecodeError:
                # Best effort: keep what was written so far and move on to
                # the next file when a line cannot be decoded. Narrowed from
                # a bare `except` so Ctrl-C and real I/O errors still surface.
                pass

示例#2

显示文件

def convert_ctb8_to_bracketed(ctb_root, out_root):
    """Copy the non-markup lines of each ``chtb*`` file to ``out_root``.

    Looks at the regular files directly under ``<ctb_root>/bracketed`` whose
    names start with ``chtb``. For each one, every line that does not begin
    with ``<`` is written to ``<out_root>/<name>.txt``. Both sides are
    read and written as UTF-8.

    Args:
        ctb_root: Directory that contains the ``bracketed`` sub-directory.
        out_root: Output directory; handed to ``make_sure_path_exists``
            before any file is written.
    """
    bracketed_dir = join(ctb_root, 'bracketed')

    # Collect the input names up front, before the output directory is made.
    names = []
    for entry in listdir(bracketed_dir):
        if isfile(join(bracketed_dir, entry)) and entry.startswith('chtb'):
            names.append(entry)

    make_sure_path_exists(out_root)

    for name in names:
        source_path = join(bracketed_dir, name)
        target_path = join(out_root, name + '.txt')
        with open(source_path, encoding='utf-8') as src, \
                open(target_path, 'w', encoding='utf-8') as out:
            # Lines starting with '<' are markup and are left out.
            out.writelines(
                line for line in src if not line.startswith('<'))

示例#3

显示文件

def convert(ctb_root, out_root):
    """Copy the sentence bodies of each ``.fid`` file to ``out_root``.

    Every regular file directly under ``<ctb_root>/bracketed`` whose name
    ends in ``.fid`` is decoded as GB2312. Lines found after a ``<S ID=``
    marker and before the next ``</S>`` marker are written to a file of the
    same name under ``out_root``.

    Args:
        ctb_root: Directory that contains the ``bracketed`` sub-directory.
        out_root: Output directory; handed to ``make_sure_path_exists``
            before any file is written.
    """
    ctb_root = join(ctb_root, 'bracketed')
    fids = [
        f for f in listdir(ctb_root)
        if isfile(join(ctb_root, f)) and f.endswith('.fid')
    ]
    make_sure_path_exists(out_root)
    for f in fids:
        # NOTE(review): the output is written with the platform default
        # encoding, not GB2312 -- confirm that this is intended.
        with open(join(ctb_root, f), encoding='GB2312') as src, \
                open(join(out_root, f), 'w') as out:
            in_s_tag = False
            try:
                for line in src:
                    if line.startswith('<S ID='):
                        in_s_tag = True
                    elif line.startswith('</S>'):
                        in_s_tag = False
                    elif in_s_tag:
                        out.write(line)
            except UnicodeDecodeError:
                # Best effort: a line that cannot be decoded as GB2312 ends
                # processing of this file, keeping what was already written.
                # Narrowed from a bare `except` so that Ctrl-C and genuine
                # I/O failures are no longer silently discarded.
                pass

示例#4

显示文件

文件： ctb.py 项目： y12uc231/TreebankPreprocessing

def convert_ctb5_to_backeted(ctb_root, out_root):
    """Copy the sentence bodies of each CTB ``.fid`` file to ``out_root``.

    Every regular file directly under ``<ctb_root>/bracketed`` whose name
    ends in ``.fid`` is decoded as GB2312. Lines found after a ``<S ID=``
    marker and before the next ``</S>`` marker are written to a file of the
    same name under ``out_root``.

    Args:
        ctb_root: Directory that contains the ``bracketed`` sub-directory.
        out_root: Output directory; handed to ``make_sure_path_exists``
            before any file is written.
    """
    ctb_root = join(ctb_root, 'bracketed')
    fids = [
        f for f in listdir(ctb_root)
        if isfile(join(ctb_root, f)) and f.endswith('.fid')
    ]
    make_sure_path_exists(out_root)
    for f in fids:
        # NOTE(review): the output is written with the platform default
        # encoding, not GB2312 -- confirm that this is intended.
        with open(join(ctb_root, f),
                  encoding='GB2312') as src, open(join(out_root, f),
                                                  'w') as out:
            in_s_tag = False
            try:
                for line in src:
                    if line.startswith('<S ID='):
                        in_s_tag = True
                    elif line.startswith('</S>'):
                        in_s_tag = False
                    elif in_s_tag:
                        out.write(line)
            except UnicodeDecodeError:
                # The last file throws an encoding error at the very end;
                # it doesn't affect sentences. Only that decode failure is
                # tolerated -- the former bare `except` also hid Ctrl-C and
                # real I/O errors.
                pass

示例#5

显示文件

    print()


if __name__ == '__main__':
    # Command-line entry point: parse the three required options, make sure
    # the output folder exists, then convert each of the three splits.
    parser = argparse.ArgumentParser(
        description='Convert combined Penn Treebank files (.txt) to Stanford Dependency format (.conllx)')
    parser.add_argument(
        "--input", required=True,
        help='The folder containing train.txt/dev.txt/test.txt in bracketed format')
    parser.add_argument(
        "--lang", required=True,
        help='Which language? Use en for English, cn for Chinese')
    parser.add_argument(
        "--output", required=True, dest="output",
        help='The folder where to store the output train.conllx/dev.conllx/test.conllx in Stanford '
             'Dependency format')

    args = parser.parse_args()
    make_sure_path_exists(args.output)

    # Each split is read from <input>/<split>.txt and written to
    # <output>/<split>.conllx.
    for f in ('train', 'dev', 'test'):
        source_file = join(args.input, f + '.txt')
        target_file = join(args.output, f + '.conllx')
        convert(source_file, target_file, args.lang)

示例#6

显示文件

        'Which task (par, pos)? Use par for phrase structure parsing, pos for part-of-speech '
        'tagging')

    # NOTE(review): this is the tail of a script block whose header and
    # argument definitions are outside the visible excerpt.
    args = parser.parse_args()
    root_path = args.output
    task = args.task
    # Default output extension; switched to 'tsv' for the 'pos' task below.
    ext = 'txt'

    # Pick the numeric ids that make up each split for the chosen task.
    # Presumably these are corpus section numbers -- TODO confirm against
    # combine().
    if task == 'par':
        training = list(range(2, 21 + 1))  # 2..21 inclusive
        development = [22]
        test = [23]
    elif task == 'pos':
        training = list(range(0, 18 + 1))  # 0..18 inclusive
        development = list(range(19, 21 + 1))  # 19..21 inclusive
        test = list(range(22, 24 + 1))  # 22..24 inclusive
        ext = 'tsv'
    else:
        # Unknown task: report it via eprint and stop with exit status 1.
        eprint('Invalid task {}'.format(task))
        exit(1)

    # The import is done here, after argument validation, and announced
    # first. Presumably `ptb` is consumed by combine() -- verify.
    print('Importing ptb from nltk')
    from nltk.corpus import ptb

    print()

    # Create the output folder, then write one file per split:
    # train.<ext>, dev.<ext> and test.<ext>.
    make_sure_path_exists(root_path)
    combine(training, join(root_path, 'train.{}'.format(ext)), task)
    combine(development, join(root_path, 'dev.{}'.format(ext)), task)
    combine(test, join(root_path, 'test.{}'.format(ext)), task)