Exemplo n.º 1
0
def delete_corpus_helper(corpus=None, force=False):
    """Delete one corpus, or every registered corpus.

    The corpus is removed from the registry (read_corpus / write_corpus)
    and its directory is deleted from disk. A directory that cannot be
    removed is reported with a warning; the registry entry is dropped
    regardless.

    Args:
        corpus: name of the corpus to delete, or None to delete every
            registered corpus.
        force (boolean): when deleting every corpus, do not ask for
            confirmation.

    Raises:
        ValueError: if ``corpus`` is given but is not registered.
    """
    if corpus is None:
        if force or query_yes_no(
                'Do you want to delete every working directory?'):
            corpora = read_corpus()
            write_corpus({})
            # .values() works on both Python 2 and Python 3, unlike
            # .iteritems() which only exists on Python 2.
            for path in corpora.values():
                # The registry is already empty at this point, so one
                # failing directory must not prevent the others from being
                # cleaned up: log and keep going, as in the single-corpus
                # branch below.
                try:
                    rmtree(path, ignore_errors=False)
                except Exception as exc:
                    logger.warning("Could not delete {0}: {1}".format(
                        path, exc))
        return

    corpora = read_corpus()
    if corpus not in corpora:
        raise ValueError('This working directory does not exist.')

    path = corpora[corpus]
    try:
        rmtree(path, ignore_errors=False)
    except Exception as exc:
        # Best effort: the corpus is unregistered even when its directory
        # could not be removed.
        logger.warning("Could not delete {0}: {1}".format(path, exc))
    del corpora[corpus]
    write_corpus(corpora)
Exemplo n.º 2
0
def delete_broadcast_helper(broadcast=None, corpus=None, force=False):
    """Remove a single broadcast, or every broadcast, from a corpus.

    A directory is treated as a broadcast only when it contains a
    ``references.json`` file; anything else is left untouched. Entries
    named ``references`` and ``raw`` are never considered when removing
    every broadcast.

    Args:
        broadcast: Name of the broadcast to remove (default: None). None
            removes every broadcast of the corpus.
        corpus: Name of the corpus (default: None). It is looked up in the
            value returned by read_corpus(), without a prior membership
            check.
        force (boolean): do not ask for confirmation when removing every
            broadcast

    """
    root = read_corpus()[corpus]

    if broadcast is not None:
        target = os.path.join(root, broadcast)
        if os.path.exists(os.path.join(target, 'references.json')):
            rmtree(target, ignore_errors=False)
        return

    confirmed = force or query_yes_no('Do you want to delete every '
                                      'broadcast in this directory?')
    if not confirmed:
        return

    candidates = [
        os.path.join(root, entry)
        for entry in os.listdir(root)
        if entry not in ('references', 'raw')
    ]
    # Decide what to delete before deleting anything.
    doomed = [
        path for path in candidates
        if os.path.exists(os.path.join(path, 'references.json'))
    ]
    for path in doomed:
        rmtree(path, ignore_errors=False)
Exemplo n.º 3
0
def list_broadcast_helper(corpus):
    """Collect the broadcasts found inside a corpus directory.

    An entry of the corpus directory is reported as a broadcast when it
    contains a ``references.json`` file. Entries named ``references`` and
    ``raw`` are always skipped.

    Args:
        corpus: Name of the corpus

    Returns:
        a dict of {broadcast_name: broadcast_path}

    Raises:
        ValueError: if the corpus is not registered.

    """
    registry = read_corpus()
    if corpus not in registry:
        raise ValueError('The working directory you stated does not exist')

    root = registry[corpus]
    found = {}
    for entry in os.listdir(root):
        if entry in ('references', 'raw'):
            continue
        location = os.path.join(root, entry)
        if os.path.exists(os.path.join(location, 'references.json')):
            # The directory entry is the last path component, i.e. the
            # broadcast name.
            found[entry] = location
    return found
Exemplo n.º 4
0
def init_corpus_helper(path, corpus):
    """Register a new corpus and make sure its directories exist.

    The corpus directory ``<path>/<corpus>`` and its ``references``
    sub-directory are created when missing, then the corpus is added to
    the registry under its name.

    Args:
        path: the directory in which the corpus directory is created.
        corpus: name of the corpus

    Raises:
        ValueError: if a corpus with this name is already registered.
    """
    target = os.path.join(path, corpus)
    registry = read_corpus()
    if corpus in registry:
        raise ValueError('Corpus already exists')

    # Corpus root first, then its 'references' sub-directory.
    for directory in (target, os.path.join(target, 'references')):
        if not os.path.exists(directory):
            os.mkdir(directory)

    registry[corpus] = target
    write_corpus(registry)
Exemplo n.º 5
0
def init_broadcast_helper(broadcast, corpus):
    """Initialize a broadcast inside a corpus.

    Creates the broadcast directory inside the corpus directory (an
    already existing directory is tolerated and only logged) and makes
    sure the ``groundtruth.xml`` and ``references.json`` files exist in it.

    Args:
        broadcast: Name of the broadcast
        corpus: Name of the corpus

    Returns:
        a tuple (corpus_path, broadcast)

    Raises:
        ValueError: if the corpus is not registered.
        OSError: if the broadcast directory cannot be created for a reason
            other than it already existing.

    """
    import errno

    corpora = read_corpus()

    if corpus not in corpora:
        raise ValueError('The corpus you stated does not exist')
    corpus_path = corpora[corpus]
    broadcast_path = os.path.join(corpus_path, broadcast)
    try:
        os.mkdir(broadcast_path)
    except OSError as e:
        # Compare the error code rather than searching the exception for
        # the message text: exceptions are not iterable on Python 3, and
        # the message wording is not a stable contract.
        if e.errno != errno.EEXIST:
            # Bare raise keeps the original traceback.
            raise
        logger.warning(
            'The broadcast dir already exists. Still initializing')
    # Append mode creates a missing file without truncating an existing one.
    open(os.path.join(broadcast_path, 'groundtruth.xml'), 'a').close()
    open(os.path.join(broadcast_path, 'references.json'), 'a').close()
    return corpus_path, broadcast
Exemplo n.º 6
0
def list_corpus_helper():
    """Return the registered corpora exactly as read_corpus() provides them.

    Returns:
        the value returned by read_corpus(); the other helpers in this
        file use it as a mapping of {corpus_name: corpus_path}.

    """
    return read_corpus()