def _remove_corpus_tree(corpus_path):
    """Delete a corpus directory, logging a warning instead of raising."""
    try:
        rmtree(corpus_path, ignore_errors=False)
    except Exception as exc:
        logger.warning("Could not delete {0}: {1}".format(corpus_path, exc))


def delete_corpus_helper(corpus=None, force=False):
    """Deletes one or several corpora.

    The corpus is removed from the registry even when its directory could
    not be deleted from disk; in that case a warning is logged.

    Args:
        corpus: string if you want to delete a specific corpus,
            None for every corpus.
        force (boolean): do not ask for confirmation on deleting all

    Raises:
        ValueError: if ``corpus`` is given but is not a registered corpus.
    """
    if corpus is None:
        if not (force or query_yes_no(
                'Do you want to delete every working directory?')):
            return
        corpora = read_corpus()
        # The registry is emptied before touching the disk, so every
        # directory must be attempted: stopping at the first failure would
        # leave the remaining ones on disk with no registry entry.
        write_corpus({})
        # .values() rather than .iteritems(): the latter only exists on
        # Python 2 dicts.
        for corpus_path in corpora.values():
            _remove_corpus_tree(corpus_path)
        return

    corpora = read_corpus()
    if corpus not in corpora:
        raise ValueError('This working directory does not exist.')
    _remove_corpus_tree(corpora[corpus])
    del corpora[corpus]
    write_corpus(corpora)
def delete_broadcast_helper(broadcast=None, corpus=None, force=False):
    """Remove one broadcast, or every broadcast, from a corpus.

    A directory is only treated as a broadcast (and therefore removed) when
    it contains a ``references.json`` file; anything else is left alone.
    The corpus name is looked up without a prior membership check, so an
    unregistered name surfaces as the registry's own lookup error.

    Args:
        broadcast: Name of the broadcast (default: None).
            None to delete all broadcasts
        corpus: Name of the corpus (default: None)
        force (boolean): do not ask for confirmation on deleting all
    """
    corpus_dir = read_corpus()[corpus]

    # Single broadcast: remove it only if it looks like a broadcast.
    if broadcast is not None:
        target = os.path.join(corpus_dir, broadcast)
        if os.path.exists(os.path.join(target, 'references.json')):
            rmtree(target, ignore_errors=False)
        return

    # Every broadcast: needs either force or an explicit confirmation.
    prompt = 'Do you want to delete every broadcast in this directory?'
    if not force and not query_yes_no(prompt):
        return

    candidates = []
    for entry in os.listdir(corpus_dir):
        # 'references' and 'raw' are never considered broadcasts.
        if entry in ('references', 'raw'):
            continue
        candidates.append(os.path.join(corpus_dir, entry))

    targets = [
        candidate for candidate in candidates
        if os.path.exists(os.path.join(candidate, 'references.json'))
    ]
    for target in targets:
        rmtree(target, ignore_errors=False)
def list_broadcast_helper(corpus):
    """Collect the broadcasts stored in a corpus.

    An entry of the corpus directory counts as a broadcast when it is not
    named ``references`` or ``raw`` and contains a ``references.json`` file.

    Args:
        corpus: Name of the corpus

    Returns:
        a dict of {broadcast_name: broadcast_path}

    Raises:
        ValueError: if the corpus is not registered.
    """
    registry = read_corpus()
    if corpus not in registry:
        raise ValueError('The working directory you stated does not exist')
    root = registry[corpus]

    found = {}
    for entry in os.listdir(root):
        if entry in ('references', 'raw'):
            continue
        full_path = os.path.join(root, entry)
        if not os.path.exists(os.path.join(full_path, 'references.json')):
            continue
        # Path relative to its own parent, i.e. the last path component.
        name = os.path.relpath(full_path, os.path.dirname(full_path))
        found[name] = full_path
    return found
def init_corpus_helper(path, corpus):
    """Create a new corpus and record it in the corpus registry.

    Makes the corpus directory and its ``references`` sub-directory when
    they are missing, then stores the corpus path under the corpus name.

    Args:
        path: the path where the corpus should be created.
        corpus: name of the corpus

    Raises:
        ValueError: if a corpus with this name is already registered.
    """
    target_dir = os.path.join(path, corpus)
    registry = read_corpus()
    if corpus in registry:
        raise ValueError('Corpus already exists')

    references_dir = os.path.join(target_dir, 'references')
    # Parent first, then child: the order matters for os.mkdir.
    for directory in (target_dir, references_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    registry[corpus] = target_dir
    write_corpus(registry)
def init_broadcast_helper(broadcast, corpus):
    """Initialize a broadcast inside a corpus.

    Creates the broadcast directory (a warning is logged if it already
    exists) and makes sure ``groundtruth.xml`` and ``references.json`` exist
    inside it. The files are opened in append mode, so existing content is
    left untouched.

    Args:
        broadcast: Name of the broadcast
        corpus: Name of the corpus

    Returns:
        a tuple (corpus_path, broadcast)

    Raises:
        ValueError: if the corpus is not registered.
        OSError: if the broadcast directory cannot be created for any reason
            other than already existing.
    """
    # Imported here so the block does not rely on a file-level import.
    import errno

    corpora = read_corpus()
    if corpus not in corpora:
        raise ValueError('The corpus you stated does not exist')

    corpus_path = corpora[corpus]
    broadcast_path = os.path.join(corpus_path, broadcast)
    try:
        os.mkdir(broadcast_path)
    except OSError as exc:
        # Compare the error code, not the message: the message text depends
        # on the OS, and membership tests on an exception object raise
        # TypeError on Python 3.
        if exc.errno != errno.EEXIST:
            raise
        logger.warning(
            'The broadcast dir already exists. Still initializing')

    # Touch the two files every broadcast is expected to contain.
    for filename in ('groundtruth.xml', 'references.json'):
        with open(os.path.join(broadcast_path, filename), 'a'):
            pass
    return corpus_path, broadcast
def list_corpus_helper():
    """Return the registered corpora.

    Returns:
        Whatever ``read_corpus()`` yields, unmodified.
    """
    return read_corpus()