def write_csv_file(root_key, filename, description):
    """Touch a file under ``root_key`` and register it in ``DATA_STORE``.

    ``mkdir_p(root_key)`` is called first (presumably creating the
    directory), then the file is opened in append mode so that it exists
    afterwards without any existing content being truncated.  The file is
    registered under its own path with the final extension removed.

    Args:
        root_key: Directory containing the file; also the prefix of the key.
        filename: Name of the file inside ``root_key``.
        description: Passed through unchanged to ``DATA_STORE.add_file``.

    Returns:
        The file path, i.e. ``os.path.join(root_key, filename)``.
    """
    data_path = os.path.join(root_key, filename)
    mkdir_p(root_key)

    # Append mode creates the file when it is missing and leaves existing
    # content alone; the context manager closes the handle immediately.
    with open(data_path, 'a+'):
        pass

    # splitext only looks at the last path component, so a dot in a
    # directory name (e.g. "../data/file") can no longer truncate the key,
    # and a dot-file such as ".hidden" keeps its full name.
    key = os.path.splitext(data_path)[0]
    DATA_STORE.add_file(key, data_path, description, force=True)
    return data_path
def register_to_datastore(data_path, root_key, description):
    """Register every non-zip file below ``data_path`` under ``root_key``.

    The directory tree is walked with ``os.walk``.  Each file whose name does
    not end in ``.zip`` is added to ``DATA_STORE`` under
    ``os.path.join(root_key, key)``, where ``key`` is the file's path relative
    to ``data_path`` with the final extension removed and ``/`` as separator.

    Args:
        data_path: Root directory to scan.
        root_key: Key created up front via ``DATA_STORE.create_key`` and used
            as prefix for every registered file.
        description: Passed through unchanged to ``DATA_STORE.add_file``.

    Returns:
        The relative keys (without the ``root_key`` prefix) in walk order.
    """
    new_keys: List[str] = []
    DATA_STORE.create_key(root_key, '', force=True)

    for root, _, filenames in os.walk(data_path):
        for filename in filenames:
            if filename.endswith(".zip"):
                continue

            file_path = os.path.join(root, filename)
            # relpath is independent of how data_path is spelled; counting
            # '/'-separated components broke on a trailing slash ("a/b/"),
            # yielding empty or shortened keys.
            relative = os.path.relpath(file_path, data_path)
            # Strip only the extension of the file itself, so dotted
            # directory names such as "v1.0/readme" stay intact.
            key = os.path.splitext(relative)[0]
            if os.sep != '/':
                # Keep keys '/'-separated regardless of platform.
                key = key.replace(os.sep, '/')

            new_keys.append(key)
            DATA_STORE.add_file(os.path.join(root_key, key), file_path,
                                description, force=True)
    return new_keys
def maybe_download_and_store_single_file(url: str, key: str, description: str = None, postprocess=None, **kwargs) -> str:
    """Download ``url`` and register it under ``key`` unless already valid.

    When ``DATA_STORE.is_valid(key)`` is true nothing is downloaded.
    Otherwise the file is fetched with ``maybe_download`` into
    ``DATA_STORE.working_directory`` (named after the last ``/``-separated
    part of the URL) and the resulting path is added to the datastore.

    Args:
        url: Location of the file to download.
        key: Datastore key to check and to register the file under.
        description: Passed through unchanged to ``DATA_STORE.add_file``.
        postprocess: Optional value forwarded to ``maybe_download`` as its
            ``postprocess`` keyword.
        **kwargs: Forwarded to ``maybe_download`` only when ``postprocess``
            is given; ignored otherwise.

    Returns:
        ``key``, whether or not a download took place.
    """
    if DATA_STORE.is_valid(key):
        return key

    # The key is missing or stale: fetch the file into the working directory.
    target_name = url.split('/')[-1]
    # Extra keyword arguments only accompany an explicit postprocess step.
    extra = {} if postprocess is None else {"postprocess": postprocess, **kwargs}
    downloaded = maybe_download(target_name, url,
                                DATA_STORE.working_directory, **extra)

    DATA_STORE.add_file(key, downloaded, description, force=True)
    return key
def maybe_download_and_store_google_drive(file_pair: Dict[str, str], root_key: str, description: str = None, force_download: bool = False, use_subkeys=True, **kwargs) -> List[str]:
    """Download Google Drive files and register them under ``root_key``.

    If ``force_download`` is false, ``root_key`` is valid in ``DATA_STORE``
    and ``validate_subkeys(root_key, old_keys)`` succeeds, the list handed to
    ``validate_subkeys`` is returned and nothing is downloaded (presumably
    that helper fills the list with the existing keys - confirm against its
    definition).

    Otherwise each entry of ``file_pair`` is downloaded into
    ``DATA_STORE.working_directory``, run through ``post_process`` and
    registered:

    * directory result, ``use_subkeys`` true: every contained file is
      registered through ``register_to_datastore``;
    * directory result, ``use_subkeys`` false: the folder is registered as a
      whole under ``root_key`` joined with the file name up to ``".zip"``;
    * file result: registered under ``root_key`` joined with the file name up
      to its first ``"."``.

    Args:
        file_pair: Mapping of destination file name to Google Drive file id.
        root_key: Datastore key under which everything is registered.
        description: Passed through to the datastore registration calls.
        force_download: Skip the validity shortcut and forward the flag to
            ``maybe_download_google_drive``.
        use_subkeys: Selects per-file versus whole-folder registration for
            directory results.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The keys collected while registering, followed by ``root_key``; or
        the ``validate_subkeys`` list when the shortcut applies.
    """
    old_keys: List[str] = []
    if (not force_download and DATA_STORE.is_valid(root_key)
            and validate_subkeys(root_key, old_keys)):
        return old_keys

    keys = []
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    for file_name, file_id in file_pair.items():
        log_message("Downloading " + file_name)
        file_dest = os.path.join(DATA_STORE.working_directory, file_name)
        data_path = maybe_download_google_drive(file_id, file_dest,
                                                force_download=force_download)
        data_path = post_process(data_path)
        # A space was missing before "to", gluing it to the file name.
        log_message("Decompressed " + file_name + " to " + data_path)

        if os.path.isdir(data_path):
            if use_subkeys:
                keys.extend(
                    register_to_datastore(data_path, root_key, description))
            else:
                data_key = os.path.join(root_key, file_name.split(".zip")[0])
                DATA_STORE.add_folder(data_key, data_path, force=True)
                keys.append(data_key)
        else:
            file_key = os.path.join(root_key, file_name.split(".")[0])
            DATA_STORE.add_file(file_key, data_path, description, force=True)
            keys.append(file_key)

        log_message("Completed " + file_name)

    # The root key is (re)created once more after all files are registered.
    DATA_STORE.create_key(root_key, 'root.key', force=True)
    return keys + [root_key]