Example #1
File: oss.py Project: samlex20/dvc
    def download(
        self,
        from_infos,
        to_infos,
        names=None,
        no_progress_bar=False,
        resume=False,
    ):
        names = self._verify_path_args(from_infos, to_infos, names)
        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info.scheme != self.scheme:
                raise NotImplementedError
            if to_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(from_info, to_info))

            tmp_file = tmp_fname(to_info)
            if not name:
                name = to_info.name

            cb = None if no_progress_bar else Callback(name)

            makedirs(fspath_py35(to_info.parent), exist_ok=True)
            try:
                self.oss_service.get_object_to_file(from_info.path,
                                                    tmp_file,
                                                    progress_callback=cb)
            except Exception:
                logger.warning("failed to download '{}'".format(from_info))
            else:
                move(tmp_file, fspath_py35(to_info))
            finally:
                if not no_progress_bar:
                    progress.finish_target(name)
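All of the examples on this page funnel reporting through a shared progress object with update_target(name, current, total) and finish_target(name), plus a Callback adapter that cloud SDK hooks can invoke. A minimal sketch of that interface, assuming a plain stdout bar (the method names come from the snippets themselves; the body is illustrative, not DVC's actual dvc/progress.py):

import sys
import threading


class Progress(object):
    """Illustrative stand-in for the shared 'progress' singleton."""

    def __init__(self):
        # SDK callbacks may fire from worker threads, so serialize writes.
        self._lock = threading.Lock()

    def update_target(self, name, current, total):
        # 'total' may be None when the size is not known up front.
        with self._lock:
            if total:
                pct = min(100, int(100 * current / total))
                sys.stdout.write("\r{}: {}%".format(name, pct))
            else:
                sys.stdout.write("\r{}: {} bytes".format(name, current))
            sys.stdout.flush()

    def finish_target(self, name):
        # Terminate the line so the next target starts cleanly.
        with self._lock:
            sys.stdout.write("\r{}: done\n".format(name))


progress = Progress()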
Example #2
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        s3 = self.s3

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 's3':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                from_info['path'], to_info['bucket'], to_info['key']))

            if not name:
                name = os.path.basename(from_info['path'])

            total = os.path.getsize(from_info['path'])
            cb = Callback(name, total)

            try:
                s3.upload_file(from_info['path'],
                               to_info['bucket'],
                               to_info['key'],
                               Callback=cb)
            except Exception as exc:
                msg = "Failed to upload '{}'".format(from_info['path'])
                Logger.error(msg, exc)
                continue

            progress.finish_target(name)
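boto3 calls the Callback passed to upload_file/download_file with the byte count transferred since the previous call, so the wrapper must accumulate before reporting. A plausible sketch of the Callback these S3 snippets construct (an assumption, not DVC's exact class):

class Callback(object):
    """Accumulating progress hook in the shape boto3 expects."""

    def __init__(self, name, total=None):
        self.name = name
        self.total = total
        self.current = 0

    def __call__(self, byte_amount):
        # boto3 reports per-call deltas, not a running total.
        self.current += byte_amount
        progress.update_target(self.name, self.current, self.total)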
Example #3
File: gs.py Project: Faadal/dvc
    def upload(self, paths, path_infos, names=None):
        names = self._verify_path_args(path_infos, paths, names)

        gs = self.gs

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 'gs':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                path, path_info['bucket'], path_info['key']))

            if not name:
                name = os.path.basename(path)

            progress.update_target(name, 0, None)

            try:
                gs.bucket(path_info['bucket']).blob(
                    path_info['key']).upload_from_filename(path)
            except Exception as exc:
                Logger.error(
                    "Failed to upload '{}' to '{}/{}'".format(
                        path, path_info['bucket'], path_info['key']), exc)
                continue

            progress.finish_target(name)
Example #4
File: local.py Project: ei-grad/dvc
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        ret = {}

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        progress.update_target(title, 10, 100)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())

        progress.update_target(title, 30, 100)

        remote_exists = list(remote.cache_exists(md5s))

        progress.update_target(title, 90, 100)

        local_exists = self.cache_exists(md5s)

        progress.finish_target(title)

        for md5, info in ret.items():
            info["status"] = STATUS_MAP[(md5 in local_exists, md5
                                         in remote_exists)]

        return ret
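STATUS_MAP is indexed by the pair (md5 in local cache, md5 in remote cache). The constants below are an educated guess at the mapping this snippet relies on, not a verified copy of DVC's definitions:

STATUS_OK = 1        # cached on both sides; nothing to transfer
STATUS_MISSING = 2   # cached on neither side; the data is gone
STATUS_NEW = 3       # local only; a candidate for push
STATUS_DELETED = 4   # remote only; a candidate for pull

# Keyed by (exists locally, exists remotely).
STATUS_MAP = {
    (True, True): STATUS_OK,
    (False, False): STATUS_MISSING,
    (True, False): STATUS_NEW,
    (False, True): STATUS_DELETED,
}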
Example #5
def copyfile(src, dest):
    '''Copy file with progress bar'''
    copied = 0
    name = os.path.basename(src)
    total = os.stat(src).st_size

    fsrc = open(src, 'rb')

    if os.path.isdir(dest):
        fdest = open(os.path.join(dest, name), 'wb+')
    else:
        fdest = open(dest, 'wb+')

    while True:
        buf = fsrc.read(LOCAL_CHUNK_SIZE)
        if not buf:
            break
        fdest.write(buf)
        copied += len(buf)
        progress.update_target(name, copied, total)

    progress.finish_target(name)

    fsrc.close()
    fdest.close()
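LOCAL_CHUNK_SIZE caps how much is read per iteration so the bar can advance smoothly. Both the value below and the paths in the call are hypothetical:

LOCAL_CHUNK_SIZE = 1024 * 1024  # 1 MiB per read; the real constant may differ

# Copies 'model.bin' into the directory under its basename, with a live bar.
copyfile('model.bin', '/tmp/backups')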
Example #6
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        s3 = self.s3

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != "s3":
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}/{}'".format(
                from_info["path"], to_info["bucket"], to_info["path"]))

            if not name:
                name = os.path.basename(from_info["path"])

            total = os.path.getsize(from_info["path"])
            cb = Callback(name, total)

            try:
                s3.upload_file(
                    from_info["path"],
                    to_info["bucket"],
                    to_info["path"],
                    Callback=cb,
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info["path"])
                logger.error(msg)
                continue

            progress.finish_target(name)
Example #7
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != self.scheme:
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            bucket = to_info["bucket"]
            path = to_info["path"]

            logger.debug(
                "Uploading '{}' to '{}/{}'".format(
                    from_info["path"], bucket, path
                )
            )

            if not name:
                name = os.path.basename(from_info["path"])

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(
                    bucket, path, from_info["path"], progress_callback=cb
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info["path"])
                logger.warning(msg)
            else:
                progress.finish_target(name)
Example #8
    def push(self, data_item):
        """ push, aws version """

        aws_key = self.cache_file_key(data_item.resolved_cache.dvc)
        bucket = self._get_bucket_aws(self.storage_bucket)
        key = bucket.get_key(aws_key)
        if key:
            Logger.debug(
                'File already uploaded to the cloud. Checksum validation...')

            if self._cmp_checksum(key, data_item.resolved_cache.dvc):
                Logger.debug('File checksum matches. No uploading is needed.')
                return data_item

            Logger.debug('Checksum mismatch. Re-uploading is required.')

        key = bucket.new_key(aws_key)

        try:
            self._push_multipart(key, data_item.resolved_cache.relative)
        except Exception as exc:
            Logger.error('Failed to upload "{}": {}'.format(
                data_item.resolved_cache.relative, exc))
            return None

        progress.finish_target(
            os.path.basename(data_item.resolved_cache.relative))

        return data_item
Example #9
    def _import(self, bucket_name, key, fname, data_item):

        bucket = self._get_bucket_gc(bucket_name)

        name = os.path.basename(fname)
        tmp_file = self.tmp_file(fname)

        blob = bucket.get_blob(key)
        if not blob:
            Logger.error('File "{}" does not exist in the cloud'.format(key))
            return None

        if self._cmp_checksum(blob, fname):
            Logger.debug('File "{}" matches with "{}".'.format(fname, key))
            return data_item

        Logger.debug('Downloading cache file from gc "{}/{}"'.format(
            bucket.name, key))

        # percent_cb is not available for download_to_filename, so
        # let's at least update progress at key points (start, finish)
        progress.update_target(name, 0, None)

        try:
            blob.download_to_filename(tmp_file)
            os.rename(tmp_file, fname)
        except Exception as exc:
            Logger.error('Failed to download "{}": {}'.format(key, exc))
            return None

        progress.finish_target(name)

        Logger.debug('Downloading completed')

        return data_item
Example #10
File: gs.py Project: sjoerdapp/dvc
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        gs = self.gs

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 'gs':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(from_info['path'],
                                                            to_info['bucket'],
                                                            to_info['key']))

            if not name:
                name = os.path.basename(from_info['path'])

            progress.update_target(name, 0, None)

            try:
                bucket = gs.bucket(to_info['bucket'])
                blob = bucket.blob(to_info['key'])
                blob.upload_from_filename(from_info['path'])
            except Exception as exc:
                msg = "Failed to upload '{}' to '{}/{}'"
                Logger.warn(msg.format(from_info['path'],
                                       to_info['bucket'],
                                       to_info['key']), exc)
                continue

            progress.finish_target(name)
Example #11
    def _import(self, bucket_name, key_name, fname, data_item):

        bucket = self._get_bucket_aws(bucket_name)

        tmp_file = self.tmp_file(fname)
        name = os.path.basename(fname)
        key = bucket.get_key(key_name)
        if not key:
            Logger.error(
                'File "{}" does not exist in the cloud'.format(key_name))
            return None

        if self._cmp_checksum(key, fname):
            Logger.debug('File "{}" matches with "{}".'.format(
                fname, key_name))
            return data_item

        Logger.debug('Downloading cache file from S3 "{}/{}" to "{}"'.format(
            bucket.name, key_name, fname))

        res_h = ResumableDownloadHandler(
            tracker_file_name=self._download_tracker(tmp_file), num_retries=10)
        try:
            key.get_contents_to_filename(tmp_file,
                                         cb=create_cb(name),
                                         res_download_handler=res_h)
            os.rename(tmp_file, fname)
        except Exception as exc:
            Logger.error('Failed to download "{}": {}'.format(key_name, exc))
            return None

        progress.finish_target(name)
        Logger.debug('Downloading completed')

        return data_item
Example #12
    def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
        names = self._verify_path_args(to_infos, from_infos, names)

        gs = self.gs

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info.scheme != "gs":
                raise NotImplementedError

            if from_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}'".format(from_info, to_info))

            if not name:
                name = from_info.name

            if not no_progress_bar:
                progress.update_target(name, 0, None)

            try:
                bucket = gs.bucket(to_info.bucket)
                blob = bucket.blob(to_info.path)
                blob.upload_from_filename(from_info.fspath)
            except Exception:
                msg = "failed to upload '{}' to '{}'"
                logger.exception(msg.format(from_info, to_info))
                continue

            progress.finish_target(name)
Example #13
File: azure.py Project: Faadal/dvc
    def download(self, path_infos, fnames, no_progress_bar=False, names=None):
        names = self._verify_path_args(path_infos, fnames, names)

        for fname, path_info, name in zip(fnames, path_infos, names):
            if path_info['scheme'] != self.scheme:
                raise NotImplementedError

            bucket = path_info['bucket']
            key = path_info['key']

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                bucket, key, fname))

            tmp_file = self.tmp_file(fname)
            if not name:
                name = os.path.basename(fname)

            cb = None if no_progress_bar else Callback(name)

            self._makedirs(fname)

            try:
                self.blob_service.get_blob_to_path(
                    bucket, key, tmp_file, progress_callback=cb)
            except Exception as exc:
                Logger.error("Failed to download '{}/{}'".format(
                    bucket, key), exc)
            else:
                os.rename(tmp_file, fname)

                if not no_progress_bar:
                    progress.finish_target(name)
Example #14
File: azure.py Project: Faadal/dvc
    def upload(self, paths, path_infos, names=None):
        names = self._verify_path_args(path_infos, paths, names)

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != self.scheme:
                raise NotImplementedError

            bucket = path_info['bucket']
            key = path_info['key']

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                path, bucket, key))

            if not name:
                name = os.path.basename(path)

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(
                    bucket, key, path, progress_callback=cb)
            except Exception as ex:
                Logger.error("Failed to upload '{}'".format(path), ex)
            else:
                progress.finish_target(name)
Example #15
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        ret = {}

        progress.set_n_total(1)
        progress.update_target(title, 0, 100)

        progress.update_target(title, 10, 100)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())

        progress.update_target(title, 30, 100)

        remote_exists = list(remote.cache_exists(md5s))

        progress.update_target(title, 90, 100)

        local_exists = self.cache_exists(md5s)

        progress.finish_target(title)

        self._fill_statuses(ret, local_exists, remote_exists)

        self._log_missing_caches(ret)

        return ret
Example #16
    def push(self, data_item):
        """ push, gcp version """

        bucket = self._get_bucket_gc(self.storage_bucket)
        blob_name = self.cache_file_key(data_item.resolved_cache.dvc)
        name = os.path.basename(data_item.resolved_cache.dvc)

        blob = bucket.get_blob(blob_name)
        if blob is not None and blob.exists():
            if self._cmp_checksum(blob, data_item.resolved_cache.dvc):
                Logger.debug('checksum %s matches.  Skipping upload' %
                             data_item.cache.relative)
                return data_item
            Logger.debug('checksum %s mismatch.  re-uploading' %
                         data_item.cache.relative)

        # same as in _import
        progress.update_target(name, 0, None)

        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)

        progress.finish_target(name)
        Logger.debug('uploading %s completed' %
                     data_item.resolved_cache.relative)

        return data_item
Example #17
    def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info.scheme != self.scheme:
                raise NotImplementedError

            if from_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}'".format(from_info, to_info))
            if not name:
                name = from_info.name

            cb = None if no_progress_bar else Callback(name)

            try:
                self.blob_service.create_blob_from_path(
                    to_info.bucket,
                    to_info.path,
                    from_info.fspath,
                    progress_callback=cb,
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info)
                logger.warning(msg)
            else:
                progress.finish_target(name)
Example #18
    def _save_dir(self, path_info):
        path = path_info['path']
        md5, dir_info = self.state.update_info(path)
        dir_relpath = os.path.relpath(path)
        dir_size = len(dir_info)
        bar = dir_size > LARGE_DIR_SIZE

        logger.info("Linking directory '{}'.".format(dir_relpath))

        for processed, entry in enumerate(dir_info):
            relpath = entry[self.PARAM_RELPATH]
            m = entry[self.PARAM_MD5]
            p = os.path.join(path, relpath)
            c = self.get(m)

            if self.changed_cache(m):
                self._move(p, c)
            else:
                remove(p)

            self.link(c, p)

            if bar:
                progress.update_target(dir_relpath, processed, dir_size)

        self.state.update_link(path)

        if bar:
            progress.finish_target(dir_relpath)

        return {self.PARAM_MD5: md5}
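LARGE_DIR_SIZE decides when linking a directory is busy enough to deserve its own progress bar; the threshold below is an assumed value for illustration:

# Directories with more entries than this get a per-entry progress bar.
LARGE_DIR_SIZE = 100  # assumed value; DVC defines the real constant elsewhere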
Example #19
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar"""
    from dvc.exceptions import DvcException
    from dvc.progress import progress
    from dvc.system import System

    copied = 0
    name = name if name else os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
            while True:
                buf = fsrc.read(LOCAL_CHUNK_SIZE)
                if not buf:
                    break
                fdest.write(buf)
                copied += len(buf)
                if not no_progress_bar:
                    progress.update_target(name, copied, total)

    if not no_progress_bar:
        progress.finish_target(name)
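System.reflink asks the filesystem for a copy-on-write clone and raises DvcException when it cannot, which is what drops this function into the chunked-copy fallback. A rough Linux-only sketch of such a clone via the FICLONE ioctl (Btrfs/XFS); DVC's actual implementation is more portable than this:

import fcntl  # Linux-only; illustrative, not DVC's System.reflink

FICLONE = 0x40049409  # _IOW(0x94, 9, int): clone src's extents into dest


def reflink(src, dest):
    """Copy-on-write clone; raises OSError where the fs lacks support."""
    with open(src, 'rb') as fsrc, open(dest, 'wb+') as fdest:
        fcntl.ioctl(fdest.fileno(), FICLONE, fsrc.fileno())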
Example #20
    def upload(self, from_info, to_info, name=None, no_progress_bar=False):
        if not hasattr(self, "_upload"):
            raise RemoteActionNotImplemented("upload", self.scheme)

        if to_info.scheme != self.scheme:
            raise NotImplementedError

        if from_info.scheme != "local":
            raise NotImplementedError

        logger.debug("Uploading '{}' to '{}'".format(from_info, to_info))

        name = name or from_info.name

        if not no_progress_bar:
            progress.update_target(name, 0, None)

        try:
            self._upload(
                from_info.fspath,
                to_info,
                name=name,
                no_progress_bar=no_progress_bar,
            )
        except Exception:
            msg = "failed to upload '{}' to '{}'"
            logger.exception(msg.format(from_info, to_info))
            return 1  # 1 fail

        if not no_progress_bar:
            progress.finish_target(name)

        return 0
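Returning 0 or 1 per transfer lets the caller tally failures across a worker pool. A sketch of how a caller might drive this upload concurrently (the executor arrangement is an assumption, not DVC's actual scheduler):

from concurrent.futures import ThreadPoolExecutor


def upload_all(remote, pairs, jobs=4):
    """Run remote.upload over (from_info, to_info) pairs; return failure count."""
    with ThreadPoolExecutor(max_workers=jobs) as executor:
        futures = [
            executor.submit(remote.upload, from_info, to_info)
            for from_info, to_info in pairs
        ]
        return sum(f.result() for f in futures)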
Example #21
File: local.py Project: ei-grad/dvc
    def _save_dir(self, path, md5):
        dir_info = self.load_dir_cache(md5)
        dir_relpath = os.path.relpath(path)
        dir_size = len(dir_info)
        bar = dir_size > LARGE_DIR_SIZE

        logger.info("Linking directory '{}'.".format(dir_relpath))

        for processed, entry in enumerate(dir_info):
            relpath = entry[self.PARAM_RELPATH]
            m = entry[self.PARAM_CHECKSUM]
            p = os.path.join(path, relpath)
            c = self.get(m)

            if self.changed_cache(m):
                self._move(p, c)
            else:
                remove(p)

            self.link(c, p)

            self.state.update(p, m)
            self.state.update(c, m)

            if bar:
                progress.update_target(dir_relpath, processed, dir_size)

        self.state.update_link(path)

        cache = self.get(md5)
        self.state.update(cache)
        self.state.update(path, md5)

        if bar:
            progress.finish_target(dir_relpath)
Example #22
File: gs.py Project: sergeyenin/dvc
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        gs = self.gs

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != "gs":
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}/{}'".format(
                from_info["path"], to_info["bucket"], to_info["path"]))

            if not name:
                name = os.path.basename(from_info["path"])

            progress.update_target(name, 0, None)

            try:
                bucket = gs.bucket(to_info["bucket"])
                blob = bucket.blob(to_info["path"])
                blob.upload_from_filename(from_info["path"])
            except Exception:
                msg = "failed to upload '{}' to '{}/{}'"
                logger.error(
                    msg.format(from_info["path"], to_info["bucket"],
                               to_info["path"]))
                continue

            progress.finish_target(name)
Example #23
File: local.py Project: ei-grad/dvc
    def _get_chunks(self, download, remote, status_info, status, jobs):
        title = "Analysing status."

        progress.set_n_total(1)
        total = len(status_info)
        current = 0

        cache = []
        path_infos = []
        names = []
        for md5, info in status_info.items():
            if info["status"] == status:
                cache.append(self.checksum_to_path_info(md5))
                path_infos.append(remote.checksum_to_path_info(md5))
                names.append(info["name"])
            current += 1
            progress.update_target(title, current, total)

        progress.finish_target(title)

        progress.set_n_total(len(names))

        if download:
            to_infos = cache
            from_infos = path_infos
        else:
            to_infos = path_infos
            from_infos = cache

        return list(
            zip(
                to_chunks(from_infos, jobs),
                to_chunks(to_infos, jobs),
                to_chunks(names, jobs),
            ))
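to_chunks splits each list into per-worker batches, matching how the result feeds the jobs. A minimal sketch consistent with this usage (an assumption; the real helper lives in DVC's utils):

def to_chunks(lst, num_chunks):
    """Split lst into num_chunks contiguous slices; the last may be shorter."""
    if not lst:
        return []
    num_chunks = num_chunks or 1
    chunk_size = -(-len(lst) // num_chunks)  # ceiling division
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]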
Example #24
    def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info.scheme != self.scheme:
                raise NotImplementedError

            if from_info.scheme != "local":
                raise NotImplementedError

            bucket = to_info.bucket
            path = to_info.path

            logger.debug(
                "Uploading '{}' to 'oss://{}/{}'".format(
                    from_info.path, bucket, path
                )
            )

            if not name:
                name = os.path.basename(from_info.path)

            cb = None if no_progress_bar else Callback(name)

            try:
                self.oss_service.put_object_from_file(
                    path, from_info.path, progress_callback=cb
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info.path)
                logger.warning(msg)
            else:
                progress.finish_target(name)
Example #25
    def _pull_key(self, key, path, no_progress_bar=False):
        self._makedirs(path)

        name = os.path.relpath(path, self._cloud_settings.cache.cache_dir)
        tmp_file = self.tmp_file(path)

        if self._cmp_checksum(key, path):
            Logger.debug('File "{}" matches with "{}".'.format(path, key.name))
            return path

        Logger.debug('Downloading cache file from gc "{}/{}"'.format(
            key.bucket.name, key.name))

        if not no_progress_bar:
            # percent_cb is not available for download_to_filename, so
            # let's at least update progress at key points (start, finish)
            progress.update_target(name, 0, None)

        try:
            key.download_to_filename(tmp_file)
        except Exception as exc:
            Logger.error('Failed to download "{}": {}'.format(key.name, exc))
            return None

        os.rename(tmp_file, path)

        if not no_progress_bar:
            progress.finish_target(name)

        Logger.debug('Downloading completed')

        return path
Example #26
File: local.py Project: Pariyat/dvc
    def status(self, checksum_infos, remote, jobs=1, show_checksums=False):
        Logger.info("Preparing to pull data from {}".format(remote.url))
        title = "Collecting information"

        progress.set_n_total(0)
        progress.update_target(title, 0, 100)

        checksum_infos, missing = self._collect(checksum_infos)
        checksum_infos += missing

        progress.update_target(title, 10, 100)

        md5s, names = self._group(checksum_infos,
                                  show_checksums=show_checksums)

        progress.update_target(title, 20, 100)

        path_infos = remote.md5s_to_path_infos(md5s)

        progress.update_target(title, 30, 100)

        remote_exists = remote.exists(path_infos)

        progress.update_target(title, 90, 100)

        local_exists = [not self.changed(md5) for md5 in md5s]

        progress.finish_target(title)

        return [(name, STATUS_MAP[l, r])
                for name, l, r in zip(names, local_exists, remote_exists)]
Example #27
    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        names = self._verify_path_args(from_infos, to_infos, names)

        s3 = self.s3

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info["scheme"] != "s3":
                raise NotImplementedError

            if to_info["scheme"] == "s3":
                self.copy(from_info, to_info, s3=s3)
                continue

            if to_info["scheme"] != "local":
                raise NotImplementedError

            msg = "Downloading '{}/{}' to '{}'".format(
                from_info["bucket"], from_info["path"], to_info["path"]
            )
            logger.debug(msg)

            tmp_file = tmp_fname(to_info["path"])
            if not name:
                name = os.path.basename(to_info["path"])

            makedirs(os.path.dirname(to_info["path"]), exist_ok=True)

            try:
                if no_progress_bar:
                    cb = None
                else:
                    total = s3.head_object(
                        Bucket=from_info["bucket"], Key=from_info["path"]
                    )["ContentLength"]
                    cb = Callback(name, total)

                s3.download_file(
                    from_info["bucket"],
                    from_info["path"],
                    tmp_file,
                    Callback=cb,
                )
            except Exception:
                msg = "failed to download '{}/{}'".format(
                    from_info["bucket"], from_info["path"]
                )
                logger.error(msg)
                continue

            move(tmp_file, to_info["path"])

            if not no_progress_bar:
                progress.finish_target(name)
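tmp_fname plus move is the atomic-download idiom used throughout these examples: write to a unique sibling path, then rename into place, so a failed transfer never leaves a partial file under the final name. A sketch of the two helpers (assumptions; DVC's real versions also handle details such as cross-device moves):

import os
import uuid


def tmp_fname(fname):
    """Unique sibling of the target path for in-progress downloads."""
    return '{}.{}.tmp'.format(fname, uuid.uuid4().hex)


def move(src, dst):
    """Rename into place; atomic when src and dst share a filesystem."""
    os.rename(src, dst)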
Example #28
    def upload(self, from_infos, to_infos, names=None):
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != self.scheme:
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            bucket = to_info['bucket']
            key = to_info['key']

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                from_info['path'], bucket, key))

            if not name:
                name = os.path.basename(from_info['path'])

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(bucket,
                                                        key,
                                                        from_info['path'],
                                                        progress_callback=cb)
            except Exception as ex:
                msg = "Failed to upload '{}'".format(from_info['path'])
                Logger.warn(msg, ex)
            else:
                progress.finish_target(name)
Example #29
File: utils.py Project: sotte/dvc
def copyfile(src, dest, no_progress_bar=False, name=None):
    '''Copy file with progress bar'''
    copied = 0
    name = name if name else os.path.basename(dest)
    total = os.stat(src).st_size

    fsrc = open(src, 'rb')

    if os.path.isdir(dest):
        fdest = open(os.path.join(dest, os.path.basename(src)), 'wb+')
    else:
        fdest = open(dest, 'wb+')

    while True:
        buf = fsrc.read(LOCAL_CHUNK_SIZE)
        if not buf:
            break
        fdest.write(buf)
        copied += len(buf)
        if not no_progress_bar:
            progress.update_target(name, copied, total)

    if not no_progress_bar:
        progress.finish_target(name)

    fsrc.close()
    fdest.close()
Example #30
    def download_file(self, from_url, to_file):
        """
        Download single file from url.
        """
        r = requests.get(from_url, stream=True)

        name = os.path.basename(from_url)
        chunk_size = 1024 * 100
        downloaded = 0
        last_reported = 0
        report_bucket = 100 * 1024 * 10
        total_length = r.headers.get('content-length')
        if total_length is not None:
            # content-length arrives as a string; the progress bar needs an int
            total_length = int(total_length)

        with open(to_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive new chunks
                    continue

                # count the bytes actually received; the final chunk is
                # usually smaller than chunk_size
                downloaded += len(chunk)

                last_reported += len(chunk)
                if last_reported >= report_bucket:
                    last_reported = 0
                    Logger.debug('Downloaded {}'.format(
                        sizeof_fmt(downloaded)))

                # update progress bar
                progress.update_target(name, downloaded, total_length)

                f.write(chunk)

        # tell progress bar that this target is finished downloading
        progress.finish_target(name)
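sizeof_fmt renders the running byte count for the debug line. The usual binary-units recipe, assumed here in place of DVC's exact helper:

def sizeof_fmt(num, suffix='B'):
    """Human-readable byte count, e.g. 10485760 -> '10.0MB'."""
    for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(num) < 1024.0:
            return '%3.1f%s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%.1f%s%s' % (num, 'Y', suffix)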