示例#1
0
    def dfs(self, path):
        """Walk the folder ``path`` depth-first and yield a ``Task`` per file entry.

        Pages through ``self._cos_api.list_folder`` using the ``context``
        cursor returned by each response until the response reports
        ``listover``.  Entries that carry a ``filelen`` field are yielded as
        ``Task(key, filelen, None)``; every other entry is treated as a
        sub-folder and walked recursively.

        A response whose ``code`` is non-zero is retried with the same cursor.
        Each such failure decrements a counter initialised from
        ``self._max_retry``; listing of this folder stops when the counter
        reaches exactly zero.

        :param path: folder path to list; normalised with ``to_unicode``.
        :return: generator of ``Task`` objects.
        """
        # The parenthesised single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("DFS: {path}".format(path=to_utf8(path)))
        _finish = False
        _context = u''
        # NOTE(review): the counter is only compared with ``== 0`` below, so a
        # non-positive ``self._max_retry`` changes the stop condition -- confirm
        # the configured value is always positive.
        max_retry = self._max_retry

        path = to_unicode(path)

        while not _finish:
            request = ListFolderRequest(bucket_name=self._bucket, cos_path=path, context=_context)
            ret = self._cos_api.list_folder(request)

            if ret['code'] != 0:
                max_retry -= 1
            else:
                _finish = ret['data']['listover']
                _context = ret['data']['context']
                for item in ret['data']['infos']:
                    if 'filelen' in item:
                        # Build the task inside the try, but yield outside it:
                        # a handler wrapped around ``yield`` would also swallow
                        # GeneratorExit (raised when the consumer closes the
                        # generator early) and any exception the consumer
                        # throws in, which makes ``close()`` fail with
                        # RuntimeError.
                        try:
                            key = "{prefix}{filename}".format(prefix=path, filename=item['name'])
                            task = Task(key, item['filelen'], None)
                        except Exception:
                            # Best effort, as before: an entry whose key cannot
                            # be built is skipped instead of aborting the walk.
                            # NOTE(review): presumably this guards against
                            # encoding errors on non-ASCII names; such entries
                            # are dropped without any trace -- confirm that is
                            # intended.
                            continue
                        yield task
                    else:
                        _sub_dir = "{prefix}{filename}".format(prefix=path.encode('utf-8'),
                                                               filename=item['name'].encode('utf-8'))
                        for i in self.dfs(_sub_dir):
                            yield i

            if max_retry == 0:
                _finish = True
示例#2
0
 def list(self, marker):
     """Yield a ``Task`` for every object under ``self._prefix``.

     Iterates ``oss2.ObjectIterator`` starting at ``marker`` and skips keys
     whose last character is ``/``.

     :param marker: passed through to ``oss2.ObjectIterator`` as ``marker``.
     :return: generator of ``Task(key, size, None)``.
     """
     listing = oss2.ObjectIterator(self._oss_api,
                                   prefix=self._prefix,
                                   marker=marker)
     for entry in listing:
         name = entry.key
         ends_with_slash = name[-1] == '/'
         if not ends_with_slash:
             logger.info("yield new object: {}".format(name))
             yield Task(name, entry.size, None)
    def get_task_by_key(self, key):
        """Stat ``key`` in the bucket and wrap the result in a ``Task``.

        The key is turned into a text path with a leading ``/`` before it is
        sent to ``self._cos_api.stat_file``.

        :param key: object key, with or without a leading ``/``; byte strings
            are decoded as UTF-8.
        :return: ``Task(key, filesize, None, None)`` when the response code is
            ``0``; ``Task(key, None, None, None)`` otherwise.  The ``key``
            stored in the task is the argument exactly as it was passed in.
        """
        logger = getLogger(__name__)

        _path = key
        # ``bytes`` is an alias of ``str`` on Python 2, so this matches the
        # same inputs as before there, while on Python 3 it no longer calls
        # the non-existent ``str.decode``.  Decoding first also keeps the
        # ``startswith`` check below from mixing bytes and text.
        if isinstance(_path, bytes):
            _path = _path.decode('utf-8')
        if not _path.startswith('/'):
            _path = '/' + _path

        request = StatFileRequest(self._bucket, _path)
        ret = self._cos_api.stat_file(request)
        logger.info("ret: %s", ret)

        if ret['code'] != 0:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning("get task by key error, key = %s,error code: %s",
                           key, ret['code'])
            return Task(key, None, None, None)

        return Task(key, int(ret['data']['filesize']), None, None)
示例#4
0
    def __dfs_list(self, path):
        """Walk the folder ``path`` depth-first and yield a ``Task`` per file entry.

        Pages through ``self._cos_api.list_folder`` using the ``context``
        cursor returned by each response until ``has_more`` is false.
        Entries carrying ``filelen`` are yielded as
        ``Task(key, filelen, None)``; all other entries are treated as
        sub-folders and walked recursively with a trailing ``/`` appended.

        A request that raises, or a response whose ``code`` is non-zero,
        counts as one failure and the same page is requested again.  After
        10 failures the folder is abandoned and an error is logged.

        :param path: folder path used as the prefix of every generated key.
        :return: generator of ``Task`` objects.
        """
        logger.info("try to dump file list under {}".format(path))

        _finish = False
        _context = u''
        max_retry = 10
        while not _finish:
            failed = False
            try:
                request = ListFolderRequest(bucket_name=self._bucket,
                                            cos_path=path,
                                            context=_context)
                ret = self._cos_api.list_folder(request)
            except Exception:
                logger.exception("list failed")
                failed = True
            else:
                logger.debug(str(ret))
                if ret['code'] != 0:
                    logger.warning("request failed: {}".format(str(ret)))
                    failed = True

            if failed:
                # Both failure paths go through this single check.  Previously
                # the exception path used ``continue`` and skipped it, so a
                # request that kept raising looped forever.
                max_retry -= 1
                if max_retry <= 0:
                    logger.error(
                        "reach max retry times, finish this directory {}".format(
                            path))
                    break
                continue

            _finish = not ret['data']['has_more']
            _context = ret['data']['context']
            for item in ret['data']['infos']:
                if 'filelen' in item:
                    # file
                    key = "{prefix}{filename}".format(
                        prefix=path, filename=item['name'])
                    yield Task(key, item['filelen'], None)
                else:
                    # directory
                    _sub_dir = "{prefix}{filename}/".format(
                        prefix=path, filename=item['name'])
                    # ``bytes`` is ``str`` on Python 2, so this decodes the
                    # same values as before and stays valid on Python 3.
                    if isinstance(_sub_dir, bytes):
                        _sub_dir = _sub_dir.decode('utf-8')
                    for i in self.__dfs_list(_sub_dir):
                        yield i

        logger.info("finish directory {}".format(path))
    def list(self, marker):
        """Yield the tasks to process, from a file list or a full walk.

        When ``self._filelist`` is set, the list is fetched with
        ``self._download`` into ``self._sync_files_dir`` and each non-blank
        line is resolved through ``self.get_task_by_key`` (a leading ``/`` is
        added when missing, surrounding whitespace is stripped).  Otherwise
        the tasks come from ``self.dfs`` started at ``self._prefix_dir``, or
        at ``/`` when no prefix directory is configured.

        :param marker: not used by this implementation.
        :return: generator of task objects.
        """
        if self._filelist:
            filelist_path = self._sync_files_dir + '/' + os.path.basename(self._filelist)
            # This line used to be indented with a tab, which Python 3 rejects
            # (TabError) and which only lined up on Python 2 by accident.
            filelist_task = Task(self._filelist, None, None, None)
            self._download(filelist_task, filelist_path, True)
            with open(filelist_path) as f:
                for line in f:
                    # A blank line used to become the key '/' and trigger a
                    # pointless lookup; skip it instead.
                    if not line.strip():
                        continue
                    if not line.startswith('/'):
                        line = '/' + line
                    yield self.get_task_by_key(line.strip())
        else:
            start_dir = '/' if self._prefix_dir is None else self._prefix_dir
            for i in self.dfs(start_dir):
                yield i
示例#6
0
    def list(self, marker):
        """Yield a ``Task`` for every item listed under ``self._prefix``.

        Requests pages of up to 100 items from ``self._qiniu_api.list`` and
        follows the ``marker`` field of each response until the API reports
        end-of-listing or a response carries no ``marker``.

        A ``None`` result whose error is ``'bad token'`` ends the listing with
        a warning.  Any other exception is logged and the same page is
        requested again.

        :param marker: accepted but never read; listing always starts from
            the beginning.
        :return: generator of ``Task(key, fsize, None, None)``.
        """
        page_size = 100
        no_delimiter = None
        # NOTE(review): the ``marker`` argument is ignored -- the cursor always
        # starts at None.  Confirm whether resuming from ``marker`` was meant.
        cursor = None

        eof = False

        while not eof:
            try:
                ret, eof, info = self._qiniu_api.list(self._bucket,
                                                      self._prefix, cursor,
                                                      page_size, no_delimiter)
                if ret is None:
                    logger.warn("ret is None")
                    if info.error != 'bad token':
                        logger.warn(info.text_body)
                        raise IOError(info.error)
                    raise TokenException

                for entry in ret['items']:
                    logger.info("yield new object: {}".format(entry['key']))
                    yield Task(entry['key'], entry['fsize'], None, None)

                if eof is True:
                    logger.info("eof is {}".format(eof))
                elif not eof and 'marker' in ret:
                    cursor = ret['marker']
                else:
                    # No cursor to continue from: treat the listing as done.
                    eof = True
            except TokenException:
                eof = True
                logger.warn(
                    "Your accessid/accesskey is incorrect, Please double check your configures"
                )
            except Exception as err:
                # NOTE(review): there is no retry limit here; a persistent
                # failure keeps the loop requesting the same page.
                logger.exception("list exception: " + str(err))
 def list(self):
     """Yield a ``Task`` for every entry of the directory ``self._workspace``.

     :return: generator of ``Task(name, size, None, None)`` where ``name`` is
         the bare entry name returned by ``os.listdir`` and ``size`` is the
         result of ``os.path.getsize`` on that entry.
     """
     for name in os.listdir(self._workspace):
         # ``os.listdir`` returns bare names, so the size must be read from
         # the path inside the workspace.  The bare name was previously
         # resolved against the current working directory.
         full_path = os.path.join(self._workspace, name)
         yield Task(name, os.path.getsize(full_path), None, None)
示例#8
0
 def list(self):
     """Yield a ``Task`` for every object under ``self._prefix``.

     Iterates ``self._bucket_api.list(prefix=...)`` and skips entries whose
     name ends with ``/``.

     :return: generator of ``Task(name, size, None)``.
     """
     for obj in self._bucket_api.list(prefix=self._prefix):
         name = obj.name
         # ``endswith`` also copes with an empty name, where indexing ``[-1]``
         # would raise IndexError.
         if name.endswith('/'):
             continue
         # Log the same attribute that is filtered on and yielded.  The
         # message previously read ``obj.key`` while everything else used
         # ``obj.name``.
         # NOTE(review): assumes ``key`` and ``name`` carry the same value on
         # these objects -- confirm against the bucket API in use.
         logger.info("yield new object: {}".format(name))
         yield Task(name, obj.size, None)