def test_map_progress(self):
    """
    Drive utils.map_progress over nine file-writing targets, once for
    each of the thread counts 1, 10 and 20.
    """

    def write_own_name(path):
        # Each target doubles as the file name and the file content.
        with open(path, 'w+') as handle:
            handle.write(path)

    names = ['map{}'.format(index) for index in range(1, 10)]

    for thread_count in (1, 10, 20):
        utils.map_progress(write_own_name, names, thread_count)
def _map_targets(self, func, targets, jobs, collect_local=False, collect_cloud=False, remote=None):
    """
    Process targets as data items in parallel.

    Args:
        func: name of the method looked up on the cloud object (via
            getattr) and applied to every collected item.
        targets: targets handed to self._collect.
        jobs: forwarded to self._collect and to map_progress.
        collect_local: when true, include the items returned by
            self._collect(cloud, targets, jobs, True).
        collect_cloud: when true, include the items returned by
            self._collect(cloud, targets, jobs, False).
        remote: when truthy, the cloud is obtained from
            self._init_remote(remote); otherwise self._cloud is used.

    Returns:
        Whatever map_progress returns, or None when no cloud is available.
    """
    cloud = self._init_remote(remote) if remote else self._cloud
    if not cloud:
        return None

    cloud.connect()
    try:
        collected = set()
        if collect_local:
            collected |= self._collect(cloud, targets, jobs, True)
        if collect_cloud:
            collected |= self._collect(cloud, targets, jobs, False)

        return map_progress(getattr(cloud, func), list(collected), jobs)
    finally:
        # Release the connection even when collecting or processing raised;
        # previously an exception skipped disconnect() entirely.
        cloud.disconnect()
def _map_targets(self, func, targets, jobs):
    """
    Expand targets with self._collect_targets and hand the result, together
    with func and jobs, to map_progress.

    Returns whatever map_progress returns.
    """
    items = self._collect_targets(targets)
    outcome = map_progress(func, items, jobs)
    return outcome
def run(self):
    """
    Sync every requested target through the data cloud.

    Targets come from self.parsed_args.targets. A target accepted by
    System.islink is synced as-is; a directory is walked with os.walk and
    every file found under it is synced. All work happens while holding
    the DvcLock built from self.is_locker and self.git.

    Raises:
        DataSyncError: if no targets were given, or a target is neither a
            link (per System.islink) nor a directory.
    """
    with DvcLock(self.is_locker, self.git):
        cloud = DataCloud(self.settings)

        requested = self.parsed_args.targets
        # Truthiness check also covers a missing (None) target list.
        if not requested:
            raise DataSyncError('Sync target is not specified')

        targets = []
        for target in requested:
            if System.islink(target):
                targets.append(target)
            elif os.path.isdir(target):
                for root, _dirs, files in os.walk(target):
                    targets.extend(os.path.join(root, name) for name in files)
            else:
                raise DataSyncError(
                    'File "{}" does not exist'.format(target))

        map_progress(cloud.sync, targets, self.parsed_args.jobs)
def _filter(self, func, status, targets, jobs, remote):
    """
    Apply the cloud method named func to the targets whose reported status
    is STATUS_MODIFIED or equals the given status.

    The cloud comes from self._get_cloud(remote); when that is falsy an
    empty list is returned. Otherwise the cloud is used as a context
    manager and the result of map_progress is returned.
    """
    cloud = self._get_cloud(remote)
    if not cloud:
        return []

    with cloud:
        selected = [
            target
            for target, state in self._status(cloud, targets, jobs)
            if state == STATUS_MODIFIED or state == status
        ]
        return map_progress(getattr(cloud, func), selected, jobs)
def _map_targets(self, func, targets, jobs, collect_local=False, collect_cloud=False):
    """
    Connect self._cloud, gather the requested items and run func over them
    with map_progress.

    collect_local adds self._collect(targets, jobs, True) and collect_cloud
    adds self._collect(targets, jobs, False); the union is passed on as a
    list. Returns whatever map_progress returns.
    """
    # NOTE(review): the connection opened here is not closed in this
    # method -- confirm it is released elsewhere.
    self._cloud.connect()

    gathered = set()
    for enabled, local_flag in ((collect_local, True), (collect_cloud, False)):
        if enabled:
            gathered |= self._collect(targets, jobs, local_flag)

    return map_progress(func, list(gathered), jobs)
def _map_targets(self, func, targets, jobs):
    """
    Pass func, targets and jobs straight to map_progress and return its
    result; the targets are used exactly as given.
    """
    outcome = map_progress(func, targets, jobs)
    return outcome
def import_data(self, targets, jobs=1):
    """
    Run self._import over every target through map_progress.

    jobs defaults to 1 and is forwarded unchanged. Returns whatever
    map_progress returns.
    """
    importer = self._import
    return map_progress(importer, targets, jobs)
def _status(self, cloud, targets, jobs=1):
    """
    Collect items with self._collect_targets(cloud, targets, jobs) and run
    cloud.status over them through map_progress, returning its result.
    """
    items = self._collect_targets(cloud, targets, jobs)
    status_of = cloud.status
    return map_progress(status_of, items, jobs)
def import_data(self, targets, jobs=1):
    """
    Feed each target to self._import via map_progress (jobs defaults to 1)
    and return the result of that call.
    """
    results = map_progress(self._import, targets, jobs)
    return results
def _map_targets(self, f, targets, jobs):
    """
    Run f over the items produced by self._collect_targets(targets) using
    map_progress, and return what map_progress returns.
    """
    expanded = self._collect_targets(targets)
    outcome = map_progress(f, expanded, jobs)
    return outcome
def download_targets(self, targets):
    """
    Call self.download_target for each target through map_progress, using
    self.parsed_args.jobs as the jobs argument. Returns nothing.
    """
    downloader = self.download_target
    job_count = self.parsed_args.jobs
    map_progress(downloader, targets, job_count)