def main():
    cmds = ['--version', 'init', 'run', 'sync', 'repro', 'data', 'remove',
            'import', 'import-file', 'lock', 'cloud',
            'cloud-run', 'cloud-instance-create', 'cloud-instance-remove',
            'cloud-instance-describe',
            'test', 'test-aws', 'test-gcloud', 'test-cloud']

    if len(sys.argv) < 2 or sys.argv[1] not in cmds:
        if len(sys.argv) >= 2:
            print('Unimplemented or unrecognized command: ' + ' '.join(sys.argv[1:]))
        print_usage()
        sys.exit(-1)

    cmd = sys.argv[1]

    if cmd == '--version':
        print('dvc version {}'.format(VERSION))
    elif cmd == 'init':
        Runtime.run(CmdInit, parse_config=False)
    elif cmd == 'run':
        Runtime.run(CmdRun)
    elif cmd == 'repro':
        Runtime.run(CmdRepro)
    elif cmd == 'sync':
        Runtime.run(CmdDataSync)
    elif cmd == 'import':
        Runtime.run(CmdImportBulk)
    elif cmd == 'import-file':
        Runtime.run(CmdImportFile)
    elif cmd == 'remove':
        Runtime.run(CmdDataRemove)
    elif cmd == 'lock':
        Runtime.run(CmdLock)
    elif cmd == 'cloud-run':
        print('cloud-run unimplemented')
    elif cmd == 'cloud-instance-create':
        print('cloud-instance-create unimplemented')
    elif cmd == 'cloud-instance-remove':
        print('cloud-instance-remove unimplemented')
    elif cmd == 'cloud-instance-describe':
        print('cloud-instance-describe unimplemented')
    elif cmd == 'test-aws':
        print('TODO: test aws credentials')
    elif cmd == 'test-gcloud':
        Runtime.run(CmdTest)
    else:
        print('Unimplemented or unrecognized command: ' + ' '.join(sys.argv[1:]))
        print_usage()
        sys.exit(-1)
        cmd = CmdImportFile(self.settings)
        cmd.set_git_action(not self.no_git_actions)
        cmd.set_locker(False)

        output = self.parsed_args.output
        for input in self.parsed_args.input:
            if not os.path.isdir(input):
                cmd.import_and_commit_if_needed(input, output, self.parsed_args.lock)
            else:
                input_dir = os.path.basename(input)
                for root, dirs, files in os.walk(input):
                    for file in files:
                        filename = os.path.join(root, file)
                        rel = os.path.relpath(filename, input)
                        out = os.path.join(output, input_dir, rel)
                        out_dir = os.path.dirname(out)
                        if not os.path.exists(out_dir):
                            # os.walk can descend several levels, so create
                            # intermediate directories as well
                            os.makedirs(out_dir)
                        cmd.import_and_commit_if_needed(filename, out,
                                                        self.parsed_args.lock)


if __name__ == '__main__':
    Runtime.run(CmdImportBulk)
                if state.locked and target:
                    Logger.warn('Data item {} is already locked'.format(
                        data_item.data.relative))
                elif not state.locked and not target:
                    Logger.warn('Data item {} is already unlocked'.format(
                        data_item.data.relative))
                else:
                    state.locked = target
                    Logger.debug('Saving status file for data item {}'.format(
                        data_item.data.relative))
                    state.save()
                    Logger.info('Data item {} was {}ed'.format(
                        data_item.data.relative, cmd))
            except Exception as ex:
                error += 1
                Logger.error('Unable to {} {}: {}'.format(cmd, file, ex))

        if error > 0 and not self.no_git_actions:
            Logger.error(
                'Errors occurred. One or more lock commands were not successful.')
            self.not_committed_changes_warning()
        else:
            self.commit_if_needed('DVC lock: {}'.format(' '.join(self.args)))

        return 0


if __name__ == '__main__':
    Runtime.run(CmdLock, False)
        return not error

    def data_items_from_args(self, argv):
        result = []
        for arg in argv:
            try:
                if os.path.isfile(arg):
                    data_item = self.settings.path_factory.data_item(arg)
                    result.append(data_item)
            except NotInDataDirError:
                pass
        return result

    def _data_items_from_params(self, files, param_text):
        if not files:
            return []

        data_items, external = self.settings.path_factory.to_data_items(files)
        if external:
            raise RunError(
                '{} should point to data items from data dir: {}'.format(
                    param_text, ', '.join(external)))
        return data_items


if __name__ == '__main__':
    Runtime.run(CmdRun)
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)

    @staticmethod
    def is_url(url):
        return CmdDataImport.URL_REGEX.match(url) is not None

    @staticmethod
    def download_file(from_url, to_file):
        r = requests.get(from_url, stream=True)

        chunk_size = 1024 * 100
        downloaded = 0
        last_reported = 0
        report_bucket = 100 * 1024 * 1024
        with open(to_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive new chunks
                    downloaded += chunk_size
                    last_reported += chunk_size
                    if last_reported >= report_bucket:
                        last_reported = 0
                        Logger.debug('Downloaded {}'.format(sizeof_fmt(downloaded)))
                    f.write(chunk)
        return


if __name__ == '__main__':
    Runtime.run(CmdDataImport)
        if not force and not was_source_code_changed and not were_input_files_changed:
            Logger.debug('Data item "{}" is up to date'.format(
                self._data_item.data.relative))
            return False

        return self.reproduce_data_file()

    @property
    def dependencies(self):
        dependency_data_items = []
        for input_file in self.state.input_files:
            try:
                data_item = self._cmd_obj.settings.path_factory.data_item(input_file)
            except NotInDataDirError:
                raise ReproError(
                    u'The dependency file "{}" is not a data item'.format(input_file))
            except Exception as ex:
                raise ReproError(
                    u'Unable to reproduce the dependency file "{}": {}'.format(
                        input_file, ex))

            dependency_data_items.append(data_item)

        return dependency_data_items


if __name__ == '__main__':
    Runtime.run(CmdRepro)
        if blob.md5_hash == b64_encoded_md5:
            Logger.debug('checksum %s matches. Skipping upload' %
                         data_item.cache.relative)
            return

        Logger.debug('checksum %s mismatch. re-uploading' %
                     data_item.cache.relative)

        Logger.info('uploading cache file "{}" to gcp "{}"'.format(
            data_item.cache.relative, blob_name))
        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)
        Logger.info('uploading %s completed' % data_item.resolved_cache.relative)

    def sync_from_cloud(self, item):
        cloud = self.settings.config.cloud
        assert cloud in ['aws', 'gcp'], 'unknown cloud %s' % cloud
        if cloud == 'aws':
            return self._sync_from_cloud_aws(item)
        elif cloud == 'gcp':
            return self._sync_from_cloud_gcp(item)

    def sync_to_cloud(self, data_item):
        cloud = self.settings.config.cloud
        assert cloud in ['aws', 'gcp'], 'unknown cloud %s' % cloud
        if cloud == 'aws':
            return self._sync_to_cloud_aws(data_item)
        elif cloud == 'gcp':
            return self._sync_to_cloud_gcp(data_item)


if __name__ == '__main__':
    Runtime.run(CmdDataSync)
            data_dir_path.name, cache_dir_path.name, state_dir_path.name))

        conf_file = open(conf_file_name, 'wt')
        conf_file.write(
            self.CONFIG_TEMPLATE.format(data_dir_path.name,
                                        cache_dir_path.name,
                                        state_dir_path.name))
        conf_file.close()

        self.modify_gitignore(cache_dir_path.name)

        message = 'DVC init. data dir {}, cache dir {}, state dir {}'.format(
            data_dir_path.name, cache_dir_path.name, state_dir_path.name)
        return self.commit_if_needed(message)

    def modify_gitignore(self, cache_dir_name):
        gitignore_file = os.path.join(self.git.git_dir, '.gitignore')
        if not os.path.exists(gitignore_file):
            open(gitignore_file, 'a').close()
            Logger.info('File .gitignore was created')

        with open(gitignore_file, 'a') as fd:
            fd.write('\n{}'.format(cache_dir_name))
            fd.write('\n{}'.format(os.path.basename(self.git.lock_file)))

        Logger.info(
            'Directory {} was added to .gitignore file'.format(cache_dir_name))


if __name__ == '__main__':
    Runtime.run(CmdInit, False)
                            self.config.aws_secret_access_key)

        bucket_name = self.config.storage_bucket
        bucket = conn.lookup(bucket_name)
        if bucket:
            key = bucket.get_key(aws_file_name)
            if not key:
                Logger.warn(
                    '[Cmd-Remove] S3 remove warning: file "{}" does not exist in S3'
                    .format(aws_file_name))
            else:
                key.delete()
                Logger.info(
                    '[Cmd-Remove] File "{}" was removed from S3'.format(
                        aws_file_name))

    def remove_dir_file_by_file(self, target):
        for f in os.listdir(target):
            file = os.path.join(target, f)
            if os.path.isdir(file):
                self.remove_dir_file_by_file(file)
            else:
                self.remove_file(file)

        os.rmdir(target)


if __name__ == '__main__':
    Runtime.run(CmdDataRemove)
        print_usage()
        sys.exit(-1)

    cmd = sys.argv[1]
    subcmd = None
    if cmd in cmds_expand:
        if len(sys.argv) < 3 or sys.argv[2] not in cmds_expand[cmd]:
            print('for command %s, eligible actions are %s' %
                  (cmd, cmds_expand[cmd]))
            print_usage()
            sys.exit(-1)
        else:
            subcmd = sys.argv[2]

    if cmd == 'init':
        Runtime.run(CmdInit, args_start_loc=2)
    elif cmd == 'run':
        Runtime.run(CmdRun, args_start_loc=2)
    elif cmd == 'repro':
        Runtime.run(CmdRepro, args_start_loc=2)
    elif cmd == 'data-sync' or (cmd == 'data' and subcmd == 'sync'):
        Runtime.run(CmdDataSync, args_start_loc=2)
    elif cmd == 'data-import' or (cmd == 'data' and subcmd == 'import'):
        Runtime.run(CmdDataImport, args_start_loc=2)
    elif cmd == 'cloud-run' or (cmd == 'cloud' and subcmd == 'run'):
        print('cloud-run unimplemented')
    elif cmd == 'cloud-instance-create' or (cmd == 'cloud' and subcmd == 'instance-create'):
        print('cloud-instance-create unimplemented')
    elif cmd == 'cloud-instance-remove' or (cmd == 'cloud' and subcmd == 'instance-remove'):