def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
    """Record the contents of ``folder`` as the storage of ``dag``.

    ``folder`` is walked recursively. Every path matched by the spec from
    ``self._build_spec(folder)`` is skipped. A directory is saved as a
    ``DagStorage`` row with ``is_dir=True``. A file is saved as a
    ``DagStorage`` row referencing a ``File`` row; a new ``File`` is
    created only when its md5 is not already present in the hashes
    returned by ``self.file_provider.hashs(dag.project)``.

    Args:
        folder: Directory whose contents are uploaded.
        dag: Dag the stored entries are attached to.
        control_reqs: When true and ``INSTALL_DEPENDENCIES`` is truthy,
            every entry reported by ``control_requirements`` is saved as
            a ``DagLibrary`` row.
    """
    # Used below as a mapping md5 -> File id.
    hashs = self.file_provider.hashs(dag.project)
    all_files = []
    spec = self._build_spec(folder)

    for entry in glob(os.path.join(folder, '**'), recursive=True):
        path = os.path.relpath(entry, folder)
        # The recursive glob also yields ``folder`` itself, i.e. '.'.
        if spec.match_file(path) or path == '.':
            continue

        if isdir(entry):
            self.provider.add(
                DagStorage(dag=dag.id, path=path, is_dir=True)
            )
            continue

        # Context manager so the handle is closed right after reading
        # instead of being left to the garbage collector.
        with open(entry, 'rb') as stream:
            content = stream.read()
        md5 = hashlib.md5(content).hexdigest()
        all_files.append(entry)

        if md5 in hashs:
            file_id = hashs[md5]
        else:
            file = File(
                md5=md5,
                content=content,
                project=dag.project,
                dag=dag.id,
                created=now()
            )
            self.file_provider.add(file)
            file_id = file.id
            # Remember the new content so identical files seen later in
            # this upload reuse the same File row.
            hashs[md5] = file.id

        self.provider.add(
            DagStorage(dag=dag.id, path=path, file=file_id, is_dir=False)
        )

    if INSTALL_DEPENDENCIES and control_reqs:
        reqs = control_requirements(folder, files=all_files)
        for name, _rel, version in reqs:
            self.library_provider.add(
                DagLibrary(dag=dag.id, library=name, version=version)
            )
def upload(self, folder: str, dag: Dag, control_reqs: bool = True):
    """Record the contents of ``folder`` as the storage of ``dag`` in bulk.

    The method works in three phases:

    1. Collect paths: the direct children of ``folder`` plus, for every
       child directory not matched by the spec from
       ``self._build_spec(folder)``, everything below it.
    2. Build the objects in memory: a ``DagStorage`` per directory, a
       ``DagStorage`` per file, and a ``File`` for every content whose
       md5 is not yet in ``self.file_provider.hashs(dag.project)``.
    3. Save them with ``bulk_save_objects``, add the size of the newly
       created files to ``dag.file_size`` and, when requested, save the
       libraries reported by ``control_requirements``.

    Args:
        folder: Directory whose contents are uploaded.
        dag: Dag the stored entries are attached to.
        control_reqs: When true and ``INSTALL_DEPENDENCIES`` is truthy,
            every entry reported by ``control_requirements`` is saved as
            a ``DagLibrary`` row.

    Raises:
        Exception: If ``self.max_count`` is set and more paths than that
            were collected, or if ``self.max_file_size`` is set and a
            file is larger than that.
    """
    self.log_info('upload started')
    # Used below as a mapping md5 -> File id (or a pending File object).
    hashs = self.file_provider.hashs(dag.project)
    self.log_info('hashes are retrieved')

    all_files = []
    spec = self._build_spec(folder)

    # Without ``recursive=True`` the pattern matches direct children
    # only, so directories matched by the spec are never descended into.
    files = glob(os.path.join(folder, '**'))
    for top_entry in files[:]:
        path = os.path.relpath(top_entry, folder)
        if spec.match_file(path) or path == '.':
            continue
        if os.path.isdir(top_entry):
            # ``top_entry`` already starts with ``folder``; joining
            # ``folder`` again would break for a relative ``folder``.
            child_files = glob(
                os.path.join(top_entry, '**'), recursive=True
            )
            files.extend(child_files)

    if self.max_count and len(files) > self.max_count:
        raise Exception(f'files count = {len(files)} '
                        f'But max count = {self.max_count}')

    self.log_info('list of files formed')

    folders_to_add = []
    files_to_add = []
    files_storage_to_add = []
    total_size_added = 0
    # The recursive glob also yields the directory it starts from, so a
    # top-level directory shows up twice in ``files``; store it once.
    seen_paths = set()

    for entry in files:
        path = os.path.relpath(entry, folder)
        if spec.match_file(path) or path == '.':
            continue
        if path in seen_paths:
            continue
        seen_paths.add(path)

        if os.path.isdir(entry):
            folders_to_add.append(
                DagStorage(dag=dag.id, path=path, is_dir=True)
            )
            continue

        # Context manager so the handle is closed right after reading.
        with open(entry, 'rb') as stream:
            content = stream.read()

        # Number of bytes in the file; ``sys.getsizeof`` would also count
        # the overhead of the ``bytes`` object itself.
        size = len(content)
        if self.max_file_size and size > self.max_file_size:
            raise Exception(
                f'file = {entry} has size {size}.'
                f' But max size is set to {self.max_file_size}')

        md5 = hashlib.md5(content).hexdigest()
        all_files.append(entry)

        if md5 not in hashs:
            new_file = File(md5=md5,
                            content=content,
                            project=dag.project,
                            dag=dag.id,
                            created=now())
            # Keep the object itself: its id is not known before the
            # bulk insert below and is resolved afterwards.
            hashs[md5] = new_file
            files_to_add.append(new_file)
            total_size_added += size

        files_storage_to_add.append(
            DagStorage(dag=dag.id,
                       path=path,
                       file=hashs[md5],
                       is_dir=False)
        )

    self.log_info('inserting DagStorage folders')
    if folders_to_add:
        self.provider.bulk_save_objects(folders_to_add)

    self.log_info('inserting Files')
    if files_to_add:
        # NOTE(review): ``return_defaults=True`` presumably populates the
        # ids of the new File objects, which are read right below.
        self.file_provider.bulk_save_objects(files_to_add,
                                             return_defaults=True)

    self.log_info('inserting DagStorage Files')
    if files_storage_to_add:
        for file_storage in files_storage_to_add:
            # Replace pending File objects with their ids.
            if isinstance(file_storage.file, File):
                # noinspection PyUnresolvedReferences
                file_storage.file = file_storage.file.id

        self.provider.bulk_save_objects(files_storage_to_add)

    dag.file_size += total_size_added
    self.dag_provider.update()

    if INSTALL_DEPENDENCIES and control_reqs:
        reqs = control_requirements(folder, files=all_files)
        for name, _rel, version in reqs:
            self.library_provider.add(
                DagLibrary(dag=dag.id, library=name, version=version)
            )