def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()
    system = current_system()

    source = get_file_by_url(step.options.source_url)
    source_url = source.parsed_url
    source_path = source_url.path

    cut = system.cut_storage
    cut_filename = uuid.uuid4().hex
    cut_path = os.path.join(resolve_path(cut.root_path), cut_filename)

    if source_url.scheme == 'card':
        if step.options.source_root_path is not None:
            source_path = os.path.join(
                step.options.source_root_path, source_path[1:])
    else:
        raise Exception("Only support card here")

    filecopy = FileCopy(
        source=source_path,
        destination=cut_path,
        link=False,
        remove_source=False,
    )
    filecopy.run()

    step.result = ToCutResult(path=cut_path)
    step.status = StepStatus.done

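# The FileCopy helper used above is not shown here. A minimal sketch of what it
# is assumed to do (copy or hard-link into place after creating the destination
# directory) -- an illustration, not the project's actual implementation:
import os
import shutil


class FileCopySketch:
    def __init__(self, source, destination, link=False, remove_source=False):
        self.source = source
        self.destination = destination
        self.link = link
        self.remove_source = remove_source

    def run(self):
        os.makedirs(os.path.dirname(self.destination), exist_ok=True)
        if self.link:
            # Hard-link when requested to avoid duplicating data on disk.
            os.link(self.source, self.destination)
        else:
            shutil.copyfile(self.source, self.destination)
        if self.remove_source:
            os.remove(self.source)
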
def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()

    cut = job.get_step('to_cut')
    if cut is not None:
        source_path = cut.result.path
        if os.path.exists(source_path):
            os.remove(source_path)

    step.result = CleanCutResult()
    step.status = StepStatus.done

def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()

    if step.options.path is not None:
        path = step.options.path
    else:
        cut = job.get_step('to_cut')
        path = cut.result.path

    ref = calculate_hash(path)

    step.result = CalculateHashResult(calculated_hash=ref)
    step.status = StepStatus.done

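# calculate_hash is assumed to produce a stable content reference for the file.
# A plausible sketch (chunked SHA-256 over the file contents); the real helper
# may use a different digest:
import hashlib


def calculate_hash_sketch(path, chunk_size=1024 * 1024):
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
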
def run_scan(self, name, root_path):
    # Scan the root path for files matching the filter
    system = current_system()
    source = next((t for t in self.sources if t.name == name), None)
    if source is None:
        logging.debug("No source for '%s'", name)
        return

    prios = {x: n for (n, x) in enumerate(source.extension)}

    def prio(x):
        return prios[os.path.splitext(x)[1][1:].lower()]

    scanner = FolderScanner(root_path, extensions=source.extension)
    collected = {}
    for file_path in scanner.scan():
        if '.' not in file_path:
            continue
        url = source.get_path_url(file_path)
        if system.select('file').has(url):
            logging.info('Has %s already', url)
            continue
        stem, _ = os.path.splitext(file_path)
        if stem in collected:
            collected[stem].append(file_path)
        else:
            collected[stem] = [file_path]
        if len(collected) >= 100:
            break

    logging.info('Collected %d files.', len(collected))

    # Create entries and import jobs for each found file
    for _, file_paths in sorted(collected.items(), key=lambda x: x[0]):
        logging.debug("Importing %s", ' + '.join(file_paths))
        parts = []
        for file_path in sorted(file_paths, key=prio):
            full_path = os.path.join(root_path, file_path)
            mime_type, is_raw = guess_mime_type(full_path)
            parts.append(RegisterPart(
                server=system.hostname,
                source=source.name,
                root_path=root_path,
                path=file_path,
                is_raw=is_raw,
                mime_type=mime_type,
            ))

        yield Job(
            steps=[
                Register.AsStep(
                    parts=parts,
                )
            ]
        )

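# FolderScanner is assumed to walk root_path and yield paths relative to it,
# keeping only the configured extensions. A minimal sketch under that
# assumption:
import os


class FolderScannerSketch:
    def __init__(self, root_path, extensions=None):
        self.root_path = root_path
        self.extensions = {e.lower() for e in (extensions or [])}

    def scan(self):
        for dirpath, _dirnames, filenames in os.walk(self.root_path):
            for filename in filenames:
                ext = os.path.splitext(filename)[1][1:].lower()
                if self.extensions and ext not in self.extensions:
                    continue
                full_path = os.path.join(dirpath, filename)
                yield os.path.relpath(full_path, self.root_path)
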
def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()

    if step.options.path is not None:
        path = step.options.path
    else:
        cut = job.get_step('to_cut')
        path = cut.result.path

    analyse = get_analyser(step.options.mime_type)
    if analyse is None:
        logging.info("Found no metadata analyser for %s",
                     step.options.mime_type)
        step.status = StepStatus.done
        return

    metadata = analyse(path)

    if metadata is not None and step.options.entry_id is not None:
        @retry()
        def update():
            entry = get_entry_by_id(step.options.entry_id)
            if entry.metadata is None:
                entry.metadata = metadata
            else:
                entry.metadata.merge(metadata)
            update_entry_by_id(entry.id, entry)

        update()

    step.result = ReadMetadataResult(metadata=metadata)
    step.status = StepStatus.done

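# The retry() decorator wrapping the read-modify-write update above is assumed
# to re-run the wrapped function when it raises (for example on a version
# conflict against the entry store). A simple sketch of such a decorator:
import functools
import time


def retry_sketch(attempts=3, delay=0.1, exceptions=(Exception,)):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    if attempt == attempts - 1:
                        raise
                    time.sleep(delay)
        return wrapper
    return decorator
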
def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()

    entry = get_entry_by_id(step.options.entry_id)
    source = get_file_by_url(step.options.source_url)

    if step.options.path is not None:
        source_path = step.options.path
    else:
        cut = job.get_step('to_cut')
        source_path = cut.result.path
    assert source_path, "Missing source path (forgot to_cut step?)"

    if step.options.reference is not None:
        reference = step.options.reference
    else:
        calculate_hash = job.get_step('calculate_hash')
        reference = calculate_hash.result.calculated_hash
    assert reference, "Missing reference (forgot calculate_hash step?)"

    metadata_step = job.get_step('read_metadata')
    if metadata_step.result is not None and metadata_step.result.metadata is not None:
        metadata = metadata_step.result.metadata
    else:
        metadata = entry.metadata

    if metadata is not None and hasattr(
            metadata, 'taken_ts') and metadata.taken_ts is not None:
        taken_ts = metadata.taken_ts[:10]
    else:
        taken_ts = datetime.datetime.fromtimestamp(
            os.path.getmtime(source_path)).strftime('%Y-%m-%d')

    system = current_system()
    file_ref = next(
        (fr for fr in entry.files if fr.reference == source.reference),
        None)
    purpose = file_ref.purpose if file_ref is not None else FilePurpose.unknown
    main_root = resolve_path(system.main_storage.root_path)
    filename = os.path.basename(step.options.source_url)
    parts = [
        main_root,
        entry.type.value,
        purpose.value,
        taken_ts,
        filename,
    ]
    main_path = os.path.join(*parts)
    logging.info("Source: %s", str(source.reference))
    logging.info("Main path: %s", str(main_path))

    filecopy = FileCopy(
        source=source_path,
        destination=main_path,
        link=True,
        remove_source=False,
    )
    filecopy.run()

    @retry()
    def push():
        entry = get_entry_by_id(step.options.entry_id)
        new_file_ref = FileReference(
            purpose=file_ref.purpose,
            version=file_ref.version,
            reference=reference,
            mime_type=file_ref.mime_type,
        )
        new_file = File(
            reference=reference,
            url=system.main_storage.get_file_url(main_path),
            mime_type=source.mime_type,
            status=FileStatus.managed,
        )
        create_file(new_file)
        entry.files.append(new_file_ref)
        update_entry_by_id(entry.id, entry)

    push()

    step.result = ToMainResult()
    step.status = StepStatus.done

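# The main storage path built above follows the layout
# <main_root>/<entry type>/<purpose>/<taken date>/<filename>.
# With hypothetical example data it would come out as something like:
#
#   /data/main/image/original/2019-07-14/IMG_1234.JPG
#
# where taken_ts falls back to the source file's mtime date when no taken_ts
# metadata is available.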
def run(self, job: Job):
    logging.debug(job.to_json())
    step = job.get_current_step()

    entry = get_entry_by_id(step.options.entry_id)
    source = get_file_by_url(step.options.source_url)
    source_ref = entry.get_file_reference(source.reference)

    if step.options.path is not None:
        source_path = step.options.path
    else:
        cut = job.get_step('to_cut')
        source_path = cut.result.path

    transcoder = get_transcoder(source.mime_type, 'proxy')
    if transcoder is None:
        step.status = StepStatus.done
        return

    targets = [
        transcoder.Options(entry=entry, cut_source=source_path,
                           purpose=FilePurpose.proxy),
        transcoder.Options(entry=entry, cut_source=source_path,
                           purpose=FilePurpose.thumb),
        transcoder.Options(entry=entry, cut_source=source_path,
                           purpose=FilePurpose.check),
    ]

    filerefs = []
    for target in targets:
        f = transcoder.run(target)
        filerefs.append(
            FileReference(
                purpose=target.purpose,
                version=source_ref.version,
                reference=f.reference,
                mime_type=f.mime_type,
            ))

    logging.info(filerefs)

    @retry()
    def push():
        entry = get_entry_by_id(step.options.entry_id)
        for file_ref in filerefs:
            new_file_ref = FileReference(
                purpose=file_ref.purpose,
                version=file_ref.version,
                reference=file_ref.reference,
                mime_type=file_ref.mime_type,
            )
            entry.files.append(new_file_ref)
        update_entry_by_id(entry.id, entry)

    push()

    step.result = CreateProxyResult()
    step.status = StepStatus.done

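# The transcoder objects returned by get_transcoder are assumed to expose a
# nested Options container and a run() method that produces a stored file for
# the requested purpose (proxy, thumb, check). A rough, simplified sketch of
# that assumed interface:
from dataclasses import dataclass


class ProxyTranscoderSketch:
    @dataclass
    class Options:
        entry: object
        cut_source: str
        purpose: object

    def run(self, options):
        # Convert/resize options.cut_source according to options.purpose,
        # store the result, and return an object carrying .reference and
        # .mime_type for the new file.
        raise NotImplementedError
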
def register_parts(self):
    raw = None
    original = None
    derivative = None
    entry = None
    source = None
    root_path = None

    for part in self.options.parts:
        url = part.get_url(self.system)
        root_path = root_path or part.root_path
        f = File(
            url=url,
            reference=url,
            status=FileStatus.new,
            mime_type=part.mime_type,
        )
        try:
            f = create_file(f)
        except Conflict:
            f = get_file_by_url(url)
        if f is None:
            logging.error('Bad file: %s', url)
            continue
        if f.reference is not None:
            entry = next(
                iter(get_entries_by_reference(f.reference).entries), None)
        if part.is_raw:
            raw = f
            source = part.source
        elif raw is None:
            original = f
            source = part.source
        else:
            derivative = f

    primary = raw or original or derivative
    if primary is None:
        logging.error('No valid file!\n%s', self.step.to_json())
        return

    if entry is None:
        entry = Entry(
            type=EntryType.image,
            metadata=DefaultEntryMetadata(
                original_filename=os.path.basename(primary.url),
                source=source,
            ),
        )
        entry = create_entry(entry)

    jobs = []
    for f, p in ((raw, FilePurpose.raw),
                 (original, FilePurpose.original),
                 (derivative, FilePurpose.derivative)):
        if f is None:
            continue
        entry.files.append(
            FileReference(
                reference=f.reference,
                purpose=p,
                version=0,
                mime_type=f.mime_type,
            ))
        jobs.append(
            Job(steps=[
                ToCut.AsStep(
                    source_root_path=root_path,
                    source_url=f.url,
                ),
                CalculateHash.AsStep(),
                ReadMetadata.AsStep(
                    entry_id=entry.id,
                    mime_type=f.mime_type,
                ),
                ToMain.AsStep(
                    entry_id=entry.id,
                    source_url=f.url,
                ),
                CreateProxy.AsStep(
                    entry_id=entry.id,
                    source_url=f.url,
                ),
                CleanCut.AsStep(),
            ]))

    update_entry_by_id(entry.id, entry)

    with QueueClient('ipc://job_queue') as q:
        for job in jobs:
            q.send(job)

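# A hedged end-to-end sketch tying the pieces together (import_from_card and
# importer are hypothetical names): run_scan yields Register jobs, and each
# Register job later fans out, per file, into the
# ToCut -> CalculateHash -> ReadMetadata -> ToMain -> CreateProxy -> CleanCut
# pipeline built in register_parts above.
def import_from_card(importer, name, root_path):
    with QueueClient('ipc://job_queue') as q:
        for job in importer.run_scan(name, root_path):
            q.send(job)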