def migrate_storage(self, storage): """Migrate storage.""" if str(storage[u'_id']) not in self.storage_index: self.unreferenced_storages.append(storage[u'_id']) return 1 data_id = self.storage_index[str(storage[u'_id'])]['id'] data_path = self.storage_index[str(storage[u'_id'])]['path'] data = Data.objects.get(pk=data_id) new = Storage() new.name = 'data_{}_storage'.format(data_id) new.data = data new.json = storage[u'json'] new.contributor = self.get_contributor(storage[u'author_id']) # XXX: Django will change this on create new.created = storage[u'date_created'] # XXX: Django will change this on save new.modified = storage[u'date_modified'] new.save() dict_dot(data.output, data_path, new.pk) data.save() self.id_mapping['storage'][str(storage[u'_id'])] = new.pk
def assertFiles(self, obj, field_path, fn_list, **kwargs):  # pylint: disable=invalid-name
    """Compare a process's output files to the given correct files.

    :param obj: object which includes the files to compare
    :type obj: ~resolwe.flow.models.Data

    :param str field_path: path to :class:`~resolwe.flow.models.Data`
        object's field with the list of file names

    :param list fn_list: list of file names (and relative paths) of
        files to compare against. Paths should be relative to the
        ``tests/files`` directory of a Django application.

    :param str compression: if not ``None``, files will be uncompressed
        with the appropriate compression library before comparison.
        Currently supported compression formats are *gzip* and *zip*.

    :param filter: Function for filtering the contents of output files.
        It is used in :obj:`itertools.filterfalse` function and takes
        one parameter, a line of the output file. If it returns
        ``True``, the line is excluded from comparison of the two
        files.
    :type filter: ~types.FunctionType

    """
    field = dict_dot(obj.output, field_path)
    if len(field) != len(fn_list):
        self.fail(msg="Lengths of list:basic:file field and files list are not equal.")

    for fn_tested, fn_correct in zip(field, fn_list):
        self._assert_file(obj, fn_tested['file'], fn_correct, **kwargs)
def assertFiles(self, obj, field_path, fn_list, **kwargs):  # pylint: disable=invalid-name
    """Compare a list of processes' output files to the given correct files.

    :param obj: Data object which includes files that we want to
        compare.
    :type obj: :obj:`resolwe.flow.models.Data`

    :param str field_path: Path to list of file names in Data object.

    :param list fn_list: List of file names (and relative paths) of
        files to which we want to compare. Name/path is relative to
        ``tests/files`` folder of a Django application.

    :param compression: If not None, files will be uncompressed with
        the appropriate compression library before comparison.
        Currently supported compression formats are "gzip" and "zip".
    :type compression: :obj:`str`

    :param filter: Function for filtering the contents of output files.
        It is used in :obj:`itertools.filterfalse` function and takes
        one parameter, a line of the output file. If it returns `True`,
        the line is excluded from comparison of the two files.
    :type filter: :obj:`function`

    """
    field = dict_dot(obj.output, field_path)
    if len(field) != len(fn_list):
        self.fail(msg="Lengths of list:basic:file field and files list are not equal.")

    for fn_tested, fn_correct in zip(field, fn_list):
        self._assert_file(obj, fn_tested['file'], fn_correct, **kwargs)
def assertFiles(self, obj, field_path, fn, compression=None, filter=lambda _: False):  # pylint: disable=invalid-name
    """Compare output file of a processor to the given correct file.

    :param obj: Data object which includes file that we want to
        compare.
    :type obj: :obj:`resolwe.flow.models.Data`

    :param field_path: Path to file name in Data object.
    :type field_path: :obj:`str`

    :param fn: File name (and relative path) of file to which we want
        to compare. Name/path is relative to ``tests/files`` folder of
        a Django application.
    :type fn: :obj:`str`

    :param compression: If not None, files will be uncompressed with
        the appropriate compression library before comparison.
        Currently supported compression formats are "gzip" and "zip".
    :type compression: :obj:`str`

    :param filter: Function for filtering the contents of output files.
        It is used in :obj:`itertools.filterfalse` function and takes
        one parameter, a line of the output file. If it returns `True`,
        the line is excluded from comparison of the two files.
    :type filter: :obj:`function`

    """
    open_kwargs = {}
    if compression is None:
        open_fn = open
        # by default, open() will open files as text and return str
        # objects, but we need bytes objects
        open_kwargs['mode'] = 'rb'
    elif compression == 'gzip':
        open_fn = gzip.open
    elif compression == 'zip':
        open_fn = zipfile.ZipFile.open
    else:
        raise ValueError("Unsupported compression format.")

    field = dict_dot(obj.output, field_path)
    output = os.path.join(settings.FLOW_EXECUTOR['DATA_PATH'], str(obj.pk), field['file'])
    with open_fn(output, **open_kwargs) as output_file:
        output_contents = b"".join([line for line in filterfalse(filter, output_file)])
    output_hash = hashlib.sha256(output_contents).hexdigest()

    wanted = os.path.join(self.files_path, fn)

    if not os.path.isfile(wanted):
        shutil.copyfile(output, wanted)
        self.fail(msg="Output file {} missing so it was created.".format(fn))

    with open_fn(wanted, **open_kwargs) as wanted_file:
        wanted_contents = b"".join([line for line in filterfalse(filter, wanted_file)])
    wanted_hash = hashlib.sha256(wanted_contents).hexdigest()
    self.assertEqual(wanted_hash, output_hash,
                     msg="File contents hash mismatch: {} != {}".format(
                         wanted_hash, output_hash) + self._debug_info(obj))
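# A minimal usage sketch for the assertions above inside a process test.
# The import path, the run_process() helper, the process slug, its inputs
# and the reference file names are assumptions for illustration only, not
# taken from the source.
from resolwe.test import ProcessTestCase


class ExampleProcessTestCase(ProcessTestCase):

    def test_example_process(self):
        data = self.run_process('example-process', {'src': 'input.txt'})

        self.assertFields(data, 'stats.count', 42)
        self.assertFileExists(data, 'report')
        # compare a gzip-compressed output while ignoring comment lines
        self.assertFiles(data, 'out_file', 'expected_out.txt.gz',
                         compression='gzip',
                         filter=lambda line: line.startswith(b'#'))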
def migrate_storage(self, storage):
    if str(storage["_id"]) not in self.storage_index:
        self.unreferenced_storages.append(storage["_id"])
        return 1

    data_id = self.storage_index[str(storage["_id"])]["id"]
    data_path = self.storage_index[str(storage["_id"])]["path"]
    data = Data.objects.get(pk=data_id)

    new = Storage()
    new.name = "data_{}_storage".format(data_id)
    new.data = data
    new.json = storage["json"]
    new.contributor = self.get_contributor(storage["author_id"])
    # XXX: Django will change this on create
    new.created = storage["date_created"]
    # XXX: Django will change this on save
    new.modified = storage["date_modified"]
    new.save()

    dict_dot(data.output, data_path, new.pk)
    data.save()

    self.id_mapping["storage"][str(storage["_id"])] = new.pk
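# dict_dot() is used throughout these snippets both to read and to write
# values at dotted paths inside Data.output. A minimal sketch of the assumed
# behavior (an illustration, not the actual resolwe.flow.utils implementation):
def dict_dot_sketch(d, path, value=None):
    """Get or set the nested value addressed by the dotted ``path``."""
    keys = path.split('.')
    for key in keys[:-1]:
        d = d.setdefault(key, {})
    if value is None:
        return d[keys[-1]]
    d[keys[-1]] = value
    return value


output = {'report': {}}
dict_dot_sketch(output, 'report.json', 42)            # set a nested value
assert dict_dot_sketch(output, 'report.json') == 42   # read it back by path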
def assertFileExists(self, obj, field_path):  # pylint: disable=invalid-name
    """Ensure a file in the given object's field exists.

    :param obj: object that includes the file for which to check if it
        exists
    :type obj: ~resolwe.flow.models.Data

    :param str field_path: path to :class:`~resolwe.flow.models.Data`
        object's field with the file name/path

    """
    field = dict_dot(obj.output, field_path)
    output = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(obj.pk), field['file'])

    if not os.path.isfile(output):
        self.fail(msg="File {} does not exist.".format(field_path))
def assertFileExists(self, obj, field_path):  # pylint: disable=invalid-name
    """Ensure that the file referenced in the given field exists.

    :param obj: Data object which includes the file that we want to
        check.
    :type obj: :obj:`resolwe.flow.models.Data`

    :param field_path: Path to file name in Data object.
    :type field_path: :obj:`str`

    """
    field = dict_dot(obj.output, field_path)
    output = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(obj.pk), field['file'])

    if not os.path.isfile(output):
        self.fail(msg="File {} does not exist.".format(field_path))
def assertJSON(self, obj, storage, field_path, file_name):  # pylint: disable=invalid-name
    """Compare JSON in Storage object to the given correct JSON.

    :param obj: object to which the
        :class:`~resolwe.flow.models.Storage` object belongs
    :type obj: ~resolwe.flow.models.Data

    :param storage: object or id which contains JSON to compare
    :type storage: :class:`~resolwe.flow.models.Storage` or
        :class:`str`

    :param str field_path: path to JSON subset in the
        :class:`~resolwe.flow.models.Storage`'s object to compare
        against. If it is empty, the entire object will be compared.

    :param str file_name: file name (and relative path) of the file
        with the correct JSON to compare against. Path should be
        relative to the ``tests/files`` directory of a Django
        application.

        .. note::

            The given JSON file should be compressed with *gzip* and
            have the ``.gz`` extension.

    """
    self.assertEqual(os.path.splitext(file_name)[1], '.gz', msg='File extension must be .gz')

    if not isinstance(storage, Storage):
        storage = Storage.objects.get(pk=storage)

    storage_obj = dict_dot(storage.json, field_path)

    file_path = os.path.join(self.files_path, file_name)
    if not os.path.isfile(file_path):
        with gzip.open(file_path, mode='wt') as f:
            json.dump(storage_obj, f)

        self.fail(msg="Output file {} missing so it was created.".format(file_name))

    with gzip.open(file_path, mode='rt') as f:
        file_obj = json.load(f)

    self.assertEqual(storage_obj, file_obj,
                     msg="Storage {} field '{}' does not match file {}".format(
                         storage.id, field_path, file_name) + self._debug_info(obj))
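# A minimal sketch of preparing the gzip-compressed reference file that
# assertJSON() above compares against. The file name and JSON content are
# invented examples, not from the source.
import gzip
import json

expected = {'genes': {'A': 1.5, 'B': 0.0}}
with gzip.open('expected.json.gz', mode='wt') as handle:
    json.dump(expected, handle)

# In a test, the Data output field holds the Storage pk; assertJSON accepts
# either the Storage instance or its id, and an empty field_path compares
# the whole stored JSON document:
#     self.assertJSON(data, data.output['json'], '', 'expected.json.gz')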
def assertFields(self, obj, path, value):  # pylint: disable=invalid-name
    """Compare Data object's field to the given value.

    :param obj: Data object with field to compare
    :type obj: :obj:`resolwe.flow.models.Data`

    :param path: Path to field in Data object.
    :type path: :obj:`str`

    :param value: Desired value.
    :type value: :obj:`str`

    """
    field = dict_dot(obj.output, path)
    self.assertEqual(field, value,
                     msg="Field 'output.{}' mismatch: {} != {}".format(path, field, value) +
                     self._debug_info(obj))
def assertFields(self, obj, path, value):  # pylint: disable=invalid-name
    """Compare object's field to the given value.

    :param obj: object with the field to compare
    :type obj: ~resolwe.flow.models.Data

    :param str path: path to :class:`~resolwe.flow.models.Data`
        object's field

    :param str value: desired value of
        :class:`~resolwe.flow.models.Data` object's field

    """
    field = dict_dot(obj.output, path)
    self.assertEqual(field, value,
                     msg="Field 'output.{}' mismatch: {} != {}".format(path, field, value) +
                     self._debug_info(obj))
def assertJSON(self, obj, storage, field_path, file_name):  # pylint: disable=invalid-name
    """Compare JSON in Storage object to the given correct output.

    :param obj: Data object which includes file that we want to
        compare.
    :type obj: :obj:`resolwe.flow.models.Data`

    :param storage: Storage (or storage id) which contains JSON to
        compare.
    :type storage: :obj:`resolwe.flow.models.Storage` or :obj:`str`

    :param field_path: Path to JSON subset to compare in Storage
        object. If it is empty, entire Storage object will be
        compared.
    :type field_path: :obj:`str`

    :param file_name: File name (and relative path) of file to which
        we want to compare. Name/path is relative to ``tests/files``
        folder of a Django application.
    :type file_name: :obj:`str`

    """
    self.assertEqual(os.path.splitext(file_name)[1], '.gz', msg='File extension must be .gz')

    if not isinstance(storage, Storage):
        storage = Storage.objects.get(pk=storage)

    storage_obj = dict_dot(storage.json, field_path)

    file_path = os.path.join(self.files_path, file_name)
    if not os.path.isfile(file_path):
        with gzip.open(file_path, 'w') as f:
            json.dump(storage_obj, f)

        self.fail(msg="Output file {} missing so it was created.".format(file_name))

    with gzip.open(file_path) as f:
        file_obj = json.load(f)

    self.assertEqual(storage_obj, file_obj,
                     msg="Storage {} field '{}' does not match file {}".format(
                         storage.id, field_path, file_name) + self._debug_info(obj))
def create(self, request, *args, **kwargs):
    """Create a resource."""
    collections = request.data.get('collections', [])

    # check that user has permissions on all collections that Data
    # object will be added to
    for collection_id in collections:
        try:
            collection = Collection.objects.get(pk=collection_id)
        except Collection.DoesNotExist:
            return Response({'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                            status=status.HTTP_400_BAD_REQUEST)

        if not request.user.has_perm('add_collection', obj=collection):
            if request.user.is_authenticated():
                raise exceptions.PermissionDenied
            else:
                raise exceptions.NotFound

    # translate process's slug to id
    process_slug = request.data.get('process', None)
    process_query = Process.objects.filter(slug=process_slug).order_by('version')
    if not process_query.exists():
        # XXX: security - is it ok to reveal which processes (don't) exist?
        return Response({'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
                        status=status.HTTP_400_BAD_REQUEST)
    process = process_query.last()
    request.data['process'] = process.pk

    # check that user has permission on the process
    if not request.user.has_perm('view_process', obj=process):
        if request.user.is_authenticated():
            raise exceptions.PermissionDenied
        else:
            raise exceptions.NotFound

    # perform "get_or_create" if requested - return existing object
    # if found
    if kwargs.pop('get_or_create', False):
        process_input = request.data.get('input', {})

        # use default values if they are not given
        for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(process_input, path, field_schema['default'])

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
        if data_qs.exists():
            data = data_qs.order_by('created').last()
            serializer = self.get_serializer(data)
            return Response(serializer.data)

    # create the objects
    resp = super(ResolweCreateDataModelMixin, self).create(request, *args, **kwargs)

    # run manager
    manager.communicate()

    return resp
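# A hedged sketch of the request payload the create() view above expects.
# The field names follow the code above; the slug, collection ids and input
# values are invented examples. When the view is routed with get_or_create,
# an existing cached Data object with the same input checksum is returned
# instead of creating a new one.
payload = {
    'process': 'example-aligner',        # process slug, translated to a pk above
    'collections': [1, 2],               # caller needs add_collection permission
    'input': {'src': 12, 'threads': 4},  # omitted fields receive schema defaults
}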
def run(self, data_id, script, verbosity=1):
    """Execute the script and save results."""
    if verbosity >= 1:
        print('RUN: {} {}'.format(data_id, script))

    self.data_id = data_id

    dir_mode = settings.FLOW_EXECUTOR.get('DATA_DIR_MODE', 0o755)

    output_path = os.path.join(settings.FLOW_EXECUTOR['DATA_PATH'], str(data_id))

    os.mkdir(output_path)
    # os.mkdir is not guaranteed to set the given mode
    os.chmod(output_path, dir_mode)
    os.chdir(output_path)

    log_file = open('stdout.txt', 'w+')
    json_file = open('jsonout.txt', 'w+')

    proc_pid = self.start()

    self.update_data_status(
        status=Data.STATUS_PROCESSING,
        started=now(),
        process_pid=proc_pid
    )

    # Run processor and handle intermediate results
    self.run_script(script)
    spawn_processors = []
    output = {}
    process_error, process_warning, process_info = [], [], []
    process_progress, process_rc = 0, 0

    # read processor output
    try:
        stdout = self.get_stdout()
        while True:
            line = stdout.readline()
            if not line:
                break

            try:
                if line.strip().startswith('run'):
                    # Save processor and spawn if no errors
                    log_file.write(line)
                    log_file.flush()

                    for obj in iterjson(line[3:].strip()):
                        spawn_processors.append(obj)
                else:
                    # If JSON, save to MongoDB
                    updates = {}
                    for obj in iterjson(line):
                        for key, val in six.iteritems(obj):
                            if key.startswith('proc.'):
                                if key == 'proc.error':
                                    process_error.append(val)
                                    if not process_rc:
                                        process_rc = 1
                                        updates['process_rc'] = process_rc
                                    updates['process_error'] = process_error
                                    updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.warning':
                                    process_warning.append(val)
                                    updates['process_warning'] = process_warning
                                elif key == 'proc.info':
                                    process_info.append(val)
                                    updates['process_info'] = process_info
                                elif key == 'proc.rc':
                                    process_rc = int(val)
                                    updates['process_rc'] = process_rc
                                    if process_rc != 0:
                                        updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.progress':
                                    process_progress = int(float(val) * 100)
                                    updates['process_progress'] = process_progress
                            else:
                                dict_dot(output, key, val)
                                updates['output'] = output

                    if updates:
                        updates['modified'] = now()
                        self.update_data_status(**updates)

                    if process_rc > 0:
                        log_file.close()
                        json_file.close()
                        os.chdir(CWD)
                        return

                    # Debug output
                    # Not referenced in Data object
                    json_file.write(line)
                    json_file.flush()

            except ValueError as ex:
                # Ignore if not JSON
                log_file.write(line)
                log_file.flush()

    except MemoryError as ex:
        logger.error(__("Out of memory: {}", ex))

    except IOError as ex:
        # TODO: if ex.errno == 28: no more free space
        raise ex
    finally:
        # Store results
        log_file.close()
        json_file.close()
        os.chdir(CWD)

    return_code = self.end()

    if process_rc < return_code:
        process_rc = return_code

    if process_rc == 0:
        self.update_data_status(
            status=Data.STATUS_DONE,
            process_progress=100,
            finished=now()
        )
    else:
        self.update_data_status(
            status=Data.STATUS_ERROR,
            process_progress=100,
            process_rc=process_rc,
            finished=now()
        )

    # try:
    #     # Cleanup after processor
    #     data_purge(data_ids=[data_id], delete=True, verbosity=0)
    # except:  # pylint: disable=bare-except
    #     logger.error(__("Purge error:\n\n{}", traceback.format_exc()))

    # if not update_data(data):  # Data was deleted
    #     # Restore original directory
    #     os.chdir(settings.PROJECT_ROOT)
    #     return

    if spawn_processors and Data.objects.get(pk=self.data_id).status == Data.STATUS_DONE:
        # Spawn processors
        for d in spawn_processors:
            d['contributor'] = Data.objects.get(pk=self.data_id).contributor
            d['process'] = Process.objects.get(slug=d['process'])
            Data.objects.create(**d)
def run(self, data_id, script, verbosity=1):
    """Execute the script and save results."""
    if verbosity >= 1:
        print('RUN: {} {}'.format(data_id, script))

    self.data_id = data_id

    data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
    dir_mode = getattr(settings, 'FLOW_EXECUTOR', {}).get('DATA_DIR_MODE', 0o755)

    output_path = os.path.join(data_dir, str(data_id))

    os.mkdir(output_path)
    # os.mkdir is not guaranteed to set the given mode
    os.chmod(output_path, dir_mode)
    os.chdir(output_path)

    log_file = open('stdout.txt', 'w+')
    json_file = open('jsonout.txt', 'w+')

    proc_pid = self.start()

    self.update_data_status(
        status=Data.STATUS_PROCESSING,
        started=now(),
        process_pid=proc_pid
    )

    # Run processor and handle intermediate results
    self.run_script(script)
    spawn_processors = []
    output = {}
    process_error, process_warning, process_info = [], [], []
    process_progress, process_rc = 0, 0

    # read processor output
    try:
        stdout = self.get_stdout()
        while True:
            line = stdout.readline()
            if not line:
                break

            try:
                if line.strip().startswith('run'):
                    # Save processor and spawn if no errors
                    log_file.write(line)
                    log_file.flush()

                    for obj in iterjson(line[3:].strip()):
                        spawn_processors.append(obj)
                elif line.strip().startswith('export'):
                    file_name = line[6:].strip()

                    export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                    unique_name = 'export_{}'.format(uuid.uuid4().hex)
                    export_path = os.path.join(export_folder, unique_name)

                    EXPORTED_FILES_MAPPER[file_name] = unique_name

                    shutil.move(file_name, export_path)
                else:
                    # If JSON, save to MongoDB
                    updates = {}
                    for obj in iterjson(line):
                        for key, val in six.iteritems(obj):
                            if key.startswith('proc.'):
                                if key == 'proc.error':
                                    process_error.append(val)
                                    if not process_rc:
                                        process_rc = 1
                                        updates['process_rc'] = process_rc
                                    updates['process_error'] = process_error
                                    updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.warning':
                                    process_warning.append(val)
                                    updates['process_warning'] = process_warning
                                elif key == 'proc.info':
                                    process_info.append(val)
                                    updates['process_info'] = process_info
                                elif key == 'proc.rc':
                                    process_rc = int(val)
                                    updates['process_rc'] = process_rc
                                    if process_rc != 0:
                                        updates['status'] = Data.STATUS_ERROR
                                elif key == 'proc.progress':
                                    process_progress = int(float(val) * 100)
                                    updates['process_progress'] = process_progress
                            else:
                                dict_dot(output, key, val)
                                updates['output'] = output

                    if updates:
                        updates['modified'] = now()
                        self.update_data_status(**updates)

                    if process_rc > 0:
                        log_file.close()
                        json_file.close()
                        os.chdir(CWD)
                        return

                    # Debug output
                    # Not referenced in Data object
                    json_file.write(line)
                    json_file.flush()

            except ValueError as ex:
                # Ignore if not JSON
                log_file.write(line)
                log_file.flush()

    except MemoryError as ex:
        logger.error(__("Out of memory: {}", ex))

    except IOError as ex:
        # TODO: if ex.errno == 28: no more free space
        raise ex
    finally:
        # Store results
        log_file.close()
        json_file.close()
        os.chdir(CWD)

    return_code = self.end()

    if process_rc < return_code:
        process_rc = return_code

    if spawn_processors and process_rc == 0:
        parent_data = Data.objects.get(pk=self.data_id)

        # Spawn processors
        for d in spawn_processors:
            d['contributor'] = parent_data.contributor
            d['process'] = Process.objects.filter(slug=d['process']).order_by('version').last()

            for field_schema, fields in iterate_fields(d.get('input', {}), d['process'].input_schema):
                type_ = field_schema['type']
                name = field_schema['name']
                value = fields[name]

                if type_ == 'basic:file:':
                    fields[name] = hydrate_spawned_files(value, data_id)
                elif type_ == 'list:basic:file:':
                    fields[name] = [hydrate_spawned_files(fn, data_id) for fn in value]

            with transaction.atomic():
                d = Data.objects.create(**d)

                for collection in parent_data.collection_set.all():
                    collection.data.add(d)

    if process_rc == 0:
        self.update_data_status(
            status=Data.STATUS_DONE,
            process_progress=100,
            finished=now()
        )
    else:
        self.update_data_status(
            status=Data.STATUS_ERROR,
            process_progress=100,
            process_rc=process_rc,
            finished=now()
        )

    try:
        # Cleanup after processor
        if data_id != 'no_data_id':
            data_purge(data_ids=[data_id], delete=True, verbosity=verbosity)
    except:  # pylint: disable=bare-except
        logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
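# An illustrative sketch (not from the source) of the stdout protocol that
# run() parses above: plain JSON lines update the Data object, 'proc.*' keys
# get special handling, and lines starting with 'run' or 'export' trigger
# process spawning and file export respectively. A processor script might
# print lines such as:
print('{"proc.progress": 0.25}')                          # -> process_progress = 25
print('{"out_file": {"file": "result.txt"}}')             # -> dict_dot(output, 'out_file', ...)
print('run {"process": "child-process", "input": {}}')    # -> queued in spawn_processors
print('export result.txt')                                # -> moved into UPLOAD_DIR
print('{"proc.rc": 0}')                                    # -> final return code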