Example #1
    def migrate_data_references(self):
        """Migrate data references."""
        def map_reference(reference):
            """Map references to new IDs."""
            try:
                return self.id_mapping['data'][reference]
            except KeyError as error:
                self.missing_data.add(error.args[0])  # the missing key; KeyError has no .message on Python 3
                return None

        # Fix references in JSON documents in the second pass.
        for new_id in self.id_mapping['data'].values():
            data = Data.objects.get(pk=new_id)
            for field_schema, fields in iterate_fields(data.input, data.process.input_schema):
                if 'type' not in field_schema:
                    continue

                name = field_schema['name']
                value = fields[name]
                if field_schema['type'].startswith('data:'):
                    fields[name] = map_reference(value)
                elif field_schema['type'].startswith('list:data:'):
                    fields[name] = [map_reference(v) for v in value]  # map() would return a lazy iterator on Python 3

            data.save()
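
All of these snippets revolve around the ``iterate_fields`` helper from Resolwe's flow utilities, whose implementation is not shown on this page. A minimal sketch of the behaviour the examples rely on (assuming a flat schema where every entry carries a 'name' key, and an optional path prefix that produces the three-value form used in the migration examples below) might look like this:

def iterate_fields(fields, schema, path_prefix=None):
    """Yield (field_schema, fields) pairs for each field present in fields.

    When ``path_prefix`` is given, also yield the path to the field, which
    matches the three-value unpacking used in the migration examples.
    """
    schema_dict = {s['name']: s for s in schema}
    for field_id in fields:
        if field_id not in schema_dict:
            raise KeyError('Field "{}" not found in schema'.format(field_id))
        if path_prefix is None:
            yield schema_dict[field_id], fields
        else:
            yield schema_dict[field_id], fields, path_prefix + field_id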
Example #2
def dependency_status(data):
    """Return abstracted satus of dependencies.

    STATUS_ERROR .. one dependency has error status
    STATUS_DONE .. all dependencies have done status
    None .. other

    """
    for field_schema, fields in iterate_fields(data.input, data.process.input_schema):
        if (field_schema['type'].lower().startswith('data:') or
                field_schema['type'].lower().startswith('list:data:')):
            name = field_schema['name']
            value = fields[name]

            if field_schema['type'].lower().startswith('data:'):
                value = [value]

            for uid in value:
                try:
                    _data = Data.objects.get(id=uid)
                except Data.DoesNotExist:
                    return Data.STATUS_ERROR

                if _data.status == Data.STATUS_ERROR:
                    return Data.STATUS_ERROR

                if _data.status != Data.STATUS_DONE:
                    return None

    return Data.STATUS_DONE
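
A quick way to see the aggregation rule above is to restate it over a plain list of status strings (a hypothetical helper; 'ER', 'OK' and 'PR' stand in for the Data.STATUS_* constants):

STATUS_ERROR, STATUS_DONE = 'ER', 'OK'  # assumed values mirroring Data.STATUS_*

def aggregate_status(statuses):
    """Collapse dependency statuses with the same short-circuit order as above."""
    for status in statuses:
        if status == STATUS_ERROR:
            return STATUS_ERROR  # the first error wins
        if status != STATUS_DONE:
            return None  # an unfinished dependency stops the scan early
    return STATUS_DONE

assert aggregate_status(['OK', 'OK']) == 'OK'
assert aggregate_status(['OK', 'ER']) == 'ER'
assert aggregate_status(['PR', 'ER']) is None  # the scan stops before reaching the error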
Example #3
def get_purge_files(root, output, output_schema, descriptor, descriptor_schema):
    """Return absolute paths under ``root`` not referenced by any schema field."""
    def remove_file(fn, paths):
        """From paths remove fn and dirs before fn in dir tree."""
        while fn:
            for i in range(len(paths) - 1, -1, -1):
                if fn == paths[i]:
                    paths.pop(i)
            fn, _ = os.path.split(fn)

    def remove_tree(fn, paths):
        """From paths remove fn and dirs before or after fn in dir tree."""
        for i in range(len(paths) - 1, -1, -1):
            head = paths[i]
            while head:
                if fn == head:
                    paths.pop(i)
                    break
                head, _ = os.path.split(head)

        remove_file(fn, paths)

    def subfiles(root):
        """Extend unreferenced list with all subdirs and files in top dir."""
        subs = []
        for path, dirs, files in os.walk(root, topdown=False):
            path = path[len(root) + 1:]
            subs.extend(os.path.join(path, f) for f in files)
            subs.extend(os.path.join(path, d) for d in dirs)
        return subs

    unreferenced_files = subfiles(root)

    remove_file('jsonout.txt', unreferenced_files)
    remove_file('stderr.txt', unreferenced_files)
    remove_file('stdout.txt', unreferenced_files)

    meta_fields = [
        [output, output_schema],
        [descriptor, descriptor_schema]
    ]

    for meta_field, meta_field_schema in meta_fields:
        for field_schema, fields in iterate_fields(meta_field, meta_field_schema):
            if 'type' in field_schema:
                field_type = field_schema['type']
                field_name = field_schema['name']

                # Remove basic:file: entries
                if field_type.startswith('basic:file:'):
                    remove_file(fields[field_name]['file'], unreferenced_files)

                # Remove list:basic:file: entries
                elif field_type.startswith('list:basic:file:'):
                    for field in fields[field_name]:
                        remove_file(field['file'], unreferenced_files)

                # Remove basic:dir: entries
                elif field_type.startswith('basic:dir:'):
                    remove_tree(fields[field_name]['dir'], unreferenced_files)

                # Remove list:basic:dir: entries
                elif field_type.startswith('list:basic:dir:'):
                    for field in fields[field_name]:
                        remove_tree(field['dir'], unreferenced_files)

                # Remove refs entries
                if field_type.startswith(('basic:file:', 'basic:dir:')):
                    for ref in fields[field_name].get('refs', []):
                        remove_tree(ref, unreferenced_files)

                elif field_type.startswith(('list:basic:file:', 'list:basic:dir:')):
                    for field in fields[field_name]:
                        for ref in field.get('refs', []):
                            remove_tree(ref, unreferenced_files)

    return {os.path.join(root, filename) for filename in unreferenced_files}
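
A hypothetical call showing the intent: build a data directory with two files, reference only one of them in the output, and expect the other to be flagged for purging (file names and schema are invented for illustration):

import os
import tempfile

root = tempfile.mkdtemp()
for name in ('kept.bam', 'scratch.tmp'):
    open(os.path.join(root, name), 'w').close()

output = {'alignment': {'file': 'kept.bam'}}
output_schema = [{'name': 'alignment', 'type': 'basic:file:'}]

unreferenced = get_purge_files(root, output, output_schema, {}, [])
# Expected: {os.path.join(root, 'scratch.tmp')} -- 'kept.bam' is referenced by
# the output, and stdout/stderr/jsonout entries are always exempt.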
Example #4
    def migrate_data(self, data):
        """Migrate data."""
        contributor = self.get_contributor(data["author_id"])

        # DESCRIPTOR SCHEMA ############################################
        ds_fields = []
        ds_fields.extend(data.get("static_schema", []))
        ds_fields.extend(data.get("var_template", []))
        ds_fields.sort(key=lambda d: d["name"])
        ds_fields_dumped = json.dumps(ds_fields)

        if ds_fields_dumped in self.descriptor_schema_index:
            descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
        else:
            descriptor_schema = DescriptorSchema(schema=ds_fields)
            descriptor_schema.name = "data_{}_descriptor".format(data["_id"])
            descriptor_schema.contributor = contributor
            descriptor_schema.save()

            self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

        descriptor = {}
        descriptor.update(data.get("static", {}))
        descriptor.update(data.get("var", {}))

        # PROCESS ######################################################
        if "processor_version" not in data:
            data["processor_version"] = "0.0.0"

        process_slug = self.process_slug(data["processor_name"])
        process_version = data["processor_version"]
        try:
            process = Process.objects.get(slug=process_slug, version=process_version)
        except Process.DoesNotExist:
            latest = Process.objects.filter(slug=process_slug).order_by("-version").first()

            if latest:
                process = Process()
                process.name = latest.name
                process.slug = latest.slug
                process.category = latest.category
                process.description = latest.description
                process.contributor = latest.contributor

                process.version = process_version
                process.type = data["type"]
                process.output_schema = data["output_schema"]
                process.input_schema = data.get("input_schema", {})
                process.persistence = self.persistence_dict[data["persistence"]]

                process.run["script"] = 'gen-require common\ngen-error "Depricated process, use the latest version."'

                # XXX
                # process.created =
                # process.modified =

                process.save()

                # copy permissions from latest process
                for user, perms in get_users_with_perms(latest, attach_perms=True).items():
                    for perm in perms:
                        assign_perm(perm, user, process)
                for group, perms in get_groups_with_perms(latest, attach_perms=True).items():
                    for perm in perms:
                        assign_perm(perm, group, process)
            else:
                # Create dummy processor if there is no other version
                dummy_name = "Dummy processor of type {}".format(data["type"])
                try:
                    process = Process.objects.get(name=dummy_name)
                except Process.DoesNotExist:
                    process = Process.objects.create(
                        name=dummy_name,
                        slug="non-existent",
                        contributor=get_user_model().objects.filter(is_superuser=True).first(),
                        type=data["type"],
                        category="data:non-existent",
                        run={"script": {'gen-require common\ngen-error "This processor is not intendent to be run."'}},
                    )

        # DATA #########################################################
        new = Data()
        new.name = data.get("static", {}).get("name", "")
        if len(new.name) > 100:
            self.long_names.append(new.name)
            new.name = new.name[:97] + "..."
        new.status = self.status_dict[data["status"]]
        new.process = process
        new.contributor = contributor
        new.input = data["input"] if "input" in data else {}
        new.output = data["output"]
        new.descriptor_schema = descriptor_schema
        new.descriptor = descriptor
        new.checksum = data.get("checksum", "")
        # XXX: Django will change this on create
        new.created = data["date_created"]
        # XXX: Django will change this on save
        new.modified = data["date_modified"]
        if "date_start" in data and "date_finish" in data:
            new.started = data["date_start"]
            new.finished = data["date_finish"]
        elif "date_finish" in data:
            new.started = data["date_finish"]
            new.finished = data["date_finish"]
        elif "date_start" in data:
            new.started = data["date_start"]
            new.finished = data["date_start"]
        else:
            new.started = datetime.fromtimestamp(0)
            new.finished = datetime.fromtimestamp(0)
        new.save()

        for case_id in data["case_ids"]:
            try:
                collection = Collection.objects.get(pk=self.id_mapping["collection"][str(case_id)])
            except KeyError:
                self.missing_collections.add(str(case_id))
                continue
            collection.data.add(new)

        for field_schema, fields, path in iterate_fields(data["output"], data["output_schema"], ""):
            if "type" in field_schema and field_schema["type"].startswith("basic:json:"):
                self.storage_index[fields[field_schema["name"]]] = {"id": new.pk, "path": path}

        self.migrate_permissions(new, data)

        self.id_mapping["data"][str(data["_id"])] = new.pk

        # DESCRIPTOR SCHEMA PERMISSIONS ################################
        for user in get_users_with_perms(new):
            assign_perm("view_descriptorschema", user, obj=descriptor_schema)

        for group in get_groups_with_perms(new):
            assign_perm("view_descriptorschema", group, obj=descriptor_schema)
Example #5
    def run_process(self, process_slug, input_=None, assert_status=Data.STATUS_DONE,
                    descriptor=None, descriptor_schema=None, run_manager=True,
                    verbosity=0):
        """Run the specified process with the given inputs.

        If input is a file, file path should be given relative to the
        ``tests/files`` directory of a Django application.
        If ``assert_status`` is given, check if
        :class:`~resolwe.flow.models.Data` object's status matches
        it after the process has finished.

        :param str process_slug: slug of the
            :class:`~resolwe.flow.models.Process` to run

        :param dict ``input_``: :class:`~resolwe.flow.models.Process`'s
            input parameters

            .. note::

                You don't have to specify parameters with defined
                default values.

        :param str ``assert_status``: desired status of the
            :class:`~resolwe.flow.models.Data` object

        :param dict descriptor: descriptor to set on the
            :class:`~resolwe.flow.models.Data` object

        :param dict descriptor_schema: descriptor schema to set on the
            :class:`~resolwe.flow.models.Data` object

        :return: object created by
            :class:`~resolwe.flow.models.Process`
        :rtype: ~resolwe.flow.models.Data

        """
        if input_ is None:
            input_ = {}  # avoid a shared mutable default; the inputs are modified in place below

        # backward compatibility
        process_slug = slugify(process_slug.replace(':', '-'))

        process = Process.objects.filter(slug=process_slug).order_by('-version').first()

        def mock_upload(file_path):
            """Mock file upload."""
            old_path = os.path.join(self.files_path, file_path)
            if not os.path.isfile(old_path):
                raise RuntimeError('Missing file: {}'.format(old_path))

            new_path = os.path.join(self.upload_dir, file_path)
            # create directories needed by new_path
            new_path_dir = os.path.dirname(new_path)
            if not os.path.exists(new_path_dir):
                os.makedirs(new_path_dir)
            shutil.copy2(old_path, new_path)
            self._upload_files.append(new_path)
            return {
                'file': file_path,
                'file_temp': file_path,
            }

        for field_schema, fields in iterate_fields(input_, process.input_schema):
            # copy referenced files to upload dir
            if field_schema['type'] == "basic:file:":
                fields[field_schema['name']] = mock_upload(fields[field_schema['name']])
            elif field_schema['type'] == "list:basic:file:":
                file_list = [mock_upload(file_path) for file_path in fields[field_schema['name']]]
                fields[field_schema['name']] = file_list

            # convert primary keys to strings
            if field_schema['type'].startswith('data:'):
                fields[field_schema['name']] = str(fields[field_schema['name']])
            if field_schema['type'].startswith('list:data:'):
                fields[field_schema['name']] = [str(obj) for obj in fields[field_schema['name']]]

        data = Data.objects.create(
            input=input_,
            contributor=self.admin,
            process=process,
            slug=get_random_string(length=6),
            descriptor_schema=descriptor_schema,
            descriptor=descriptor or {})
        self.collection.data.add(data)

        if run_manager:
            manager.communicate(run_sync=True, verbosity=verbosity)

        # Fetch latest Data object from database
        data = Data.objects.get(pk=data.pk)
        if not run_manager and assert_status == Data.STATUS_DONE:
            assert_status = Data.STATUS_RESOLVING

        if assert_status:
            self.assertStatus(data, assert_status)

        return data
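
A typical call from a test case might look like this (the process slug and input names are invented for illustration):

# Hypothetical usage: 'reads' is copied into the upload directory by
# mock_upload because its schema type is basic:file:.
aligned = self.run_process('alignment-bwa', {
    'genome': genome.pk,      # a previously created Data object
    'reads': 'reads.fastq',   # resolved relative to tests/files
})
self.assertEqual(aligned.status, Data.STATUS_DONE)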
Example #6
    def run_process(self, process_slug, input_=None, assert_status=Data.STATUS_DONE,
                    descriptor=None, descriptor_schema=None, run_manager=True,
                    verbosity=0):
        """Runs given processor with specified inputs.

        If input is file, file path should be given relative to
        ``tests/files`` folder of a Django application.
        If ``assert_status`` is given check if Data object's status
        matches ``assert_status`` after finishing processor.

        :param processor_name: name of the processor to run
        :type processor_name: :obj:`str`

        :param ``input_``: Input parameters for the processor. You don't
            have to specify parameters for which default values are
            given.
        :type ``input_``: :obj:`dict`

        :param ``assert_status``: Desired status of Data object
        :type ``assert_status``: :obj:`str`

        :param descriptor: Descriptor to set on the data object.
        :type descriptor: :obj:`dict`

        :param descriptor_schema: Descriptor schema to set on the data object.
        :type descriptor_schema: :obj:`dict`

        :return: :obj:`resolwe.flow.models.Data` object which is created by
            the processor.

        """

        if input_ is None:
            input_ = {}  # avoid a shared mutable default; the inputs are modified in place below

        # backward compatibility
        process_slug = slugify(process_slug.replace(':', '-'))

        p = Process.objects.get(slug=process_slug)

        def mock_upload(file_path):
            """Mock file upload."""
            old_path = os.path.join(self.files_path, file_path)
            if not os.path.isfile(old_path):
                raise RuntimeError('Missing file: {}'.format(old_path))

            new_path = os.path.join(self.upload_dir, file_path)
            # create directories needed by new_path
            new_path_dir = os.path.dirname(new_path)
            if not os.path.exists(new_path_dir):
                os.makedirs(new_path_dir)
            shutil.copy2(old_path, new_path)
            self._upload_files.append(new_path)
            return {
                'file': file_path,
                'file_temp': file_path,
            }

        for field_schema, fields in iterate_fields(input_, p.input_schema):
            # copy referenced files to upload dir
            if field_schema['type'] == "basic:file:":
                fields[field_schema['name']] = mock_upload(fields[field_schema['name']])
            elif field_schema['type'] == "list:basic:file:":
                file_list = [mock_upload(file_path) for file_path in fields[field_schema['name']]]
                fields[field_schema['name']] = file_list

            # convert primary keys to strings
            if field_schema['type'].startswith('data:'):
                fields[field_schema['name']] = str(fields[field_schema['name']])
            if field_schema['type'].startswith('list:data:'):
                fields[field_schema['name']] = [str(obj) for obj in fields[field_schema['name']]]

        d = Data.objects.create(
            input=input_,
            contributor=self.admin,
            process=p,
            slug=get_random_string(length=6),
            descriptor_schema=descriptor_schema,
            descriptor=descriptor or {})
        self.collection.data.add(d)

        if run_manager:
            manager.communicate(run_sync=True, verbosity=verbosity)

        # Fetch latest Data object from database
        d = Data.objects.get(pk=d.pk)
        if not run_manager and assert_status == Data.STATUS_DONE:
            assert_status = Data.STATUS_RESOLVING

        if assert_status:
            self.assertStatus(d, assert_status)

        return d
Example #7
    def migrate_data(self, data):
        """Migrate data."""
        contributor = self.get_contributor(data[u'author_id'])

        # DESCRIPTOR SCHEMA ############################################
        ds_fields = []
        ds_fields.extend(data.get(u'static_schema', []))
        ds_fields.extend(data.get(u'var_template', []))
        ds_fields.sort(key=lambda d: d[u'name'])
        ds_fields_dumped = json.dumps(ds_fields)

        if ds_fields_dumped in self.descriptor_schema_index:
            descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
        else:
            descriptor_schema = DescriptorSchema(schema=ds_fields)
            descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
            descriptor_schema.contributor = contributor
            descriptor_schema.save()

            self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

        descriptor = {}
        descriptor.update(data.get(u'static', {}))
        descriptor.update(data.get(u'var', {}))

        # PROCESS ######################################################
        if u'processor_version' not in data:
            data[u'processor_version'] = '0.0.0'

        process_slug = self.process_slug(data[u'processor_name'])
        process_version = data[u'processor_version']
        try:
            process = Process.objects.get(slug=process_slug, version=process_version)
        except Process.DoesNotExist:
            latest = Process.objects.filter(slug=process_slug).order_by('-version').first()

            if latest:
                process = Process()
                process.name = latest.name
                process.slug = latest.slug
                process.category = latest.category
                process.description = latest.description
                process.contributor = latest.contributor

                process.version = process_version
                process.type = data[u'type']
                process.output_schema = data[u'output_schema']
                process.input_schema = data.get(u'input_schema', {})
                process.persistence = self.persistence_dict[data[u'persistence']]

                process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

                # XXX
                # process.created =
                # process.modified =

                process.save()

                # copy permissions from latest process
                for user, perms in six.iteritems(get_users_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, user, process)
                for group, perms in six.iteritems(get_groups_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, group, process)
            else:
                # Create dummy processor if there is no other version
                dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
                try:
                    process = Process.objects.get(name=dummy_name)
                except Process.DoesNotExist:
                    process = Process.objects.create(
                        name=dummy_name,
                        slug='non-existent',
                        contributor=get_user_model().objects.filter(is_superuser=True).first(),
                        type=data[u'type'],
                        category='data:non-existent',
                        run={'script': 'gen-require common\ngen-error "This processor is not intended to be run."'},
                    )

        # DATA #########################################################
        new = Data()
        new.name = data.get(u'static', {}).get(u'name', '')
        if len(new.name) > 100:
            self.long_names.append(new.name)
            new.name = new.name[:97] + '...'
        new.status = self.status_dict[data[u'status']]
        new.process = process
        new.contributor = contributor
        new.input = data[u'input'] if u'input' in data else {}
        new.output = data[u'output']
        new.descriptor_schema = descriptor_schema
        new.descriptor = descriptor
        new.checksum = data.get(u'checksum', '')
        # XXX: Django will change this on create
        new.created = data[u'date_created']
        # XXX: Django will change this on save
        new.modified = data[u'date_modified']
        if u'date_start' in data and u'date_finish' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_finish']
        elif u'date_finish' in data:
            new.started = data[u'date_finish']
            new.finished = data[u'date_finish']
        elif u'date_start' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_start']
        else:
            new.started = datetime.fromtimestamp(0)
            new.finished = datetime.fromtimestamp(0)
        new.save()

        for case_id in data[u'case_ids']:
            try:
                collection = Collection.objects.get(pk=self.id_mapping[u'collection'][str(case_id)])
            except KeyError:
                self.missing_collections.add(str(case_id))
                continue
            collection.data.add(new)

        for field_schema, fields, path in iterate_fields(data[u'output'], data[u'output_schema'], ''):
            if 'type' in field_schema and field_schema['type'].startswith('basic:json:'):
                self.storage_index[fields[field_schema['name']]] = {
                    'id': new.pk,
                    'path': path,
                }

        self.migrate_permissions(new, data)

        self.id_mapping['data'][str(data[u'_id'])] = new.pk

        # DESCRIPTOR SCHEMA PERMISSIONS ################################
        for user in get_users_with_perms(new):
            assign_perm('view_descriptorschema', user, obj=descriptor_schema)

        for group in get_groups_with_perms(new):
            assign_perm('view_descriptorschema', group, obj=descriptor_schema)
Example #8
    def run(self, data_id, script, verbosity=1):
        """Execute the script and save results."""
        if verbosity >= 1:
            print('RUN: {} {}'.format(data_id, script))

        self.data_id = data_id

        data_dir = settings.FLOW_EXECUTOR['DATA_DIR']
        dir_mode = getattr(settings, 'FLOW_EXECUTOR', {}).get('DATA_DIR_MODE', 0o755)

        output_path = os.path.join(data_dir, str(data_id))

        os.mkdir(output_path)
        # os.mkdir is not guaranteed to set the given mode
        os.chmod(output_path, dir_mode)
        os.chdir(output_path)

        log_file = open('stdout.txt', 'w+')
        json_file = open('jsonout.txt', 'w+')

        proc_pid = self.start()

        self.update_data_status(
            status=Data.STATUS_PROCESSING,
            started=now(),
            process_pid=proc_pid
        )

        # Run processor and handle intermediate results
        self.run_script(script)
        spawn_processors = []
        output = {}
        process_error, process_warning, process_info = [], [], []
        process_progress, process_rc = 0, 0

        # read processor output
        try:
            stdout = self.get_stdout()
            while True:
                line = stdout.readline()
                if not line:
                    break

                try:
                    if line.strip().startswith('run'):
                        # Save processor and spawn if no errors
                        log_file.write(line)
                        log_file.flush()

                        for obj in iterjson(line[3:].strip()):
                            spawn_processors.append(obj)
                    elif line.strip().startswith('export'):
                        file_name = line[6:].strip()

                        export_folder = settings.FLOW_EXECUTOR['UPLOAD_DIR']
                        unique_name = 'export_{}'.format(uuid.uuid4().hex)
                        export_path = os.path.join(export_folder, unique_name)

                        EXPORTED_FILES_MAPPER[file_name] = unique_name

                        shutil.move(file_name, export_path)
                    else:
                        # If JSON, save to MongoDB
                        updates = {}
                        for obj in iterjson(line):
                            for key, val in six.iteritems(obj):
                                if key.startswith('proc.'):
                                    if key == 'proc.error':
                                        process_error.append(val)
                                        if not process_rc:
                                            process_rc = 1
                                            updates['process_rc'] = process_rc
                                        updates['process_error'] = process_error
                                        updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.warning':
                                        process_warning.append(val)
                                        updates['process_warning'] = process_warning
                                    elif key == 'proc.info':
                                        process_info.append(val)
                                        updates['process_info'] = process_info
                                    elif key == 'proc.rc':
                                        process_rc = int(val)
                                        updates['process_rc'] = process_rc
                                        if process_rc != 0:
                                            updates['status'] = Data.STATUS_ERROR
                                    elif key == 'proc.progress':
                                        process_progress = int(float(val) * 100)
                                        updates['process_progress'] = process_progress
                                else:
                                    dict_dot(output, key, val)
                                    updates['output'] = output

                        if updates:
                            updates['modified'] = now()
                            self.update_data_status(**updates)

                        if process_rc > 0:
                            log_file.close()
                            json_file.close()
                            os.chdir(CWD)
                            return

                        # Debug output
                        # Not referenced in Data object
                        json_file.write(line)
                        json_file.flush()

                except ValueError:
                    # Ignore if not JSON
                    log_file.write(line)
                    log_file.flush()

        except MemoryError as ex:
            logger.error(__("Out of memory: {}", ex))

        except IOError as ex:
            # TODO: if ex.errno == 28: no more free space
            raise  # bare raise preserves the original traceback
        finally:
            # Store results
            log_file.close()
            json_file.close()
            os.chdir(CWD)

        return_code = self.end()

        if process_rc < return_code:
            process_rc = return_code

        if spawn_processors and process_rc == 0:
            parent_data = Data.objects.get(pk=self.data_id)

            # Spawn processors
            for d in spawn_processors:
                d['contributor'] = parent_data.contributor
                d['process'] = Process.objects.filter(slug=d['process']).order_by('version').last()

                for field_schema, fields in iterate_fields(d.get('input', {}), d['process'].input_schema):
                    type_ = field_schema['type']
                    name = field_schema['name']
                    value = fields[name]

                    if type_ == 'basic:file:':
                        fields[name] = hydrate_spawned_files(value, data_id)
                    elif type_ == 'list:basic:file:':
                        fields[name] = [hydrate_spawned_files(fn, data_id) for fn in value]

                with transaction.atomic():
                    d = Data.objects.create(**d)
                    for collection in parent_data.collection_set.all():
                        collection.data.add(d)

        if process_rc == 0:
            self.update_data_status(
                status=Data.STATUS_DONE,
                process_progress=100,
                finished=now()
            )
        else:
            self.update_data_status(
                status=Data.STATUS_ERROR,
                process_progress=100,
                process_rc=process_rc,
                finished=now()
            )

        try:
            # Cleanup after processor
            if data_id != 'no_data_id':
                data_purge(data_ids=[data_id], delete=True, verbosity=verbosity)
        except:  # pylint: disable=bare-except
            logger.error(__("Purge error:\n\n{}", traceback.format_exc()))
Example #9
    def run_process(self, process_slug, input_=None, assert_status=Data.STATUS_DONE,
                    run_manager=True, verbosity=0):
        """Runs given processor with specified inputs.

        If input is file, file path should be given relative to
        ``tests/files`` folder of a Django application.
        If ``assert_status`` is given check if Data object's status
        matches ``assert_status`` after finishing processor.

        :param process_slug: slug of the processor to run
        :type process_slug: :obj:`str`

        :param ``input_``: Input parameters for the processor. You don't
            have to specify parameters for which default values are
            given.
        :type ``input_``: :obj:`dict`

        :param ``assert_status``: Desired status of Data object
        :type ``assert_status``: :obj:`str`

        :return: :obj:`resolwe.flow.models.Data` object which is created by
            the processor.

        """

        if input_ is None:
            input_ = {}  # avoid a shared mutable default; the inputs are modified in place below

        # backward compatibility
        process_slug = slugify(process_slug.replace(':', '-'))

        p = Process.objects.get(slug=process_slug)

        for field_schema, fields in iterate_fields(input_, p.input_schema):
            # copy referenced files to upload dir
            if field_schema['type'] == "basic:file:":
                for app_config in apps.get_app_configs():
                    old_path = os.path.join(
                        app_config.path, 'tests', 'files', fields[field_schema['name']])
                    if os.path.isfile(old_path):
                        file_name = os.path.basename(fields[field_schema['name']])
                        new_path = os.path.join(self.upload_path, file_name)
                        shutil.copy2(old_path, new_path)
                        self._upload_files.append(new_path)

                        # since we don't know what uid/gid will be used inside Docker executor,
                        # we must give others read and write permissions
                        os.chmod(new_path, 0o666)
                        fields[field_schema['name']] = {
                            'file': file_name,
                            'file_temp': file_name,
                        }
                        break

            # convert primary keys to strings
            if field_schema['type'].startswith('data:'):
                fields[field_schema['name']] = str(fields[field_schema['name']])
            if field_schema['type'].startswith('list:data:'):
                fields[field_schema['name']] = [str(obj) for obj in fields[field_schema['name']]]

        d = Data.objects.create(
            input=input_,
            contributor=self.admin,
            process=p,
            slug=get_random_string(length=6))
        self.collection.data.add(d)

        if run_manager:
            manager.communicate(run_sync=True, verbosity=verbosity)

        # Fetch latest Data object from database
        d = Data.objects.get(pk=d.pk)

        if not run_manager and assert_status == Data.STATUS_DONE:
            assert_status = Data.STATUS_RESOLVING

        if assert_status:
            self.assertStatus(d, assert_status)

        return d