Example #1
    def test_log_querybuilder(self):
        """ Test querying for logs by joining on nodes in the QueryBuilder """
        from aiida.orm import QueryBuilder

        # Setup nodes
        log_1, calc = self.create_log()
        log_2 = Log(now(), 'loggername', logging.getLevelName(LOG_LEVEL_REPORT), calc.id, 'log message #2')
        log_3 = Log(now(), 'loggername', logging.getLevelName(LOG_LEVEL_REPORT), calc.id, 'log message #3')

        # Retrieve a node by joining on a specific log ('log_2')
        builder = QueryBuilder()
        builder.append(Log, tag='log', filters={'id': log_2.id})
        builder.append(orm.CalculationNode, with_log='log', project=['uuid'])
        nodes = builder.all()

        self.assertEqual(len(nodes), 1)
        for node in nodes:
            self.assertIn(str(node[0]), [calc.uuid])

        # Retrieve all logs for a specific node by joining on that node
        builder = QueryBuilder()
        builder.append(orm.CalculationNode, tag='calc', filters={'id': calc.id})
        builder.append(Log, with_node='calc', project=['uuid'])
        logs = builder.all()

        self.assertEqual(len(logs), 3)
        for log in logs:
            self.assertIn(str(log[0]), [str(log_1.uuid), str(log_2.uuid), str(log_3.uuid)])
Example #2
    def test_delete_through_utility_method(self):
        """Test deletion works correctly through the `aiida.backends.utils.delete_nodes_and_connections`."""
        from aiida.common import timezone
        from aiida.backends.utils import delete_nodes_and_connections

        data_one = Data().store()
        data_two = Data().store()
        calculation = CalculationNode()
        calculation.add_incoming(data_one, LinkType.INPUT_CALC, 'input_one')
        calculation.add_incoming(data_two, LinkType.INPUT_CALC, 'input_two')
        calculation.store()

        log_one = Log(timezone.now(), 'test', 'INFO', data_one.pk).store()
        log_two = Log(timezone.now(), 'test', 'INFO', data_two.pk).store()

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 1
        assert Log.objects.get_logs_for(data_two)[0].pk == log_two.pk

        delete_nodes_and_connections([data_two.pk])

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 0
Example #3
def print_last_process_state_change(process_type=None):
    """
    Print the last time that a process of the specified type has changed its state.
    This function will also print a warning if the daemon is not running.

    :param process_type: optional process type for which to get the latest state change timestamp.
        Valid process types are either 'calculation' or 'work'.
    """
    from aiida.cmdline.utils.echo import echo_info, echo_warning
    from aiida.common import timezone
    from aiida.common.utils import str_timedelta
    from aiida.engine.daemon.client import get_daemon_client
    from aiida.engine.utils import get_process_state_change_timestamp

    client = get_daemon_client()

    timestamp = get_process_state_change_timestamp(process_type)

    if timestamp is None:
        echo_info('last time an entry changed state: never')
    else:
        timedelta = timezone.delta(timestamp, timezone.now())
        formatted = format_local_time(timestamp,
                                      format_str='at %H:%M:%S on %Y-%m-%d')
        relative = str_timedelta(timedelta,
                                 negative_to_zero=True,
                                 max_num_fields=1)
        echo_info('last time an entry changed state: {} ({})'.format(
            relative, formatted))

    if not client.is_daemon_running:
        echo_warning('the daemon is not running', bold=True)
Example #4
def set_process_state_change_timestamp(process):
    """
    Set the global setting that reflects the last time a process changed state, for the process type
    of the given process, to the current timestamp. The process type will be determined based on
    the class of the calculation node it has as its database container.

    :param process: the Process instance that changed its state
    """
    from aiida.backends.utils import get_settings_manager
    from aiida.common import timezone
    from aiida.common.exceptions import UniquenessError
    from aiida.orm import ProcessNode, CalculationNode, WorkflowNode

    if isinstance(process.node, CalculationNode):
        process_type = 'calculation'
    elif isinstance(process.node, WorkflowNode):
        process_type = 'work'
    elif isinstance(process.node, ProcessNode):
        # This will only occur for testing, as in general users cannot launch plain Process classes
        return
    else:
        raise ValueError('unsupported calculation node type {}'.format(type(process.node)))

    key = PROCESS_STATE_CHANGE_KEY.format(process_type)
    description = PROCESS_STATE_CHANGE_DESCRIPTION.format(process_type)
    value = timezone.datetime_to_isoformat(timezone.now())

    try:
        manager = get_settings_manager()
        manager.set(key, value, description)
    except UniquenessError as exception:
        process.logger.debug('could not update the {} setting because of a UniquenessError: {}'.format(key, exception))
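The function above records the last state change by writing an ISO-8601 timestamp under a per-process-type key in the settings store. A minimal stdlib sketch of that pattern, with a plain dict standing in for the settings manager and a key template that merely mirrors `PROCESS_STATE_CHANGE_KEY` (both assumptions, not the AiiDA API):

from datetime import datetime, timezone

# Assumptions: a plain dict as the settings store and an illustrative key template.
SETTINGS = {}
STATE_CHANGE_KEY = 'process|state_change|{}'

def record_state_change(process_type):
    """Store the current UTC time as an ISO-8601 string under a per-type key."""
    key = STATE_CHANGE_KEY.format(process_type)
    SETTINGS[key] = datetime.now(timezone.utc).isoformat()

record_state_change('calculation')
print(SETTINGS)  # {'process|state_change|calculation': '...T...+00:00'}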
Example #5
    def get_query_set(self,
                      relationships=None,
                      filters=None,
                      order_by=None,
                      past_days=None,
                      limit=None):
        """
        Return the query set of calculations for the given filters and query parameters

        :param relationships: a mapping of relationships to join on, e.g. {'with_node': Group} to join on a Group. The
            keys in this dictionary should be the keyword used in the `append` method of the `QueryBuilder` to join the
            entity on that is defined as the value.
        :param filters: rules to filter query results with
        :param order_by: order the query set by this criterion
        :param past_days: only include entries from the last past days
        :param limit: limit the query set to this number of entries
        :return: the query set, an iterator over the result dictionaries
        """
        import datetime

        from aiida import orm
        from aiida.common import timezone

        # Define the list of projections for the QueryBuilder, which are all valid minus the compound projections
        projected_attributes = [
            self.mapper.get_attribute(projection)
            for projection in self._valid_projections
            if projection not in self._compound_projections
        ]

        if filters is None:
            filters = {}

        if past_days is not None:
            filters['ctime'] = {
                '>': timezone.now() - datetime.timedelta(days=past_days)
            }

        builder = orm.QueryBuilder()
        builder.append(cls=orm.ProcessNode,
                       filters=filters,
                       project=projected_attributes,
                       tag='process')

        if relationships is not None:
            for tag, entity in relationships.items():
                builder.append(cls=type(entity),
                               filters={'id': entity.id},
                               **{tag: 'process'})

        if order_by is not None:
            builder.order_by({'process': order_by})
        else:
            builder.order_by({'process': {'ctime': 'asc'}})

        if limit is not None:
            builder.limit(limit)

        return builder.iterdict()
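The only time-related logic above is turning `past_days` into a `ctime` cutoff. A stdlib-only sketch of that step, using the same nested filter shape the example passes to the QueryBuilder (the helper name is illustrative):

import datetime

def past_days_filter(past_days):
    """Return a filter selecting entries created within the last `past_days` days."""
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=past_days)
    return {'ctime': {'>': cutoff}}

print(past_days_filter(7))  # {'ctime': {'>': datetime.datetime(...)}}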
Example #6
 def test_timezone_now(self):
     """Test timezone.now function."""
     delta = datetime.timedelta(minutes=1)
     ref = timezone.now()
     from_tz = timezone.make_aware(
         datetime.datetime.fromtimestamp(time.time()))
     self.assertLessEqual(from_tz, ref + delta)
     self.assertGreaterEqual(from_tz, ref - delta)
Example #7
    def test_creation(self):
        """Test creation of a BackendNode and all its properties."""
        node = self.backend.nodes.create(node_type=self.node_type,
                                         user=self.user,
                                         label=self.node_label,
                                         description=self.node_description)

        # Before storing
        self.assertIsNone(node.id)
        self.assertIsNone(node.pk)
        self.assertTrue(isinstance(node.uuid, str))
        self.assertTrue(isinstance(node.ctime, datetime))
        self.assertIsNone(node.mtime)
        self.assertIsNone(node.process_type)
        self.assertEqual(node.attributes, dict())
        self.assertEqual(node.extras, dict())
        self.assertEqual(node.node_type, self.node_type)
        self.assertEqual(node.label, self.node_label)
        self.assertEqual(node.description, self.node_description)

        # Store the node.ctime before the store as a reference
        now = timezone.now()
        node_ctime_before_store = node.ctime
        self.assertTrue(
            now > node.ctime,
            '{} is not smaller than now {}'.format(node.ctime, now))

        node.store()
        node_ctime = node.ctime
        node_mtime = node.mtime

        # The node.ctime should have been unchanged, but the node.mtime should have changed
        self.assertEqual(node.ctime, node_ctime_before_store)
        self.assertIsNotNone(node.mtime)
        self.assertTrue(now < node.mtime,
                        '{} is not larger than now {}'.format(node.mtime, now))

        # After storing
        self.assertTrue(isinstance(node.id, int))
        self.assertTrue(isinstance(node.pk, int))
        self.assertTrue(isinstance(node.uuid, str))
        self.assertTrue(isinstance(node.ctime, datetime))
        self.assertTrue(isinstance(node.mtime, datetime))
        self.assertIsNone(node.process_type)
        self.assertEqual(node.attributes, dict())
        self.assertEqual(node.extras, dict())
        self.assertEqual(node.node_type, self.node_type)
        self.assertEqual(node.label, self.node_label)
        self.assertEqual(node.description, self.node_description)

        # Try to construct a UUID from the UUID value to prove that it has a valid UUID
        UUID(node.uuid)

        # Change a column, which should trigger the save, update the mtime but leave the ctime untouched
        node.label = 'test'
        self.assertEqual(node.ctime, node_ctime)
        self.assertTrue(node.mtime > node_mtime)
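The test above checks the ctime/mtime contract: ctime is fixed at creation and mtime is only set, and later advanced, by writes. A toy, stdlib-only illustration of that contract with a plain class (an assumption, not the AiiDA backend node):

from datetime import datetime, timezone

class Record:
    """Tiny stand-in that tracks creation and modification times."""

    def __init__(self):
        self.ctime = datetime.now(timezone.utc)
        self.mtime = None
        self._label = ''

    @property
    def label(self):
        return self._label

    @label.setter
    def label(self, value):
        self._label = value
        self.mtime = datetime.now(timezone.utc)  # every write advances mtime

record = Record()
assert record.mtime is None
record.label = 'test'
assert record.ctime <= record.mtime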
Example #8
    def __init__(self):
        """Initialize with defaults."""
        self._exclude = None
        self._include = None

        now = timezone.now()
        default_label_prefix = f"Verdi autogroup on {now.strftime('%Y-%m-%d %H:%M:%S')}"
        self._group_label_prefix = default_label_prefix
        self._group_label = None  # Actual group label, set by `get_or_create_group`
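Example #9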
    def setUpBeforeMigration(self):
        from aiida.common import timezone

        db_setting_model = self.apps.get_model('db', 'DbSetting')

        self.settings_info['2daemon|task_stop|updater2'] = dict(
            key='2daemon|task_stop|updater2',
            datatype='date',
            dval=timezone.datetime_to_isoformat(timezone.now()),
            description='The last time the daemon finished to run '
            'the task \'updater\' (updater)'
        )
        self.settings_info['2daemon|task_start|updater2'] = dict(
            key='2daemon|task_start|updater2',
            datatype='date',
            dval=timezone.datetime_to_isoformat(timezone.now()),
            description='The last time the daemon started to run '
            'the task \'updater\' (updater)'
        )
        self.settings_info['2db|backend2'] = dict(
            key='2db|backend2',
            datatype='txt',
            tval='django',
            description='The backend used to communicate with the database.'
        )
        self.settings_info['2daemon|user2'] = dict(
            key='2daemon|user2',
            datatype='txt',
            tval='*****@*****.**',
            description='The only user that is allowed to run the AiiDA daemon on '
            'this DB instance'
        )
        self.settings_info['2db|schemaversion2'] = dict(
            key='2db|schemaversion2',
            datatype='txt',
            tval=' 1.0.8',
            description='The version of the schema used in this database.'
        )

        with transaction.atomic():
            for setting_info in self.settings_info.values():
                setting = db_setting_model(**setting_info)
                setting.save()
Example #10
 def get_group_name(self):
     """Get the name of the group.
     If no group name was set, it will set a default one by itself."""
     try:
         return self.group_name
     except AttributeError:
         now = timezone.now()
         gname = 'Verdi autogroup on ' + now.strftime('%Y-%m-%d %H:%M:%S')
         self.set_group_name(gname)
         return self.group_name
Example #11
    def test_delete_collection_logs(self):
        """Test deletion works correctly through objects collection."""
        from aiida.common import timezone

        data_one = Data().store()
        data_two = Data().store()

        log_one = Log(timezone.now(), 'test', 'INFO', data_one.pk).store()
        log_two = Log(timezone.now(), 'test', 'INFO', data_two.pk).store()

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 1
        assert Log.objects.get_logs_for(data_two)[0].pk == log_two.pk

        Node.objects.delete(data_two.pk)

        assert len(Log.objects.get_logs_for(data_one)) == 1
        assert Log.objects.get_logs_for(data_one)[0].pk == log_one.pk
        assert len(Log.objects.get_logs_for(data_two)) == 0
Example #12
 def setUp(self):
     super().setUp()
     self.log_record = {
         'time': now(),
         'loggername': 'loggername',
         'levelname': logging.getLevelName(LOG_LEVEL_REPORT),
         'dbnode_id': None,
         'message': 'This is a template record message',
         'metadata': {
             'content': 'test'
         },
     }
Example #13
    def test_timezone_now(self):
        """Test timezone.now function.

        Check that the time returned by AiiDA's timezone.now() function is compatible
        with attaching a timezone to a "naive" time stamp using timezone.make_aware().
        """
        delta = datetime.timedelta(minutes=1)
        ref = timezone.now()

        from_tz = timezone.make_aware(datetime.datetime.fromtimestamp(time.time()))
        self.assertLessEqual(from_tz, ref + delta)
        self.assertGreaterEqual(from_tz, ref - delta)
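For comparison, the same check can be written with just the standard library: calling `astimezone()` on a naive timestamp attaches the local timezone, which is the role `timezone.make_aware` plays here (treating the two as equivalent is an assumption of this sketch):

import datetime
import time

delta = datetime.timedelta(minutes=1)
ref = datetime.datetime.now().astimezone()  # timezone-aware "now" in local time

# Naive timestamp from time.time(), made aware by attaching the local timezone
from_tz = datetime.datetime.fromtimestamp(time.time()).astimezone()

assert ref - delta <= from_tz <= ref + delta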
Example #14
    def test_delete_many_ctime_mtime(self):
        """Test `delete_many` method filtering on `ctime` and `mtime`"""
        from datetime import timedelta

        # Initialization
        comment_uuids = []
        found_comments_ctime = []
        found_comments_mtime = []
        found_comments_uuid = []

        now = timezone.now()
        two_days_ago = now - timedelta(days=2)
        one_day_ago = now - timedelta(days=1)
        comment_times = [now, one_day_ago, two_days_ago]

        # Create comments
        comment1 = self.create_comment(ctime=now, mtime=now)
        comment2 = self.create_comment(ctime=one_day_ago, mtime=now)
        comment3 = self.create_comment(ctime=two_days_ago, mtime=one_day_ago)
        for comment in [comment1, comment2, comment3]:
            comment.store()
            comment_uuids.append(str(comment.uuid))

        # Make sure they exist with the correct times
        builder = orm.QueryBuilder().append(orm.Comment, project=['ctime', 'mtime', 'uuid'])
        self.assertGreater(builder.count(), 0)
        for comment in builder.all():
            found_comments_ctime.append(comment[0])
            found_comments_mtime.append(comment[1])
            found_comments_uuid.append(comment[2])
        for time, uuid in zip(comment_times, comment_uuids):
            self.assertIn(time, found_comments_ctime)
            self.assertIn(uuid, found_comments_uuid)
            if time != two_days_ago:
                self.assertIn(time, found_comments_mtime)

        # Delete comments that are created more than 1 hour ago,
        # unless they have been modified within 5 hours
        ctime_turning_point = now - timedelta(seconds=60 * 60)
        mtime_turning_point = now - timedelta(seconds=60 * 60 * 5)
        filters = {'and': [{'ctime': {'<': ctime_turning_point}}, {'mtime': {'<': mtime_turning_point}}]}
        self.backend.comments.delete_many(filters=filters)

        # Check only the most stale comment (comment3) was deleted
        builder = orm.QueryBuilder().append(orm.Comment, project='uuid')
        self.assertGreater(builder.count(), 1)  # There should still be at least 2
        found_comments_uuid = [_[0] for _ in builder.all()]
        self.assertNotIn(comment_uuids[2], found_comments_uuid)

        # Make sure the other comments were not deleted
        for comment_uuid in comment_uuids[:-1]:
            self.assertIn(comment_uuid, found_comments_uuid)
Example #15
def format_relative_time(datetime):
    """
    Return a string formatted timedelta of the given datetime with respect to the current datetime

    :param datetime: the datetime to format
    :return: string representation of the relative time since the given datetime
    """
    from aiida.common.utils import str_timedelta
    from aiida.common import timezone

    timedelta = timezone.delta(datetime, timezone.now())

    return str_timedelta(timedelta, negative_to_zero=True, max_num_fields=1)
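A rough stdlib stand-in for the helper above, reporting a single coarse unit and clamping negative deltas to zero (mirroring `negative_to_zero=True` and `max_num_fields=1`); the function name and unit labels are illustrative:

from datetime import datetime, timezone

def format_relative_time_sketch(when):
    """Return e.g. '3h ago' for the given timezone-aware datetime."""
    seconds = max(0, int((datetime.now(timezone.utc) - when).total_seconds()))
    for unit, size in (('d', 86400), ('h', 3600), ('m', 60), ('s', 1)):
        if seconds >= size or unit == 's':
            return '{}{} ago'.format(seconds // size, unit)

print(format_relative_time_sketch(datetime(2020, 1, 1, tzinfo=timezone.utc)))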
Example #16
    def create_log(self, **kwargs):
        """Create BackendLog"""
        time = kwargs['time'] if 'time' in kwargs else timezone.now()
        dbnode_id = kwargs['dbnode_id'] if 'dbnode_id' in kwargs else self.node.id

        return self.backend.logs.create(
            time=time,
            loggername='loggername',
            levelname=logging.getLevelName(LOG_LEVEL_REPORT),
            dbnode_id=dbnode_id,
            message=self.log_message,
            metadata={'content': 'test'}
        )
Example #17
def _make_import_group(*, group: Optional[ImportGroup],
                       node_pks: List[int]) -> ImportGroup:
    """Make an import group containing all imported nodes.

    :param group: Use an existing group
    :param node_pks: node pks to add to group

    """
    # So that we do not create empty groups
    if not node_pks:
        IMPORT_LOGGER.debug('No nodes to import, so no import group created')
        return group

    # If the user did not specify a group, create a new one to import everything into
    if not group:
        # Get a unique label for the import group, based on the current (local) time
        basename = timezone.localtime(timezone.now()).strftime('%Y%m%d-%H%M%S')
        counter = 0
        group_label = basename

        while Group.objects.find(filters={'label': group_label}):
            counter += 1
            group_label = f'{basename}_{counter}'

            if counter == MAX_GROUPS:
                raise exceptions.ImportUniquenessError(
                    f"Overflow of import groups (more than {MAX_GROUPS} groups exists with basename '{basename}')"
                )
        group = ImportGroup(label=group_label).store()

    # Add all the nodes to the new group
    builder = QueryBuilder().append(Node, filters={'id': {'in': node_pks}})

    first = True
    nodes = []
    description = 'Creating import Group - Preprocessing'

    with get_progress_reporter()(total=len(node_pks),
                                 desc=description) as progress:
        for entry in builder.iterall():
            if first:
                progress.set_description_str('Creating import Group',
                                             refresh=False)
                first = False
            progress.update()
            nodes.append(entry[0])

        group.add_nodes(nodes)
        progress.set_description_str('Done (cleaning up)', refresh=True)

    return group
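The core of the group-creation branch is the "append a counter until the label is free" loop. A self-contained sketch of just that loop, with an `exists(label)` predicate standing in for the QueryBuilder lookup and an illustrative cap (both assumptions):

from datetime import datetime

MAX_GROUPS = 100  # illustrative cap, not the AiiDA constant

def unique_group_label(exists, now=None):
    """Return a timestamp-based label, suffixed with _N until `exists` says it is free."""
    basename = (now or datetime.now()).strftime('%Y%m%d-%H%M%S')
    label, counter = basename, 0
    while exists(label):
        counter += 1
        label = '{}_{}'.format(basename, counter)
        if counter == MAX_GROUPS:
            raise RuntimeError('more than {} groups with basename {!r}'.format(MAX_GROUPS, basename))
    return label

taken = {'20210101-120000', '20210101-120000_1'}
print(unique_group_label(taken.__contains__, datetime(2021, 1, 1, 12, 0)))  # 20210101-120000_2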
Example #18
    def test_creation(self):
        """Test creation of a BackendComment and all its properties."""
        comment = self.backend.comments.create(node=self.node,
                                               user=self.user,
                                               content=self.comment_content)

        # Before storing
        self.assertIsNone(comment.id)
        self.assertIsNone(comment.pk)
        self.assertTrue(isinstance(comment.uuid, str))
        self.assertEqual(comment.node.uuid, self.node.uuid)
        self.assertTrue(isinstance(comment.ctime, datetime))
        self.assertIsNone(comment.mtime)
        self.assertEqual(comment.user.email, self.user.email)
        self.assertEqual(comment.content, self.comment_content)

        # Store the comment.ctime before the store as a reference
        now = timezone.now()
        comment_ctime_before_store = comment.ctime
        self.assertTrue(
            now > comment.ctime,
            '{} is not smaller than now {}'.format(comment.ctime, now))

        comment.store()
        comment_ctime = comment.ctime
        comment_mtime = comment.mtime

        # The comment.ctime should have been unchanged, but the comment.mtime should have changed
        self.assertEqual(comment.ctime, comment_ctime_before_store)
        self.assertIsNotNone(comment.mtime)
        self.assertTrue(
            now < comment.mtime,
            '{} is not larger than now {}'.format(comment.mtime, now))

        # After storing
        self.assertTrue(isinstance(comment.id, int))
        self.assertTrue(isinstance(comment.pk, int))
        self.assertTrue(isinstance(comment.uuid, str))
        self.assertEqual(comment.node.uuid, self.node.uuid)
        self.assertTrue(isinstance(comment.ctime, datetime))
        self.assertTrue(isinstance(comment.mtime, datetime))
        self.assertEqual(comment.user.email, self.user.email)
        self.assertEqual(comment.content, self.comment_content)

        # Try to construct a UUID from the UUID value to prove that it has a valid UUID
        UUID(comment.uuid)

        # Change a column, which should trigger the save, update the mtime but leave the ctime untouched
        comment.set_content('test')
        self.assertEqual(comment.ctime, comment_ctime)
        self.assertTrue(comment.mtime > comment_mtime)
Example #19
    def query_past_days(q_object, args):
        """
        Subselect to filter data nodes by their age.

        :param q_object: a query object
        :param args: a namespace with parsed command line parameters.
        """
        from aiida.common import timezone
        from django.db.models import Q
        import datetime
        if args.past_days is not None:
            now = timezone.now()
            n_days_ago = now - datetime.timedelta(days=args.past_days)
            q_object.add(Q(ctime__gte=n_days_ago), Q.AND)
Example #20
    def test_delete_many_time(self):
        """Test `delete_many` method filtering on `time`"""
        from datetime import timedelta

        # Initialization
        log_uuids = []
        found_logs_time = []
        found_logs_uuid = []

        now = timezone.now()
        two_days_ago = now - timedelta(days=2)
        one_day_ago = now - timedelta(days=1)
        log_times = [now, one_day_ago, two_days_ago]

        # Create logs
        log1 = self.create_log(time=now)
        log2 = self.create_log(time=one_day_ago)
        log3 = self.create_log(time=two_days_ago)
        for log in [log1, log2, log3]:
            log.store()
            log_uuids.append(str(log.uuid))

        # Make sure they exist with the correct times
        builder = orm.QueryBuilder().append(orm.Log, project=['time', 'uuid'])
        self.assertGreater(builder.count(), 0)
        for log in builder.all():
            found_logs_time.append(log[0])
            found_logs_uuid.append(log[1])
        for log_time in log_times:
            self.assertIn(log_time, found_logs_time)
        for log_uuid in log_uuids:
            self.assertIn(log_uuid, found_logs_uuid)

        # Delete logs that are older than 1 hour
        turning_point = now - timedelta(seconds=60 * 60)
        filters = {'time': {'<': turning_point}}
        self.backend.logs.delete_many(filters=filters)

        # Check they were deleted
        builder = orm.QueryBuilder().append(orm.Log, project='uuid')
        self.assertGreater(builder.count(), 0)  # There should still be at least 1
        found_logs_uuid = [_[0] for _ in builder.all()]
        for log_uuid in log_uuids[1:]:
            self.assertNotIn(log_uuid, found_logs_uuid)

        # Make sure the newest log (log1) was not deleted
        self.assertIn(log_uuids[0], found_logs_uuid)
Example #21
    def set_scheduler_state(self, state):
        """Set the scheduler state.

        :param state: an instance of `JobState`
        """
        from aiida.common import timezone
        from aiida.schedulers.datastructures import JobState

        if not isinstance(state, JobState):
            raise ValueError(
                f'scheduler state should be an instance of JobState, got: {state}'
            )

        self.set_attribute(self.SCHEDULER_STATE_KEY, state.value)
        self.set_attribute(self.SCHEDULER_LAST_CHECK_TIME_KEY,
                           timezone.datetime_to_isoformat(timezone.now()))
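The method above combines two things: validating the state against an enum and recording a timezone-aware check time in ISO format. A stdlib sketch of the same shape, with a local `JobState` enum, a dict standing in for the node attributes, and illustrative attribute keys (all assumptions):

from datetime import datetime, timezone
from enum import Enum

class JobState(Enum):
    """Local stand-in for the scheduler job states."""
    QUEUED = 'queued'
    RUNNING = 'running'
    DONE = 'done'

attributes = {}

def set_scheduler_state(state):
    if not isinstance(state, JobState):
        raise ValueError('scheduler state should be an instance of JobState, got: {}'.format(state))
    # Keys below are illustrative stand-ins for the real attribute key constants
    attributes['scheduler_state'] = state.value
    attributes['scheduler_lastchecktime'] = datetime.now(timezone.utc).isoformat()

set_scheduler_state(JobState.RUNNING)
print(attributes)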
Example #22
def group_show(group, raw, limit, uuid):
    """Show information for a given group."""
    from tabulate import tabulate

    from aiida.common.utils import str_timedelta
    from aiida.common import timezone

    if limit:
        node_iterator = group.nodes[:limit]
    else:
        node_iterator = group.nodes

    if raw:
        if uuid:
            echo.echo(' '.join(str(_.uuid) for _ in node_iterator))
        else:
            echo.echo(' '.join(str(_.pk) for _ in node_iterator))
    else:
        type_string = group.type_string
        desc = group.description
        now = timezone.now()

        table = []
        table.append(['Group label', group.label])
        table.append(['Group type_string', type_string])
        table.append(
            ['Group description', desc if desc else '<no description>'])
        echo.echo(tabulate(table))

        table = []
        header = []
        if uuid:
            header.append('UUID')
        header.extend(['PK', 'Type', 'Created'])
        echo.echo('# Nodes:')
        for node in node_iterator:
            row = []
            if uuid:
                row.append(node.uuid)
            row.append(node.pk)
            row.append(node.node_type.rsplit('.', 2)[1])
            row.append(
                str_timedelta(now - node.ctime,
                              short=True,
                              negative_to_zero=True))
            table.append(row)
        echo.echo(tabulate(table, headers=header))
Example #23
    def _backup(cls, filepath):
        """Create a backup of the configuration file with the given filepath.

        :param filepath: absolute path to the configuration file to backup
        :return: the absolute path of the created backup
        """
        from aiida.common import timezone

        filepath_backup = None

        # Keep generating a new backup filename based on the current time until it does not exist
        while not filepath_backup or os.path.isfile(filepath_backup):
            filepath_backup = '{}.{}'.format(
                filepath,
                timezone.now().strftime('%Y%m%d-%H%M%S.%f'))

        shutil.copy(filepath, filepath_backup)

        return filepath_backup
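A quick way to exercise the same timestamped-backup naming end to end, using a temporary file and a local copy of the loop (stdlib only; a sketch, not the classmethod above):

import os
import shutil
import tempfile
from datetime import datetime

def backup(filepath):
    """Copy `filepath` next to itself under a timestamped name that is not yet taken."""
    candidate = None
    while not candidate or os.path.isfile(candidate):
        candidate = '{}.{}'.format(filepath, datetime.now().strftime('%Y%m%d-%H%M%S.%f'))
    shutil.copy(filepath, candidate)
    return candidate

with tempfile.TemporaryDirectory() as tmpdir:
    config = os.path.join(tmpdir, 'config.json')
    with open(config, 'w', encoding='utf8') as handle:
        handle.write('{}')
    print(backup(config))  # e.g. .../config.json.20210101-120000.123456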
Example #24
def query(datatype, project, past_days, group_pks, all_users):
    """
    Perform the query
    """
    import datetime

    from aiida import orm
    from aiida.common import timezone

    qbl = orm.QueryBuilder()
    if all_users is False:
        user = orm.User.objects.get_default()
        qbl.append(orm.User, tag='creator', filters={'email': user.email})
    else:
        qbl.append(orm.User, tag='creator')

    # If there is a time restriction
    data_filters = {}
    if past_days is not None:
        now = timezone.now()
        n_days_ago = now - datetime.timedelta(days=past_days)
        data_filters.update({'ctime': {'>=': n_days_ago}})

    qbl.append(datatype,
               tag='data',
               with_user='creator',
               filters=data_filters,
               project=project)

    # If there is a group restriction
    if group_pks is not None:
        group_filters = dict()
        group_filters.update({'id': {'in': group_pks}})
        qbl.append(orm.Group,
                   tag='group',
                   filters=group_filters,
                   with_node='data')

    qbl.order_by({datatype: {'ctime': 'asc'}})

    object_list = qbl.distinct()
    return object_list.all()
Example #25
def import_data_sqla(in_path,
                     group=None,
                     ignore_unknown_nodes=False,
                     extras_mode_existing='kcl',
                     extras_mode_new='import',
                     comment_mode='newest',
                     silent=False,
                     **kwargs):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the SQLAlchemy backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either:
        'k' (keep it) or
        'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either:
        'c' (create it) or
        'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either:
        'l' (leave the old value),
        'u' (update with a new value),
        'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either:
        'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress progress bar and summary.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is
        corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
    """
    from aiida.backends.sqlalchemy.models.node import DbNode, DbLink
    from aiida.backends.sqlalchemy.utils import flag_modified

    # This is the export version expected by this function
    expected_export_version = StrictVersion(EXPORT_VERSION)

    # The returned dictionary with new and existing nodes and links
    ret_dict = {}

    # Initial check(s)
    if group:
        if not isinstance(group, Group):
            raise exceptions.ImportValidationError(
                'group must be a Group entity')
        elif not group.is_stored:
            group.store()

    if silent:
        logging.disable(level=logging.CRITICAL)

    ################
    # EXTRACT DATA #
    ################
    # The sandbox has to remain open until the end
    with SandboxFolder() as folder:
        if os.path.isdir(in_path):
            extract_tree(in_path, folder)
        else:
            if tarfile.is_tarfile(in_path):
                extract_tar(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER,
                            **kwargs)
            elif zipfile.is_zipfile(in_path):
                extract_zip(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER,
                            **kwargs)
            else:
                raise exceptions.ImportValidationError(
                    'Unable to detect the input file format, it is neither a '
                    'tar file, nor a (possibly compressed) zip file.')

        if not folder.get_content_list():
            raise exceptions.CorruptArchive(
                'The provided file/folder ({}) is empty'.format(in_path))
        try:
            IMPORT_LOGGER.debug('CACHING metadata.json')
            with open(folder.get_abs_path('metadata.json'),
                      encoding='utf8') as fhandle:
                metadata = json.load(fhandle)

            IMPORT_LOGGER.debug('CACHING data.json')
            with open(folder.get_abs_path('data.json'),
                      encoding='utf8') as fhandle:
                data = json.load(fhandle)
        except IOError as error:
            raise exceptions.CorruptArchive(
                'Unable to find the file {} in the import file or folder'.
                format(error.filename))

        ######################
        # PRELIMINARY CHECKS #
        ######################
        export_version = StrictVersion(str(metadata['export_version']))
        if export_version != expected_export_version:
            msg = 'Export file version is {}, can import only version {}'\
                    .format(metadata['export_version'], expected_export_version)
            if export_version < expected_export_version:
                msg += "\nUse 'verdi export migrate' to update this export file."
            else:
                msg += '\nUpdate your AiiDA version in order to import this file.'

            raise exceptions.IncompatibleArchiveVersionError(msg)

        start_summary(in_path, comment_mode, extras_mode_new,
                      extras_mode_existing)

        ###################################################################
        #           CREATE UUID REVERSE TABLES AND CHECK IF               #
        #              I HAVE ALL NODES FOR THE LINKS                     #
        ###################################################################
        IMPORT_LOGGER.debug(
            'CHECKING IF NODES FROM LINKS ARE IN DB OR ARCHIVE...')

        linked_nodes = set(
            chain.from_iterable(
                (l['input'], l['output']) for l in data['links_uuid']))
        group_nodes = set(chain.from_iterable(data['groups_uuid'].values()))

        # Check that UUIDs are valid
        linked_nodes = set(x for x in linked_nodes if validate_uuid(x))
        group_nodes = set(x for x in group_nodes if validate_uuid(x))

        import_nodes_uuid = set()
        for value in data['export_data'].get(NODE_ENTITY_NAME, {}).values():
            import_nodes_uuid.add(value['uuid'])

        unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid

        if unknown_nodes and not ignore_unknown_nodes:
            raise exceptions.DanglingLinkError(
                'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first '
                'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n'
                ''.format(len(unknown_nodes)) +
                '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes))

        ###################################
        # DOUBLE-CHECK MODEL DEPENDENCIES #
        ###################################
        # The entity import order. It is defined by the database model relationships.
        entity_order = [
            USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME,
            GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME
        ]

        # Check that the archive only contains known entity names
        # (e.g. 'User', 'Computer', 'Node', 'Group', ...)
        for import_field_name in metadata['all_fields_info']:
            if import_field_name not in entity_order:
                raise exceptions.ImportValidationError(
                    "You are trying to import an unknown model '{}'!".format(
                        import_field_name))

        for idx, entity_name in enumerate(entity_order):
            dependencies = []
            # For every field, collect the dependencies given under its 'requires' key
            for field in metadata['all_fields_info'][entity_name].values():
                try:
                    dependencies.append(field['requires'])
                except KeyError:
                    # (No ForeignKey)
                    pass
            for dependency in dependencies:
                if dependency not in entity_order[:idx]:
                    raise exceptions.ArchiveImportError(
                        'Entity {} requires {} but would be loaded first; stopping...'
                        .format(entity_name, dependency))

        ###################################################
        # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS #
        ###################################################
        # This is nested dictionary of entity_name:{id:uuid}
        # to map one id (the pk) to a different one.
        # One of the things to remove for v0.4
        # {
        # 'Node': {2362: '82a897b5-fb3a-47d7-8b22-c5fe1b4f2c14',
        #           2363: 'ef04aa5d-99e7-4bfd-95ef-fe412a6a3524', 2364: '1dc59576-af21-4d71-81c2-bac1fc82a84a'},
        # 'User': {1: 'aiida@localhost'}
        # }
        IMPORT_LOGGER.debug('CREATING PK-2-UUID/EMAIL MAPPING...')
        import_unique_ids_mappings = {}
        # Since v0.3, the export data is keyed by entity name
        for entity_name, import_data in data['export_data'].items():
            # Again I need the entity_name since that's what's being stored since 0.3
            if entity_name in metadata['unique_identifiers']:
                # I have to reconvert the pk to integer
                import_unique_ids_mappings[entity_name] = {
                    int(k): v[metadata['unique_identifiers'][entity_name]]
                    for k, v in import_data.items()
                }
        ###############
        # IMPORT DATA #
        ###############
        # DO ALL WITH A TRANSACTION
        import aiida.backends.sqlalchemy

        session = aiida.backends.sqlalchemy.get_scoped_session()

        try:
            foreign_ids_reverse_mappings = {}
            new_entries = {}
            existing_entries = {}

            IMPORT_LOGGER.debug('GENERATING LIST OF DATA...')

            # Instantiate progress bar
            progress_bar = get_progress_bar(total=1,
                                            leave=False,
                                            disable=silent)
            pbar_base_str = 'Generating list of data - '

            # Get total entities from data.json
            # To be used with progress bar
            number_of_entities = 0

            # I first generate the list of data
            for entity_name in entity_order:
                entity = entity_names_to_entities[entity_name]
                # Get the unique identifier, which since v0.3 is stored under the entity name
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, None)

                # Initialize the per-entity mappings, keyed by entity name (the convention since v0.3)
                new_entries[entity_name] = {}
                existing_entries[entity_name] = {}
                foreign_ids_reverse_mappings[entity_name] = {}

                # Not necessarily all models are exported
                if entity_name in data['export_data']:

                    IMPORT_LOGGER.debug('  %s...', entity_name)

                    progress_bar.set_description_str(pbar_base_str +
                                                     entity_name,
                                                     refresh=False)
                    number_of_entities += len(data['export_data'][entity_name])

                    if unique_identifier is not None:
                        import_unique_ids = set(
                            v[unique_identifier]
                            for v in data['export_data'][entity_name].values())

                        relevant_db_entries = {}
                        if import_unique_ids:
                            builder = QueryBuilder()
                            builder.append(entity,
                                           filters={
                                               unique_identifier: {
                                                   'in': import_unique_ids
                                               }
                                           },
                                           project='*')

                            if builder.count():
                                progress_bar = get_progress_bar(
                                    total=builder.count(), disable=silent)
                                for object_ in builder.iterall():
                                    progress_bar.update()

                                    relevant_db_entries.update({
                                        getattr(object_[0], unique_identifier):
                                        object_[0]
                                    })

                            foreign_ids_reverse_mappings[entity_name] = {
                                k: v.pk
                                for k, v in relevant_db_entries.items()
                            }

                        IMPORT_LOGGER.debug('    GOING THROUGH ARCHIVE...')

                        imported_comp_names = set()
                        for key, value in data['export_data'][
                                entity_name].items():
                            if entity_name == GROUP_ENTITY_NAME:
                                # Check if there is already a group with the same name,
                                # and if so, recreate the name
                                orig_label = value['label']
                                dupl_counter = 0
                                while QueryBuilder().append(
                                        entity,
                                        filters={
                                            'label': {
                                                '==': value['label']
                                            }
                                        }).count():
                                    # Rename the new group
                                    value['label'] = orig_label + DUPL_SUFFIX.format(dupl_counter)
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A group of that label ( {} ) already exists and I could not create a new '
                                            'one'.format(orig_label))

                            elif entity_name == COMPUTER_ENTITY_NAME:
                                # The following is done for compatibility
                                # reasons in case the export file was generated
                                # with the Django export method. In Django the
                                # metadata and the transport parameters are
                                # stored as (unicode) strings of the serialized
                                # JSON objects and not as simple serialized
                                # JSON objects.
                                if isinstance(value['metadata'], (str, bytes)):
                                    value['metadata'] = json.loads(
                                        value['metadata'])

                                # Check if there is already a computer with the
                                # same name in the database
                                builder = QueryBuilder()
                                builder.append(
                                    entity,
                                    filters={'name': {
                                        '==': value['name']
                                    }},
                                    project=['*'],
                                    tag='res')
                                dupl = builder.count() or value['name'] in imported_comp_names
                                dupl_counter = 0
                                orig_name = value['name']
                                while dupl:
                                    # Rename the new computer
                                    value['name'] = orig_name + DUPL_SUFFIX.format(dupl_counter)
                                    builder = QueryBuilder()
                                    builder.append(entity,
                                                   filters={
                                                       'name': {
                                                           '==': value['name']
                                                       }
                                                   },
                                                   project=['*'],
                                                   tag='res')
                                    dupl = builder.count() or value['name'] in imported_comp_names
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A computer of that name ( {} ) already exists and I could not create a '
                                            'new one'.format(orig_name))

                                imported_comp_names.add(value['name'])

                            if value[unique_identifier] in relevant_db_entries:
                                # Already in DB
                                # again, switched to entity_name in v0.3
                                existing_entries[entity_name][key] = value
                            else:
                                # To be added
                                new_entries[entity_name][key] = value
                    else:
                        new_entries[entity_name] = data['export_data'][
                            entity_name]

            # Progress bar - reset for import
            progress_bar = get_progress_bar(total=number_of_entities,
                                            disable=silent)
            reset_progress_bar = {}

            # I import data from the given model
            for entity_name in entity_order:
                entity = entity_names_to_entities[entity_name]
                fields_info = metadata['all_fields_info'].get(entity_name, {})
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, '')

                # Progress bar initialization - Model
                if reset_progress_bar:
                    progress_bar = get_progress_bar(
                        total=reset_progress_bar['total'], disable=silent)
                    progress_bar.n = reset_progress_bar['n']
                    reset_progress_bar = {}
                pbar_base_str = '{}s - '.format(entity_name)
                progress_bar.set_description_str(pbar_base_str +
                                                 'Initializing',
                                                 refresh=True)

                # EXISTING ENTRIES
                if existing_entries[entity_name]:
                    # Progress bar update - Model
                    progress_bar.set_description_str(
                        pbar_base_str + '{} existing entries'.format(
                            len(existing_entries[entity_name])),
                        refresh=True)

                for import_entry_pk, entry_data in existing_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    existing_entry_pk = foreign_ids_reverse_mappings[
                        entity_name][unique_id]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())
                    # TODO COMPARE, AND COMPARE ATTRIBUTES

                    if entity_name == COMMENT_ENTITY_NAME:
                        new_entry_uuid = merge_comment(import_data,
                                                       comment_mode)
                        if new_entry_uuid is not None:
                            entry_data[unique_identifier] = new_entry_uuid
                            new_entries[entity_name][
                                import_entry_pk] = entry_data

                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['existing'].append(
                        (import_entry_pk, existing_entry_pk))
                    IMPORT_LOGGER.debug('Existing %s: %s (%s->%s)',
                                        entity_name, unique_id,
                                        import_entry_pk, existing_entry_pk)

                # Store all objects for this model in a list, and store them
                # all in once at the end.
                objects_to_create = list()
                # In the following list we add the objects to be updated
                objects_to_update = list()
                # This is needed later to associate the import entry with the new pk
                import_new_entry_pks = dict()

                # NEW ENTRIES
                if new_entries[entity_name]:
                    # Progress bar update - Model
                    progress_bar.set_description_str(
                        pbar_base_str +
                        '{} new entries'.format(len(new_entries[entity_name])),
                        refresh=True)

                for import_entry_pk, entry_data in new_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())

                    # We convert the Django fields to SQLA. Note that some of
                    # the Django fields were converted to SQLA compatible
                    # fields by the deserialize_field method. This was done
                    # for optimization reasons in Django but makes them
                    # compatible with the SQLA schema and they don't need any
                    # further conversion.
                    if entity_name in file_fields_to_model_fields:
                        for file_fkey in file_fields_to_model_fields[
                                entity_name]:

                            # This is an exception because the DbLog model defines the `_metadata` column instead of the
                            # `metadata` column used in the Django model. This is because the SqlAlchemy model base
                            # class already has a metadata attribute that cannot be overridden. For consistency, the
                            # `DbLog` class however expects the `metadata` keyword in its constructor, so we should
                            # ignore the mapping here
                            if entity_name == LOG_ENTITY_NAME and file_fkey == 'metadata':
                                continue

                            model_fkey = file_fields_to_model_fields[
                                entity_name][file_fkey]
                            if model_fkey in import_data:
                                continue
                            import_data[model_fkey] = import_data[file_fkey]
                            import_data.pop(file_fkey, None)

                    db_entity = get_object_from_string(
                        entity_names_to_sqla_schema[entity_name])

                    objects_to_create.append(db_entity(**import_data))
                    import_new_entry_pks[unique_id] = import_entry_pk

                if entity_name == NODE_ENTITY_NAME:
                    IMPORT_LOGGER.debug(
                        'STORING NEW NODE REPOSITORY FILES & ATTRIBUTES...')

                    # NEW NODES
                    for object_ in objects_to_create:
                        import_entry_uuid = object_.uuid
                        import_entry_pk = import_new_entry_pks[
                            import_entry_uuid]

                        # Progress bar initialization - Node
                        progress_bar.update()
                        pbar_node_base_str = pbar_base_str + 'UUID={} - '.format(
                            import_entry_uuid.split('-')[0])

                        # Before storing entries in the DB, I store the files (if these are nodes).
                        # Note: only for new entries!
                        subfolder = folder.get_subfolder(
                            os.path.join(NODES_EXPORT_SUBFOLDER,
                                         export_shard_uuid(import_entry_uuid)))
                        if not subfolder.exists():
                            raise exceptions.CorruptArchive(
                                'Unable to find the repository folder for Node with UUID={} in the exported '
                                'file'.format(import_entry_uuid))
                        destdir = RepositoryFolder(
                            section=Repository._section_name,
                            uuid=import_entry_uuid)
                        # Replace the folder, possibly destroying existing previous folders, and move the files
                        # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Repository',
                                                         refresh=True)
                        destdir.replace_with_folder(subfolder.abspath,
                                                    move=True,
                                                    overwrite=True)

                        # For Nodes, we also have to store Attributes!
                        IMPORT_LOGGER.debug('STORING NEW NODE ATTRIBUTES...')
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Attributes',
                                                         refresh=True)

                        # Get attributes from import file
                        try:
                            object_.attributes = data['node_attributes'][str(
                                import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find attribute info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # For DbNodes, we also have to store extras
                        if extras_mode_new == 'import':
                            IMPORT_LOGGER.debug('STORING NEW NODE EXTRAS...')
                            progress_bar.set_description_str(
                                pbar_node_base_str + 'Extras', refresh=True)

                            # Get extras from import file
                            try:
                                extras = data['node_extras'][str(
                                    import_entry_pk)]
                            except KeyError:
                                raise exceptions.CorruptArchive(
                                    'Unable to find extra info for Node with UUID={}'
                                    .format(import_entry_uuid))
                            # TODO: remove once AiiDA extras are moved somewhere else
                            # from here
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key.startswith('_aiida_')
                            }
                            if object_.node_type.endswith('code.Code.'):
                                extras = {
                                    key: value
                                    for key, value in extras.items()
                                    if not key == 'hidden'
                                }
                            # till here
                            object_.extras = extras
                        elif extras_mode_new == 'none':
                            IMPORT_LOGGER.debug('SKIPPING NEW NODE EXTRAS...')
                        else:
                            raise exceptions.ImportValidationError(
                                "Unknown extras_mode_new value: {}, should be either 'import' or 'none'"
                                ''.format(extras_mode_new))

                    # EXISTING NODES (Extras)
                    IMPORT_LOGGER.debug('UPDATING EXISTING NODE EXTRAS...')

                    import_existing_entry_pks = {
                        entry_data[unique_identifier]: import_entry_pk
                        for import_entry_pk, entry_data in
                        existing_entries[entity_name].items()
                    }
                    for node in session.query(DbNode).filter(
                            DbNode.uuid.in_(import_existing_entry_pks)).all():
                        import_entry_uuid = str(node.uuid)
                        import_entry_pk = import_existing_entry_pks[
                            import_entry_uuid]

                        # Progress bar initialization - Node
                        pbar_node_base_str = pbar_base_str + 'UUID={} - '.format(
                            import_entry_uuid.split('-')[0])
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Extras',
                                                         refresh=False)
                        progress_bar.update()

                        # Get extras from import file
                        try:
                            extras = data['node_extras'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find extra info for Node with UUID={}'
                                .format(import_entry_uuid))

                        old_extras = node.extras.copy()
                        # TODO: remove once AiiDA extras are moved somewhere else
                        # from here
                        extras = {
                            key: value
                            for key, value in extras.items()
                            if not key.startswith('_aiida_')
                        }
                        if node.node_type.endswith('code.Code.'):
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key == 'hidden'
                            }
                        # till here
                        new_extras = merge_extras(node.extras, extras,
                                                  extras_mode_existing)
                        if new_extras != old_extras:
                            node.extras = new_extras
                            flag_modified(node, 'extras')
                            objects_to_update.append(node)

                else:
                    # Update progress bar with new non-Node entries
                    progress_bar.update(n=len(existing_entries[entity_name]) +
                                        len(new_entries[entity_name]))

                progress_bar.set_description_str(pbar_base_str + 'Storing',
                                                 refresh=True)

                # Store them all at once; however, the PKs are not set this way...
                if objects_to_create:
                    session.add_all(objects_to_create)
                if objects_to_update:
                    session.add_all(objects_to_update)

                session.flush()

                just_saved = {}
                if import_new_entry_pks.keys():
                    reset_progress_bar = {
                        'total': progress_bar.total,
                        'n': progress_bar.n
                    }
                    progress_bar = get_progress_bar(
                        total=len(import_new_entry_pks), disable=silent)

                    builder = QueryBuilder()
                    builder.append(entity,
                                   filters={
                                       unique_identifier: {
                                           'in':
                                           list(import_new_entry_pks.keys())
                                       }
                                   },
                                   project=[unique_identifier, 'id'])

                    for entry in builder.iterall():
                        progress_bar.update()

                        just_saved.update({entry[0]: entry[1]})

                progress_bar.set_description_str(pbar_base_str + 'Done!',
                                                 refresh=True)

                # Now that we have the PKs, log the info
                # Moreover, add newly created Nodes to foreign_ids_reverse_mappings
                from uuid import UUID

                for unique_id, new_pk in just_saved.items():
                    if isinstance(unique_id, UUID):
                        unique_id = str(unique_id)
                    import_entry_pk = import_new_entry_pks[unique_id]
                    foreign_ids_reverse_mappings[entity_name][
                        unique_id] = new_pk
                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['new'].append(
                        (import_entry_pk, new_pk))

                    IMPORT_LOGGER.debug('N %s: %s (%s->%s)', entity_name,
                                        unique_id, import_entry_pk, new_pk)

            IMPORT_LOGGER.debug('STORING NODE LINKS...')

            import_links = data['links_uuid']

            if import_links:
                progress_bar = get_progress_bar(total=len(import_links),
                                                disable=silent)
                pbar_base_str = 'Links - '

            for link in import_links:
                # Check for dangling Links within the supposedly self-consistent archive
                progress_bar.set_description_str(
                    pbar_base_str + 'label={}'.format(link['label']),
                    refresh=False)
                progress_bar.update()

                try:
                    in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['input']]
                    out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['output']]
                except KeyError:
                    if ignore_unknown_nodes:
                        continue
                    raise exceptions.ImportValidationError(
                        'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, '
                        'label={}, type={})'.format(link['input'],
                                                    link['output'],
                                                    link['label'],
                                                    link['type']))

                # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them.
                source = QueryBuilder().append(Node,
                                               filters={
                                                   'id': in_id
                                               },
                                               project='*').first()[0]
                target = QueryBuilder().append(Node,
                                               filters={
                                                   'id': out_id
                                               },
                                               project='*').first()[0]
                link_type = LinkType(link['type'])

                # Check for existence of a triple link, i.e. unique triple.
                # If it exists, then the link already exists, continue to next link, otherwise, validate link.
                if link_triple_exists(source, target, link_type,
                                      link['label']):
                    continue

                try:
                    validate_link(source, target, link_type, link['label'])
                except ValueError as why:
                    raise exceptions.ImportValidationError(
                        'Error occurred during Link validation: {}'.format(
                            why))

                # New link
                session.add(
                    DbLink(input_id=in_id,
                           output_id=out_id,
                           label=link['label'],
                           type=link['type']))
                if 'Link' not in ret_dict:
                    ret_dict['Link'] = {'new': []}
                ret_dict['Link']['new'].append((in_id, out_id))

            IMPORT_LOGGER.debug('   (%d new links...)',
                                len(ret_dict.get('Link', {}).get('new', [])))

            IMPORT_LOGGER.debug('STORING GROUP ELEMENTS...')

            import_groups = data['groups_uuid']

            if import_groups:
                progress_bar = get_progress_bar(total=len(import_groups),
                                                disable=silent)
                pbar_base_str = 'Groups - '

            for groupuuid, groupnodes in import_groups.items():
                # TODO: cache these to avoid too many queries
                qb_group = QueryBuilder().append(
                    Group, filters={'uuid': {
                        '==': groupuuid
                    }})
                group_ = qb_group.first()[0]

                progress_bar.set_description_str(
                    pbar_base_str + 'label={}'.format(group_.label),
                    refresh=False)
                progress_bar.update()

                nodes_ids_to_add = [
                    foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid]
                    for node_uuid in groupnodes
                ]
                qb_nodes = QueryBuilder().append(
                    Node, filters={'id': {
                        'in': nodes_ids_to_add
                    }})
                # Adding nodes to group avoiding the SQLA ORM to increase speed
                nodes_to_add = [n[0].backend_entity for n in qb_nodes.all()]
                group_.backend_entity.add_nodes(nodes_to_add, skip_orm=True)

            ######################################################
            # Put everything in a specific group
            ######################################################
            existing = existing_entries.get(NODE_ENTITY_NAME, {})
            existing_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in existing.values()
            ]
            new = new_entries.get(NODE_ENTITY_NAME, {})
            new_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in new.values()
            ]

            pks_for_group = existing_pk + new_pk

            # So that we do not create empty groups
            if pks_for_group:
                # If the user did not specify a group, create a new unique import group
                if not group:
                    from aiida.backends.sqlalchemy.models.group import DbGroup

                    # Get a unique name for the import group, based on the current (local) time
                    basename = timezone.localtime(
                        timezone.now()).strftime('%Y%m%d-%H%M%S')
                    counter = 0
                    group_label = basename
                    while session.query(DbGroup).filter(
                            DbGroup.label == group_label).count() > 0:
                        counter += 1
                        group_label = '{}_{}'.format(basename, counter)

                        if counter == 100:
                            raise exceptions.ImportUniquenessError(
                                "Overflow of import groups (more than 100 import groups exist with basename '{}')"
                                ''.format(basename))
                    group = ImportGroup(label=group_label)
                    session.add(group.backend_entity._dbmodel)

                # Adding nodes to group avoiding the SQLA ORM to increase speed
                builder = QueryBuilder().append(
                    Node, filters={'id': {
                        'in': pks_for_group
                    }})

                progress_bar = get_progress_bar(total=len(pks_for_group),
                                                disable=silent)
                progress_bar.set_description_str(
                    'Creating import Group - Preprocessing', refresh=True)
                first = True

                nodes = []
                for entry in builder.iterall():
                    if first:
                        progress_bar.set_description_str(
                            'Creating import Group', refresh=False)
                        first = False
                    progress_bar.update()
                    nodes.append(entry[0].backend_entity)
                group.backend_entity.add_nodes(nodes, skip_orm=True)
                progress_bar.set_description_str('Done (cleaning up)',
                                                 refresh=True)
            else:
                IMPORT_LOGGER.debug(
                    'No Nodes to import, so no import Group was created (unless one already existed)'
                )

            IMPORT_LOGGER.debug('COMMITTING EVERYTHING...')
            session.commit()

            # Finalize Progress bar
            close_progress_bar(leave=False)

            # Summarize import
            result_summary(ret_dict, getattr(group, 'label', None))

        except:
            # Finalize Progress bar
            close_progress_bar(leave=False)

            result_summary({}, None)

            IMPORT_LOGGER.debug('Rolling back')
            session.rollback()
            raise

    # Reset logging level
    if silent:
        logging.disable(level=logging.NOTSET)

    return ret_dict
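The `_aiida_`-prefix and `hidden` filtering applied to extras above appears twice (once for new nodes, once for existing nodes). A minimal sketch of how it could be factored into a helper, assuming a hypothetical name `_sanitize_extras` that is not part of the AiiDA API:

def _sanitize_extras(extras, node_type):
    """Drop internal '_aiida_' extras and, for Code nodes, the 'hidden' extra.

    Hypothetical helper mirroring the inline dict comprehensions above.
    """
    cleaned = {key: value for key, value in extras.items() if not key.startswith('_aiida_')}
    if node_type.endswith('code.Code.'):
        cleaned.pop('hidden', None)
    return cleaned

Both the new-node and existing-node branches could then call _sanitize_extras(extras, object_.node_type) in place of the repeated comprehensions.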
Example #26
    def test_node_access_with_sessions(self):
        """This checks that changes to a node from a different session (e.g. different interpreter,
        or the daemon) are immediately reflected on the AiiDA node when read directly, e.g. a change
        to node.description will immediately be seen.

        Tests for bug #1372"""
        from aiida.common import timezone
        import aiida.backends.sqlalchemy as sa

        session = sessionmaker(bind=sa.ENGINE)
        custom_session = session()

        user = self.backend.users.create(email='test@localhost').store()
        node = self.backend.nodes.create(node_type='', user=user).store()
        master_session = node.dbmodel.session
        self.assertIsNot(master_session, custom_session)

        # Manually load the DbNode in a different session
        dbnode_reloaded = custom_session.query(sa.models.node.DbNode).get(
            node.id)

        # Now, go through one by one changing the possible attributes (of the model)
        # and check that they're updated when the user reads them from the aiida node

        def check_attrs_match(name):
            node_attr = getattr(node, name)
            dbnode_attr = getattr(dbnode_reloaded, name)
            self.assertEqual(
                node_attr, dbnode_attr,
                "Values of '{}' don't match ({} != {})".format(
                    name, node_attr, dbnode_attr))

        def do_value_checks(attr_name, original, changed):
            try:
                setattr(node, attr_name, original)
            except AttributeError:
                # This may mean that it is immutable, but we should still be able to
                # change it below directly through the dbnode
                pass
            # Refresh the custom session and make sure they match
            custom_session.refresh(dbnode_reloaded, attribute_names=[attr_name])
            check_attrs_match(attr_name)

            # Change the value in the custom session via the DbNode
            setattr(dbnode_reloaded, attr_name, changed)
            custom_session.commit()

            # Check that the Node 'sees' the change
            check_attrs_match(attr_name)

        for str_attr in ['label', 'description']:
            do_value_checks(str_attr, 'original', 'changed')

        for str_attr in ['ctime', 'mtime']:
            do_value_checks(str_attr, timezone.now(), timezone.now())

        # Attributes
        self.assertDictEqual(node.attributes, dbnode_reloaded.attributes)
        dbnode_reloaded.attributes['test_attrs'] = 'Boo!'
        custom_session.commit()
        self.assertDictEqual(node.attributes, dbnode_reloaded.attributes)

        # Extras
        self.assertDictEqual(node.extras, dbnode_reloaded.extras)
        dbnode_reloaded.extras['test_extras'] = 'Boo!'
        custom_session.commit()
        self.assertDictEqual(node.extras, dbnode_reloaded.extras)
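The behaviour this test exercises, namely a change committed through one SQLAlchemy session becoming visible through another session once the instance is refreshed, can be reproduced outside AiiDA. A minimal sketch using plain SQLAlchemy 1.x-style API; the `Thing` model, the temporary SQLite database and the field names are illustrative assumptions, not AiiDA objects:

import os
import tempfile

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

Base = declarative_base()


class Thing(Base):
    __tablename__ = 'thing'
    id = Column(Integer, primary_key=True)
    label = Column(String, default='')


# A file-based SQLite database so the two sessions really use separate connections
db_fd, db_path = tempfile.mkstemp(suffix='.sqlite')
os.close(db_fd)
engine = create_engine('sqlite:///' + db_path)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)

session_a = Session()
session_b = Session()

thing = Thing(label='original')
session_a.add(thing)
session_a.commit()

# Load the same row through the second, independent session
same_thing = session_b.query(Thing).get(thing.id)
assert same_thing.label == 'original'

# Change and commit through the first session
thing.label = 'changed'
session_a.commit()

# The second session holds the stale value until the instance is refreshed
session_b.refresh(same_thing)
assert same_thing.label == 'changed'

session_a.close()
session_b.close()
os.remove(db_path)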
Example #27
    def setUpClass(cls, *args, **kwargs):  # pylint: disable=too-many-locals, too-many-statements
        """
        Besides the standard setup, we need to add a few more objects to the
        database to be able to explore different requests/filters/orderings etc.
        """
        # call parent setUpClass method
        super(RESTApiTestCase, cls).setUpClass()

        # connect the app and the api
        # Init the api by connecting it to the app (N.B. respect the following
        # order, api.__init__)
        kwargs = dict(PREFIX=cls._url_prefix,
                      PERPAGE_DEFAULT=cls._PERPAGE_DEFAULT,
                      LIMIT_DEFAULT=cls._LIMIT_DEFAULT)

        cls.app = App(__name__)
        cls.app.config['TESTING'] = True
        AiidaApi(cls.app, **kwargs)

        # create test inputs
        cell = ((2., 0., 0.), (0., 2., 0.), (0., 0., 2.))
        structure = orm.StructureData(cell=cell)
        structure.append_atom(position=(0., 0., 0.), symbols=['Ba'])
        structure.store()
        structure.add_comment('This is test comment.')
        structure.add_comment('Add another comment.')

        cif = orm.CifData(ase=structure.get_ase())
        cif.store()

        parameter1 = orm.Dict(dict={'a': 1, 'b': 2})
        parameter1.store()

        parameter2 = orm.Dict(dict={'c': 3, 'd': 4})
        parameter2.store()

        kpoint = orm.KpointsData()
        kpoint.set_kpoints_mesh([4, 4, 4])
        kpoint.store()

        resources = {'num_machines': 1, 'num_mpiprocs_per_machine': 1}

        calcfunc = orm.CalcFunctionNode(computer=cls.computer)
        calcfunc.store()

        calc = orm.CalcJobNode(computer=cls.computer)
        calc.set_option('resources', resources)
        calc.set_attribute('attr1', 'OK')
        calc.set_attribute('attr2', 'OK')
        calc.set_extra('extra1', False)
        calc.set_extra('extra2', 'extra_info')

        calc.add_incoming(structure,
                          link_type=LinkType.INPUT_CALC,
                          link_label='link_structure')
        calc.add_incoming(parameter1,
                          link_type=LinkType.INPUT_CALC,
                          link_label='link_parameter')

        aiida_in = 'The input file\nof the CalcJob node'
        # Add the calcjob_inputs folder with the aiida.in file to the CalcJobNode repository
        with tempfile.NamedTemporaryFile(mode='w+') as handle:
            handle.write(aiida_in)
            handle.flush()
            handle.seek(0)
            calc.put_object_from_filelike(handle,
                                          key='calcjob_inputs/aiida.in',
                                          force=True)
        calc.store()

        # create log message for calcjob
        import logging
        from aiida.common.log import LOG_LEVEL_REPORT
        from aiida.common.timezone import now
        from aiida.orm import Log

        log_record = {
            'time': now(),
            'loggername': 'loggername',
            'levelname': logging.getLevelName(LOG_LEVEL_REPORT),
            'dbnode_id': calc.id,
            'message': 'This is a template record message',
            'metadata': {
                'content': 'test'
            },
        }
        Log(**log_record)

        aiida_out = 'The output file\nof the CalcJob node'
        retrieved_outputs = orm.FolderData()
        # Add the calcjob_outputs folder with the aiida.out file to the FolderData node
        with tempfile.NamedTemporaryFile(mode='w+') as handle:
            handle.write(aiida_out)
            handle.flush()
            handle.seek(0)
            retrieved_outputs.put_object_from_filelike(
                handle, key='calcjob_outputs/aiida.out', force=True)
        retrieved_outputs.store()
        retrieved_outputs.add_incoming(calc,
                                       link_type=LinkType.CREATE,
                                       link_label='retrieved')

        kpoint.add_incoming(calc,
                            link_type=LinkType.CREATE,
                            link_label='create')

        calc1 = orm.CalcJobNode(computer=cls.computer)
        calc1.set_option('resources', resources)
        calc1.store()

        dummy_computers = [{
            'name': 'test1',
            'hostname': 'test1.epfl.ch',
            'transport_type': 'ssh',
            'scheduler_type': 'pbspro',
        }, {
            'name': 'test2',
            'hostname': 'test2.epfl.ch',
            'transport_type': 'ssh',
            'scheduler_type': 'torque',
        }, {
            'name': 'test3',
            'hostname': 'test3.epfl.ch',
            'transport_type': 'local',
            'scheduler_type': 'slurm',
        }, {
            'name': 'test4',
            'hostname': 'test4.epfl.ch',
            'transport_type': 'ssh',
            'scheduler_type': 'slurm',
        }]

        for dummy_computer in dummy_computers:
            computer = orm.Computer(**dummy_computer)
            computer.store()

        # Prepare typical REST responses
        cls.process_dummy_data()
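A short sketch of how the fixtures created above could be located from a test body with the QueryBuilder, filtering on the attribute and extra set on `calc`; this is an illustration, not part of the original test class:

from aiida import orm

builder = orm.QueryBuilder()
builder.append(
    orm.CalcJobNode,
    filters={'attributes.attr1': 'OK', 'extras.extra2': 'extra_info'},
    project=['uuid'],
)
calc_uuids = [entry[0] for entry in builder.all()]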
Example #28
def get_calcjob_remote_paths(pks=None,
                             past_days=None,
                             older_than=None,
                             computers=None,
                             user=None):
    """
    Return a mapping of computer uuids to a list of remote paths, for a given set of calcjobs. The set of
    calcjobs will be determined by a query with filters based on the pks, past_days, older_than,
    computers and user arguments.

    :param pks: only include calcjobs with a pk in this list
    :param past_days: only include calcjobs modified within the last past_days days
    :param older_than: only include calcjobs not modified within the last older_than days
    :param computers: only include calcjobs that were run on these computers
    :param user: only include calcjobs of this user
    :return: mapping of computer uuid and list of remote paths, or None
    """
    from datetime import timedelta

    from aiida import orm
    from aiida.orm import CalcJobNode
    from aiida.common import timezone

    filters_calc = {}
    filters_computer = {}

    if user is None:
        user = orm.User.objects.get_default()

    if computers is not None:
        filters_computer['id'] = {
            'in': [computer.pk for computer in computers]
        }

    if past_days is not None:
        filters_calc['mtime'] = {
            '>': timezone.now() - timedelta(days=past_days)
        }

    if older_than is not None:
        filters_calc['mtime'] = {
            '<': timezone.now() - timedelta(days=older_than)
        }

    if pks:
        filters_calc['id'] = {'in': pks}

    query = orm.QueryBuilder()
    query.append(CalcJobNode,
                 tag='calc',
                 project=['attributes.remote_workdir'],
                 filters=filters_calc)
    query.append(orm.Computer,
                 with_node='calc',
                 tag='computer',
                 project=['*'],
                 filters=filters_computer)
    query.append(orm.User, with_node='calc', filters={'email': user.email})

    if query.count() == 0:
        return None

    path_mapping = {}

    for path, computer in query.all():
        if path is not None:
            path_mapping.setdefault(computer.uuid, []).append(path)

    return path_mapping
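A brief usage sketch for the function above, assuming a loaded AiiDA profile; the seven-day window is an arbitrary illustration:

path_mapping = get_calcjob_remote_paths(past_days=7)

if path_mapping is None:
    print('No matching calcjobs found')
else:
    for computer_uuid, remote_paths in path_mapping.items():
        print('{}: {} remote working directories'.format(computer_uuid, len(remote_paths)))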
Example #29
def import_data_dj(in_path,
                   group=None,
                   ignore_unknown_nodes=False,
                   extras_mode_existing='kcl',
                   extras_mode_new='import',
                   comment_mode='newest',
                   silent=False):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the Django backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either:
        'k' (keep it) or
        'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either:
        'c' (create it) or
        'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either:
        'l' (leave the old value),
        'u' (update with a new value),
        'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either:
        'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress prints.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is
        corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
    """
    from django.db import transaction  # pylint: disable=import-error,no-name-in-module
    from aiida.backends.djsite.db import models
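    # Illustration of the three-letter extras_mode_existing code described above
    # (a sketch of the intended outcome, not a verbatim trace of merge_extras):
    #   existing node extras: {'a': 1, 'b': 2}
    #   imported extras:      {'b': 20, 'c': 3}
    #   with extras_mode_existing='kcl' the result is {'a': 1, 'b': 2, 'c': 3}:
    #   'k' keeps 'a' (only in the existing node), 'c' creates 'c' (only in the
    #   import), and 'l' leaves the old value of 'b' on the name collision.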

    # This is the export version expected by this function
    expected_export_version = StrictVersion(EXPORT_VERSION)

    # The returned dictionary with new and existing nodes and links
    ret_dict = {}

    # Initial check(s)
    if group:
        if not isinstance(group, Group):
            raise exceptions.ImportValidationError(
                'group must be a Group entity')
        elif not group.is_stored:
            group.store()

    ################
    # EXTRACT DATA #
    ################
    # The sandbox has to remain open until the end
    with SandboxFolder() as folder:
        if os.path.isdir(in_path):
            extract_tree(in_path, folder)
        else:
            if tarfile.is_tarfile(in_path):
                extract_tar(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
            elif zipfile.is_zipfile(in_path):
                try:
                    extract_zip(in_path,
                                folder,
                                silent=silent,
                                nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
                except ValueError as exc:
                    print(
                        'The following problem occurred while processing the provided file: {}'
                        .format(exc))
                    return
            else:
                raise exceptions.ImportValidationError(
                    'Unable to detect the input file format, it is neither a '
                    '(possibly compressed) tar file, nor a zip file.')

        if not folder.get_content_list():
            raise exceptions.CorruptArchive(
                'The provided file/folder ({}) is empty'.format(in_path))
        try:
            with open(folder.get_abs_path('metadata.json'),
                      'r',
                      encoding='utf8') as fhandle:
                metadata = json.load(fhandle)

            with open(folder.get_abs_path('data.json'), 'r',
                      encoding='utf8') as fhandle:
                data = json.load(fhandle)
        except IOError as error:
            raise exceptions.CorruptArchive(
                'Unable to find the file {} in the import file or folder'.
                format(error.filename))

        ######################
        # PRELIMINARY CHECKS #
        ######################
        export_version = StrictVersion(str(metadata['export_version']))
        if export_version != expected_export_version:
            msg = 'Export file version is {}, can import only version {}'\
                    .format(metadata['export_version'], expected_export_version)
            if export_version < expected_export_version:
                msg += "\nUse 'verdi export migrate' to update this export file."
            else:
                msg += '\nUpdate your AiiDA version in order to import this file.'

            raise exceptions.IncompatibleArchiveVersionError(msg)

        ##########################################################################
        # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS #
        ##########################################################################
        linked_nodes = set(
            chain.from_iterable(
                (l['input'], l['output']) for l in data['links_uuid']))
        group_nodes = set(chain.from_iterable(data['groups_uuid'].values()))

        if NODE_ENTITY_NAME in data['export_data']:
            import_nodes_uuid = set(
                v['uuid']
                for v in data['export_data'][NODE_ENTITY_NAME].values())
        else:
            import_nodes_uuid = set()

        # The combined set of linked_nodes and group_nodes was obtained from looking at all the links
        # The set of import_nodes_uuid was obtained from the entries actually present in export_data
        unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid

        if unknown_nodes and not ignore_unknown_nodes:
            raise exceptions.DanglingLinkError(
                'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first '
                'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n'
                ''.format(len(unknown_nodes)) +
                '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes))

        ###################################
        # DOUBLE-CHECK MODEL DEPENDENCIES #
        ###################################
        # The entity import order. It is defined by the database model relationships.

        model_order = (USER_ENTITY_NAME, COMPUTER_ENTITY_NAME,
                       NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME,
                       COMMENT_ENTITY_NAME)

        for import_field_name in metadata['all_fields_info']:
            if import_field_name not in model_order:
                raise exceptions.ImportValidationError(
                    "You are trying to import an unknown model '{}'!".format(
                        import_field_name))

        for idx, model_name in enumerate(model_order):
            dependencies = []
            for field in metadata['all_fields_info'][model_name].values():
                try:
                    dependencies.append(field['requires'])
                except KeyError:
                    # (No ForeignKey)
                    pass
            for dependency in dependencies:
                if dependency not in model_order[:idx]:
                    raise exceptions.ArchiveImportError(
                        'Model {} requires {} but would be loaded before it; stopping...'
                        .format(model_name, dependency))

        ###################################################
        # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS #
        ###################################################
        import_unique_ids_mappings = {}
        for model_name, import_data in data['export_data'].items():
            if model_name in metadata['unique_identifiers']:
                # I have to reconvert the pk to integer
                import_unique_ids_mappings[model_name] = {
                    int(k): v[metadata['unique_identifiers'][model_name]]
                    for k, v in import_data.items()
                }

        ###############
        # IMPORT DATA #
        ###############
        # DO ALL WITH A TRANSACTION

        # batch size for bulk create operations
        batch_size = get_config_option('db.batch_size')

        with transaction.atomic():
            foreign_ids_reverse_mappings = {}
            new_entries = {}
            existing_entries = {}

            # I first generate the list of data
            for model_name in model_order:
                cls_signature = entity_names_to_signatures[model_name]
                model = get_object_from_string(cls_signature)
                fields_info = metadata['all_fields_info'].get(model_name, {})
                unique_identifier = metadata['unique_identifiers'].get(
                    model_name, None)

                new_entries[model_name] = {}
                existing_entries[model_name] = {}

                foreign_ids_reverse_mappings[model_name] = {}

                # Not necessarily all models are exported
                if model_name in data['export_data']:

                    # skip nodes that are already present in the DB
                    if unique_identifier is not None:
                        import_unique_ids = set(
                            v[unique_identifier]
                            for v in data['export_data'][model_name].values())

                        relevant_db_entries_result = model.objects.filter(**{
                            '{}__in'.format(unique_identifier):
                            import_unique_ids
                        })
                        # Note: uuids need to be converted to strings
                        relevant_db_entries = {
                            str(getattr(n, unique_identifier)): n
                            for n in relevant_db_entries_result
                        }

                        foreign_ids_reverse_mappings[model_name] = {
                            k: v.pk
                            for k, v in relevant_db_entries.items()
                        }
                        for key, value in data['export_data'][
                                model_name].items():
                            if value[
                                    unique_identifier] in relevant_db_entries.keys(
                                    ):
                                # Already in DB
                                existing_entries[model_name][key] = value
                            else:
                                # To be added
                                new_entries[model_name][key] = value
                    else:
                        new_entries[model_name] = data['export_data'][
                            model_name].copy()

            # Show Comment mode if not silent
            if not silent:
                print('Comment mode: {}'.format(comment_mode))

            # I import data from the given model
            for model_name in model_order:
                cls_signature = entity_names_to_signatures[model_name]
                model = get_object_from_string(cls_signature)
                fields_info = metadata['all_fields_info'].get(model_name, {})
                unique_identifier = metadata['unique_identifiers'].get(
                    model_name, None)

                # EXISTING ENTRIES
                for import_entry_pk, entry_data in existing_entries[
                        model_name].items():
                    unique_id = entry_data[unique_identifier]
                    existing_entry_id = foreign_ids_reverse_mappings[
                        model_name][unique_id]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())
                    # TODO COMPARE, AND COMPARE ATTRIBUTES

                    if model is models.DbComment:
                        new_entry_uuid = merge_comment(import_data,
                                                       comment_mode)
                        if new_entry_uuid is not None:
                            entry_data[unique_identifier] = new_entry_uuid
                            new_entries[model_name][
                                import_entry_pk] = entry_data

                    if model_name not in ret_dict:
                        ret_dict[model_name] = {'new': [], 'existing': []}
                    ret_dict[model_name]['existing'].append(
                        (import_entry_pk, existing_entry_id))
                    if not silent:
                        print('existing %s: %s (%s->%s)' %
                              (model_name, unique_id, import_entry_pk,
                               existing_entry_id))
                        # print("  `-> WARNING: NO DUPLICITY CHECK DONE!")
                        # CHECK ALSO FILES!

                # Store all objects for this model in a list, and store them all at once at the end.
                objects_to_create = []
                # This is needed later to associate the import entry with the new pk
                import_new_entry_pks = {}
                imported_comp_names = set()

                # NEW ENTRIES
                for import_entry_pk, entry_data in new_entries[
                        model_name].items():
                    unique_id = entry_data[unique_identifier]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())

                    if model is models.DbGroup:
                        # Check if there is already a group with the same name
                        dupl_counter = 0
                        orig_label = import_data['label']
                        while model.objects.filter(label=import_data['label']):
                            import_data[
                                'label'] = orig_label + DUPL_SUFFIX.format(
                                    dupl_counter)
                            dupl_counter += 1
                            if dupl_counter == 100:
                                raise exceptions.ImportUniquenessError(
                                    'A group of that label ( {} ) already exists and I could not create a new one'
                                    ''.format(orig_label))

                    elif model is models.DbComputer:
                        # Check if there is already a computer with the same name in the database
                        dupl = (model.objects.filter(name=import_data['name'])
                                or import_data['name'] in imported_comp_names)
                        orig_name = import_data['name']
                        dupl_counter = 0
                        while dupl:
                            # Rename the new computer
                            import_data['name'] = (
                                orig_name + DUPL_SUFFIX.format(dupl_counter))
                            dupl = (
                                model.objects.filter(name=import_data['name'])
                                or import_data['name'] in imported_comp_names)
                            dupl_counter += 1
                            if dupl_counter == 100:
                                raise exceptions.ImportUniquenessError(
                                    'A computer of that name ( {} ) already exists and I could not create a new one'
                                    ''.format(orig_name))

                        imported_comp_names.add(import_data['name'])

                    objects_to_create.append(model(**import_data))
                    import_new_entry_pks[unique_id] = import_entry_pk

                if model_name == NODE_ENTITY_NAME:
                    if not silent:
                        print('STORING NEW NODE REPOSITORY FILES...')

                    # NEW NODES
                    for object_ in objects_to_create:
                        import_entry_uuid = object_.uuid
                        import_entry_pk = import_new_entry_pks[
                            import_entry_uuid]

                        # Before storing entries in the DB, I store the files (if these are nodes).
                        # Note: only for new entries!
                        subfolder = folder.get_subfolder(
                            os.path.join(NODES_EXPORT_SUBFOLDER,
                                         export_shard_uuid(import_entry_uuid)))
                        if not subfolder.exists():
                            raise exceptions.CorruptArchive(
                                'Unable to find the repository folder for Node with UUID={} in the exported '
                                'file'.format(import_entry_uuid))
                        destdir = RepositoryFolder(
                            section=Repository._section_name,
                            uuid=import_entry_uuid)
                        # Replace the folder, possibly destroying any previous folder, and move the files
                        # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
                        destdir.replace_with_folder(subfolder.abspath,
                                                    move=True,
                                                    overwrite=True)

                        # For DbNodes, we also have to store their attributes
                        if not silent:
                            print('STORING NEW NODE ATTRIBUTES...')

                        # Get attributes from import file
                        try:
                            object_.attributes = data['node_attributes'][str(
                                import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find attribute info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # For DbNodes, we also have to store their extras
                        if extras_mode_new == 'import':
                            if not silent:
                                print('STORING NEW NODE EXTRAS...')

                            # Get extras from import file
                            try:
                                extras = data['node_extras'][str(
                                    import_entry_pk)]
                            except KeyError:
                                raise exceptions.CorruptArchive(
                                    'Unable to find extra info for Node with UUID={}'
                                    .format(import_entry_uuid))
                            # TODO: remove once AiiDA extras are moved somewhere else
                            # from here
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key.startswith('_aiida_')
                            }
                            if object_.node_type.endswith('code.Code.'):
                                extras = {
                                    key: value
                                    for key, value in extras.items()
                                    if not key == 'hidden'
                                }
                            # till here
                            object_.extras = extras
                        elif extras_mode_new == 'none':
                            if not silent:
                                print('SKIPPING NEW NODE EXTRAS...')
                        else:
                            raise exceptions.ImportValidationError(
                                "Unknown extras_mode_new value: {}, should be either 'import' or 'none'"
                                ''.format(extras_mode_new))

                    # EXISTING NODES (Extras)
                    # For the existing nodes that are also in the imported list, we also update their extras if necessary
                    if not silent:
                        print(
                            'UPDATING EXISTING NODE EXTRAS (mode: {})'.format(
                                extras_mode_existing))

                    import_existing_entry_pks = {
                        entry_data[unique_identifier]: import_entry_pk
                        for import_entry_pk, entry_data in
                        existing_entries[model_name].items()
                    }
                    for node in models.DbNode.objects.filter(
                            uuid__in=import_existing_entry_pks).all():  # pylint: disable=no-member
                        import_entry_uuid = str(node.uuid)
                        import_entry_pk = import_existing_entry_pks[
                            import_entry_uuid]

                        # Get extras from import file
                        try:
                            extras = data['node_extras'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find extra info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # TODO: remove once AiiDA extras are moved somewhere else
                        # from here
                        extras = {
                            key: value
                            for key, value in extras.items()
                            if not key.startswith('_aiida_')
                        }
                        if node.node_type.endswith('code.Code.'):
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key == 'hidden'
                            }
                        # till here
                        node.extras = merge_extras(node.extras, extras,
                                                   extras_mode_existing)

                        # Already saving existing node here to update its extras
                        node.save()

                # If the model has an mtime field, disable the automatic update
                # to keep the mtime that we have set here
                if 'mtime' in [
                        field.name for field in model._meta.local_fields
                ]:
                    with models.suppress_auto_now([(model, ['mtime'])]):
                        # Store them all at once; however, the PKs are not set this way...
                        model.objects.bulk_create(objects_to_create,
                                                  batch_size=batch_size)
                else:
                    model.objects.bulk_create(objects_to_create,
                                              batch_size=batch_size)

                # Get back the just-saved entries
                just_saved_queryset = model.objects.filter(
                    **{
                        '{}__in'.format(unique_identifier):
                        import_new_entry_pks.keys()
                    }).values_list(unique_identifier, 'pk')
                # note: convert uuids from type UUID to strings
                just_saved = {
                    str(key): value
                    for key, value in just_saved_queryset
                }

                # Now I have the PKs, print the info
                # Moreover, add newly created Nodes to foreign_ids_reverse_mappings
                for unique_id, new_pk in just_saved.items():
                    import_entry_pk = import_new_entry_pks[unique_id]
                    foreign_ids_reverse_mappings[model_name][
                        unique_id] = new_pk
                    if model_name not in ret_dict:
                        ret_dict[model_name] = {'new': [], 'existing': []}
                    ret_dict[model_name]['new'].append(
                        (import_entry_pk, new_pk))

                    if not silent:
                        print('NEW %s: %s (%s->%s)' %
                              (model_name, unique_id, import_entry_pk, new_pk))

            if not silent:
                print('STORING NODE LINKS...')
            import_links = data['links_uuid']
            links_to_store = []

            # Needed, since QueryBuilder does not yet work for recently saved Nodes
            existing_links_raw = models.DbLink.objects.all().values_list(
                'input', 'output', 'label', 'type')
            existing_links = {(l[0], l[1], l[2], l[3])
                              for l in existing_links_raw}
            existing_outgoing_unique = {(l[0], l[3])
                                        for l in existing_links_raw}
            existing_outgoing_unique_pair = {(l[0], l[2], l[3])
                                             for l in existing_links_raw}
            existing_incoming_unique = {(l[1], l[3])
                                        for l in existing_links_raw}
            existing_incoming_unique_pair = {(l[1], l[2], l[3])
                                             for l in existing_links_raw}

            calculation_node_types = 'process.calculation.'
            workflow_node_types = 'process.workflow.'
            data_node_types = 'data.'

            link_mapping = {
                LinkType.CALL_CALC:
                (workflow_node_types, calculation_node_types, 'unique_triple',
                 'unique'),
                LinkType.CALL_WORK: (workflow_node_types, workflow_node_types,
                                     'unique_triple', 'unique'),
                LinkType.CREATE: (calculation_node_types, data_node_types,
                                  'unique_pair', 'unique'),
                LinkType.INPUT_CALC: (data_node_types, calculation_node_types,
                                      'unique_triple', 'unique_pair'),
                LinkType.INPUT_WORK: (data_node_types, workflow_node_types,
                                      'unique_triple', 'unique_pair'),
                LinkType.RETURN: (workflow_node_types, data_node_types,
                                  'unique_pair', 'unique_triple'),
            }

            for link in import_links:
                # Check for dangling Links within the supposedly self-consistent archive
                try:
                    in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['input']]
                    out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['output']]
                except KeyError:
                    if ignore_unknown_nodes:
                        continue
                    raise exceptions.ImportValidationError(
                        'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, '
                        'label={}, type={})'.format(link['input'],
                                                    link['output'],
                                                    link['label'],
                                                    link['type']))

                # Check if link already exists, skip if it does
                # This is equivalent to an existing triple link (i.e. unique_triple from below)
                if (in_id, out_id, link['label'],
                        link['type']) in existing_links:
                    continue

                # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them.
                try:
                    validate_link_label(link['label'])
                except ValueError as why:
                    raise exceptions.ImportValidationError(
                        'Error during Link label validation: {}'.format(why))

                source = models.DbNode.objects.get(id=in_id)
                target = models.DbNode.objects.get(id=out_id)

                if source.uuid == target.uuid:
                    raise exceptions.ImportValidationError(
                        'Cannot add a link to oneself')

                link_type = LinkType(link['type'])
                type_source, type_target, outdegree, indegree = link_mapping[
                    link_type]

                # Check if source Node is a valid type
                if not source.node_type.startswith(type_source):
                    raise exceptions.ImportValidationError(
                        'Cannot add a {} link from {} to {}'.format(
                            link_type, source.node_type, target.node_type))

                # Check if target Node is a valid type
                if not target.node_type.startswith(type_target):
                    raise exceptions.ImportValidationError(
                        'Cannot add a {} link from {} to {}'.format(
                            link_type, source.node_type, target.node_type))

                # If the outdegree is `unique`, there cannot already be any other outgoing link of that type,
                # i.e., the source Node may not already have an outgoing link of the current LinkType.
                if outdegree == 'unique' and (
                        in_id, link['type']) in existing_outgoing_unique:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an outgoing {} link'.format(
                            source.uuid, link_type))

                # If the outdegree is `unique_pair`, the labels of outgoing links of this type must be unique,
                # i.e., the source Node may not already have an outgoing link of the current LinkType
                # that carries the current link label.
                elif outdegree == 'unique_pair' and \
                (in_id, link['label'], link['type']) in existing_outgoing_unique_pair:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an outgoing {} link with label "{}"'
                        .format(source.uuid, link_type, link['label']))

                # If the indegree is `unique`, there cannot already be any other incoming link of that type,
                # i.e., the target Node may not already have an incoming link of the current LinkType.
                if indegree == 'unique' and (
                        out_id, link['type']) in existing_incoming_unique:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an incoming {} link'.format(
                            target.uuid, link_type))

                # If the indegree is `unique_pair`, the labels of incoming links of this type must be unique,
                # i.e., the target Node may not already have an incoming link of the current LinkType
                # that carries the current link label.
                elif indegree == 'unique_pair' and \
                (out_id, link['label'], link['type']) in existing_incoming_unique_pair:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an incoming {} link with label "{}"'
                        .format(target.uuid, link_type, link['label']))

                # New link
                links_to_store.append(
                    models.DbLink(input_id=in_id,
                                  output_id=out_id,
                                  label=link['label'],
                                  type=link['type']))
                if 'Link' not in ret_dict:
                    ret_dict['Link'] = {'new': []}
                ret_dict['Link']['new'].append((in_id, out_id))

                # Register the new link in the sets of existing links (input PK, output PK, label, type)
                existing_links.add(
                    (in_id, out_id, link['label'], link['type']))
                existing_outgoing_unique.add((in_id, link['type']))
                existing_outgoing_unique_pair.add(
                    (in_id, link['label'], link['type']))
                existing_incoming_unique.add((out_id, link['type']))
                existing_incoming_unique_pair.add(
                    (out_id, link['label'], link['type']))

            # Store new links
            if links_to_store:
                if not silent:
                    print('   ({} new links...)'.format(len(links_to_store)))

                models.DbLink.objects.bulk_create(links_to_store,
                                                  batch_size=batch_size)
            else:
                if not silent:
                    print('   (0 new links...)')

            if not silent:
                print('STORING GROUP ELEMENTS...')
            import_groups = data['groups_uuid']
            for groupuuid, groupnodes in import_groups.items():
                # TODO: cache these to avoid too many queries
                group_ = models.DbGroup.objects.get(uuid=groupuuid)
                nodes_to_store = [
                    foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid]
                    for node_uuid in groupnodes
                ]
                if nodes_to_store:
                    group_.dbnodes.add(*nodes_to_store)

        ######################################################
        # Put everything in a specific group
        ######################################################
        existing = existing_entries.get(NODE_ENTITY_NAME, {})
        existing_pk = [
            foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
            for v in existing.values()
        ]
        new = new_entries.get(NODE_ENTITY_NAME, {})
        new_pk = [
            foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
            for v in new.values()
        ]

        pks_for_group = existing_pk + new_pk

        # So that we do not create empty groups
        if pks_for_group:
            # If the user did not specify an import group, create a new, uniquely labeled one
            if not group:
                # Get a unique label for the import group, based on the current (local) time
                basename = timezone.localtime(
                    timezone.now()).strftime('%Y%m%d-%H%M%S')
                counter = 0
                group_label = basename

                while Group.objects.find(filters={'label': group_label}):
                    counter += 1
                    group_label = '{}_{}'.format(basename, counter)

                    if counter == 100:
                        raise exceptions.ImportUniquenessError(
                            "Overflow of import groups (more than 100 import groups exist with basename '{}')"
                            ''.format(basename))
                group = ImportGroup(label=group_label).store()

            # Add all the nodes to the new group
            # TODO: decide if we want to return the group label
            nodes = [
                entry[0]
                for entry in QueryBuilder().append(Node,
                                                   filters={
                                                       'id': {
                                                           'in': pks_for_group
                                                       }
                                                   }).all()
            ]
            group.add_nodes(nodes)

            if not silent:
                print(
                    "IMPORTED NODES ARE GROUPED IN THE IMPORT GROUP LABELED '{}'"
                    .format(group.label))
        else:
            if not silent:
                print(
                    'NO NODES TO IMPORT, SO NO GROUP CREATED, IF IT DID NOT ALREADY EXIST'
                )

    if not silent:
        print('DONE.')

    return ret_dict
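
The outdegree/indegree bookkeeping above can be exercised on its own. Below is a minimal,
backend-independent sketch under stated assumptions: the helper name `may_add_link` is
hypothetical, plain 4-tuples stand in for DbLink rows, and the degree rules are passed in
explicitly rather than looked up in `link_mapping`.

def may_add_link(existing, in_id, out_id, label, link_type, outdegree, indegree):
    """Return False if the identical link is already present; raise if a degree rule is violated."""
    if (in_id, out_id, label, link_type) in existing:
        return False  # identical (input, output, label, type) link already stored: skip silently
    if outdegree == 'unique' and any(i == in_id and t == link_type for i, _, _, t in existing):
        raise ValueError('source {} already has an outgoing {} link'.format(in_id, link_type))
    if outdegree == 'unique_pair' and any(
            i == in_id and l == label and t == link_type for i, _, l, t in existing):
        raise ValueError('source {} already has an outgoing {} link labeled {!r}'.format(in_id, link_type, label))
    if indegree == 'unique' and any(o == out_id and t == link_type for _, o, _, t in existing):
        raise ValueError('target {} already has an incoming {} link'.format(out_id, link_type))
    if indegree == 'unique_pair' and any(
            o == out_id and l == label and t == link_type for _, o, l, t in existing):
        raise ValueError('target {} already has an incoming {} link labeled {!r}'.format(out_id, link_type, label))
    return True


# A `create` link has outdegree `unique_pair` and indegree `unique` (cf. `link_mapping` above)
existing_rows = {(10, 20, 'result', 'create')}
print(may_add_link(existing_rows, 10, 21, 'other_result', 'create', 'unique_pair', 'unique'))  # True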
Example #30
0
def group_list(all_users, user, all_entries, group_type, type_string,
               with_description, count, past_days, startswith, endswith,
               contains, order_by, order_dir, node):
    """Show a list of existing groups."""
    # pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
    import datetime
    import warnings

    import click
    from aiida import orm
    from aiida.cmdline.utils import echo
    from aiida.common import timezone
    from aiida.common.escaping import escape_for_sql_like
    from aiida.common.warnings import AiidaDeprecationWarning
    from tabulate import tabulate

    builder = orm.QueryBuilder()
    filters = {}

    if group_type is not None:
        warnings.warn(
            '`--group-type` is deprecated, use `--type-string` instead',
            AiidaDeprecationWarning)  # pylint: disable=no-member

        if type_string is not None:
            raise click.BadOptionUsage(
                'group-type',
                'cannot use `--group-type` and `--type-string` at the same time.'
            )
        else:
            type_string = group_type

    # Have to specify the default for `type_string` here instead of directly in the option otherwise it will always
    # raise above if the user specifies just the `--group-type` option. Once that option is removed, the default can
    # be moved to the option itself.
    if type_string is None:
        type_string = 'core'

    if not all_entries:
        if '%' in type_string or '_' in type_string:
            filters['type_string'] = {'like': type_string}
        else:
            filters['type_string'] = type_string

    # Creation time
    if past_days:
        filters['time'] = {
            '>': timezone.now() - datetime.timedelta(days=past_days)
        }

    # Query for specific group names
    filters['or'] = []
    if startswith:
        filters['or'].append(
            {'label': {
                'like': f'{escape_for_sql_like(startswith)}%'
            }})
    if endswith:
        filters['or'].append(
            {'label': {
                'like': f'%{escape_for_sql_like(endswith)}'
            }})
    if contains:
        filters['or'].append(
            {'label': {
                'like': f'%{escape_for_sql_like(contains)}%'
            }})

    builder.append(orm.Group, filters=filters, tag='group', project='*')
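    # With `project='*'` each result row is a one-element list holding the full Group entity,
    # hence the `group[0]` indexing when the table is built below.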

    # Query groups that belong to specific user
    if user:
        user_email = user.email
    else:
        # By default: only groups of this user
        user_email = orm.User.objects.get_default().email

    # Query groups that belong to all users
    if not all_users:
        builder.append(orm.User,
                       filters={'email': {
                           '==': user_email
                       }},
                       with_group='group')

    # Query groups that contain a particular node
    if node:
        builder.append(orm.Node,
                       filters={'id': {
                           '==': node.id
                       }},
                       with_group='group')

    builder.order_by({orm.Group: {order_by: order_dir}})
    result = builder.all()

    projection_lambdas = {
        'pk': lambda group: str(group.pk),
        'label': lambda group: group.label,
        'type_string': lambda group: group.type_string,
        'count': lambda group: group.count(),
        'user': lambda group: group.user.email.strip(),
        'description': lambda group: group.description
    }

    table = []
    projection_header = ['PK', 'Label', 'Type string', 'User']
    projection_fields = ['pk', 'label', 'type_string', 'user']

    if with_description:
        projection_header.append('Description')
        projection_fields.append('description')

    if count:
        projection_header.append('Node count')
        projection_fields.append('count')

    for group in result:
        table.append([
            projection_lambdas[field](group[0]) for field in projection_fields
        ])

    if not all_entries:
        echo.echo_info(
            'to show groups of all types, use the `-a/--all` option.')

    if not table:
        echo.echo_info('no groups found matching the specified criteria.')
    else:
        echo.echo(tabulate(table, headers=projection_header))
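
The same filter pattern can be used directly from a script or a `verdi shell` session. The
following is a minimal sketch under stated assumptions: the label prefix 'import_' and the
seven-day window are arbitrary example values, and a loaded AiiDA profile is assumed.

import datetime

from aiida import orm
from aiida.common import timezone
from aiida.common.escaping import escape_for_sql_like

builder = orm.QueryBuilder()
builder.append(
    orm.Group,
    filters={
        # escape '%' and '_' in the user-supplied prefix, then match it as a SQL LIKE pattern
        'label': {'like': '{}%'.format(escape_for_sql_like('import_'))},
        # only groups created within the last seven days
        'time': {'>': timezone.now() - datetime.timedelta(days=7)},
    },
    project=['label', 'type_string'],
)

for label, type_string in builder.iterall():
    print(label, type_string)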