Example #1
def test_tag_dtype(sample_bidsfile, subject_entity):
    f, e = sample_bidsfile, subject_entity
    # Various ways of initializing; all should give the same result
    tags = [
        Tag(f, e, 4, int),
        Tag(f, e, '4', 'int'),
        Tag(f, e, '4', int),
        Tag(f, e, 4),
        Tag(file=f, entity=e, dtype=int, value='4')
    ]
    assert all(t.dtype == int for t in tags)
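The same equivalence check can also be expressed with pytest parametrization; a minimal sketch, assuming the suite's sample_bidsfile/subject_entity fixtures and Tag import, and that Tag's dtype argument defaults to None (as the positional Tag(f, e, 4) form above implies):

import pytest

@pytest.mark.parametrize("value, dtype", [
    (4, int),
    ('4', 'int'),
    ('4', int),
    (4, None),   # dtype left to be inferred from the value
])
def test_tag_dtype_param(sample_bidsfile, subject_entity, value, dtype):
    # Each (value, dtype) combination should normalize to a dtype of int
    assert Tag(sample_bidsfile, subject_entity, value, dtype).dtype == int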
Example #2
def test_entity_add_file(sample_bidsfile):
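    # create_session() is assumed to be a module-level test helper that
    # builds an in-memory SQLAlchemy session; the fixture in Example #3
    # below shows the equivalent setup written out inline.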
    session = create_session()
    bf = sample_bidsfile
    e = Entity('prop', r'-(\d+)')
    t = Tag(file=bf, entity=e, value=4)
    session.add_all([t, e, bf])
    session.commit()
    assert e.files[bf.path] == 4
Example #3
# Imports assume pybids' internal module layout (bids.layout.models).
import os

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from bids.layout.models import Base, BIDSFile, Entity, Tag


@pytest.fixture
def writable_file(tmpdir):
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    testfile = 'sub-03_ses-2_task-rest_acq-fullbrain_run-2_bold.nii.gz'
    fn = tmpdir.mkdir("tmp").join(testfile)
    fn.write('###')
    bf = BIDSFile(str(fn))

    tag_dict = {'task': 'rest', 'run': 2, 'subject': '3'}
    ents = {name: Entity(name) for name in tag_dict.keys()}
    tags = [Tag(bf, ents[k], value=v) for k, v in tag_dict.items()]

    session.add_all(list(ents.values()) + tags + [bf])
    session.commit()
    return bf
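The engine/session boilerplate in this fixture is standard SQLAlchemy; here is a self-contained sketch of the same pattern with a toy model standing in for the pybids classes (ToyFile and its columns are invented for illustration):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

ToyBase = declarative_base()

class ToyFile(ToyBase):
    __tablename__ = 'toy_files'
    id = Column(Integer, primary_key=True)
    path = Column(String)

engine = create_engine('sqlite://')   # in-memory DB, discarded at exit
ToyBase.metadata.create_all(engine)   # create tables for all mapped models
session = sessionmaker(bind=engine)()
session.add(ToyFile(path='/tmp/example.nii.gz'))
session.commit()
assert session.query(ToyFile).count() == 1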
Example #4
def test_tag_init(sample_bidsfile, subject_entity):
    f, e = sample_bidsfile, subject_entity
    tag = Tag(f, e, 'zzz')
    rep = str(tag)
    assert rep.startswith("<Tag file:") and f.path in rep and 'zzz' in rep
Example #5
    def index_metadata(self, **filters):
        """Index metadata for all files in the BIDS dataset. """
        # Process JSON files first if we're indexing metadata
        all_files = self.layout.get(absolute_paths=True, **filters)

        # Track ALL entities we've seen in file names or metadata
        all_entities = {}
        for c in self.config:
            all_entities.update(c.entities)

        # If key/value pairs in JSON files duplicate ones extracted from files,
        # we can end up with Tag collisions in the DB. To prevent this, we
        # store all filename/entity pairs and the value, and then check against
        # that before adding each new Tag.
        all_tags = {}
        for t in self.session.query(Tag).all():
            key = '{}_{}'.format(t.file_path, t.entity_name)
            all_tags[key] = str(t.value)

        # We build up a store of all file data as we iterate files. It looks
        # like: {extension/suffix: {dirname: [(entities, payload)]}}.
        # The payload is left empty for non-JSON files.
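        # A hypothetical illustration of that shape (paths invented):
        #   {'nii.gz/bold': {'/data/sub-01/func': [
        #        ({'subject': '01', 'task': 'rest'}, None,
        #         '/data/sub-01/func/sub-01_task-rest_bold.nii.gz')]},
        #    'json/bold': {'/data': [
        #        ({'task': 'rest'}, {'RepetitionTime': 2.0},
        #         '/data/task-rest_bold.json')]}}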
        file_data = {}

        for bf in all_files:
            file_ents = bf.entities.copy()
            suffix = file_ents.pop('suffix', None)
            ext = file_ents.pop('extension', None)

            if suffix is not None and ext is not None:
                key = "{}/{}".format(ext, suffix)
                if key not in file_data:
                    file_data[key] = defaultdict(list)

                if ext == 'json':
                    with open(bf.path, 'r') as handle:
                        try:
                            payload = json.load(handle)
                        except json.JSONDecodeError as e:
                            msg = ("Error occurred while trying to decode JSON"
                                   " from file '{}'.".format(bf.path))
                            raise IOError(msg) from e
                else:
                    payload = None

                to_store = (file_ents, payload, bf.path)
                file_data[key][bf.dirname].append(to_store)

        # To avoid integrity errors, track primary keys we've seen
        seen_assocs = set()

        def create_association_pair(src, dst, kind, kind2=None):
            kind2 = kind2 or kind
            pk1 = '#'.join([src, dst, kind])
            if pk1 not in seen_assocs:
                self.session.add(FileAssociation(src=src, dst=dst, kind=kind))
                seen_assocs.add(pk1)
            pk2 = '#'.join([dst, src, kind2])
            if pk2 not in seen_assocs:
                self.session.add(FileAssociation(src=dst, dst=src, kind=kind2))
                seen_assocs.add(pk2)
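        # e.g. create_association_pair(js_path, img_path, 'Metadata') records
        # the edge in both directions under 'Metadata'; passing a distinct
        # kind2 (as with 'Child'/'Parent' below) labels the reverse edge
        # differently. (Names here are illustrative.)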

        # TODO: Efficiency of everything in this loop could be improved
        filenames = [bf for bf in all_files if not bf.path.endswith('.json')]

        for bf in filenames:
            file_ents = bf.entities.copy()
            suffix = file_ents.pop('suffix', None)
            ext = file_ents.pop('extension', None)
            file_ent_keys = set(file_ents.keys())

            if suffix is None or ext is None:
                continue

            # Extract metadata associated with the file. The idea is
            # that we loop over parent directories, and if we find
            # payloads in the file_data store (indexing by directory
            # and current file suffix), we check to see if the
            # candidate JSON file's entities are entirely consumed by
            # the current file. If so, it's a valid candidate, and we
            # add the payload to the stack. Finally, we invert the
            # stack and merge the payloads in order.
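            # For example (hypothetical paths): for
            # sub-01/func/sub-01_task-rest_bold.nii.gz, a sidecar
            # task-rest_bold.json at the dataset root qualifies, because its
            # {'task': 'rest'} entities are a subset of the file's entities;
            # a task-nback_bold.json in the same directory would not.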
            ext_key = "{}/{}".format(ext, suffix)
            json_key = "json/{}".format(suffix)
            dirname = bf.dirname

            payloads = []
            ancestors = []

            while True:
                # Get JSON payloads
                json_data = file_data.get(json_key, {}).get(dirname, [])
                for js_ents, js_md, js_path in json_data:
                    js_keys = set(js_ents.keys())
                    if js_keys - file_ent_keys:
                        continue
                    matches = [
                        js_ents[name] == file_ents[name] for name in js_keys
                    ]
                    if all(matches):
                        payloads.append((js_md, js_path))

                # Get all files this file inherits from
                candidates = file_data.get(ext_key, {}).get(dirname, [])
                for ents, _, path in candidates:
                    keys = set(ents.keys())
                    if keys - file_ent_keys:
                        continue
                    matches = [ents[name] == file_ents[name] for name in keys]
                    if all(matches):
                        ancestors.append(path)

                parent = os.path.dirname(dirname)
                if parent == dirname:
                    break
                dirname = parent

            if not payloads:
                continue

            # Create DB records for metadata associations
            js_file = payloads[-1][1]
            create_association_pair(js_file, bf.path, 'Metadata')

            # Consolidate metadata by looping over inherited JSON files
            file_md = {}
            for pl, js_file in payloads[::-1]:
                file_md.update(pl)

            # Create FileAssociation records for JSON inheritance
            n_pl = len(payloads)
            for i, (pl, js_file) in enumerate(payloads):
                if (i + 1) < n_pl:
                    other = payloads[i + 1][1]
                    create_association_pair(js_file, other, 'Child', 'Parent')

            # Inheritance for current file
            n_pl = len(ancestors)
            for i, src in enumerate(ancestors):
                if (i + 1) < n_pl:
                    dst = ancestors[i + 1]
                    create_association_pair(src, dst, 'Child', 'Parent')

            # Files with IntendedFor field always get mapped to targets
            intended = listify(file_md.get('IntendedFor', []))
            for target in intended:
                # Per spec, IntendedFor paths are relative to the
                # subject directory.
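                # e.g. 'ses-2/func/sub-01_ses-2_task-rest_bold.nii.gz'
                # resolves to <root>/sub-01/ses-2/func/... (paths here
                # are illustrative).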
                target = os.path.join(self.root,
                                      'sub-{}'.format(bf.entities['subject']),
                                      target)
                create_association_pair(bf.path, target, 'IntendedFor',
                                        'InformedBy')

            # Link files to BOLD runs
            if suffix in ['physio', 'stim', 'events', 'sbref']:
                images = self.layout.get(extension=['nii', 'nii.gz'],
                                         suffix='bold',
                                         return_type='filename',
                                         **file_ents)
                for img in images:
                    create_association_pair(bf.path, img, 'IntendedFor',
                                            'InformedBy')

            # Link files to DWI runs
            if suffix == 'sbref' or ext in ['bvec', 'bval']:
                images = self.layout.get(extension=['nii', 'nii.gz'],
                                         suffix='dwi',
                                         return_type='filename',
                                         **file_ents)
                for img in images:
                    create_association_pair(bf.path, img, 'IntendedFor',
                                            'InformedBy')

            # Create Tag <-> Entity mappings, and any newly discovered Entities
            for md_key, md_val in file_md.items():
                tag_string = '{}_{}'.format(bf.path, md_key)
                # Skip pairs that were already found in the filenames
                if tag_string in all_tags:
                    file_val = all_tags[tag_string]
                    if str(md_val) != file_val:
                        msg = (
                            "Conflicting values found for entity '{}' in "
                            "filename {} (value='{}') versus its JSON sidecar "
                            "(value='{}'). Please reconcile this discrepancy.")
                        raise ValueError(
                            msg.format(md_key, bf.path, file_val, md_val))
                    continue
                if md_key not in all_entities:
                    all_entities[md_key] = Entity(md_key, is_metadata=True)
                    self.session.add(all_entities[md_key])
                tag = Tag(bf, all_entities[md_key], md_val)
                self.session.add(tag)

            if len(self.session.new) >= 1000:
                self.session.commit()

        self.session.commit()
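In pybids this indexer is normally driven by BIDSLayout rather than called directly; a hedged usage sketch, assuming a BIDS dataset at the invented path /data/bids:

from bids import BIDSLayout

layout = BIDSLayout('/data/bids')   # indexes filenames and JSON metadata
bold = layout.get(suffix='bold', extension='nii.gz',
                  return_type='filename')
# get_metadata() returns the merged sidecar payloads assembled above
print(layout.get_metadata(bold[0]).get('RepetitionTime'))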