示例#1
0
    def setup(cls, blackfynn_local_instance):
        """ wire the datasource helpers onto the class

            Any other class in the MRO that has a ``setup`` attribute gets
            it called first. Nothing is cached between calls, so running
            this again rebuilds every helper. ``blackfynn_local_instance``
            is accepted but never read in this body.
        """
        others_with_setup = (klass for klass in cls.mro()
                             if klass != cls and hasattr(klass, 'setup'))
        for klass in others_with_setup:
            klass.setup()

        dat.DatasetStructure.rate = cls.rate

        # unanchored helpers
        if not cls.no_google:
            cls.organs_sheet = sheets.Organs()  # ipv6 resolution issues :/
        else:
            log.critical('no google no organ data')

            class asdf:
                # stand-in for the organs sheet: every lookup answers None
                modality = lambda v: None
                organ_term = lambda v: None
                award_manual = lambda v: None
                byCol = _byCol([['award', 'award_manual', 'organ_term'], []])

            cls.organs_sheet = asdf

        cls.organ = OrganData()
        cls.member = State.member
示例#2
0
    def normalize(cls, value):
        """ reduce an identifier to its bare canonical form

            (presumably an NIH award number -- confirm against callers)

            The parent ``normalize`` runs first with ``preserve_case=True``.
            After that the value is stripped of surrounding whitespace,
            parentheses, the ``-01``/``-01S1``/``-02``/``-02S2`` suffixes,
            ``SPARC``, the ``NIH`` prefixes, hyphens and spaces, a leading
            ``1``, ``3`` or ``5`` and a trailing ``S2``.

            Returns the reduced string, which may be empty when nothing
            is left after stripping.
        """
        _ovalue = value
        value = super().normalize(value, preserve_case=True)
        if 'OT2' in value and 'OD' not in value:
            # one is missing the OD >_<
            log.warning(value)
            value = value.replace('-', '-OD')  # hack

        # Fold the unicode dashes that look like '-' onto the ascii hyphen.
        # They are spelled as escapes so the distinction cannot be lost to
        # an editor or an encoding round trip.
        lookalike_dashes = str.maketrans(
            dict.fromkeys('\u2010\u2011\u2012\u2013\u2014\u2212', '-'))
        n = value.strip().translate(lookalike_dashes)

        # order matters: the longer suffixes have to go before their prefixes
        # and all of them before the bare hyphen is removed
        for fragment in ('(', ')', '-01S1', '-01', '-02S2', '-02', 'SPARC',
                         'NIH-1', 'NIH-', '-', 'NIH ', ' '):
            n = n.replace(fragment, '')

        # startswith is safe on the empty string, n[0] is not
        if n.startswith(('1', '3', '5')):
            n = n[1:]

        if n.endswith('S2'):
            n = n[:-2]

        if n.endswith('D23864'):  # FIXME another trailing zero
            log.critical(_ovalue)
            n = n.replace('D23864', 'D023864')

        if n != _ovalue:
            log.debug(f'\n{_ovalue}\n{n}')
        return n
示例#3
0
        def warn(triple):
            """ log any element of triple that is not a URIRef, BNode or
                Literal, or whose ``_value`` is a dict, then hand the
                triple back unchanged
            """
            rdf_terms = (rdflib.URIRef, rdflib.BNode, rdflib.Literal)
            for term in triple:
                suspicious = (not isinstance(term, rdf_terms)
                              or isinstance(getattr(term, '_value', None), dict))
                if suspicious:
                    log.critical(term)

            return triple
示例#4
0
    def setup(cls, *, local_only=False):
        """ make sure we have all datasources

            The first call that runs to completion does the work, later
            calls return immediately.

            local_only: when true the sheets, ``organ`` and ``member``
            helpers are all replaced by stubs that answer None or an
            empty list. ``cls.no_google`` on its own only stubs out the
            sheets.
        """
        # FIXME this is a mess

        # The flag is read and written through the same attribute expression
        # on purpose. Inside a class body ``Integrator.__setup`` is name
        # mangled while a string such as '__setup' handed to hasattr is not,
        # so a hasattr test on the literal can never see the flag.
        try:
            already_setup = Integrator.__setup
        except AttributeError:
            already_setup = False

        if already_setup:
            return  # already setup

        for _cls in cls.mro():
            if _cls != cls and hasattr(_cls, 'setup'):
                _cls.setup()

        dat.DatasetStructure.rate = cls.rate

        # unanchored helpers
        if cls.no_google or local_only:
            log.critical('no google no organ data')

            class FakeOrganSheet:
                modality = lambda v: None
                organ_term = lambda v: None
                award_manual = lambda v: None
                byCol = _byCol([['award', 'award_manual', 'organ_term'], []])
                techniques = lambda v: []
                protocol_uris = lambda v: []

            class FakeAffilSheet:
                def __call__(self, *args, **kwargs):
                    return

            cls.organs_sheet = FakeOrganSheet
            cls.affiliations = FakeAffilSheet()
        else:
            cls.organs_sheet = sheets.Organs()  # ipv6 resolution issues :/
            cls.affiliations = sheets.Affiliations()

        if local_only:
            cls.organ = lambda award: None
            cls.member = lambda first, last: None
        else:
            # no_google by itself does not stub out organ, only local_only does
            cls.organ = OrganData()
            if hasattr(State, 'member'):
                cls.member = State.member
            else:
                log.error('State missing member, using State seems '
                          'like a good idea until you go to multiprocessing')
                cls.member = lambda first, last: None

        # only mark as done once everything above succeeded so that a failed
        # attempt can be retried
        Integrator.__setup = True
示例#5
0
        def lookup(d, one, two):
            """ return the first entry of ``d[one][two]``

                Gives None when either key is absent or the stored list is
                empty. More than one entry is logged as critical and the
                first one is still returned.
            """
            if one not in d:
                return None

            inner = d[one]
            if two not in inner:
                return None

            candidates = inner[two]
            if not candidates:
                return None

            if len(candidates) > 1:
                log.critical(f'WE NEED ORCIDS! {one} {two} -> {candidates}')
                # organization maybe?
                # or better, check by dataset?

            return candidates[0]
示例#6
0
    def counts(self):
        """ total size and number of directories and files under this path

            A path that is not a directory is counted on its own,
            otherwise everything yielded by ``rchildren`` is counted.
            Sizes come from ``c.cache.meta.size``. Files whose size is
            None are refreshed once when ``_refresh_on_missing`` is set.
            Anything that still has no usable metadata afterwards is
            logged and left out of the total.

            Returns a dict with keys ``size`` (a FileSize), ``dirs`` and
            ``files``. The dict is stored on ``self._counts`` and reused
            on later calls.
        """
        if not hasattr(self, '_counts'):
            size = 0
            dirs = 0
            files = 0
            need_meta = []
            gen = self.rchildren if self.is_dir() else (self,)

            for c in gen:
                if c.is_dir():
                    dirs += 1
                else:
                    files += 1  # testing for broken symlinks is hard
                    try:
                        maybe_size = c.cache.meta.size
                    except AttributeError as e:
                        log.error(f'no cache or no meta for {c}\n{e}')
                        continue

                    if maybe_size is None:
                        need_meta.append(c)
                    else:
                        size += maybe_size

            if need_meta and self._refresh_on_missing:
                nl = '\n'
                log.info(
                    f'refreshing {len(need_meta)} files with missing metadata in {self}'
                    f'\n{nl.join(_.as_posix() for _ in need_meta)}')
                new_caches = Async(rate=self.rate)(deferred(c.cache.refresh)()
                                                   for c in need_meta)
                for c in new_caches:  # FIXME first time around meta doesn't get updated ??
                    if c is None:
                        continue  # file was deleted (logged previously)

                    meta = c.meta
                    # a refresh can still leave the size unset, adding None
                    # to the running total would raise TypeError
                    if meta is None or meta.size is None:
                        log.critical(f'missing metdata! {c}')
                        continue

                    size += meta.size

            self._counts = dict(size=FileSize(size), dirs=dirs, files=files)

        return self._counts
示例#7
0
    def _protocol_uris_resolved(self):
        """ yield the dereferenced form of every entry in ``protocol_uris``

            Entries without a ``dereference`` attribute are wrapped in
            ``idlib.StreamUri`` first. After each result has been handed
            to the consumer, its ``progenitor.status_code`` is checked and
            any value of 400 or above is recorded through ``addError``.

            A uri that fails to resolve is skipped. Other failures are
            either recorded through ``addError`` or logged, and iteration
            moves on to the next uri.
        """
        # FIXME quite slow ...
        for start_uri in self.protocol_uris:
            log.debug(start_uri)
            try:
                if not hasattr(start_uri, 'dereference'):
                    start_uri = idlib.StreamUri(start_uri)

                end_uri = start_uri.dereference()
                yield end_uri
                sc = end_uri.progenitor.status_code
                if sc >= 400:  # 400 itself is an error as well
                    msg = f'error accessing {end_uri} {sc}'
                    self.addError(msg, blame='submission', logfunc=logd.error)
            except idlib.exceptions.ResolutionError:
                pass  # FIXME I think we already log this error?
            except requests.exceptions.MissingSchema as e:
                self.addError(e, blame='submission', logfunc=logd.error)
            except OntId.BadCurieError as e:
                self.addError(e, blame='submission', logfunc=logd.error)
            except Exception as e:
                # Exception rather than BaseException: the yield above sits
                # inside this try, so GeneratorExit (raised there when the
                # consumer closes the generator early), KeyboardInterrupt
                # and SystemExit have to propagate instead of being logged
                # and followed by another yield.
                log.exception(e)
                log.critical('see exception above')
示例#8
0
    def setup(cls, *, local_only=False):
        """ make sure we have all datasources

            The first call that runs to completion does the work, later
            calls return immediately.

            local_only: when true the sheets, ``organ`` and ``member``
            helpers are all replaced by stubs that answer None or an
            empty list. ``cls.no_google`` on its own only stubs out the
            sheets.
        """
        # FIXME this is a mess

        # The flag is read and written through the same attribute expression
        # on purpose. Inside a class body ``Integrator.__setup`` is name
        # mangled while a string such as '__setup' handed to hasattr is not,
        # so a hasattr test on the literal can never see the flag.
        try:
            already_setup = Integrator.__setup
        except AttributeError:
            already_setup = False

        if already_setup:
            return  # already setup

        for _cls in cls.mro():
            if _cls != cls and hasattr(_cls, 'setup'):
                _cls.setup()

        dat.DatasetStructure.rate = cls.rate

        class FakeOrganSheet:
            modality = lambda v: None
            organ_term = lambda v: None
            award_manual = lambda v: None
            byCol = _byCol([['award', 'award_manual', 'organ_term'], []])
            techniques = lambda v: []
            protocol_uris = lambda v: []

        class FakeAffilSheet:
            def __call__(self, *args, **kwargs):
                return

        class FakeOverviewSheet:
            def __call__(self, *args, **kwargs):
                return

        # unanchored helpers
        if cls.no_google or local_only:
            log.critical('no google no organ data')
            cls.organs_sheet = FakeOrganSheet
            cls.affiliations = FakeAffilSheet()
            cls.overview_sheet = FakeOverviewSheet()
        else:
            # ipv6 resolution issues :/ also issues with pickling
            # Organs(fetch_grid=True) kills parallelism, the bare call is ok
            # if fetch_grid = False @ class level
            cls.organs_sheet = sheets.Organs()
            cls.affiliations = sheets.Affiliations()
            cls.overview_sheet = sheets.Overview()

            # Notes from earlier debugging of the parallelism problem:
            # - zapping the cached spreadsheet services (on sheets.Sheet and
            #   on the instances) apparently doesn't help
            # - swapping in the fakes one at a time showed that it is just
            #   the organs sheet, affiliations and overview are OK
            # - byCol is unpickleable (super duper sigh), removing it from
            #   the sheets was tried as well

        if local_only:
            cls.organ = lambda award: None
            cls.member = lambda first, last: None
        else:
            # no_google by itself does not stub out organ, only local_only does
            cls.organ = OrganData()
            if hasattr(State, 'member'):
                cls.member = State.member
            else:
                log.error('State missing member, using State seems '
                          'like a good idea until you go to multiprocessing')
                cls.member = lambda first, last: None

        # only mark as done once everything above succeeded so that a failed
        # attempt can be retried
        Integrator.__setup = True