Example #1
 def init(self,**kwargs):
     self.__dict__.update(ginit(self,**kwargs))
     if 'database' in self.__dict__ and type(self.database) == Database:
       # already have database stored
       pass
     else:
       self.database = Database(self.db_file,\
                         **(fdict(self.__dict__.copy())))
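
The snippet above either reuses a Database instance that was passed in or builds one from the object's own settings. A minimal, self-contained sketch of that "reuse or create" idiom (the Database stand-in and the Holder class here are illustrative assumptions, not the library's real classes):

    class Database:
        # stand-in for the library's Database class (assumption)
        def __init__(self, db_file, **kwargs):
            self.db_file = db_file

    class Holder:
        def __init__(self, db_file=None, database=None, **settings):
            self.db_file = db_file
            self.__dict__.update(settings)
            if isinstance(database, Database):
                # an existing Database was passed in: reuse it
                self.database = database
            else:
                # otherwise build one from our settings
                self.database = Database(self.db_file, **settings)

    h1 = Holder(db_file='work/db.json', verbose=True)
    h2 = Holder(database=h1.database)  # shares the same Database instance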
Example #2
    def __init__(self, **kwargs):
        kwargs['defaults'] = {
         'store_msg'  : [],\
         'database'   : None,\
         'product'    : 'MCD15A3H',\
         'tile'       : 'h08v06',\
         'log'        : None,\
         'day'        : '01',\
         'doy'        : None,
         'month'      : '*',\
         'sds'        : None,
         'year'       : "2019",\
         'site'       : 'https://e4ftl01.cr.usgs.gov',\
         'size_check' : False,\
         'noclobber'  : True,\
         'local_dir'  : 'work',\
         'local_file' : None,\
         'db_file'    : None,\
         'db_dir'     : 'work',\
         'verbose'    : False,\
         'stderr'     : sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        if 'database' in self.__dict__ and type(self.database) == Database:
            # already have database stored
            pass
        else:
            self.database = Database(self.db_file,\
                              **(fdict(self.__dict__.copy(),ignore=['db_dir','db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))
        # list of tiles
        if type(self.tile) is str:
            self.tile = [self.tile]

        if type(self.sds) is str:
            self.sds = [self.sds]
        if self.sds is not None:
            self.msg(f'initial SDS {self.sds}')
            self.required_sds = self.sds

        # for most transactions, we want all SDS
        # so self.sds should reflect that
        self.sds = None
        response = self.database.get_from_db('SDS', self.product)
        if response:
            self.msg("found SDS names in database")
            self.sds = response
            self.msg(self.sds)
            # require them all
            if 'required_sds' not in self.__dict__:
                self.required_sds = self.sds
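
Assuming this __init__ belongs to the Modis class shown in full in Example #5, any of the defaults above can be overridden by keyword at construction; a hedged sketch:

    # override product, tile and verbosity; everything else keeps its default
    modis = Modis(product='MOD10A1', tile=['h19v03'], verbose=True)
    print(modis.product, modis.tile)   # -> MOD10A1 ['h19v03']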
Example #3
    def __init__(self, **kwargs):
        kwargs['defaults'] = {
         'store_msg'  : [],\
         'database'   : None,\
         'product'    : 'MCD15A3H',\
         'tile'       : 'h08v06',\
         'log'        : None,\
         'day'        : '01',\
         'doy'        : None,
         'month'      : '*',\
         'sds'        : None,
         'year'       : "2019",\
         'site'       : 'https://e4ftl01.cr.usgs.gov',\
         'size_check' : False,\
         'noclobber'  : True,\
         'local_dir'  : 'work',\
         'local_file' : None,\
         'db_file'    : None,\
         'db_dir'     : 'work',\
         'verbose'    : False,\
         'stderr'     : sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        if 'database' in self.__dict__ and type(self.database) == Database:
            # already have database stored
            pass
        else:
            self.database = Database(self.db_file,\
                              **(fdict(self.__dict__.copy(),ignore=['db_dir','db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))

        # list of tiles
        if type(self.tile) is str:
            self.tile = [self.tile]

        if type(self.sds) is str:
            self.sds = [self.sds]
Example #4
    def stitch_date(self, year, doy, get_files=False, test=False):
        '''stitch data for date'''
        year = int(year)
        doy = int(doy)

        dater = (datetime.datetime(year, 1, 1) +\
                   datetime.timedelta(doy - 1)).strftime('%Y %m %d').split()
        self.year = f'{year}'
        self.month = f'{str(int(dater[1])) :0>2s}'
        self.day = f'{str(int(dater[2])) :0>2s}'

        d = self.__dict__.copy()
        fd = fdict(d)
        # don't need to read it
        fd['no_read'] = True
        ofilebase = f"{self.product}/data.__SDS__." + \
                    f"{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"

        hdf_urls = self.get_url(**(fd))

        if not (len(hdf_urls) and (type(hdf_urls[0]) == URL)):
            if get_files:
                return None, None
            return [None]

        if 'db_file' in self.__dict__:
            if 'database' not in self.__dict__:
                # load database
                d = self.__dict__.copy()
                self.database = Database(
                    self.db_file, **(fdict(d, ignore=['db_dir', 'db_file'])))

        if not test and not get_files:
            # look up in db
            warp_args = None
            dstNodata = None
            step = 1
            #this_set = f"{self.product}.{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"
            store_flag = 'modis'
            kwargs = {'year': self.year, 'doy':doy,'day':self.day,'month':self.month,'step':step,\
                    'warp_args':warp_args,'product': self.product, 'dstNodata':dstNodata, 'tile': self.tile}
            mkey = json.dumps(kwargs)
            # this is an hdf file
            response = self.database.get_from_db(store_flag, mkey)
            if response and self.noclobber:
                # test
                if self.test_ok(response[0]):
                    # safe to return
                    self.msg(f'positive response from database')
                    ofiles = response
                    return ofiles
                else:
                    msg = f'WARNING: invalid entry {response[0]} in database {str(self.db_file)}'
                    print(msg)
                    self.msg(msg)

        try:
            hdf_files = [str(f.local()) for f in hdf_urls]
        except:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
        if get_files:
            sds = self.get_sds(hdf_files, do_all=False)
            return hdf_files, sds

        sds = self.get_sds(hdf_files, do_all=True)
        if sds == []:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        # early return if we just want sds
        if test == True:
            return sds
        if len(sds) == 0:
            # failed to get SDS: need to download example file
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        ofiles = []
        if len(sds) > len(self.sds):
            self.msg(f"ERROR in product {self.product} specification of SDS")
            self.msg(f"all SDS claimed to be: {len(self.sds)}")
            self.msg(self.sds)
            self.msg(f"But request for {len(sds)} SDSs made")
            self.msg(sds)
            sys.exit(1)
        for i, sd in enumerate(sds):
            ofile = f'{ofilebase.replace("__SDS__",self.sds[i])}.vrt'.replace(
                ' ', '_')
            spatial_file = Path(f"{self.local_dir[0]}", ofile)
            spatial_file.parent.mkdir(parents=True, exist_ok=True)
            g = gdal.BuildVRT(spatial_file.as_posix(), sds[i])
            if not g:
                d = self.__dict__
                print(
                    f"problem building dataset for {spatial_file} with {fdict(d)}"
                )
                sys.exit(1)
            del g
            ofiles.append(Path(spatial_file).absolute().as_posix())
        # store in db
        cache = {store_flag: {mkey: ofiles}}
        #self.database.set_db(cache,write=True)
        return ofiles
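
A hedged usage sketch for stitch_date, following its two return modes above (a Modis instance as in Example #5 is assumed):

    modis = Modis(product='MCD15A3H', tile=['h17v03', 'h17v04'])
    # default mode: build per-SDS VRT mosaics for 2019, day-of-year 41
    vrt_files = modis.stitch_date(2019, 41)
    # get_files mode: return the raw HDF files and their SDS lists instead
    hdf_files, sds = modis.stitch_date(2019, 41, get_files=True)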
Example #5
class Modis():
    '''
  get MODIS datasets from the server
  '''
    def __init__(self, **kwargs):
        kwargs['defaults'] = {
         'store_msg'  : [],\
         'database'   : None,\
         'product'    : 'MCD15A3H',\
         'tile'       : 'h08v06',\
         'log'        : None,\
         'day'        : '01',\
         'doy'        : None,
         'month'      : '*',\
         'sds'        : None,
         'year'       : "2019",\
         'site'       : 'https://e4ftl01.cr.usgs.gov',\
         'size_check' : False,\
         'noclobber'  : True,\
         'local_dir'  : 'work',\
         'local_file' : None,\
         'db_file'    : None,\
         'db_dir'     : 'work',\
         'verbose'    : False,\
         'stderr'     : sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        if 'database' in self.__dict__ and type(self.database) == Database:
            # already have database stored
            pass
        else:
            self.database = Database(self.db_file,\
                              **(fdict(self.__dict__.copy(),ignore=['db_dir','db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))
        # list of tiles
        if type(self.tile) is str:
            self.tile = [self.tile]

        if type(self.sds) is str:
            self.sds = [self.sds]
        if self.sds is not None:
            self.msg(f'initial SDS {self.sds}')
            self.required_sds = self.sds

        # for most transactions, we want all SDS
        # so self.sds should reflect that
        self.sds = None
        response = self.database.get_from_db('SDS', self.product)
        if response:
            self.msg("found SDS names in database")
            self.sds = response
            self.msg(self.sds)
            # require them all
            if 'required_sds' not in self.__dict__:
                self.required_sds = self.sds

    #def __del__(self):
    #  cache = {"done" : { "done" : "exit" }}
    #  self.database.set_db(cache,write=True)

    def msg(self, *args):
        '''msg to self.stderr'''
        this = str(*args)
        try:
            # DON'T REPEAT MESSAGES ... doesn't work as yet
            if this in self.store_msg:
                return
            self.store_msg.append(this)
        except:
            self.store_msg = [this]
        try:
            if self.verbose or (self.log is not None):
                print('-->', *args, file=self.stderr)
        except:
            pass

    def get_data(self,
                 year,
                 doy=None,
                 idict=None,
                 month=None,
                 day=None,
                 step=1,
                 fatal=False):
        '''
    Return data dictionary of MODIS dataset for specified time period

    args:
      year  : year (2000 to present for MOD, or 2002 to present if using MYD)
              NB this is ignored if idict is given
 
    options:
      doy   : day in year (1-365/366), or day in month if month specified
              (1-28/31 as appropriate), or None. Can be a list.
      day   : day in month or None. Can be a list.
      month : month index 1-12 or None. Can be a list.
      step  : dataset step. Default 1, but set to 4 for a 4-day product,
              8 for 8-day, 365/366 for yearly etc.
      fatal : default False. If True, exit if dataset not found.
      idict : data file dictionary provided by eg call to
              self.get_modis(year,doy=None,month=None,step=1,fatal=False)
              see get_modis for more details
 
    returns:
      data dictionary with keys specified by:
            - self.sds list 
            - or all SDS if self.sds is None (default)
      plus a key 'bandnames' giving the DOY of each band

      Each data item is a 2- or 3-dimensional numpy array:

      For a single date:
        kwargs = {
            'tile'      :    ['h17v03', 'h17v04', 'h18v03', 'h18v04'],
            'product'   :    'MCD15A3H',
            'sds'       :    'Lai_500m',
        }
        modis = Modis(**kwargs)
        # specify day of year (DOY) and year
        data_MCD15A3H = modis.get_data(2019,doy=1+4*10)

        print(data_MCD15A3H.keys())
        dict_keys(['Lai_500m', 'bandnames', 'files'])

        print(data_MCD15A3H['Lai_500m'].shape)
        (4800, 4800)
 
        print(len(data_MCD15A3H['bandnames']))
        1

      If a list of days, or a month or year is specified, the datasets are 3-D:
        kwargs = {
            'tile'      :    ['h19v03'],
            'product'   :    'MOD10A1',
            'sds'       :     ['NDSI_Snow_Cover']
        }

        year  = 2019
        month = 1
        # get the data
        modis = Modis(**kwargs)
        # specify month and year
        data_MOD10A1 = modis.get_data(year,month=1)

        print(data_MOD10A1.keys())
        dict_keys(['NDSI_Snow_Cover', 'bandnames', 'files'])

        print(data_MOD10A1['NDSI_Snow_Cover'].shape)
        (31, 2400, 2400)

        print(len(data_MOD10A1['bandnames']))
        31
        

    '''
        idict = idict or self.get_modis(
            year, day=day, doy=doy, month=month, step=step, fatal=fatal)
        # for get_data, we only want required_sds
        try:
            if 'required_sds' in self.__dict__:
                sds = self.required_sds
                bandnames = idict['bandnames']
            else:
                bandnames = idict['bandnames']
                del idict['bandnames']
                self.required_sds = idict.keys()
                sds = self.required_sds

            vfiles = [idict[k] for k in sds]

            data = []
            for i, s in enumerate(sds):
                g = gdal.Open(vfiles[i])
                dataset = g.ReadAsArray()
                if dataset is None:
                    msg = f"WARNING: no datasets in get_data() for {vfiles[i]}\n" +\
                          f"check datasets and database file {str(self.db_file)}"
                    print(msg)
                    self.msg(msg)
                    if fatal == True:
                        sys.exit(1)
                data.append(dataset)
            # enforce 3D
            data = np.atleast_3d(np.array(data).T).T
            odict = dict(zip(sds, data))
            odict['bandnames'] = bandnames
            odict['files'] = idict
            return odict
        except:
            self.msg("Error calling get_data")
        return {}

    def monkey(self, kwargs):
        # serialise the settings dictionary to use as a database key
        return json.dumps(kwargs)
        # NB: everything below is unreachable legacy code from an earlier
        # hand-rolled serialisation, kept for reference
        keys = np.array(list(kwargs.keys()))
        keys.sort()
        s = ''
        for k in keys:
            v = kwargs[k]
            if type(v) is dict:
                ss = '{' + f'{self.monkey(v)}' + '}'
            elif type(v) is list:
                ss = "_".join(v)
            else:
                ss = str(v)
            s = s + '.' + f"{k}:{ss}"
        s = self.tidy(s.replace(' ', '_'))
        return s

    def sort_vfiles(self, vfiles, sds):
        # reconcile the order of sds and vfiles list
        _sds = np.array([s.replace(" ", "_") for s in sds])
        _vfiles = np.array([f.split('/')[-1].split('.')[1] for f in vfiles])
        index = tuple([np.where(_vfiles == ts)[0][0] for ts in _sds])
        vf = vfiles.copy()
        vfiles = [vf[i] for i in index]
        return vfiles

    def get_modis(self,year,doy=None,day=None,month=None,step=1,\
                       warp_args=None,dstNodata=None,fatal=False):
        '''
    Return data dictionary of MODIS datasets for specified time period
      
    args:     
      year  : year (2000 to present for MOD, or 2002 to present if using MYD)
      
    options:  
      doy       : day in year (1-365/366), or day in month if month specified
                  (1-28/31 as appropriate), or None. Can be a list.
      day       : day in month or None. Can be a list.
      month     : month index 1-12 or None. Can be a list.
      step      : dataset step. Integer. Default 1, but set to 4 for a 4-day
                  product, 8 for 8-day, 365/366 for yearly etc.
      dstNodata : fill value 
      warp_args : sub-setting and warping control
      fatal     : default False. If True, exit if dataset not found.
     
    returns:
      data dictionary with SDS names as keys and gdal VRT filenames as values,
      plus a key 'bandnames' giving the DOY of each band
            
      For a single date:
        kwargs = {
            'tile'      :    ['h17v03', 'h17v04', 'h18v03', 'h18v04'],
            'product'   :    'MCD15A3H',
        } 
        modis = Modis(**kwargs)
        # specify day of year (DOY) and year
        data_MCD15A3H = modis.get_modis(2019,1+4*10)
        
        print(data_MCD15A3H.keys())
        dict_keys(['Lai_500m', ... 'bandnames'])
        
        print(len(data_MCD15A3H['bandnames']))
        1
        
      If a list of days, or a month or year is specified, the datasets are 3-D:
        kwargs = {
            'tile'      :    ['h19v03'],
            'product'   :    'MOD10A1',
        }
        
        year  = 2019
        month = 1
        # get the data
        modis = Modis(**kwargs)
        # specify month and year
        data_MOD10A1 = modis.get_modis(year,month=1)

        print(data_MOD10A1.keys())
        dict_keys(['NDSI_Snow_Cover', ... 'bandnames'])

        print(len(data_MOD10A1['bandnames']))
        31

     If a month and day are specified, the datasets are 3-D:
        kwargs = {      
            'tile'      :    ['h22v10'],
            'product'   :    'MCD64A1',
        }

        year  = 2019
        month = 1
        day = 1
        # get the data
        modis = Modis(**kwargs)
        # specify year, month and day
        data_MCD64A1 = modis.get_modis(year,month=month,day=day)

        print(data_MCD64A1.keys())
        dict_keys(['Burn Date', ... 'bandnames'])

        print(len(data_MCD64A1['bandnames']))
        31

    '''
        # check in db
        #store for diagnostics

        kwargs = {'year': year, 'doy':doy,'day':day,'month':month,'step':step,\
                  'warp_args':warp_args,'product': self.product, 'dstNodata':dstNodata, 'tile': self.tile}
        mkey = json.dumps(kwargs)
        response = self.database.get_from_db("modis-vrt", mkey)

        if response is not None:
            if (type(response) is list) and (len(response)):
                return response[0]
            elif (type(response) is dict):
                # test to see it has all SDS
                ok = True
                for s in self.sds:
                    if s not in response.keys():
                        ok = False
                if ok:
                    return response
            else:
                return response

        dates = list_of_doys(year, doy=doy, day=day, month=month, step=step)
        year_list, doy_list = list(dates['year']), list(dates['doy'])
        bandnames = [f'{y}-{d :0>3d}' for d, y in zip(doy_list, year_list)]
        vfiles = self.stitch(year=year_list,doy=doy_list,\
                             dstNodata=dstNodata,warp_args=warp_args)
        # error
        if (not vfiles) or (len(vfiles) == 0) or (len(vfiles) and
                                                  (vfiles[0] == None)):
            msg = f"WARNING: no datasets in get_data() for product {self.product} tile {self.tile} year {year} month {month} doy {doy}"
            print(msg)
            self.msg(msg)
            self.msg(f"dict   : {self.__dict__}")
            self.msg(f"kwargs : {kwargs}")
            try:
                return dict(zip(self.sds, [[]] * len(self.sds)))
            except:
                return {None: None}

        # cache before selection
        odict = dict(zip(self.sds, vfiles))
        odict['bandnames'] = bandnames
        cache = {"modis-vrt": {mkey: odict}}
        self.database.set_db(cache, write=True)

        # now filter to just what was asked for
        if 'required_sds' in self.__dict__:
            sds = self.required_sds
        else:
            sds = self.sds

        vfiles = self.sort_vfiles(vfiles, sds)
        odict = dict(zip(sds, vfiles))
        odict['bandnames'] = bandnames
        return odict

    def tidy(self, s):
        ss = str(s).replace("'", "").replace('"',
                                             '').replace(',', '_').replace(
                                                 '[', '_').replace(']', '_')
        ss = ss.replace(' ', '')
        return ss

    def read_data(self, ifile):
        g = gdal.Open(ifile)
        if not g:
            return None, None
        data = np.array([
            g.GetRasterBand(i).ReadAsArray()
            for i in range(1, len(g.GetFileList()))
        ])
        b = g.GetRasterBand(1)
        return data, (b.GetScale(), b.GetOffset())

    def fix_sds(self, sds, year, doy):
        '''fix sds'''
        if sds:
            return sds
        #if 'required_sds' in self.__dict__:
        #  self.sds = self.required_sds

        # else look in dictionary
        response = self.database.get_from_db("SDS", self.product)
        if response:
            self.msg("found SDS names in database")
            self.sds = response
            self.msg(self.sds)
            return self.sds

        # else need to derive it
        self.msg("polling for SDS names")
        self.stitch_date(year, doy, test=True)
        if self.sds is None:
            # try again
            self.msg("error finding SDS names")
            return []
        #if 'required_sds' not in self.__dict__:
        #  self.required_sds = self.sds
        self.msg(f"SDS: {self.sds}")
        return self.sds

    def get_blank(self, dstNodata, s, i):
        # no dataset
        if ('blanco' in self.__dict__) and (Path(self.blanco).exists()):
            output_filename = self.blanco
            self.msg(f'using file with value {dstNodata} {output_filename}')
            bthis = f'blank-{dstNodata}-{str(i):0>2s}'
            this = output_filename
        else:
            try:
                # repeat last for now
                self.msg(
                    f'no dataset for sds {s} for dataset {i}: using filler')
                # NB: relies on ofiles from the enclosing scope; if that is
                # missing or empty, the except below catches the error
                this = ofiles[-1]
                output_filename = this.replace('.vrt', f'{dstNodata}_blank.tif')
                if not Path(output_filename).exists():
                    # need to set to invalid number ...
                    self.msg(f'creating dummy file')
                    create_blank_file(this, output_filename, value=dstNodata)
                self.msg(
                    f'using file with value {dstNodata} {output_filename}')
                self.blanco = output_filename
                bthis = f'blank-{dstNodata}-{str(i):0>2s}'
                this = output_filename
            except:
                bthis = f'blank-{dstNodata}-{str(i):0>2s}'
                this = None
        return this, bthis

    def stitch(self,
               year,
               month=None,
               day=None,
               doy=None,
               step=1,
               warp_args=None,
               dstNodata=None):
        '''create vrt dataset of all images for doys / a month / year'''
        # get a dict of year, doy
        dates = list_of_doys(year, month=month, day=day, doy=doy, step=step)
        years, doys = list(dates['year']), list(dates['doy'])

        ndays = len(years)
        self.msg(f"create vrt dataset for doys {doys} year {years}")

        sfiles = {}
        bandlist = []
        # sds may not be defined
        self.fix_sds(self.sds, years[0], doys[0])

        # set nodata value
        if (warp_args is not None) and (dstNodata is None):
            dstNodata = warp_args['dstNodata']
        if dstNodata is None:
            dstNodata = 0

        if (warp_args is not None) and ('dstNodata' not in warp_args):
            warp_args['dstNodata'] = dstNodata

        # loop over sds
        store_files = [None] * len(years)
        for i, s in enumerate(self.sds):
            ofiles = []
            bandlist = []
            for j, (year, doy) in enumerate(zip(years, doys)):
                year = int(year)
                doy = int(doy)
                ifiles = self.stitch_date(year, doy)

                if (not ifiles) or (len(ifiles) and ifiles[0] == None):
                    this, bthis = self.get_blank(dstNodata, s, i)
                else:
                    this, bthis = ifiles[i], f'{str(i):0>2s}'

                store_files[j] = ifiles

                if this:
                    bandlist.append(bthis)
                    ofiles.append(this)
            if len(ofiles):
                ofile = f"{self.product}/data.{self.sds[i]}.{self.tidy(self.tile)}." + \
                        f"{year}.{str(int(doy)) :0>3s}.{str(int(step)) :0>3s}.vrt"
                ofile = ofile.replace(' ', '_')
                spatial_file = Path(f"{self.local_dir[0]}", ofile)
                spatial_file.parent.mkdir(parents=True, exist_ok=True)
                g = gdal.BuildVRT(spatial_file.as_posix(),
                                  ofiles,
                                  separate=True)
                try:
                    g.FlushCache()
                except:
                    pass
                if not g:
                    d = self.__dict__
                    print(
                        f"problem building dataset for {spatial_file} with {fdict(d)}"
                    )
                del g
                if warp_args is not None:
                    warp_args['format'] = 'VRT'
                    # warp the files using warp_args
                    spatial_ofile = Path(spatial_file.as_posix().replace(
                        '.vrt', '_warp.vrt'))
                    self.msg(f"warping to {spatial_ofile} using {warp_args}")
                    g = gdal.Warp(spatial_ofile.as_posix(),
                                  spatial_file.as_posix(), **warp_args)
                    try:
                        g.FlushCache()
                    except:
                        pass
                    if not g:
                        d = self.__dict__
                        print(
                            f"problem building dataset for {spatial_ofile} with {fdict(d)}"
                        )
                    del g
                    sfiles[s] = spatial_ofile
                else:
                    sfiles[s] = spatial_file

        # build list of files
        ofiles = [str(i) for i in sfiles.values()]
        return ofiles

    def test_ok(self, hdffile, dosubs=True):
        '''sanity check on file'''
        if not Path(hdffile).exists():
            msg = f'test: file {hdffile} does not exist'
            self.msg(msg)
            return False
        g = gdal.Open(hdffile)
        if not g:
            msg = f'test: file {hdffile} failed to open with gdal'
            self.msg(msg)
            del g
            return False
        # check referenced files
        if dosubs:
            for f in g.GetFileList():
                # dont do too much recursion
                if not self.test_ok(f, dosubs=False):
                    return False
        data = g.ReadAsArray(xsize=1, ysize=1)
        if data is None:
            msg = f'test: file {hdffile} failed: None returned in read '
            self.msg(msg)
            del g
            return False
        return True

    def stitch_date(self, year, doy, get_files=False, test=False):
        '''stitch data for date'''
        year = int(year)
        doy = int(doy)

        dater = (datetime.datetime(year, 1, 1) +\
                   datetime.timedelta(doy - 1)).strftime('%Y %m %d').split()
        self.year = f'{year}'
        self.month = f'{str(int(dater[1])) :0>2s}'
        self.day = f'{str(int(dater[2])) :0>2s}'

        d = self.__dict__.copy()
        fd = fdict(d)
        # don't need to read it
        fd['no_read'] = True
        ofilebase = f"{self.product}/data.__SDS__." + \
                    f"{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"

        hdf_urls = self.get_url(**(fd))

        if not (len(hdf_urls) and (type(hdf_urls[0]) == URL)):
            if get_files:
                return None, None
            return [None]

        if 'db_file' in self.__dict__:
            if 'database' not in self.__dict__:
                # load database
                d = self.__dict__.copy()
                self.database = Database(
                    self.db_file, **(fdict(d, ignore=['db_dir', 'db_file'])))

        if not test and not get_files:
            # look up in db
            warp_args = None
            dstNodata = None
            step = 1
            #this_set = f"{self.product}.{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"
            store_flag = 'modis'
            kwargs = {'year': self.year, 'doy':doy,'day':self.day,'month':self.month,'step':step,\
                    'warp_args':warp_args,'product': self.product, 'dstNodata':dstNodata, 'tile': self.tile}
            mkey = json.dumps(kwargs)
            # this is an hdf file
            response = self.database.get_from_db(store_flag, mkey)
            if response and self.noclobber:
                # test
                if self.test_ok(response[0]):
                    # safe to return
                    self.msg(f'positive response from database')
                    ofiles = response
                    return ofiles
                else:
                    msg = f'WARNING: invalid entry {response[0]} in database {str(self.db_file)}'
                    print(msg)
                    self.msg(msg)

        try:
            hdf_files = [str(f.local()) for f in hdf_urls]
        except:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
        if get_files:
            sds = self.get_sds(hdf_files, do_all=False)
            return hdf_files, sds

        sds = self.get_sds(hdf_files, do_all=True)
        if sds == []:
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        # early return if we just want sds
        if test == True:
            return sds
        if len(sds) == 0:
            # failed to get SDS: need to download example file
            for f in hdf_urls:
                d = f.read_bytes()
            hdf_files = [str(f.local()) for f in hdf_urls]
            sds = self.get_sds(hdf_files, do_all=True)

        ofiles = []
        if len(sds) > len(self.sds):
            self.msg(f"ERROR in product {self.product} specification of SDS")
            self.msg(f"all SDS claimed to be: {len(self.sds)}")
            self.msg(self.sds)
            self.msg(f"But request for {len(sds)} SDSs made")
            self.msg(sds)
            sys.exit(1)
        for i, sd in enumerate(sds):
            ofile = f'{ofilebase.replace("__SDS__",self.sds[i])}.vrt'.replace(
                ' ', '_')
            spatial_file = Path(f"{self.local_dir[0]}", ofile)
            spatial_file.parent.mkdir(parents=True, exist_ok=True)
            g = gdal.BuildVRT(spatial_file.as_posix(), sds[i])
            if not g:
                d = self.__dict__
                print(
                    f"problem building dataset for {spatial_file} with {fdict(d)}"
                )
                sys.exit(1)
            del g
            ofiles.append(Path(spatial_file).absolute().as_posix())
        # store in db
        cache = {store_flag: {mkey: ofiles}}
        #self.database.set_db(cache,write=True)
        return ofiles

    def get_files(self, year, doy):
        '''
    get MODIS dataset for specified doy and year

    return:
      files : list of filenames
      sds   : list of SDS names
    '''
        return self.stitch_date(year, doy, get_files=True)

    def has_wildness(self, uc):
        is_wild = np.logical_or(np.array(['*' in i for i in uc]),
                                np.array(['?' in i for i in uc]))
        is_wild_2 = np.logical_or(np.array(['[' in i for i in uc]),
                                  np.array([']' in i for i in uc]))
        is_wild = np.logical_or(is_wild, is_wild_2)
        return is_wild
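        # e.g. for uc = ['h17*', 'h18v03', 'h1[78]v03'] this returns
        # array([ True, False,  True]): any of *, ? or [ ] marks an entry wild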

    def get_url(self, **kwargs):
        '''
    Get URL object list for NASA MODIS products
    for the specified product, tile, year, month, day
    
    Keyword Arguments:
    
    verbose:  bool
    site    : str 
    product : str e.g. 'MCD15A3H'
    tile    : str e.g. 'h08v06'
    year    : str valid 2000-present
    month   : str 01-12
    day     : str 01-(28,29,30,31)
    
    '''
        site = ('site' in kwargs
                and kwargs['site']) or 'https://e4ftl01.cr.usgs.gov'

        product = ('product' in kwargs and kwargs['product']) or self.product
        tile = ('tile' in kwargs and kwargs['tile']) or self.tile
        day = ('day' in kwargs and kwargs['day']) or self.day
        month = ('month' in kwargs and kwargs['month']) or self.month
        year = ('year' in kwargs and kwargs['year']) or self.year
        doy = ('doy' in kwargs and kwargs['doy']) or self.doy

        if product[:5] == "MOD10" or product[:5] == "MYD10":
            # NSIDC
            site = "https://n5eil01u.ecs.nsidc.org"
            self.msg(f"Snow and ice product {product}")
            self.msg(f"switching to server {site}")

        if product[:3] == "MOD":
            code = "MOST"
        elif product[:3] == "MYD":
            code = "MOSA"
        else:
            code = "MOTA"
        self.msg(f"product {product} -> code {code}")

        # special cases
        #if self.product[:5] == 'MCD19':
        #  self.site = 'https://ladsweb.modaps.eosdis.nasa.gov'
        # you should put some tests in
        site_dir = f'{code}/{product}.006/{year}.{month}.{day}'
        if site == 'https://ladsweb.modaps.eosdis.nasa.gov':
            if self.doy is None:
                try:
                    doy = (datetime.datetime(int(year)+1, 1, 1) - \
                           datetime.datetime(year=int(year),month=int(month),day=int(day))).days
                except:
                    self.verbose = True
                    self.msg(
                        f"ERROR: you need to specify doy explicitly for product {self.product}"
                    )
                    sys.exit(1)
                site_dir = f'archive/allData/6/{product}/{year}/{doy}'

        site_file = f'*.{tile}*.hdf'
        kwargs = {"verbose"    : self.verbose,\
                  "full_url"   : True,\
                  "skipper"    : True,
                  "noclobber"  : self.noclobber,\
                  "db_dir"     : self.db_dir,\
                  "db_file"    : self.db_file,\
                  "log"        : self.log,\
                  "size_check" : self.size_check,\
                  "local_file" : self.local_file,\
                  "database"   : self.database.database,
                  "local_dir"  : self.local_dir }

        hdf_urls = []
        url = None
        for t in self.tile:
            url = ((url is None) and URL(site,site_dir,**kwargs)) or \
                   url.update(site,site_dir,**kwargs)
            hdf_urls += url.glob(f'{self.product}*.{t}*.hdf')
        if len(hdf_urls) == 0:
            return [None]

        self.db_file = hdf_urls[0].db_file

        return hdf_urls
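
    # hedged usage sketch: get_url is normally driven internally (see
    # stitch_date, which passes fdict(self.__dict__)), but the keywords
    # documented above can also be supplied directly, e.g.
    #
    #   hdf_urls = modis.get_url(year='2019', month='01', day='09')
    #   hdf_files = [str(f.local()) for f in hdf_urls]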

    def sdscode(self, s1):
        '''PITA decoding of SDS from HDF field that comes from s0,s1 in g.GetSubDatasets()'''
        return (' '.join(s1.split()[1:-3])).split(
            self.product)[0].split('MOD')[0].strip()

    def get_sds(self, hdf_files, do_all=False):
        '''get defined SDS or all'''
        if type(hdf_files) is not list:
            hdf_files = [hdf_files]

        if do_all or ((self.sds is None) or len(self.sds) == 0 or \
          ((len(self.sds) == 1) and len(self.sds[0]) == 0)) :
            response = self.database.get_from_db('SDS', self.product)
            if response:
                self.msg("found SDS names in database")
                self.sds = response
                self.msg(self.sds)
                # require them all
                if 'required_sds' not in self.__dict__:
                    self.required_sds = self.sds

        if len(hdf_files) < 1:
            return []
        try:
            lfile = hdf_files[0]
            if not Path(lfile).exists():
                return []
            g = gdal.Open(str(lfile))
            if not g:
                return []
        except:
            # need to pull this first
            return []

        #hdf_files = list(np.sort(np.unique(np.array(hdf_files))))
        # in case not defined
        if ((self.sds is None) or len(self.sds) == 0 or \
          ((len(self.sds) == 1) and len(self.sds[0]) == 0)) :
            self.msg("trying to get SDS names")
            self.sds = [self.sdscode(s1) for s0, s1 in g.GetSubDatasets()]
            cache = {"SDS": {self.product: self.sds}}
            self.database.set_db(cache, write=True)

            if 'required_sds' in self.__dict__:
                self.msg(f'require: {self.required_sds}')
            self.msg(self.sds)

        all_subs = [(s0.replace(str(lfile), '{local_file}'), s1)
                    for s0, s1 in g.GetSubDatasets()]
        this_subs = []

        if (not do_all) and ('required_sds' in self.__dict__):
            sds = self.required_sds
        else:
            sds = self.sds

        for sd in sds:
            this_subs += [s0 for s0, s1 in all_subs if sd == self.sdscode(s1)]
        ofiles = [[sub.format(local_file=str(lfile)) for lfile in hdf_files]
                  for sub in this_subs]
        return ofiles
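
Pulling the pieces together, a short usage sketch for the Modis class, following the worked examples in the get_data docstring:

    kwargs = {
        'tile'    : ['h17v03'],
        'product' : 'MCD15A3H',
        'sds'     : 'Lai_500m',
    }
    modis = Modis(**kwargs)
    data = modis.get_data(2019, doy=1 + 4*10)
    # per the docstring: keys 'Lai_500m', 'bandnames' and 'files'
    print(data['Lai_500m'].shape, len(data['bandnames']))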
Example #6
class URL(urlpath.URL,urllib.parse._NetlocResultMixinStr, PurePath):
  '''
  Derived from 
  https://raw.githubusercontent.com/chrono-meter/urlpath/master/urlpath.py

  to provide more compatibility with pathlib.Path functionality

  '''

  '''
  modified new and init
  '''
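
  # Hedged usage sketch (keyword arguments as passed by Modis.get_url in
  # Example #5):
  #
  #   u = URL('https://e4ftl01.cr.usgs.gov', 'MOTA/MCD15A3H.006',
  #           verbose=True, local_dir=['work'])
  #   if u.exists():
  #       data = u.read_bytes()   # cached through the Database on re-reads
  #       local = u.local()       # the local .store file, once downloaded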
  def __new__(cls,*args,**kwargs):
      self = super(URL, cls).__new__(cls,*args) 
      self.init(**kwargs)
      return self

  def __init__(self,*args,**kwargs):
      # remove any trailing '/' from args
      args = list(args)
      for i,arg in enumerate(args):
        arg = str(arg)
        while arg[-1] == '/':
          if len(arg) == 1:
            break
          arg = arg[:-1]
        args[i] = arg
      args = tuple(args)
      if not kwargs:
        kwargs = {}
      self.fourOhOne = False

  def init(self,**kwargs):
      self.__dict__.update(ginit(self,**kwargs))
      if 'database' in self.__dict__ and type(self.database) == Database:
        # already have database stored
        pass
      else:
        self.database = Database(self.db_file,\
                          **(fdict(self.__dict__.copy())))

  def __del__(self):
      try:
        del self.database
        self.msg(f'clone: {self.is_clone}')
      except:
        pass

  def __exit__(self, exc_type, exc_value, traceback):
      '''cleanup'''
      try:
        del self.database
      except:
        pass
      tempfile.clean()

  def dedate(self):
    if '_cache_original' in  self.__dict__:
      self.__dict__ = self._cache_original.copy()
      if '_cache_original' in  self.__dict__:
        del self.__dict__['_cache_original']

  def update(self,*args,**kwargs):
    '''update args in object'''
    if '_cache_original' not in  self.__dict__:
      self._cache_original = self.__dict__.copy()

    # whether we specify the full URL in update or not

    if ('full_url' in kwargs) and (kwargs['full_url'] == True):
      args = list(args)
    else:
      args = [str(self)] + list(args)   
    url = super(URL, self).__new__(self,*args)
    url.is_clone = True
    url.__dict__ = fdict(self._cache_original.copy())
    return url

  def check_path(self,ppp):
    '''
    You can corrupt the database by having files where we expect directories
    so we need to clean these up
    '''
    parts = list(ppp.parts)
    for i,part in enumerate(parts):
      this = Path(*(parts[:i+1]))
      if this.exists() and (not this.is_dir()):
        # warning: non-directory found where a directory is expected
        self.msg(f'found non-directory term in path {str(this)}')
        try: 
          self.msg('trying to correct')
          this.unlink()
          return True
        except:
          self.msg('failed to correct')
          return False
    return True

  def indb(self):
    # might be in database
    store_url  = str(self)
    store_flag = 'data'
    ifile = self.get_name(self.database.get_from_db(store_flag,store_url))
    if ifile:
      old = self.local_file
      self.local_file = Path(ifile)
      if self.local_file.exists() and self.local_file.suffix == '.store':
        return True
      if self.local_file.suffix != '.store':
        self.local_file = old
        return False
      return True
    return False

  def call_local(self):
    '''
    sort out and return local_file

    This comes from the URL and local_dir
    and ends .store
    '''
    if self.indb():
      if callable(self.local):
        self.msg(f"**unexpected method for self.local {self.local}")
      else:
        return self.local
    
    kwargs = fdict(self.__dict__.copy())
    if 'local_dir' in kwargs and \
        (kwargs['local_dir'] is not None) and \
        len(kwargs['local_dir']) > 0:
      self.local_dir = list_resolve(kwargs['local_dir'])

    if (self.local_dir is None) or (len(self.local_dir) == 0):
      self.local_dir = list_resolve(self.db_dir)
    self.local_file = Path(self.local_dir[0],self.as_posix().split("://")[1]) 
    #self.local_file = Path(self.local_dir[-1],str(self.with_scheme(''))[2:]).absolute()
    # replace ' '
    self.local_file = Path(str(self.local_file).replace(' ','_'))
    suffix = self.local_file.suffix
    self.local_file = self.local_file.with_suffix(suffix + '.store')
    self.check_path(self.local_file.parent)
    self.local_file.parent.mkdir(parents=True,exist_ok=True) 
    return self.local_file

  def get_read_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=object)[readlist]
    return (filelist.size and filelist[-1]) or None

  def get_write_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=object)[writelist]
    return (filelist.size and filelist[-1]) or None

  def get_readwrite_file(self,filelist):
    filelist = name_resolve(filelist)
    readlist,writelist = list_info(filelist)
    filelist = np.array(filelist,dtype=object)[np.logical_and(np.array(writelist),np.array(readlist))]
    return (filelist.size and filelist[-1]) or None

  def _local_file(self,mode="r"):
    '''get local file name'''
    if self.indb():
      return self.local_file
    self.call_local()
    # clobber
    if not self.noclobber:
      local_file  = self.get_write_file(self.local_file)
      # file name for writing
    elif mode == "r":
      local_file = self.get_read_file(self.local_file)
      if local_file and not local_file.exists():
        self.msg(f"read file {local_file} doesn't exist")
        self.local_file = self.local_file[self.local_file != local_file]
        return self._local_file(mode="r")
    else:
      # file name for writing
      local_file = self.get_write_file(self.local_file)

    if local_file == None:
      return local_file

    # local_file is real
    if local_file.exists():
      if local_file.is_dir():
        try:
          local_file.rmdir()
          return None
        except:
          pass

      # delete the file if noclobber is False
      if not self.noclobber:
        try:
          self.msg(f"deleting existing file {local_file}")
          local_file.unlink()
        except:
          pass
      else:
        self.msg(f"keeping existing file {local_file}")
      
    return local_file

  def open(self,mode='r',buffering=-1, encoding=None, errors=None, newline=None):
      '''
      Open the file pointed by this URL and return a file object, as
      the built-in open() function does.
      '''
      kwargs = {'mode':mode,'buffering':buffering,'encoding':encoding,\
                'errors':errors,'newline':newline}

      if self._isfile():
        self.msg(f'{self} is not a URL: interpreting as Path')
        return Path(self).open(**kwargs)

      # check in database
      store_url  = str(self)
      store_flag = 'data'

      binary = ('b' in mode) and ('t' not in mode) 

      get_download,ifile,ofile = self._test_already_local()

      # get from ofile
      if ofile and Path(ofile).exists():
        ofile = Path(ofile)
        if binary:
          data = io.BytesIO(ofile.read_bytes())
        else:
          data = io.StringIO(ofile.read_text())
        cache = {store_flag : { str(store_url) : str(ofile) }}
        self.database.set_db(cache)
        return data

      # get from ifile
      if ifile and Path(ifile).exists():
        ifile = Path(ifile)
        # keep the raw data so it can be both returned and copied to ofile
        if binary:
          idata = ifile.read_bytes()
          data = io.BytesIO(idata)
        else:
          idata = ifile.read_text()
          data = io.StringIO(idata)
        self.check_path(ifile.parent)
        ifile.parent.mkdir(parents=True,exist_ok=True)
        if ofile:
          ofile = Path(ofile)
          if binary:
            ofile.write_bytes(idata)
          else:
            ofile.write_text(idata)
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return data

      if 'r' in mode:
        self.msg(f"reading data from {self}")
        # read 
        if binary:
          self.msg("open() binary stream")
          idata = self.read_bytes()
          data = io.BytesIO(idata)
        else:
          self.msg("open() text stream")
          idata = self.read_text()
          data = io.StringIO(idata)
        if ofile:
          try:
            ofile = Path(ofile)
            if binary:
              ofile.write_bytes(idata)
            else:
              ofile.write_text(idata)
            cache = {store_flag : { str(store_url) : str(ofile) }}
            self.database.set_db(cache)
          except:
            pass
        return data

      if ofile:
        return Path(ofile).open(**kwargs)

  def write_text(self,data, encoding=None, errors=None):
      '''Open the file in text mode, write to it, and close the file.'''
      kwargs = {'encoding':encoding}
      if self._isfile():
          self.msg(f'{self} is not a URL: interpreting as Path')
          return Path(self).write_text(data)

      get_download,ifile,ofile = self._test_already_local()

      if ofile and Path(ofile).exists():
         self.msg("file exists so not writing")
         return Path(ofile).stat().st_size

      if ofile:
        self.msg(f'opening output file {ofile}')
        return Path(ofile).write_text(data,**kwargs)

  def write_bytes(self,data):
      '''Open the file in bytes mode, write to it, and close the file.'''

      if self._isfile():
          self.msg(f'{self} is not a URL: interpreting as Path')
          return Path(self).write_bytes(data)

      get_download,ifile,ofile = self._test_already_local()

      if ofile and Path(ofile).exists():
         self.msg("file exists so not writing")
         return Path(ofile).stat().st_size

      if ofile:
        self.msg(f'opening output file {ofile}')
        return Path(ofile).write_bytes(data)

  def _get_login(self,head=True):
      u = self
      with requests.Session() as session:
        if u.username and u.password:
          session.auth = u.username,u.password
        else:
          uinfo = Cylog(u.anchor).login()
          if uinfo == (None,None):
            return None
          session.auth = uinfo[0].decode('utf-8'),uinfo[1].decode('utf-8')
          u.msg(f'logging in to {u.anchor}')
        try:
          r1 = session.request('get',u)
          if r1.status_code == 200:
            u.msg(f'data read from {u.anchor}')
            return r1
          # try encoded login
          if head:
            r2 = session.head(r1.url)
          else:
            r2 = session.get(r1.url)
          if r2.status_code == 200:
            u.msg(f'data read from {u.anchor}')
          if type(r2) == requests.models.Response:
            return r2
        except:
          u.msg(f'failure reading data from {u.anchor}')
          return None
      u.msg(f'failure reading data from {u.anchor}')
      return None

  def msg(self,*args):
    '''msg to self.stderr'''
    this = str(*args)
    try:
      # DON'T REPEAT MESSAGES ... doesn't work as yet
      if this in self.store_msg:
        return
      self.store_msg.append(this)
    except:
      self.store_msg = [this]
    try:
        if self.verbose or (self.log is not None):
            print('-->',*args,file=self.stderr)
    except:
        pass

  def get_name(self,ofile):
    if ofile == [] or ofile == {}:
      ofile = None
    if type(ofile) == list:
      ofile = ofile[0]
    if type(ofile) == dict:
      ofile = list(ofile.values())[0]
    return ofile

  def _test_already_local(self):
    # get local_filename we would use for output
    # delete it if not noclobber
    # don't create dir if it doesn't exist

    # return False if already downloaded

    # check in database
    store_url  = str(self)
    store_flag = 'data'

    ifile = self.get_name(self.database.get_from_db(store_flag,store_url))

    if ifile is not None:
      ifile = Path(ifile)
      if not ifile.exists():
        # otherwise incorrect db entry
        self.database.rm_from_db(store_flag,store_url)
      if not self.noclobber and ifile.exists():   
        # clobber
        self.msg(f'deleting local file {ifile}')
        ifile.unlink()
        ifile = None

    ofile = self.get_name(self._local_file("w"))
    if callable(ofile):
      print(f"ERROR in type of self.local {ofile}: should be str or list")
      sys.exit(1)
    if ifile is None:
      return True,ifile,ofile

    if not ifile.exists():
      return True,None,ofile

    # simple if no size check
    if (not self.size_check) and ifile.exists():
      self.msg(f'local file {ifile} exists')  # no size check
      # cache this in case we want to re-use it
      cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache)
      return False,ifile,ofile

    if self.size_check:
      lsize = ifile.stat().st_size
      rsize = self.stat().st_size
      if rsize < 0:
        # then its not available
        self.msg(f'not downloading file')
        # we might not want to download

        # cache this in case we want to re-use it
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return False,ifile,ofile

      elif lsize == rsize:
        self.msg(f'local and remote file sizes equal {lsize}')
        self.msg(f'not downloading file')
        # we might not want to download
        # cache this in case we want to re-use it
        cache = {store_flag : { str(store_url) : str(ifile) }}
        self.database.set_db(cache)
        return False,ifile,ofile
      self.msg(f'local and remote file sizes not equal {lsize}/{rsize} respectively')
      self.msg(f'so we need to download (or set size_check=False)')
      if not self.noclobber:
        if ifile and ifile.exists():
          self.msg(f'deleting local ifile {ifile}')
          ifile.unlink()
          ifile = None
        if ofile and ofile.exists():
          self.msg(f'deleting local ofile {ofile}')
          ofile.unlink()
          ofile = None

    return True,ifile,ofile


  def read_text(self, encoding=None, errors=None):
    '''Open the URL, read in text mode and return text.'''  

    kwargs = {'encoding':encoding}
    u = self
    store_url  = str(u)
    store_flag = 'data'

    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      return Path(u).read_text()

    get_download,ifile,ofile = self._test_already_local()

    text = None

    # get it from ofile
    if ofile and Path(ofile).exists():
      text = Path(ofile).read_text(**kwargs)
      cache = {store_flag : { str(store_url) : str(ofile) }}
      self.database.set_db(cache)
      return text

    # get it from ifile 
    if ifile and Path(ifile).exists():
      self.msg(f'opening already downloaded file {ifile}')
      text = Path(ifile).read_text(**kwargs)
      if ofile:
        ofile = Path(ofile)
        ofile.write_text(text)
        cache = {store_flag : { str(store_url) : str(ofile) }}
      else:
        cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache)
      return text

    if text is not None:
      return text

    try:
      u.msg(f'trying {self}')
      text = u.get_text()
      if text and ofile:
        try:
          ofile = Path(ofile)
          self.check_path(ofile.parent)
          ofile.parent.mkdir(parents=True,exist_ok=True)
          ofile.write_text(text)
          cache = {store_flag : { str(store_url) : str(ofile) }}
          self.database.set_db(cache)
          return text
        except:
          pass
      if text:
        return text
    except:
      pass

    u.msg(f'getting login')
    r = u._get_login(head=False)
    if type(r) != requests.models.Response:
      return None
    if r.status_code == 200:
      u.msg(f'code {r.status_code}')
      text = r.text
      if ofile:
         ofile = Path(ofile)
         self.check_path(ofile.parent)
         ofile.parent.mkdir(parents=True,exist_ok=True)
         ofile.write_text(text)
         cache = {store_flag : { str(store_url) : str(ofile) }}
         self.database.set_db(cache)
      return text

    if type(r) == requests.models.Response:
        u.msg(f'code {r.status_code}')
        return r
    u.msg(f'failed to connect')
    return None

  def local(self,get_file=False):
    ''' local filename'''
    u = self
    get_download,ifile,ofile = u._test_already_local()
    for f in [ifile,ofile]:
      if f and get_file:
        if  Path(f).exists():
          return Path(f)
        else:
          # pull file
          self.read_bytes()
          return self.local(get_file=get_file)
      elif f:
        return Path(f)
    return None

  def exists(self):
    '''Whether this URL exists and can be accessed'''

    u = self
    store_url  = str(u)
    store_flag = 'exists' 
 
    ex = self.database.get_from_db(store_flag,store_url)
    if ex is not None:
      return ex
 
    ex = False 
    get_download,ifile,ofile = u._test_already_local()
    if ofile and Path(ofile).exists():
      ex = True
      cache = {store_flag : { str(store_url) : True }}
    if not ex:
      ex = self.ping()
    if ex:
      cache = {store_flag : { str(store_url) : True }}
      self.database.set_db(cache)
      
    return ex

  def stat(self, head=False):
    '''
    Some of the functionality of stat for URLs

    Currently, only stat_result.st_size is used.
    '''
    input = [0,0,0,0,0,0,self._st_size(head=head),0,0,0]
    stat_result = os.stat_result(input)
    return stat_result

  def _isfile(self):
    if self.scheme == '' or self.scheme == 'file':
      self.msg('we are a file ...')
      return True
    #self.msg('we are not a file ...')
    return False

  def _st_size(self, head=False):
    '''
    retrieve the remote file size

    You should specify any required login/password with
    with_components(username=str,password=str)

    Returns:
      int if data available
    Or:
      -1
    '''
    u = self
    # check in database
    store_url  = u
    store_flag = 'st_size'
    remote_size = self.database.get_from_db(store_flag,store_url)
    if remote_size is not None:
      return remote_size

    remote_size = -1
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      # not a URL
      u = Path(u)
      return u.stat().st_size
    try:
      u.msg(f'trying {u}')
      if head:
        r = u.head()
      else:
        r = u.get()
      if type(r) == requests.models.Response:
        if r.status_code == 200:
          u.msg(f'code 200')
          hdr = r.headers
          if "Content-Length" in hdr.keys():
              remote_size = int(hdr["Content-Length"])
          elif 'Transfer-Encoding' in hdr.keys() and hdr["Transfer-Encoding"] == 'chunked':
              u.msg(f'file is compressed, remote size not directly available')
          #self.msg(hdr)
          if remote_size > 0:
            # cache this in case we want to re-use it
            cache = {store_flag : { str(store_url) : remote_size }}
            self.database.set_db(cache)
            return(remote_size)

        # 
        if r.status_code == 401:
          u.msg(f'code 401')
          self.fourOhOne = True

        if self.fourOhOne:
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=head)
          if r.status_code == 200:
            u.msg(f'code 200')
            hdr = r.headers
            if "Content-Length" in hdr:
              remote_size = int(hdr["Content-Length"])
            if remote_size > 0:
              # cache this in case we want to re-use it
              cache = {store_flag : { str(store_url) : remote_size }}
              self.database.set_db(cache)
              return(remote_size)
        elif head == False:
          u.msg(f'code {r.status_code}')
          return remote_size
        # return it even if 0
        return remote_size
    except:
      pass
    if head == False:
      u.msg('failed to connect')
      # give up
      remote_size = -2
      # cache the failure code so we do not retry every time
      cache = {store_flag : { str(store_url) : remote_size }}
      self.database.set_db(cache)
      return remote_size
    # HEAD was inconclusive: retry with a full GET
    u.msg('trying get')
    return u._st_size(head=False)

  def ping(self, head=True):
    '''
    ping the URL data return True if response is 200

    You should specify any required login/password using
    with_components(username=str,password=str)

    Returns:
      True if data available
    Or:
      False
    '''
    u = self
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      # not a URL
      u = Path(u)
      return u.exists()
    try:
      u.msg(f'trying {u}')
      if head:
        r = u.head()
      else:
        r = u.get()
      if type(r) == requests.models.Response:
        if r.status_code == 200:
          u.msg(f'code 200')
          return True
        if r.status_code == 401:
          u.msg(f'code 401')
          u.msg(f'trying another')
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=head)
          if r.status_code == 200:
            u.msg(f'code 200')
            return True
        elif head == False:
          u.msg(f'code {r.status_code}')
          return False
    except:
      pass
    if head == False:
      u.msg('failed to connect')
      return False
    # HEAD was inconclusive: retry with a full GET
    u.msg('trying get')
    return u.ping(head=False)
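
  # Usage sketch: ping() tries a cheap HEAD first, retries the fuller
  # two-pass login on a 401, and finally falls back to a GET. Any
  # credentials are assumed already set via with_components:
  #
  #   ok = URL('https://e4ftl01.cr.usgs.gov').ping()   # True / False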

  def read_bytes(self):
    '''
    Open the URL data in bytes mode, read it and return the data

    This first tries self.get() but if the authorisation is more complex
    (e.g. when using a NASA server) then a fuller 2-pass session
    is used.

    You should specify any required login/password using
    with_components(username=str,password=str)

    Returns:
      data from url
    Or:
      None                     : on failure 
      requests.models.Response : on connection problem
    '''

    if 'skipper' in self.__dict__:
      skipper = self.skipper
    else:
      skipper = False

    u = self
    store_url  = str(u)
    store_flag = 'data'
    if u._isfile():
      self.msg(f'{u} is not a URL: interpreting as Path')
      return Path(u).read_bytes()

    get_download,ifile,ofile = self._test_already_local()

    # get from ofile
    if ofile and Path(ofile).exists():
      # convert to Path before reading: ofile may arrive as a string
      ofile = Path(ofile)
      data = ofile.read_bytes()
      cache = {store_flag : { str(store_url) : str(ofile) }}
      self.database.set_db(cache,write=True)
      return data

    # get from ifile
    if ifile and Path(ifile).exists():
      ifile = Path(ifile)
      self.msg(f'opening already downloaded file {ifile}')
      data = ifile.read_bytes()
      if ofile: 
        ofile = Path(ofile)
        self.check_path(ofile.parent)
        ofile.parent.mkdir(parents=True,exist_ok=True)
        ofile.write_bytes(data)
        cache = {store_flag : { str(store_url) : str(ofile) }}
      else:
        cache = {store_flag : { str(store_url) : str(ifile) }}
      self.database.set_db(cache,write=True)
      return data

    try:
      if not skipper: 
        u.msg(f'trying {u}')
        r = u.get()
        
      if skipper or (type(r) == requests.models.Response):
        if (not skipper) and r.status_code == 200:
          u.msg(f'code {r.status_code}')
          data = r.content
          if ofile:
            ofile = Path(ofile)
            self.check_path(ofile.parent)
            ofile.parent.mkdir(parents=True,exist_ok=True)
            ofile.write_bytes(data)
            cache = {store_flag : { str(store_url) : str(ofile) }}
            self.database.set_db(cache,write=True)
          return data
        if skipper or (r.status_code == 401):
          if not skipper:
            u.msg(f'code {r.status_code}')
            u.msg(f'trying another')
          # unauthorised
          # more complex session login and auth
          # e.g. needed for NASA Earthdata login
          u.msg(f'getting login')
          r = u._get_login(head=False)
          if type(r) != requests.models.Response:
            return None
          if r.status_code == 200:
            u.msg(f'code {r.status_code}')
            data = r.content
            if ofile:
              ofile = Path(ofile)
              self.check_path(ofile.parent)
              ofile.parent.mkdir(parents=True,exist_ok=True)
              ofile.write_bytes(data)
              cache = {store_flag : { str(store_url) : str(ofile) }}
              self.database.set_db(cache,write=True)
            return data
        else:
          u.msg(f'code {r.status_code}')
          return r
    except:
      pass

    u.msg(f'failed to connect')
    return None 
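
  # Usage sketch: read_bytes() is the main download entry point. With a
  # local_dir set, the payload is written to ofile and the URL -> file
  # mapping cached under the 'data' flag, so later calls read from disk
  # (hypothetical values):
  #
  #   u = URL('https://e4ftl01.cr.usgs.gov','MOTA',local_dir='work')
  #   b = u.read_bytes()   # bytes, a Response on odd status, or None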

  def _convert_to_abs(self,ilist):
    # this is slow and may not be needed
    self.msg(f'parsing URLs from html file {len(ilist)} items')
    return [self.update(*[str(self),l.rstrip('/#')],**(fdict(self.__dict__.copy()))) for l in ilist ]

  def _filter(self,links,pattern,pre_filter=True):
    # pre-filter on the raw link strings before the (slow) URL conversion
    if pre_filter:
      links = np.array([str(l).rstrip('/#') for l in links])
      matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links])
      links = list(links[matches])

    links = self._convert_to_abs(links)
    # self.done tracks, per pattern, the links already returned, so that
    # repeated calls during a multi-level glob never re-emit a link
    if 'done' not in self.__dict__:
      self.done = {}
    p = self.done.setdefault(pattern,[])
    olist = [u for u in links if u not in p]
    self.done[pattern] = p + olist
    return olist

  def has_wildness(self,uc):
    is_wild   = np.logical_or(np.array(['*' in i for i in uc]),
                              np.array(['?' in i for i in uc]))
    is_wild_2 = np.logical_or(np.array(['[' in i for i in uc]),
                              np.array([']' in i for i in uc]))
    is_wild = np.logical_or(is_wild,is_wild_2)
    return is_wild
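
  # Example: has_wildness() flags which URL parts contain glob magic
  # (*, ?, [ or ]), e.g. (hypothetical parts list)
  #
  #   uc = ['MOTA','MCD15A3H.006','2019.*.01','*.h08v06*.hdf']
  #   URL('https://e4ftl01.cr.usgs.gov').has_wildness(uc)
  #   # -> array([False, False,  True,  True])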

  def glob(self,pattern,pre_filter=True):
    '''
    Iterate over this subtree and yield all existing files (of any
    kind, including directories) matching the given relative pattern.

    The URL must return HTML that lxml can parse.

    Positional arguments:
       pattern  : to search for e.g. */2021.*.01
                  only wildcards * and ? considered at present

    '''
    u = self
    url = str(u)
    # strip any trailing slash
    if url[-1] == '/':
      url = url[:-1]
    url = self.update(url,pattern)
    # check in database
    store_url  = url
    store_flag = 'query' 
    olist = self.database.get_from_db(store_flag,store_url)
    if olist is not None:
      if type(olist) is list:
        return [self.update(o) for o in olist]
      return [self.update(olist)]

    # start at the top
    uc = np.array(url.parts)
    for i,w in enumerate(uc[1:]): 
      if i == 0:
        base_list = [self.update(uc[0])]
      new_list = []
      for b in base_list:
        # set to new item
        glob = self.update(b)._glob(w,pre_filter=pre_filter)
        
        # glob with the next item
        new_list = new_list + glob
      base_list = np.unique(np.array(new_list,dtype=object).flatten())

    base_list = np.unique(np.array(base_list,dtype=object))
 
    olist = list(np.array([self.update(i) for i in base_list]).flatten())
    self.dedate()

    for l in olist:
      l.init(**(fdict(self.__dict__.copy())))

    # cache this in case we want to re-use it
    cache = {store_flag : { str(store_url) : [str(i) for i in olist] }}
    self.database.set_db(cache)
    if type(olist) is list: 
      return [self.update(o) for o in olist]
    return [self.update(olist)]
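
  # Usage sketch: glob() expands the pattern level by level against the
  # server's HTML directory listings and caches the result list under
  # the 'query' flag (hypothetical pattern):
  #
  #   u = URL('https://e4ftl01.cr.usgs.gov/MOTA/MCD15A3H.006')
  #   files = u.glob('2019.06.*/MCD15A3H*.h08v06*.hdf')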

  def rglob(self, pattern,pre_filter=True):
    '''
    Recursively yield all existing files (of any kind, including
    directories) matching the given relative pattern, anywhere in
    this subtree.

    Positional arguments:
       pattern  : to search for e.g. 2021.*.01
                  only wildcards * and ? considered at present


    '''
    return self.glob(pattern,pre_filter=pre_filter)

  def flush(self):
    try:
      return self.database.set_db(self.database.database,write=True)
    except:
      return None

  def _glob(self, pattern,pre_filter=True):
    '''
    Iterate over this subtree and yield all existing files (of any
    kind, including directories) matching the given relative pattern.

    The URL must return HTML that lxml can parse.
    '''
    # take off trailing slash
    if pattern[-1] == '/':
      pattern = pattern[:-1]
    store_url  = str(self.update(pattern))
    store_flag = 'query'
    if not self.noclobber:
      # dont trust cache
      response = None
    else:
      response = self.database.get_from_db(store_flag,store_url)
    if response:
      self.msg(f'got response from database for {store_url}')
      self.msg(f'discovered {len(response)} files with pattern {pattern} in {str(self)}')
      return [self.update(str(f)) for f in response] 

    try:
      html = self.read_text()
      links = np.array([mylink.attrs['href'] for mylink in BeautifulSoup(html,'lxml').find_all('a')])
      links = np.array(self._filter(links,pattern,pre_filter=pre_filter))
      matches = np.array([fnmatch.fnmatch(str(l), '*'+pattern) for l in links]) 
      files = list(links[matches])
    except:
      files = []
    self.msg(f'discovered {len(files)} files with pattern {pattern} in {str(self)}')
   
    files = [str(i) for i in files]
    # cache this in db
    cache = {store_flag : { str(store_url) : files }}
    self.database.set_db(cache)
 
    return files 
Example #7
0
class Modis():
    '''
    get MODIS datasets from the server
    '''
    def __init__(self, **kwargs):
        kwargs['defaults'] = {
         'store_msg'  : [],\
         'database'   : None,\
         'product'    : 'MCD15A3H',\
         'tile'       : 'h08v06',\
         'log'        : None,\
         'day'        : '01',\
         'doy'        : None,
         'month'      : '*',\
         'sds'        : None,
         'year'       : "2019",\
         'site'       : 'https://e4ftl01.cr.usgs.gov',\
         'size_check' : False,\
         'noclobber'  : True,\
         'local_dir'  : 'work',\
         'local_file' : None,\
         'db_file'    : None,\
         'db_dir'     : 'work',\
         'verbose'    : False,\
         'stderr'     : sys.stderr
        }
        self.__dict__.update(ginit(self, **kwargs))
        if 'database' in self.__dict__ and type(self.database) == Database:
            # already have database stored
            pass
        else:
            self.database = Database(self.db_file,\
                              **(fdict(self.__dict__.copy(),ignore=['db_dir','db_file'])))

        self.translateoptions = gdal.TranslateOptions(
            gdal.ParseCommandLine("-of Gtiff -co COMPRESS=LZW"))

        # list of tiles
        if type(self.tile) is str:
            self.tile = [self.tile]

        if type(self.sds) is str:
            self.sds = [self.sds]

    def msg(self, *args):
        '''msg to self.stderr'''
        this = str(*args)
        try:
            # don't repeat messages ... doesn't work reliably as yet
            if this in self.store_msg:
                return
            self.store_msg.append(this)
        except:
            self.store_msg = [this]
        try:
            if self.verbose or (self.log is not None):
                print('-->', *args, file=self.stderr)
        except:
            pass

    def get_data(self, year, doy):
        '''return data dict for doy year as sds dictionary'''
        vfiles = self.stitch_date(year, doy)
        if (not vfiles) or (len(vfiles) and vfiles[0] is None):
            msg = f"WARNING: no datasets in get_data() for product {self.product} tile {self.tile} year {year} doy {doy}"
            print(msg)
            self.msg(msg)
            try:
                return dict(zip(self.sds, [[]] * len(self.sds)))
            except:
                return {None: None}
        if not self.sds:
            # recover from vfiles
            self.msg("trying to recover SDS from files")
            self.sds = [Path(i).name.split('.')[1] for i in vfiles]
            self.msg(self.sds)

        sds = [Path(i).name.split('.')[1] for i in vfiles]

        data = []
        for i, s in enumerate(sds):
            g = gdal.Open(vfiles[i])
            dataset = g.ReadAsArray()
            if dataset is None:
                msg = f"WARNING: no datasets in get_data() for {vfiles[i]}\n" +\
                      f"check datasets and database file {str(self.db_file)}"
                print(msg)
                self.msg(msg)
            data.append(dataset)
        return dict(zip(self.sds, data))
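
    # Usage sketch (using the defaults above): fetch all SDS arrays for
    # doy 153 of 2019 as a {sds_name : ndarray} dict; the key names come
    # from the product file, e.g. 'Lai_500m' for MCD15A3H:
    #
    #   modis = Modis(product='MCD15A3H', tile='h08v06', verbose=True)
    #   data = modis.get_data(2019, 153)
    #   lai = data['Lai_500m']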

    def read_data(self, ifile):
        g = gdal.Open(ifile)
        if not g:
            return None, None
        data = np.array([
            g.GetRasterBand(i).ReadAsArray()
            for i in range(1, len(g.GetFileList()))
        ])
        b = g.GetRasterBand(1)
        return data, (b.GetScale(), b.GetOffset())
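
    # Sketch: read_data() stacks every band of a stitched VRT into one
    # array and returns band 1's (scale, offset), so physical values are
    # data * scale + offset (hypothetical filename):
    #
    #   data, (scale, offset) = modis.read_data('work/data.Lai_500m.vrt')
    #   physical = data * scale + offset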

    def get_year(self, year, step=4):
        '''create vrt dataset of all images for a year'''
        year = int(year)
        self.year = f'{year}'
        ayear = (datetime.datetime(year + 1, 1, 1) -
                 datetime.datetime(year, 1, 1)).days

        sfiles = {}
        bandlist = []
        for i, s in enumerate(self.sds):
            ofiles = []
            bandlist = []
            for doy in range(1, ayear + 1, step):
                ifiles = self.stitch_date(year, doy)
                if (not ifiles) or (len(ifiles) and ifiles[0] is None):
                    # no dataset
                    try:
                        # repeat last for now
                        self.msg(
                            f'no dataset for sds {s} for dataset {i}: using filler'
                        )
                        this = ofiles[-1]
                        bthis = 'filler ' + bandlist[-1]
                    except:
                        this = None
                else:
                    this = ifiles[i]
                    bthis = f'{str(i):0>2s}'
                if this:
                    bandlist.append(bthis)
                    ofiles.append(this)
            if len(ofiles):
                ofile = f"data.{self.sds[i]}.{'_'.join(self.tile)}.{self.year}.vrt"
                ofile = ofile.replace(' ', '_')
                spatial_file = Path(f"{self.local_dir[0]}", ofile)
                g = gdal.BuildVRT(spatial_file.as_posix(),
                                  ofiles,
                                  separate=True)
                try:
                    g.FlushCache()
                except:
                    pass
                if not g:
                    d = self.__dict__
                    print(
                        f"problem building dataset for {spatial_file} with {fdict(d)}"
                    )
                del g
                sfiles[s] = spatial_file
                sfiles[s + '_name'] = bandlist
        return sfiles, bandlist
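
    # Usage sketch: build one multi-band VRT per SDS over a whole year;
    # step=4 matches the 4-day MCD15A3H compositing period. Keys of the
    # returned dict are SDS names plus '<sds>_name' for the band labels:
    #
    #   sfiles, bandlist = modis.get_year(2019, step=4)
    #   lai_vrt = sfiles['Lai_500m']         # hypothetical key
    #   bands   = sfiles['Lai_500m_name']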

    def test_ok(self, hdffile, dosubs=True):
        '''sanity check on file'''
        if not Path(hdffile).exists():
            msg = f'test: file {hdffile} does not exist'
            self.msg(msg)
            return False
        g = gdal.Open(hdffile)
        if not g:
            msg = f'test: file {hdffile} failed to open with gdal'
            self.msg(msg)
            del g
            return False
        # check referenced files
        if dosubs:
            for f in g.GetFileList():
                # don't do too much recursion
                if not self.test_ok(f, dosubs=False):
                    return False
        data = g.ReadAsArray(xsize=1, ysize=1)
        if data is None:
            msg = f'test: file {hdffile} failed: None returned in read '
            self.msg(msg)
            del g
            return False
        return True
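
    # Sketch: test_ok() is the cheap sanity check used before trusting a
    # cached database entry: open with gdal, recurse one level into the
    # referenced files, then read a single pixel (hypothetical filename):
    #
    #   if not modis.test_ok('work/data.Lai_500m.h08v06.2019.06.02.vrt'):
    #       pass   # fall through and rebuild the dataset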

    def stitch_date(self, year, doy):
        '''stitch data for date'''
        year = int(year)
        doy = int(doy)

        dater = (datetime.datetime(year, 1, 1) +\
                   datetime.timedelta(doy - 1)).strftime('%Y %m %d').split()
        self.year = f'{year}'
        self.month = f'{str(int(dater[1])) :0>2s}'
        self.day = f'{str(int(dater[2])) :0>2s}'

        d = self.__dict__.copy()
        hdf_urls = self.get_url(**(fdict(d)))

        if not (len(hdf_urls) and (type(hdf_urls[0]) == URL)):
            return [None]

        if 'db_file' in self.__dict__:
            if 'database' not in self.__dict__:
                # load database
                d = self.__dict__.copy()
                self.database = Database(
                    self.db_file, **(fdict(d, ignore=['db_dir', 'db_file'])))

        # look up in db
        this_set = f"{self.product}.{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}"
        store_flag = 'modis'
        response = self.database.get_from_db(store_flag, this_set)
        if response and self.noclobber:
            # test
            if self.test_ok(response[0]):
                # safe to return
                self.msg(f'positive response from database')
                ofiles = response
                return ofiles
            else:
                msg = f'WARNING: invalid entry {response[0]} in database {str(self.db_file)}'
                print(msg)
                self.msg(msg)

        for f in hdf_urls:
            d = f.read_bytes()
        hdf_files = [str(f.local()) for f in hdf_urls]
        sds = self.get_sds(hdf_files, do_all=True)
        ofiles = []
        if len(sds) > len(self.sds):
            self.msg(f"ERROR in product {self.product} specification of SDS")
            self.msg(f"all SDS claimed to be: {len(self.sds)}")
            self.msg(self.sds)
            self.msg(f"But request for {len(sds)} SDSs made")
            self.msg(sds)
            sys.exit(1)
        for i, sd in enumerate(sds):
            ofile = f"data.{self.sds[i]}." + \
                    f"{'_'.join(self.tile)}.{self.year}.{self.month}.{self.day}.vrt"
            ofile = ofile.replace(' ', '_')
            spatial_file = Path(f"{self.local_dir[0]}", ofile)
            g = gdal.BuildVRT(spatial_file.as_posix(), sds[i])
            if not g:
                d = self.__dict__
                print(
                    f"problem building dataset for {spatial_file} with {fdict(d)}"
                )
                sys.exit(1)
            del g
            ofiles.append(Path(spatial_file).absolute().as_posix())
        # store in db
        cache = {store_flag: {this_set: ofiles}}
        self.database.set_db(cache, write=True)
        return ofiles
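
    # Usage sketch: stitch_date() downloads the HDF granule(s) for each
    # tile on the given date and mosaics every SDS into its own VRT,
    # caching the file list under the 'modis' flag:
    #
    #   vfiles = modis.stitch_date(2019, 153)   # one VRT path per SDS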

    #def get_files(self,**kwargs):
    #  hdf_urls = self.get_url(**kwargs)
    #  hdf_files = [f.local() for f in hdf_urls]
    #  return hdf_files

    def has_wildness(self, uc):
        is_wild = np.logical_or(np.array(['*' in i for i in uc]),
                                np.array(['?' in i for i in uc]))
        is_wild_2 = np.logical_or(np.array(['[' in i for i in uc]),
                                  np.array([']' in i for i in uc]))
        is_wild = np.logical_or(is_wild, is_wild_2)
        return is_wild

    def get_url(self, **kwargs):
        '''
        Get URL object list for NASA MODIS products
        for the specified product, tile, year, month, day

        Keyword Arguments:

        verbose : bool
        site    : str
        product : str e.g. 'MCD15A3H'
        tile    : str e.g. 'h08v06'
        year    : str valid 2000-present
        month   : str 01-12
        day     : str 01-(28,29,30,31)
        '''
        site = ('site' in kwargs
                and kwargs['site']) or 'https://e4ftl01.cr.usgs.gov'

        product = ('product' in kwargs and kwargs['product']) or self.product
        tile = ('tile' in kwargs and kwargs['tile']) or self.tile
        day = ('day' in kwargs and kwargs['day']) or self.day
        month = ('month' in kwargs and kwargs['month']) or self.month
        year = ('year' in kwargs and kwargs['year']) or self.year
        doy = ('doy' in kwargs and kwargs['doy']) or self.doy

        if product[:5] == "MOD10" or product[:5] == "MYD10":
            # NSIDC
            site = "https://n5eil01u.ecs.nsidc.org"
            self.msg(f"Snow and ice product {product}")
            self.msg(f"switching to server {site}")

        if product[:3] == "MOD":
            code = "MOST"
        elif product[:3] == "MYD":
            code = "MOSA"
        else:
            code = "MOTA"
        self.msg(f"product {product} -> code {code}")

        # special cases
        #if self.product[:5] == 'MCD19':
        #  self.site = 'https://ladsweb.modaps.eosdis.nasa.gov'
        # you should put some tests in
        site_dir = f'{code}/{product}.006/{year}.{month}.{day}'
        if site == 'https://ladsweb.modaps.eosdis.nasa.gov':
            if doy is None:
                try:
                    # day of year: days since 1 January of that year, plus one
                    doy = (datetime.datetime(int(year), int(month), int(day)) - \
                           datetime.datetime(int(year), 1, 1)).days + 1
                except:
                    self.verbose = True
                    self.msg(
                        f"ERROR: you need to specify doy explicitly for product {self.product}"
                    )
                    sys.exit(1)
            site_dir = f'archive/allData/6/{product}/{year}/{doy}'

        site_file = f'*.{tile}*.hdf'
        kwargs = {"verbose"    : self.verbose,\
                  "full_url"   : True,\
                  "noclobber"  : self.noclobber,\
                  "db_dir"     : self.db_dir,\
                  "db_file"    : self.db_file,\
                  "log"        : self.log,\
                  "size_check" : self.size_check,\
                  "local_file" : self.local_file,\
                  "local_dir"  : self.local_dir }

        hdf_urls = []
        url = None
        for t in self.tile:
            url = ((url is None) and URL(site,site_dir,**kwargs)) or \
                   url.update(site,site_dir,**kwargs)
            hdf_urls += url.glob(f'{self.product}*.{t}*.hdf')
        if len(hdf_urls) == 0:
            return [None]

        self.db_file = hdf_urls[0].db_file

        return hdf_urls
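
    # Usage sketch: get_url() composes the server directory from the
    # product code and date, then globs for the tiles' HDF granules
    # (hypothetical date):
    #
    #   urls = modis.get_url(year='2019', month='06', day='02')
    #   # -> list of URL objects, or [None] if nothing matched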

    def sdscode(self, s1):
        '''PITA decoding of SDS from HDF field that comes from s0,s1 in g.GetSubDatasets()'''
        return (' '.join(s1.split()[1:-3])).split(
            self.product)[0].split('MOD')[0].strip()

    def get_sds(self, hdf_files, do_all=False):
        '''get defined SDS or all'''
        if type(hdf_files) is not list:
            hdf_files = [hdf_files]

        if len(hdf_files) < 1:
            return []
        lfile = hdf_files[0]
        g = gdal.Open(str(lfile))
        if not g:
            return []
        # in case not defined
        if do_all or ((self.sds is None) or len(self.sds) == 0 or \
          ((len(self.sds) == 1) and len(self.sds[0]) == 0)) :
            self.msg("trying to get SDS names")
            self.sds = [self.sdscode(s1) for s0, s1 in g.GetSubDatasets()]
            self.msg(self.sds)

        all_subs = [(s0.replace(str(lfile), '{local_file}'), s1)
                    for s0, s1 in g.GetSubDatasets()]
        this_subs = []
        for sd in self.sds:
            this_subs += [s0 for s0, s1 in all_subs if sd == self.sdscode(s1)]
        return [[sub.format(local_file=str(lfile)) for lfile in hdf_files]
                for sub in this_subs]
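
    # Sketch: get_sds() maps SDS names to GDAL subdataset strings, one
    # list per SDS across all input files (hypothetical path; the real
    # strings come from g.GetSubDatasets()):
    #
    #   subs = modis.get_sds(['work/MCD15A3H.A2019153.h08v06.006.hdf'])
    #   # subs[0][0] is e.g.
    #   # 'HDF4_EOS:EOS_GRID:"work/MCD15A3H...hdf":MOD_Grid_MCD15A3H:Lai_500m'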