def extract_environment(layer_name, x, postproc=lambda x:x, id_=None, lock=None):
    """Expects ALL locations to be in decimal degrees."""
    # Cache key: SHA1 of the raw point buffer, the layer path and the optional id.
    cache_fname = hashlib.sha1(x.tostring()+layer_name+str(id_)).hexdigest()+'.npy'
    layer_dir, layer_base = os.path.split(layer_name)
    layer_base = os.path.splitext(layer_base)[0]
    cache_path = os.path.join('anopheles-caches', cache_fname)

    # Cache hit: reuse the previously-saved evaluation.
    if cache_fname in os.listdir('anopheles-caches'):
        return layer_base, numpy.load(cache_path)

    # Cache miss: load the raster and evaluate it at the requested points.
    grid_lon, grid_lat, grid_data, grid_type = map_utils.import_raster(layer_base, layer_dir)

    # Shift both axes by half a cell so coordinates refer to cell centroids.
    grid_lon += (grid_lon[1]-grid_lon[0])/2.
    grid_lat += (grid_lat[1]-grid_lat[0])/2.

    # Nearest-neighbour (order=0) interpolation of the post-processed raster
    # at the point longitudes (x[:,0]) and latitudes (x[:,1]).
    extracted = map_utils.interp_geodata(grid_lon, grid_lat, postproc(grid_data).data,
                                         x[:,0], x[:,1], grid_data.mask,
                                         chunk=None, view='y-x+', order=0)
    del grid_data

    numpy.save(cache_path, extracted)
    return layer_base, extracted
def extract_environment(name, x, cache=True): """Expects ALL locations to be in decimal degrees.""" if cache: x_hash = hashlib.sha1(x.data).hexdigest() fname = os.path.split(name)[1] + '_' + x_hash + '.hdf5' if 'anopheles-caches' in os.listdir('.'): if fname in os.listdir('anopheles-caches'): hf = tb.openFile(os.path.join('anopheles-caches',fname)) out = hf.root.eval[:] hf.close() return out print 'Evaluation of environmental layer %s on array with SHA1 hash %s not found, recomputing.'%(name, hashlib.sha1(x.data).hexdigest()) hr = get_datafile(name) if hasattr(hr, 'lon'): grid_lon = hr.lon[:] else: grid_lon = hr.long[:] grid_lat = hr.lat[:] grid_data = hr.data if hasattr(grid_data.attrs,'view'): view = grid_data.attrs.view else: raise ValueError, "Key 'view' not found in data array's attrs for datafile %s. \n\ I could assume a default view, but you would suffer bitterly. \n\ I could not bear it, human."%grid_data._v_file.filename if hasattr(hr, 'mask'): grid_mask = hr.mask else: grid_mask = None if np.prod(hr.data.shape) > 1e8: grid_chunk = hr.data.chunkshape else: grid_chunk=None eval = map_utils.interp_geodata(grid_lon, grid_lat, grid_data, x[:,0], x[:,1], grid_mask, grid_chunk, view=view) hr._v_file.close() if cache: hf = tb.openFile(os.path.join('anopheles-caches',fname),'w') hf.createArray('/','eval',eval) hf.close() return eval
def nan_callback(lon_old, lat_old, data, lon_new, lat_new, order):
    """
    nan_handler passed to map_utils.interp_geodata below: fills values for
    points that fall on masked raster cells.

    For each new point, finds the nearest grid cell. If that cell is masked,
    grows a square window (up to half-width 10) around it until it contains
    at least one unmasked cell, then takes the mode of the unmasked values
    in the window. Unmasked cells are read directly.

    Raises ValueError if any output is still NaN.

    NOTE(review): 'mode' is not defined in this chunk — presumably
    scipy.stats.mode or similar; confirm against the rest of the file.
    """
    # Index of the nearest old-grid coordinate for every new point.
    lon_ind = np.argmin(np.abs(np.subtract.outer(lon_old, lon_new)), axis=0)
    lat_ind = np.argmin(np.abs(np.subtract.outer(lat_old, lat_new)), axis=0)
    # Output buffer, same shape (and dtype) as lat_new.
    out = lat_new*0
    for i in xrange(len(lon_new)):
        lai, loi = lat_ind[i], lon_ind[i]
        if data.mask[lai, loi]:
            # Nearest cell is masked: widen the window until it contains an
            # unmasked cell. (d=0 gives an empty slice, np.all(empty) is
            # True, so the loop effectively starts searching at d=1.)
            for d in xrange(10):
                # 'True-…' is a Python 2 idiom for logical negation.
                if True-np.all(data.mask[lai-d:lai+d,loi-d:loi+d]):
                    # Most common unmasked value in the window.
                    # NOTE(review): lai-d can go negative near the raster
                    # edge, which wraps the slice — confirm points stay away
                    # from the boundary.
                    out[i] = mode(data.data[lai-d:lai+d,loi-d:loi+d][np.where(True-data.mask[lai-d:lai+d,loi-d:loi+d])])
                    break
        else:
            # Nearest cell is valid: take its value directly.
            out[i] = data[lai,loi]
    if np.any(np.isnan(out)):
        raise ValueError
    return out

# Script tail: evaluate every covariate layer at the points already held in
# 'cols' (cols['lon'], cols['lat']) and write the result as one CSV.
# 'cols', 'covariate_names', 'covariate_path' and 'rec2csv' are defined
# elsewhere in the file.
for fname in map(lambda n: n+'.hdf5', covariate_names):
    print 'Evaluating %s'%fname
    colname = os.path.splitext(fname)[0]
    hf = tb.openFile(os.path.join(covariate_path,fname))
    # Nearest-neighbour interpolation; masked cells are repaired by
    # nan_callback above.
    cols[colname] = map_utils.interp_geodata(hf.root.lon[:],hf.root.lat[:],hf.root.data[:],cols['lon'],cols['lat'],hf.root.mask[:],order=0,nan_handler=nan_callback)
    # Fail loudly rather than writing NaNs into the output table.
    if np.any(np.isnan(cols[colname])):
        raise ValueError
    hf.close()

# Assemble every column into a record array and dump next to the input file.
keys = cols.keys()
data_out = np.rec.fromarrays([cols[k] for k in keys], names=keys)
rec2csv(data_out, os.path.splitext(os.path.basename(sys.argv[1]))[0]+'_with_covariates.csv')