def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate(
            [broadcast_to(idxi, ai.shape) for ai, idxi in a_plus_idx], axis
        )
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None) for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
def agregar(símismo, nuevos, edad=0, etapas=None):

    # Reset the ages of empty cohorts
    símismo._edades[símismo._pobs == 0] = 0

    if etapas is None:
        rbn = slice(None)
        nuevos = símismo._proc_matr_datos(nuevos)
    else:
        rbn = símismo.rebanar(etapas)

    # The current ages and populations of the stages
    pobs = símismo._pobs[rbn]
    edades = símismo._edades[rbn]

    eje_coh = símismo.eje_coh()

    # The indices of the days (cohorts) with the minimum age. If more than one
    # cohort shares the minimum age, the first one is taken.
    í_cohs = np.expand_dims(np.argmin(edades, axis=eje_coh), axis=eje_coh)

    # The ages of the cohorts with the minimum ages.
    eds_mín = np.take_along_axis(edades, í_cohs, axis=eje_coh)

    # The populations corresponding to these minimum ages.
    pobs_coresp = np.take_along_axis(pobs, í_cohs, axis=eje_coh)

    # Where there is no existing population, reset the age.
    eds_mín = np.where(pobs_coresp == 0, [0], eds_mín)

    # Compute the weight of the existing ages according to their existing populations
    # (to combine the new cohort with an existing cohort when necessary).
    peso_ed_ya = np.divide(pobs_coresp, np.add(nuevos, pobs_coresp))
    peso_ed_ya[np.isnan(peso_ed_ya)] = 0

    # The average ages. If no cohorts had to be combined, this is simply the age
    # of the new cohort.
    eds_prom = np.add(np.multiply(eds_mín, peso_ed_ya),
                      np.multiply(edad, np.subtract(1, peso_ed_ya)))

    # Store the updated ages at the appropriate indices
    np.put_along_axis(edades, í_cohs, eds_prom, axis=eje_coh)

    # Store the updated populations at the appropriate indices
    np.put_along_axis(pobs, í_cohs, nuevos + pobs_coresp, axis=eje_coh)

    símismo._pobs[rbn] = pobs
    símismo._edades[rbn] = edades
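# A minimal, self-contained sketch of the argmin + take/put_along_axis pattern used
# above: find the youngest cohort along an axis, read its age and population, and
# write an updated value back at the same positions. Toy arrays only; no model
# objects, and new individuals are assumed to arrive with age 0.
import numpy as np

edades = np.array([[3., 0., 5.],
                   [2., 4., 1.]])
pobs = np.array([[10., 0., 6.],
                 [7., 3., 9.]])
nuevos = np.array([[4.], [2.]])                               # new individuals per row

i_cohs = np.expand_dims(np.argmin(edades, axis=1), axis=1)    # youngest cohort per row
eds_min = np.take_along_axis(edades, i_cohs, axis=1)
pobs_coresp = np.take_along_axis(pobs, i_cohs, axis=1)

# Population-weighted average age of the merged cohort (new individuals have age 0).
peso = pobs_coresp / (nuevos + pobs_coresp)
eds_prom = eds_min * peso

np.put_along_axis(edades, i_cohs, eds_prom, axis=1)
np.put_along_axis(pobs, i_cohs, nuevos + pobs_coresp, axis=1)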
def argtopk_aggregate(a_plus_idx, k, axis, keepdims):
    """ Final aggregation function of argtopk

    Invoke argtopk one final time, sort the results internally, drop the data
    and return the index only.
    """
    assert keepdims is True
    a, idx = argtopk(a_plus_idx, k, axis, keepdims)
    axis = axis[0]

    idx2 = np.argsort(a, axis=axis)
    idx = take_along_axis(idx, idx2, axis)
    if k < 0:
        return idx
    return idx[tuple(slice(None, None, -1) if i == axis else slice(None)
                     for i in range(idx.ndim))]
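# A minimal, self-contained NumPy sketch of the top-k pattern behind the two
# functions above (not dask's actual chunked execution): take the unordered top-k
# indices with argpartition, then order them with argsort via take_along_axis.
import numpy as np

rng = np.random.default_rng(0)
a = rng.normal(size=(4, 10))
k = 3

# Unordered indices of the k largest values in each row.
idx = np.argpartition(a, -k, axis=1)[:, -k:]
# Corresponding values, then sort the top-k set in descending order.
vals = np.take_along_axis(a, idx, axis=1)
order = np.argsort(-vals, axis=1)
topk_idx = np.take_along_axis(idx, order, axis=1)
topk_vals = np.take_along_axis(a, topk_idx, axis=1)

assert np.allclose(topk_vals, -np.sort(-a, axis=1)[:, :k])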
def random_sample(
    self, inputs, n, topk=None, topp=None, states=None, temperature=1, min_ends=1
):
    """Randomly sample n decoding results.

    Notes: a non-None topk means that at each step we only sample from the topk
    highest-probability tokens; a non-None topp means that at each step we only
    sample from the highest-probability tokens whose cumulative probability just
    reaches topp.
    Returns: a list of n decoded sequences.
    """
    inputs = [np.array([i]) for i in inputs]
    output_ids = self.first_output_ids
    results = []
    for step in range(self.maxlen):
        probas, states = self.predict(
            inputs, output_ids, states, temperature, 'probas'
        )  # compute the current step's probabilities
        probas /= probas.sum(axis=1, keepdims=True)  # make sure they are normalized
        if step == 0:  # after the first prediction, repeat the results n times
            probas = np.repeat(probas, n, axis=0)
            inputs = [np.repeat(i, n, axis=0) for i in inputs]
            output_ids = np.repeat(output_ids, n, axis=0)
        if topk is not None:
            k_indices = probas.argpartition(-topk, axis=1)[:, -topk:]  # keep only topk
            probas = np.take_along_axis(probas, k_indices, axis=1)  # topk probabilities
            probas /= probas.sum(axis=1, keepdims=True)  # renormalize
        if topp is not None:
            p_indices = probas.argsort(axis=1)[:, ::-1]  # sort from high to low
            probas = np.take_along_axis(probas, p_indices, axis=1)  # sorted probabilities
            cumsum_probas = np.cumsum(probas, axis=1)  # cumulative probabilities
            flag = np.roll(cumsum_probas >= topp, 1, axis=1)  # mark the part beyond topp
            flag[:, 0] = False  # together with np.roll, this shifts the mask by one position
            probas[flag] = 0  # zero out everything after the cutoff
            probas /= probas.sum(axis=1, keepdims=True)  # renormalize
        sample_func = lambda p: np.random.choice(len(p), p=p)  # sample according to the probabilities
        sample_ids = np.apply_along_axis(sample_func, 1, probas)  # perform the sampling
        sample_ids = sample_ids.reshape((-1, 1))  # align shapes
        if topp is not None:
            sample_ids = np.take_along_axis(
                p_indices, sample_ids, axis=1
            )  # map back to the original ids
        if topk is not None:
            sample_ids = np.take_along_axis(
                k_indices, sample_ids, axis=1
            )  # map back to the original ids
        output_ids = np.concatenate([output_ids, sample_ids], 1)  # update the outputs
        is_end = output_ids[:, -1] == self.end_id  # mark sequences ending with the end token
        end_counts = (output_ids == self.end_id).sum(1)  # count the end tokens seen so far
        if output_ids.shape[1] >= self.minlen:  # minimum-length check
            flag = is_end & (end_counts >= min_ends)  # mark finished sequences
            if flag.any():  # if any sequence is finished
                for ids in output_ids[flag]:  # store the finished sequences
                    results.append(ids)
                flag = (flag == False)  # mark the unfinished sequences
                inputs = [i[flag] for i in inputs]  # keep only the unfinished inputs
                output_ids = output_ids[flag]  # keep only the unfinished candidates
                end_counts = end_counts[flag]  # keep only the unfinished end counts
                if len(output_ids) == 0:
                    break
    # If any sequences are still unfinished, add them to the results directly
    for ids in output_ids:
        results.append(ids)
    # Return the results
    return results
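# A minimal sketch of the top-k / top-p filtering steps used above, on a toy batch
# of probability vectors (hypothetical values; no decoder state or bert4keras
# objects involved).
import numpy as np

probas = np.array([[0.05, 0.40, 0.30, 0.15, 0.10],
                   [0.50, 0.10, 0.20, 0.05, 0.15]])
topk, topp = 3, 0.8

# top-k: keep only the k highest-probability tokens, then renormalize.
k_indices = probas.argpartition(-topk, axis=1)[:, -topk:]
probas_k = np.take_along_axis(probas, k_indices, axis=1)
probas_k /= probas_k.sum(axis=1, keepdims=True)

# top-p: sort descending, zero everything after the cumulative sum reaches topp.
p_indices = probas_k.argsort(axis=1)[:, ::-1]
probas_p = np.take_along_axis(probas_k, p_indices, axis=1)
flag = np.roll(np.cumsum(probas_p, axis=1) >= topp, 1, axis=1)
flag[:, 0] = False
probas_p[flag] = 0
probas_p /= probas_p.sum(axis=1, keepdims=True)

# Sample one token per row, then map back to the original vocabulary ids.
sample = np.array([[np.random.choice(len(p), p=p)] for p in probas_p])
sample = np.take_along_axis(p_indices, sample, axis=1)   # undo the top-p sort
sample = np.take_along_axis(k_indices, sample, axis=1)   # undo the top-k selection
print(sample)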
def main(): load_start = dt.datetime.now() #Try parsing arguments using argparse parser = argparse.ArgumentParser( description='wrf non-parallel convective diagnostics processer') parser.add_argument("-m", help="Model name", required=True) parser.add_argument("-r", help="Region name (default is aus)", default="aus") parser.add_argument("-t1", help="Time start YYYYMMDDHH", required=True) parser.add_argument("-t2", help="Time end YYYYMMDDHH", required=True) parser.add_argument( "-e", help= "CMIP5 experiment name (not required if using era5, erai or barra)", default="") parser.add_argument( "--barpa_forcing_mdl", help="BARPA forcing model (erai or ACCESS1-0). Default erai.", default="erai") parser.add_argument( "--ens", help="CMIP5 ensemble name (not required if using era5, erai or barra)", default="r1i1p1") parser.add_argument("--group", help="CMIP6 modelling group name", default="") parser.add_argument("--project", help="CMIP6 modelling intercomparison project", default="CMIP") parser.add_argument("--ver6hr", help="Version on al33 for 6hr data", default="") parser.add_argument("--ver3hr", help="Version on al33 for 3hr data", default="") parser.add_argument("--issave", help="Save output (True or False, default is False)", default="False") parser.add_argument( "--ub4", help= "Where to get era5 data. Default True for ub4 project, otherwise rt52", default="True") parser.add_argument( "--outname", help= "Name of saved output. In the form *outname*_*t1*_*t2*.nc. Default behaviour is the model name", default=None) parser.add_argument( "--is_dcape", help="Should DCAPE be calculated? (1 or 0. Default is 1)", default=1) parser.add_argument( "--al33", help= "Should data be gathered from al33? Default is False, and data is gathered from r87. If True, then group is required", default="False") parser.add_argument( "--delta_t", help= "Time step spacing for ERA5 data, in hours. Default is one the minimum spacing (1 hour)", default="1") parser.add_argument( "--era5_interp", help= "Horizontally interpolate model data before calculating convective parameters", default="False") args = parser.parse_args() #Parse arguments from cmd line and set up inputs (date region model) model = args.m region = args.r t1 = args.t1 t2 = args.t2 issave = args.issave ub4 = args.ub4 al33 = args.al33 if args.outname == None: out_name = model else: out_name = args.outname is_dcape = args.is_dcape barpa_forcing_mdl = args.barpa_forcing_mdl experiment = args.e ensemble = args.ens group = args.group project = args.project ver6hr = args.ver6hr ver3hr = args.ver3hr delta_t = int(args.delta_t) era5_interp = args.era5_interp if region == "sa_small": start_lat = -38 end_lat = -26 start_lon = 132 end_lon = 142 elif region == "aus": start_lat = -44.525 end_lat = -9.975 start_lon = 111.975 end_lon = 156.275 elif region == "global": start_lat = -70 end_lat = 70 start_lon = -180 end_lon = 179.75 else: raise ValueError("INVALID REGION\n") domain = [start_lat, end_lat, start_lon, end_lon] try: time = [ dt.datetime.strptime(t1, "%Y%m%d%H"), dt.datetime.strptime(t2, "%Y%m%d%H") ] except: raise ValueError("INVALID START OR END TIME. 
SHOULD BE YYYYMMDDHH\n") if era5_interp == "True": era5_interp = True elif era5_interp == "False": era5_interp = False else: raise ValueError("\n INVALID era5_interp...SHOULD BE True OR False") if ub4 == "True": ub4 = True elif ub4 == "False": ub4 = False else: raise ValueError("\n INVALID ub4...SHOULD BE True OR False") if issave == "True": issave = True elif issave == "False": issave = False else: raise ValueError("\n INVALID ISSAVE...SHOULD BE True OR False") if al33 == "True": al33 = True elif al33 == "False": al33 = False else: raise ValueError("\n INVALID al33...SHOULD BE True OR False") #Load data print("LOADING DATA...") if model == "erai": ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,\ cp,tp,wg10,mod_cape,lon,lat,date_list = \ read_erai(domain,time) cp = cp.astype("float32", order="C") tp = tp.astype("float32", order="C") mod_cape = mod_cape.astype("float32", order="C") elif model == "era5": if ub4: ta,temp1,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,\ cp,wg10,mod_cape,lon,lat,date_list = \ read_era5(domain,time,delta_t=delta_t) else: ta,temp1,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,\ cp,tp,wg10,mod_cape,lon,lat,date_list = \ read_era5_rt52(domain,time,delta_t=delta_t) cp = cp.astype("float32", order="C") tp = tp.astype("float32", order="C") mod_cape = mod_cape.astype("float32", order="C") wap = np.zeros(hgt.shape) elif model == "barra": ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \ read_barra(domain,time) elif model == "barra_fc": ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \ read_barra_fc(domain,time) elif model == "barpa": ta,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \ read_barpa(domain, time, experiment, barpa_forcing_mdl, ensemble) wap = np.zeros(hgt.shape) temp1 = None elif model == "barra_ad": wg10,temp2,ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,lon,lat,date_list = \ read_barra_ad(domain, time, False) elif model in ["ACCESS1-0","ACCESS1-3","GFDL-CM3","GFDL-ESM2M","CNRM-CM5","MIROC5",\ "MRI-CGCM3","IPSL-CM5A-LR","IPSL-CM5A-MR","GFDL-ESM2G","bcc-csm1-1","MIROC-ESM",\ "BNU-ESM"]: #Check that t1 and t2 are in the same year year = np.arange(int(t1[0:4]), int(t2[0:4]) + 1) ta, hur, hgt, terrain, p_3d, ps, ua, va, uas, vas, tas, ta2d, tp, lon, lat, \ date_list = read_cmip(model, experiment, \ ensemble, year, domain, cmip_ver=5, al33=al33, group=group, ver6hr=ver6hr, ver3hr=ver3hr) wap = np.zeros(hgt.shape) wg10 = np.zeros(ps.shape) mod_cape = np.zeros(ps.shape) p = np.zeros(p_3d[0, :, 0, 0].shape) #date_list = pd.to_datetime(date_list).to_pydatetime() temp1 = None tp = tp.astype("float32", order="C") elif model in ["ACCESS-ESM1-5", "ACCESS-CM2"]: year = np.arange(int(t1[0:4]), int(t2[0:4]) + 1) ta, hur, hgt, terrain, p_3d, ps, ua, va, uas, vas, tas, ta2d, lon, lat, \ date_list = read_cmip(model, experiment,\ ensemble, year, domain, cmip_ver=6, group=group, project=project) wap = np.zeros(hgt.shape) wg10 = np.zeros(ps.shape) p = np.zeros(p_3d[0, :, 0, 0].shape) #date_list = pd.to_datetime(date_list).to_pydatetime() temp1 = None else: raise ValueError("Model not recognised") del temp1 ta = ta.astype("float32", order="C") hur = hur.astype("float32", order="C") hgt = hgt.astype("float32", order="C") terrain = terrain.astype("float32", order="C") p = p.astype("float32", order="C") ps = ps.astype("float32", order="C") wap = wap.astype("float32", order="C") ua = ua.astype("float32", order="C") va = va.astype("float32", order="C") uas = 
uas.astype("float32", order="C") vas = vas.astype("float32", order="C") tas = tas.astype("float32", order="C") ta2d = ta2d.astype("float32", order="C") wg10 = wg10.astype("float32", order="C") lon = lon.astype("float32", order="C") lat = lat.astype("float32", order="C") gc.collect() param = np.array([ "mu_cape", "mu_cin", "muq", "s06", "s0500", "lr700_500", "mhgt", "ta500", "tp" ]) if model in ["erai", "era5"]: param = np.concatenate([param, ["mod_cape"]]) #Option to interpolate to the ERA5 grid if era5_interp: #Interpolate model data to the ERA5 grid from era5_read import get_lat_lon_rt52 as get_era5_lat_lon era5_lon, era5_lat = get_era5_lat_lon() era5_lon_ind = np.where((era5_lon >= domain[2]) & (era5_lon <= domain[3]))[0] era5_lat_ind = np.where((era5_lat >= domain[0]) & (era5_lat <= domain[1]))[0] era5_lon = era5_lon[era5_lon_ind] era5_lat = era5_lat[era5_lat_ind] terrain = interp_era5(terrain, lon, lat, era5_lon, era5_lat, d3=False) #Set output array output_data = np.zeros( (ps.shape[0], era5_lat.shape[0], era5_lon.shape[0], len(param))) else: output_data = np.zeros( (ps.shape[0], ps.shape[1], ps.shape[2], len(param))) #Assign p levels to a 3d array, with same dimensions as input variables (ta, hgt, etc.) #If the 3d p-lvl array already exists, then declare the variable "mdl_lvl" as true. try: p_3d mdl_lvl = True full_p3d = p_3d except: mdl_lvl = False if era5_interp: p_3d = np.moveaxis(np.tile(p,[ta.shape[2],ta.shape[3],1]),[0,1,2],[1,2,0]).\ astype(np.float32) else: p_3d = np.moveaxis(np.tile(p,[era5_lat.shape[0],era5_lon.shape[0],1]),[0,1,2],[1,2,0]).\ astype(np.float32) print("LOAD TIME..." + str(dt.datetime.now() - load_start)) tot_start = dt.datetime.now() for t in np.arange(0, ta.shape[0]): cape_start = dt.datetime.now() if era5_interp: ta_t = interp_era5(ta[t], lon, lat, era5_lon, era5_lat, d3=True) hur_t = interp_era5(hur[t], lon, lat, era5_lon, era5_lat, d3=True) hgt_t = interp_era5(hgt[t], lon, lat, era5_lon, era5_lat, d3=True) ps_t = interp_era5(ps[t], lon, lat, era5_lon, era5_lat, d3=False) wap_t = interp_era5(wap[t], lon, lat, era5_lon, era5_lat, d3=True) ua_t = interp_era5(ua[t], lon, lat, era5_lon, era5_lat, d3=True) va_t = interp_era5(va[t], lon, lat, era5_lon, era5_lat, d3=True) uas_t = interp_era5(uas[t], lon, lat, era5_lon, era5_lat, d3=False) vas_t = interp_era5(vas[t], lon, lat, era5_lon, era5_lat, d3=False) tas_t = interp_era5(tas[t], lon, lat, era5_lon, era5_lat, d3=False) ta2d_t = interp_era5(ta2d[t], lon, lat, era5_lon, era5_lat, d3=False) tp_t = interp_era5(tp[t], lon, lat, era5_lon, era5_lat, d3=False) mod_cape_t = interp_era5(mod_cape[t], lon, lat, era5_lon, era5_lat, d3=False) else: ta_t = ta[t] hur_t = hur[t] hgt_t = hgt[t] ps_t = ps[t] wap_t = wap[t] ua_t = ua[t] va_t = va[t] uas_t = uas[t] vas_t = vas[t] tas_t = tas[t] ta2d_t = ta2d[t] tp_t = tp[t] mod_cape_t = mod_cape[t] print(date_list[t]) output = np.zeros((1, ps_t.shape[0], ps_t.shape[1], len(param))) if mdl_lvl: if era5_interp: p_3d = interp_era5(full_p3d[t], lon, lat, era5_lon, era5_lat, d3=True) else: p_3d = full_p3d[t] dp = get_dp(hur=hur_t, ta=ta_t, dp_mask=False) #Insert surface arrays, creating new arrays with "sfc" prefix sfc_ta = np.insert(ta_t, 0, tas_t, axis=0) sfc_hgt = np.insert(hgt_t, 0, terrain, axis=0) sfc_dp = np.insert(dp, 0, ta2d_t, axis=0) sfc_p_3d = np.insert(p_3d, 0, ps_t, axis=0) sfc_ua = np.insert(ua_t, 0, uas_t, axis=0) sfc_va = np.insert(va_t, 0, vas_t, axis=0) sfc_wap = np.insert(wap_t, 0, np.zeros(vas_t.shape), axis=0) #Sort by ascending p a,temp1,temp2 = 
np.meshgrid(np.arange(sfc_p_3d.shape[0]) , np.arange(sfc_p_3d.shape[1]),\ np.arange(sfc_p_3d.shape[2])) sort_inds = np.flip(np.lexsort([np.swapaxes(a, 1, 0), sfc_p_3d], axis=0), axis=0) sfc_hgt = np.take_along_axis(sfc_hgt, sort_inds, axis=0) sfc_dp = np.take_along_axis(sfc_dp, sort_inds, axis=0) sfc_p_3d = np.take_along_axis(sfc_p_3d, sort_inds, axis=0) sfc_ua = np.take_along_axis(sfc_ua, sort_inds, axis=0) sfc_va = np.take_along_axis(sfc_va, sort_inds, axis=0) sfc_ta = np.take_along_axis(sfc_ta, sort_inds, axis=0) #Calculate q and wet bulb for pressure level arrays with surface values sfc_ta_unit = units.units.degC * sfc_ta sfc_dp_unit = units.units.degC * sfc_dp sfc_p_unit = units.units.hectopascals * sfc_p_3d hur_unit = mpcalc.relative_humidity_from_dewpoint(ta_t*units.units.degC, dp*units.units.degC)*\ 100*units.units.percent q_unit = mpcalc.mixing_ratio_from_relative_humidity(hur_unit,\ ta_t*units.units.degC,np.array(p_3d)*units.units.hectopascals) sfc_hur_unit = mpcalc.relative_humidity_from_dewpoint(sfc_ta_unit, sfc_dp_unit)*\ 100*units.units.percent sfc_q_unit = mpcalc.mixing_ratio_from_relative_humidity(sfc_hur_unit,\ sfc_ta_unit,sfc_p_unit) sfc_theta_unit = mpcalc.potential_temperature(sfc_p_unit, sfc_ta_unit) sfc_thetae_unit = mpcalc.equivalent_potential_temperature( sfc_p_unit, sfc_ta_unit, sfc_dp_unit) sfc_thetae = np.array(mpcalc.equivalent_potential_temperature(ps_t*units.units.hectopascals,tas_t*units.units.degC,\ ta2d_t*units.units.degC)) sfc_q = np.array(sfc_q_unit) sfc_hur = np.array(sfc_hur_unit) #sfc_wb = np.array(wrf.wetbulb( sfc_p_3d*100, sfc_ta+273.15, sfc_q, units="degC")) #Use getcape.f90 #cape_gb_mu1, cape_gb_mu4 = getcape_driver(sfc_p_3d, sfc_ta, sfc_dp, ps_t) #Now get most-unstable CAPE (max CAPE in vertical, ensuring parcels used are AGL) cape3d = wrf.cape_3d(sfc_p_3d,sfc_ta+273.15,\ sfc_q,sfc_hgt,\ terrain,ps_t,\ True,meta=False, missing=0) cape = cape3d.data[0] cin = cape3d.data[1] lfc = cape3d.data[2] lcl = cape3d.data[3] el = cape3d.data[4] #Mask values which are below the surface and above 350 hPa AGL cape[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan cin[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan lfc[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan lcl[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan el[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan #Get maximum (in the vertical), and get cin, lfc, lcl for the same parcel mu_cape_inds = np.tile(np.nanargmax(cape, axis=0), (cape.shape[0], 1, 1)) mu_cape = np.take_along_axis(cape, mu_cape_inds, 0)[0] mu_cin = np.take_along_axis(cin, mu_cape_inds, 0)[0] mu_lfc = np.take_along_axis(lfc, mu_cape_inds, 0)[0] mu_lcl = np.take_along_axis(lcl, mu_cape_inds, 0)[0] mu_el = np.take_along_axis(el, mu_cape_inds, 0)[0] muq = np.take_along_axis(sfc_q, mu_cape_inds, 0)[0] * 1000 #Calculate other parameters #Thermo thermo_start = dt.datetime.now() lr700_500 = get_lr_p(ta_t, p_3d, hgt_t, 700, 500) melting_hgt = get_t_hgt(sfc_ta, np.copy(sfc_hgt), 0, terrain) melting_hgt = np.where((melting_hgt < 0) | (np.isnan(melting_hgt)), 0, melting_hgt) ta500 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 500) ta925 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 925) ta850 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 850) ta700 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 700) rho = mpcalc.density( np.array(sfc_p_3d) * (units.units.hectopascal), sfc_ta * units.units.degC, sfc_q_unit) rho925 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 925)) rho850 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 
850)) rho700 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 700)) #Winds winds_start = dt.datetime.now() s06 = get_shear_hgt(sfc_ua, sfc_va, np.copy(sfc_hgt), 0, 6000, terrain) s0500 = get_shear_p(ua_t, va_t, p_3d, "sfc", np.array([500]), p_3d, uas=uas_t, vas=vas_t)[0] #WAP if model in ["erai", "era5"]: sfc_w = mpcalc.vertical_velocity( wap_t * (units.units.pascal / units.units.second),\ np.array(p_3d) * (units.units.hectopascal), \ ta_t * units.units.degC, q_unit) w925 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 925)) w850 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 850)) w700 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 700)) #Convergence if era5_interp: x, y = np.meshgrid(era5_lon, era5_lat) else: x, y = np.meshgrid(lon, lat) dx, dy = mpcalc.lat_lon_grid_deltas(x, y) u925 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 925)) u850 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 850)) u700 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 700)) v925 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 925)) v850 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 850)) v700 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 700)) conv925 = -1e5 * np.array( mpcalc.divergence(u925 * (units.units.meter / units.units.second), v925 * (units.units.meter / units.units.second), dx, dy)) conv850 = -1e5 * np.array( mpcalc.divergence(u850 * (units.units.meter / units.units.second), v850 * (units.units.meter / units.units.second), dx, dy)) conv700 = -1e5 * np.array( mpcalc.divergence(u700 * (units.units.meter / units.units.second), v700 * (units.units.meter / units.units.second), dx, dy)) #CS6 mucs6 = mu_cape * np.power(s06, 1.67) #Fill output output = fill_output(output, t, param, ps, "mu_cape", mu_cape) output = fill_output(output, t, param, ps, "mu_cin", mu_cin) output = fill_output(output, t, param, ps, "muq", muq) output = fill_output(output, t, param, ps, "s06", s06) output = fill_output(output, t, param, ps, "s0500", s0500) output = fill_output(output, t, param, ps, "lr700_500", lr700_500) output = fill_output(output, t, param, ps, "ta500", ta500) output = fill_output(output, t, param, ps, "mhgt", melting_hgt) output = fill_output(output, t, param, ps, "tp", tp_t) if (model == "erai") | (model == "era5"): output = fill_output(output, t, param, ps, "mod_cape", mod_cape_t) output_data[t] = output print("SAVING DATA...") param_out = [] for param_name in param: temp_data = output_data[:, :, :, np.where(param == param_name)[0][0]] param_out.append(temp_data) #If the mhgt variable is zero everywhere, then it is likely that data has not been read. #In this case, all values are missing, set to zero. for t in np.arange(param_out[0].shape[0]): if param_out[np.where(param == "mhgt")[0][0]][t].max() == 0: for p in np.arange(len(param_out)): param_out[p][t] = np.nan if issave: if era5_interp: save_netcdf(region, model, out_name, date_list, era5_lat, era5_lon, param, param_out, \ out_dtype = "f4", compress=True) else: save_netcdf(region, model, out_name, date_list, lat, lon, param, param_out, \ out_dtype = "f4", compress=True) print(dt.datetime.now() - tot_start)
print('#' * 80)
print('Test mesh properties')
print('#' * 80)
for name, mesh in mesh_generator():
    check_properties(name, mesh)

# fix triangle orientation
print('#' * 80)
print('Fix triangle orientation')
print('#' * 80)
for name, mesh in mesh_generator():
    mesh.compute_vertex_normals()
    triangles = np.asarray(mesh.triangles)
    rnd_idx = np.random.rand(*triangles.shape).argsort(axis=1)
    rnd_idx[0] = (0, 1, 2)
    triangles = np.take_along_axis(triangles, rnd_idx, axis=1)
    mesh.triangles = Vector3iVector(triangles)
    draw_geometries([mesh])
    success = mesh.orient_triangles()
    print('%s oriented: %s' % (name, 'yes' if success else 'no'))
    draw_geometries([mesh])

# intersection tests
print('#' * 80)
print('Intersection tests')
print('#' * 80)
np.random.seed(30)
bbox = create_mesh_box(20, 20, 20).translate((-10, -10, -10))
meshes = [create_mesh_box() for _ in range(20)]
meshes.append(create_mesh_sphere())
meshes.append(create_mesh_cone())
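# The per-row shuffle above relies on a common NumPy idiom: argsort of uniform
# random numbers gives an independent random permutation per row. A minimal sketch
# on a toy triangle array (no open3d objects):
import numpy as np

triangles = np.arange(12).reshape(4, 3)          # 4 triangles, 3 vertex ids each
rnd_idx = np.random.rand(*triangles.shape).argsort(axis=1)
shuffled = np.take_along_axis(triangles, rnd_idx, axis=1)
# Each row of `shuffled` contains the same vertex ids as before, in random order.
assert np.array_equal(np.sort(shuffled, axis=1), triangles)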
def get_vector_library(self, reciprocal_radius):
    """Calculates a library of diffraction vectors and pairwise inter-vector
    angles for a library of crystal structures.

    Parameters
    ----------
    reciprocal_radius : float
        The maximum g-vector magnitude to be included in the library.

    Returns
    -------
    vector_library : :class:`DiffractionVectorLibrary`
        Mapping of phase identifier to phase information in dictionary format.
    """
    # Define DiffractionVectorLibrary object to contain results
    vector_library = DiffractionVectorLibrary()
    # Get structures from structure library
    structure_library = self.structures.struct_lib
    # Iterate through phases in library.
    for phase_name in structure_library.keys():
        # Get diffpy.structure object associated with phase
        structure = structure_library[phase_name][0]
        # Get reciprocal lattice points within reciprocal_radius
        recip_latt = structure.lattice.reciprocal()
        miller_indices, coordinates, distances = get_points_in_sphere(
            recip_latt, reciprocal_radius)

        # Create pair_indices for selecting all point pair combinations
        num_indices = len(miller_indices)
        pair_a_indices, pair_b_indices = np.mgrid[:num_indices, :num_indices]

        # Only select one of the permutations and don't pair an index with
        # itself (select above diagonal)
        upper_indices = np.triu_indices(num_indices, 1)
        pair_a_indices = pair_a_indices[upper_indices].ravel()
        pair_b_indices = pair_b_indices[upper_indices].ravel()

        # Mask off origin (0, 0, 0)
        origin_index = num_indices // 2
        pair_a_indices = pair_a_indices[pair_a_indices != origin_index]
        pair_b_indices = pair_b_indices[pair_b_indices != origin_index]

        pair_indices = np.vstack([pair_a_indices, pair_b_indices])

        # Create library entries
        angles = get_angle_cartesian_vec(coordinates[pair_a_indices],
                                         coordinates[pair_b_indices])
        pair_distances = distances[pair_indices.T]
        # Ensure longest vector is first
        len_sort = np.fliplr(pair_distances.argsort(axis=1))
        # phase_index_pairs is a list of [hkl1, hkl2]
        phase_index_pairs = np.take_along_axis(
            miller_indices[pair_indices.T], len_sort[:, :, np.newaxis], axis=1)
        # phase_measurements is a list of [len1, len2, angle]
        phase_measurements = np.column_stack(
            (np.take_along_axis(pair_distances, len_sort, axis=1), angles))

        # Only keep unique triplets
        unique_measurements, unique_measurement_indices = np.unique(
            phase_measurements, axis=0, return_index=True)
        vector_library[phase_name] = {
            'indices': phase_index_pairs[unique_measurement_indices],
            'measurements': unique_measurements
        }

    # Pass attributes to diffraction library from structure library.
    vector_library.identifiers = self.structures.identifiers
    vector_library.structures = self.structures.structures
    vector_library.reciprocal_radius = reciprocal_radius

    return vector_library
def yin( y, *, fmin, fmax, sr=22050, frame_length=2048, win_length=None, hop_length=None, trough_threshold=0.1, center=True, pad_mode="constant", ): """Fundamental frequency (F0) estimation using the YIN algorithm. YIN is an autocorrelation based method for fundamental frequency estimation [#]_. First, a normalized difference function is computed over short (overlapping) frames of audio. Next, the first minimum in the difference function below ``trough_threshold`` is selected as an estimate of the signal's period. Finally, the estimated period is refined using parabolic interpolation before converting into the corresponding frequency. .. [#] De Cheveigné, Alain, and Hideki Kawahara. "YIN, a fundamental frequency estimator for speech and music." The Journal of the Acoustical Society of America 111.4 (2002): 1917-1930. Parameters ---------- y : np.ndarray [shape=(..., n)] audio time series. Multi-channel is supported.. fmin : number > 0 [scalar] minimum frequency in Hertz. The recommended minimum is ``librosa.note_to_hz('C2')`` (~65 Hz) though lower values may be feasible. fmax : number > 0 [scalar] maximum frequency in Hertz. The recommended maximum is ``librosa.note_to_hz('C7')`` (~2093 Hz) though higher values may be feasible. sr : number > 0 [scalar] sampling rate of ``y`` in Hertz. frame_length : int > 0 [scalar] length of the frames in samples. By default, ``frame_length=2048`` corresponds to a time scale of about 93 ms at a sampling rate of 22050 Hz. win_length : None or int > 0 [scalar] length of the window for calculating autocorrelation in samples. If ``None``, defaults to ``frame_length // 2`` hop_length : None or int > 0 [scalar] number of audio samples between adjacent YIN predictions. If ``None``, defaults to ``frame_length // 4``. trough_threshold : number > 0 [scalar] absolute threshold for peak estimation. center : boolean If ``True``, the signal `y` is padded so that frame ``D[:, t]`` is centered at `y[t * hop_length]`. If ``False``, then ``D[:, t]`` begins at ``y[t * hop_length]``. Defaults to ``True``, which simplifies the alignment of ``D`` onto a time grid by means of ``librosa.core.frames_to_samples``. pad_mode : string or function If ``center=True``, this argument is passed to ``np.pad`` for padding the edges of the signal ``y``. By default (``pad_mode="constant"``), ``y`` is padded on both sides with zeros. If ``center=False``, this argument is ignored. .. see also:: `np.pad` Returns ------- f0: np.ndarray [shape=(..., n_frames)] time series of fundamental frequencies in Hertz. If multi-channel input is provided, f0 curves are estimated separately for each channel. See Also -------- librosa.pyin : Fundamental frequency (F0) estimation using probabilistic YIN (pYIN). Examples -------- Computing a fundamental frequency (F0) curve from an audio input >>> y = librosa.chirp(fmin=440, fmax=880, duration=5.0) >>> librosa.yin(y, fmin=440, fmax=880) array([442.66354675, 441.95299983, 441.58010963, ..., 871.161732 , 873.99001454, 877.04297681]) """ if fmin is None or fmax is None: raise ParameterError('both "fmin" and "fmax" must be provided') # Set the default window length if it is not already specified. if win_length is None: win_length = frame_length // 2 if win_length >= frame_length: raise ParameterError( "win_length={} cannot exceed given frame_length={}".format( win_length, frame_length)) # Set the default hop if it is not already specified. if hop_length is None: hop_length = frame_length // 4 # Check that audio is valid. 
util.valid_audio(y, mono=False) # Pad the time series so that frames are centered if center: padding = [(0, 0) for _ in y.shape] padding[-1] = (frame_length // 2, frame_length // 2) y = np.pad(y, padding, mode=pad_mode) # Frame audio. y_frames = util.frame(y, frame_length=frame_length, hop_length=hop_length) # Calculate minimum and maximum periods min_period = max(int(np.floor(sr / fmax)), 1) max_period = min(int(np.ceil(sr / fmin)), frame_length - win_length - 1) # Calculate cumulative mean normalized difference function. yin_frames = _cumulative_mean_normalized_difference( y_frames, frame_length, win_length, min_period, max_period) # Parabolic interpolation. parabolic_shifts = _parabolic_interpolation(yin_frames) # Find local minima. is_trough = util.localmin(yin_frames, axis=-2) is_trough[..., 0, :] = yin_frames[..., 0, :] < yin_frames[..., 1, :] # Find minima below peak threshold. is_threshold_trough = np.logical_and(is_trough, yin_frames < trough_threshold) # Absolute threshold. # "The solution we propose is to set an absolute threshold and choose the # smallest value of tau that gives a minimum of d' deeper than # this threshold. If none is found, the global minimum is chosen instead." target_shape = list(yin_frames.shape) target_shape[-2] = 1 global_min = np.argmin(yin_frames, axis=-2) yin_period = np.argmax(is_threshold_trough, axis=-2) global_min = global_min.reshape(target_shape) yin_period = yin_period.reshape(target_shape) no_trough_below_threshold = np.all(~is_threshold_trough, axis=-2, keepdims=True) yin_period[no_trough_below_threshold] = global_min[ no_trough_below_threshold] # Refine peak by parabolic interpolation. yin_period = ( min_period + yin_period + np.take_along_axis(parabolic_shifts, yin_period, axis=-2))[..., 0, :] # Convert period to fundamental frequency. f0 = sr / yin_period return f0
def generate_curve(self, truth, proposals, scores, interpolate=True):
    """
    Generates PR curves given true query and proposal poses.
    Can select interpolation of precision, where precision values
    are replaced with maximum precision for all recall values
    greater or equal to current recall.
    """
    t_errs, R_errs = self._compute_errors(truth, proposals)
    scores_u = np.unique(scores)
    max_score = np.max(scores_u)
    self.scores = np.linspace(
        np.min(scores_u) - 1e-3, max_score + 1e-3, endpoint=True, num=1000
    )
    if self.model in ["Single", "Seq Match", "Graph"]:
        # ensures iterating through list means higher
        # model confidence
        self.scores = np.flip(self.scores)
    self.precisions = np.ones_like(self.scores)
    self.recalls = np.zeros_like(self.scores)
    self.F1 = np.zeros_like(self.scores)

    for i, score_thres in enumerate(self.scores):
        if self.model in ["Seq Match", "Single"]:
            localized = scores < score_thres
            t_err = t_errs
            R_err = R_errs
        else:
            ind_loc = self._localize_indices(scores, score_thres)
            # identify traverses where threshold met
            localized = ind_loc != -1
            t_err = np.squeeze(
                np.take_along_axis(t_errs, ind_loc[:, np.newaxis], 1)
            )
            R_err = np.squeeze(
                np.take_along_axis(R_errs, ind_loc[:, np.newaxis], 1)
            )
        correct = np.logical_and(t_err < self.t, R_err < self.R)
        # only count traverses with a proposal
        correct = np.logical_and(correct, localized)

        # compute precision and recall
        # index of -1 means not localized in max seq len
        nLocalized = np.count_nonzero(localized)
        nCorrect = np.count_nonzero(correct)

        if nLocalized > 0:
            # if none localized, precision = 1 by default
            self.precisions[i] = nCorrect / nLocalized
        if nCorrect + len(localized) - nLocalized > 0:
            self.recalls[i] = nCorrect / (
                nCorrect + len(localized) - nLocalized
            )

    # flip curves for increasing recall
    self.precisions = np.flip(self.precisions)
    self.recalls = np.flip(self.recalls)
    self.scores = np.flip(self.scores)
    # ensure recalls are nondecreasing
    self.recalls, inds = np.unique(self.recalls, return_index=True)
    self.precisions = self.precisions[inds]
    self.scores = self.scores[inds]
    # chop off curve when recall first reaches 1
    ind_min = np.min(np.argwhere(self.recalls >= 1.0))
    self.recalls = self.recalls[: ind_min + 1]
    self.precisions = self.precisions[: ind_min + 1]
    self.scores = self.scores[: ind_min + 1]
    # interpolate precision, take max precision for
    # recall greater than raw recall
    if interpolate:
        for i in range(len(self.precisions)):
            self.precisions[i] = np.max(self.precisions[i:])
    return None
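# A minimal sketch of the per-row selection used above: pick one error value per
# traverse according to an index array, via expand_dims + take_along_axis. Toy
# data only; the indices here are not real _localize_indices output.
import numpy as np

t_errs = np.array([[0.3, 1.2, 0.8],
                   [2.0, 0.1, 0.5]])
ind_loc = np.array([2, 1])                     # chosen column per row
t_err = np.squeeze(np.take_along_axis(t_errs, ind_loc[:, np.newaxis], axis=1))
print(t_err)                                   # [0.8 0.1]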
def survival_score(front, ideal_point):
    m, n = front.shape
    crowd_dist = np.zeros(m)

    if m < n:
        p = 1
        normalization = np.max(front, axis=0)
        return crowd_dist, p, normalization

    # shift the ideal point to the origin
    front = front - ideal_point

    # Detect the extreme points and normalize the front
    extreme = find_corner_solutions(front)
    front, normalization = normalize(front, extreme)

    # set the distance for the extreme solutions
    crowd_dist[extreme] = np.inf
    selected = np.full(m, False)
    selected[extreme] = True

    # approximate p(norm)
    d = point_2_line_distance(front, np.zeros(n), np.ones(n))
    d[extreme] = np.inf
    index = np.argmin(d)
    # selected(index) = true
    # crowd_dist(index) = Inf
    p = np.log(n) / np.log(1.0 / np.mean(front[index, :]))

    if np.isnan(p) or p <= 0.1:
        p = 1.0
    elif p > 20:
        p = 20.0  # avoid numpy underflow

    nn = np.linalg.norm(front, p, axis=1)
    distances = minkowski_matrix(front, front, p=p)
    distances = distances / nn[:, None]

    neighbors = 2
    remaining = np.arange(m)
    remaining = list(remaining[~selected])
    for i in range(m - np.sum(selected)):
        mg = np.meshgrid(np.arange(selected.shape[0])[selected], remaining)
        D_mg = distances[tuple(
            mg)]  # avoid Numpy's future deprecation of array special indexing

        if D_mg.shape[1] > 1:
            # equivalent to mink(distances(remaining, selected),neighbors,2); in Matlab
            maxim = np.argpartition(D_mg, neighbors - 1, axis=1)[:, :neighbors]
            tmp = np.sum(np.take_along_axis(D_mg, maxim, axis=1), axis=1)
            index: int = np.argmax(tmp)
            d = tmp[index]
        else:
            index = D_mg[:, 0].argmax()
            d = D_mg[index, 0]

        best = remaining.pop(index)
        selected[best] = True
        crowd_dist[best] = d

    return crowd_dist, p, normalization
def calculate_detection_probability(n_min_det_muts, panel_size, n_muts_cancer, hge_tumors, n_hge_normal, seq_err, sample_fraction, pval_th=None, required_mt_frags=None): """ Calculate the probability to detect a tumor if there are hge_tumor hGE circulating in the entire bloodstream :param n_min_det_muts: number of minimally called mutations required for a positive cancer detection test :param panel_size: sequencing panel size :param n_muts_cancer: number of mutations covered by the panel that are clonally present in the tumor :param hge_tumors: array_like numbers of haploid genome equivalents (hGE) circulating in the entire bloodstream :param n_hge_normal: number of normal hGE circulating in the entire bloodstream (will be multiplied by two to account for diploid genomes) :param seq_err: sequencing error rate per basepair :param sample_fraction: fraction of the bloodstream that is sampled :param pval_th: p-value threshold to call an individual mutation in the panel :param required_mt_frags: minimum number of mutated fragments required to call mutation at a given position :return: probability that the test will be positive """ # note: every cell contains one hGE (either considering maternal or paternal copy); hence two genomes n_genomes_total = 2 * n_hge_normal + 2 * hge_tumors tumor_vaf = hge_tumors / n_genomes_total normal_vaf = 1 - tumor_vaf mt_prob = sample_fraction * ((normal_vaf * seq_err) + (tumor_vaf * (1 - seq_err))) seq_err_prob = sample_fraction * seq_err if pval_th is None and required_mt_frags is None: err_str = ('Either a p-value threshold or a number of required mutant fragments is needed ' 'to compute detection probability.') logger.error(err_str) raise RuntimeError(err_str) elif pval_th is not None and required_mt_frags is not None: err_str = ('Only a p-value threshold or a number of required mutant fragments should be given ' 'to compute detection probability. 
Not both.') logger.error(err_str) raise RuntimeError(err_str) elif required_mt_frags is not None: # probability to observe required_mt_frags or more mutant fragments of each of the minimally called mutations # for detection mt_prob_mt_pos = binom.sf(k=required_mt_frags - 1, n=n_genomes_total, p=mt_prob) # probability to observe required_mt_frags or more mutant fragments at positions not mutated in the tumor mt_prob_wt_pos = binom.sf(k=required_mt_frags - 1, n=n_genomes_total, p=seq_err_prob) # sum probabilities of combinations in which n_min_det_muts mutations can be detected probs = np.zeros((n_min_det_muts + 1, len(hge_tumors))) for det_muts in range(n_min_det_muts + 1): # observe a mutation with at least required_mt_frags fragments at a mutated position in the tumor if det_muts == n_min_det_muts: probs[det_muts, :] = binom.sf(k=det_muts - 1, n=n_muts_cancer, p=mt_prob_mt_pos) else: probs[det_muts, :] = binom.pmf(k=det_muts, n=n_muts_cancer, p=mt_prob_mt_pos) # observe a mutation with at least required_mt_frags fragments at a position not mutated in the tumor probs[det_muts, :] *= binom.sf(k=n_min_det_muts - det_muts - 1, n=panel_size - n_muts_cancer, p=mt_prob_wt_pos) # sum each column which denotes the probability that at least X mutations are detected det_prob = np.sum(probs, axis=0) logger.debug(f'Probability to observe at least {required_mt_frags} mutant fragments at the {n_min_det_muts}th ' + f'most mutated basepair: {det_prob}') return det_prob else: # extreme maximum of mutated fragments that could be expected under any conditions n_max_frags = int(round(max(binom.ppf(1.0 - 1e-10, n=n_genomes_total, p=seq_err_prob)))) + 2 prob_k_more = np.zeros((n_max_frags, len(hge_tumors))) pvals = np.zeros_like(prob_k_more) for k in range(n_max_frags, 0, -1): # probability to observe k or more mutant fragments of each of the minimally called mutations for detection mt_prob_mt_pos = binom.sf(k=k - 1, n=n_genomes_total, p=mt_prob) # probability to observe k or more mutant fragments at positions not mutated in the tumor mt_prob_wt_pos = binom.sf(k=k - 1, n=n_genomes_total, p=seq_err_prob) # sum probabilities of combinations in which n_min_det_muts mutations can be detected probs = np.zeros((n_min_det_muts + 1, len(hge_tumors))) for det_muts in range(n_min_det_muts + 1): # observe a mutation with at least k fragments at a mutated position in the tumor if det_muts == n_min_det_muts: probs[det_muts, :] = binom.sf(k=det_muts - 1, n=n_muts_cancer, p=mt_prob_mt_pos) else: probs[det_muts, :] = binom.pmf(k=det_muts, n=n_muts_cancer, p=mt_prob_mt_pos) # observe a mutation with at least k fragments at a position not mutated in the tumor probs[det_muts, :] *= binom.sf(k=n_min_det_muts - det_muts - 1, n=panel_size - n_muts_cancer, p=mt_prob_wt_pos) # sum each column which denotes the probability that at least X mutations are detected prob_k_more[k - 1, :] = np.sum(probs, axis=0) logger.debug(f'Probability to observe at least {k} mutant fragments at the {n_min_det_muts}th ' + f'most mutated basepair: {np.mean(prob_k_more[k - 1, :]):.3e}') # probability to observe k or more mutant fragments at a basepair due to sequencing errors pvals[k - 1, :] = binom.sf(k=k - 1, n=n_genomes_total, p=seq_err_prob) # detection probability is equivalent to probability that p-value less or equal to p-value threshold is observed # take the minimal number of mutated fragments required that achieve a p-value lower or equal to the threshold required_mt_frags = np.argmin(pvals > pval_th, axis=0) # logger.info(f'{n_min_det_muts} muts 
required for detection requires: mean {np.mean(required_mt_frags)}, ' # + f'median {np.median(required_mt_frags)} mutant fragments.') det_prob = np.take_along_axis(prob_k_more, np.expand_dims(required_mt_frags, axis=0), axis=0)[0, :] return det_prob, required_mt_frags + 1
def run_top_k_test(self,
                   layer_class,
                   k,
                   batch_size,
                   num_queries,
                   num_candidates,
                   indices_dtype,
                   use_exclusions,
                   random_seed=42,
                   check_export=True):

    layer = layer_class(k=k)

    rng = np.random.RandomState(random_seed)
    candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
    query = rng.normal(size=(num_queries, 4)).astype(np.float32)

    candidate_indices = np.arange(num_candidates).astype(
        indices_dtype if indices_dtype is not None else np.int32)
    exclude = rng.randint(0, num_candidates, size=(num_queries, 5))

    scores = np.dot(query, candidates.T)

    # Set scores of candidates chosen for exclusion to a low value.
    adjusted_scores = scores.copy()
    if use_exclusions:
        exclude_identifiers = candidate_indices[exclude]
        for row_idx, row in enumerate(exclude):
            for col_idx in set(row):
                adjusted_scores[row_idx, col_idx] -= 1000.0
    else:
        exclude_identifiers = None

    # Get indices based on adjusted scores, but retain actual scores.
    indices = np.argsort(-adjusted_scores, axis=1)[:, :k]
    expected_top_scores = np.take_along_axis(scores, indices, 1)
    expected_top_indices = candidate_indices[indices]

    candidates = tf.data.Dataset.from_tensor_slices(candidates).batch(batch_size)

    if indices_dtype is not None:
        identifiers = tf.data.Dataset.from_tensor_slices(candidate_indices).batch(
            batch_size)
    else:
        identifiers = None

    # Call twice to ensure the results are repeatable.
    for _ in range(2):
        if use_exclusions:
            layer.index(candidates, identifiers)
            top_scores, top_indices = layer.query_with_exclusions(
                query, exclude_identifiers)
        else:
            layer.index(candidates, identifiers)
            top_scores, top_indices = layer(query)

    self.assertAllEqual(top_scores.shape, expected_top_scores.shape)
    self.assertAllEqual(top_indices.shape, expected_top_indices.shape)
    self.assertAllClose(top_scores, expected_top_scores)
    self.assertAllEqual(top_indices.numpy().astype(indices_dtype),
                        expected_top_indices)

    if not check_export:
        return

    # Save and restore to check export.
    path = os.path.join(self.get_temp_dir(), "layer")
    layer.save(
        path,
        options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
    restored = tf.keras.models.load_model(path)

    if use_exclusions:
        _, restored_top_indices = restored.query_with_exclusions(
            query, exclude_identifiers)
    else:
        _, restored_top_indices = restored(query)

    self.assertAllEqual(restored_top_indices.numpy().astype(indices_dtype),
                        expected_top_indices)
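# A minimal sketch of how the expected top-k values in this test are formed: sort
# the scores descending per query, keep k columns, and gather both the scores and
# the candidate identifiers (toy data, independent of the layer under test).
import numpy as np

scores = np.array([[0.1, 0.9, 0.4],
                   [0.7, 0.2, 0.5]])
candidate_ids = np.array([10, 20, 30])
k = 2

indices = np.argsort(-scores, axis=1)[:, :k]
top_scores = np.take_along_axis(scores, indices, axis=1)
top_ids = candidate_ids[indices]
print(top_scores)  # [[0.9 0.4] [0.7 0.5]]
print(top_ids)     # [[20 30] [10 30]]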
    np.add.at(ret, hash_poses, 1)
    return ret

minsketch = np.apply_along_axis(count_index, -1, hash_poses_matrix)
print("fs, rs: {} {}".format(np.average(np.array(fs)), np.average(np.array(rs))))
print(np.max(minsketch))
mslist = minsketch.tolist()
# [print(w) for w in minsketch[0]]

normal = [random.uniform(-1000, 1000) for _ in range(100)]
outlier = [random.uniform(-200, 1000) for _ in range(100)]
test_data = np.asarray([normal, outlier])
test_data = [np.take(test_data, dims, axis=1) for dims in chosen_dims]
test_data = [(te - emins[i]) / (emins[i]) + (emaxs[i])
             for i, te in enumerate(test_data)]
test_data = [(np.expand_dims(os[i], 0) + te) / fs[i]
             for i, te in enumerate(test_data)]
test_data_hashed = np.asarray(
    [[np.apply_along_axis(hfunc, 1, te) for hfunc in hashes] for te in test_data])

scores = np.take_along_axis(minsketch, test_data_hashed, -1)
scores = scores.min(axis=1)
scores = np.log2(scores + 1)
scores = np.average(scores, axis=0)
print(scores)
def value_inference(self):
    x = self.x.val
    indices = self.indices.val
    axis = self.axis.val
    return np.take_along_axis(x, indices, axis)
with tf.Graph().as_default():
    w0 = tf.get_variable('dense_1_vars/weights', shape=(200, h1_dim))  # define the variables that we want
    w1 = tf.get_variable('dense_1_vars/weights_1', shape=(h1_dim, h2_dim))
    w2 = tf.get_variable('dense_1_vars/weights_2', shape=(h2_dim, 1))
    b0 = zeros(h1_dim, 'dense_1_vars/bias')
    b1 = zeros(h2_dim, 'dense_1_vars/bias_1')
    b2 = zeros(1, 'dense_1_vars/bias_2')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, 'F:/volume/0217graphsage/0106/model_output/model')
        node_pred = custom_Dense(w0, w1, w2, b0, b1, b2)
        i_prediction = sess.run(tf.nn.sigmoid(node_pred(x))).reshape(size, -1)

        # inspect the prediction
        print(len(np.where(i_prediction > 0.5)[0]))  # 315 are above 0.5, 157 above 0.9

        # find where the answer indices are
        ans_where = np.where(np.in1d(candidate_ids, y))  # embedding indices of the answers
        # print(i_prediction.reshape(-1)[ans_where])  # look up the predicted values of the answers
        print(len(np.where(
            i_prediction.reshape(-1)[ans_where] > 0.5)[0]))  # how many true answers the NN scores above 0.5

        new_sorter = i_prediction.argsort(axis=1)[:, -150:]  # sort and select the top 150
        new_sorter = np.flip(new_sorter, axis=1)
        batch_classes = np.tile(candidate_ids, (size, 1))  # shape: N * len(candidate_ids)
        classes = np.take_along_axis(batch_classes, new_sorter, axis=1)

        # count the hits
        print(np.where(np.in1d(classes.reshape(-1), y))[0].shape)
def sort(self, axis=-1, kind=None, order=None):
    """Sort an array in-place. Refer to `numpy.sort` for full documentation."""
    # TODO: probably possible to do this faster than going through argsort!
    indices = self.argsort(axis, kind=kind, order=order)
    self[:] = np.take_along_axis(self, indices, axis=axis)
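# A minimal sketch of the argsort-then-gather equivalence relied on above,
# using a plain ndarray as a stand-in for the subclass:
import numpy as np

a = np.array([[3, 1, 2],
              [9, 7, 8]])
indices = a.argsort(axis=-1)
sorted_a = np.take_along_axis(a, indices, axis=-1)
assert np.array_equal(sorted_a, np.sort(a, axis=-1))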
def _generate_lookup_table(recip_latt, reciprocal_radius: float, unique: bool = True):
    """Generate a look-up table with all combinations of indices,
    including their reciprocal distances and the angle between
    them.

    Parameters
    ----------
    recip_latt : :class:`diffpy.structure.lattice.Lattice`
        Reciprocal lattice
    reciprocal_radius : float
        The maximum g-vector magnitude to be included in the library.
    unique : bool
        Return a unique list of phase measurements

    Returns
    -------
    indices : np.array
        Nx2x3 numpy array containing the miller indices for
        reflection1, reflection2
    measurements : np.array
        Nx3 numpy array containing len1, len2, angle
    """
    miller_indices, coordinates, distances = get_points_in_sphere(
        recip_latt, reciprocal_radius)

    # Create pair_indices for selecting all point pair combinations
    num_indices = len(miller_indices)
    pair_a_indices, pair_b_indices = np.mgrid[:num_indices, :num_indices]

    # Only select one of the permutations and don't pair an index with
    # itself (select above diagonal)
    upper_indices = np.triu_indices(num_indices, 1)
    pair_a_indices = pair_a_indices[upper_indices].ravel()
    pair_b_indices = pair_b_indices[upper_indices].ravel()

    # Mask off origin (0, 0, 0)
    origin_index = num_indices // 2
    pair_a_indices = pair_a_indices[pair_a_indices != origin_index]
    pair_b_indices = pair_b_indices[pair_b_indices != origin_index]

    pair_indices = np.vstack([pair_a_indices, pair_b_indices])

    # Create library entries
    angles = get_angle_cartesian_vec(coordinates[pair_a_indices],
                                     coordinates[pair_b_indices])
    pair_distances = distances[pair_indices.T]
    # Ensure longest vector is first
    len_sort = np.fliplr(pair_distances.argsort(axis=1))
    # phase_index_pairs is a list of [hkl1, hkl2]
    phase_index_pairs = np.take_along_axis(miller_indices[pair_indices.T],
                                           len_sort[:, :, np.newaxis], axis=1)
    # phase_measurements is a list of [len1, len2, angle]
    phase_measurements = np.column_stack(
        (np.take_along_axis(pair_distances, len_sort, axis=1), angles))

    if unique:
        # Only keep unique triplets
        measurements, measurement_indices = np.unique(phase_measurements,
                                                      axis=0,
                                                      return_index=True)
        indices = phase_index_pairs[measurement_indices]
    else:
        measurements = phase_measurements
        indices = phase_index_pairs

    return measurements, indices
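# A minimal sketch of the "longest vector first" step above: sort each pair of
# distances in descending order and reorder the paired hkl indices consistently.
# Toy values only; not a real reciprocal lattice.
import numpy as np

pair_distances = np.array([[1.0, 2.5],
                           [3.0, 0.5]])
pair_hkls = np.array([[[1, 0, 0], [1, 1, 0]],
                      [[2, 0, 0], [0, 1, 0]]])

len_sort = np.fliplr(pair_distances.argsort(axis=1))        # descending per row
sorted_dists = np.take_along_axis(pair_distances, len_sort, axis=1)
sorted_hkls = np.take_along_axis(pair_hkls, len_sort[:, :, np.newaxis], axis=1)
print(sorted_dists)    # [[2.5 1. ] [3.  0.5]]
print(sorted_hkls[0])  # [[1 1 0] [1 0 0]]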
# # if chan_width.value < 0:
# #     newchan_width *= -1.
# spec_axis = np.arange(subcube.spectral_axis[0].value,
#                       subcube.spectral_axis[-1].value,
#                       newchan_width) * unit
# assert spec_axis.size > 0
# subcube = subcube.spectral_interpolate(spec_axis)

# err_arr = noise_val * np.ones(subcube.shape[1:])

peaktemp = subcube.max(axis=0)
vcent = subcube.moment1()

peakchans = subcube.argmax(axis=0)
peakvels = np.take_along_axis(subcube.spectral_axis[:, np.newaxis, np.newaxis],
                              peakchans[np.newaxis, :, :], 0)
peakvels = peakvels.squeeze()
peakvels = peakvels.to(u.km / u.s)

# peak_name = fifteenA_HI_BCtaper_wEBHIS_HI_file_dict['PeakTemp']
# peaktemp = Projection.from_hdu(fits.open(peak_name))
# vcent_name = fourteenA_wEBHIS_HI_file_dict['Moment1']
# vcent = Projection.from_hdu(fits.open(vcent_name)).to(u.km / u.s)

# Restrict number of positions to fit.
mask_peak = peaktemp >= 10 * u.K

# Must have 5 channels above half the peak, following Braun+09
mask_halfabovepeak = (subcube.filled_data[:] > 5 * u.K).sum(0) > 5
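# A NumPy-only sketch of the "velocity at peak channel" gather above, with
# hypothetical shapes and no spectral_cube/astropy objects involved:
import numpy as np

nchan, ny, nx = 5, 3, 4
spectral_axis = np.linspace(-100.0, 100.0, nchan)        # toy velocity axis, km/s
cube = np.random.rand(nchan, ny, nx)

peakchans = cube.argmax(axis=0)                          # (ny, nx) channel of peak
peakvels = np.take_along_axis(spectral_axis[:, np.newaxis, np.newaxis],
                              peakchans[np.newaxis, :, :], axis=0).squeeze()
assert peakvels.shape == (ny, nx)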
top10[round_num] = test_metrics['top_10_categorical_accuracy']

# Generate Accuracy and Throughput Performance Curves
keras_model = create_keras_model()
# keras_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), metrics=[top1,top10])
state.model.assign_weights_to(keras_model)
test_preds = keras_model.predict(test_data.lidar_data, batch_size=100)
test_preds_idx = np.argsort(test_preds, axis=1)
top_k = np.zeros(100)
throughput_ratio_at_k = np.zeros(100)
correct = 0
for i in range(100):
    correct += np.sum(test_preds_idx[:, -1 - i] == np.argmax(test_data.beam_output, axis=1))
    top_k[i] = correct / test_data.beam_output.shape[0]
    throughput_ratio_at_k[i] = np.sum(np.log2(
        np.max(np.take_along_axis(test_data.beam_output_true, test_preds_idx, axis=1)[:, -1 - i:],
               axis=1) + 1.0)) / np.sum(np.log2(np.max(test_data.beam_output_true, axis=1) + 1.0))

sio.savemat('federated_accuracy' + str(round_num) + '.mat', {'accuracy': top_k})
sio.savemat('federated_throughput' + str(round_num) + '.mat', {'throughput': throughput_ratio_at_k})
sio.savemat('top1.mat', {'top1': top1})
sio.savemat('top10.mat', {'top10': top10})
np.savez("federated.npz", classification=top_k, throughput_ratio=throughput_ratio_at_k)

accFL = accFL + metrics['train']['top_10_categorical_accuracy'] / MONTECARLO
print(MONTECARLOi)
print(accFL)
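# A minimal sketch of the running top-k accuracy loop above, on toy predictions
# with 3 classes instead of 100 (no TensorFlow or beam data involved):
import numpy as np

preds = np.array([[0.1, 0.7, 0.2],
                  [0.5, 0.3, 0.2]])
true_idx = np.array([2, 0])
preds_idx = np.argsort(preds, axis=1)       # ascending; best class is column -1

correct = 0
top_k = np.zeros(preds.shape[1])
for i in range(preds.shape[1]):
    correct += np.sum(preds_idx[:, -1 - i] == true_idx)
    top_k[i] = correct / preds.shape[0]
print(top_k)    # [0.5 1.  1. ] for this toy example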
def MIG_4_dSprites_cupy(z_samples, z_mean, z_stddev, num_samples=10000, batch_size=10, version=1, gpu=0): """ :param z_samples: [3, 6, 40, 32, 32, z_dim] :param z_mean: [3, 6, 40, 32, 32, z_dim] :param z_stddev: [3, 6, 40, 32, 32, z_dim] :param batch_size: :param version: 1 or 2 :return: """ assert version == 1 or version == 2, "'version' can only be 1 or 2!" if version == 1: estimate_entropies = estimate_entropies_v1_cupy else: estimate_entropies = estimate_entropies_v2 assert z_samples.shape == z_mean.shape == z_stddev.shape, "z_samples.shape: {}, " \ "z_mean.shape: {}, z_stddev.shape: {}".format(z_samples.shape, z_mean.shape, z_stddev.shape) assert len(z_samples.shape) == 6 and z_samples.shape[:-1] == (3, 6, 40, 32, 32), \ "z_samples.shape: {}".format(z_samples.shape) print("Estimate marginal entropy") # H(q(z)) estimated with stratified sampling # (z_dim, ) marginal_entropies = estimate_entropies( np.reshape(z_samples, [3 * 6 * 40 * 32 * 32, -1]), np.reshape(z_mean, [3 * 6 * 40 * 32 * 32, -1]), np.reshape(z_stddev, [3 * 6 * 40 * 32 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) # (1, z_dim) marginal_entropies = np.expand_dims(marginal_entropies, axis=0) # (5, z_dim) cond_entropies = np.zeros([5, z_samples.shape[-1]], dtype=np.float32) print("Estimate conditional entropy for shape") for i in range(3): cond_entropies_i = estimate_entropies( np.reshape(z_samples[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]), np.reshape(z_mean[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]), np.reshape(z_stddev[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) # Compute the sum of conditional entropy for each scale value, then take the mean cond_entropies[0] += cond_entropies_i / 3.0 print("Estimate conditional entropy for scale") for i in range(6): cond_entropies_i = estimate_entropies( np.reshape(z_samples[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]), np.reshape(z_mean[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]), np.reshape(z_stddev[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) # Compute the sum of conditional entropy for each scale value, then take the mean cond_entropies[1] += cond_entropies_i / 6.0 print("Estimate conditional entropy for rotation") for i in range(40): cond_entropies_i = estimate_entropies( np.reshape(z_samples[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]), np.reshape(z_mean[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]), np.reshape(z_stddev[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) # Compute the sum of conditional entropy for each scale value, then take the mean cond_entropies[2] += cond_entropies_i / 40.0 print("Estimate conditional entropy for pos x") for i in range(32): cond_entropies_i = estimate_entropies( np.reshape(z_samples[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]), np.reshape(z_mean[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]), np.reshape(z_stddev[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) # Compute the sum of conditional entropy for each scale value, then take the mean cond_entropies[3] += cond_entropies_i / 32.0 print("Estimate conditional entropy for pos y") for i in range(32): cond_entropies_i = estimate_entropies( np.reshape(z_samples[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]), np.reshape(z_mean[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]), np.reshape(z_stddev[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]), num_samples=num_samples, batch_size=batch_size, gpu=gpu) 
# Compute the sum of conditional entropy for each scale value, then take the mean cond_entropies[4] += cond_entropies_i / 32.0 # (5, z_dim) MIs = marginal_entropies - cond_entropies # (5, z_dim) ids_sorted = np.argsort(MIs, axis=1)[:, ::-1] MIs_sorted = np.take_along_axis(MIs, ids_sorted, axis=1) factor_entropies = np.log([3, 6, 40, 32, 32]) # Normalize MI by the entropy of factors # (5, z_dim) MIs_sorted_normed = MIs_sorted / np.expand_dims(factor_entropies, axis=-1) # (5,) MIG = MIs_sorted_normed[:, 0] - MIs_sorted_normed[:, 1] results = { 'H_z': marginal_entropies, 'H_y': factor_entropies, 'H_z_cond_y': cond_entropies, 'MI': MIs, 'MI_sorted': MIs_sorted, 'MI_sorted_normed': MIs_sorted_normed, 'MIG': MIG, } return results
def find_all_match_idx(query_embeddings: np.ndarray, train_embeddings: np.ndarray, k=0): """Find all matches for the test set in the training set at the sametime, using cupy. This is solely for optimisation purpose in order to get the code to run faster on machines with GPU. Args: query_embeddings (np.ndarray): Test set embeddings. train_embeddings (np.ndarray): Train set embeddings. k (int, optional): [description]. Defaults to 0. Raises: ValueError: The case where "k!=0" is not yet implemeted. Returns: [type]: [description] """ global use_cupy print("Using GPU to compute matches!") if k != 0: #best_matches = cp.argsort(-cp.dot(query_embeddings.T,train_embeddings)) #match_idx = best_matches[k] raise ValueError("The case where k is not 0 must be implemented.") else: match_idxs = [] query_chunk_size = 1024 train_chunk_size = 65536 * 2 for i in tqdm( range(0, math.ceil(len(query_embeddings) / query_chunk_size))): query_start = i * query_chunk_size query_end = query_start + query_chunk_size if query_end > len(query_embeddings): query_end = len(query_embeddings) cuda_query_embeddings = cp.asarray( query_embeddings[query_start:query_end]) matches = [] scores = [] best_match_idx_chunk_score = np.zeros((query_end - query_start, 1)) best_match_idx_chunk = np.zeros((query_end - query_start, 1), dtype=np.uint64) for j in range( 0, math.ceil(train_embeddings.shape[1] / train_chunk_size)): train_start = j * train_chunk_size train_end = train_start + train_chunk_size if train_end > train_embeddings.shape[1]: train_end = train_embeddings.shape[1] cuda_train_embeddings = cp.asarray( train_embeddings[:, train_start:train_end]) similarity = cp.dot(cuda_query_embeddings, cuda_train_embeddings) match_idx_chunk = cp.argmax(similarity, axis=1).get() similarity = similarity.get() match_idx_chunk_score = np.take_along_axis(similarity, np.expand_dims( match_idx_chunk, axis=1), axis=1) match_idx_chunk += train_start best_match_idx_chunk = np.where( match_idx_chunk_score > best_match_idx_chunk_score, np.expand_dims(match_idx_chunk, axis=1), best_match_idx_chunk).astype(np.uint64) best_match_idx_chunk_score = np.where( match_idx_chunk_score > best_match_idx_chunk_score, match_idx_chunk_score, best_match_idx_chunk_score) #if use_cupy: #match_idx_chunk=match_idx_chunk.get() matches.append(match_idx_chunk) match_idxs += best_match_idx_chunk.squeeze().tolist() return match_idxs
def transform(self, neigh_dist, neigh_ind, X=None, assume_sorted: bool = True, *args, **kwargs) -> (np.ndarray, np.ndarray): """ Transform distance between test and training data with Mutual Proximity. Parameters ---------- neigh_dist: np.ndarray, shape (n_query, n_neighbors) Distance matrix of test objects (rows) against their individual k nearest neighbors among the training data (columns). neigh_ind: np.ndarray, shape (n_query, n_neighbors) Neighbor indices corresponding to the values in neigh_dist X: ignored assume_sorted: bool, default = True Assume input matrices are sorted according to neigh_dist. If False, these are partitioned here. NOTE: The returned matrices are never sorted. Returns ------- hub_reduced_dist, neigh_ind Local scaling distances, and corresponding neighbor indices Notes ----- The returned distances are NOT sorted! If you use this class directly, you will need to sort the returned matrices according to hub_reduced_dist. Classes from :mod:`skhubness.neighbors` do this automatically. """ check_is_fitted(self, 'r_dist_train_') n_test, n_indexed = neigh_dist.shape if n_indexed == 1: warnings.warn( f'Cannot perform hubness reduction with a single neighbor per query. ' f'Skipping hubness reduction, and returning untransformed distances.' ) return neigh_dist, neigh_ind # increment to include the k-th element in slicing k = self.k + 1 # Find distances to the k-th neighbor (standard LS) or the k neighbors (NICDM) if assume_sorted: r_dist_test = neigh_dist[:, :k] else: kth = np.arange(self.k) mask = np.argpartition(neigh_dist, kth=kth)[:, :k] r_dist_test = np.take_along_axis(neigh_dist, mask, axis=1) # Calculate LS or NICDM hub_reduced_dist = np.empty_like(neigh_dist) # Optionally show progress of local scaling loop if self.verbose: range_n_test = tqdm(range(n_test), total=n_test, desc=f'LS {self.method}') else: range_n_test = range(n_test) # Perform standard local scaling... if self.method in ['ls', 'standard']: r_train = self.r_dist_train_[:, -1] r_test = r_dist_test[:, -1] for i in range_n_test: hub_reduced_dist[i, :] = \ 1. - np.exp(-1 * neigh_dist[i] ** 2 / (r_test[i] * r_train[neigh_ind[i]])) # ...or use non-iterative contextual dissimilarity measure elif self.method == 'nicdm': r_train = self.r_dist_train_.mean(axis=1) r_test = r_dist_test.mean(axis=1) for i in range_n_test: hub_reduced_dist[i, :] = neigh_dist[i] / np.sqrt( (r_test[i] * r_train[neigh_ind[i]])) else: raise ValueError( f"Internal: Invalid method {self.method}. Try 'ls' or 'nicdm'." ) # Return the hubness reduced distances # These must be sorted downstream return hub_reduced_dist, neigh_ind
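For the `assume_sorted=False` branch above, the relevant idiom is a partial sort: `np.argpartition` with a sequence of kth values places the first k columns in sorted order, and `np.take_along_axis` reads the distances back. A small standalone illustration (toy distances, not tied to the class):

import numpy as np

rng = np.random.default_rng(0)
neigh_dist = rng.uniform(size=(4, 10))   # (n_query, n_neighbors), unsorted

k = 3
kth = np.arange(k)
mask = np.argpartition(neigh_dist, kth=kth, axis=1)[:, :k]   # indices of the k smallest, in order
r_dist = np.take_along_axis(neigh_dist, mask, axis=1)        # their distance values

# Same result as a full row-wise sort restricted to the first k columns.
assert np.allclose(r_dist, np.sort(neigh_dist, axis=1)[:, :k])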
def main(args): # ===================================== # Load config # ===================================== with open(join(args.output_dir, 'config.json')) as f: config = json.load(f) args.__dict__.update(config) # ===================================== # Dataset # ===================================== data_file = join(RAW_DATA_DIR, "ComputerVision", "dSprites", "dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz") # It is already in the range [0, 1] with np.load(data_file, encoding="latin1") as f: x_train = f['imgs'] # 3 shape * 6 scale * 40 rotation * 32 pos X * 32 pos Y y_train = f['latents_classes'] x_train = np.expand_dims(x_train.astype(np.float32), axis=-1) num_train = len(x_train) print("num_train: {}".format(num_train)) print("y_train[:10]: {}".format(y_train[:10])) # ===================================== # Instantiate model # ===================================== if args.enc_dec_model == "1Konny": encoder = Encoder_1Konny(args.z_dim, stochastic=True) decoder = Decoder_1Konny() disc_z = DiscriminatorZ_1Konny(num_outputs=2) else: raise ValueError("Do not support enc_dec_model='{}'!".format(args.enc_dec_model)) model = FactorVAE([64, 64, 1], args.z_dim, encoder=encoder, decoder=decoder, discriminator_z=disc_z, rec_x_mode=args.rec_x_mode, use_gp0_z_tc=True, gp0_z_tc_mode=args.gp0_z_tc_mode) loss_coeff_dict = { 'rec_x': args.rec_x_coeff, 'kld_loss': args.kld_loss_coeff, 'tc_loss': args.tc_loss_coeff, 'gp0_z_tc': args.gp0_z_tc_coeff, 'Dz_tc_loss_coeff': args.Dz_tc_loss_coeff, } model.build(loss_coeff_dict) SimpleParamPrinter.print_all_params_tf_slim() # ===================================== # Load model # ===================================== config_proto = tf.ConfigProto(allow_soft_placement=True) config_proto.gpu_options.allow_growth = True config_proto.gpu_options.per_process_gpu_memory_fraction = 0.9 sess = tf.Session(config=config_proto) model_dir = make_dir_if_not_exist(join(args.output_dir, "model_tf")) train_helper = SimpleTrainHelper(log_dir=None, save_dir=model_dir) # Load model train_helper.load(sess, load_step=args.load_step) # ===================================== # Experiments save_dir = make_dir_if_not_exist(join(args.save_dir, "{}_{}".format(args.enc_dec_model, args.run))) # ===================================== np.set_printoptions(threshold=np.nan, linewidth=1000, precision=5, suppress=True) num_bins = args.num_bins bin_limits = tuple([float(s) for s in args.bin_limits.split(";")]) data_proportion = args.data_proportion num_data = int(data_proportion * num_train) assert num_data == num_train, "For dSprites, you must use all data!" eps = 1e-8 # file f = open(join(save_dir, 'log[bins={},bin_limits={},data={}].txt'. 
format(num_bins, bin_limits, data_proportion)), mode='w') # print function print_ = functools.partial(print_both, file=f) print_("num_bins: {}".format(num_bins)) print_("bin_limits: {}".format(bin_limits)) print_("data_proportion: {}".format(data_proportion)) # Compute bins # ================================= # print_("") print_("bin_limits: {}".format(bin_limits)) assert len(bin_limits) == 2 and bin_limits[0] < bin_limits[1], "bin_limits={}".format(bin_limits) bins = np.linspace(bin_limits[0], bin_limits[1], num_bins + 1, endpoint=True) print_("bins: {}".format(bins)) assert len(bins) == num_bins + 1 bin_widths = [bins[b] - bins[b - 1] for b in range(1, len(bins))] print_("bin_widths: {}".format(bin_widths)) assert len(bin_widths) == num_bins, "len(bin_widths)={} while num_bins={}!".format(len(bin_widths), num_bins) assert np.all(np.greater(bin_widths, 0)), "bin_widths: {}".format(bin_widths) bin_centers = [(bins[b] + bins[b - 1]) * 0.5 for b in range(1, len(bins))] print_("bin_centers: {}".format(bin_centers)) assert len(bin_centers) == num_bins, "len(bin_centers)={} while num_bins={}!".format(len(bin_centers), num_bins) # ================================= # # Compute representations # ================================= # z_data_file = join(save_dir, "z_data[data={}].npz".format(data_proportion)) if not exists(z_data_file): all_z_mean = [] all_z_stddev = [] print("") print("Compute all_z_mean, all_z_stddev and all_attrs!") count = 0 for batch_ids in iterate_data(num_data, 10 * args.batch_size, shuffle=False): x = x_train[batch_ids] z_mean, z_stddev = sess.run( model.get_output(['z_mean', 'z_stddev']), feed_dict={model.is_train: False, model.x_ph: x}) all_z_mean.append(z_mean) all_z_stddev.append(z_stddev) count += len(batch_ids) print("\rProcessed {} samples!".format(count), end="") print() all_z_mean = np.concatenate(all_z_mean, axis=0) all_z_stddev = np.concatenate(all_z_stddev, axis=0) np.savez_compressed(z_data_file, all_z_mean=all_z_mean, all_z_stddev=all_z_stddev) else: print("{} exists. Load data from file!".format(z_data_file)) with np.load(z_data_file, "r") as f: all_z_mean = f['all_z_mean'] all_z_stddev = f['all_z_stddev'] # ================================= # print_("") all_Q_z_cond_x = [] for i in range(args.z_dim): print_("\nCompute all_Q_z{}_cond_x!".format(i)) all_Q_s_cond_x = [] for batch_ids in iterate_data(len(all_z_mean), 500, shuffle=False, include_remaining=True): # (batch_size, num_bins) q_s_cond_x = normal_density(np.expand_dims(bin_centers, axis=0), mean=np.expand_dims(all_z_mean[batch_ids, i], axis=-1), stddev=np.expand_dims(all_z_stddev[batch_ids, i], axis=-1)) # (batch_size, num_bins) max_q_s_cond_x = np.max(q_s_cond_x, axis=-1) # print("\nmax_q_s_cond_x: {}".format(np.sort(max_q_s_cond_x))) # (batch_size, num_bins) deter_s_cond_x = at_bin(all_z_mean[batch_ids, i], bins).astype(np.float32) # (batch_size, num_bins) Q_s_cond_x = q_s_cond_x * np.expand_dims(bin_widths, axis=0) Q_s_cond_x = Q_s_cond_x / np.maximum(np.sum(Q_s_cond_x, axis=1, keepdims=True), eps) # print("sort(sum(Q_s_cond_x)) (before): {}".format(np.sort(np.sum(Q_s_cond_x, axis=-1)))) Q_s_cond_x = np.where(np.expand_dims(np.less(max_q_s_cond_x, 1e-5), axis=-1), deter_s_cond_x, Q_s_cond_x) # print("sort(sum(Q_s_cond_x)) (after): {}".format(np.sort(np.sum(Q_s_cond_x, axis=-1)))) all_Q_s_cond_x.append(Q_s_cond_x) # (num_samples, num_bins) all_Q_s_cond_x = np.concatenate(all_Q_s_cond_x, axis=0) assert np.all(all_Q_s_cond_x >= 0), "'all_Q_s_cond_x' contains negative values. 
" \ "sorted_all_Q_s_cond_x[:30]:\n{}!".format(np.sort(all_Q_s_cond_x[:30], axis=None)) assert len(all_Q_s_cond_x) == num_train all_Q_z_cond_x.append(all_Q_s_cond_x) # (z_dim, num_samples, num_bins) all_Q_z_cond_x = np.asarray(all_Q_z_cond_x, dtype=np.float32) print_("all_Q_z_cond_x.shape: {}".format(all_Q_z_cond_x.shape)) print_("sum(all_Q_z_cond_x)[:, :10]:\n{}".format(np.sum(all_Q_z_cond_x, axis=-1)[:, :10])) # (z_dim, num_bins) Q_z = np.mean(all_Q_z_cond_x, axis=1) log_Q_z = np.log(np.clip(Q_z, eps, 1-eps)) print_("Q_z.shape: {}".format(Q_z.shape)) print_("sum(Q_z): {}".format(np.sum(Q_z, axis=-1))) # (z_dim, ) H_z = -np.sum(Q_z * log_Q_z, axis=-1) # Factors gt_factors = ['shape', 'scale', 'rotation', 'pos_x', 'pos_y'] gt_num_values = [3, 6, 40, 32, 32] MI_z_y = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32) H_z_y = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32) ids_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.int32) MI_z_y_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32) H_z_y_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32) H_y = [] RMIG = [] JEMMI = [] for k, (factor, num_values) in enumerate(zip(gt_factors, gt_num_values)): print_("\n#" + "=" * 50 + "#") print_("The {}-th gt factor '{}' has {} values!".format(k, factor, num_values)) print_("") # (num_samples, num_categories) # NOTE: We must use k+1 to account for the 'color' attribute, which is always white all_Q_yk_cond_x = one_hot(y_train[:, k+1], num_categories=num_values, dtype=np.float32) print_("all_Q_yk_cond_x.shape: {}".format(all_Q_yk_cond_x.shape)) # (num_categories) Q_yk = np.mean(all_Q_yk_cond_x, axis=0) log_Q_yk = np.log(np.clip(Q_yk, eps, 1-eps)) print_("Q_yk.shape: {}".format(Q_yk.shape)) H_yk = -np.sum(Q_yk * log_Q_yk) print_("H_yk: {}".format(H_yk)) H_y.append(H_yk) Q_z_yk = np.zeros([args.z_dim, num_bins, num_values], dtype=np.float32) # Compute I(zi, yk) for i in range(args.z_dim): print_("\n#" + "-" * 50 + "#") all_Q_zi_cond_x = all_Q_z_cond_x[i] assert len(all_Q_zi_cond_x) == len(all_Q_yk_cond_x) == num_train, \ "all_Q_zi_cond_x.shape: {}, all_Q_yk_cond_x.shape: {}".format( all_Q_zi_cond_x.shape, all_Q_yk_cond_x.shape) # (num_bins, num_categories) Q_zi_yk = np.matmul(np.transpose(all_Q_zi_cond_x, axes=[1, 0]), all_Q_yk_cond_x) Q_zi_yk = Q_zi_yk / num_train print_("np.sum(Q_zi_yk): {}".format(np.sum(Q_zi_yk))) Q_zi_yk = Q_zi_yk / np.maximum(np.sum(Q_zi_yk), eps) print_("np.sum(Q_zi_yk) (normalized): {}".format(np.sum(Q_zi_yk))) assert np.all(Q_zi_yk >= 0), "'Q_zi_yk' contains negative values. 
" \ "sorted_Q_zi_yk[:10]:\n{}!".format(np.sort(Q_zi_yk, axis=None)) # (num_bins, num_categories) log_Q_zi_yk = np.log(np.clip(Q_zi_yk, eps, 1 - eps)) print_("") print_("Q_zi (default): {}".format(Q_z[i])) print_("Q_zi (sum of Q_zi_yk over yk): {}".format(np.sum(Q_zi_yk, axis=-1))) print_("") print_("Q_yk (default): {}".format(Q_yk)) print_("Q_yk (sum of Q_zi_yk over zi): {}".format(np.sum(Q_zi_yk, axis=0))) MI_zi_yk = Q_zi_yk * (log_Q_zi_yk - np.expand_dims(log_Q_z[i], axis=-1) - np.expand_dims(log_Q_yk, axis=0)) MI_zi_yk = np.sum(MI_zi_yk) H_zi_yk = -np.sum(Q_zi_yk * log_Q_zi_yk) Q_z_yk[i] = Q_zi_yk MI_z_y[i, k] = MI_zi_yk H_z_y[i, k] = H_zi_yk print_("#" + "-" * 50 + "#") # Print statistics for all z print_("") print_("MI_z_yk:\n{}".format(MI_z_y[:, k])) print_("H_z_yk:\n{}".format(H_z_y[:, k])) print_("H_z:\n{}".format(H_z)) print_("H_yk:\n{}".format(H_yk)) # Compute RMIG and JEMMI ids_yk_sorted = np.argsort(MI_z_y[:, k], axis=0)[::-1] MI_z_yk_sorted = np.take_along_axis(MI_z_y[:, k], ids_yk_sorted, axis=0) H_z_yk_sorted = np.take_along_axis(H_z_y[:, k], ids_yk_sorted, axis=0) RMIG_yk = np.divide(MI_z_yk_sorted[0] - MI_z_yk_sorted[1], H_yk) JEMMI_yk = np.divide(H_z_yk_sorted[0] - MI_z_yk_sorted[0] + MI_z_yk_sorted[1], H_yk + np.log(num_bins)) ids_sorted[:, k] = ids_yk_sorted MI_z_y_sorted[:, k] = MI_z_yk_sorted H_z_y_sorted[:, k] = H_z_yk_sorted RMIG.append(RMIG_yk) JEMMI.append(JEMMI_yk) print_("") print_("ids_sorted: {}".format(ids_sorted)) print_("MI_z_yk_sorted: {}".format(MI_z_yk_sorted)) print_("RMIG_yk: {}".format(RMIG_yk)) print_("JEMMI_yk: {}".format(JEMMI_yk)) z_yk_prob_file = join(save_dir, "z_yk_prob_4_{}[bins={},bin_limits={},data={}].npz". format(factor, num_bins, bin_limits, data_proportion)) np.savez_compressed(z_yk_prob_file, Q_z_yk=Q_z_yk) print_("#" + "=" * 50 + "#") results = { "MI_z_y": MI_z_y, "H_z_y": H_z_y, "ids_sorted": ids_sorted, "MI_z_y_sorted": MI_z_y_sorted, "H_z_y_sorted": H_z_y_sorted, "H_z": H_z, "H_y": np.asarray(H_y, dtype=np.float32), "RMIG": np.asarray(RMIG, dtype=np.float32), "JEMMI": np.asarray(JEMMI, dtype=np.float32), } result_file = join(save_dir, "results[bins={},bin_limits={},data={}].npz". format(num_bins, bin_limits, data_proportion)) np.savez_compressed(result_file, **results) f.close()
def evaluate_similarity_metrics(separation: nn.Module, completion: nn.Module, triplet: nn.Module, device, dataset_path: str, scannet_path: str, shapenet_path: str) -> None: unique_scan_objects, unique_cad_objects = get_unique_samples(dataset_path) batch_size = 1 scan_dataset: Dataset = data.FileListDataset( scannet_path, unique_scan_objects, ".sdf", transformation=data.to_occupancy_grid) scan_dataloader = torch.utils.data.DataLoader(dataset=scan_dataset, shuffle=False, batch_size=batch_size) # Evaluate all unique scan embeddings embeddings: Dict[str, np.array] = {} for name, element in tqdm(scan_dataloader, total=len(scan_dataloader)): # Move data to GPU element = element.to(device) with torch.no_grad(): scan_foreground, _ = separation(torch.sigmoid(element)) scan_completed = completion(torch.sigmoid(scan_foreground)) scan_latent = triplet.embed(torch.sigmoid(scan_completed)).view(-1) embeddings[name[0]] = scan_latent.cpu().numpy().squeeze() # Evaluate all unique cad embeddings cad_dataset: Dataset = data.FileListDataset( shapenet_path, unique_cad_objects, "__0__.df", transformation=data.to_occupancy_grid) cad_dataloader = torch.utils.data.DataLoader(dataset=cad_dataset, shuffle=False, batch_size=batch_size) for name, element in tqdm(cad_dataloader, total=len(cad_dataloader)): # Move data to GPU element = element.to(device) with torch.no_grad(): cad_latent = triplet.embed(element).view(-1) embeddings[name[0]] = cad_latent.cpu().numpy().squeeze() # embedding_vectors = np.load("/mnt/raid/dahnert/joint_embedding_binary/embedding_vectors.npy") # embedding_names = json.load(open("/mnt/raid/dahnert/joint_embedding_binary/embedding_names.json")) # embeddings = dict(zip(embedding_names, embedding_vectors)) # Evaluate metrics with open(dataset_path) as f: samples = json.load(f).get("samples") retrieved_correct = 0 retrieved_total = 0 ranked_correct = 0 ranked_total = 0 selected_categories = [ "02747177", "02808440", "02818832", "02871439", "02933112", "03001627", "03211117", "03337140", "04256520", "04379243", "other" ] per_category_retrieved_correct = { category: 0 for category in selected_categories } per_category_retrieved_total = { category: 0 for category in selected_categories } per_category_ranked_correct = { category: 0 for category in selected_categories } per_category_ranked_total = { category: 0 for category in selected_categories } # Iterate over all annotations for sample in tqdm(samples, total=len(samples)): reference_name = sample["reference"]["name"].replace("/scan/", "") reference_embedding = embeddings[reference_name][np.newaxis, :] pool_names = np.asarray( [p["name"].replace("/cad/", "") for p in sample["pool"]]) pool_embeddings = [embeddings[p] for p in pool_names] pool_embeddings = np.asarray(pool_embeddings) # Compute distances in embedding space distances = scipy.spatial.distance.cdist(reference_embedding, pool_embeddings, metric="euclidean") sorted_indices = np.argsort(distances, axis=1) sorted_distances = np.take_along_axis(distances, sorted_indices, axis=1) sorted_distances = sorted_distances[0] predicted_ranking = np.take(pool_names, sorted_indices)[0].tolist() ground_truth_names = [ r["name"].replace("/cad/", "") for r in sample["ranked"] ] # retrieval accuracy sample_retrieved_correct = 1 if metrics.is_correctly_retrieved( predicted_ranking, ground_truth_names) else 0 retrieved_correct += sample_retrieved_correct retrieved_total += 1 # per-category retrieval accuracy reference_category = metrics.get_category_from_list( metrics.get_category(reference_name), 
selected_categories) per_category_retrieved_correct[ reference_category] += sample_retrieved_correct per_category_retrieved_total[reference_category] += 1 # ranking quality sample_ranked_correct = metrics.count_correctly_ranked_predictions( predicted_ranking, ground_truth_names) ranked_correct += sample_ranked_correct ranked_total += len(ground_truth_names) per_category_ranked_correct[ reference_category] += sample_ranked_correct per_category_ranked_total[reference_category] += len( ground_truth_names) print( f"correct: {retrieved_correct}, total: {retrieved_total}, accuracy: {retrieved_correct/retrieved_total}" ) for (category, correct), total in zip(per_category_retrieved_correct.items(), per_category_retrieved_total.values()): print( f"{category}: {correct:>5d}/{total:>5d} --> {correct/total:4.3f}" ) print( f"correct: {ranked_correct}, total: {ranked_total}, accuracy: {ranked_correct/ranked_total}" ) for (category, correct), total in zip(per_category_ranked_correct.items(), per_category_ranked_total.values()): print( f"{category}: {correct:>5d}/{total:>5d} --> {correct/total:4.3f}" ) return None
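The ranking step above needs neither the networks nor the dataset: it is a cdist, an argsort, and a gather. A self-contained sketch with random embeddings and hypothetical pool names:

import numpy as np
import scipy.spatial.distance

rng = np.random.default_rng(0)
reference = rng.normal(size=(1, 32))     # one query embedding
pool = rng.normal(size=(6, 32))          # candidate embeddings
pool_names = np.array([f"cad_{i}" for i in range(6)])

distances = scipy.spatial.distance.cdist(reference, pool, metric="euclidean")  # (1, 6)
sorted_indices = np.argsort(distances, axis=1)
sorted_distances = np.take_along_axis(distances, sorted_indices, axis=1)[0]
predicted_ranking = np.take(pool_names, sorted_indices)[0].tolist()

print(predicted_ranking[0], sorted_distances[0])   # closest candidate and its distance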
def process(policy, dataloader, top_k): mean_kacc = np.zeros(len(top_k)) n_samples_processed = 0 for batch in dataloader: if policy['type'] == 'gcnn': c, ei, ev, v, n_cs, n_vs, n_cands, cands, best_cands, cand_scores = batch pred_scores = policy['model']( (c, ei, ev, v, tf.reduce_sum( n_cs, keepdims=True), tf.reduce_sum(n_vs, keepdims=True)), tf.convert_to_tensor(False)) # filter candidate variables pred_scores = tf.expand_dims( tf.gather(tf.squeeze(pred_scores, 0), cands), 0) elif policy['type'] == 'ml-competitor': cand_feats, n_cands, best_cands, cand_scores = batch # move to numpy cand_feats = cand_feats.numpy() n_cands = n_cands.numpy() # feature normalization cand_feats = (cand_feats - policy['feat_shift']) / policy['feat_scale'] pred_scores = policy['model'].predict(cand_feats) # move back to TF pred_scores = tf.convert_to_tensor(pred_scores.reshape((1, -1)), dtype=tf.float32) # padding pred_scores = padding(pred_scores, n_cands) true_scores = padding(tf.reshape(cand_scores, (1, -1)), n_cands) true_bestscore = tf.reduce_max(true_scores, axis=-1, keepdims=True) assert all(true_bestscore.numpy() == np.take_along_axis( true_scores.numpy(), best_cands.numpy().reshape((-1, 1)), axis=1)) kacc = [] for k in top_k: pred_top_k = tf.nn.top_k(pred_scores, k=k)[1].numpy() pred_top_k_true_scores = np.take_along_axis(true_scores.numpy(), pred_top_k, axis=1) kacc.append( np.mean( np.any(pred_top_k_true_scores == true_bestscore.numpy(), axis=1))) kacc = np.asarray(kacc) batch_size = int(n_cands.shape[0]) mean_kacc += kacc * batch_size n_samples_processed += batch_size mean_kacc /= n_samples_processed return mean_kacc
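The k-accuracy loop above can be reproduced in plain NumPy; the sketch below uses `np.argsort` in place of `tf.nn.top_k` and made-up scores for a single (already padded) sample:

import numpy as np

true_scores = np.array([[0.2, 0.9, 0.1, 0.9, 0.4]])   # (1, n_candidates)
pred_scores = np.array([[0.5, 0.1, 0.8, 0.7, 0.2]])   # model scores for the same candidates

true_best = true_scores.max(axis=-1, keepdims=True)

for k in (1, 3, 5):
    # Indices of the k highest predicted scores, then their true scores.
    pred_top_k = np.argsort(-pred_scores, axis=1)[:, :k]
    top_k_true = np.take_along_axis(true_scores, pred_top_k, axis=1)
    acc = np.mean(np.any(top_k_true == true_best, axis=1))
    print(f"top-{k} accuracy: {acc}")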
def shuffle_along_axis(a, axis):
    # Function courtesy of Divakar
    # (https://stackoverflow.com/questions/5040797/shuffling-numpy-array-along-a-given-axis/5044364#5044364)
    idx = np.random.rand(*a.shape).argsort(axis=axis)
    return np.take_along_axis(a, idx, axis=axis)
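Usage note (assuming the definition above is in scope): the argsort of uniform noise yields an independent random permutation per slice, so each slice along the chosen axis is shuffled on its own.

import numpy as np

a = np.arange(12).reshape(3, 4)
print(shuffle_along_axis(a, axis=1))   # each row is an independent permutation of its own values
print(shuffle_along_axis(a, axis=0))   # each column is shuffled independently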
def simplify_ad(self): if len(self.shape) == 0: # Workaround for scalar-like arrays other = self.reshape((1, )) other.simplify_ad() other = other.reshape(tuple()) self.coef, self.index = other.coef, other.index return bad_index = np.iinfo(self.index.dtype).max bad_pos = self.coef == 0 self.index[bad_pos] = bad_index ordering = self.index.argsort(axis=-1) self.coef = np.take_along_axis(self.coef, ordering, axis=-1) self.index = np.take_along_axis(self.index, ordering, axis=-1) cum_coef = np.full(self.shape, 0.) indices = np.full(self.shape, 0) size_ad = self.size_ad self.coef = np.moveaxis(self.coef, -1, 0) self.index = np.moveaxis(self.index, -1, 0) prev_index = np.copy(self.index[0]) for i in range(size_ad): # Note : self.index, self.coef change during iterations ind, co = self.index[i], self.coef[i] pos_new_index = np.logical_and(prev_index != ind, ind != bad_index) pos_old_index = np.logical_not(pos_new_index) prev_index[pos_new_index] = ind[pos_new_index] cum_coef[pos_new_index] = co[pos_new_index] cum_coef[pos_old_index] += co[pos_old_index] indices[pos_new_index] += 1 indices_exp = np.expand_dims(indices, axis=0) np.put_along_axis(self.index, indices_exp, prev_index, axis=0) np.put_along_axis(self.coef, indices_exp, cum_coef, axis=0) indices[self.index[0] == bad_index] = -1 indices_max = np.max(indices, axis=None) size_ad_new = indices_max + 1 self.coef = self.coef[:size_ad_new] self.index = self.index[:size_ad_new] if size_ad_new == 0: self.coef = np.moveaxis(self.coef, 0, -1) self.index = np.moveaxis(self.index, 0, -1) return coef_end = self.coef[np.maximum(indices_max, 0)] index_end = self.index[np.maximum(indices_max, 0)] coef_end[indices < indices_max] = 0. index_end[indices < indices_max] = -1 while np.min(indices, axis=None) < indices_max: indices = np.minimum(indices_max, 1 + indices) indices_exp = np.expand_dims(indices, axis=0) np.put_along_axis(self.coef, indices_exp, coef_end, axis=0) np.put_along_axis(self.index, indices_exp, index_end, axis=0) self.coef = np.moveaxis(self.coef, 0, -1) self.index = np.moveaxis(self.index, 0, -1) self.coef = self.coef.reshape(self.shape + (size_ad_new, )) self.index = self.index.reshape(self.shape + (size_ad_new, )) self.index[self.index == -1] = 0 # Corresponding coefficient is zero anyway.
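The opening of `simplify_ad` is the core along-axis idiom here: jointly reorder the coefficient and index arrays along the last axis so that equal indices become adjacent. A reduced sketch of just that step, on toy arrays unrelated to the AD class:

import numpy as np

index = np.array([[2, 0, 2, 1],
                  [1, 1, 0, 3]])
coef = np.array([[0.5, 1.0, 0.25, 2.0],
                 [3.0, 1.0, 0.0, 4.0]])

order = index.argsort(axis=-1)
index_sorted = np.take_along_axis(index, order, axis=-1)
coef_sorted = np.take_along_axis(coef, order, axis=-1)

# Duplicates of the same index are now adjacent and can be summed per row,
# e.g. with np.add.reduceat or with the cumulative scan used above.
print(index_sorted)
print(coef_sorted)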
def assign(dts: np.ndarray, gts: np.ndarray, cfg: DetectionCfg) -> np.ndarray: """Attempt assignment of each detection to a ground truth label. Args: dts: Detections of shape (N,). gts: Ground truth labels of shape (M,). cfg: Detection configuration. Returns: metrics: Matrix of true/false positive concatenated with true positive errors (N, K + S) where K is the number of true positive thresholds used for AP computation and S is the number of true positive errors. """ # Ensure the number of boxes considered per class is at most `MAX_NUM_BOXES`. if dts.shape[0] > MAX_NUM_BOXES: dts = dts[:MAX_NUM_BOXES] n_threshs = len(cfg.affinity_threshs) metrics = np.zeros((dts.shape[0], n_threshs + N_TP_ERRORS)) # Set the true positive metrics to np.nan since error is undefined on false positives. metrics[:, n_threshs : n_threshs + N_TP_ERRORS] = np.nan if gts.shape[0] == 0: return metrics affinity_matrix = compute_affinity_matrix(dts, gts, cfg.affinity_fn_type) # Get the GT label for each max-affinity GT label, detection pair. gt_matches = affinity_matrix.argmax(axis=1)[np.newaxis, :] # The affinity matrix is an N by M matrix of the detections and ground truth labels respectively. # We want to take the corresponding affinity for each of the initial assignments using `gt_matches`. # The following line grabs the max affinity for each detection to a ground truth label. affinities = np.take_along_axis(affinity_matrix.T, gt_matches, axis=0).squeeze(0) # Find the indices of the "first" detection assigned to each GT. unique_gt_matches, unique_dt_matches = np.unique(gt_matches, return_index=True) for i, thresh in enumerate(cfg.affinity_threshs): # `tp_mask` may need to be defined differently with other affinities. tp_mask = affinities[unique_dt_matches] > -thresh metrics[unique_dt_matches, i] = tp_mask # Only compute true positive error when `thresh` is equal to the tp threshold. is_tp_thresh = thresh == cfg.tp_thresh # Ensure that there are true positives of the respective class in the frame. has_true_positives = np.count_nonzero(tp_mask) > 0 if is_tp_thresh and has_true_positives: dt_tp_indices = unique_dt_matches[tp_mask] gt_tp_indices = unique_gt_matches[tp_mask] # Form DataFrame of shape (N, D) where D is the number of attributes in `ObjectLabelRecord`. dt_df = pd.DataFrame([dt.__dict__ for dt in dts[dt_tp_indices]]) gt_df = pd.DataFrame([gt.__dict__ for gt in gts[gt_tp_indices]]) trans_error = dist_fn(dt_df, gt_df, DistFnType.TRANSLATION) scale_error = dist_fn(dt_df, gt_df, DistFnType.SCALE) orient_error = dist_fn(dt_df, gt_df, DistFnType.ORIENTATION) metrics[dt_tp_indices, n_threshs : n_threshs + N_TP_ERRORS] = np.vstack( (trans_error, scale_error, orient_error) ).T return metrics
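The assignment logic above, stripped of the error metrics, is an argmax per detection, a gather of the matching affinity, and a `np.unique(..., return_index=True)` to keep only the first detection per ground truth. A minimal sketch with a hypothetical affinity matrix (negative distances):

import numpy as np

# Hypothetical affinities: rows = detections, columns = ground-truth boxes.
affinity_matrix = np.array([[-0.5, -2.0, -3.0],
                            [-0.4, -1.5, -2.5],
                            [-3.0, -0.2, -1.0],
                            [-2.0, -2.5, -0.9]])

gt_matches = affinity_matrix.argmax(axis=1)[np.newaxis, :]                      # best GT per detection
affinities = np.take_along_axis(affinity_matrix.T, gt_matches, axis=0).squeeze(0)

# Keep only the first detection assigned to each ground truth.
unique_gt, unique_dt = np.unique(gt_matches, return_index=True)

thresh = 1.0
tp_mask = affinities[unique_dt] > -thresh   # true positives at this distance threshold
print(unique_gt, unique_dt, tp_mask)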
def v_iter_couple(setup, t, EV_tuple, ushift, nbatch=nbatch_def, verbose=False, force_f32=False): if verbose: start = default_timer() agrid = setup.agrid_c sgrid = setup.sgrid_c dtype = setup.dtype ls = setup.ls_levels nls = len(ls) # type conversion is here zf = setup.exogrid.all_t[t][:, 0] zm = setup.exogrid.all_t[t][:, 1] zftrend = setup.pars['f_wage_trend'][t] zmtrend = setup.pars['m_wage_trend'][t] psi = setup.exogrid.all_t[t][:, 2] beta = setup.pars['beta_t'][t] sigma = setup.pars['crra_power'] R = setup.pars['R_t'][t] nexo = setup.pars['nexo_t'][t] shp = (setup.na, nexo, setup.ntheta) wf = np.exp(zf + zftrend) wm = np.exp(zm + zmtrend) dtype_here = np.float32 if force_f32 else dtype if EV_tuple is None: EVr_by_l, EVc_by_l, EV_fem_by_l, EV_mal_by_l = np.zeros( ((4, ) + shp + (nls, )), dtype=dtype) else: EVr_by_l, EVc_by_l, EV_fem_by_l, EV_mal_by_l = EV_tuple # type conversion sgrid, sigma, beta = (dtype(x) for x in (sgrid, sigma, beta)) V_couple, c_opt, s_opt, x_opt = np.empty((4, ) + shp, dtype) i_opt, il_opt = np.empty(shp, np.int16), np.empty(shp, np.int16) V_all_l = np.empty(shp + (nls, ), dtype=dtype) theta_val = dtype(setup.thetagrid) # the original problem is max{umult*u(c) + beta*EV} # we need to rescale the problem to max{u(c) + beta*EV_resc} istart = 0 ifinish = nbatch if nbatch < nexo else nexo #Time husband contribute to build Q mt = 1.0 - setup.mlevel # this natually splits everything onto slices for ibatch in range(int(np.ceil(nexo / nbatch))): #money_i = money[:,istart:ifinish] assert ifinish > istart money_t = (R * agrid, wf[istart:ifinish], wm[istart:ifinish]) EV_t = (setup.vsgrid_c, EVr_by_l[:, istart:ifinish, :, :]) V_pure_i, c_opt_i, x_opt_i, s_opt_i, i_opt_i, il_opt_i, V_all_l_i = \ v_optimize_couple(money_t,sgrid,EV_t,setup.mgrid, setup.ucouple_precomputed_u,setup.ucouple_precomputed_x, ls,beta,ushift,dtype=dtype_here,mt=mt) V_ret_i = V_pure_i + psi[None, istart:ifinish, None] # if dtype_here != dtype type conversion happens here V_couple[:, istart: ifinish, :] = V_ret_i # this estimate of V can be improved c_opt[:, istart:ifinish, :] = c_opt_i s_opt[:, istart:ifinish, :] = s_opt_i i_opt[:, istart:ifinish, :] = i_opt_i x_opt[:, istart:ifinish, :] = x_opt_i il_opt[:, istart:ifinish, :] = il_opt_i V_all_l[:, istart: ifinish, :, :] = V_all_l_i # we need this for l choice so it is ok istart = ifinish ifinish = ifinish + nbatch if ifinish + nbatch < nexo else nexo if verbose: print('Batch {} done at {} sec'.format(ibatch, default_timer() - start)) assert np.all(c_opt > 0) psi_r = psi[None, :, None].astype(setup.dtype, copy=False) # finally obtain value functions of partners uf, um = setup.u_part(c_opt, x_opt, il_opt, theta_val[None, None, :], ushift, psi_r) uc = setup.u_couple(c_opt, x_opt, il_opt, theta_val[None, None, :], ushift, psi_r) EVf_all, EVm_all, EVc_all = (setup.vsgrid_c.apply_preserve_shape(x) for x in (EV_fem_by_l, EV_mal_by_l, EVc_by_l)) V_fem = uf + beta * np.take_along_axis( np.take_along_axis(EVf_all, i_opt[..., None], 0), il_opt[..., None], 3).squeeze(axis=3) V_mal = um + beta * np.take_along_axis( np.take_along_axis(EVm_all, i_opt[..., None], 0), il_opt[..., None], 3).squeeze(axis=3) V_all = uc + beta * np.take_along_axis( np.take_along_axis(EVc_all, i_opt[..., None], 0), il_opt[..., None], 3).squeeze(axis=3) #def r(x): return x.astype(dtype) def r(x): return x assert V_all.dtype == dtype assert V_fem.dtype == dtype assert V_mal.dtype == dtype assert c_opt.dtype == dtype assert x_opt.dtype == dtype assert s_opt.dtype == dtype try: assert 
np.allclose(V_all, V_couple, atol=1e-4, rtol=1e-3)
    except AssertionError:
        # V_all and V_couple can differ by slightly more than the tolerance; ignore the mismatch here
        # print('max difference in V is {}'.format(np.max(np.abs(V_all - V_couple))))
        pass

    return r(V_all), r(V_fem), r(V_mal), r(c_opt), r(x_opt), r(s_opt), il_opt, r(V_all_l)
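The partner value functions above are built with a double gather: pick the optimal savings index along axis 0, then the optimal labor-supply option along axis 3, of an array shaped (na, nexo, ntheta, nls). A shape-only sketch with random data (all names here are stand-ins, not the model's objects):

import numpy as np

rng = np.random.default_rng(0)
na, nexo, ntheta, nls = 5, 4, 3, 2
EV_all = rng.normal(size=(na, nexo, ntheta, nls))

i_opt = rng.integers(0, na, size=(na, nexo, ntheta))    # chosen asset index per state
il_opt = rng.integers(0, nls, size=(na, nexo, ntheta))  # chosen labor option per state

picked = np.take_along_axis(
    np.take_along_axis(EV_all, i_opt[..., None], 0),    # gather along the asset axis
    il_opt[..., None], 3).squeeze(axis=3)               # then along the labor axis

# Equivalent elementwise indexing for one entry:
a, e, t = 1, 2, 0
assert picked[a, e, t] == EV_all[i_opt[a, e, t], e, t, il_opt[a, e, t]]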
if current_frame_index < para.start_frame + para.window_length and not is_first_statuation:
    hsv[statistics_index] = frame_HSV
    v_count = counting(statistics_index, hsv, v_count)  # accumulate the initial histogram
    statistics_index = statistics_index + 1

if current_frame_index == para.start_frame + para.window_length and not is_first_statuation:
    v_count = counting(statistics_index, hsv, v_count)
    is_first_statuation = True

if current_frame_index > para.start_frame + para.window_length or is_first_statuation:
    different_frame = np.zeros((frame_x, frame_y), np.uint8)  # declare
    moving_obj_frame_temp = different_frame[:, :]  # declare
    moving_obj_bool_frame = np.take_along_axis(
        v_count,
        (frame_HSV[:, :, statistics_channel] * 255).reshape(frame_x, frame_y, 1).astype(int),
        axis=2) < para.probability_throuhold
    moving_obj_frame_temp[moving_obj_bool_frame.reshape(frame_x, frame_y)] = 255
    different_frame[:, :] = moving_obj_frame_temp

    # AND with the previous frame's mask
    # img = different_frame_binary
    and_frame = and_frame_func(previous_different_frame, different_frame)
    previous_different_frame = different_frame
    img = and_frame
    # cv2.imshow('and', img)

    # Erosion / dilation
    img = cv2.erode(img, None, iterations=2)   # erode then dilate to remove noise
    img = cv2.dilate(img, None, iterations=2)
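The foreground test above is a per-pixel histogram lookup: quantize the current V channel to an integer bin, read that bin's count along the histogram axis with `np.take_along_axis`, and flag pixels whose historical probability is low. A minimal sketch with toy shapes (the histogram and the 0.5 threshold below are hypothetical stand-ins for `v_count` and `para.probability_throuhold`):

import numpy as np

rng = np.random.default_rng(0)
frame_x, frame_y, n_bins = 4, 5, 256

# Per-pixel histogram of past V-channel values, scaled to look like probabilities.
v_count = rng.uniform(size=(frame_x, frame_y, n_bins))

# Current V channel scaled to [0, 255] and used as an index into the histogram axis.
v_channel = rng.uniform(size=(frame_x, frame_y))
idx = (v_channel * 255).reshape(frame_x, frame_y, 1).astype(int)

prob = np.take_along_axis(v_count, idx, axis=2)   # (frame_x, frame_y, 1)
moving_mask = prob < 0.5                          # low historical probability -> moving object

foreground = np.zeros((frame_x, frame_y), np.uint8)
foreground[moving_mask.reshape(frame_x, frame_y)] = 255
print(foreground)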
def kneighbors( self, X=None, n_candidates=None, return_distance=True ) -> Union[Tuple[np.array, np.array], np.array]: """ Retrieve k nearest neighbors. Parameters ---------- X: np.array or None, optional, default = None Query objects. If None, search among the indexed objects. n_candidates: int or None, optional, default = None Number of neighbors to retrieve. If None, use the value passed during construction. return_distance: bool, default = True If return_distance, will return distances and indices to neighbors. Else, only return the indices. """ check_is_fitted(self, 'index_') index = self.index_ if n_candidates is None: n_candidates = self.n_candidates n_candidates = check_n_candidates(n_candidates) # For compatibility reasons, as each sample is considered as its own # neighbor, one extra neighbor will be computed. if X is None: n_query = self.n_indexed_ X = np.array([index.get(i) for i in range(n_query)]) search_from_index = True else: X = check_array(X) n_query = X.shape[0] search_from_index = False dtype = X.dtype # If chosen metric is not among the natively supported ones, reorder the neighbors reorder = True if self.metric not in ('angular', 'cosine', 'jaccard') else False # If fewer candidates than required are found for a query, # we save index=-1 and distance=NaN neigh_ind = -np.ones((n_query, n_candidates), dtype=np.int32) if return_distance or reorder: neigh_dist = np.empty_like(neigh_ind, dtype=dtype) * np.nan metric = 'cosine' if self.metric == 'angular' else self.metric disable_tqdm = False if self.verbose else True if search_from_index: # search indexed against indexed for i in tqdm( range(n_query), desc='Querying', disable=disable_tqdm, ): # Find the approximate nearest neighbors. # Each of the true `n_candidates` nearest neighbors # has at least `recall` chance of being found. ind = index.search_from_index( i, n_candidates, self.recall, ) neigh_ind[i, :len(ind)] = ind if return_distance or reorder: X_neigh_denormalized = \ X[ind] * self.X_indexed_norm_[ind].reshape(len(ind), -1) neigh_dist[i, :len(ind)] = pairwise_distances( X[i:i + 1, :] * self.X_indexed_norm_[i], X_neigh_denormalized, metric=metric, ) else: # search new query against indexed for i, x in tqdm( enumerate(X), desc='Querying', disable=disable_tqdm, ): # Find the approximate nearest neighbors. # Each of the true `n_candidates` nearest neighbors # has at least `recall` chance of being found. ind = index.search( x.tolist(), n_candidates, self.recall, ) neigh_ind[i, :len(ind)] = ind if return_distance or reorder: X_neigh_denormalized =\ np.array([index.get(i) for i in ind]) * self.X_indexed_norm_[ind].reshape(len(ind), -1) neigh_dist[i, :len(ind)] = pairwise_distances( x.reshape(1, -1), X_neigh_denormalized, metric=metric, ) if reorder: sort = np.argsort(neigh_dist, axis=1) neigh_dist = np.take_along_axis(neigh_dist, sort, axis=1) neigh_ind = np.take_along_axis(neigh_ind, sort, axis=1) if return_distance: return neigh_dist, neigh_ind else: return neigh_ind
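The final reordering above also behaves sensibly when a query received fewer than `n_candidates` results, because the missing distances are NaN and `np.argsort` places NaN last in each row. A short standalone illustration with toy values:

import numpy as np

neigh_ind = np.array([[7, 3, 9],
                      [2, -1, -1]])               # -1 marks a missing candidate
neigh_dist = np.array([[0.8, 0.2, 0.5],
                       [0.4, np.nan, np.nan]])

sort = np.argsort(neigh_dist, axis=1)             # NaNs sort to the end of each row
neigh_dist = np.take_along_axis(neigh_dist, sort, axis=1)
neigh_ind = np.take_along_axis(neigh_ind, sort, axis=1)

print(neigh_ind)    # row 0 reordered to 3, 9, 7; row 1 keeps its missing entries at the end
print(neigh_dist)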
def __getitem__(self, index): depthmap_orig = load_depthmap(self.names[index], self.img_width, self.img_height, self.max_depth) pose_orig = self.joints_world[index] depthmap, pose, cropped_cfg = crop_from_xyz_pose(depthmap_orig, pose_orig, self.cfg, out_w=128, out_h=128, pad=20.0, max_depth=self.max_depth) #show_Data(depthmap, pose, cropped_cfg, self.max_depth) """ if self.training: com = center_of_mass(depthmap, cropped_cfg) inv_depthmap = -depthmap + self.max_depth aug_dms, pose = data_aug(inv_depthmap, pose, cropped_cfg, com) depthmap = -aug_dms + self.max_depth """ #show_Data(depthmap, pose, cropped_cfg, self.max_depth) xyzlocal_pose = xyz2xyz_local(pose, cropped_cfg) """ for i in range(3): if pose[:,i].min() < self.stored_min[i]: self.stored_min[i] = pose[:,i].min() print() print('MIN, MAX =', self.stored_min, self.stored_max) if pose[:,i].max() > self.stored_max[i]: self.stored_max[i] = pose[:,i].max() print() print('MIN, MAX =', self.stored_min, self.stored_max) """ offset = np.tile(self.offset[None,:], (xyzlocal_pose.shape[0], 1)) joints_world_normalized = xyzlocal_pose + offset #scale = np.tile(np.array([self.scale])[None,:], aug_poses.shape) joints_world_normalized = joints_world_normalized / self.scale if joints_world_normalized.min() < -1 or joints_world_normalized.max() > 1: print('trouble trouble trouble ', joints_world_normalized.min(), joints_world_normalized.max()) wrist = joints_world_normalized[0,:][None,:] joints_world_normalized = np.concatenate([wrist, joints_world_normalized[1:5,:], wrist, joints_world_normalized[5:9,:], wrist, joints_world_normalized[9:13,:], wrist, joints_world_normalized[13:17,:], wrist, joints_world_normalized[17:,:] ], axis=0) joints_world_normalized = joints_world_normalized.reshape(5, -1, 3) joints_world_normalized = joints_world_normalized.reshape(5, -1) if self.not_initialized: self.not_initialized = False """ points = depthmap2points(depthmap, self.fx, self.fy) points = points.reshape((-1, 3)) j = 0 fig = plt.figure(figsize=(10, 10)) ax = fig.gca(projection='3d') for i in range(points.shape[0]): xs, ys, zs = points[i,:] if zs != self.max_depth: if (j % 10) == 0: ax.scatter(xs, ys, zs, c='r', marker='o') j += 1 for i in range(self.joints_world[index].shape[0]): xs, ys, zs = self.joints_world[index][i,:] ax.scatter(xs, ys, zs, c='b', marker='o') #plt.savefig('fig_{}.png'.format(i), dpi=400, bbox_inches='tight') plt.show() print('hej') """ depthmap /= self.max_depth depthmap = 1 - depthmap if self.training: """ Add Gaussian Noise """ #depthmap += np.random.randn(*depthmap.shape)*0.022 #depthmap = np.clip(depthmap, 0., 1.) """ scramble """ for _ in range(2): depthmap_ = depthmap.reshape((-1,4)) scramble = np.argsort(1.5*np.random.randn(*depthmap_.shape) + np.arange(4)) depthmap = np.take_along_axis(depthmap_,scramble,1).reshape(*depthmap.shape) depthmap = depthmap.transpose() #depthmap = np.concatenate([depthmap, np.zeros((self.img_width-self.img_height, self.img_width), dtype=np.float32)], axis=0) #depthmap = np.array(Image.fromarray(depthmap).resize((100, 100))) #depthmap = imresize(depthmap, (100,100), interp='bilinear', mode='F') return np.float32(depthmap.reshape((1, *depthmap.shape))), np.float32(joints_world_normalized) #save_to_jpg('test%1.png', depthmap, format="PNG") """
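The "scramble" augmentation above permutes each group of four consecutive depth values by argsorting jittered positions, so most values stay close to their original slot. A minimal sketch of the same idea on a toy array (group size 4 and noise scale 1.5 as above):

import numpy as np

depthmap = np.arange(16, dtype=np.float32).reshape(4, 4)

flat = depthmap.reshape((-1, 4))                               # groups of 4 consecutive values
noisy_positions = 1.5 * np.random.randn(*flat.shape) + np.arange(4)
scramble = np.argsort(noisy_positions, axis=1)                 # near-identity permutation per group
scrambled = np.take_along_axis(flat, scramble, axis=1).reshape(depthmap.shape)

print(depthmap)
print(scrambled)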