def preprocess_data(self):
    train_data = read_data(self.train_path)
    test_data = read_data(self.test_path)
    len_train = len(train_data)
    len_test = len(test_data)
    train_data = np.asarray(train_data)
    test_data = np.asarray(test_data)
    #print(train_data.shape, test_data.shape)
    # last column is the target, everything else is features
    X_train, y_train = train_data[:, :-1], train_data[:, -1]
    X_test, y_test = test_data[:, :-1], test_data[:, -1]
    #print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    # expand every raw parameter into its PMNF basis terms
    X_all = np.append(X_train, X_test, axis=0)
    X_all_EPMNF = []
    for row in X_all:
        line = []
        for p in row:
            line = line + PMNF_exp(p)
        X_all_EPMNF.append(line)
    X_all_EPMNF = np.asarray(X_all_EPMNF)
    #print(X_all_EPMNF.shape)
    # standardise the expanded features, then split back into train/test
    scaler = StandardScaler()
    scaler.fit(X_all_EPMNF)
    X_all_EPMNF = scaler.transform(X_all_EPMNF)
    X_train_EPMNF = X_all_EPMNF[:len_train, :]
    X_test_EPMNF = X_all_EPMNF[len_train:, :]
    print(X_train_EPMNF.shape, X_test_EPMNF.shape)
    return train_data, test_data, X_train_EPMNF, X_test_EPMNF, y_train, y_test
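
# A minimal sketch of what the PMNF_exp term expansion above could look like.
# PMNF_exp is not defined in this snippet; the basis terms below (p, sqrt(p),
# p*log2(p), log2(p)) are an assumption in the spirit of performance-model
# normal forms, not the actual implementation.
import math

def pmnf_exp_sketch(p):
    """Expand one raw parameter value into a list of PMNF-style basis terms."""
    p = max(float(p), 1.0)        # guard the log against p <= 0
    return [p,                    # linear term
            math.sqrt(p),         # p^(1/2)
            p * math.log2(p),     # p * log2(p)
            math.log2(p)]         # pure log term

# usage: pmnf_exp_sketch(128) -> [128.0, 11.31..., 896.0, 7.0]
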
def preprocess_data(self):
    # read data
    data_train = np.asarray(read_data(self.train_path))
    data_test = np.asarray(read_data(self.test_path))
    #print(data_train.shape, data_test.shape)
    # 1. split the train set into X, y
    self.X_RF_train = data_train[:, :-1]
    self.y_RF_train = data_train[:, -1]
    # 2. split the test set into X_RF, y_RF and X_Lasso, y_Lasso
    # the test data is split at a process count of 128
    split_at = data_test[:, -2].searchsorted([129])
    test_data_split = np.split(data_test, split_at)
    #print(test_data_split)
    # split each part of the test data into X, y for Random Forest and Lasso
    self.X_RF_test = test_data_split[0][:, :-1]
    self.y_RF_test = test_data_split[0][:, -1]
    self.X_lasso_test = test_data_split[1][:, :-1]
    self.y_lasso_test = test_data_split[1][:, -1]
    print(self.X_RF_test.shape, self.y_RF_test.shape,
          self.X_lasso_test.shape, self.y_lasso_test.shape)
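
# A minimal sketch of the searchsorted/split idiom used above: on an array
# sorted by its process-count column, searchsorted finds the first row with
# count >= 129, and np.split partitions the array at that row. The data
# values below are made up for illustration.
import numpy as np

data = np.array([[1.0, 32, 0.5],
                 [1.5, 64, 0.7],
                 [2.0, 128, 0.9],
                 [2.5, 256, 1.2]])           # second-to-last column: processes
split_at = data[:, -2].searchsorted([129])   # index of first row with >= 129
low, high = np.split(data, split_at)         # rows <= 128 / rows > 128
# low holds the 32/64/128-process rows, high the 256-process row
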
def read_raw_data(self):
    calendar = cm.read_data(os.path.join(cm.raw_data_path, 'calendar.csv'))
    sales_train = cm.read_data(
        os.path.join(cm.raw_data_path, 'sales_train_validation.csv'))
    sell_prices = cm.read_data(
        os.path.join(cm.raw_data_path, 'sell_prices.csv'))
    sample_submission = cm.read_data(
        os.path.join(cm.raw_data_path, 'sample_submission.csv'))
    return sample_submission, calendar, sales_train, sell_prices
def preprocess_data(self):
    # read data
    data_train = np.asarray(read_data(self.train_path))
    data_test = np.asarray(read_data(self.test_path))
    print(data_train.shape, data_test.shape)
    # sort and split the train data, grouped by number of processes
    self.data_train_split = self.split_data(data_train, self.split_train_len)
    #print(self.data_train_split[0])
    # sort and split the test data, grouped by number of processes
    self.data_test_split = self.split_data(data_test, self.split_test_len)
def all_answers(question_id):
    answer_database = common.read_data('answer.csv')
    question_database = common.read_data('question.csv')
    answers = []
    for data_line in answer_database:
        if str(question_id) in data_line[3]:
            answers.append(data_line)
    print(answers)
    question_line = None  # guard against an id with no matching question
    for data_line in question_database:
        if str(question_id) in data_line[0]:
            question_line = data_line
    return render_template('all_answers.html',
                           question_line=question_line,
                           answers=answers)
def main(model_dir, output_dir, redshift_table, subvols, obs_dir):
    plt = common.load_matplotlib()
    fields = {
        'galaxies': ('type', 'mstars_disk', 'mstars_bulge', 'rstar_disk',
                     'm_bh', 'matom_disk', 'mmol_disk', 'mgas_disk',
                     'matom_bulge', 'mmol_bulge', 'mgas_bulge',
                     'mvir_hosthalo')
    }
    hdf5_data = common.read_data(model_dir, redshift_table[0], fields, subvols)

    (mgas_relation, mgas_relation_cen, mgas_relation_sat,
     mh2_gals, mh1_gals, mgas_gals,
     mh2_relation, mh1_relation,
     mhr_relation, mhr_relation_cen, mhr_relation_sat,
     mgas_relation_ltg, mh2_relation_ltg, mh1_relation_ltg,
     mgas_relation_etg, mh2_relation_etg, mh1_relation_etg,
     mgas_ms_relation_ltg, mh2_ms_relation_ltg, mh1_ms_relation_ltg,
     mgas_ms_relation_etg, mh2_ms_relation_etg, mh1_ms_relation_etg,
     mh1_relation_satellites_halos) = prepare_data(hdf5_data)

    plot_cold_gas_fraction(plt, output_dir, obs_dir, mgas_relation,
                           mgas_relation_cen, mgas_relation_sat)
    plot_HI_stacking(plt, output_dir, obs_dir, mh1_relation_satellites_halos)
    plot_molecular_gas_fraction(
        plt, output_dir, obs_dir, mgas_gals, mgas_relation, mh1_gals,
        mh1_relation, mh2_gals, mh2_relation, mgas_relation_ltg,
        mh2_relation_ltg, mh1_relation_ltg, mgas_relation_etg,
        mh2_relation_etg, mh1_relation_etg, mgas_ms_relation_ltg,
        mh2_ms_relation_ltg, mh1_ms_relation_ltg, mgas_ms_relation_etg,
        mh2_ms_relation_etg, mh1_ms_relation_etg)
    plot_h1h2_gas_fraction(plt, output_dir, mhr_relation, mhr_relation_cen,
                           mhr_relation_sat)
def job(database, symbol):
    today = date.today()
    # update to today's data first
    print("Updating Data for Symbol %s" % symbol)
    download_symbol(stocks_collection, symbol)
    print("Analyzing Data for Symbol %s" % symbol)
    stock = read_data(stocks_collection, symbol)
    strategy = minimum_month_strategy.Strategy()
    st = strategy.buy_at(stock, today)
    results = {}
    results["_date"] = today
    results.update(st)
    print(results)
    # push the buy/no-buy decision to an IFTTT webhook
    r = requests.post(
        "https://maker.ifttt.com/trigger/buyspy/with/key/lgZ2-PIbeA4ZzkBFem8M-u933GuzypBiSCim4JUesVH",
        data={
            "value1": str(results["price_today"]),
            "value2": str(results["minimun_last_month"]),
            "value3": "Buy" if results["buy"] else "DO NOT BUY"
        })
    print(r.content)
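
# The job above looks designed to run periodically. A minimal sketch using
# the third-party "schedule" package, which is an assumption and not part of
# this snippet, to fire the job once per trading day; the symbol is
# illustrative.
import time
import schedule

schedule.every().day.at("09:35").do(job, None, "SPY")

while True:
    schedule.run_pending()
    time.sleep(60)
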
def main(modeldir, outdir, redshift_table, subvols, obsdir):
    plt = common.load_matplotlib()
    fields = {
        'global': ('redshifts', 'm_hi', 'm_h2', 'mcold', 'mcold_metals',
                   'mhot_halo', 'mejected_halo', 'mstars',
                   'mstars_bursts_mergers', 'mstars_bursts_diskinstabilities',
                   'm_bh', 'sfr_quiescent', 'sfr_burst', 'm_dm', 'mcold_halo',
                   'number_major_mergers', 'number_minor_mergers',
                   'number_disk_instabilities', 'smbh_maximum')
    }

    # Read data from one subvolume at a time and add it up,
    # rather than appending it all together
    for idx, subvol in enumerate(subvols):
        subvol_data = common.read_data(modeldir, redshift_table[0], fields, [subvol])
        max_bhs_subvol = subvol_data[20].copy()
        if idx == 0:
            hdf5_data = subvol_data
            max_smbh = max_bhs_subvol
        else:
            max_smbh = np.maximum(max_smbh, max_bhs_subvol)
            for subvol_datum, hdf5_datum in zip(subvol_data[3:], hdf5_data[3:]):
                hdf5_datum += subvol_datum  #select the most massive black hole from the last list item

    # Also make sure that the total volume takes into account the number of subvolumes read
    hdf5_data[1] = hdf5_data[1] * len(subvols)
    h0, redshifts = hdf5_data[0], hdf5_data[2]
    #for z, m in zip(redshifts, max_smbh):
    #    print z,m/h0

    (mstar_plot, mcold_plot, mhot_plot, meje_plot,
     mstar_dm_plot, mcold_dm_plot, mhot_dm_plot, meje_dm_plot, mbar_dm_plot,
     sfr, sfrd, sfrb, mstarden, mstarbden_mergers, mstarbden_diskins,
     sfre, sfreH2, mhrat, mHI_plot, mH2_plot, mH2den, mdustden,
     omegaHI, mdustden_mol, mcoldden, mhotden, mejeden,
     history_interactions, mDMden) = prepare_data(hdf5_data, redshifts)

    plot_mass_densities(plt, outdir, obsdir, h0, redshifts, mstar_plot,
                        mcold_plot, mhot_plot, meje_plot, mstarden,
                        mcoldden, mhotden, mejeden)
    plot_baryon_fractions(plt, outdir, redshifts, mstar_dm_plot,
                          mcold_dm_plot, mhot_dm_plot, meje_dm_plot,
                          mbar_dm_plot)
    plot_cosmic_sfr(plt, outdir, obsdir, redshifts, h0, sfr, sfrd, sfrb,
                    history_interactions, mDMden)
    plot_stellar_mass_cosmic_density(plt, outdir, obsdir, redshifts, h0,
                                     mstarden, mstarbden_mergers,
                                     mstarbden_diskins)
    plot_sft_efficiency(plt, outdir, redshifts, sfre, sfreH2, mhrat)
    plot_mass_cosmic_density(plt, outdir, redshifts, mcold_plot, mHI_plot,
                             mH2_plot)
    plot_omega_h2(plt, outdir, obsdir, redshifts, h0, mH2den)
    plot_cosmic_dust(plt, outdir, obsdir, redshifts, h0, mdustden,
                     mdustden_mol)
    plot_omega_HI(plt, outdir, obsdir, redshifts, h0, omegaHI)
def main(model_dir, outdir, redshift_table, subvols, obsdir):
    # Loop over redshift and subvolumes
    plt = common.load_matplotlib()
    fields = {'galaxies': ('mstars_disk', 'mstars_bulge', 'mvir_hosthalo',
                           'mvir_subhalo', 'type', 'mean_stellar_age',
                           'sfr_disk', 'sfr_burst', 'id_galaxy')}
    z = (0, 0.25, 0.5, 1, 1.5, 2.0, 3.0, 4.0, 6.0)
    snapshots = redshift_table[z]

    # Create histogram
    for index, snapshot in enumerate(snapshots):
        hdf5_data = common.read_data(model_dir, snapshot, fields, subvols)
        #sfh, delta_t, LBT = common.read_sfh(model_dir, snapshot, sfh_fields, subvols)
        seds, ids, nbands = common.read_photometry_data(model_dir, snapshot, subvols)
        if index == 0:
            CSED = np.zeros(shape=(len(z), 5, nbands))
        prepare_data(hdf5_data, seds, ids, CSED, nbands, index)
        h0, volh = hdf5_data[0], hdf5_data[1]
        if volh > 0.:
            # normalise by the simulated comoving volume (in (Mpc/h)^3)
            CSED[index, :] = CSED[index, :] / volh * pow(h0, 3.0)

    # Take logs
    plot_csed(plt, outdir, obsdir, h0, CSED, nbands)
def main(experiment_name, model_name, data_path, max_depth, max_bins,
         describe, log_as_mleap, log_as_onnx, spark_autolog):
    print("Options:")
    for k, v in locals().items():
        print(f"  {k}: {v}")
    client = mlflow.tracking.MlflowClient()
    if experiment_name:
        mlflow.set_experiment(experiment_name)
    if spark_autolog:
        SparkSession.builder.config("spark.jars.packages", "org.mlflow.mlflow-spark")
        mlflow.spark.autolog()
    data_path = data_path or common.default_data_path
    data = common.read_data(spark, data_path)
    if describe:
        print("==== Data")
        data.describe().show()
    with mlflow.start_run() as run:
        print("MLflow:")
        print("  run_id:", run.info.run_id)
        print("  experiment_id:", run.info.experiment_id)
        print("  experiment_name:",
              client.get_experiment(run.info.experiment_id).name)
        mlflow.set_tag("version.mlflow", mlflow.__version__)
        mlflow.set_tag("version.spark", spark.version)
        mlflow.set_tag("version.pyspark", pyspark.__version__)
        mlflow.set_tag("version.os", platform.system() + " - " + platform.release())
        model_name = None if model_name is None or model_name == "None" else model_name
        train(run.info.run_id, data, max_depth, max_bins, model_name,
              log_as_mleap, log_as_onnx, spark_autolog)
def do(impute_params):
    logging.info("In pipeline1.do")
    data_raw, data_sub = read_data(DATA_TRAIN, SAMPLE_SUBMISSION)
    dense_data = do_nmf(data_raw, impute_params)
    preds = regress(dense_data)
    np.savez_compressed("../results/imputed_preds.npz", preds)
    logging.info('return from pipeline1.do')
def main(modeldir, outdir, redshift_table, subvols):
    plt = common.load_matplotlib()
    fields = {'galaxies': ('type', 'vvir_hosthalo', 'cooling_rate')}
    hdf5_data = common.read_data(modeldir, redshift_table[0], fields, subvols,
                                 include_h0_volh=False)
    med_tvir = prepare_data(hdf5_data)
    plot_cooling_rate(plt, outdir, med_tvir)
def check_start(self, root_path, label_name, restart=False):
    if restart is False:
        try:
            start_i = int(common.read_data(self.index_file, 'r'))
            print('start_index: ' + str(start_i))
        except Exception as e:
            print(e)
            start_i = 0
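
# A minimal sketch of the resume-index pattern used by check_start above:
# persist the last processed index to a small file so a labelling session can
# pick up where it left off. The save_index/load_index helpers and the path
# are illustrative stand-ins, not this repo's common.read_data machinery.
INDEX_FILE = 'label_index.txt'  # hypothetical path

def save_index(i):
    with open(INDEX_FILE, 'w') as f:
        f.write(str(i))

def load_index():
    try:
        with open(INDEX_FILE) as f:
            return int(f.read())
    except (OSError, ValueError):
        return 0  # start from scratch if the file is missing or corrupt

# usage: i = load_index(); ...process item i...; save_index(i + 1)
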
def main(modeldir, outdir, redshift_table, subvols, obsdir):
    z = [0, 0.5, 1.0, 2.0, 3.0, 4.0]
    snapshots = redshift_table[z]

    plt = common.load_matplotlib()

    mainseqsf = np.zeros(shape=(len(z), 3, len(xmf)))
    sigmamainseqsf = np.zeros(shape=(len(z), 7, len(xmf)))
    passive_fractions = np.zeros(shape=(len(z), 3, len(xmf2)))
    passive_fractions_cens_sats = np.zeros(shape=(len(z), 2, len(xmflr), len(xmf2)))
    hist_ssfr = np.zeros(shape=(len(z), len(ssfrbins)))

    fields = {
        'galaxies': ('sfr_disk', 'sfr_burst', 'mstars_disk', 'mstars_bulge',
                     'rstar_disk', 'm_bh', 'matom_disk', 'mmol_disk',
                     'mgas_disk', 'matom_bulge', 'mmol_bulge', 'mgas_bulge',
                     'mgas_metals_disk', 'mgas_metals_bulge',
                     'mstars_metals_disk', 'mstars_metals_bulge', 'type',
                     'mvir_hosthalo', 'rstar_bulge')
    }

    for index, snapshot in enumerate(snapshots):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        (mass, slope, offset) = prepare_data(hdf5_data, index, z[index],
                                             mainseqsf, passive_fractions,
                                             hist_ssfr, sigmamainseqsf,
                                             passive_fractions_cens_sats)
        h0 = hdf5_data[0]
        if index == 0:
            (sfr_disk, sfr_burst, mdisk, mbulge) = hdf5_data[2:6]
            sfr_seq = np.zeros(shape=(2, len(mdisk)))
            ind = np.where((sfr_disk + sfr_burst > 0) & (mdisk + mbulge > 0))
            sfr_seq[0, ind] = mass[ind]
            sfr_seq[1, ind] = np.log10((sfr_disk[ind] + sfr_burst[ind]) / h0 / GyrToYr)
            slope_ms_z0 = slope
            offset_ms_z0 = offset
        #print 'scatter MS'
        #for m,a,b,c,d,e,f,g in zip(xmf[:], sigmamainseqsf[index,0,:], sigmamainseqsf[index,1,:], sigmamainseqsf[index,2,:], sigmamainseqsf[index,3,:], sigmamainseqsf[index,4,:], sigmamainseqsf[index,5,:], sigmamainseqsf[index,6,:]):
        #    print m,a,b,c,d,e,f,g

    #print 'passive fractions centrals'
    #for m,a,b,c,d,e,f in zip(xmf2[:], passive_fractions_cens_sats[0,0,0,:], passive_fractions_cens_sats[0,0,1,:], passive_fractions_cens_sats[0,0,2,:], passive_fractions_cens_sats[0,0,3,:], passive_fractions_cens_sats[0,0,4,:], passive_fractions_cens_sats[0,0,5,:],):
    #    print m,a,b,c,d,e,f
    #print 'passive fractions satellites'
    #for m,a,b,c,d,e,f in zip(xmf2[:], passive_fractions_cens_sats[0,1,0,:], passive_fractions_cens_sats[0,1,1,:], passive_fractions_cens_sats[0,1,2,:], passive_fractions_cens_sats[0,1,3,:], passive_fractions_cens_sats[0,1,4,:], passive_fractions_cens_sats[0,1,5,:],):
    #    print m,a,b,c,d,e,f

    # This should be the same in all HDF5 files
    plot_sfr_mstars_z0(plt, outdir, obsdir, h0, sfr_seq, mainseqsf,
                       sigmamainseqsf, slope_ms_z0, offset_ms_z0)
    plot_passive_fraction(plt, outdir, obsdir, passive_fractions, hist_ssfr,
                          passive_fractions_cens_sats)
def all_answers(question_id):
    answer_database = read_data('answer.csv')
    question_database = read_data('question.csv')
    decoded_data_answer = time_decode(answer_database)
    decoded_data_question = time_decode(question_database)
    answers = []
    for data_line in decoded_data_answer:
        if str(question_id) in data_line[3]:
            answers.append(data_line)
    question_line = None  # guard against an id with no matching question
    for data_line in decoded_data_question:
        if str(question_id) in data_line[0]:
            question_line = data_line
    return render_template('all_answers.html',
                           question_line=question_line,
                           answers=answers)
def train_data():
    train_ = read_data('./tcdata/hy_round2_train_20200225/')
    train_x = pd.DataFrame.from_dict(train_)
    train_y = train_x.pop('type')
    # the id column is needed when predicting on the test set,
    # but not when training the model
    #train_id = train_x.pop('id')
    #train_x = new_cols(train_x)
    return train_x, train_y
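
# A minimal sketch of the DataFrame.pop idiom used above: pop removes the
# label column from the feature frame and returns it as a Series, separating
# X and y in one step. The toy frame below is made up for illustration.
import pandas as pd

df = pd.DataFrame({'speed': [3.2, 1.1], 'dist': [10.0, 4.5],
                   'type': ['gillnet', 'trawler']})
y = df.pop('type')   # Series of labels; df now holds only the features
# df.columns -> Index(['speed', 'dist']); y.tolist() -> ['gillnet', 'trawler']
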
def read_data(self):
    if self.sample_size == 1:
        data_path = os.path.join(cm.cleaned_data_path, 'regression',
                                 'regression.csv')
    else:
        data_path = os.path.join(
            cm.cleaned_data_path, 'regression',
            'regression_sample_' + str(self.sample_size).replace('.', '') + '.csv')
    return cm.read_data(data_path)
def __get_data_to_predict(data_file, data_width, pca):
    data_read = read_data(filename=data_file)
    data_numpy = np.array(data_read["y"])
    # make all the data the same size by clipping the end;
    # the end is not interesting anyway
    data_numpy = data_numpy[:data_width]
    data_pca = pca.transform(data_numpy.reshape(1, -1))
    return data_pca.reshape((1, 1, -1))
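
# A minimal sketch of how the fitted PCA expected by __get_data_to_predict
# could be produced with scikit-learn. The data_width, n_components, and
# stand-in training signals are illustrative assumptions.
import numpy as np
from sklearn.decomposition import PCA

data_width = 1000                          # clip every signal to this length
signals = np.random.rand(50, data_width)   # stand-in for real training signals
pca = PCA(n_components=20)
pca.fit(signals)                           # fit once on the training set
# sample = __get_data_to_predict('some_file.csv', data_width, pca)
# sample.shape -> (1, 1, 20), ready for a model expecting 3-D input
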
def __init__(self, data, tr, maps=None, mask=None):
    self.data = co.read_data(data)
    self.tr = tr
    self.spin = int(self.data['tracers'][tr]['spin'])
    self.type = self.data['tracers'][tr]['type']
    self._raw_maps = None
    self._maps = maps
    self._mask = mask
    self.f = self.compute_field()
def get_valid_and_invalid_files(root_dir="YOMIRAN", validate_hierarchy=True,
                                validate_filename_format=True,
                                validate_empty_file=True):
    """Return (valid_files, invalid_files).

    Validation criteria:
    - empty file
    - format of file name
    - identical files (same content)
    - folder hierarchy of file
    """
    invalid_files = []
    valid_files = []
    # in the format [Data1, Data2, ...] where Data is a named tuple
    data_of_all_files = []
    for root, dirs, files in os.walk(root_dir, topdown=True):
        for file in files:
            file_is_valid = True
            full_path = os.path.join(root, file)
            if validate_hierarchy and not __is_file_saved_in_correct_directory_hierarchy(
                    file_path=full_path, root_dir=root_dir):
                invalid_files.append(
                    InvalidFile(file_path=full_path,
                                reason="Incorrect folder hierarchy"))
                file_is_valid = False
                # continue
            if validate_filename_format and not __is_filename_correct_format(
                    filename=file):
                invalid_files.append(
                    InvalidFile(file_path=full_path,
                                reason="Incorrect file name"))
                file_is_valid = False
                # continue
            if validate_empty_file and __is_file_empty(filename=full_path):
                invalid_files.append(
                    InvalidFile(file_path=full_path, reason="Empty file"))
                file_is_valid = False
                # continue
            if file_is_valid:
                valid_files.append(full_path)
            # for a later check of whether we have identical files
            data = read_data(filename=full_path)
            data_numpy = np.vstack((np.array(data["x"]), np.array(data["y"])))
            data_of_all_files.append(Data(file_path=full_path, data=data_numpy))

    identical_files = __get_identical_files(data_of_all_files)
    invalid_files.extend(identical_files)

    # filter the identical files out of the valid files
    identical_files_paths = [
        identical_file.file_path for identical_file in identical_files
    ]
    valid_files = list(
        filter(lambda filename: filename not in identical_files_paths,
               valid_files))

    return valid_files, invalid_files
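
# A minimal sketch of what the __get_identical_files helper used above might
# do: index files by their exact array contents and flag every file whose
# content was already seen. This is an illustrative reconstruction, not the
# repo's actual implementation.
def find_identical_files_sketch(data_of_all_files):
    seen = {}
    duplicates = []
    for entry in data_of_all_files:
        key = entry.data.tobytes()   # exact content comparison
        if key in seen:
            duplicates.append(
                InvalidFile(file_path=entry.file_path,
                            reason="Identical to " + seen[key]))
        else:
            seen[key] = entry.file_path
    return duplicates
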
def run(self):
    cm.create_all_directories([os.path.join(cm.checkpoint_path, 'eda')])
    self.df = cm.read_data(
        os.path.join(
            cm.cleaned_data_path, 'regression',
            'features_extractor' + '_sample_rate_' +
            str(self.sample_size).replace('.', '') + '_version_' +
            str(self.version) + '.csv'))
    self.target_analysis(self.df)
    self.calculate_weights(self.df)
def sign_start(self, restart=False):
    times = 4  # image upscaling factor
    if restart is False:
        try:
            start_i = int(common.read_data(self.index_file, 'r'))
            print('start_index: ' + str(start_i))
        except Exception as e:
            print(e)
            start_i = 0
def main(uri, data_path, num_records, log_mod, output_file_base, num_iters):
    records = read_data(data_path, num_records)
    headers = {'Content-Type': 'application/json'}
    durations = []
    for iter in range(0, num_iters):
        num_records = len(records)
        print("Calls:")
        for j, r in enumerate(records):
            data = json.dumps(r)
            start = time.time()
            requests.post(uri, headers=headers, data=data)
            dur = time.time() - start
            if j % log_mod == 0:
                print(f"  {j}/{num_records}: {round(dur,3)}")
            durations.append(dur)

    total = sum(durations)
    mean = statistics.mean(durations)
    stdev = statistics.stdev(durations)
    rsd = stdev / mean * 100  # relative stdev
    calls = num_iters * len(records)

    print("Results (seconds):")
    print("  mean:      ", round(mean, 3))
    print("  max:       ", round(max(durations), 3))
    print("  min:       ", round(min(durations), 3))
    print("  std:       ", round(stdev, 3))
    print("  rsd:       ", round(rsd, 2))
    print("  total:     ", round(total, 3))
    print("  calls:     ", calls)
    print("  records:   ", len(records))
    print("  iterations:", num_iters)

    if output_file_base:
        now = time.time()
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now))
        dct = {
            "timestamp": ts,
            "uri": uri,
            "mean": mean,
            "max": max(durations),
            "min": min(durations),
            "std": stdev,
            "rsd": rsd,
            "total": total,
            "calls": calls,
            "records": len(records),
            "iterations": num_iters
        }
        ts = time.strftime("%Y-%m-%d_%H%M%S", time.gmtime(now))
        path = f"{output_file_base}_{ts}.csv"
        print("Output file:", path)
        with open(path, "w") as f:
            f.write(json.dumps(dct, indent=2) + "\n")
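
# A minimal sketch of the latency summary computed above, on made-up
# durations, showing what the mean/stdev/RSD report measures.
import statistics

durations = [0.21, 0.19, 0.25, 0.22, 0.20]   # seconds, illustrative
mean = statistics.mean(durations)            # 0.214
stdev = statistics.stdev(durations)          # sample standard deviation
rsd = stdev / mean * 100                     # relative stdev, in percent
print(f"mean={mean:.3f}s stdev={stdev:.3f}s rsd={rsd:.1f}%")
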
def __init__(self, root_dir, label_file, img_size, transforms=None,
             is_train=False):
    self.root_dir = root_dir
    records_txt = common.read_data(label_file, 'r')
    self.records = records_txt.split('\n')
    self.img_size = img_size
    self.is_train = is_train
    # imgs = os.listdir(root)
    # self.imgs = [os.path.join(root, img) for img in imgs]
    # self.label_path = label_path
    self.transforms = transforms
def main(modeldir, outdir, redshift_table, subvols, obsdir):
    plt = common.load_matplotlib()
    fields = {
        'galaxies': ('mstars_disk', 'mstars_bulge', 'mstars_burst_mergers',
                     'mstars_burst_diskinstabilities',
                     'mstars_bulge_mergers_assembly',
                     'mstars_bulge_diskins_assembly', 'm_bh', 'rstar_disk',
                     'rstar_bulge', 'type',
                     'specific_angular_momentum_disk_star',
                     'specific_angular_momentum_bulge_star',
                     'specific_angular_momentum_disk_gas',
                     'specific_angular_momentum_bulge_gas',
                     'specific_angular_momentum_disk_gas_atom',
                     'specific_angular_momentum_disk_gas_mol',
                     'lambda_subhalo', 'mvir_subhalo', 'mgas_disk',
                     'mgas_bulge', 'matom_disk', 'mmol_disk', 'matom_bulge',
                     'mmol_bulge', 'bh_accretion_rate_hh',
                     'bh_accretion_rate_sb')
    }

    # Loop over redshift and subvolumes
    rcomb = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size_mergers = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size_diskins = np.zeros(shape=(len(zlist), 3, len(xmf)))
    BH = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size_sat = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size_cen = np.zeros(shape=(len(zlist), 3, len(xmf)))
    BT_fractions = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_nodiskins = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_centrals = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_satellites = np.zeros(shape=(len(zlist), len(xmf)))
    disk_vel = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_vel = np.zeros(shape=(len(zlist), 3, len(xmf)))
    baryonic_TF = np.zeros(shape=(len(zlist), 3, len(xv)))

    for index, snapshot in enumerate(redshift_table[zlist]):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        prepare_data(hdf5_data, index, rcomb, disk_size, bulge_size,
                     bulge_size_mergers, bulge_size_diskins, BH,
                     disk_size_sat, disk_size_cen, BT_fractions,
                     BT_fractions_nodiskins, bulge_vel, disk_vel,
                     BT_fractions_centrals, BT_fractions_satellites,
                     baryonic_TF)

    plot_sizes(plt, outdir, obsdir, disk_size_cen, disk_size_sat, bulge_size,
               bulge_size_mergers, bulge_size_diskins)
    plot_velocities(plt, outdir, disk_vel, bulge_vel, baryonic_TF)
    plot_sizes_combined(plt, outdir, rcomb)
    plot_bulge_BH(plt, outdir, obsdir, BH)
    plot_bt_fractions(plt, outdir, obsdir, BT_fractions,
                      BT_fractions_nodiskins, BT_fractions_centrals,
                      BT_fractions_satellites)
def main(model_dir, outdir, redshift_table, subvols, obsdir):
    # Loop over redshift and subvolumes
    plt = common.load_matplotlib()
    fields = {
        'galaxies': ('mstars_disk', 'mstars_bulge', 'mvir_hosthalo',
                     'mvir_subhalo', 'type', 'mean_stellar_age', 'sfr_disk',
                     'sfr_burst', 'id_galaxy')
    }
    sfh_fields = {
        'bulges_diskins': ('star_formation_rate_histories'),
        'bulges_mergers': ('star_formation_rate_histories'),
        'disks': ('star_formation_rate_histories')
    }
    z = (0, 2)  #0.5, 1, 1.5, 2, 3)
    snapshots = redshift_table[z]

    # Create histogram
    for index, snapshot in enumerate(snapshots):
        hdf5_data = common.read_data(model_dir, snapshot, fields, subvols)
        sfh, delta_t, LBT = common.read_sfh(model_dir, snapshot, sfh_fields,
                                            subvols)
        seds, ids, nbands = common.read_photometry_data(
            model_dir, snapshot, subvols)
        (SEDs_dust, SEDs_nodust, total_sfh, sb_sfh, disk_sfh,
         gal_props) = prepare_data(hdf5_data, sfh, seds, ids, index, nbands)
        h0, volh = hdf5_data[0], hdf5_data[1]
        if index == 0:
            SEDs_nodust_z0 = SEDs_nodust
            SEDs_dust_z0 = SEDs_dust
            total_sfh_z0 = total_sfh
            gal_props_z0 = gal_props
            LBT_z0 = LBT
            plot_individual_seds(plt, outdir, obsdir, h0, SEDs_dust_z0,
                                 SEDs_nodust, total_sfh_z0, gal_props_z0,
                                 LBT_z0)
        if index == 1:
            SEDs_dust_z2 = SEDs_dust
            total_sfh_z2 = total_sfh
            disk_sfh_z2 = disk_sfh
            sb_sfh_z2 = sb_sfh
            gal_props_z2 = gal_props
            LBT_z2 = LBT
            plot_individual_seds_z2(plt, outdir, obsdir, h0, SEDs_dust_z2,
                                    total_sfh_z2, disk_sfh_z2, sb_sfh_z2,
                                    gal_props_z2, LBT_z2)
def do(params, gen_submission, blending_model, validate=False):
    logging.info("in pipeline2.do")
    if not validate:
        data_raw, data_sub = common.read_data(DATA_TRAIN, SAMPLE_SUBMISSION)
    else:
        TRAIN = "../data/train.csv"
        VAL = "../data/val.csv"
        data_raw, data_sub = common.read_data(TRAIN, SAMPLE_SUBMISSION)
        data_val, _ = common.read_data(VAL, SAMPLE_SUBMISSION)
    # load the imputed prediction matrix and reshape it into long
    # (User, Movie, Prediction) form
    preds_mat = np.load('../results/imputed_preds.npz', allow_pickle=True)['arr_0']
    preds = pd.DataFrame(preds_mat).reset_index().melt('index')
    preds.rename(columns={
        "index": "User",
        "variable": "Movie",
        "value": "Prediction"
    }, inplace=True)
    regressors_train = get_regressors(preds, data_raw)
    U_red, V_red = get_u_v(data_raw, params)
    user_clusters, item_clusters, data_raw = get_clusters(
        U_red, V_red, data_raw, params)
    user_df = user_factorization(data_raw, user_clusters, params)
    item_df = item_factorization(data_raw, item_clusters, user_df, params)
    merge2, data_raw = merge(data_raw, regressors_train, user_df, item_df)
    model = train(data_raw, blending_model)
    if validate:
        regressors_val = get_regressors(preds, data_val)
        rmse = validate_holdout(model, data_val, regressors_val, merge2)
    else:
        rmse = validate_full(model, data_raw)
    if gen_submission:
        regressors_test = get_regressors(preds, data_sub)
        generate_submission(model, data_sub, regressors_test, merge2)
    print("rmse: ", rmse)
    return rmse
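
# A minimal sketch of the reset_index/melt step above: it turns a dense
# user-by-movie prediction matrix into one (User, Movie, Prediction) row per
# cell. The 2x2 matrix is made up for illustration.
import pandas as pd

preds_mat = [[4.1, 3.2],
             [2.8, 4.9]]                    # rows: users, columns: movies
preds = pd.DataFrame(preds_mat).reset_index().melt('index')
preds.rename(columns={"index": "User", "variable": "Movie",
                      "value": "Prediction"}, inplace=True)
# preds now has 4 rows:
#   User  Movie  Prediction
#   0     0      4.1
#   1     0      2.8
#   0     1      3.2
#   1     1      4.9
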
def sign_start(self, restart=False):
    times = 2
    cv2.namedWindow('sign_image')
    cv2.setMouseCallback('sign_image', self.mouse_click_events)  # bind mouse events
    if restart is False:
        try:
            start_i = int(common.read_data(self.index_file, 'r'))
            print('start_index: ' + str(start_i))
        except Exception as e:
            print(e)
            start_i = 0
def new_answer(question_id):
    question_database = read_data('question.csv')
    question_line = None  # guard against an id with no matching question
    for line in question_database:
        if str(question_id) in line[0]:
            question_line = line
    file_name = "answer.csv"
    button_name = "Post your answer"
    all_data = read_data(file_name)
    timestamp = int(time.time())
    data_list = []
    if request.method == "POST":
        data_list.append(str(generate_data_id(file_name)))
        data_list.append(str(timestamp))
        data_list.append(' ')  # view number
        data_list.append(question_id)
        data_list.append(request.form['message'])
        data_list.append(' ')  # for picture
        all_data.append(data_list)
        write_data(file_name, all_data)
        return redirect(url_for('all_answers', question_id=question_id))
    return render_template("add_answer.html", question_line=question_line)
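
# A minimal sketch of what the generate_data_id helper used above might do:
# take the highest existing id in the CSV-backed table and add one. This is
# an illustrative reconstruction, not the repo's actual implementation.
def generate_data_id_sketch(rows):
    """rows: list of CSV records whose first field is a numeric id."""
    ids = [int(row[0]) for row in rows if row and row[0].strip().isdigit()]
    return max(ids) + 1 if ids else 1

# usage: next_id = generate_data_id_sketch(read_data('answer.csv'))
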
def main(modeldir, outdir, redshift_table, subvols, obsdir):
    plt = common.load_matplotlib()
    fields = {
        'galaxies': ('mstars_disk', 'mstars_bulge', 'mstars_burst_mergers',
                     'mstars_burst_diskinstabilities',
                     'mstars_bulge_mergers_assembly',
                     'mstars_bulge_diskins_assembly', 'm_bh', 'rstar_disk',
                     'rstar_bulge', 'type',
                     'specific_angular_momentum_disk_star',
                     'specific_angular_momentum_bulge_star',
                     'specific_angular_momentum_disk_gas',
                     'specific_angular_momentum_bulge_gas',
                     'specific_angular_momentum_disk_gas_atom',
                     'specific_angular_momentum_disk_gas_mol',
                     'lambda_subhalo', 'mvir_subhalo', 'mvir_hosthalo',
                     'matom_disk', 'mmol_disk', 'mgas_disk', 'matom_bulge',
                     'mmol_bulge', 'mgas_bulge', 'sfr_disk', 'sfr_burst',
                     'vvir_hosthalo', 'rgas_disk', 'rgas_bulge')
    }

    # Loop over redshift and subvolumes
    sam_vs_sam_halo_disk = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_gal = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_bar = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    m_vs_m_halo_disk = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_gal = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_bar = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    r_vs_r_halo_disk = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_gal = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_bar = np.zeros(shape=(len(zlist), 3, len(xlf), 2))

    for index, snapshot in enumerate(redshift_table[zlist]):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        (lh, lj, lm, bt, ms, ssfr) = prepare_data(
            hdf5_data, index, sam_vs_sam_halo_disk, sam_vs_sam_halo_gal,
            sam_vs_sam_halo_disk_gas, sam_vs_sam_halo_bar, m_vs_m_halo_disk,
            m_vs_m_halo_gal, m_vs_m_halo_disk_gas, m_vs_m_halo_bar,
            r_vs_r_halo_disk, r_vs_r_halo_gal, r_vs_r_halo_disk_gas,
            r_vs_r_halo_bar)

    plot_specific_am_ratio(plt, outdir, obsdir, sam_vs_sam_halo_disk,
                           sam_vs_sam_halo_gal, sam_vs_sam_halo_disk_gas,
                           sam_vs_sam_halo_bar, m_vs_m_halo_disk,
                           m_vs_m_halo_gal, m_vs_m_halo_disk_gas,
                           m_vs_m_halo_bar, r_vs_r_halo_disk,
                           r_vs_r_halo_gal, r_vs_r_halo_disk_gas,
                           r_vs_r_halo_bar)
def fit_model(data):
    x = np.linspace(0, len(get_x_values(data)), len(get_x_values(data)))
    y = get_y_values(data)
    # calculate polynomial
    z = np.polyfit(x, y, 4)
    f = np.poly1d(z)
    return f


def predict(f, data):
    x_new = np.linspace(0, len(get_x_values(data)), len(get_x_values(data)))
    y_new = f(x_new)
    return y_new


if __name__ == '__main__':
    data = read_data(sys.argv[1])
    training_data = get_training_data(data)
    print("fitting to %d rows" % training_data.shape[0])
    model_subset = fit_model(training_data)
    fitted_subset_y_values = predict(model_subset, data)
    model_all = fit_model(data)
    fitted_all_y_values = predict(model_all, data)
    plt.plot(range(len(get_x_values(data))), get_y_values(data), 'go',
             range(len(get_x_values(training_data))), get_y_values(training_data), 'ro',
             range(len(get_x_values(data))), fitted_all_y_values, 'b',
             range(len(get_x_values(data))), fitted_subset_y_values, 'pink')
    plt.show()
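
# A minimal sketch of the np.polyfit/np.poly1d pairing used in fit_model
# above: polyfit returns the coefficients of a least-squares polynomial fit
# and poly1d wraps them in a callable. The synthetic data is made up for
# illustration.
import numpy as np

x = np.linspace(0, 10, 50)
y = 2 * x**2 - 3 * x + 1 + np.random.normal(0, 0.5, x.size)  # noisy quadratic
coeffs = np.polyfit(x, y, 2)   # recovers roughly [2, -3, 1]
f = np.poly1d(coeffs)          # callable polynomial
y_fit = f(x)                   # evaluate the fit on the same grid
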