Example #1
    def preprocess_data(self):
        train_data = read_data(self.train_path)
        test_data = read_data(self.test_path)
        len_train = len(train_data)
        len_test = len(test_data)
        train_data = np.asarray(train_data)
        test_data = np.asarray(test_data)
        #print(train_data.shape,test_data.shape)


        # split into features and target (the last column)
        X_train, y_train = train_data[:, :-1], train_data[:, -1]
        X_test, y_test = test_data[:, :-1], test_data[:, -1]
        #print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)

        X_all = np.append(X_train, X_test, axis=0)
        # expand every raw parameter into its PMNF feature terms
        X_all_EPMNF = []
        for row in X_all:
            line = []
            for p in row:
                line += PMNF_exp(p)
            X_all_EPMNF.append(line)
        X_all_EPMNF = np.asarray(X_all_EPMNF)
        #print(X_all_EPMNF.shape)
        
        scaler = StandardScaler()
        scaler.fit(X_all_EPMNF)
        X_all_EPMNF = scaler.transform(X_all_EPMNF)

        X_train_EPMNF = X_all_EPMNF[:len_train,:]
        X_test_EPMNF = X_all_EPMNF[len_train:,:]
        print(X_train_EPMNF.shape,X_test_EPMNF.shape)

        return train_data,test_data,X_train_EPMNF,X_test_EPMNF,y_train,y_test
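Note: Example #1 assumes a PMNF_exp helper that is not shown. In performance modeling, the Performance Model Normal Form (PMNF) expands a parameter p into terms of the form p^i * log2(p)^j, which is consistent with how the loop above concatenates per-parameter term lists. A minimal sketch under that assumption (the exponent sets here are illustrative, not the original ones):

import numpy as np

def PMNF_exp(p, i_exps=(0.5, 1, 1.5, 2), j_exps=(0, 1)):
    """Expand one parameter value into PMNF terms p**i * log2(p)**j."""
    log_p = np.log2(p) if p > 0 else 0.0  # guard log2 of non-positive values
    return [p**i * log_p**j for i in i_exps for j in j_exps]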
Example #2
    def preprocess_data(self):

        # read data
        data_train = np.asarray(read_data(self.train_path))
        data_test = np.asarray(read_data(self.test_path))
        #print(data_train.shape,data_test.shape)

        # 1. split trainset to X,y
        self.X_RF_train = data_train[:, :-1]
        self.y_RF_train = data_train[:, -1]

        # 2. split testset to X_RF,y_RF and X_Lasso,y_Lasso
        # split the test data at a process count of 128: rows with <=128
        # processes form the first group (assumes data_test is sorted by
        # the second-to-last column, the number of processes)
        split_at = data_test[:, -2].searchsorted([129])
        test_data_split = np.split(data_test, split_at)
        #print(test_data_split)

        # split each group into X, y for Random Forest and for Lasso
        self.X_RF_test = test_data_split[0][:, :-1]
        self.y_RF_test = test_data_split[0][:, -1]
        self.X_lasso_test = test_data_split[1][:, :-1]
        self.y_lasso_test = test_data_split[1][:, -1]

        print(self.X_RF_test.shape, self.y_RF_test.shape,
              self.X_lasso_test.shape, self.y_lasso_test.shape)
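Note: the searchsorted/np.split idiom in Example #2 only matches the comment's intent when the rows are sorted by the process-count column (second-to-last). A toy demonstration of the split:

import numpy as np

# toy test set: (feature, n_processes, target), sorted by n_processes
data_test = np.array([[0.1,  32., 1.0],
                      [0.2, 128., 2.0],
                      [0.3, 256., 3.0]])

split_at = data_test[:, -2].searchsorted([129])  # index of first row with >= 129
lower, upper = np.split(data_test, split_at)     # <=128 rows, >128 rows
print(lower.shape, upper.shape)                  # (2, 3) (1, 3)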
Example #3
 def read_raw_data(self):
     calendar = cm.read_data(os.path.join(cm.raw_data_path, 'calendar.csv'))
     sales_train = cm.read_data(
         os.path.join(cm.raw_data_path, 'sales_train_validation.csv'))
     sell_prices = cm.read_data(
         os.path.join(cm.raw_data_path, 'sell_prices.csv'))
     sample_submission = cm.read_data(
         os.path.join(cm.raw_data_path, 'sample_submission.csv'))
     return sample_submission, calendar, sales_train, sell_prices
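Note: several examples (#3, #17, #21) route file loading through a common module cm whose read_data is not shown. A plausible minimal shape, offered only as an assumption (the paths and the pandas choice are guesses):

import pandas as pd

raw_data_path = 'data/raw'          # assumed; the real module defines these
cleaned_data_path = 'data/cleaned'  # assumed

def read_data(path):
    # assumed to be a thin wrapper around pandas CSV loading
    return pd.read_csv(path)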
Example #4
    def preprocess_data(self):
        # read data
        data_train = np.asarray(read_data(self.train_path))
        data_test = np.asarray(read_data(self.test_path))
        print(data_train.shape, data_test.shape)

        # sort and split train data group by number of process
        self.data_train_split = self.split_data(data_train,
                                                self.split_train_len)
        #print(self.data_train_split[0])

        # sort and split test data group by number of process
        self.data_test_split = self.split_data(data_test, self.split_test_len)
Example #5
def all_answers(question_id):
    answer_database = common.read_data('answer.csv')
    question_database = common.read_data('question.csv')
    answers = []
    for data_line in answer_database:
        if str(question_id) in data_line[3]:  # NB: substring match, so '1' also matches '10'
            answers.append(data_line)
    print(answers)
    for data_line in question_database:
        if str(question_id) in data_line[0]:
            question_line = data_line
    return render_template('all_answers.html',
                           question_line=question_line,
                           answers=answers)
Example #6
def main(model_dir, output_dir, redshift_table, subvols, obs_dir):

    plt = common.load_matplotlib()
    fields = {
        'galaxies':
        ('type', 'mstars_disk', 'mstars_bulge', 'rstar_disk', 'm_bh',
         'matom_disk', 'mmol_disk', 'mgas_disk', 'matom_bulge', 'mmol_bulge',
         'mgas_bulge', 'mvir_hosthalo')
    }
    hdf5_data = common.read_data(model_dir, redshift_table[0], fields, subvols)

    (mgas_relation, mgas_relation_cen, mgas_relation_sat, mh2_gals, mh1_gals,
     mgas_gals, mh2_relation, mh1_relation, mhr_relation, mhr_relation_cen,
     mhr_relation_sat, mgas_relation_ltg, mh2_relation_ltg, mh1_relation_ltg,
     mgas_relation_etg, mh2_relation_etg, mh1_relation_etg,
     mgas_ms_relation_ltg, mh2_ms_relation_ltg, mh1_ms_relation_ltg,
     mgas_ms_relation_etg, mh2_ms_relation_etg, mh1_ms_relation_etg,
     mh1_relation_satellites_halos) = prepare_data(hdf5_data)

    plot_cold_gas_fraction(plt, output_dir, obs_dir, mgas_relation,
                           mgas_relation_cen, mgas_relation_sat)
    plot_HI_stacking(plt, output_dir, obs_dir, mh1_relation_satellites_halos)

    plot_molecular_gas_fraction(
        plt, output_dir, obs_dir, mgas_gals, mgas_relation, mh1_gals,
        mh1_relation, mh2_gals, mh2_relation, mgas_relation_ltg,
        mh2_relation_ltg, mh1_relation_ltg, mgas_relation_etg,
        mh2_relation_etg, mh1_relation_etg, mgas_ms_relation_ltg,
        mh2_ms_relation_ltg, mh1_ms_relation_ltg, mgas_ms_relation_etg,
        mh2_ms_relation_etg, mh1_ms_relation_etg)

    plot_h1h2_gas_fraction(plt, output_dir, mhr_relation, mhr_relation_cen,
                           mhr_relation_sat)
Example #7
def job(database, symbol):
    today = date.today()  # today

    # Update to Today First
    print("Updating Data for Symbol %s" % symbol)
    download_symbol(stocks_collection, symbol)

    print("Analizing Data for Symbol %s" % symbol)
    stock = read_data(stocks_collection, symbol)

    strategy = minimum_month_strategy.Strategy()
    st = strategy.buy_at(stock, today)

    results = {}
    results["_date"] = today
    results.update(st)
    print(results)
    r = requests.post(
        "https://maker.ifttt.com/trigger/buyspy/with/key/lgZ2-PIbeA4ZzkBFem8M-u933GuzypBiSCim4JUesVH",
        data={
            "value1": str(results["price_today"]),
            "value2": str(results["minimun_last_month"]),
            "value3": "Buy" if results["buy"] else "DO NOT BUY"
        })
    print(r.content)
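Note: job() only requires that the strategy's buy_at(stock, day) returns a dict containing price_today, minimun_last_month (spelling taken from the caller) and buy. A stand-in sketch, assuming stock maps dates to closing prices; the real minimum_month_strategy.Strategy is not shown:

from datetime import timedelta

class Strategy:
    """Sketch: buy when today's close is at or below last month's minimum."""

    def buy_at(self, stock, day):
        month_ago = day - timedelta(days=30)
        last_month = [p for d, p in stock.items() if month_ago <= d < day]
        price_today = stock[day]
        minimum = min(last_month)
        return {
            "price_today": price_today,
            "minimun_last_month": minimum,  # key spelling matches the caller
            "buy": price_today <= minimum,
        }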
Example #8
def main(modeldir, outdir, redshift_table, subvols, obsdir):

    plt = common.load_matplotlib()
    fields = {
        'global':
        ('redshifts', 'm_hi', 'm_h2', 'mcold', 'mcold_metals', 'mhot_halo',
         'mejected_halo', 'mstars', 'mstars_bursts_mergers',
         'mstars_bursts_diskinstabilities', 'm_bh', 'sfr_quiescent',
         'sfr_burst', 'm_dm', 'mcold_halo', 'number_major_mergers',
         'number_minor_mergers', 'number_disk_instabilities', 'smbh_maximum')
    }

    # Read data from each subvolume at a time and add it up
    # rather than appending it all together
    for idx, subvol in enumerate(subvols):
        subvol_data = common.read_data(modeldir, redshift_table[0], fields,
                                       [subvol])
        max_bhs_subvol = subvol_data[20].copy()  # smbh_maximum, the last field in the list
        if idx == 0:
            hdf5_data = subvol_data
            max_smbh = max_bhs_subvol
        else:
            # keep the most massive black hole seen across subvolumes
            max_smbh = np.maximum(max_smbh, max_bhs_subvol)
            for subvol_datum, hdf5_datum in zip(subvol_data[3:],
                                                hdf5_data[3:]):
                hdf5_datum += subvol_datum  # accumulate additive quantities in place

    # Also make sure that the total volume takes into account the number of subvolumes read
    hdf5_data[1] = hdf5_data[1] * len(subvols)

    h0, redshifts = hdf5_data[0], hdf5_data[2]

    #for z, m in zip(redshifts, max_smbh):
    #    print z,m/h0

    (mstar_plot, mcold_plot, mhot_plot, meje_plot, mstar_dm_plot,
     mcold_dm_plot, mhot_dm_plot, meje_dm_plot, mbar_dm_plot, sfr, sfrd, sfrb,
     mstarden, mstarbden_mergers, mstarbden_diskins, sfre, sfreH2, mhrat,
     mHI_plot, mH2_plot, mH2den, mdustden, omegaHI, mdustden_mol, mcoldden,
     mhotden, mejeden, history_interactions,
     mDMden) = prepare_data(hdf5_data, redshifts)

    plot_mass_densities(plt, outdir, obsdir, h0, redshifts, mstar_plot,
                        mcold_plot, mhot_plot, meje_plot, mstarden, mcoldden,
                        mhotden, mejeden)
    plot_baryon_fractions(plt, outdir, redshifts, mstar_dm_plot, mcold_dm_plot,
                          mhot_dm_plot, meje_dm_plot, mbar_dm_plot)
    plot_cosmic_sfr(plt, outdir, obsdir, redshifts, h0, sfr, sfrd, sfrb,
                    history_interactions, mDMden)
    plot_stellar_mass_cosmic_density(plt, outdir, obsdir, redshifts, h0,
                                     mstarden, mstarbden_mergers,
                                     mstarbden_diskins)
    plot_sft_efficiency(plt, outdir, redshifts, sfre, sfreH2, mhrat)
    plot_mass_cosmic_density(plt, outdir, redshifts, mcold_plot, mHI_plot,
                             mH2_plot)
    plot_omega_h2(plt, outdir, obsdir, redshifts, h0, mH2den)
    plot_cosmic_dust(plt, outdir, obsdir, redshifts, h0, mdustden,
                     mdustden_mol)
    plot_omega_HI(plt, outdir, obsdir, redshifts, h0, omegaHI)
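Note: the accumulation in Example #8 works because hdf5_datum += subvol_datum is an in-place numpy add that mutates the arrays already stored in hdf5_data; writing hdf5_datum = hdf5_datum + subvol_datum would only rebind the loop variable. A two-line demonstration:

import numpy as np

acc = [np.array([1.0]), np.array([2.0])]
for new, total in zip([np.array([10.0]), np.array([20.0])], acc):
    total += new  # in-place: mutates the arrays held in acc
print(acc)        # [array([11.]), array([22.])]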
Example #9
File: csed.py  Project: viogp/shark
def main(model_dir, outdir, redshift_table, subvols, obsdir):

    # Loop over redshift and subvolumes
    plt = common.load_matplotlib()
    fields = {'galaxies': ('mstars_disk', 'mstars_bulge', 'mvir_hosthalo',
                           'mvir_subhalo', 'type', 'mean_stellar_age', 
                           'sfr_disk', 'sfr_burst', 'id_galaxy')}

    z = (0, 0.25, 0.5, 1, 1.5, 2.0, 3.0, 4.0, 6.0)
    snapshots = redshift_table[z]

    # Create histogram
    for index, snapshot in enumerate(snapshots):

        hdf5_data = common.read_data(model_dir, snapshot, fields, subvols)
        #sfh, delta_t, LBT = common.read_sfh(model_dir, snapshot, sfh_fields, subvols)
        seds, ids, nbands = common.read_photometry_data(model_dir, snapshot, subvols)
        
        if index == 0:
            CSED = np.zeros(shape=(len(z), 5, nbands))

        prepare_data(hdf5_data, seds, ids, CSED, nbands, index)

        h0, volh = hdf5_data[0], hdf5_data[1]
        if volh > 0.:
            CSED[index, :] = CSED[index, :] / volh * pow(h0, 3.0)

    # Take logs
    plot_csed(plt, outdir, obsdir, h0, CSED, nbands)
Example #10
def main(experiment_name, model_name, data_path, max_depth, max_bins, describe,
         log_as_mleap, log_as_onnx, spark_autolog):
    print("Options:")
    for k, v in locals().items():
        print(f"  {k}: {v}")

    client = mlflow.tracking.MlflowClient()
    if experiment_name:
        mlflow.set_experiment(experiment_name)
    if spark_autolog:
        SparkSession.builder.config("spark.jars.packages",
                                    "org.mlflow.mlflow-spark")
        mlflow.spark.autolog()
    data_path = data_path or common.default_data_path
    data = common.read_data(spark, data_path)
    if describe:
        print("==== Data")
        data.describe().show()

    with mlflow.start_run() as run:
        print("MLflow:")
        print("  run_id:", run.info.run_id)
        print("  experiment_id:", run.info.experiment_id)
        print("  experiment_name:",
              client.get_experiment(run.info.experiment_id).name)
        mlflow.set_tag("version.mlflow", mlflow.__version__)
        mlflow.set_tag("version.spark", spark.version)
        mlflow.set_tag("version.pyspark", pyspark.__version__)
        mlflow.set_tag("version.os",
                       platform.system() + " - " + platform.release())
        model_name = None if model_name is None or model_name == "None" else model_name
        train(run.info.run_id, data, max_depth, max_bins, model_name,
              log_as_mleap, log_as_onnx, spark_autolog)
Example #11
def do(impute_params):
    logging.info("In pipeline1.do")
    data_raw, data_sub = read_data(DATA_TRAIN, SAMPLE_SUBMISSION)
    dense_data = do_nmf(data_raw, impute_params)
    preds = regress(dense_data)
    np.savez_compressed("../results/imputed_preds.npz", preds)
    logging.info('return from pipeline1.do')
Example #12
File: hothalo.py  Project: viogp/shark
def main(modeldir, outdir, redshift_table, subvols):

    plt = common.load_matplotlib()
    fields = {'galaxies': ('type', 'vvir_hosthalo', 'cooling_rate')}
    hdf5_data = common.read_data(modeldir, redshift_table[0], fields, subvols, include_h0_volh=False)
    med_tvir = prepare_data(hdf5_data)

    plot_cooling_rate(plt, outdir, med_tvir)
Example #13
 def check_start(self, root_path, label_name, restart=False):
     if restart is False:
         try:
             start_i = int(common.read_data(self.index_file, 'r'))
             print('start_index: ' + str(start_i))
         except Exception as e:
             print(e)
             start_i = 0
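Note: check_start resumes from an index that some earlier run must have persisted to self.index_file. The write side is not shown; a matching sketch using plain file I/O (the name save_checkpoint is hypothetical):

def save_checkpoint(index_file, i):
    # persist the last processed index so a later run can resume from it
    with open(index_file, 'w') as f:
        f.write(str(i))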
Example #14
def main(modeldir, outdir, redshift_table, subvols, obsdir):

    z = [0, 0.5, 1.0, 2.0, 3.0, 4.0]
    snapshots = redshift_table[z]

    plt = common.load_matplotlib()

    mainseqsf = np.zeros(shape=(len(z), 3, len(xmf)))
    sigmamainseqsf = np.zeros(shape=(len(z), 7, len(xmf)))

    passive_fractions = np.zeros(shape=(len(z), 3, len(xmf2)))
    passive_fractions_cens_sats = np.zeros(shape=(len(z), 2, len(xmflr),
                                                  len(xmf2)))

    hist_ssfr = np.zeros(shape=(len(z), len(ssfrbins)))

    fields = {
        'galaxies':
        ('sfr_disk', 'sfr_burst', 'mstars_disk', 'mstars_bulge', 'rstar_disk',
         'm_bh', 'matom_disk', 'mmol_disk', 'mgas_disk', 'matom_bulge',
         'mmol_bulge', 'mgas_bulge', 'mgas_metals_disk', 'mgas_metals_bulge',
         'mstars_metals_disk', 'mstars_metals_bulge', 'type', 'mvir_hosthalo',
         'rstar_bulge')
    }

    for index, snapshot in enumerate(snapshots):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        (mass, slope, offset) = prepare_data(hdf5_data, index, z[index],
                                             mainseqsf, passive_fractions,
                                             hist_ssfr, sigmamainseqsf,
                                             passive_fractions_cens_sats)

        h0 = hdf5_data[0]
        if index == 0:
            (sfr_disk, sfr_burst, mdisk, mbulge) = hdf5_data[2:6]
            sfr_seq = np.zeros(shape=(2, len(mdisk)))
            ind = np.where((sfr_disk + sfr_burst > 0) & (mdisk + mbulge > 0))
            sfr_seq[0, ind] = mass[ind]
            sfr_seq[1, ind] = np.log10(
                (sfr_disk[ind] + sfr_burst[ind]) / h0 / GyrToYr)
            slope_ms_z0 = slope
            offset_ms_z0 = offset
            #print 'scatter MS'
            #for m,a,b,c,d,e,f,g in zip(xmf[:], sigmamainseqsf[index,0,:], sigmamainseqsf[index,1,:], sigmamainseqsf[index,2,:], sigmamainseqsf[index,3,:], sigmamainseqsf[index,4,:], sigmamainseqsf[index,5,:], sigmamainseqsf[index,6,:]):
            #    print m,a,b,c,d,e,f,g

            #print 'passive fractions centrals'
            #for m,a,b,c,d,e,f in zip(xmf2[:], passive_fractions_cens_sats[0,0,0,:], passive_fractions_cens_sats[0,0,1,:], passive_fractions_cens_sats[0,0,2,:], passive_fractions_cens_sats[0,0,3,:], passive_fractions_cens_sats[0,0,4,:], passive_fractions_cens_sats[0,0,5,:],):
            #    print m,a,b,c,d,e,f
            #print 'passive fractions satellites'
            #for m,a,b,c,d,e,f in zip(xmf2[:], passive_fractions_cens_sats[0,1,0,:], passive_fractions_cens_sats[0,1,1,:], passive_fractions_cens_sats[0,1,2,:], passive_fractions_cens_sats[0,1,3,:], passive_fractions_cens_sats[0,1,4,:], passive_fractions_cens_sats[0,1,5,:],):
            #    print m,a,b,c,d,e,f

    # h0 should be the same in all HDF5 files
    plot_sfr_mstars_z0(plt, outdir, obsdir, h0, sfr_seq, mainseqsf,
                       sigmamainseqsf, slope_ms_z0, offset_ms_z0)
    plot_passive_fraction(plt, outdir, obsdir, passive_fractions, hist_ssfr,
                          passive_fractions_cens_sats)
Example #15
def all_answers(question_id):
    answer_database = read_data('answer.csv')
    question_database = read_data('question.csv')

    decoded_data_answer = time_decode(answer_database)
    decoded_data_question = time_decode(question_database)

    answers = []
    for data_line in decoded_data_answer:
        if str(question_id) in data_line[3]:
            answers.append(data_line)

    for data_line in decoded_data_question:
        if str(question_id) in data_line[0]:
            question_line = data_line
    return render_template('all_answers.html',
                           question_line=question_line,
                           answers=answers)
Example #16
def train_data():
    train_ = read_data('./tcdata/hy_round2_train_20200225/')
    train_x = pd.DataFrame.from_dict(train_)
    train_y = train_x.pop('type')
    # 'id' is needed when producing test predictions,
    # but not when training the model
    #train_id = train_x.pop('id')
    #train_x = new_cols(train_x)
    return train_x, train_y
Example #17
 def read_data(self):
     if self.sample_size == 1:
         data_path = os.path.join(cm.cleaned_data_path, 'regression',
                                  'regression.csv')
     else:
         data_path = os.path.join(
             cm.cleaned_data_path, 'regression', 'regression_sample_' +
             str(self.sample_size).replace('.', '') + '.csv')
     return cm.read_data(data_path)
Example #18
def __get_data_to_predict(data_file, data_width, pca):
    data_read = read_data(filename=data_file)
    data_numpy = np.array(data_read["y"])

    # clip every series to the same width; the end of the signal is not interesting anyway
    data_numpy = data_numpy[:data_width]

    data_pca = pca.transform(data_numpy.reshape(1, -1))
    return data_pca.reshape((1, 1, -1))
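Note: the pca argument is presumably a fitted sklearn.decomposition.PCA whose training matrix used the same data_width. An illustrative usage with hypothetical shapes and file name:

import numpy as np
from sklearn.decomposition import PCA

train_matrix = np.random.rand(100, 4000)      # 100 signals of width 4000
pca = PCA(n_components=50).fit(train_matrix)
sample = __get_data_to_predict("signal.csv", data_width=4000, pca=pca)
print(sample.shape)                           # (1, 1, 50)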
Example #19
File: cl.py  Project: damonge/S8z
 def __init__(self, data, tr, maps=None, mask=None):
     self.data = co.read_data(data)
     self.tr = tr
     self.spin = int(self.data['tracers'][tr]['spin'])
     self.type = self.data['tracers'][tr]['type']
     self._raw_maps = None
     self._maps = maps
     self._mask = mask
     self.f = self.compute_field()
Example #20
def get_valid_and_invalid_files(root_dir="YOMIRAN",
                                validate_hierarchy=True,
                                validate_filename_format=True,
                                validate_empty_file=True):
    """Return (valid_files, invalid_files)
    Validation criteria:
        - empty file
        - format of file name
        - identical files (same content)
        - folder hierarchy of file"""
    invalid_files = []
    valid_files = []
    data_of_all_files = []  # list of Data named tuples: [Data1, Data2, ...]
    for root, dirs, files in os.walk(root_dir, topdown=True):
        for file in files:
            file_is_valid = True
            full_path = os.path.join(root, file)
            if validate_hierarchy and not __is_file_saved_in_correct_directory_hierarchy(
                    file_path=full_path, root_dir=root_dir):
                invalid_files.append(
                    InvalidFile(file_path=full_path,
                                reason="Incorrect folder hierarchy"))
                file_is_valid = False
            #     continue
            if validate_filename_format and not __is_filename_correct_format(
                    filename=file):
                invalid_files.append(
                    InvalidFile(file_path=full_path,
                                reason="Incorrect file name"))
                file_is_valid = False
                # continue
            if validate_empty_file and __is_file_empty(filename=full_path):
                invalid_files.append(
                    InvalidFile(file_path=full_path, reason="Empty file"))
                file_is_valid = False
                # continue
            if file_is_valid:
                valid_files.append(full_path)

            # for future check if we have identical files
            data = read_data(filename=full_path)
            data_numpy = np.vstack((np.array(data["x"]), np.array(data["y"])))
            data_of_all_files.append(Data(file_path=full_path,
                                          data=data_numpy))

    identical_files = __get_identical_files(data_of_all_files)
    invalid_files.extend(identical_files)

    # filtering the identical files from the valid files
    identical_files_paths = [
        identical_file.file_path for identical_file in identical_files
    ]
    valid_files = list(
        filter(lambda filename: filename not in identical_files_paths,
               valid_files))
    return valid_files, invalid_files
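Note: the helper __get_identical_files is not shown. A plausible sketch that reports every file whose (x, y) array duplicates an earlier one, reusing the Data/InvalidFile shapes the function above assumes:

from collections import namedtuple

Data = namedtuple("Data", ["file_path", "data"])
InvalidFile = namedtuple("InvalidFile", ["file_path", "reason"])

def __get_identical_files(data_of_all_files):
    identical = []
    seen = []  # (path, array) for the first occurrence of each content
    for entry in data_of_all_files:
        match = next((path for path, arr in seen
                      if arr.shape == entry.data.shape
                      and (arr == entry.data).all()), None)
        if match is not None:
            identical.append(InvalidFile(file_path=entry.file_path,
                                         reason="Identical to %s" % match))
        else:
            seen.append((entry.file_path, entry.data))
    return identical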
Example #21
    def run(self):
        cm.create_all_directories([os.path.join(cm.checkpoint_path, 'eda')])

        self.df = cm.read_data(os.path.join(
            cm.cleaned_data_path, 'regression',
            'features_extractor'
            + '_sample_rate_' + str(self.sample_size).replace('.', '')
            + '_version_' + str(self.version)
            + '.csv'))

        self.target_analysis(self.df)
        self.calculate_weights(self.df)
Example #22
    def sign_start(self, restart=False):
        times = 4  # image upscaling factor

        if restart is False:
            try:
                start_i = int(common.read_data(self.index_file, 'r'))
                print('start_index: ' + str(start_i))
            except Exception as e:
                print(e)
                start_i = 0
Example #23
def main(uri, data_path, num_records, log_mod, output_file_base, num_iters):
    records = read_data(data_path, num_records)
    headers = {'Content-Type': 'application/json'}

    durations = []
    for _ in range(num_iters):
        num_records = len(records)
        print("Calls:")
        for j, r in enumerate(records):
            data = json.dumps(r)
            start = time.time()
            requests.post(uri, headers=headers, data=data)
            dur = time.time() - start
            if j % log_mod == 0:
                print(f"  {j}/{num_records}: {round(dur,3)}")
            durations.append(dur)

    total = sum(durations)
    mean = statistics.mean(durations)
    stdev = statistics.stdev(durations)
    rsd = stdev / mean * 100  # relative stdev

    calls = num_iters * len(records)
    print("Results (seconds):")
    print("  mean:   ", round(mean, 3))
    print("  max:    ", round(max(durations), 3))
    print("  min:    ", round(min(durations), 3))
    print("  std:    ", round(stdev, 3))
    print("  rsd:    ", round(rsd, 2))
    print("  total:  ", round(total, 3))
    print("  calls:     ", calls)
    print("  records:   ", len(records))
    print("  iterations:", num_iters)

    if output_file_base:
        now = time.time()
        ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(now))
        dct = {
            "timestamp": ts,
            "uri": uri,
            "mean": mean,
            "max": max(durations),
            "min": min(durations),
            "std": stdev,
            "rsd": rsd,
            "total": total,
            "calls": calls,
            "records": len(records),
            "iterations": num_iters
        }
        ts = time.strftime("%Y-%m-%d_%H%M%S", time.gmtime(now))
        path = f"{output_file_base}_{ts}.csv"
        print("Output file:", path)
        with open(path, "w") as f:
            f.write(json.dumps(dct, indent=2) + "\n")
Example #24
    def __init__(self, root_dir, label_file, img_size, transforms=None, is_train=False):
        self.root_dir = root_dir
        records_txt = common.read_data(label_file, 'r')
        self.records = records_txt.split('\n')
        self.img_size = img_size
        self.is_train = is_train

        # imgs = os.listdir(root)
        # self.imgs = [os.path.join(root, img) for img in imgs]
        # self.label_path = label_path
        self.transforms = transforms
Example #25
def main(modeldir, outdir, redshift_table, subvols, obsdir):

    plt = common.load_matplotlib()
    fields = {
        'galaxies':
        ('mstars_disk', 'mstars_bulge', 'mstars_burst_mergers',
         'mstars_burst_diskinstabilities', 'mstars_bulge_mergers_assembly',
         'mstars_bulge_diskins_assembly', 'm_bh', 'rstar_disk', 'rstar_bulge',
         'type', 'specific_angular_momentum_disk_star',
         'specific_angular_momentum_bulge_star',
         'specific_angular_momentum_disk_gas',
         'specific_angular_momentum_bulge_gas',
         'specific_angular_momentum_disk_gas_atom',
         'specific_angular_momentum_disk_gas_mol', 'lambda_subhalo',
         'mvir_subhalo', 'mgas_disk', 'mgas_bulge', 'matom_disk', 'mmol_disk',
         'matom_bulge', 'mmol_bulge', 'bh_accretion_rate_hh',
         'bh_accretion_rate_sb')
    }

    # Loop over redshift and subvolumes
    rcomb = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size_mergers = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_size_diskins = np.zeros(shape=(len(zlist), 3, len(xmf)))

    BH = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size_sat = np.zeros(shape=(len(zlist), 3, len(xmf)))
    disk_size_cen = np.zeros(shape=(len(zlist), 3, len(xmf)))
    BT_fractions = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_nodiskins = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_centrals = np.zeros(shape=(len(zlist), len(xmf)))
    BT_fractions_satellites = np.zeros(shape=(len(zlist), len(xmf)))
    disk_vel = np.zeros(shape=(len(zlist), 3, len(xmf)))
    bulge_vel = np.zeros(shape=(len(zlist), 3, len(xmf)))
    baryonic_TF = np.zeros(shape=(len(zlist), 3, len(xv)))

    for index, snapshot in enumerate(redshift_table[zlist]):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        prepare_data(hdf5_data, index, rcomb, disk_size, bulge_size,
                     bulge_size_mergers, bulge_size_diskins, BH, disk_size_sat,
                     disk_size_cen, BT_fractions, BT_fractions_nodiskins,
                     bulge_vel, disk_vel, BT_fractions_centrals,
                     BT_fractions_satellites, baryonic_TF)

    plot_sizes(plt, outdir, obsdir, disk_size_cen, disk_size_sat, bulge_size,
               bulge_size_mergers, bulge_size_diskins)
    plot_velocities(plt, outdir, disk_vel, bulge_vel, baryonic_TF)
    plot_sizes_combined(plt, outdir, rcomb)
    plot_bulge_BH(plt, outdir, obsdir, BH)
    plot_bt_fractions(plt, outdir, obsdir, BT_fractions,
                      BT_fractions_nodiskins, BT_fractions_centrals,
                      BT_fractions_satellites)
Example #26
def main(model_dir, outdir, redshift_table, subvols, obsdir):

    # Loop over redshift and subvolumes
    plt = common.load_matplotlib()
    fields = {
        'galaxies':
        ('mstars_disk', 'mstars_bulge', 'mvir_hosthalo', 'mvir_subhalo',
         'type', 'mean_stellar_age', 'sfr_disk', 'sfr_burst', 'id_galaxy')
    }

    sfh_fields = {
        'bulges_diskins': ('star_formation_rate_histories'),
        'bulges_mergers': ('star_formation_rate_histories'),
        'disks': ('star_formation_rate_histories')
    }

    z = (0, 2)  # full list: (0, 0.5, 1, 1.5, 2, 3)
    snapshots = redshift_table[z]

    # Create histogram
    for index, snapshot in enumerate(snapshots):

        hdf5_data = common.read_data(model_dir, snapshot, fields, subvols)
        sfh, delta_t, LBT = common.read_sfh(model_dir, snapshot, sfh_fields,
                                            subvols)
        seds, ids, nbands = common.read_photometry_data(
            model_dir, snapshot, subvols)

        (SEDs_dust, SEDs_nodust, total_sfh, sb_sfh, disk_sfh,
         gal_props) = prepare_data(hdf5_data, sfh, seds, ids, index, nbands)

        h0, volh = hdf5_data[0], hdf5_data[1]
        if (index == 0):
            SEDs_nodust_z0 = SEDs_nodust
            SEDs_dust_z0 = SEDs_dust
            total_sfh_z0 = total_sfh
            gal_props_z0 = gal_props
            LBT_z0 = LBT
            plot_individual_seds(plt, outdir, obsdir, h0, SEDs_dust_z0,
                                 SEDs_nodust, total_sfh_z0, gal_props_z0,
                                 LBT_z0)

        if (index == 1):
            SEDs_dust_z2 = SEDs_dust
            total_sfh_z2 = total_sfh
            disk_sfh_z2 = disk_sfh
            sb_sfh_z2 = sb_sfh
            gal_props_z2 = gal_props
            LBT_z2 = LBT
            plot_individual_seds_z2(plt, outdir, obsdir, h0, SEDs_dust_z2,
                                    total_sfh_z2, disk_sfh_z2, sb_sfh_z2,
                                    gal_props_z2, LBT_z2)
Example #27
def do(params, gen_submission, blending_model, validate=False):
    logging.info("in pipeline2.do")
    if not validate:
        data_raw, data_sub = common.read_data(DATA_TRAIN, SAMPLE_SUBMISSION)
    else:
        TRAIN = "../data/train.csv"
        VAL = "../data/val.csv"
        data_raw, data_sub = common.read_data(TRAIN, SAMPLE_SUBMISSION)
        data_val, _ = common.read_data(VAL, SAMPLE_SUBMISSION)

    preds_mat = np.load('../results/imputed_preds.npz',
                        allow_pickle=True)['arr_0']
    preds = pd.DataFrame(preds_mat).reset_index().melt('index')
    preds.rename(columns={
        "index": "User",
        "variable": "Movie",
        "value": "Prediction"
    },
                 inplace=True)
    regressors_train = get_regressors(preds, data_raw)
    U_red, V_red = get_u_v(data_raw, params)
    user_clusters, item_clusters, data_raw = get_clusters(
        U_red, V_red, data_raw, params)
    user_df = user_factorization(data_raw, user_clusters, params)
    item_df = item_factorization(data_raw, item_clusters, user_df, params)
    merge2, data_raw = merge(data_raw, regressors_train, user_df, item_df)
    model = train(data_raw, blending_model)

    if validate:
        regressors_val = get_regressors(preds, data_val)
        rmse = validate_holdout(model, data_val, regressors_val, merge2)
    else:
        rmse = validate_full(model, data_raw)

    if gen_submission:
        regressors_test = get_regressors(preds, data_sub)
        generate_submission(model, data_sub, regressors_test, merge2)
    print("rmse: ", rmse)
    return rmse
Example #28
    def sign_start(self, restart=False):
        times = 2

        cv2.namedWindow('sign_image')
        cv2.setMouseCallback('sign_image', self.mouse_click_events)  # bind mouse event handler

        if restart is False:
            try:
                start_i = int(common.read_data(self.index_file, 'r'))
                print('start_index: ' + str(start_i))
            except Exception as e:
                print(e)
                start_i = 0
Example #29
def new_answer(question_id):
    question_database = read_data('question.csv')
    for line in question_database:
        if str(question_id) in line[0]:
            question_line = line

    file_name = "answer.csv"
    button_name = "Post your answer"
    all_data = read_data(file_name)
    timestamp = int(time.time())
    data_list = []
    if request.method == "POST":
        data_list.append(str(generate_data_id(file_name)))
        data_list.append(str(timestamp))
        data_list.append(' ')  # view number
        data_list.append(question_id)
        data_list.append(request.form['message'])
        data_list.append(' ')  # for picture
        all_data.append(data_list)
        write_data(file_name, all_data)
        return redirect(url_for('all_answers', question_id=question_id))
    return render_template("add_answer.html", question_line=question_line)
Example #30
def main(modeldir, outdir, redshift_table, subvols, obsdir):

    plt = common.load_matplotlib()
    fields = {
        'galaxies':
        ('mstars_disk', 'mstars_bulge', 'mstars_burst_mergers',
         'mstars_burst_diskinstabilities', 'mstars_bulge_mergers_assembly',
         'mstars_bulge_diskins_assembly', 'm_bh', 'rstar_disk', 'rstar_bulge',
         'type', 'specific_angular_momentum_disk_star',
         'specific_angular_momentum_bulge_star',
         'specific_angular_momentum_disk_gas',
         'specific_angular_momentum_bulge_gas',
         'specific_angular_momentum_disk_gas_atom',
         'specific_angular_momentum_disk_gas_mol', 'lambda_subhalo',
         'mvir_subhalo', 'mvir_hosthalo', 'matom_disk', 'mmol_disk',
         'mgas_disk', 'matom_bulge', 'mmol_bulge', 'mgas_bulge', 'sfr_disk',
         'sfr_burst', 'vvir_hosthalo', 'rgas_disk', 'rgas_bulge')
    }

    # Loop over redshift and subvolumes

    sam_vs_sam_halo_disk = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_gal = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    sam_vs_sam_halo_bar = np.zeros(shape=(len(zlist), 3, len(xlf), 2))

    m_vs_m_halo_disk = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_gal = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xmf), 2))
    m_vs_m_halo_bar = np.zeros(shape=(len(zlist), 3, len(xmf), 2))

    r_vs_r_halo_disk = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_gal = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_disk_gas = np.zeros(shape=(len(zlist), 3, len(xlf), 2))
    r_vs_r_halo_bar = np.zeros(shape=(len(zlist), 3, len(xlf), 2))

    for index, snapshot in enumerate(redshift_table[zlist]):
        hdf5_data = common.read_data(modeldir, snapshot, fields, subvols)
        (lh, lj, lm, bt, ms, ssfr) = prepare_data(
            hdf5_data, index, sam_vs_sam_halo_disk, sam_vs_sam_halo_gal,
            sam_vs_sam_halo_disk_gas, sam_vs_sam_halo_bar, m_vs_m_halo_disk,
            m_vs_m_halo_gal, m_vs_m_halo_disk_gas, m_vs_m_halo_bar,
            r_vs_r_halo_disk, r_vs_r_halo_gal, r_vs_r_halo_disk_gas,
            r_vs_r_halo_bar)

    plot_specific_am_ratio(plt, outdir, obsdir, sam_vs_sam_halo_disk,
                           sam_vs_sam_halo_gal, sam_vs_sam_halo_disk_gas,
                           sam_vs_sam_halo_bar, m_vs_m_halo_disk,
                           m_vs_m_halo_gal, m_vs_m_halo_disk_gas,
                           m_vs_m_halo_bar, r_vs_r_halo_disk, r_vs_r_halo_gal,
                           r_vs_r_halo_disk_gas, r_vs_r_halo_bar)
Example #31
def fit_model(data):
    x = np.linspace(0, len(get_x_values(data)), len(get_x_values(data)))
    y = get_y_values(data)
    # calculate polynomial
    z = np.polyfit(x, y, 4)
    f = np.poly1d(z)
    return f


def predict(f, data):
    x_new = np.linspace(0, len(get_x_values(data)), len(get_x_values(data)))
    y_new = f(x_new)
    return y_new


if __name__ == '__main__':
    data = read_data(sys.argv[1])
    training_data = get_training_data(data)
    print("fitting to %d rows" % training_data.shape[0])
    model_subset = fit_model(training_data)
    fitted_subset_y_values = predict(model_subset, data)
    model_all = fit_model(data)
    fitted_all_y_values = predict(model_all, data)
    plt.plot(range(len(get_x_values(data))), get_y_values(data), 'go',
             range(len(get_x_values(training_data))), get_y_values(training_data), 'ro',
             range(len(get_x_values(data))), fitted_all_y_values, 'b',
             range(len(get_x_values(data))), fitted_subset_y_values, 'pink')

    plt.show()
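Note: fit_model and the __main__ block assume get_x_values/get_y_values accessors and a read_data loader that are not shown. Minimal stand-ins, assuming the data is a two-column numpy array:

import numpy as np

def get_x_values(data):
    return data[:, 0]  # assumed: first column holds x

def get_y_values(data):
    return data[:, 1]  # assumed: second column holds y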