Example #1
def file_upload():
    global cache
    booking_sync_api = cache.get(API_CACHE)
    rentals = cache.get(RENTALS_CACHE)
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser may also
        # submit an empty part without a filename.
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            loaded_data = load_csv_data(rentals, file)
            for single_data in loaded_data:
                updateNightlyRates(booking_sync_api, single_data)
        return redirect(request.url)
        # filename = secure_filename(file.filename)
        # result_data['upload_file_path'] = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        # file.save(result_data['upload_file_path'])
        # return redirect(url_for('uploaded_file',
        #                         filename=filename))
    if request.method == 'GET':
        return render_template('file_upload.html')
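This view assumes an allowed_file helper that is not shown in the snippet. A minimal sketch, assuming a CSV-only allow-list; the ALLOWED_EXTENSIONS constant and the example route path are assumptions, not taken from the original project:

ALLOWED_EXTENSIONS = {'csv'}

def allowed_file(filename):
    # Accept only files whose extension is in the allow-list (assumed here).
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

# The view would typically be registered on the Flask app, e.g.:
# app.add_url_rule('/upload', view_func=file_upload, methods=['GET', 'POST'])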
Example #2
    # Default values for argparse args.
    LANGUAGE_NAME = "Logical_index"
    MAX_EXPR_LEN = 5
    MAX_MODEL_SIZE = 8
    LANG_GEN_DATE = "2020-12-25"
    CSV_DATE = "2021-05-05"
    args = parse_args()

    # Set DataFrame print options.
    # pd.set_option("display.max_rows", None)
    pd.set_option("display.max_columns", None)
    # pd.set_option("display.width", None)
    # pd.set_option("display.max_colwidth", None)

    data = utils.load_csv_data(args.max_model_size, args.max_expr_len,
                               args.language_name, args.lang_gen_date,
                               args.csv_date)

    quan_props = ["monotonicity", "quantity", "conservativity"]

    # Show expressions of length 2 that do not satisfy the universal props.
    expressions_non_satisfying(2, data)

    # Make contingency tables and plots of the percentage of quantifiers
    # with each universal prop, per expression length.
    line = "-" * 60
    print(f"\n{line}\nContingency tables\n{line}\n")
    plot_perc_with_prop_per_expr_len(quan_props, data, args.max_model_size,
                                     args.max_expr_len, args.language_name,
                                     args.lang_gen_date)
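This snippet reads args.max_model_size, args.max_expr_len, args.language_name, args.lang_gen_date and args.csv_date from parse_args(), which is not shown. A minimal sketch of what that parser could look like; the flag names are assumptions derived from the attributes the code accesses, with defaults taken from the constants above:

import argparse

def parse_args():
    # Hypothetical argument parser; flag names mirror the attributes read
    # above, defaults mirror the module's stated default values.
    parser = argparse.ArgumentParser()
    parser.add_argument("--language_name", default="Logical_index")
    parser.add_argument("--max_expr_len", type=int, default=5)
    parser.add_argument("--max_model_size", type=int, default=8)
    parser.add_argument("--lang_gen_date", default="2020-12-25")
    parser.add_argument("--csv_date", default="2021-05-05")
    return parser.parse_args()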
Example #3
from fastapi import APIRouter

from models.database import RecipeDatabase
from models.recipe import Recipe, RecipeListResponse, RecipeUpdateRequest
from utils import load_csv_data

router = APIRouter()

_recipe_db = RecipeDatabase(load_csv_data('recipe-data.csv'))


@router.get('/', response_model=RecipeListResponse)
async def search_recipes(cuisine: str, offset: int = 0, nb: int = 10):
    """
    Searches recipes.

    Currently only the search by cuisine is supported.

    :param cuisine: cuisine type to filter
    :param offset: offset of results to fetch
    :param nb: number of results to return
    :return: list of recipes
    """
    if nb > 10 or nb < 0:
        nb = 10

    res, total = _recipe_db.search(cuisine=cuisine,
                                   nb_results=nb,
                                   offset=offset)
    nb_res = len(res)
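The handler above is truncated before the response is built. A hedged usage sketch, assuming the full handler ultimately returns a RecipeListResponse, exercising the router through FastAPI's TestClient; the query values are made up:

from fastapi import FastAPI
from fastapi.testclient import TestClient

app = FastAPI()
app.include_router(router)
client = TestClient(app)

# Example request against the search endpoint defined above.
response = client.get('/', params={'cuisine': 'italian', 'nb': 5})
print(response.status_code, response.json())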
Example #4
def create_interpolated_vectors(data_csv, metadata_csv, output_dir, length):
    #load data
    data, ids, metadata = load_csv_data(data_csv, metadata_csv)

    data_cp = data.copy()
    #get ids
    obj_ids = data.object_id.unique()
    #get targets and retag them (so the classes are numbered from 0 to 14)
    m =  metadata.loc[metadata["object_id"].isin(obj_ids)].drop_duplicates("object_id")
    targets = m["true_target"]
    mask = targets > 100 #class 99 mask
    targets.loc[mask] = 99
    classes_ = targets.drop_duplicates().values
    new_targets = retag(targets)

    #add a number from 0 to 5 at the end of the id so there is an id per passband
    obj_ids_p=np.concatenate([10*obj_ids + d for d in range(6)])
    data_cp['ob_p']=data.object_id*10+data.passband
    
    rem=set(obj_ids_p).difference(set(data_cp['ob_p'].values))

    if len(rem)>0:
        mmjd=data_cp.mjd.mean()
        data_rem=np.zeros((len(rem),7))
        rml=np.array(list(rem))
        data_rem[:,0] = (rml/10).astype('int')
        data_rem[:,1] = np.ones(len(rem))*mmjd 
        data_rem[:,2]= (rml-data_rem[:,0]*10).astype('int')
        data_rem[:,6]=rml
        df_rem=pd.DataFrame(data=data_rem, columns=['object_id','mjd','passband','flux','flux_err','detected','ob_p'])
        data_cp=pd.concat([data_cp,df_rem],ignore_index=True).sort_values(['object_id','mjd']).reset_index(drop=True)

    #catch above rem problem later

    #get dataframe with min and max mjd values per each object id
    group_by_mjd = data_cp.groupby(['object_id'])['mjd'].agg(['min', 'max']).rename(columns = lambda x : 'mjd_' + x).reset_index()
    #add this info to data
    merged = pd.merge(data_cp, group_by_mjd, how = 'left', on = 'object_id')
    #scale mjd according to max mjd, min mjd and the desired length of the light curve (128)
    merged['mm_scaled_mjd'] = (length - 1) * (merged['mjd'] - merged['mjd_min'])/(merged['mjd_max']-merged['mjd_min'])
    merged['count'] = 1
    merged['cc'] = merged.groupby(['ob_p'])['count'].cumcount()
    merged=merged.sort_values(['object_id','mjd'])
    # Reshape df so that each row holds one light curve (6 rows per object) and each column is a point of it;
    # there are two top-level column groups, one for flux and one for mjd.
    unstack = merged[['ob_p', 'mm_scaled_mjd', 'flux', 'cc']].set_index(['ob_p', 'cc']).unstack()
    #transform above info into numpy arrays
    mjd_uns = unstack['mm_scaled_mjd'].values[..., np.newaxis]
    flux_uns = unstack['flux'].values[..., np.newaxis]
    mjd_flux = np.concatenate((mjd_uns, flux_uns), axis =2)
    #create a mask to get points that are valid (not nan)
    nan_masks = ~np.isnan(mjd_flux)[:, :, 0]
    x = np.arange(length)
    
    #here we'll store interpolated lcs
    X = np.zeros((mjd_flux.shape[0], x.shape[0]))
    t=range(mjd_flux.shape[0])
    # Here we'll store the channels that tell us how far each point is from the nearest real point.
    X_void = np.zeros((unstack.shape[0], x.shape[0]))
    
    #interpolation
    for i in t: 
        if nan_masks[i].any():
            X[i] = np.interp(x, mjd_flux[i][:, 0][nan_masks[i]], mjd_flux[i][:, 1][nan_masks[i]])
        else:
            X[i] = np.zeros_like(x)
    #get distance for each point to nearest real point
    t=range(length)
    for i in t:
        X_void[:, i] = np.abs((unstack["mm_scaled_mjd"] - i)).min(axis = 1).fillna(500)

    #reshape vectors so the ones belonging to the same object are grouped into 6 channels    
    n_objs = int(X.shape[0]/6)
    X_per_band = X.reshape((n_objs,6,length)).astype(np.float32)
    X_void_per_band = X_void.reshape((n_objs,6,length)).astype(np.float32)

    vectors = np.concatenate((X_per_band,X_void_per_band),axis=1)
    print(vectors.shape)
    print(obj_ids.shape)
    print(new_targets.values.shape)
    # Save relevant info in an hdf5 file.
    dataset = {
        "X":vectors,
        "ids":obj_ids,
        "Y": new_targets.values
    }
    save_vectors(dataset, output_dir)
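The heart of the interpolation loop above is np.interp restricted to the valid (non-NaN) points of each light curve. A toy, self-contained illustration of that single step; the numbers are made up for demonstration only:

import numpy as np

length = 8
mjd = np.array([0.0, 2.5, 6.0, np.nan])   # scaled observation times
flux = np.array([1.0, 3.0, 2.0, np.nan])  # corresponding fluxes
mask = ~np.isnan(mjd)                     # keep only valid points

x = np.arange(length)
interpolated = np.interp(x, mjd[mask], flux[mask])
print(interpolated)  # 8 evenly spaced samples of the toy light curve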
Example #5
def bootstrap_regression(csv_date: str,
                         scores: list,
                         dep_vars: list,
                         regression_func,
                         repeat: int,
                         sample_size: int,
                         bootstrap_id: int,
                         max_model_size: int,
                         max_expr_len: int,
                         language_name: str,
                         lang_gen_date: str,
                         print_summary=False,
                         verbose=False):
    '''Run regression on data sample and repeat.

    For each score (ind var) in scores, run a regression for the 
    dep_vars, and store coefficient results in a dataframe (per score).
    Do regression for original score data, randomly shuffled score data,
    and compute the difference between those coefficients.

    Args: 
        csv_date: A string. The date on which the csv data was created or
            last altered. For loading csv file with language data which  
            includes column names as given in dep_vars.
        scores: A list of strings. The names of the complexity measures:
            the independent variables.
        dep_vars: A list of strings. The names of the quantifier props:
            the dependent variables.
        regression_func: A function. Choice of regression function. 
        repeat: An int. The number of samples taken, i.e. the number of
            regressions.
        sample_size: An int. The size of the samples taken.
        bootstrap_id: An int. Used for storing csv data with logistic
            regression data. Identifies the bootstrap series for a given
            date. Multiple regression sessions were done on the same data 
            to check for convergence.
        max_model_size: An int. Should coincide with the value in 
            max_model_size column in loaded csv data. Used for loading 
            csv data and storing regression data.
        max_expr_len: An int. Should coincide with the max value of
            expr_length column in loaded csv data. Used for loading csv data 
            and storing regression data.
        language_name: A string. Should coincide with the value of the 
            lot column in loaded csv data. Used for loading csv data and
            storing regression data.
        lang_gen_date: A string. The date on which the data was generated. 
            Used for loading csv data and storing regression data.
        print_summary: True or False. Print the regression summary of
            each sample when True. Reports on convergence.
        verbose: True or False. Print the regression results.

    '''
    data = utils.load_csv_data(max_model_size, max_expr_len, language_name,
                               lang_gen_date, csv_date)
    results = {
        (score, dep_var): \
        pd.DataFrame() for score in scores for dep_var in dep_vars
    }
    # Take samples from original data set, do regression on
    # each sample and store parameter values of the
    # regression results.
    for lap in range(repeat):
        if lap in np.arange(0, repeat + 1, repeat / 10):
            print(lap)
        for score in scores:
            # Reshuffle complexity scores.
            data[f"{score}_shuff_zscore"] = \
                data[
                    f"{score}_shuff_zscore"
                ].sample(frac=1).reset_index(drop=True)
        # Take sample.
        df_sample = data.sample(n=sample_size, replace=True)
        for score in scores:
            ind_vars = [
                f"{score}_zscore",  # complexity (normalized)
                f"{score}_shuff_zscore"  # complexity random baseline
            ]
            # Do regression on sample.
            for dep_var in dep_vars:
                for ind_var in ind_vars:
                    model = regression_func(df_sample, dep_var, [ind_var],
                                            print_summary)
                    # Store the coef of ind_var.
                    results[(score, dep_var)].at[lap, f"coef_{ind_var}"] = \
                        model.params[ind_var]
    for score in scores:
        ind_vars = [
            f"{score}_zscore",  # complexity (normalized)
            f"{score}_shuff_zscore"  # complexity random baseline
        ]
        for dep_var in dep_vars:
            # Store difference scores of coefficients:
            # Original - Randomly shuffled.
            results[(score, dep_var)][f"{ind_vars[0]}-{ind_vars[1]}"] = \
                results[(score, dep_var)][f"coef_{ind_vars[0]}"] - \
                results[(score, dep_var)][f"coef_{ind_vars[1]}"]
            if verbose:
                print(f"results[({score}, {dep_var})]:")
                print(results[(score, dep_var)])
            # Store results.
            log_reg_date = datetime.datetime.now().strftime("%Y-%m-%d")

            csv_filename = utils.make_log_reg_csv_filename(
                ind_vars[0], dep_var, bootstrap_id, sample_size, repeat,
                log_reg_date, max_model_size, max_expr_len, language_name)
            fileloc = utils.make_log_reg_csv_path(max_model_size,
                                                  language_name, lang_gen_date,
                                                  log_reg_date)
            results[(score, dep_var)].to_csv(fileloc / Path(csv_filename),
                                             index=False)
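A hypothetical invocation of bootstrap_regression, for illustration only; the score names, repeat/sample values and regression_func are assumptions, while the remaining values mirror the defaults shown in Example #2:

bootstrap_regression(
    csv_date="2021-05-05",
    scores=["lempel_ziv"],                      # assumed complexity measure
    dep_vars=["monotonicity", "quantity", "conservativity"],
    regression_func=run_logistic_regression,    # hypothetical helper
    repeat=1000,                                # assumed
    sample_size=5000,                           # assumed
    bootstrap_id=1,
    max_model_size=8,
    max_expr_len=5,
    language_name="Logical_index",
    lang_gen_date="2020-12-25",
)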
Example #6
def test_load_csv_data():
    assert load_csv_data('tests/data/test.csv') == [{
        'Column1': 'value1',
        'Column2': 'value2'
    }]
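One implementation consistent with this test, as a sketch rather than the project's actual code: read the file with csv.DictReader and return each row as a plain dict.

import csv

def load_csv_data(path):
    # Return each csv row as a dict keyed by the header columns.
    with open(path, newline='') as f:
        return [dict(row) for row in csv.DictReader(f)]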
Example #7
from flask import Flask, request
from flask import render_template
from werkzeug.datastructures import ImmutableMultiDict

from utils import init_params, create_params, create_demo_data, load_csv_data, append_data, save_csv, exists_id, \
    search_youtube

app = Flask(__name__)
DF = load_csv_data()
print(DF.head())


def save_data(data):
    # Update the dataframe and save it to csv.
    global DF
    data_list = parse_data(data)
    DF = append_data(DF, data_list)
    save_csv(DF)


def parse_data(data: ImmutableMultiDict):
    # Parse the data coming from the form.
    return [
        data.get("vtuber")
        if data.get("isOtherVTuber") is None else data.get("OtherVTuber"),
        data.get("music")
        if data.get("isOtherMusic") is None else data.get("OtherMusic"),
        data.get("original")
        if data.get("isOtherOriginal") is None else data.get("OtherOriginal"),
        "True" if data.get("isCollab") == "yes" else "False",
        data.get("collabVTuber"),
Example #8

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-eval",
                        action="store_true",
                        help="Evaluate on gold data (in development stage)")
    parser.add_argument("-tune_sgd",
                        action="store_true",
                        help="Tune parameters for SGDClassifier")
    parser.add_argument("train_file", help="Path to training data")
    parser.add_argument("test_file", help="Path to test data")
    parser.add_argument("output_file", help="Path to output")
    args = parser.parse_args()

    train_samples, train_labels = load_csv_data(args.train_file,
                                                textcol="text_ws")
    test_df = pd.read_csv(args.test_file)
    test_indices = [str(text) for text in test_df["id"]]
    test_samples = [str(text) for text in test_df["text_ws"]]
    test_labels = [str(intent) for intent in test_df["label"]]

    text_clf = train(train_samples, train_labels, tune_sgd=args.tune_sgd)
    preds = predict(text_clf, test_samples)
    output = []

    for id, label in zip(test_indices, preds):
        output.append([id, label])
    df = pd.DataFrame(data=output, columns=["id", "label"])
    df.to_csv(args.output_file, index=False, quoting=csv.QUOTE_NONE)

    if args.eval:
Example #9
def recipes() -> List[dict]:
    return load_csv_data('recipe-data.csv')