def file_upload():
    global cache
    booking_sync_api = cache.get(API_CACHE)
    rentals = cache.get(RENTALS_CACHE)
    if request.method == 'POST':
        # Check if the POST request has the file part.
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser may also
        # submit an empty part without a filename.
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            loaded_data = load_csv_data(rentals, file)
            for single_data in loaded_data:
                updateNightlyRates(booking_sync_api, single_data)
            return redirect(request.url)
            # filename = secure_filename(file.filename)
            # result_data['upload_file_path'] = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            # file.save(result_data['upload_file_path'])
            # return redirect(url_for('uploaded_file',
            #                         filename=filename))
    if request.method == 'GET':
        return render_template('file_upload.html')
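# `allowed_file` is not defined in the snippet above. A minimal sketch of what
# it presumably does, following the standard Flask upload pattern (the allowed
# extension set here is an assumption, not taken from the original code):
ALLOWED_EXTENSIONS = {'csv'}

def allowed_file(filename):
    # Accept only filenames whose extension is in the allowed set.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS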
# Default values for argparse args.
LANGUAGE_NAME = "Logical_index"
MAX_EXPR_LEN = 5
MAX_MODEL_SIZE = 8
LANG_GEN_DATE = "2020-12-25"
CSV_DATE = "2021-05-05"

args = parse_args()

# Set DataFrame print options.
# pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# pd.set_option("display.width", None)
# pd.set_option("display.max_colwidth", None)

data = utils.load_csv_data(args.max_model_size, args.max_expr_len,
                           args.language_name, args.lang_gen_date,
                           args.csv_date)
quan_props = ["monotonicity", "quantity", "conservativity"]

# Show expressions of length 2 that do not satisfy the universal properties.
expressions_non_satisfying(2, data)

# Make contingency tables and plots of the percentage of quantifiers with a
# universal property, per expression length.
line = "-" * 60
print(f"\n{line}\nContingency tables\n{line}\n")
plot_perc_with_prop_per_expr_len(quan_props, data, args.max_model_size,
                                 args.max_expr_len, args.language_name,
                                 args.lang_gen_date)
from fastapi import APIRouter

from models.database import RecipeDatabase
from models.recipe import Recipe, RecipeListResponse, RecipeUpdateRequest
from utils import load_csv_data

router = APIRouter()
_recipe_db = RecipeDatabase(load_csv_data('recipe-data.csv'))


@router.get('/', response_model=RecipeListResponse)
async def search_recipes(cuisine: str, offset: int = 0, nb: int = 10):
    """
    Searches recipes. Currently only search by cuisine is supported.

    :param cuisine: cuisine type to filter on
    :param offset: offset of the results to fetch
    :param nb: number of results to return
    :return: list of recipes
    """
    if nb > 10 or nb < 0:
        nb = 10
    res, total = _recipe_db.search(cuisine=cuisine, nb_results=nb, offset=offset)
    nb_res = len(res)
def create_interpolated_vectors(data_csv, metadata_csv, output_dir, length):
    # Load data.
    data, ids, metadata = load_csv_data(data_csv, metadata_csv)
    data_cp = data.copy()
    # Get ids.
    obj_ids = data.object_id.unique()
    # Get targets and retag them (so the classes are numbered from 0 to 14).
    m = metadata.loc[metadata["object_id"].isin(obj_ids)].drop_duplicates("object_id")
    targets = m["true_target"]
    mask = targets > 100  # class 99 mask
    targets.loc[mask] = 99
    classes_ = targets.drop_duplicates().values
    new_targets = retag(targets)
    # Append a digit from 0 to 5 to each id so there is one id per passband.
    obj_ids_p = np.concatenate([10 * obj_ids + d for d in range(6)])
    data_cp['ob_p'] = data.object_id * 10 + data.passband
    rem = set(obj_ids_p).difference(set(data_cp['ob_p'].values))
    if len(rem) > 0:
        mmjd = data_cp.mjd.mean()
        data_rem = np.zeros((len(rem), 7))
        rml = np.array(list(rem))
        data_rem[:, 0] = (rml / 10).astype('int')
        data_rem[:, 1] = np.ones(len(rem)) * mmjd
        data_rem[:, 2] = (rml - data_rem[:, 0] * 10).astype('int')
        data_rem[:, 6] = rml
        df_rem = pd.DataFrame(data=data_rem,
                              columns=['object_id', 'mjd', 'passband', 'flux',
                                       'flux_err', 'detected', 'ob_p'])
        data_cp = pd.concat([data_cp, df_rem], ignore_index=True) \
            .sort_values(['object_id', 'mjd']).reset_index(drop=True)
        # TODO: handle the rem problem above later.
    # Get dataframe with min and max mjd values per object id.
    group_by_mjd = data_cp.groupby(['object_id'])['mjd'].agg(['min', 'max']) \
        .rename(columns=lambda x: 'mjd_' + x).reset_index()
    # Add this info to the data.
    merged = pd.merge(data_cp, group_by_mjd, how='left', on='object_id')
    # Scale mjd according to max mjd, min mjd and the desired length of the
    # light curve (e.g. 128).
    merged['mm_scaled_mjd'] = (length - 1) * (merged['mjd'] - merged['mjd_min']) \
        / (merged['mjd_max'] - merged['mjd_min'])
    merged['count'] = 1
    merged['cc'] = merged.groupby(['ob_p'])['count'].cumcount()
    merged = merged.sort_values(['object_id', 'mjd'])
    # Reshape the df so that each row holds one light curve (6 rows per object)
    # and each column is a point of it; there are two main column groups,
    # one for flux and one for mjd.
    unstack = merged[['ob_p', 'mm_scaled_mjd', 'flux', 'cc']] \
        .set_index(['ob_p', 'cc']).unstack()
    # Transform the above info into numpy arrays.
    mjd_uns = unstack['mm_scaled_mjd'].values[..., np.newaxis]
    flux_uns = unstack['flux'].values[..., np.newaxis]
    mjd_flux = np.concatenate((mjd_uns, flux_uns), axis=2)
    # Create a mask of valid (not NaN) points.
    nan_masks = ~np.isnan(mjd_flux)[:, :, 0]
    x = np.arange(length)
    # Here we'll store the interpolated light curves.
    X = np.zeros((mjd_flux.shape[0], x.shape[0]))
    t = range(mjd_flux.shape[0])
    # Here we'll store the channel that tells us how far a point is from the
    # nearest real point.
    X_void = np.zeros((unstack.shape[0], x.shape[0]))
    # Interpolation.
    for i in t:
        if nan_masks[i].any():
            X[i] = np.interp(x, mjd_flux[i][:, 0][nan_masks[i]],
                             mjd_flux[i][:, 1][nan_masks[i]])
        else:
            X[i] = np.zeros_like(x)
    # Get the distance from each point to the nearest real point.
    t = range(length)
    for i in t:
        X_void[:, i] = np.abs((unstack["mm_scaled_mjd"] - i)).min(axis=1).fillna(500)
    # Reshape the vectors so the ones belonging to the same object are grouped
    # into 6 channels.
    n_objs = int(X.shape[0] / 6)
    X_per_band = X.reshape((n_objs, 6, length)).astype(np.float32)
    X_void_per_band = X_void.reshape((n_objs, 6, length)).astype(np.float32)
    vectors = np.concatenate((X_per_band, X_void_per_band), axis=1)
    print(vectors.shape)
    print(obj_ids.shape)
    print(new_targets.values.shape)
    # Save the relevant info into an hdf5 file.
    dataset = {
        "X": vectors,
        "ids": obj_ids,
        "Y": new_targets.values,
    }
    save_vectors(dataset, output_dir)
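# A minimal, self-contained sketch of the interpolation step used above: each
# light curve is resampled onto a fixed grid of `length` points with np.interp,
# using only the valid (non-NaN) observations. The toy values below are
# illustrative only and not taken from the original data.
import numpy as np

length = 8
x = np.arange(length)                            # fixed output grid
scaled_mjd = np.array([0.0, 2.5, 6.0, np.nan])   # scaled times, NaN = padding
flux = np.array([1.0, 3.0, 2.0, np.nan])
valid = ~np.isnan(scaled_mjd)
interpolated = np.interp(x, scaled_mjd[valid], flux[valid])
print(interpolated)  # 8 interpolated flux values on the grid 0..7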
def bootstrap_regression(csv_date: str, scores: list, dep_vars: list,
                         regression_func, repeat: int, sample_size: int,
                         bootstrap_id: int, max_model_size: int,
                         max_expr_len: int, language_name: str,
                         lang_gen_date: str, print_summary=False,
                         verbose=False):
    '''Run regression on data samples and repeat.

    For each score (ind var) in scores, run a regression for the dep_vars
    and store the coefficient results in a dataframe (per score). Do the
    regression for the original score data and for randomly shuffled score
    data, and compute the difference between those coefficients.

    Args:
        csv_date: A string. The date on which the csv data was created or
            last altered. For loading the csv file with language data, which
            includes column names as given in dep_vars.
        scores: A list of strings. The names of the complexity measures:
            the independent variables.
        dep_vars: A list of strings. The names of the quantifier props:
            the dependent variables.
        regression_func: A function. Choice of regression function.
        repeat: An int. The number of samples taken, i.e. the number of
            regressions.
        sample_size: An int. The size of the samples taken.
        bootstrap_id: An int. Used for storing csv data with logistic
            regression data. Identifies the bootstrap series for a given
            date. Multiple regression sessions were done on the same data
            to check for convergence.
        max_model_size: An int. Should coincide with the value in the
            max_model_size column in the loaded csv data. Used for loading
            csv data and storing regression data.
        max_expr_len: An int. Should coincide with the max value of the
            expr_length column in the loaded csv data. Used for loading
            csv data and storing regression data.
        language_name: A string. Should coincide with the value of the lot
            column in the loaded csv data. Used for loading csv data and
            storing regression data.
        lang_gen_date: A string. The date on which the data was generated.
            Used for loading csv data and storing regression data.
        print_summary: True or False. Print the regression summary of each
            sample when True. Reports on convergence.
        verbose: True or False. Print the regression results.

    '''
    data = utils.load_csv_data(max_model_size, max_expr_len, language_name,
                               lang_gen_date, csv_date)
    results = {
        (score, dep_var): pd.DataFrame()
        for score in scores for dep_var in dep_vars
    }
    # Take samples from the original data set, do a regression on each
    # sample, and store the parameter values of the regression results.
    for lap in range(repeat):
        if lap in np.arange(0, repeat + 1, repeat / 10):
            print(lap)
        for score in scores:
            # Reshuffle complexity scores.
            data[f"{score}_shuff_zscore"] = \
                data[f"{score}_shuff_zscore"].sample(frac=1).reset_index(drop=True)
        # Take sample.
        df_sample = data.sample(n=sample_size, replace=True)
        for score in scores:
            ind_vars = [
                f"{score}_zscore",       # complexity (normalized)
                f"{score}_shuff_zscore"  # complexity random baseline
            ]
            # Do regression on the sample.
            for dep_var in dep_vars:
                for ind_var in ind_vars:
                    model = regression_func(df_sample, dep_var, [ind_var],
                                            print_summary)
                    # Store the coef of ind_var.
                    results[(score, dep_var)].at[lap, f"coef_{ind_var}"] = \
                        model.params[ind_var]
    for score in scores:
        ind_vars = [
            f"{score}_zscore",       # complexity (normalized)
            f"{score}_shuff_zscore"  # complexity random baseline
        ]
        for dep_var in dep_vars:
            # Store difference scores of coefficients:
            # original - randomly shuffled.
            results[(score, dep_var)][f"{ind_vars[0]}-{ind_vars[1]}"] = \
                results[(score, dep_var)][f"coef_{ind_vars[0]}"] - \
                results[(score, dep_var)][f"coef_{ind_vars[1]}"]
            if verbose:
                print(f"results[({score}, {dep_var})]:")
                print(results[(score, dep_var)])
            # Store results.
            log_reg_date = datetime.datetime.now().strftime("%Y-%m-%d")
            csv_filename = utils.make_log_reg_csv_filename(
                ind_vars[0], dep_var, bootstrap_id, sample_size, repeat,
                log_reg_date, max_model_size, max_expr_len, language_name)
            fileloc = utils.make_log_reg_csv_path(max_model_size, language_name,
                                                  lang_gen_date, log_reg_date)
            results[(score, dep_var)].to_csv(fileloc / Path(csv_filename),
                                             index=False)
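# A hypothetical invocation of bootstrap_regression, reusing the argparse
# defaults from the analysis script above. The score name and the
# `utils.logistic_regression` helper are assumptions for illustration, not
# taken from the original code.
bootstrap_regression(
    csv_date="2021-05-05",
    scores=["lz_complexity"],                    # hypothetical complexity measure
    dep_vars=["monotonicity", "quantity", "conservativity"],
    regression_func=utils.logistic_regression,   # assumed regression helper
    repeat=1000,
    sample_size=5000,
    bootstrap_id=1,
    max_model_size=8,
    max_expr_len=5,
    language_name="Logical_index",
    lang_gen_date="2020-12-25",
)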
def test_load_csv_data():
    assert load_csv_data('tests/data/test.csv') == [{
        'Column1': 'value1',
        'Column2': 'value2'
    }]
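# The assertion above implies a minimal CSV fixture along these lines
# (a sketch; the actual tests/data/test.csv file is not shown here):
#
#   Column1,Column2
#   value1,value2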
from flask import Flask, request
from flask import render_template
from werkzeug.datastructures import ImmutableMultiDict

from utils import init_params, create_params, create_demo_data, load_csv_data, append_data, save_csv, exists_id, \
    search_youtube

app = Flask(__name__)
DF = load_csv_data()
print(DF.head())


def save_data(data):
    # Update the dataframe and save it to csv.
    global DF
    data_list = parse_data(data)
    DF = append_data(DF, data_list)
    save_csv(DF)


def parse_data(data: ImmutableMultiDict):
    # Parse the data coming from the form.
    return [
        data.get("vtuber") if data.get("isOtherVTuber") is None else data.get("OtherVTuber"),
        data.get("music") if data.get("isOtherMusic") is None else data.get("OtherMusic"),
        data.get("original") if data.get("isOtherOriginal") is None else data.get("OtherOriginal"),
        "True" if data.get("isCollab") == "yes" else "False",
        data.get("collabVTuber"),
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("-eval", action="store_true",
                        help="Evaluate on gold data (in development stage)")
    parser.add_argument("-tune_sgd", action="store_true",
                        help="Tune parameters for SGDClassifier")
    parser.add_argument("train_file", help="Path to training data")
    parser.add_argument("test_file", help="Path to test data")
    parser.add_argument("output_file", help="Path to output")
    args = parser.parse_args()

    train_samples, train_labels = load_csv_data(args.train_file, textcol="text_ws")
    test_df = pd.read_csv(args.test_file)
    test_indices = [str(text) for text in test_df["id"]]
    test_samples = [str(text) for text in test_df["text_ws"]]
    test_labels = [str(intent) for intent in test_df["label"]]

    text_clf = train(train_samples, train_labels, tune_sgd=args.tune_sgd)
    preds = predict(text_clf, test_samples)

    output = []
    for id, label in zip(test_indices, preds):
        output.append([id, label])
    df = pd.DataFrame(data=output, columns=["id", "label"])
    df.to_csv(args.output_file, index=False, quoting=csv.QUOTE_NONE)

    if args.eval:
def recipes() -> List[dict]:
    return load_csv_data('recipe-data.csv')