def handle_tornado_upload_file(http_handler, tornado_http_files, upload_start_time):
    # 1. check and read param
    uploaded_files = tornado_http_files.get("file")
    if not uploaded_files:  # .get() may return None; guard before indexing
        raise MissingParamException("file")
    tornado_http_file = uploaded_files[0]

    file_name = tornado_http_file['filename']
    file_body = tornado_http_file['body']
    file_size = util.human_data_size(len(file_body))
    file_suffix = util.get_file_suffix(file_name)
    assert file_suffix in ['.csv', '.tsv'], \
        'File suffix must be one of [.csv, .tsv], current is: %s' % file_suffix

    # rename so the file is readable in URLs and on disk
    origin_file_name = util.make_dataset_name(util.cut_suffix(file_name)) + file_suffix

    # 2. open temporary file and write body to local disk
    temporary_file_path = util.temporary_upload_file_path(origin_file_name)
    if not P.exists(P.dirname(temporary_file_path)):
        os.makedirs(P.dirname(temporary_file_path))
    logger.info(f"Open path {temporary_file_path} to store uploaded file.")
    with open(temporary_file_path, 'wb') as f:
        f.write(file_body)
    logger.info(f"Upload finished at {temporary_file_path}, file size {file_size}.")
    upload_took = util.time_diff(time.time(), upload_start_time)

    # 3. response; the relative path does not start with /
    # relative_path = temporary_file_path[len(consts.PATH_DATA_ROOT)+1:]
    response = {
        "path": util.relative_path(P.abspath(temporary_file_path)),
        "size": file_size,
        "took": upload_took
    }
    http_handler.response_json(response)
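# Illustrative only: a minimal sketch of how the handler above might be
# mounted in a Tornado application. UploadHandler, the /api/dataset/upload
# route, and the response_json helper here are assumptions for this sketch,
# not part of the original module.
import time

import tornado.ioloop
import tornado.web


class UploadHandler(tornado.web.RequestHandler):
    def post(self):
        # Tornado parses multipart form data into self.request.files: a dict
        # mapping field names to lists of {filename, body, content_type} files.
        handle_tornado_upload_file(self, self.request.files, time.time())

    def response_json(self, payload):
        # Minimal stand-in for the project's JSON response helper.
        self.set_header("Content-Type", "application/json")
        self.write(payload)


def _run_upload_demo():  # defined for illustration, not invoked on import
    app = tornado.web.Application([(r"/api/dataset/upload", UploadHandler)])
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()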
# logger.info(f"Encode label column {label_col} because type is {f.type}. ") # y = pd.Series(LabelEncoder().fit_transform(y), name=label_col) # 5. encode categorical features pearson_corr_dict = {} for f in dataset_stats.features: if f.type == FeatureType.Categorical: logger.info(f"Skip categorical feature {f.name} ") # lb = LabelEncoder() # encoded_series = pd.Series(lb.fit_transform(df[f.name]), name=f.name) # pearson_corr_dict[f.name] = y.corr(encoded_series, method='pearson') pearson_corr_dict[f.name] = None elif f.type in [FeatureType.Continuous, FeatureType.Datetime]: pearson_corr_dict[f.name] = y.corr(df[f.name], method='pearson') else: logger.info( f"Encode feature {f.name} type is {f.type}, skipped calc corr. ") pearson_corr_dict[f.name] = None # not support text feature extension = {"corr": pearson_corr_dict, "label_col": label_col} # 6. send back calc result client.analyze_callback(portal=server_portal, dataset_name=dataset_name, analyze_job_name=job_name, type=AnalyzeStep.Types.PatchCorrelation, status=JobStep.Status.Succeed, took=util.time_diff(time.time(), t), extension=extension)