def agg(file, groupby, applyname, func):
    """Operates on a groupby column in a csv file and applies a function

    Example Usage:
        ./csvcli.py cvsops --file ext/input.csv --groupby last_name --applyname count --func npmedian
        Processing csvfile: ext/input.csv and groupby name: last_name and applyname: count
        2017-06-22 14:07:52,532 - nlib.utils - INFO - Loading appliable functions/plugins: npmedian
        2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: npsum
        2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: numpy
        2017-06-22 14:07:52,533 - nlib.utils - INFO - Loading appliable functions/plugins: tanimoto
        last_name
        eagle    17.0
        lee       3.0
        smith    13.5
        Name: count, dtype: float64
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI help
    # text (command decorators are not visible here), so parameter notes live
    # in comments instead:
    #   file      -- csv input handed to csvops.group_by_operations as `data`
    #   groupby   -- name of the column to group on
    #   applyname -- name of the column the function is applied to
    #   func      -- key of the function to look up in utils.plugins_map()
    # The result of the group-by is echoed to stdout; the process exits with
    # status 1 on invalid input.

    # Every option is required: reject the call as soon as ANY one is missing
    # (the previous check only fired when all four were missing at once).
    if not (file and groupby and applyname and func):
        click.echo("--file and --groupby and --applyname and --func are required")
        sys.exit(1)

    click.echo(
        "Processing csvfile: {file} and groupby name: {groupby} and applyname: {applyname}".format(
            file=file, groupby=groupby, applyname=applyname))

    # Load plugins and grab the requested one; an unknown name is a user
    # error, so report it cleanly instead of leaking a KeyError traceback.
    plugins = utils.plugins_map()
    try:
        appliable_func = plugins[func]
    except KeyError:
        click.echo("--func {func} is not a registered appliable function".format(
            func=func))
        sys.exit(1)

    res = csvops.group_by_operations(
        data=file,
        groupby_column_name=groupby,
        apply_column_name=applyname,
        func=appliable_func)
    click.echo(res)
def csv_aggregate_columns(groupbyop):
    """Aggregate column in an uploaded csv

    ---
        consumes: application/json
        parameters:
            -   in: path
                name: Appliable Function (i.e. npsum, npmedian)
                type: string
                required: true
                description: appliable function, which must be registered (check /api/funcs)
            -   in: query
                name: column
                type: string
                description: The column to process in an aggregation
                required: True
            -   in: query
                name: group_by
                type: string
                description: The column to group_by in an aggregation
                required: True
            -   in: header
                name: Content-Type
                type: string
                description: Requires "Content-Type:application/json" to be set
                required: True
            -   in: body
                name: payload
                type: string
                description: base64 encoded csv file
                required: True
        responses:
            200:
                description: Returns an aggregated CSV.
            400:
                description: Missing column or group_by query parameter, or unregistered function.
            415:
                description: Content-Type is not application/json.
    """
    # The docstring above is an API spec (summary line, then YAML after the
    # `---` separator), so implementation notes are kept in comments:
    #   groupbyop -- key of the function to look up in utils.plugins_map()
    # Returns a (body, status) tuple: the JSON-serialised result of
    # csvops.group_by_operations with 200, or a JSON error payload with
    # 400 / 415.

    #TO DO?: Make this into a helper function
    #Return 415 if not valid content type
    content_type = request.headers.get('Content-Type')
    log.info("Content-Type is set to: {content_type}".format(
        content_type=content_type))
    if content_type != "application/json":
        unsupported_type_msg = (
            "Wrong Content-Type in request: {content_type} sent, "
            "but requires application/json"
        ).format(content_type=content_type)
        log.info(unsupported_type_msg)
        return jsonify({
            "content_type": content_type,
            "error_msg": unsupported_type_msg
        }), status.HTTP_415_UNSUPPORTED_MEDIA_TYPE

    #Parse Query Parameters and Retrieve Values
    query_string = request.query_string
    log.info("Request Query String: {query_string}".format(
        query_string=query_string))
    column = request.args.get("column")
    group_by = request.args.get("group_by")

    #Query Parameter logging and handling
    log.info(
        "column: [{column}] and group_by: [{group_by}] Query Parameter values".format(
            column=column, group_by=group_by))
    if not column or not group_by:
        error_msg = "Query Parameter column or group_by not set"
        log.info(error_msg)
        return jsonify({
            "column": column,
            "group_by": group_by,
            "error_msg": error_msg
        }), status.HTTP_400_BAD_REQUEST

    #Load Plugins and grab correct one. The name comes straight from the URL,
    #so an unregistered value is a client error rather than a server failure.
    plugins = utils.plugins_map()
    try:
        appliable_func = plugins[groupbyop]
    except KeyError:
        error_msg = "Appliable function [{groupbyop}] is not registered".format(
            groupbyop=groupbyop)
        log.info(error_msg)
        return jsonify({
            "groupbyop": groupbyop,
            "error_msg": error_msg
        }), status.HTTP_400_BAD_REQUEST

    #TO DO?: Add some additional error handling (invalid column name, etc)
    #Unpack data and operate on it; the second value returned by the helper
    #is not needed here.
    data, _ = _b64decode_helper(request)
    #Returns Pandas Series
    res = csvops.group_by_operations(
        data,
        groupby_column_name=group_by,
        apply_column_name=column,
        func=appliable_func)
    log.info(res)
    return res.to_json(), status.HTTP_200_OK