def download_prediction(model_id):
    token = DataServer.extract_token(request)

    # check if the user sent a token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)

    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    # get the model information from the data server
    user_model = DataServer.get_user_model(user_id=user['user_id'], model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    user_id = user['user_id']
    model_name = user_model['name']

    # check that the trained model and its metadata exist on disk
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404
    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404
    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    # load the stored model metadata
    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info = json.loads(model_info_file.read())

    # check if the file for the prediction exists
    user_predict_dir = os.path.join(PREDICT_DIR, f"{user_id}")
    if not os.path.exists(user_predict_dir):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404
    if not check_file(get_predict_dir(user_id=user_id, model_id=model_id,
                                      file_type=model_info['dataset_info']['type'])):
        return jsonify({'detail': 'No predictions were performed on this model before.'}), 404

    # stream the prediction file back to the user as an attachment
    return Response(
        response=stream_with_context(
            read_file_chunks(get_predict_dir(user_id=user_id, model_id=model_id,
                                             file_type=model_info['dataset_info']['type']))),
        status=200,
        headers={
            'Content-Disposition': f'attachment; filename={model_name}.{model_info["dataset_info"]["type"]}'
        },
        mimetype='application/octet-stream'
    )
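# Illustrative only: a minimal client-side sketch of downloading a prediction
# file through the view above.  The '/download/p/<model_id>' path comes from the
# endpoint returned by the predict view further below; the base URL and the
# 'Token <token>' authorization header format are assumptions and may need to
# match however DataServer.extract_token actually reads the token.
def _example_download_prediction(base_url, token, model_id, out_path):
    import requests

    with requests.get(
            f'{base_url}/download/p/{model_id}',
            headers={'Authorization': f'Token {token}'},
            stream=True,
    ) as response:
        response.raise_for_status()
        # the view streams the file, so write it out chunk by chunk
        with open(out_path, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)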
def predict(
        user_id: int,
        model_id: int,
        to_drop: list,
        predict_data_dir: str,
        model_info: dict,
):
    # load the trained model bundle that was pickled during training
    model_dir = get_model_dir(user_id=user_id, model_id=model_id)
    with open(model_dir, 'rb') as trained_model_file:
        model_data = pickle.load(trained_model_file, fix_imports=True)

    # read the prediction dataset using the same format as the training dataset
    file_type = model_info['dataset_info']['type']
    if file_type in file_extensions['csv']:
        df = pd.read_csv(predict_data_dir)
    elif file_type in file_extensions['json']:
        df = pd.read_json(predict_data_dir, orient='split')
    elif file_type in file_extensions['excel']:
        excel = pd.ExcelFile(predict_data_dir)
        df = pd.read_excel(predict_data_dir, sheet_name=excel.sheet_names[0])
    else:
        raise ValueError('Unsupported file type')

    # drop the same columns that were dropped during training
    to_drop = [] if to_drop is None else to_drop
    df = df.drop(columns=[col for col in to_drop if col in df])

    # apply the label encoders fitted during training; use transform (not
    # fit_transform) so the prediction data is encoded with the training codes
    label_encoders = model_data['label_encoders']
    for column in label_encoders:
        if column not in df:
            # e.g. the target column was encoded during training but is absent here
            continue
        df[column] = label_encoders[column].transform(df[column])

    # exclude the target column if the prediction file happens to include it
    y_col = model_info['y_col']
    if y_col in df:
        x = df.loc[:, df.columns != y_col]
    else:
        x = df

    # predict, normalizing with the scaler fitted during training if one was used
    normalize_scaler = model_data['normalize_scaler']
    model = model_data['model']
    if normalize_scaler is not None:
        y = DataFrame(data=model.predict(normalize_scaler.transform(x)), columns=[y_col])
    else:
        y = DataFrame(data=model.predict(x), columns=[y_col])

    # write the predictions in the same format as the training dataset
    predict_dir = get_predict_dir(user_id=user_id, model_id=model_id, file_type=file_type)
    if file_type in file_extensions['csv']:
        y.to_csv(predict_dir)
    elif file_type in file_extensions['json']:
        y.to_json(predict_dir, orient='split')
    elif file_type in file_extensions['excel']:
        y.to_excel(predict_dir, sheet_name='Sheet1')
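# Illustrative only: the function above relies on a `file_extensions` mapping
# and on the structure of the pickled `model_data` dict, both defined elsewhere
# in the project.  The shapes below are assumptions inferred from how they are
# used here, not the project's authoritative definitions.
FILE_EXTENSIONS_EXAMPLE = {
    'csv': ['csv'],
    'json': ['json'],
    'excel': ['xls', 'xlsx'],
}
# the pickle written at training time is assumed to bundle the fitted estimator
# together with its preprocessing objects
MODEL_DATA_EXAMPLE = {
    'model': None,              # a fitted estimator exposing .predict()
    'label_encoders': {},       # {column_name: fitted LabelEncoder}
    'normalize_scaler': None,   # a fitted scaler, or None if no normalization was used
}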
def model_info():
    # validate that the request body is JSON
    if not request.is_json:
        return jsonify({'detail': 'Invalid format'}), 400

    # check that the data server is the one requesting by checking the token
    if not DataServer.check_data_token(request):
        return jsonify({'detail': 'Unauthorized'}), 401

    # validate the main request arguments; 'model_id' is the only argument allowed to be missing
    request_data = request.get_json()
    missing_args = [item[0] for item in GET_MODEL_INFO_ARGS if item[0] not in request_data]
    if missing_args and missing_args != ['model_id']:
        return jsonify({'detail': 'Missing arguments', 'missing_args': missing_args}), 400

    # type-check only the arguments that were actually sent
    invalid_args = [
        {
            'arg': arg_name,
            'sent_type': type(request_data[arg_name]).__name__,
            'expected_type': arg_type.__name__,
        }
        for arg_name, arg_type in GET_MODEL_INFO_ARGS
        if arg_name in request_data and type(request_data[arg_name]) is not arg_type
    ]
    if len(invalid_args) != 0:
        return jsonify({'detail': 'Invalid types for arguments.', 'invalid_args': invalid_args}), 400

    user_id = request_data['user_id']
    # model_id may be absent (allowed above), so avoid a KeyError here
    model_id = request_data.get('model_id')

    # get the model information from the data server
    user_model = DataServer.get_user_model(user_id=user_id, model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user_id:
        return jsonify({"detail": "The model specified does not belong to the user."}), 403

    # check that the trained model and its metadata exist on disk
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'The model has never been trained before.'}), 404
    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'The model has never been trained before.'}), 404
    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'The model has never been trained before.'}), 404

    # the stored metadata file is already JSON, so return its text directly
    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info_text = model_info_file.read()
    return model_info_text, 200
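# Illustrative only: GET_MODEL_INFO_ARGS (imported elsewhere in this module) is
# treated above as a sequence of (argument_name, expected_type) pairs.  A value
# shaped like the example below would satisfy the validation; the real
# definition in the project may differ.
GET_MODEL_INFO_ARGS_EXAMPLE = [
    ('user_id', int),
    ('model_id', int),
]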
def download_model(model_id):
    token = DataServer.extract_token(request)

    # check if the user sent a token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)

    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    # get the model information from the data server
    user_model = DataServer.get_user_model(user_id=user['user_id'], model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    user_id = user['user_id']
    model_name = user_model['name']

    # check if the file for the model exists
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'Model was never trained before.'}), 404
    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    # stream the pickled model back to the user as an attachment
    return Response(
        response=stream_with_context(read_file_chunks(get_model_dir(user_id=user_id, model_id=model_id))),
        status=200,
        headers={
            'Content-Disposition': f'attachment; filename={model_name}.pickle'
        },
        mimetype='application/octet-stream'
    )
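# Illustrative only: read_file_chunks (imported from the project's helpers) is
# assumed to be a generator along these lines, so that large model and
# prediction files are streamed in fixed-size chunks instead of being loaded
# into memory at once.  The real helper may differ; this is only a sketch.
def _read_file_chunks_sketch(path, chunk_size=64 * 1024):
    with open(path, 'rb') as file:
        while True:
            chunk = file.read(chunk_size)
            if not chunk:
                break
            yield chunk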
def predict(model_id):
    token = DataServer.extract_token(request)

    # check if the user sent a token
    if token is None:
        return jsonify({"detail": "Authentication credentials were not provided."}), 401

    # send a request to the data server to authenticate the user via the token
    user = DataServer.auth_user(token)

    # None is returned if the token is invalid
    if user is None:
        return jsonify({"detail": "Invalid token."}), 401

    user_id = user['user_id']

    # get the model information from the data server
    user_model = DataServer.get_user_model(user_id=user_id, model_id=model_id)
    if user_model is None:
        return jsonify({"detail": "The model specified was not found."}), 404

    # check if the user owns the model
    if user_model['owner'] != user['user_id']:
        return jsonify({"detail": "You do not have permission to perform this action."}), 403

    # check that the trained model and its metadata exist on disk
    user_model_dir = os.path.join(MODEL_DIR, f"{user_id}")
    if not os.path.exists(user_model_dir):
        return jsonify({'detail': 'Model was never trained before.'}), 404
    if not check_file(get_model_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404
    if not check_file(get_model_info_dir(user_id=user_id, model_id=model_id)):
        return jsonify({'detail': 'Model was never trained before.'}), 404

    # create the prediction directory for the user if it doesn't exist
    user_prediction_dir = os.path.join(PREDICT_DIR, f"{user['user_id']}")
    if not os.path.exists(user_prediction_dir):
        os.makedirs(user_prediction_dir)

    # load the stored model metadata
    model_info_dir = get_model_info_dir(user_id=user_id, model_id=model_id)
    with open(model_info_dir, mode='r') as model_info_file:
        model_info = json.loads(model_info_file.read())

    dataset_type = model_info['dataset_info']['type']
    dataset_column_info = model_info['dataset_info']['columns']

    # the dataset is sent in the request body; write it to a temp file first
    temp_file_dir = os.path.join(PREDICT_DIR, f"{user['user_id']}", f'temp.{dataset_type}')
    with open(temp_file_dir, "wb") as file:
        file.write(request.data)

    # try to open the file and read its column information before going any further
    try:
        prediction_dataset_column_info = get_columns(temp_file_dir, dataset_type)
    except (UnicodeDecodeError, EmptyDataError):
        os.remove(temp_file_dir)
        return jsonify({'detail': f"Invalid {dataset_type} file."}), 415

    # compare the columns of the prediction data with the columns used for training
    invalid_columns = []
    dataset_column_info_dict = {column['name']: column for column in dataset_column_info}
    for column in prediction_dataset_column_info:
        if column['name'] in dataset_column_info_dict:
            if column['type'] != dataset_column_info_dict[column['name']]['type']:
                invalid_columns.append({
                    'detail': 'Unexpected type',
                    'name': column['name'],
                    'expected_type': dataset_column_info_dict[column['name']]['type']
                })
        else:
            if column['name'] not in model_info['to_drop'] and column['name'] != model_info['y_col']:
                invalid_columns.append({
                    'detail': 'Column is missing from the training data and was not dropped during training',
                    'name': column['name'],
                })
    if len(invalid_columns) != 0:
        return jsonify({'detail': 'The columns of the prediction file do not match the columns used for training',
                        'invalid_columns': invalid_columns}), 400

    # only drop columns that actually exist in the training column info
    to_drop = [col_name for col_name in model_info['to_drop'] if col_name in dataset_column_info_dict]

    # run the actual prediction
    from predict import predict as predict_data
    predict_data(
        user_id=user_id,
        model_id=model_id,
        to_drop=to_drop,
        predict_data_dir=temp_file_dir,
        model_info=model_info,
    )

    # return status code CREATED along with the endpoint to download the prediction from
    return jsonify({
        'detail': 'Prediction done. You can access the data from the given endpoint.',
        'endpoint': f'/download/p/{model_id}'
    }), 201
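# Illustrative only: a minimal client-side sketch of the prediction flow above.
# It assumes the view is routed at '/predict/<model_id>' and that the token is
# sent as a 'Token <token>' authorization header; both are assumptions and
# should be adjusted to the project's actual routing and auth scheme.
def _example_predict_request(base_url, token, model_id, dataset_path):
    import requests

    # the dataset file is sent raw in the request body, matching request.data above
    with open(dataset_path, 'rb') as dataset_file:
        response = requests.post(
            f'{base_url}/predict/{model_id}',
            headers={'Authorization': f'Token {token}'},
            data=dataset_file.read(),
        )
    # 201 with a download endpoint on success, 4xx with a 'detail' message otherwise
    return response.status_code, response.json()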