def __authenticate(self, request: Request, username: str = None, password: str = None) -> User:
    """Authenticate a user by username and password.

    Args:
        request (Request): Request carrying the DB session in ``request.state.db_session``.
        username (str): Username to look up.
        password (str): Password to verify against the stored one.

    Returns:
        User: The first user matching ``username``.

    Raises:
        ApiException: With ``ErrorMessage.FAILURE_LOGIN`` when no user matches
            the username, when the password check fails, or when the user is
            not active.
    """
    query = CRUDUser(request.state.db_session).get_query()
    matches = query.filter_by(username=username).all()

    # No user with this username: fail the login.
    if not matches:
        raise ApiException(create_error(ErrorMessage.FAILURE_LOGIN))

    candidate = matches[0]

    # The password is checked first; the active flag is only consulted when
    # the password matched.
    if not (check_password(password, candidate.password) and candidate.is_active):
        raise ApiException(create_error(ErrorMessage.FAILURE_LOGIN))

    return candidate
async def authenticate(self, request: Request) -> None:
    """Resolve the requesting user from the ``Authorization`` header.

    NOTE(review): the signature is annotated ``-> None`` but every non-raising
    path returns a ``(AuthCredentials, user)`` tuple.

    Args:
        request (Request): Incoming request.

    Returns:
        A tuple of ``authentication.AuthCredentials`` and either an
        ``UnauthenticatedUser`` or an ``AuthenticatedUser``.

    Raises:
        ApiException: With ``EXPIRED_TOKEN`` (HTTP 401) when the access token
            has expired, or ``INVALID_TOKEN`` when the user from the token
            cannot be found or is inactive.
    """

    def _unauthenticated():
        # Credentials/user pair used for every "not logged in" outcome.
        return authentication.AuthCredentials(['unauthenticated']), UnauthenticatedUser()

    authorization: str = request.headers.get('Authorization')
    scheme, access_token = get_authorization_scheme_param(authorization)

    # No header, or a scheme other than Bearer: treat as unauthenticated.
    if not (authorization and scheme.lower() == 'bearer'):
        return _unauthenticated()

    # Decode the JWT to obtain its claim set.
    try:
        claims = jwt_decord_handler(access_token)
    except jwt.ExpiredSignatureError:
        # Expired access token.
        raise ApiException(create_error(ErrorMessage.EXPIRED_TOKEN),
                           status_code=status.HTTP_401_UNAUTHORIZED)
    except Exception as e:
        # Any other decoding failure downgrades to unauthenticated.
        print(e)
        return _unauthenticated()

    # Load the user named by the token's ``user_id`` claim.
    account = CRUDUser(request.state.db_session).get_by_id(claims['user_id'])

    # Reject when the user is missing or inactive.
    if not (account and account.is_active):
        raise ApiException(create_error(ErrorMessage.INVALID_TOKEN))

    return authentication.AuthCredentials(['authenticated']), AuthenticatedUser(account)
def json_to_pandas(json_data):
    """Convert row-wise JSON ``[{"var1": val1, "var2": val2}, {...}]`` to a DataFrame.

    Args:
        json_data: UTF-8 ``bytes``, a JSON ``str``, or an already-parsed
            iterable of row dictionaries.

    Returns:
        pandas.DataFrame: One column per key seen in the rows.

    Raises:
        ApiException: If the bytes are not valid UTF-8, the string is not
            valid JSON, or the parsed data cannot be assembled into a
            DataFrame (e.g. it is not a list of dicts, or rows have
            different keys so the columns end up with unequal lengths).
    """
    # bytes -> str
    if isinstance(json_data, bytes):
        try:
            json_data = json_data.decode("utf-8")
        except UnicodeDecodeError as err:
            raise ApiException("Data not unicode encoded") from err

    # str -> parsed JSON
    if isinstance(json_data, str):
        try:
            json_data = json.loads(json_data)
        except json.decoder.JSONDecodeError as err:
            raise ApiException(
                "Unable to read as JSON: {}".format(json_data)) from err

    # Assume we now have a list of records; pivot it into column lists.
    try:
        frame_dict = {}
        for row in json_data:
            for k, v in row.items():
                frame_dict.setdefault(k, []).append(v)
        return pd.DataFrame(frame_dict)
    except Exception as err:
        # Could be TypeError, AttributeError, ValueError, ...  `Exception`
        # (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        raise ApiException("Unable to convert json to pandas dataframe") from err
def handle_file(path, db):
    """Count words in a file line by line, flushing the counts to the DB in batches.

    Counts are accumulated with ``count(line)`` and handed to ``save_to_db``
    every ``LINE_NUM_BUFFER`` lines; whatever remains after the last line is
    saved once more at the end (this final call may receive an empty counter).

    Args:
        path: Path of the text file to read.
        db: Database handle passed through to ``save_to_db``.

    Raises:
        ApiException: 400 when the file does not exist, 500 on any other
            ``OSError`` while handling the file.
    """
    line_number = 1
    accumulative_count = Counter()
    try:
        with open(path, 'r') as reader:
            for line in reader:
                accumulative_count += count(line)
                # Buffer is full: save to the DB and reset the counters.
                if line_number == LINE_NUM_BUFFER:
                    save_to_db(accumulative_count, db)
                    accumulative_count = Counter()
                    line_number = 0
                line_number += 1
        # Save the remaining lines to the DB.
        save_to_db(accumulative_count, db)
    except FileNotFoundError as e:
        raise ApiException('file not found', 400) from e
    except OSError as e:
        # The exception is passed as a lazy %-argument; without a placeholder
        # the logging call itself fails to format the record.
        logging.error('error in file handling: %s', e)
        raise ApiException('error in file handling', 500) from e
def get_file_content(url):
    """Return the text content of a file stored on the datastore.

    The file is fetched from ``url``. If that connection fails, a second
    attempt is made against ``DATASTORE_URI`` using the last two path
    segments of ``url`` (cell hash and file name).

    Args:
        url: URL of the file.

    Returns:
        str: The response body decoded as UTF-8.

    Raises:
        ApiException: With the response status code when the first request
            returns a non-200 status; with status 500 when the fallback
            attempt fails for any reason (including a non-200 response).
    """
    try:
        r = requests.get(url)
        # Compare by value: `is not 200` tests object identity.
        if r.status_code != 200:
            raise ApiException("Could not retrieve dataframe",
                               status_code=r.status_code)
        return r.content.decode("utf-8")
    except requests.exceptions.ConnectionError:
        try:
            # Fall back on the datastore environment variable.
            cell_hash, file_name = url.split("/")[-2:]
            url = '{}/{}/{}'.format(DATASTORE_URI, cell_hash, file_name)
            r = requests.get(url)
            if r.status_code != 200:
                raise ApiException("Could not retrieve dataframe",
                                   status_code=r.status_code)
            return r.content.decode("utf-8")
        except Exception as err:
            # Every failure of the fallback is reported as a generic 500,
            # as before, but KeyboardInterrupt/SystemExit now propagate.
            raise ApiException(
                "Unable to get file content from {}".format(url),
                status_code=500) from err
def retrieve_frames(input_frames):
    """Fetch every input frame from the data store.

    Args:
        input_frames: List of dictionaries ``{'name': x, 'url': y}``.

    Returns:
        dict: ``{<name>: <content>}`` where the content is the parsed JSON
        body of the frame.

    Raises:
        ApiException: When a frame URL answers with a non-200 status, or when
            the fallback lookup raises a connection error.
    """
    fetched = {}
    for spec in input_frames:
        try:
            response = requests.get(spec["url"])
            if response.status_code != 200:
                raise ApiException("Problem retrieving dataframe %s" % spec["name"],
                                   status_code=response.status_code)
            fetched[spec["name"]] = json.loads(response.content.decode("utf-8"))
        except requests.exceptions.ConnectionError:
            # Fall back on read_frame, which builds the URL from DATASTORE_URI.
            try:
                frame_hash, frame_name = spec["url"].split("/")[-2:]
                fetched[spec["name"]] = read_frame(frame_name, frame_hash)
            except requests.exceptions.ConnectionError:
                raise ApiException("Unable to connect to {}".format(
                    spec["url"]))
    return fetched
def get_data_from_url(url):
    """Download ``url`` and return its body as a string.

    Args:
        url: Address to request (30 second timeout).

    Returns:
        str: ``str()`` of the raw response bytes.

    Raises:
        ApiException: 400 when the response status is not 200, 500 when the
            request itself fails.
    """
    try:
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            raise ApiException('url returned bad response', 400)
        # NOTE(review): str() on bytes yields the "b'...'" repr with escaped
        # newlines; `response.text` may be what was intended. Left unchanged
        # because callers may rely on the current output - confirm.
        return str(response.content)
    except requests.exceptions.RequestException as e:
        # The exception is passed as a lazy %-argument; without a placeholder
        # the logging call itself fails to format the record.
        logging.error('error connecting to input url: %s', e)
        raise ApiException('error while requesting url', 500) from e
def read_frame(frame_name, frame_hash):
    """Read a frame from the data store.

    Args:
        frame_name: Name of the frame (last URL segment).
        frame_hash: Hash of the owning cell (middle URL segment).

    Returns:
        The JSON-decoded body found at ``DATASTORE_URI/<frame_hash>/<frame_name>``.

    Raises:
        ApiException: With the response status code when it is not 200, or
            with status 500 when the datastore cannot be reached.
    """
    url = '{}/{}/{}'.format(DATASTORE_URI, frame_hash, frame_name)
    try:
        r = requests.get(url)
        # Compare by value: `is not 200` tests object identity.
        if r.status_code != 200:
            raise ApiException("Could not retrieve dataframe",
                               status_code=r.status_code)
        return json.loads(r.content.decode("utf-8"))
    except requests.exceptions.ConnectionError as err:
        raise ApiException(
            "Unable to connect to datastore {}".format(DATASTORE_URI),
            status_code=500) from err
def login(cls, request: Request, schema: AuthRequestSchema) -> Dict[str, str]:
    """Login API.

    Args:
        request (Request): Incoming request.
        schema (AuthRequestSchema): Request body with ``username`` and ``password``.

    Returns:
        Dict[str, str]: Result of ``jwt_response_handler`` for the encoded
        access token.

    Raises:
        ApiException: With ``INVALID_EMAIL_OR_PASSWORD`` when the username or
            the password is empty.
    """
    credentials = {
        'username': schema.username,
        'password': schema.password,
    }

    # Both fields are required before authentication is attempted.
    if not all(credentials.values()):
        raise ApiException(
            create_error(ErrorMessage.INVALID_EMAIL_OR_PASSWORD))

    # Authenticate, then build the claim set of the access token.
    user = cls().__authenticate(request, **credentials)
    access_token_claims = jwt_claims_handler(
        user, token_type=TYPE_ACCESS_TOKEN)

    # Return the encoded access token.
    return jwt_response_handler(jwt_encode_handler(access_token_claims))
def execute_file_content(file_content):
    """Run the text of a file through ``exec``.

    The content is expected to hold function definitions and import
    statements. NOTE(review): ``exec`` runs arbitrary code; the content must
    come from a trusted source.

    Args:
        file_content: Source code as a string.

    Raises:
        ApiException: When the content has a syntax error.
    """
    try:
        exec(file_content)
    except SyntaxError as e:
        raise ApiException("Error processing file: {}".format(e.msg))
    else:
        print("Executed file content!")
def arrow_to_pandas(arrow_buffer):
    """Convert an Apache Arrow buffer into a pandas DataFrame.

    Args:
        arrow_buffer: Buffer accepted by ``pa.ipc.open_file``.

    Returns:
        The frame produced by the reader's ``read_pandas()``.

    Raises:
        ApiException: When the buffer cannot be opened or converted.
    """
    try:
        reader = pa.ipc.open_file(arrow_buffer)
        return reader.read_pandas()
    except Exception as err:
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
        # are not converted into an API error.
        raise ApiException("Error converting arrow to pandas dataframe") from err
def pandas_to_json(dataframe):
    """Serialise a DataFrame into wrattler format, i.e. a JSON list of rows.

    Input that is not already a ``pd.DataFrame`` is first passed to the
    ``pd.DataFrame`` constructor.

    Args:
        dataframe: A DataFrame, or anything ``pd.DataFrame(...)`` accepts.

    Returns:
        str: JSON text in ``orient='records'`` layout.

    Raises:
        ApiException: When the input cannot be converted to a DataFrame.
    """
    if not isinstance(dataframe, pd.DataFrame):
        try:
            dataframe = pd.DataFrame(dataframe)
        except Exception as err:
            # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
            # are not converted into an API error.
            raise ApiException("Unable to convert to pandas dataframe") from err
    return dataframe.to_json(orient='records')
def find_assignments(code_string):
    """Collect assigned names and referenced names from a piece of code.

    Args:
        code_string: Python source to analyse.

    Returns:
        dict: ``{"targets": [...], "input_vals": [...]}``. ``targets`` holds
        names assigned at global scope; ``input_vals`` holds names read in
        assignment values, calls and subscripts. Each name appears once, in
        first-seen order.

    Raises:
        ApiException: When ``code_string`` cannot be parsed.
    """
    output_dict = {"targets": [], "input_vals": []}
    try:
        tree = ast.parse(code_string)
    except SyntaxError as e:
        raise ApiException("Syntax error in code string: {}".format(e.msg))

    def _find_elements(node, output_dict, parent=None, global_scope=True):
        """Walk the tree, recording names under the bucket named by ``parent``."""
        # Lists of child nodes: visit each element with the same context.
        if isinstance(node, list):
            for element in node:
                _find_elements(element, output_dict, parent, global_scope)
            return output_dict

        # Plain field values (strings, numbers, None) carry no names.
        if not isinstance(node, ast.AST):
            return output_dict

        if isinstance(node, ast.Assign):
            _find_elements(node.targets, output_dict, "targets", global_scope)
            _find_elements(node.value, output_dict, "input_vals", global_scope)
        elif isinstance(node, ast.Call):
            _find_elements(node.args, output_dict, "input_vals", global_scope)
            _find_elements(node.func, output_dict, "input_vals", global_scope)
        elif isinstance(node, ast.Subscript):
            # Things like df[0] = x are treated like calls: 'df' must already
            # exist, so it is an input; when assigned to, it is also a target.
            _find_elements(node.value, output_dict, "input_vals", global_scope)
            if parent == "targets":
                _find_elements(node.value, output_dict, "targets", global_scope)
        elif isinstance(node, ast.Name) and parent:
            # Targets are only recorded at global scope; inputs always are.
            wanted = global_scope or parent == "input_vals"
            if wanted and node.id not in output_dict[parent]:
                output_dict[parent].append(node.id)
        else:
            # Everything below a function definition is no longer global scope.
            child_scope = global_scope and not isinstance(node, ast.FunctionDef)
            for _, child in ast.iter_fields(node):
                _find_elements(child, output_dict, parent, child_scope)
        return output_dict

    return _find_elements(tree, output_dict)
def convert_to_pandas(input_data):
    """Convert input of unknown type (Apache Arrow or JSON) to a pandas DataFrame.

    Arrow conversion is tried first; JSON conversion is the fallback.

    Args:
        input_data: Arrow buffer or JSON data.

    Returns:
        The DataFrame produced by whichever conversion succeeded.

    Raises:
        ApiException: When both conversions fail.
    """
    try:
        return arrow_to_pandas(input_data)
    except ApiException:
        # Not Arrow: try the JSON reader before giving up.
        try:
            return json_to_pandas(input_data)
        except ApiException:
            raise ApiException("Unknown data type - cannot convert to pandas")
async def login_required(
        request: Request, token: str = Depends(OAUTH2_SCHEMA)) -> None:
    """Require that the requesting user is logged in.

    Args:
        request (Request): Incoming request; ``request.user`` is inspected.
        token (str): Access token resolved through ``OAUTH2_SCHEMA``.

    Raises:
        ApiException: With ``INVALID_TOKEN`` (HTTP 401) when the user is not
            authenticated.
    """
    if request.user.is_authenticated:
        return

    raise ApiException((create_error(ErrorMessage.INVALID_TOKEN)),
                       status_code=status.HTTP_401_UNAUTHORIZED)
def leave_meeting(user_uuid: str, meeting_uuid: str):
    """Remove a user's membership from a meeting.

    Args:
        user_uuid: UUID of the leaving user.
        meeting_uuid: UUID of the meeting.

    Returns:
        tuple: An empty body with status code 204.

    Raises:
        ApiException: 403 when the user owns the meeting. The validation and
            lookup helpers may raise as well.
    """
    # Validate both identifiers before touching the database.
    validate_user_uuid(user_uuid)
    validate_meeting_uuid(meeting_uuid)

    target_meeting = find_meeting(meeting_uuid)
    leaving_user = find_user(user_uuid)

    # Owners are not allowed to leave their own meeting.
    if is_owner(leaving_user, target_meeting):
        raise ApiException('Meeting owner cannot leave meeting', 403)

    link = find_membership(meeting_uuid, user_uuid)
    db.session.delete(link)
    db.session.commit()

    return '', 204
def write_frame(data, frame_name, frame_hash):
    """Write a frame to the data store.

    Args:
        data: Request body to PUT.
        frame_name: Name of the frame (last URL segment).
        frame_hash: Hash of the owning cell (middle URL segment).

    Returns:
        bool: True when the response body contains the token
        ``StatusMessage:Created`` or the status code is 200, else False.

    Raises:
        ApiException: With status 500 when the datastore cannot be reached.
    """
    url = '{}/{}/{}'.format(DATASTORE_URI, frame_hash, frame_name)
    try:
        r = requests.put(url, data=data)
        tokenized_response = r.content.decode("utf-8").split()
        if 'StatusMessage:Created' in tokenized_response:
            return True
        return r.status_code == 200
    except requests.exceptions.ConnectionError as err:
        raise ApiException(
            "Unable to connect to datastore {}".format(DATASTORE_URI),
            status_code=500) from err
def join_meeting(meeting_uuid: str, user_uuid: str):
    """Add a user to a meeting under the nickname supplied with the request.

    Args:
        meeting_uuid: UUID of the meeting to join.
        user_uuid: UUID of the joining user.

    Returns:
        tuple: An empty body with status code 204.

    Raises:
        ApiException: 400 when committing the membership raises
            ``IntegrityError`` or ``FlushError``. The validation and lookup
            helpers may raise as well.
    """
    # Validate identifiers and the request payload first.
    validate_meeting_uuid(meeting_uuid)
    validate_user_uuid(user_uuid)
    check_json_data()

    nickname = get_nickname()
    target_meeting = find_meeting(meeting_uuid)
    joining_user = find_user(user_uuid)

    new_member = Membership(user=joining_user, meeting=target_meeting, nickname=nickname)
    db.session.add(new_member)

    try:
        db.session.commit()
    except (IntegrityError, FlushError):
        raise ApiException('Already a member', 400)

    return '', 204
async def __call__(self, request: Request) -> Optional[str]:
    """Extract the bearer token from the ``Authorization`` header.

    Args:
        request (Request): Incoming request.

    Returns:
        Optional[str]: The token part of the header, or ``None`` when the
        header is missing or not a Bearer scheme and ``auto_error`` is off.

    Raises:
        ApiException: With ``INVALID_TOKEN`` (HTTP 401) when the header is
            missing or not a Bearer scheme and ``auto_error`` is on.
    """
    authorization: str = request.headers.get('Authorization')
    scheme, param = security.utils.get_authorization_scheme_param(authorization)

    # Happy path: a Bearer header is present.
    if authorization and scheme.lower() == 'bearer':
        return param

    if not self.auto_error:
        return None

    raise ApiException(create_error(ErrorMessage.INVALID_TOKEN),
                       status_code=status.HTTP_401_UNAUTHORIZED)
def list_definitions(word=None):
    """Print a numbered list of definitions for an English word.

    Args:
        word: Word to look up via ``wn.synsets``. When empty, the word is
            requested interactively with ``input``.

    Raises:
        DictionaryException: When the lookup raises ``LookupError``.
        ApiException: When the lookup fails with any other exception.
    """
    if not word:
        word = input("Enter an English word:\n")

    try:
        found = wn.synsets(word)
    except LookupError as lerror:
        raise DictionaryException(
            "Wordnet resource not found. Please download the nltk data"
        ) from lerror
    except Exception as ex:
        raise ApiException(
            "An error ocurred while connecting with the API") from ex

    # Nothing found: report it and stop.
    if not found:
        print(f"{word} is not an English word.")
        return

    for position, entry in enumerate(found, start=1):
        print(f"{position}. {entry.definition().capitalize()}.")
def write_image(frame_hash):
    """Upload the figure found under ``TMPDIR/<frame_hash>`` to the datastore.

    The file ``fig.png`` is base64-encoded and PUT as JSON
    ``[{"IMAGE": <base64 text>}]`` to ``DATASTORE_URI/<frame_hash>/figures``.

    Args:
        frame_hash: Hash of the cell; names both the temp directory and the
            datastore path.

    Returns:
        bool: False when ``TMPDIR/<frame_hash>`` does not exist; otherwise
        whether the datastore answered with status 200.

    Raises:
        ApiException: With status 500 when the datastore cannot be reached.
    """
    file_path = os.path.join(TMPDIR, frame_hash)
    if not os.path.exists(file_path):
        return False

    url = '{}/{}/figures'.format(DATASTORE_URI, frame_hash)

    # Read inside a context manager so the file handle is always closed.
    with open(os.path.join(file_path, 'fig.png'), 'rb') as file_data:
        img_b64 = base64.b64encode(file_data.read())
    data = [{"IMAGE": img_b64.decode("utf-8")}]

    try:
        r = requests.put(url, json=data)
        return r.status_code == 200
    except requests.exceptions.ConnectionError as err:
        raise ApiException(
            "Could not write image to datastore {}".format(DATASTORE_URI),
            status_code=500) from err
def translate(**kwargs):
    """Translate a text or the content of a ``.txt`` file.

    Keyword Args:
        source: Source language (default ``"en"``).
        dest: Destination language (default ``"es"``).
        file: File to translate; a string value must be an existing ``.txt`` path.
        text: Text to translate.
        output: Where the translation of a file is written
            (default ``"data/output.txt"``).

    When neither ``text`` nor ``file`` is given, the languages and the text
    are requested interactively.

    Returns:
        str: The translated text.

    Raises:
        TranslateException: When the file path is invalid, is not a ``.txt``
            file, or the translator raises ``ValueError``.
        ApiException: When the translator fails with any other exception.
    """
    source = kwargs.get("source", "en")
    dest = kwargs.get("dest", "es")
    input_file = kwargs.get("file")
    text = kwargs.get("text")

    if not (text or input_file):
        # Nothing supplied: ask the user for everything.
        source = input("Enter the source language: ")
        dest = input("Enter the destination language: ")
        text = input("Enter a text to translate: ")
    elif input_file:
        # Only string values are validated as paths.
        if isinstance(input_file, str):
            candidate = Path(input_file)
            if not candidate.is_file():
                raise TranslateException("Please provide a valid path to a file")
            if candidate.suffix != ".txt":
                raise TranslateException("Only .txt files are allowed")
        text = read_file(input_file)

    try:
        translated_text = Translator().translate(text, dest=dest, source=source).text
    except ValueError as verror:
        raise TranslateException(
            "The source or destination language is invalid.") from verror
    except Exception as ex:
        raise ApiException(
            "An error ocurred while connecting with the API") from ex

    # File input also gets its translation written to disk.
    if input_file:
        output_file = kwargs.get("output", "data/output.txt")
        write_file(translated_text, output_file)

    return translated_text
def execute_code(code, input_val_dict, return_vars, output_hash, verbose=False):
    """Wrap the cell code in a function, define it with ``exec`` and run it.

    ``construct_func_string`` builds source text that is expected to define
    ``wrattler_f``; that source is executed and ``wrattler_f()`` is then
    evaluated while stdout is captured.

    NOTE(review): ``exec``/``eval`` run arbitrary code; ``code`` must come
    from a trusted source.

    Args:
        code: The code of the cell, as a string.
        input_val_dict: Input values handed to ``construct_func_string``.
        return_vars: Variable names the wrapped function should return.
        output_hash: Hash of the cell.
        verbose: When True, print the generated function source.

    Returns:
        dict: ``{"output": <captured stdout>, "results": [<converted values>]}``.
        An iterable function result is converted item by item, a falsy result
        gives an empty list, anything else a one-element list.

    Raises:
        ApiException: With status 500 when running the function or converting
            its results raises.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`. Imported locally so the block is self-contained.
    from collections.abc import Iterable

    func_string = construct_func_string(code, input_val_dict, return_vars,
                                        output_hash)
    if verbose:
        print(func_string)
    exec(func_string)

    return_dict = {"output": "", "results": []}
    try:
        with stdoutIO() as s:
            # Wrapping function wrattler_f should now be in the namespace.
            func_output = eval('wrattler_f()')
            return_dict["output"] = s.getvalue().strip()
            if isinstance(func_output, Iterable):
                return_dict["results"] = [
                    convert_from_pandas_df(item) for item in func_output
                ]
            elif not func_output:
                return_dict["results"] = []
            else:
                return_dict["results"] = [convert_from_pandas_df(func_output)]
    except Exception as e:
        output = "{}: {}".format(type(e).__name__, e)
        raise ApiException(output, status_code=500) from e
    return return_dict
def not_found(error):
    """Build the JSON response for an unknown endpoint.

    Args:
        error: The original error; it is not used.

    Returns:
        The ``to_json_response()`` of a 404 ``ApiException``.
    """
    return ApiException('Endpoint not found', status_code=404).to_json_response()
def evaluate_code(data):
    """Analyse and execute the code posted to the eval endpoint.

    Args:
        data: Dictionary in the format::

            {
                "code": <code_string>,
                "hash": <cell_hash>,
                "frames": [<frame_name>, ...]
            }

    Returns:
        dict: ::

            {
                "output": <text_output_from_cell>,
                "frames": [{"name": <frame_name>, "url": <frame_url>}, ...],
                "figures": [{"name": <fig_name>, "url": <fig_url>}, ...]
            }

    Raises:
        ApiException: With status 500 when the number of results differs from
            the number of assignment targets.
        RuntimeError: When any frame could not be written to the datastore.
    """
    code_string = data["code"]
    output_hash = data["hash"]
    assign_dict = find_assignments(code_string)

    input_frames = data["frames"]
    frame_dict = retrieve_frames(input_frames)

    # Execute the code; get back {"output": <string_output>, "results": <list_of_vals>}.
    results_dict = execute_code(code_string, frame_dict,
                                assign_dict['targets'], output_hash)
    results = results_dict["results"]

    return_dict = {
        "output": results_dict["output"],
        "frames": [],
        "figures": []
    }

    frame_names = assign_dict['targets']
    if len(results) != len(frame_names):
        raise ApiException(
            "Error: no. of output frames does not match no. results",
            status_code=500)

    wrote_ok = True
    for name, result in zip(frame_names, results):
        # Only JSON-looking strings are written. `startswith` is safe for an
        # empty string, where indexing [0] would raise IndexError.
        if not (isinstance(result, str) and result.startswith(('[', '{'))):
            continue
        wrote_ok &= write_frame(result, name, output_hash)
        return_dict["frames"].append({
            "name": name,
            "url": "{}/{}/{}".format(DATASTORE_URI, output_hash, name)
        })

    # If an image was produced it is uploaded and stored as <hash>/figures.
    wrote_image = write_image(output_hash)
    if wrote_image:
        return_dict["figures"].append({
            "name": "figures",
            "url": "{}/{}/figures".format(DATASTORE_URI, output_hash)
        })

    if wrote_ok:
        return return_dict
    raise RuntimeError("Could not write result to datastore")
def validate_stop_name(stop_name: str) -> None:
    """Reject stop names longer than ``STOP_NAME_MAX_LENGTH``.

    Args:
        stop_name: Name to check.

    Raises:
        ApiException: 400 when the name is too long.
    """
    if len(stop_name) <= STOP_NAME_MAX_LENGTH:
        return
    raise ApiException('Stop name too long', 400)
def validate_nickname(nickname: str) -> None:
    """Reject nicknames longer than ``NICKNAME_MAX_LENGTH``.

    Args:
        nickname: Nickname to check; ``None`` is accepted as-is.

    Raises:
        ApiException: 400 when the nickname is too long.
    """
    # A missing nickname is valid.
    if nickname is None:
        return
    if len(nickname) > NICKNAME_MAX_LENGTH:
        raise ApiException('Nickname too long', 400)
def check_json_data() -> None:
    """Ensure the current request carries JSON data.

    Raises:
        ApiException: 400 when ``request.json`` is ``None``.
    """
    if request.json is not None:
        return
    raise ApiException('Missing JSON data', 400)
def get_owner_uuid() -> str:
    """Read and validate ``owner_uuid`` from the request's JSON data.

    Returns:
        str: The owner UUID, after ``validate_user_uuid`` accepted it.

    Raises:
        ApiException: 400 when the JSON data has no ``owner_uuid`` key.
    """
    if 'owner_uuid' in request.json:
        owner_uuid = request.json['owner_uuid']
        validate_user_uuid(owner_uuid)
        return owner_uuid
    raise ApiException('Missing owner uuid', 400)
def handle_eval(data):
    """Analyse and execute the code posted to the eval endpoint.

    Args:
        data: Dictionary in the format::

            {
                "code": <code_string>,
                "hash": <cell_hash>,
                "frames": [<frame_name>, ...],
                "files": [<file_url>, ...]
            }

        ``"files"`` is optional.

    Returns:
        dict: ::

            {
                "output": <text_output_from_cell>,
                "frames": [{"name": <frame_name>, "url": <frame_url>}, ...],
                "figures": [{"name": <fig_name>, "url": <fig_url>}, ...]
            }

    Raises:
        ApiException: When any frame could not be written to the datastore.
    """
    code_string = data["code"]
    output_hash = data["hash"]
    assign_dict = find_assignments(code_string)

    # Download every referenced file, keyed by the last segment of its URL.
    file_content_dict = {
        file_url.split("/")[-1]: get_file_content(file_url)
        for file_url in data.get("files", [])
    }

    frame_dict = retrieve_frames(data["frames"])

    # Execute the code; get back {"output": <string_output>, "results": <dict_of_vals>}.
    results_dict = execute_code(file_content_dict,
                                code_string,
                                frame_dict,
                                assign_dict['targets'],
                                output_hash,
                                verbose=False)

    response = {
        "output": results_dict["output"],
        "frames": [],
        "figures": []
    }

    all_written = True
    for name, frame in results_dict["results"].items():
        all_written &= write_frame(frame, name, output_hash)
        frame_url = "{}/{}/{}".format(DATASTORE_URI, output_hash, name)
        response["frames"].append({"name": name, "url": frame_url})

    # If an image was produced it is uploaded and stored as <hash>/figures.
    if write_image(output_hash):
        response["figures"].append({
            "name": "figures",
            "url": "{}/{}/figures".format(DATASTORE_URI, output_hash)
        })

    if not all_written:
        raise ApiException("Could not write result to datastore")
    return response
def execute_code(file_content_dict,
                 code,
                 input_val_dict,
                 return_vars,
                 output_hash,
                 verbose=False):
    """Wrap the cell code in a function, define it with ``exec`` and run it.

    ``construct_func_string`` builds source text that is expected to define
    ``wrattler_f``; that source is executed and ``wrattler_f()`` is then
    evaluated while stdout is captured.

    NOTE(review): ``exec``/``eval`` run arbitrary code; the cell code and the
    file contents must come from a trusted source.

    Args:
        file_content_dict: ``{<filename>: <content>, ...}`` for files (e.g.
            containing function definitions) on the datastore.
        code: The code in the cell, as a string.
        input_val_dict: ``{<variable_name>: <data_retrieved_from_datastore>, ...}``.
        return_vars: Variable names found by ``find_assignments(code)``.
        output_hash: Hash of the cell, used to create output URLs.
        verbose: When True, print the generated function source.

    Returns:
        dict: ``{"output": <console output>, "results": {<frame_name>: <frame>, ...}}``.
        Only results whose converted value is truthy are included.

    Raises:
        ApiException: With status 500 on a syntax error (naming the offending
            imported file when one can be identified) or when running the
            function or converting its results raises.
    """
    # First deal with any files that could contain function definitions
    # and/or import statements: each one is followed by a newline.
    file_contents = "".join(v + "\n" for v in file_content_dict.values())

    func_string = construct_func_string(file_contents, code, input_val_dict,
                                        return_vars, output_hash)
    if verbose:
        print(func_string)

    try:
        exec(func_string)
    except SyntaxError as e:
        # The problem is either in the cell code or in one of the files.
        # Compile (do not run) each file to narrow it down: compiling detects
        # syntax errors without executing the file's statements.
        for fn, fc in file_content_dict.items():
            try:
                compile(fc, fn, "exec")
            except SyntaxError as file_err:
                output = "SyntaxError when trying to execute imported file: {}".format(
                    fn)
                raise ApiException(output, status_code=500) from file_err
        output = "SyntaxError when trying to execute code in cell: {}".format(
            e)
        raise ApiException(output, status_code=500) from e

    return_dict = {"output": "", "results": []}
    try:
        with stdoutIO() as s:
            # Wrapping function wrattler_f should now be in the namespace.
            func_output = eval('wrattler_f()')
            return_dict["output"] = s.getvalue().strip()
            return_dict["results"] = {}

            for k, v in func_output.items():
                result = convert_from_pandas(v)
                if result:
                    return_dict["results"][k] = result
    except Exception as e:
        output = "{}: {}".format(type(e).__name__, e)
        raise ApiException(output, status_code=500) from e
    return return_dict