def get(self, id=None):
    """Return a single file by id, or list the current user's files.

    Args:
        id: Identifier handed to ``File.get_file``. When ``None`` the
            files of the authenticated user are listed instead. The name
            shadows the ``id`` builtin but is kept because callers may
            pass it by keyword.

    Returns:
        A ``(body, status)`` tuple:

        * ``(File.get_file(id), 200)`` when ``id`` is given;
        * ``({"files": [...]}, 200)`` when listing succeeds;
        * ``({"message": "Server Error"}, 500)`` when listing fails.
    """
    if id is not None:
        return File.get_file(id), 200

    try:
        current_user = flask_praetorian.current_user()
        files = File.get_all_files(current_user.id)
    except Exception:
        # Always answer with a (body, status) pair: a bare ``500`` would be
        # treated as the response body rather than as the status code.
        return {"message": "Server Error"}, 500

    return {"files": files}, 200
def post(self):
    """Return the summary and keywords for a stored PDF, creating them if needed.

    Reads ``fileId`` from the JSON request body. If ``Text.get_text`` already
    yields a truthy value for that id, it is returned as-is. Otherwise the
    PDF is downloaded from the URL recorded for the file, its text is
    extracted with pdfplumber, a summary and a keyword list are requested
    from the OpenAI completion API, the result is uploaded as a JSON object
    through ``storage_client`` and a ``Text`` row pointing at it is committed.

    Returns:
        A ``(body, status)`` tuple:

        * ``(existing_text, 200)`` when a result already exists;
        * ``({"summary": str, "keywords": [str, ...]}, 200)`` on success;
        * ``({"message": "Server Error"}, 500)`` on any failure.
    """
    # Imported locally so the block does not depend on the module's import
    # section for these two standard-library helpers.
    import os
    import tempfile

    try:
        data = request.get_json()
        file_id = data["fileId"]

        # Short-circuit when an analysis is already stored for this file.
        text = Text.get_text(file_id)
        if text:
            return text, 200

        file_url = File.get_file(file_id)["url"]
        # Bound the download and fail on HTTP errors instead of feeding an
        # error page to the PDF parser.
        download = requests.get(file_url, timeout=30)
        download.raise_for_status()

        # uuid4 must be *called*; str(uuid4) is the repr of the function,
        # which would give every result the same object key.
        text_id = str(uuid4())
        text_filename = secure_filename(f"{text_id}.json")

        # A private scratch directory avoids name clashes between concurrent
        # requests and is removed automatically, even on error.
        with tempfile.TemporaryDirectory() as work_dir:
            file_path = os.path.join(
                work_dir, secure_filename(f"{file_id}.pdf")
            )
            with open(file_path, "wb") as f:
                f.write(download.content)

            with pdfplumber.open(file_path) as pdf:
                # extract_text() may return None for a page without text;
                # treat that as empty. Each page is prefixed with "\n".
                all_text = "".join(
                    "\n" + (pdf_page.extract_text() or "")
                    for pdf_page in pdf.pages
                )

            # 2041 characters of text + the 7-character suffix = 2048.
            summary_prompt = all_text[:2041] + "\ntl;dr:"
            response = openai.Completion.create(
                engine="ada",
                prompt=summary_prompt,
                temperature=0,
                max_tokens=120,
                top_p=1.0,
                frequency_penalty=0.8,
                presence_penalty=0.0,
            )
            summary = response["choices"][0]["text"].replace("\n", " ")

            # Only the keyword prompt is restricted to printable ASCII; the
            # summary prompt above uses the raw extracted text.
            printable = set(string.printable)
            all_text = "".join(ch for ch in all_text if ch in printable)
            keywords_prompt = all_text[:2036] + "\n\nKeywords:"
            response = openai.Completion.create(
                engine="curie",
                prompt=keywords_prompt,
                temperature=0.0,
                max_tokens=60,
                top_p=1.0,
                frequency_penalty=0.8,
                presence_penalty=0.0,
                stop=["Keywords:"],
            )
            keywords = response["choices"][0]["text"].replace("\n", " ")
            keywords = [word.strip() for word in keywords.split(",")]

            text_content = {"summary": summary, "keywords": keywords}
            text_path = os.path.join(work_dir, text_filename)
            with open(text_path, "w") as f:
                json.dump(text_content, f)

            # Upload before leaving the block: the scratch file disappears
            # together with the temporary directory.
            storage_client.upload_file(
                Filename=text_path,
                Bucket=bucket_name,
                Key=text_filename,
            )

        text = Text(
            file_id=file_id,
            text_url=
            f"https://ec500-news-analyzer.s3.us-east-2.amazonaws.com/{text_filename}",
        )
        db.session.add(text)
        db.session.commit()

        return text_content, 200
    except Exception:
        # Boundary handler: any failure in the pipeline becomes a 500.
        return {"message": "Server Error"}, 500