Exemplo n.º 1
0
 def process(self, accession_no, doc_id):
     """Parse a ".dissem" filing into per-fund rows and persist the result.

     The file is read from ``CONFIG["Path"]["file_path"]``, stripped of markup
     with BeautifulSoup, split into one text section per "FUND:" marker, and
     each section is turned into a DataFrame by ``Format4Parser``.  The merged
     frame is renamed to the output column names, written to the database and
     exported as ``<accession_no>.xlsx``.

     Args:
         accession_no: Filing identifier; used to build the input and output
             file names and stamped on every output row.
         doc_id: Document identifier stamped on every output row.

     Returns:
         ``1`` on success, or the string ``"unsuccessfull"`` when reading or
         parsing the file, or merging the per-fund frames, raises.
     """
     logger = Utils.add_logger()
     try:
         path = CONFIG["Path"]["file_path"] + accession_no + ".dissem"
         content = Utils.read_file(path, accession_no)
         # NOTE(review): the search string is reproduced exactly as it appears
         # in the original source -- confirm it is the intended character.
         content = content.replace(' ', '\xa0')
         soup = BeautifulSoup(content, "html.parser")
         contents = soup.text
         contents = contents.replace('SIGNATURES', '----------------------------------')
         # Put a sentinel in front of every "FUND:" so the text can be split
         # into one chunk per fund.
         contents = contents.replace('FUND:', '############### \n FUND:')
         fundlist = contents.split('###############')
     except Exception as error:
         logger.error("Exception in parsing through bs4 for accession_no -{} -{}".format(accession_no,error))
         return "unsuccessfull"

     # The chunk before the first sentinel is preamble, not a fund section.
     fund_frames = []
     for fund_text in fundlist[1:]:
         fund_name = Format4Parser.fundName(fund_text.split('ISSUER')[0])
         fund_frames.append(Format4Parser.companyfundDetails(fund_text, fund_name))

     try:
         # DataFrame.append was removed in pandas 2.0; a single concat is the
         # supported replacement and avoids re-copying the frame per fund.
         # pd.concat raises on an empty list, so fall back to an empty frame.
         final_df = pd.concat(fund_frames) if fund_frames else pd.DataFrame()
     except Exception as error:
         logger.error("Exception in pipeline for df merging for accession_no -{} -{}".format(accession_no,error))
         return "unsuccessfull"

     final_df = Format4Parser.concat_rowData(final_df)
     final_df = final_df.drop(columns=['ID'])
     final_df = final_df.rename(columns={
         "ISSUER": "CompanyName",
         "MEETING DATE": "MeetingDate",
         "Proposal No": "ProposalNumber",
         "PROPOSAL": "Proposal",
         "PROPOSED BY": "ProposedBy",
         "VOTED?": "Voted",
         "MGMT": "ForAgainstManagement",
         "TICKER": "Ticker",
         "VOTE CAST": "VoteCast",
     })
     final_df["MeetingDate"] = final_df["MeetingDate"].apply(Utils.parsing_date)
     # Column name spelling is kept as-is; downstream consumers may rely on it.
     final_df["AccesssionNumber"] = accession_no
     final_df["DocumentId"] = doc_id

     Utils.df_to_database(accession_no, doc_id, final_df)
     filename = os.path.join(CONFIG["Path"]["output_path_format4"], accession_no + ".xlsx")
     Utils.df_to_excel(accession_no, final_df, filename)
     return 1
Exemplo n.º 2
0
 def process(self, accession_no, doc_id):
     """Run the Format2 parsing pipeline for one filing and persist the result.

     Stages, in order: ``Parser.preprocess`` -> ``Parser.parse_file`` ->
     ``Parser.post_process`` -> ``Parser.process_df``.  When every stage
     yields data, the final frame (minus its ``seq_id`` column) is written to
     the database and exported as ``<accession_no>.xlsx`` under
     ``CONFIG["Path"]["output_path_format2"]``.

     Args:
         accession_no: Filing identifier passed to every parser stage and used
             for the output file name.
         doc_id: Document identifier passed to the parser and the database
             writer.

     Returns:
         ``1`` on success, or the string ``"Unsuccessfull"`` as soon as any
         stage produces no data.  (Previously only the first stage returned
         the failure string; the later stages fell through and returned
         ``None``.)
     """
     logger = Utils.add_logger()

     text_list, _registrant_name = Parser.preprocess(accession_no)
     # len() rather than truthiness: the concrete container types returned by
     # Parser are not visible here.
     if len(text_list) == 0:
         logger.debug(
             "No data found from the file for accssion_no:{}".format(
                 accession_no))
         return "Unsuccessfull"

     funds_list = Parser.parse_file(accession_no, text_list, doc_id)
     if len(funds_list) == 0:
         logger.debug(
             "funds_list is empty for accession_no:{}".format(accession_no))
         return "Unsuccessfull"

     header_df, table_df = Parser.post_process(accession_no, funds_list)
     if header_df.empty:
         logger.debug(
             "header_df is empty for accession_no:{}".format(accession_no))
         return "Unsuccessfull"

     final_df = Parser.process_df(accession_no, header_df, table_df)
     if final_df.empty:
         logger.debug(
             "final_df is empty for accession_no:{}".format(accession_no))
         return "Unsuccessfull"

     logger.debug(
         "final_df created successfully for accession_no:{}".format(
             accession_no))
     print("final_df length:{}".format(len(final_df.index)))

     final_df = final_df.drop(columns=["seq_id"])
     Utils.df_to_database(accession_no, doc_id, final_df)
     filename = os.path.join(
         CONFIG["Path"]["output_path_format2"], accession_no + ".xlsx")
     Utils.df_to_excel(accession_no, final_df, filename)
     return 1