def headlerlistcreation(headeritem):
    logger = Utils.add_logger()
    colist = []
    headerlist = []
    # Drop bare "Management"/"Opposition" tokens so headers and values stay paired.
    headeritem = [x for x in headeritem if x not in ['Management', 'Opposition']]
    length = len(headeritem)
    try:
        for i in range(0, length + 1):
            if i % 2 == 0:
                headerlist.append(headeritem[i])
            else:
                if "Date" in headeritem[i] or "Agenda" in headeritem[i]:
                    # A header token landed in a value slot: pad with a blank
                    # value and shift the remaining items right by one.
                    headeritem.insert(i, " ")
                    colist.append(" ")
                    length += 1
                else:
                    colist.append(headeritem[i])
    except IndexError:
        # The loop deliberately overruns by one; the IndexError marks the end.
        logger.debug("colList:{}".format(colist))
    # Interleave headers and values back into one flat list.
    alist = []
    for i, j in zip(headerlist, colist):
        alist.append(i)
        alist.append(j)
    return alist
def preprocess(accession_no):
    logger = Utils.add_logger()
    Registrant_Name = ""
    try:
        path = os.path.join(CONFIG["Path"]["file_path"], accession_no + ".dissem")
        with open(path, 'r', encoding='utf-8') as file:
            s = file.read()
        raw = bs(s, "lxml")
        # Pull the registrant name out of the "NAME OF REGISTRANT: ..." line.
        result = raw.find_all(text=re.compile("NAME OF REGISTRANT"))
        for val in result:
            for line in val.split('\n'):
                if "NAME OF REGISTRANT" in line:
                    Registrant_Name = line.split(':')[1].strip()
                    logger.debug("Registrant Name for file:{} is {}".format(
                        accession_no, Registrant_Name))
        # Work on the plain text of the whole document and split it on the
        # long dashed separators that delimit each section.
        text = raw.get_text()
        result_list = re.split(r"-{20,}", text)
    except Exception as error:
        logger.error("Error in preprocess for file-{}-{}".format(
            accession_no, error))
        return list(), ""
    return result_list, Registrant_Name
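if __name__ == "__main__":
    # Illustrative sketch (not part of the pipeline) of the separator split
    # used in preprocess(); the sample text below is fabricated.
    import re
    sample = "FUND A header\n" + "-" * 30 + "\nproposal rows\n" + "-" * 30 + "\nFUND B header"
    print(re.split(r"-{20,}", sample))
    # -> ['FUND A header\n', '\nproposal rows\n', '\nFUND B header']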
def splitevenodd(headeritem, companyName):
    logger = Utils.add_logger()
    colist = []
    headerlist = []
    # Drop bare "Management"/"Opposition" tokens so headers and values stay paired.
    headeritem = [x for x in headeritem if x not in ['Management', 'Opposition']]
    length = len(headeritem)
    try:
        for i in range(0, length + 1):
            if i % 2 == 0:
                headerlist.append(headeritem[i])
            else:
                if "Date" in headeritem[i] or "Agenda" in headeritem[i]:
                    # A header token landed in a value slot: pad with a blank
                    # value and shift the remaining items right by one.
                    headeritem.insert(i, " ")
                    colist.append(" ")
                    length += 1
                else:
                    colist.append(headeritem[i])
    except IndexError:
        # The loop deliberately overruns by one; the IndexError marks the end.
        logger.debug("colList:{}".format(colist))
    # Build a one-row frame pairing each header with its value.
    dfs = pd.DataFrame([colist], columns=headerlist)
    if len(companyName) >= 1:
        dfs['CompanyName'] = companyName
    return dfs
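if __name__ == "__main__":
    # Toy illustration of the even/odd pairing that splitevenodd() performs;
    # the token list is fabricated, not from a real filing.
    tokens = ["CompanyName", "ACME CORP", "Ticker", "ACM", "MeetingDate", "01-May-2020"]
    headers, values = tokens[0::2], tokens[1::2]
    print(dict(zip(headers, values)))
    # -> {'CompanyName': 'ACME CORP', 'Ticker': 'ACM', 'MeetingDate': '01-May-2020'}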
def process(self, accession_no, doc_id):
    logger = Utils.add_logger()
    try:
        path = os.path.join(CONFIG["Path"]["file_path"], accession_no + ".dissem")
        content = Utils.read_file(path, accession_no)
        content = Format6parser.segparsing(content)
        # Repair rows whose closing </tr> was dropped in the source HTML.
        content = content.replace("</td>\n<tr>", "</td></tr><tr>")
        soup = BeautifulSoup(content, "html.parser")
        tables = soup.find_all('table')
        dffa = None
        prev_table = None
        finaldf = pd.DataFrame()
        for table in tables:
            try:
                if Format6APipeline.check_table(table):
                    if prev_table is not None:
                        # A data table was split across two <table> tags:
                        # glue the buffered fragment onto this one first.
                        prev_table = prev_table + str(table.contents)
                        dffa = Format6parser.tabledetails(
                            BeautifulSoup(prev_table, "html.parser"))
                        prev_table = None
                    else:
                        dffa = Format6parser.tabledetails(table)
                else:
                    # Not yet a complete table: buffer it for the next pass.
                    prev_table = str(table.contents)
                if dffa is not None:
                    if finaldf.empty:
                        finaldf = dffa
                    else:
                        finaldf = pd.concat([finaldf, dffa], sort=True)
                    dffa = None
            except Exception as error:
                logger.error(
                    "Exception in table parsing for format6 for accession_no -{} -{}"
                    .format(accession_no, error))
        filename = os.path.join(CONFIG['Path']['output_path_format6B'],
                                accession_no + '.xlsx')
        Utils.df_to_excel(accession_no, finaldf, filename)
        return 1
    except Exception as error:
        logger.error(
            "Exception in format6 pipeline for accession_no -{}-{}".format(
                accession_no, error))
        return 0
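if __name__ == "__main__":
    # Sketch of the fragment-gluing idea above on fabricated HTML: a header
    # table and a body table re-parsed as one. Assumes bs4 is installed.
    from bs4 import BeautifulSoup
    head = "<table><tr><td>Proposal</td><td>Vote</td></tr></table>"
    body = "<table><tr><td>Elect director</td><td>For</td></tr></table>"
    rows = "".join(str(tr)
                   for tr in BeautifulSoup(head, "html.parser").find_all("tr")
                   + BeautifulSoup(body, "html.parser").find_all("tr"))
    merged = BeautifulSoup("<table>" + rows + "</table>", "html.parser")
    print(len(merged.find_all("tr")))  # 2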
def __update_enabilities(self):
    # Enable or disable every settings widget except the two
    # regular/irregular radio buttons, which always stay active.
    Utils.set_enabled_recursively(self.__vbox_settings,
                                  self.__rdb_irregular.isChecked())
    self.__rdb_regular.setEnabled(True)
    self.__rdb_irregular.setEnabled(True)
    # For an irregular verb, each input follows its own checkbox.
    if self.__rdb_irregular.isChecked():
        self.__comb_stem_changing.setEnabled(
            self.__chk_stem_changing.isChecked())
        self.__le_special_past_particle.setEnabled(
            self.__chk_special_past_particle.isChecked())
        self.__le_special_yo_form.setEnabled(
            self.__chk_special_yo_form.isChecked())
        self.__le_special_preterite_stem.setEnabled(
            self.__chk_special_preterite_stem.isChecked())
        self.__le_special_future_stem.setEnabled(
            self.__chk_special_future_stem.isChecked())
        self.__le_special_present_subjunctive_stem.setEnabled(
            self.__chk_special_present_subjunctive_stem.isChecked())
        self.__le_special_tú_form_affirmative_imperative.setEnabled(
            self.__chk_special_tú_form_affirmative_imperative.isChecked())
def process(self, accession_no, doc_id):
    logger = Utils.add_logger()
    text_list, registrant_name = Parser.preprocess(accession_no)
    if len(text_list) > 0:
        funds_list = Parser.parse_file(accession_no, text_list, doc_id)
        if len(funds_list) > 0:
            header_df, table_df = Parser.post_process(accession_no, funds_list)
            if header_df.empty:
                logger.debug("header_df is empty for accession_no:{}".format(
                    accession_no))
            else:
                final_df = Parser.process_df(accession_no, header_df, table_df)
                if final_df.empty:
                    logger.debug("final_df is empty for accession_no:{}".format(
                        accession_no))
                else:
                    logger.debug(
                        "final_df created successfully for accession_no:{}, "
                        "length:{}".format(accession_no, len(final_df.index)))
                    # seq_id only links headers to table rows; drop it before export.
                    final_df = final_df.drop(columns=["seq_id"])
                    Utils.df_to_database(accession_no, doc_id, final_df)
                    filename = os.path.join(CONFIG["Path"]["output_path_format2"],
                                            accession_no + ".xlsx")
                    Utils.df_to_excel(accession_no, final_df, filename)
                    return 1
        else:
            logger.debug("No data found from the file for accession_no:{}".format(
                accession_no))
    return "Unsuccessfull"
def process_df(accession_no, main_header_df, table_df):
    logger = Utils.add_logger()
    try:
        table_df = table_df.astype({"seq_id": int})
        main_header_df = main_header_df.reset_index(drop=True)
        main_header_df = main_header_df.astype({"seq_id": int})
        df = table_df.dropna(subset=['Proposal'])
        df["ProposedBy"] = df["ProposedBy"].apply(Parser.replace_empty)
        df["ProposalNumber"] = df["ProposalNumber"].apply(Parser.replace_empty)
        main_header_df["MeetingDate"] = main_header_df["MeetingDate"].apply(
            Utils.parsing_date)
        df = df.reset_index(drop=True)
        # Walk the table bottom-up and fold multi-line proposal text back
        # into the first row of each proposal.
        r = len(df.index) - 1
        while r >= 0:
            temp_index = list()
            while r >= 0 and ((df.at[r, "Proposal"] != "DIRECTOR"
                               or df.at[r, "ProposalNumber"] is None)
                              and df.at[r, "ProposedBy"] is None):
                temp_index.append(r)
                r -= 1
            r -= 1
            if len(temp_index) >= 1:
                c = temp_index[-1]
                for m in reversed(temp_index):
                    df.at[c - 1, "Proposal"] += " " + df.at[m, "Proposal"]
                    df.at[m, "Proposal"] = None
        df = df.dropna(subset=["Proposal"])
        # Attach each proposal row to its header block via seq_id.
        final_df = main_header_df.merge(df, on=["seq_id"], how="inner")
    except Exception as error:
        logger.error("Error in process_df for accession_no-{}-{}".format(
            accession_no, error))
        return pd.DataFrame()
    return final_df
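if __name__ == "__main__":
    # Toy illustration of the seq_id join performed in process_df();
    # the header and table values are fabricated.
    import pandas as pd
    header = pd.DataFrame({"seq_id": [1], "CompanyName": ["ACME CORP"]})
    rows = pd.DataFrame({"seq_id": [1, 1],
                         "Proposal": ["Elect director", "Ratify auditor"]})
    print(header.merge(rows, on=["seq_id"], how="inner"))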
def process(self, accession_no, doc_id):
    logger = Utils.add_logger()
    try:
        path = os.path.join(CONFIG["Path"]["file_path"], accession_no + ".dissem")
        content = Utils.read_file(path, accession_no)
        content = content.replace('&nbsp;', '\xa0')
        soup = BeautifulSoup(content, "html.parser")
        contents = soup.text
        # Mark section boundaries so the text can be split per fund.
        contents = contents.replace('SIGNATURES', '----------------------------------')
        contents = contents.replace('FUND:', '############### \n FUND:')
        fundlist = contents.split('###############')
    except Exception as error:
        logger.error("Exception in parsing through bs4 for accession_no -{} -{}".format(
            accession_no, error))
        return "unsuccessfull"
    final_df = pd.DataFrame()
    # The first chunk precedes the first FUND: marker and holds no vote data.
    for section in fundlist[1:]:
        sr = section.split('ISSUER')
        fName = Format4Parser.fundName(sr[0])
        df = Format4Parser.companyfundDetails(section, fName)
        try:
            final_df = pd.concat([final_df, df])
        except Exception as error:
            logger.error("Exception in pipeline for df merging for accession_no -{} -{}".format(
                accession_no, error))
            return "unsuccessfull"
    final_df = Format4Parser.concat_rowData(final_df)
    final_df = final_df.drop(columns=['ID'])
    final_df.rename(columns={"ISSUER": "CompanyName",
                             "MEETING DATE": "MeetingDate",
                             "Proposal No": "ProposalNumber",
                             "PROPOSAL": "Proposal",
                             "PROPOSED BY": "ProposedBy",
                             "VOTED?": "Voted",
                             "MGMT": "ForAgainstManagement",
                             "TICKER": "Ticker",
                             "VOTE CAST": "VoteCast"}, inplace=True)
    final_df["MeetingDate"] = final_df["MeetingDate"].apply(Utils.parsing_date)
    final_df["AccesssionNumber"] = accession_no
    final_df["DocumentId"] = doc_id
    Utils.df_to_database(accession_no, doc_id, final_df)
    filename = os.path.join(CONFIG["Path"]["output_path_format4"],
                            accession_no + ".xlsx")
    Utils.df_to_excel(accession_no, final_df, filename)
    return 1
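if __name__ == "__main__":
    # Sketch of the FUND: marker split used above, on fabricated text.
    text = "preamble FUND: Alpha ISSUER ... FUND: Beta ISSUER ..."
    marked = text.replace("FUND:", "############### \n FUND:")
    chunks = marked.split("###############")
    print(len(chunks))  # 3: the preamble plus one chunk per fund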
def tabledetails(table, lstdf, accession_no):
    logger = Utils.add_logger()
    line = []
    temp = False        # becomes True once header columns are known
    length = 0
    head = 0
    tempf = 0
    fund = ''
    parseline = []
    checkdata = []
    df = pd.DataFrame()
    table_rows = table.find_all('tr')
    for tr in table_rows:
        try:
            td = tr.find_all('td')
            for i in td:
                line.append(i.text.replace('\n', '').replace('\xa0', ' '))
                checkdata.append(i.text.strip().replace('\n', '').replace('\xa0', ' '))
            line = [l.strip() for l in line if l != ' ']
            if line == [] or len(line) == line.count(''):
                line = []
                checkdata = []
                continue
            if not temp:
                temp = Format8Parser.identify_header(line, accession_no)
                head += 1
                if not temp and head == 1 and len(lstdf) != 0:
                    # No header row in this table: reuse the previous table's
                    # columns and keep the current row as data.
                    temp = True
                    parseline = line
                    if len(lstdf) == len(line):
                        line = lstdf
                    else:
                        lstdf = list(lstdf)
                        if 'Fund Name' in set(lstdf):
                            lstdf.remove('Fund Name')
                        if 'FundName' in set(lstdf):
                            lstdf.remove('FundName')
                        line = lstdf
                if temp:
                    line = [re.sub(' +', ' ', l) for l in line if l != ' ']
                    df = pd.DataFrame(columns=line)
                    col = line
                    length = len(line)
                    if parseline != [] and len(parseline) == len(line):
                        # The carried-over first row is data, not a header.
                        df = pd.concat([df, pd.DataFrame([parseline], columns=col)],
                                       ignore_index=True)
            elif length == len(line):
                df = pd.concat([df, pd.DataFrame([line], columns=col)],
                               ignore_index=True)
            elif length == len(checkdata):
                df = pd.concat([df, pd.DataFrame([checkdata], columns=col)],
                               ignore_index=True)
            line = []
            checkdata = []
        except Exception as error:
            logger.error("Exception in format8 parser for accession_no:{} -{}".format(
                accession_no, error))
    if not temp:
        df = pd.DataFrame()
    else:
        for column in df.columns:
            if column.upper().find('FUND NAME') > -1:
                tempf = 1
        if tempf != 1 and fund != '':
            df["Fund Name"] = fund
    return df
def process(self, accession_no, doc_id):
    logger = Utils.add_logger()
    try:
        olderFundName = ''
        lstdf = []
        dfs_all = pd.DataFrame()
        path = os.path.join(CONFIG["Path"]["file_path"], accession_no + ".dissem")
        content = Utils.read_file(path, accession_no)
        content = Format8Parser.segparsing(content)
        content = re.sub("=+", "", content)
        # Wrap every table in sentinel markers so the document can be split
        # into one chunk per table.
        content = content.replace("<TABLE", "@@@@@@@@@@@@@@@@@@@@@\n <TABLE")
        content = content.replace("<table", "@@@@@@@@@@@@@@@@@@@@@\n <table")
        content = content.replace("</TABLE>", "</TABLE>\n ######################")
        content = content.replace("</table>", "</table>\n ######################")
        # Repair rows whose closing </tr> was dropped in the source HTML.
        content = content.replace("</td>\n<tr>", "</td></tr><tr>")
        content = content.replace(
            "The Fund did not vote any proxies during this reporting period",
            "The Fund did not vote any proxies during this reporting period\n ######################")
        fundVoteData = content.split('######################')
        for alist in fundVoteData:
            try:
                df_all = Format8Parser.tableparsed(alist, olderFundName,
                                                   lstdf, accession_no)
                if df_all is not None:
                    dfs_batch = Format8Parser.formDataFrame(df_all)
                    if not dfs_batch.empty:
                        if dfs_all.empty:
                            dfs_all = dfs_batch
                        else:
                            dfs_all = pd.concat([dfs_all, dfs_batch],
                                                ignore_index=True, sort=False)
                        # Remember the fund name and columns so header-less
                        # continuation tables can reuse them.
                        if "FundName" in dfs_all.columns:
                            olderFundName = dfs_all["FundName"][0]
                        lstdf = df_all.columns
            except Exception as error:
                logger.error(
                    "Exception in for loop of Format8pipeline.process for accessionnumber:{}-{}"
                    .format(accession_no, error))
        if dfs_all is not None:
            dfs_all = Format8Parser.remove_spaces_from_df(dfs_all)
        filename = os.path.join(CONFIG['Path']['output_path_format8'],
                                accession_no + '.xlsx')
        Utils.df_to_excel(accession_no, dfs_all, filename)
        return 1
    except Exception as error:
        logger.error(
            "Exception in Format8pipeline.process for accessionnumber:{}-{}"
            .format(accession_no, error))
        return 0
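if __name__ == "__main__":
    # Sketch of the table-sentinel trick used in process(): wrap every
    # <table> in markers, then split into per-table chunks. HTML is made up.
    html = ("intro <table><tr><td>A</td></tr></table> middle "
            "<table><tr><td>B</td></tr></table> outro")
    html = html.replace("<table", "@@@@@@@@@@@@@@@@@@@@@\n <table")
    html = html.replace("</table>", "</table>\n ######################")
    chunks = html.split("######################")
    print(len(chunks))  # 3: two table chunks plus the trailing text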
class NN:
    def __init__(self):
        self.utils = Utils()

    @staticmethod
    def initialize_parameters(layer_dims):
        """
        Arguments:
        layer_dims -- python array (list) containing the dimensions of each layer in our network

        Returns:
        parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                        Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                        bl -- bias vector of shape (layer_dims[l], 1)
        """
        np.random.seed(1)
        parameters = {}
        L = len(layer_dims)  # number of layers in the network

        for l in range(1, L):
            # Scale by 1/sqrt(n_prev) so activations keep a sensible variance.
            parameters['W' + str(l)] = np.random.randn(
                layer_dims[l], layer_dims[l - 1]) / np.sqrt(layer_dims[l - 1])
            parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        return parameters

    def linear_forward(self, A, W, b):
        """
        Implement the linear part of a layer's forward propagation.

        Arguments:
        A -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)

        Returns:
        Z -- the input of the activation function, also called pre-activation parameter
        cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
        """
        Z = W.dot(A) + b
        cache = (A, W, b)
        return Z, cache

    def linear_activation_forward(self, A_prev, W, b, activation):
        """
        Implement the forward propagation for the LINEAR->ACTIVATION layer

        Arguments:
        A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
        W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
        b -- bias vector, numpy array of shape (size of the current layer, 1)
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

        Returns:
        A -- the output of the activation function, also called the post-activation value
        cache -- a python tuple containing "linear_cache" and "activation_cache"; stored for computing the backward pass efficiently
        """
        if activation == "sigmoid":
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = self.utils.sigmoid(Z)
        elif activation == "relu":
            Z, linear_cache = self.linear_forward(A_prev, W, b)
            A, activation_cache = self.utils.relu(Z)
        cache = (linear_cache, activation_cache)
        return A, cache

    def L_model_forward(self, X, parameters):
        """
        Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

        Arguments:
        X -- data, numpy array of shape (input size, number of examples)
        parameters -- output of initialize_parameters()

        Returns:
        AL -- last post-activation value
        caches -- list of caches containing:
                    every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                    the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
        """
        caches = []
        A = X
        L = len(parameters) // 2  # number of layers in the neural network

        # [LINEAR -> RELU] * (L-1); collect each layer's cache.
        for l in range(1, L):
            A_prev = A
            A, cache = self.linear_activation_forward(A_prev,
                                                      parameters['W' + str(l)],
                                                      parameters['b' + str(l)],
                                                      activation="relu")
            caches.append(cache)

        # Final LINEAR -> SIGMOID layer.
        AL, cache = self.linear_activation_forward(A,
                                                   parameters['W' + str(L)],
                                                   parameters['b' + str(L)],
                                                   activation="sigmoid")
        caches.append(cache)

        return AL, caches

    def compute_cost(self, AL, Y):
        """
        Implement the cross-entropy cost function.

        Arguments:
        AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
        Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

        Returns:
        cost -- cross-entropy cost
        """
        m = Y.shape[1]

        # Compute loss from AL and Y.
        cost = (1. / m) * (-np.dot(Y, np.log(AL).T) - np.dot(1 - Y, np.log(1 - AL).T))
        cost = np.squeeze(cost)  # makes sure cost has the expected shape (e.g. turns [[17]] into 17)
        return cost

    def compute_cost_with_regularization(self, AL, Y, parameters, lambd):
        L = len(parameters) // 2
        m = Y.shape[1]

        # Compute loss from AL and Y.
        cost = (1. / m) * (-np.dot(Y, np.log(AL).T) - np.dot(1 - Y, np.log(1 - AL).T))

        # L2 regularization: add (lambd / 2m) * ||W_l||^2 for every layer,
        # including the output layer L.
        for l in range(1, L + 1):
            L2_regularization_cost = (1. / m) * (lambd / 2) * np.sum(
                np.square(parameters['W' + str(l)]))
            cost = cost + L2_regularization_cost

        cost = np.squeeze(cost)
        return cost

    def linear_backward(self, dZ, cache):
        """
        Implement the linear portion of backward propagation for a single layer (layer l)

        Arguments:
        dZ -- Gradient of the cost with respect to the linear output (of current layer l)
        cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]

        dW = (1. / m) * np.dot(dZ, A_prev.T)
        db = (1. / m) * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T, dZ)
        return dA_prev, dW, db

    def linear_backward_with_regularization(self, dZ, cache, parameters, lambd):
        A_prev, W, b = cache
        m = A_prev.shape[1]

        # Same as linear_backward, plus the L2 term (lambd / m) * W on dW.
        dW = (1. / m) * (np.dot(dZ, A_prev.T) + lambd * W)
        db = (1. / m) * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T, dZ)
        return dA_prev, dW, db

    def linear_activation_backward(self, dA, cache, activation, lambd, parameters):
        """
        Implement the backward propagation for the LINEAR->ACTIVATION layer.
        Arguments:
        dA -- post-activation gradient for current layer l
        cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
        activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

        Returns:
        dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW -- Gradient of the cost with respect to W (current layer l), same shape as W
        db -- Gradient of the cost with respect to b (current layer l), same shape as b
        """
        linear_cache, activation_cache = cache

        if activation == "relu":
            dZ = self.utils.relu_backward(dA, activation_cache)
        elif activation == "sigmoid":
            dZ = self.utils.sigmoid_backward(dA, activation_cache)

        if lambd == 0:
            dA_prev, dW, db = self.linear_backward(dZ, linear_cache)
        else:
            dA_prev, dW, db = self.linear_backward_with_regularization(
                dZ, linear_cache, parameters, lambd)

        return dA_prev, dW, db

    def L_model_backward(self, AL, Y, caches, lambd, parameters):
        """
        Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

        Arguments:
        AL -- probability vector, output of the forward propagation (L_model_forward())
        Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
        caches -- list of caches containing:
                    every cache of linear_activation_forward() with "relu" (there are (L-1) of them, indexed from 0 to L-2)
                    the cache of linear_activation_forward() with "sigmoid" (there is one, index L-1)

        Returns:
        grads -- A dictionary with the gradients
                 grads["dA" + str(l)] = ...
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ...
        """
        grads = {}
        L = len(caches)  # the number of layers
        m = AL.shape[1]
        Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

        # Initialize the backpropagation with the derivative of the
        # cross-entropy cost with respect to AL.
        dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

        # Lth layer (SIGMOID -> LINEAR) gradients.
        current_cache = caches[L - 1]
        grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = \
            self.linear_activation_backward(dAL, current_cache,
                                            activation="sigmoid",
                                            lambd=lambd, parameters=parameters)

        for l in reversed(range(L - 1)):
            # lth layer: (RELU -> LINEAR) gradients.
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(
                grads["dA" + str(l + 2)], current_cache, activation="relu",
                lambd=lambd, parameters=parameters)
            grads["dA" + str(l + 1)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp

        return grads

    def update_parameters(self, parameters, grads, learning_rate):
        """
        Update parameters using gradient descent

        Arguments:
        parameters -- python dictionary containing your parameters
        grads -- python dictionary containing your gradients, output of L_model_backward

        Returns:
        parameters -- python dictionary containing your updated parameters
                      parameters["W" + str(l)] = ...
                      parameters["b" + str(l)] = ...
        """
        L = len(parameters) // 2  # number of layers in the neural network

        # Gradient-descent step for each layer's weights and biases.
        for l in range(L):
            parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * grads["dW" + str(l + 1)]
            parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * grads["db" + str(l + 1)]

        return parameters
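if __name__ == "__main__":
    # Minimal smoke-test sketch for the NN class on random data. It assumes
    # the Utils class above provides sigmoid/relu and their *_backward
    # counterparts with the (value, cache) convention used by the methods.
    np.random.seed(0)
    X = np.random.randn(4, 10)               # 4 features, 10 examples
    Y = (np.random.rand(1, 10) > 0.5) * 1    # fabricated binary labels
    nn = NN()
    params = NN.initialize_parameters([4, 5, 1])
    for i in range(100):
        AL, caches = nn.L_model_forward(X, params)
        cost = nn.compute_cost(AL, Y)
        grads = nn.L_model_backward(AL, Y, caches, lambd=0, parameters=params)
        params = nn.update_parameters(params, grads, learning_rate=0.1)
    print("final cost:", cost)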
def parse_file(accession_no, text_list, doc_id):
    logger = Utils.add_logger()
    try:
        funds_list = list()
        seq_id = 1  # pairs each header block with its proposal table
        prev_fundname = ""
        for index, temp in enumerate(text_list):
            split = re.split("\n", temp)
            for x, spli in enumerate(split):
                if "Proposal Vote" in spli:
                    # Proposal rows start three lines below the banner; slice
                    # each fixed-width row into its five columns. case3 and
                    # case4 are end-of-table markers defined elsewhere in
                    # this module.
                    num = x + 3
                    table_list = list()
                    table_list.append(seq_id - 1)
                    while (num < (len(split) - 2)) and (case3 not in split[num]) \
                            and (case4 not in split[num]):
                        row_text = split[num]
                        if len(row_text) > 0:
                            temp_text = row_text[:6].strip()
                            temp_text += "\t" + row_text[6:65].strip()
                            temp_text += "\t" + row_text[65:79].strip()
                            temp_text += "\t" + row_text[79:110].strip()
                            temp_text += "\t" + row_text[110:].strip()
                            table_list.append(temp_text)
                        num += 1
                    funds_list.append(table_list)
                sep = re.split(r"\s{3,}", spli)
                header_list = list()
                for i, s in enumerate(sep):
                    if "Agenda Number:" in s:
                        # The fund name is the last run of text in the chunk
                        # preceding this agenda block.
                        fund = text_list[index - 1]
                        fund_name = re.split(r'\s{2,}', fund)[-1]
                        fund_name = re.sub("\n", "", fund_name).strip()
                        if len(fund_name) > 1:
                            prev_fundname = fund_name
                        else:
                            fund_name = prev_fundname
                        agenda_number = s.split(':')[1].strip()
                        inc_name = sep[i - 1].strip()
                    elif "Security:" in s:
                        security = s.split(':')[1].strip()
                    elif "Meeting Type:" in s:
                        meeting_type = s.split(':')[1].strip()
                    elif "Meeting Date:" in s:
                        meeting_date = s.split(':')[1].strip()
                    elif "Ticker:" in s:
                        ticker = s.split(':')[1].strip()
                    elif "ISIN:" in s:
                        ISIN = s.split(':')[1].strip()
                        # ISIN is the last header field of a block, so the
                        # completed header can be flushed here.
                        header_list.append(seq_id)
                        header_list.append(doc_id)
                        header_list.append(accession_no)
                        header_list.append(fund_name)
                        header_list.append(inc_name)
                        header_list.append(security)
                        header_list.append(meeting_date)
                        header_list.append(meeting_type)
                        header_list.append(ISIN)
                        header_list.append(ticker)
                        header_list.append(agenda_number)
                        funds_list.append(header_list)
                        seq_id += 1
    except Exception as error:
        logger.error("Error in parsing the file for accession_no-{}-{}".format(
            accession_no, error))
        return list()
    return funds_list
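if __name__ == "__main__":
    # Sketch of the fixed-width slicing in parse_file() on a fabricated row;
    # the column boundaries (6/65/79/110) mirror the parser above.
    row = ("1.".ljust(6) + "ELECT DIRECTOR JANE DOE".ljust(59)
           + "Mgmt".ljust(14) + "For".ljust(31) + "For")
    fields = [row[:6], row[6:65], row[65:79], row[79:110], row[110:]]
    print([f.strip() for f in fields])
    # -> ['1.', 'ELECT DIRECTOR JANE DOE', 'Mgmt', 'For', 'For']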
def post_process(accession_no, funds_list):
    logger = Utils.add_logger()
    try:
        seq_id = 1
        header_cols = ["seq_id", "DocumentId", "AccesssionNumber", "FundName",
                       "CompanyName", "SecurityId", "MeetingDate", "MeetingType",
                       "ISIN", "Ticker", "AgendaNumber"]
        table_cols = ["seq_id", "ProposalNumber", "Proposal", "ProposedBy",
                      "ForAgainstManagement", "VoteCast"]
        main_header_df = pd.DataFrame(columns=header_cols)
        table_df = pd.DataFrame(columns=table_cols)
        # funds_list alternates header blocks (even index) and proposal
        # tables (odd index); see parse_file above.
        for i, data in enumerate(funds_list):
            if i % 2 == 0:
                temp = np.asarray(data).reshape(1, 11)
                header_df = pd.DataFrame(temp, columns=header_cols)
                main_header_df = pd.concat([main_header_df, header_df])
            else:
                for val in data:
                    if isinstance(val, int):
                        # The first element of each table block is the seq_id
                        # of the matching header row.
                        seq_id = val
                        continue
                    row_list = [seq_id] + [v.lstrip() for v in re.split(r"\t", val)]
                    # Pad short rows out to the full six columns.
                    row_list += [None] * (6 - len(row_list))
                    temp_df = pd.DataFrame([row_list], columns=table_cols)
                    table_df = pd.concat([table_df, temp_df])
    except Exception as error:
        logger.error("Error in post-processing for accession_no-{}-{}".format(
            accession_no, error))
        return pd.DataFrame(), pd.DataFrame()
    return main_header_df, table_df