def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Select the start line of this segment without a regex header match.

    Special case: according to the original author's note this method is
    skipped in the regular pass and called later with a different argument
    layout, namely ``lines, lines, self.index_field, feats, len(lines),
    file_info, None``. Consequently, inside this method:

    * ``line_text`` is indexed like a sequence of line dicts with a
      ``'text'`` key,
    * ``line_index`` is iterated as a sequence of per-line index values,
    * ``prev_line`` is a file-info object whose ``dbname`` is parsed as the
      year via ``int()``.

    For years >= 1960 the first line whose text is non-empty after
    stripping ``",.; "`` is selected, scanning only while the index entries
    are ``False``. For earlier years the last line is selected.

    Returns:
        ``True`` if a start index was selected and reported through
        ``self.do_match_work``, otherwise ``False``.
    """
    file_info = prev_line
    current_year = int(file_info.dbname)

    selected_start_index = None
    if current_year >= 1960:
        # Scan the leading lines whose index entry is still False and take
        # the first one that carries real text.
        for index, value in enumerate(line_index):
            if value is not False:
                break
            if line_text[index]['text'].strip(",.; ") != "":
                selected_start_index = index
                break
    elif len(line_index) > 0:
        # Early years: just take the last line. The emptiness guard avoids
        # reporting the bogus index -1 when there are no lines at all.
        selected_start_index = len(line_index) - 1

    if selected_start_index is None:
        return False

    # No real pattern is involved here; an empty search only produces a
    # placeholder match object to hand to do_match_work.
    placeholder_match, _ = regu.fuzzy_search(r"", "")
    self.do_match_work(True, placeholder_match, selected_start_index, 0)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Hauptsitz:" or "Sitz:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(r"^Hauptsitz\s?:|^Sitz\s?:", line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Verwaltung:" / "Verw.:" header at the start of ``line_text``.

    The search is run through ``regu.fuzzy_search`` with ``err_number=0``
    (presumably: no fuzzy errors tolerated -- confirm against ``regu``).
    On a hit, the match, ``line_index`` and the error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(Verwaltung:?|Verw\.\s?):", line_text, err_number=0
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for an "Aufsichtsrat:" or "Kontrollstelle:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(Aufsichtsrat|Kontrollstelle)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Vorstand:", "Verwaltungsrat:" or "Verwaltungsbeirat:" header.

    The pattern is anchored at the start of ``line_text`` and evaluated with
    ``regu.fuzzy_search``. On a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(Vorstand|Verwaltungsrat|Verwaltungsbeirat)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Fernschreiber:" or "Telex:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(?:Fernschreiber|Telex)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Geschäftsinhaber:" or "Geschäftsleitung:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Geschäfts(inhaber|leitung)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for an "Aus ... konsolidiert ... Bilanzen:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Aus.+konsolidiert.+Bilanzen\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_stop_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Geschäftsjahr:" header at the start of ``line_text`` as stop marker.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``False`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(r"^Geschäftsjahr\s?:", line_text)
    if hit is None:
        return None
    self.do_match_work(False, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Niederlassungen:" header at the start of ``line_text``.

    The search is run through ``regu.fuzzy_search`` with ``err_number=1``
    (presumably the number of tolerated fuzzy errors -- confirm against
    ``regu``). On a hit, the match, ``line_index`` and the error count are
    forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"(^Niederlassungen\s?:)", line_text, err_number=1
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Kommandite ... und ... Bank ...:" header in ``combined_texts``.

    Unlike most sibling conditions this one searches ``combined_texts``
    rather than ``line_text``. The pattern is evaluated with
    ``regu.fuzzy_search``; on a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Kommandite.+und.+Bank.+:", combined_texts
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for an "Aus der/den Gewinn- und Verlust- " header at the start of ``line_text``.

    The pattern ends with a trailing space after "Verlust-" and has no
    colon. It is evaluated with ``regu.fuzzy_search``; on a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Aus (der|den) Gewinn- und Verlust- ", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Stimmrecht der Aktien:" header (or its "d." abbreviation).

    The pattern is anchored at the start of ``line_text`` and evaluated with
    ``regu.fuzzy_search``. On a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(Stimmrecht der Aktien|Stimmrecht d\.[.\s]*Aktien.+)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Dividenden ... aktien:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Dividenden(?:.+)aktien\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for "Aus ... konsolidiert ... Gewinn ... Verlustrechnungen" in ``combined_texts``.

    The pattern is not anchored and accepts upper- or lower-case "G"/"V".
    It is evaluated with ``regu.fuzzy_search`` against ``combined_texts``;
    on a hit, the match, ``line_index`` and the reported error count are
    forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"Aus.+konsolidiert.+(?:G|g)ewinn.+(?:V|v)erlustrechnungen", combined_texts
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Zahlstellen:" style header at the start of ``line_text``.

    Covered variants: "Zahlstellen", "Hinterlegungs- u(nd|.) Zahlstellen"
    and "Zahlstellen bzw. Hinterlegungsstellen", each followed by a colon.
    The pattern is evaluated with ``regu.fuzzy_search``; on a hit, the
    match, ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"(^Zahlstellen|^Hinterlegungs\- u(nd|\.)\s?Zahlstellen|^Zahlstellen\sbzw.\sHinterlegungsstellen)\s?:"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a subsidiaries-style header in ``line_text``.

    Covered variants: a line containing "bung der Tochtergesellschaften"
    (not anchored), or starting with "Tochtergesellschaften und sonstige
    Beteiligungen" or "Hauptlagerplatz", each followed by a colon. The
    pattern is evaluated with ``regu.fuzzy_search``; on a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"(bung der Tochtergesellschaften|^Tochtergesellschaften\sund\ssonstige\sBeteiligungen|^Hauptlagerplatz)\s?:"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "(Persönlich haftender) Gesellschafter:" header at the start of ``line_text``.

    The pattern also accepts the truncated form "schafter:". It is evaluated
    with ``regu.fuzzy_search``; on a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^(Persönlich\shaftender\s)?(Gesellschafter|schafter)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a products/purpose header at the start of ``line_text``.

    Covered variants: "Es werden erzeugt", "Erzeugnisse", "Gegenstand des
    Unternehmens" and "Produktionsprogramm", each followed by a colon. The
    search is run through ``regu.fuzzy_search`` with ``err_number=1``
    (presumably the number of tolerated fuzzy errors -- confirm against
    ``regu``). On a hit, the match, ``line_index`` and the error count are
    forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"^(Es werden erzeugt|Erzeugnisse|Gegenstand\sdes\sUnternehmens|Produktionsprogramm)\s?:"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text, err_number=1)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Großaktionär(e):" or "Aktionär(e):" header at the start of ``line_text``.

    The pattern accepts both the "ss" and the "ß" spelling through a
    non-capturing group. It is evaluated with ``regu.fuzzy_search``; on a
    hit, the match, ``line_index`` and the reported error count are
    forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"^((Gro(?:ss|ß)aktionär(?:\s?|e\s?))|Aktionäre?)\s?:", line_text
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a facilities/companies header at the start of ``line_text``.

    Covered variants: "Betriebsanlagen", "Betriebsgesellschaften",
    "Vertriebsgesellschaften" and "Besitzangaben", each followed by a colon.
    The search is run through ``regu.fuzzy_search`` with ``err_number=1``
    (presumably the number of tolerated fuzzy errors -- confirm against
    ``regu``). On a hit, the match, ``line_index`` and the error count are
    forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"^(Betriebsanlagen|Betriebsgesellschaften|Vertriebsgesellschaften|Besitzangaben)\s?:"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text, err_number=1)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a plants header at the start of ``line_text``.

    Covered variants: "Werke in" (no colon required), "Werke:",
    "Betriebsstätten:", "Eigenwerke:" and "Zechen:". The search is run
    through ``regu.fuzzy_search`` with ``err_number=1`` (presumably the
    number of tolerated fuzzy errors -- confirm against ``regu``). On a hit,
    the match, ``line_index`` and the error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"^(Werke in|Werke\s?:|Betriebsstätten\s?:|Eigenwerke\s?:|Zechen\s?:)"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text, err_number=1)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for an "Aktienkurse ...:" header anywhere in ``line_text``.

    The pattern is not anchored and allows arbitrary text between
    "Aktienkurse" and the colon; the original author's sample input was
    "Aktienkurse (Düsseldorf):". It is evaluated with
    ``regu.fuzzy_search``; on a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(r"Aktienkurse\s?.*:", line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Beteiligung(en):" header in ``line_text``.

    An optional qualifier (Namhafte, Wesentliche, Maßgebliche, Wichtigste,
    Sonstige, Direkte) may precede the word; the pattern is not anchored.
    ``err_number=0`` is passed to ``regu.fuzzy_search``; per the original
    author's note the error number was reduced to prevent confusion with
    "Beteiligung:". On a hit, the match, ``line_index`` and the error count
    are forwarded to ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"(((?:Namhafte|Wesentliche|Maßgebliche|Wichtigste|Sonstige|Direkte)\s?Beteiligung(en)?)|\s?Beteiligung(en)?)\s?:"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text, err_number=0)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Fernruf:" or "Telefon:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. On a hit, the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(r"^(?:Fernruf|Telefon)\s?:", line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a branch-office / head-office header at the start of ``line_text``.

    Covered variants: "Zweigniederlassungen und Büros:",
    "Zweigniederlassungen in" (colon optional), "Zweigniederlassungen:",
    "Vertreten in :" and "Hauptverwaltung:". The pattern is evaluated with
    ``regu.fuzzy_search``; on a hit, the match, ``line_index`` and the
    reported error count are forwarded to ``self.do_match_work`` with
    ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    header_pattern = (
        r"(^Zweigniederlassungen und Büros\s?:|"
        r"^Zweigniederlassungen in\s?:?|"
        r"^Zweigniederlassungen\s?:|"
        r"^Vertreten\sin\s:|"
        r"^Hauptverwaltung\s?:)"
    )
    hit, error_count = regu.fuzzy_search(header_pattern, line_text)
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Grundkapital:" header at the start of ``line_text``.

    The search is run through ``regu.fuzzy_search`` with ``err_number=0``
    (presumably: no fuzzy errors tolerated -- confirm against ``regu``).
    A hit is rejected when ``combined_texts`` contains "Bezugsrechte:";
    the original author marks this as a special case. Otherwise the match,
    ``line_index`` and the error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on an accepted match, ``False`` when the hit is rejected
        because of "Bezugsrechte:", and ``None`` when nothing matched.
    """
    hit, error_count = regu.fuzzy_search(
        r"^Grundkapital\s?:", line_text, err_number=0
    )
    if hit is None:
        return None
    if "Bezugsrechte:" in combined_texts:
        # special case: header is rejected in this context
        return False
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a subscription-rights header in ``combined_texts``.

    Covered variants (each anchored at the start and followed by a colon):
    "Bezugsrechtabschläge insgesamt", "Umtauschrechte",
    "Berichtigunsaktien", "Bezugsrechte und Berichtigungsaktien" and
    "Bezugsrechte".

    The pattern is first searched in ``combined_texts``. If that hits, the
    same pattern is searched in ``line_text`` to decide which index to
    report: ``line_index`` when the current line itself matches, otherwise
    ``line_index - 1`` (the original author's note: the combination with
    the previous text contains the info). The match and error count from
    the ``combined_texts`` search are forwarded to ``self.do_match_work``
    with ``True`` as first argument.

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    # NOTE(review): "Berichtigunsaktien" lacks the "g" of
    # "Berichtigungsaktien" -- possibly a typo, kept unchanged here.
    header_pattern = r"(^Bezugsrechtabschläge insgesamt\s?:|^Umtauschrechte\s?:|^Berichtigunsaktien\s?:|^Bezugsrechte und Berichtigungsaktien\s?:|^Bezugsrechte\s?:)"

    combined_hit, error_count = regu.fuzzy_search(header_pattern, combined_texts)
    if combined_hit is None:
        return None

    line_hit, _ = regu.fuzzy_search(header_pattern, line_text)
    # Current line carries the header itself -> its own index; otherwise the
    # header came in via the previous text -> previous index.
    target_index = line_index if line_hit else line_index - 1
    self.do_match_work(True, combined_hit, target_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for a "Rechte ... Vorzugs...aktien ...:" header in ``combined_texts``.

    The pattern is not anchored and is searched in ``combined_texts`` via
    ``regu.fuzzy_search`` with ``err_number=1``. On a hit, the match,
    ``line_index`` and the error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Examples recorded by the original author:
        mismatch: 'rechtslose Vorzugsaktien. Aktienkurse:' with e2
        match: 'Besondere Rechte der an der Börse Hamburg gehandelten Vorzugs-Aktien:' with e0
        match: 'Besondere Rechte der Vorzugsaktien:' with e0

    Returns:
        ``True`` on a match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(
        r"Rechte.+Vorzugs.*(?:a|A)ktien.*:", combined_texts, err_number=1
    )
    if hit is None:
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True
def match_start_condition(self, line, line_text, line_index, features, num_lines, prev_line, combined_texts):
    """Look for an "Aktionärvertreter:" header at the start of ``line_text``.

    The pattern is evaluated with ``regu.fuzzy_search``. A hit whose matched
    text contains "Aktionären" is discarded; the original author flags this
    as a possible false positive of the pattern. Otherwise the match,
    ``line_index`` and the reported error count are forwarded to
    ``self.do_match_work`` with ``True`` as first argument.

    Returns:
        ``True`` on an accepted match, otherwise ``None``.
    """
    hit, error_count = regu.fuzzy_search(r"^(Aktionärvertreter)\s?:", line_text)
    if hit is None:
        return None
    if "Aktionären" in hit.group():
        # possible false positive for the pattern above
        return None
    self.do_match_work(True, hit, line_index, error_count)
    return True