Exemplo n.º 1
0
def get_dividend_paras(doc_text):
    """Extract the passage that describes dividend entitlements.

    Looks up the first match of the phrase "shall be entitled to receive
    dividends" and then walks the numbers returned by
    ``get_nums_from_text(doc_text, min=0, max=1, decimal=True)``.  The span
    starts 10 positions before the first number located after the phrase and
    ends where two consecutive numbers are more than 140 positions apart (or
    after the last number when no such gap exists).

    Args:
        doc_text: Sliceable sequence of string tokens; the result is built by
            joining a slice of it with single spaces.

    Returns:
        The joined tokens of the selected span, or the string
        ``"Failed to find dividend info text"`` when either the trigger
        phrase or a number following it cannot be found.
    """
    failure = "Failed to find dividend info text"

    trigger = preprocess_text("shall be entitled to receive dividends",
                              stem=False).split(" ")
    locs = find_loc(doc_text, [trigger], allow_contains=True)
    # Treat an empty match list like "no match" instead of crashing on [0].
    if locs is None or len(locs) == 0:
        return failure
    loc = locs[0]

    # Element [2] of each entry is used as the number's position in doc_text
    # (assumption based on how it is compared with `loc` and used to slice).
    numbers = get_nums_from_text(doc_text, min=0, max=1, decimal=True)

    start_idx = next(
        (i for i, num in enumerate(numbers) if num[2] > loc), None)
    if start_idx is None:
        return failure

    # Clamp at 0: a negative slice start would wrap around to the document end.
    start = max(numbers[start_idx][2] - 10, 0)

    for i in range(start_idx + 1, len(numbers)):
        if numbers[i][2] - numbers[i - 1][2] > 140:  # value is too far away
            # The original heuristic ends the span two entries back; never
            # step back past the start entry (index -1 would silently select
            # the last number in the document).
            end_idx = max(i - 2, start_idx)
            end = numbers[end_idx][2] + 20
            return " ".join(doc_text[start:end])

    return " ".join(doc_text[start:numbers[-1][2] + 20])
Exemplo n.º 2
0
def get_IV_intro_text(filename,
                      beginning_intro_triggers_filename="Beginning_IV",
                      end_intro_triggers_filename="End_IV_intro"):
    """Return the tokens between the beginning and end triggers of a contract.

    The contract is read with ``read_contract`` and split on single spaces.
    Trigger sets are loaded with ``get_proccessed_triggers`` and located with
    ``find_loc``.

    Args:
        filename: Contract identifier passed to ``read_contract`` and checked
            against the module-level ``Failures`` collection.
        beginning_intro_triggers_filename: Name of the trigger set marking the
            beginning of the section.
        end_intro_triggers_filename: Name of the trigger set marking the end
            of the intro.

    Returns:
        * ``"Failure expected"`` if ``filename`` is listed in ``Failures``;
        * ``None`` if the end trigger cannot be located;
        * otherwise the list of tokens ``text[beginning:end]``.
    """
    beginning_intro_triggers = get_proccessed_triggers(
        beginning_intro_triggers_filename)
    end_intro_triggers = get_proccessed_triggers(end_intro_triggers_filename)
    text = read_contract(filename).split(" ")

    if filename in Failures:
        return "Failure expected"

    try:
        beginning_of_IV_intro = find_loc(text,
                                         beginning_intro_triggers,
                                         allow_contains=False)
        # Indexing fails with TypeError on None and IndexError on an empty
        # result; both mean the end trigger was not found.
        end_of_IV_intro = find_loc(text, end_intro_triggers)[0]
    except (TypeError, IndexError):
        print("failed", filename)
        print(text)
        return None

    # In case triggers didn't work, search the entire document.
    if beginning_of_IV_intro is None or len(beginning_of_IV_intro) == 0:
        print("Beginning of IV intro is None")
        # Must stay a list: it is indexed just below and handed to
        # get_closest_string as `values`.
        beginning_of_IV_intro = [0]
    if beginning_of_IV_intro[0] > end_of_IV_intro:
        print("End is less than beginning index of IV intro")
        beginning_of_IV_intro = [0]
        end_of_IV_intro = len(text)

    beginning_of_IV = get_closest_string(values=beginning_of_IV_intro,
                                         target=end_of_IV_intro,
                                         less=True)

    if beginning_of_IV == -1:
        print("no beginning found")
        # Fall back to a 300-token window before the end; clamp at 0 so a
        # negative index does not wrap around to the end of the document.
        beginning_of_IV = max(end_of_IV_intro - 300, 0)

    return text[beginning_of_IV:end_of_IV_intro]
Exemplo n.º 3
0
def get_board_of_directors_paras(doc_text):
    """Return a 200-token excerpt starting at a "director elect" match.

    The phrase is located with ``find_loc``: first with
    ``allow_contains=False`` and, only if that yields ``None``, again with
    ``allow_contains=True``.  Matches at positions below 200 are skipped; the
    first remaining match determines the excerpt.

    Args:
        doc_text: Sliceable sequence of string tokens.

    Returns:
        The excerpt joined with single spaces, or a "Failed to find ..."
        message when no usable match exists.
    """
    window = 200
    trigger_tokens = preprocess_text("director elect", stem=False).split(" ")
    print(trigger_tokens)

    positions = find_loc(doc_text, [trigger_tokens], allow_contains=False)
    if positions is None:
        # Strict search found nothing; retry with the looser matching mode.
        positions = find_loc(doc_text, [trigger_tokens], allow_contains=True)
    if positions is None:
        return "Failed to find directors info text"

    for position in positions:
        if position >= window:
            print(position)
            # TODO: review find_loc function given sample results
            return " ".join(doc_text[position:position + window])

    return "Failed to find board of directors info text"
Exemplo n.º 4
0
def get_securities_info_paras(doc_text, beginning_intro_triggers_filename="Beginning_IV",
                              end_intro_triggers_filename="End_IV_intro"):
    """Return the shortest span between a beginning and an end trigger.

    Every combination of a beginning-trigger position and an end-trigger
    position is considered; among those where the beginning precedes the end,
    the pair with the smallest distance is selected.  The result is prefixed
    with a summary of the share names reported by ``get_names`` /
    ``get_names_from_text``.

    Args:
        doc_text: Sliceable sequence of string tokens.
        beginning_intro_triggers_filename: Name of the trigger set marking the
            beginning of the section.
        end_intro_triggers_filename: Name of the trigger set marking the end
            of the section.

    Returns:
        ``"Types of shares found: <names>\\n<span text>"``, or the string
        ``"Failed to find securities info text"`` when either trigger is
        missing or no beginning precedes an end.
    """
    failure = "Failed to find securities info text"

    beginning_intro_triggers = get_proccessed_triggers(beginning_intro_triggers_filename, preproccess=False)
    end_intro_triggers = get_proccessed_triggers(end_intro_triggers_filename, preproccess=False)

    try:
        beginning_locs = find_loc(doc_text, beginning_intro_triggers, allow_contains=True)
        end_locs = find_loc(doc_text, end_intro_triggers)
    except TypeError:
        return failure

    # A None result would otherwise blow up inside product() below, outside
    # of the try block.
    if beginning_locs is None or end_locs is None:
        return failure

    # min() returns the first smallest item, which is the same pair a stable
    # sort by distance followed by "first pair with begin < end" would pick.
    span = min(
        ((begin, end) for begin, end in product(beginning_locs, end_locs) if begin < end),
        key=lambda pair: pair[1] - pair[0],
        default=None,
    )
    if span is None:
        return failure

    # Name extraction is only needed once a valid span is known.
    _, names = get_names(doc_text, stem=False)
    names_used = get_names_from_text(doc_text, names)
    # The first element of each entry is used as the display name.
    names_summary = ", ".join(entry[0] for entry in names_used)

    begin, end = span
    return "Types of shares found: " + names_summary + "\n" + " ".join(doc_text[begin:end])
Exemplo n.º 5
0
def get_liquidation_paras(doc_text):
    """Return a 200-token excerpt starting at a liquidation trigger phrase.

    Searches for "event of any liquidation" and "upon any such liquidation"
    via ``find_loc`` (with ``allow_contains=True``) and uses the first match
    whose position is greater than 200.

    Args:
        doc_text: Sliceable sequence of string tokens.

    Returns:
        The excerpt joined with single spaces, or the string
        ``"Failed to find liquidation info text"`` when no usable match
        exists.
    """
    failure = "Failed to find liquidation info text"
    window = 200

    phrases = ["event of any liquidation", "upon any such liquidation"]
    token_triggers = []
    for phrase in phrases:
        token_triggers.append(preprocess_text(phrase, stem=False).split(" "))

    start_locs = find_loc(doc_text, token_triggers, allow_contains=True)
    print(start_locs)
    print(len(doc_text))

    if start_locs is None or start_locs == []:
        return failure

    # First match that lies beyond the opening 200 positions, if any.
    hit = next((pos for pos in start_locs if pos > 200), None)
    if hit is None:
        return failure
    return " ".join(doc_text[hit:hit + window])
Exemplo n.º 6
0
def get_original_issue_price(text, names, buffer=30):
    """Pair numbers with share names found after "original issue price".

    The text is cut at the location reported by ``find_loc`` for the fragment
    ``"riginal issue pric"``; names and numbers (requested with ``min=0,
    max=5, decimal=True``) are then extracted from the remainder and matched
    by ``match_nums_with_targets``.

    Args:
        text: Sliceable text to search.
        names: Candidate names forwarded to ``get_names_from_text``.
        buffer: Accepted but not used by this function.

    Returns:
        ``(nums_out, names_out)`` as two parallel lists, or ``(None, None)``
        when ``match_nums_with_targets`` returns ``None``.
    """
    loc = find_loc(text, ["riginal issue pric"], allow_contains=True)
    print("LOC", loc)

    # NOTE(review): `loc` is used directly as a slice start, so this relies
    # on find_loc returning an int (or None) here -- confirm.
    tail = text[loc:]

    matched_names = get_names_from_text(tail, names)
    candidate_nums = get_nums_from_text(tail, min=0, max=5, decimal=True)
    pairs = match_nums_with_targets(candidate_nums, matched_names)

    if pairs is None:
        print("NA")
        return None, None

    # Each pair is (name, number); consume `pairs` once, then split it.
    unpacked = [(name, num) for name, num in pairs]
    names_out = [name for name, _ in unpacked]
    nums_out = [num for _, num in unpacked]

    print("nums_out", nums_out)
    print("names_out", names_out)
    return nums_out, names_out