Пример #1
0
def path_to_docid05(earmark_path):
    """Resolve the database document id for a single earmark source path.

    Args:
        earmark_path: Indexable with at least four items, read positionally
            as ``(congress, bill_or_report, chamber, number)``.
            ``bill_or_report == 'bill'`` selects the bill lookup; any other
            value is handled as a report. ``chamber == 'senate'`` selects
            the Senate directory; any other value selects the House one.

    Returns:
        The value of ``get_db_document_id()`` for the bill version with the
        most recent ``bill_date()``, or for the first listed version of the
        report.

    Raises:
        ValueError: If a bill has no version dated after 1900-01-01 (this
            includes an empty version listing), or if a bill date is not in
            ``YYYY-MM-DD`` form.
    """
    bill_path = pt.BillPathUtils()
    report_path = pt.ReportPathUtils()
    congress = earmark_path[0]
    bill_report = earmark_path[1]
    chamber = earmark_path[2]
    number = earmark_path[3]

    if bill_report == 'bill':
        # Path segments must be relative: os.path.join throws away everything
        # before a segment that starts with a slash, so '/bills/s/' would
        # silently drop the configured root and the congress directory.
        chamber_dir = 's' if chamber == 'senate' else 'hr'
        path = os.path.join(configuration.get_path_to_bills(),
                            str(congress), 'bills', chamber_dir, str(number))

        best_date = date(1900, 1, 1)
        best_version = None
        for version in bill_path.get_all_versions(path):
            version_dir = os.path.join(path, 'text-versions', version)
            raw_date = pt.BillPathUtils(version_dir).bill_date()
            bill_date = datetime.strptime(raw_date, "%Y-%m-%d").date()
            # Strict comparison keeps the first version among equal dates.
            if bill_date > best_date:
                best_date = bill_date
                best_version = version
        if best_version is None:
            # Previously surfaced as an unbound-local error further down.
            raise ValueError(
                "no dated text version found under %s" % path)

        latest_bill_path = bill_path.get_bill_path(congress, number,
                                                   best_version)
        return pt.BillPathUtils(latest_bill_path).get_db_document_id()

    chamber_dir = "senate" if chamber == "senate" else "house"
    path = os.path.join(configuration.get_path_to_reports(),
                        str(congress), chamber_dir, str(number))
    all_versions = report_path.get_all_versions(path)
    # Reports use the first listed version; indexing fails if none exist.
    rep_path = report_path.get_report_path(int(congress), chamber,
                                           int(number), all_versions[0])
    return pt.ReportPathUtils(rep_path).get_db_document_id()
def path_to_docid05(earmark_path):
    """Look up the database document id that one earmark path points to.

    Args:
        earmark_path: Indexable with at least four items, read positionally
            as ``(congress, bill_or_report, chamber, number)``. A second
            item equal to ``'bill'`` triggers the bill lookup, anything else
            the report lookup. A third item equal to ``'senate'`` selects
            the Senate directory, anything else the House directory.

    Returns:
        The value of ``get_db_document_id()`` for the bill version whose
        ``bill_date()`` is the most recent, or for the first listed version
        of the report.

    Raises:
        ValueError: If a bill has no version dated after 1900-01-01 (this
            includes an empty version listing), or if a bill date is not in
            ``YYYY-MM-DD`` form.
    """
    bill_path = pt.BillPathUtils()
    report_path = pt.ReportPathUtils()
    congress = earmark_path[0]
    bill_report = earmark_path[1]
    chamber = earmark_path[2]
    number = earmark_path[3]

    if bill_report != 'bill':
        chamber_dir = "senate" if chamber == "senate" else "house"
        # Relative segments only: a segment beginning with a slash makes
        # os.path.join discard the reports root and the congress directory.
        path = os.path.join(configuration.get_path_to_reports(),
                            str(congress), chamber_dir, str(number))
        all_versions = report_path.get_all_versions(path)
        # Reports use the first listed version; indexing fails if none exist.
        rep_path = report_path.get_report_path(int(congress), chamber,
                                               int(number), all_versions[0])
        return pt.ReportPathUtils(rep_path).get_db_document_id()

    chamber_dir = 's' if chamber == 'senate' else 'hr'
    # Same rule as above: '/bills/s/' would reset the path to the filesystem
    # root instead of extending the configured bills directory.
    path = os.path.join(configuration.get_path_to_bills(),
                        str(congress), 'bills', chamber_dir, str(number))

    best_date = date(1900, 1, 1)
    best_version = None
    for version in bill_path.get_all_versions(path):
        version_dir = os.path.join(path, 'text-versions', version)
        raw_date = pt.BillPathUtils(version_dir).bill_date()
        bill_date = datetime.strptime(raw_date, "%Y-%m-%d").date()
        # Strict comparison keeps the first version among equal dates.
        if bill_date > best_date:
            best_date = bill_date
            best_version = version
    if best_version is None:
        # Previously surfaced as an unbound-local error on the next line.
        raise ValueError("no dated text version found under %s" % path)

    latest_bill_path = bill_path.get_bill_path(congress, number, best_version)
    return pt.BillPathUtils(latest_bill_path).get_db_document_id()
def get_bill_directory(bill_name, congress_number):
    """Build the path of a bill's selected text-version directory.

    Returns an empty string when ``bill_name`` is falsy. Otherwise the path
    is ``<bills root>/<congress_number>/bills/<type dir>/<full bill dir>/
    text-versions/<version>``, where the type and full-bill directories come
    from ``get_bill_type_dir`` and ``get_full_bill_dir`` and the version is
    whatever ``get_latest_version`` returns for the ``text-versions``
    directory.
    """
    if not bill_name:
        return ""

    congress_dir = os.path.join(configuration.get_path_to_bills(),
                                congress_number)
    versions_dir = os.path.join(
        congress_dir,
        "bills",
        get_bill_type_dir(bill_name),
        get_full_bill_dir(bill_name),
        "text-versions",
    )
    return os.path.join(versions_dir, get_latest_version(versions_dir))
Пример #4
0
def get_bill_directory(bill_name, congress_number):
    """Return the text-version directory chosen for ``bill_name``.

    A falsy ``bill_name`` yields ``""``. For any other name the result is
    the bills root from ``configuration`` extended with the congress number,
    ``"bills"``, the directories returned by ``get_bill_type_dir`` and
    ``get_full_bill_dir``, ``"text-versions"``, and finally the entry that
    ``get_latest_version`` returns for that ``text-versions`` directory.
    """
    if bill_name:
        path_parts = [
            configuration.get_path_to_bills(),
            congress_number,
            "bills",
            get_bill_type_dir(bill_name),
            get_full_bill_dir(bill_name),
            "text-versions",
        ]
        text_versions = os.path.join(*path_parts)
        chosen_version = get_latest_version(text_versions)
        return os.path.join(text_versions, chosen_version)

    return ""
def path_to_docid08(earmarks):
    """Map extracted earmarks to the database document ids they refer to.

    Args:
        earmarks: Iterable of records read positionally as
            ``(earmark_id, earmark_info, page, excerpt)``, where
            ``earmark_info`` is ``(congress, kind, chamber, number)``.
            ``kind`` is ``'bill'`` or ``'report'``; any other value
            contributes no looked-up id. For bills, ``number`` is either a
            bill identifier or a ``(doc_ref, document_name)`` tuple.

    Returns:
        A list of ``[earmark_id, doc_id, page, excerpt]`` rows, one per
        distinct ``(earmark_id, doc_id)`` pair, carrying the page and
        excerpt of the first occurrence. Rows follow dict iteration order
        (insertion order on Python 3.7+).

    Raises:
        ValueError: If a non-tuple bill has no version dated after
            1900-01-01, or a bill date is not in ``YYYY-MM-DD`` form.
    """
    bill_path = pt.BillPathUtils()
    report_path = pt.ReportPathUtils()
    # Raw string, same pattern text as before.
    # NOTE(review): the leading ``\D`` is the regex "non-digit" class, so this
    # matches "<any non-digit>ivision <word char>"; confirm whether a literal
    # "Division" was intended.
    division_re = re.compile(r'\Division\s\w')
    # Sentinel rather than None, in case get_db_document_id() can return None.
    unresolved = object()

    database = []
    for earmark in earmarks:
        earmark_id = earmark[0]
        earmark_info = earmark[1]
        page = earmark[2]
        excerpt = earmark[3]

        congress = int(earmark_info[0])
        bill = earmark_info[1]
        chamber = earmark_info[2]
        number = earmark_info[3]

        # Reset per record so a lookup that finds nothing can never reuse the
        # id resolved for the previous record.
        doc_id = unresolved

        if bill == 'bill':
            if isinstance(number, tuple):
                doc_ref = number[0]
                document_name = number[1]
                all_versions = bill_path.get_all_versions(
                    os.path.join(configuration.get_path_to_bills(),
                                 '110/bills/hr/hr2764/'))
                division = division_re.search(document_name)
                if division:
                    doc_string = division.group(0).replace(" ", "")
                    # The filter used to test an undefined name instead of
                    # the loop variable, which raised NameError.
                    matching = [candidate for candidate in all_versions
                                if doc_string in candidate]
                    if matching:
                        pth = bill_path.get_bill_path(congress, doc_ref,
                                                      matching[0])
                        doc_id = pt.BillPathUtils(pth).get_db_document_id()
                # NOTE(review): hard-coded ids added for every tuple-numbered
                # bill; presumably the hr2764 documents -- confirm.
                for fixed_id in (22552, 22553, 74460, 74678):
                    database.append([earmark_id, fixed_id, page, excerpt])
            else:
                # Relative segments only: a segment beginning with a slash
                # makes os.path.join discard the root and congress directory.
                chamber_dir = 's' if chamber == 'senate' else 'hr'
                pth = os.path.join(configuration.get_path_to_bills(),
                                   str(congress), 'bills', chamber_dir,
                                   str(number))
                best_date = date(1900, 1, 1)
                best_version = None
                for version in bill_path.get_all_versions(pth):
                    version_dir = os.path.join(pth, 'text-versions', version)
                    raw_date = pt.BillPathUtils(version_dir).bill_date()
                    bill_date = datetime.strptime(raw_date, "%Y-%m-%d").date()
                    # Strict comparison keeps the first of equal dates.
                    if bill_date > best_date:
                        best_date = bill_date
                        best_version = version
                if best_version is None:
                    # Previously surfaced as an unbound-local error.
                    raise ValueError(
                        "no dated text version found under %s" % pth)
                latest_bill_path = bill_path.get_bill_path(congress, number,
                                                           best_version)
                doc_id = pt.BillPathUtils(
                    latest_bill_path).get_db_document_id()
                if number == 'hr3222':
                    # NOTE(review): extra hard-coded id for hr3222 -- confirm.
                    database.append([earmark_id, 74360, page, excerpt])
        elif bill == 'report':
            chamber_dir = "senate" if chamber == "senate" else "house"
            pth = os.path.join(configuration.get_path_to_reports(),
                               str(congress), chamber_dir, str(number))
            all_versions = report_path.get_all_versions(pth)
            # Reports use the first listed version.
            rep_path = report_path.get_report_path(int(congress), chamber,
                                                   int(number),
                                                   all_versions[0])
            doc_id = pt.ReportPathUtils(rep_path).get_db_document_id()

        if doc_id is not unresolved:
            database.append([earmark_id, doc_id, page, excerpt])

    # Collapse duplicates on (earmark_id, doc_id); the first page/excerpt wins.
    first_seen = {}
    for row_earmark_id, row_doc_id, row_page, row_excerpt in database:
        first_seen.setdefault((row_earmark_id, row_doc_id),
                              [row_page, row_excerpt])
    return [list(key) + value for key, value in first_seen.items()]
Пример #6
0
def path_to_docid08(earmarks):
    """Resolve each extracted earmark to the document ids it refers to.

    Args:
        earmarks: Iterable of records read positionally as
            ``(earmark_id, earmark_info, page, excerpt)``, where
            ``earmark_info`` is ``(congress, kind, chamber, number)``.
            ``kind`` is ``'bill'`` or ``'report'``; any other value
            contributes no looked-up id. For bills, ``number`` is either a
            bill identifier or a ``(doc_ref, document_name)`` tuple.

    Returns:
        A list of ``[earmark_id, doc_id, page, excerpt]`` rows, one per
        distinct ``(earmark_id, doc_id)`` pair, carrying the page and
        excerpt of the first occurrence. Rows follow dict iteration order
        (insertion order on Python 3.7+).

    Raises:
        ValueError: If a non-tuple bill has no version dated after
            1900-01-01, or a bill date is not in ``YYYY-MM-DD`` form.
    """
    bill_path = pt.BillPathUtils()
    report_path = pt.ReportPathUtils()
    # Raw string, same pattern text as before.
    # NOTE(review): the leading ``\D`` is the regex "non-digit" class, so this
    # matches "<any non-digit>ivision <word char>"; confirm whether a literal
    # "Division" was intended.
    division_re = re.compile(r'\Division\s\w')
    # Sentinel rather than None, in case get_db_document_id() can return None.
    unresolved = object()

    database = []
    for earmark in earmarks:
        earmark_id = earmark[0]
        earmark_info = earmark[1]
        page = earmark[2]
        excerpt = earmark[3]

        congress = int(earmark_info[0])
        bill = earmark_info[1]
        chamber = earmark_info[2]
        number = earmark_info[3]

        # Reset per record so a lookup that finds nothing can never reuse the
        # id resolved for the previous record.
        doc_id = unresolved

        if bill == 'report':
            chamber_dir = "senate" if chamber == "senate" else "house"
            # Relative segments only: a segment beginning with a slash makes
            # os.path.join discard the root and the congress directory.
            pth = os.path.join(configuration.get_path_to_reports(),
                               str(congress), chamber_dir, str(number))
            all_versions = report_path.get_all_versions(pth)
            # Reports use the first listed version.
            rep_path = report_path.get_report_path(int(congress), chamber,
                                                   int(number),
                                                   all_versions[0])
            doc_id = pt.ReportPathUtils(rep_path).get_db_document_id()
        elif bill == 'bill' and isinstance(number, tuple):
            doc_ref = number[0]
            document_name = number[1]
            all_versions = bill_path.get_all_versions(
                os.path.join(configuration.get_path_to_bills(),
                             '110/bills/hr/hr2764/'))
            division = division_re.search(document_name)
            if division:
                doc_string = division.group(0).replace(" ", "")
                # The filter used to test an undefined name instead of the
                # loop variable, which raised NameError.
                matching = [candidate for candidate in all_versions
                            if doc_string in candidate]
                if matching:
                    pth = bill_path.get_bill_path(congress, doc_ref,
                                                  matching[0])
                    doc_id = pt.BillPathUtils(pth).get_db_document_id()
            # NOTE(review): hard-coded ids added for every tuple-numbered
            # bill; presumably the hr2764 documents -- confirm.
            for fixed_id in (22552, 22553, 74460, 74678):
                database.append([earmark_id, fixed_id, page, excerpt])
        elif bill == 'bill':
            chamber_dir = 's' if chamber == 'senate' else 'hr'
            # Same rule as for reports: '/bills/s/' would reset the path to
            # the filesystem root.
            pth = os.path.join(configuration.get_path_to_bills(),
                               str(congress), 'bills', chamber_dir,
                               str(number))
            best_date = date(1900, 1, 1)
            best_version = None
            for version in bill_path.get_all_versions(pth):
                version_dir = os.path.join(pth, 'text-versions', version)
                raw_date = pt.BillPathUtils(version_dir).bill_date()
                bill_date = datetime.strptime(raw_date, "%Y-%m-%d").date()
                # Strict comparison keeps the first of equal dates.
                if bill_date > best_date:
                    best_date = bill_date
                    best_version = version
            if best_version is None:
                # Previously surfaced as an unbound-local error.
                raise ValueError(
                    "no dated text version found under %s" % pth)
            latest_bill_path = bill_path.get_bill_path(congress, number,
                                                       best_version)
            doc_id = pt.BillPathUtils(latest_bill_path).get_db_document_id()
            if number == 'hr3222':
                # NOTE(review): extra hard-coded id for hr3222 -- confirm.
                database.append([earmark_id, 74360, page, excerpt])

        if doc_id is not unresolved:
            database.append([earmark_id, doc_id, page, excerpt])

    # Collapse duplicates on (earmark_id, doc_id); the first page/excerpt wins.
    first_seen = {}
    for row_earmark_id, row_doc_id, row_page, row_excerpt in database:
        first_seen.setdefault((row_earmark_id, row_doc_id),
                              [row_page, row_excerpt])
    return [list(key) + value for key, value in first_seen.items()]