示例#1
0
def mirror_old_style_records_etl():
    """Mirror record documents into their old-style naming where absent.

    Walks every path yielded by `rlid_record_paths()` while connected to the RLID
    data share, and tallies one state per document:

    * "not in database": `rlid_record_path_old` returned nothing for it.
    * "already mirrored": the old-style path already exists.
    * "mirrored": `place_record_old` returned a truthy result.
    * "failed to mirror": `place_record_old` returned a falsy result (also logged
      as a warning).
    """
    start_time = datetime.datetime.now()
    LOG.info("Start: Mirror record documents in old-style naming.")
    share = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    count = Counter()
    with share:
        for record_path in rlid_record_paths():
            record_name = os.path.basename(record_path)
            record_id, extension = os.path.splitext(record_name)
            mirror_path = rlid_record_path_old(record_id, extension)
            # Resolve exactly one state per document, then tally it once below.
            if not mirror_path:
                state = "not in database"
            elif os.path.exists(mirror_path):
                state = "already mirrored"
            elif place_record_old(record_path):
                state = "mirrored"
            else:
                state = "failed to mirror"
            count[state] += 1
            if state == "failed to mirror":
                LOG.warning("%r failed to mirror to %r.", record_name, mirror_path)
    document.log_state_counts(count, documents_type="records")
    LOG.info("End: Mirror.")
    elapsed(start_time, LOG)
示例#2
0
def deeds_records_update():
    """Run update for deeds & records documents RLID repository.

    All work happens while connected to the staging share:

    1. Attach a logfile handler (one logfile per calendar year, in staging).
    2. Move drop-folder files with a recognized extension into staging.
    3. Extract `.exe`/`.zip` archives found in staging.
    4. Delete the log & cross-reference files the archives include.
    5. Convert image files in staging to PDFs.
    6. Place each staging PDF via `place_record_old` and `place_record`.

    Side effects: sets `PATH["logfile"]` and adds a file handler to `LOG`.
    """
    start_time = datetime.datetime.now()
    PATH["logfile"] = os.path.join(
        PATH["staging"], "Deeds_Records_Update_{}.log".format(start_time.year)
    )
    conn = credential.UNCPathCredential(PATH["staging"], **credential.RLID_DATA_SHARE)
    with conn:
        # Attach logfile handler for staging logfile.
        # NOTE(review): the handler is never detached or closed, so it stays on LOG
        # after this function returns - confirm whether callers rely on that.
        logfile = logging.FileHandler(PATH["logfile"])
        logfile.setLevel(logging.INFO)
        logfile.setFormatter(LOGFILE_FORMATTER)
        LOG.addHandler(logfile)
        LOG.info("START SCRIPT: Update RLID deeds & records repository.")

        LOG.info("Start: Move deeds & records drop-files to staging directory.")
        drop_extensions = [".exe", ".pdf", ".zip"] + document.IMAGE_FILE_EXTENSIONS
        for file_name in os.listdir(PATH["drop"]):
            file_path = os.path.join(PATH["drop"], file_name)
            file_extension = os.path.splitext(file_name)[-1].lower()
            # Only plain files with a recognized extension get moved.
            if os.path.isfile(file_path) and file_extension in drop_extensions:
                move_path = os.path.join(PATH["staging"], file_name)
                shutil.move(file_path, move_path)
                LOG.info("Moved %r to %r.", file_path, move_path)
        LOG.info("End: Move.")

        LOG.info("Start: Extract record archives.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if os.path.splitext(file_path)[-1].lower() in [".exe", ".zip"]:
                count[extract_records(file_path, archive_original=True)] += 1
        document.log_state_counts(count, documents_type="archives")

        # D&R archives include a few log & reference files; delete if present.
        # A single `any` check guarantees each file is removed at most once, even
        # if its path matches more than one pattern (a second `os.remove` on the
        # same path would raise).
        for file_path in path.folder_file_paths(PATH["staging"]):
            lowered_path = file_path.lower()
            if any(pattern in lowered_path for pattern in ("_logfile", "_xreffile")):
                os.remove(file_path)

        LOG.info("Start: Replace record images with PDFs.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if (
                os.path.splitext(file_path)[-1].lower()
                in document.IMAGE_FILE_EXTENSIONS
            ):
                count[convert_image(file_path, delete_original=True)] += 1
        document.log_state_counts(count, documents_type="images")

        LOG.info("Start: Place record PDFs in RLID repository.")
        count = Counter()
        for file_path in path.folder_file_paths(PATH["staging"]):
            if os.path.splitext(file_path)[-1].lower() == ".pdf":
                old_state = place_record_old(file_path)
                # Only let the second placement delete the staging file when the
                # first placement reported "placed".
                new_state = place_record(
                    file_path, delete_original=(old_state == "placed")
                )
                count.update([old_state, new_state])
        document.log_state_counts(count, documents_type="records")
    elapsed(start_time, LOG)
    LOG.info("END SCRIPT")
示例#3
0
def property_cards_staging_update():
    """Refresh the RLID assessor property card staging repository.

    Every file path reported for `path.LANE_PROPERTY_CARDS` is compared against
    its same-named counterpart in the staging repository; documents flagged as
    changed are updated, and the resulting states are tallied & logged.
    """
    LOG.info("Start: Update assessor property card staging repository.")
    start_time = datetime.datetime.now()
    source_paths = document.repository_file_paths(path.LANE_PROPERTY_CARDS)
    share = credential.UNCPathCredential(
        path.RLID_DATA_STAGING_SHARE, **credential.RLID_DATA_SHARE
    )
    count = Counter()
    with share:
        for source_path in source_paths:
            file_name = os.path.basename(source_path)
            staging_path = os.path.join(REPO_PATH["property-card-staging"], file_name)
            # Unchanged documents are skipped (and not counted).
            if not document.changed(staging_path, source_path):
                continue
            count[document.update_document(source_path, staging_path)] += 1
    LOG.info("End: Update.")
    document.log_state_counts(count, documents_type="property cards (staging)")
    elapsed(start_time, LOG)
示例#4
0
def tax_maps_staging_update():
    """Refresh the RLID tax map staging repository.

    Each file path reported for `path.LANE_TAX_MAP_IMAGES` maps to a staging path
    that keeps the document's immediate parent folder name and its file name.
    Documents flagged as changed are updated; resulting states are tallied &
    logged.
    """
    LOG.info("Start: Update tax map staging repository.")
    start_time = datetime.datetime.now()
    share = credential.UNCPathCredential(
        path.RLID_DATA_STAGING_SHARE, **credential.RLID_DATA_SHARE
    )
    with share:
        count = Counter()
        for source_path in document.repository_file_paths(path.LANE_TAX_MAP_IMAGES):
            parent_path, file_name = os.path.split(source_path)
            # Tax maps have a one-deep bin: keep only the last folder name.
            bin_name = os.path.basename(parent_path)
            staging_path = os.path.join(
                REPO_PATH["tax-map-staging"], bin_name, file_name
            )
            if not document.changed(staging_path, source_path):
                continue
            count[document.update_document(source_path, staging_path)] += 1
    document.log_state_counts(count, documents_type="tax maps (staging)")
    elapsed(start_time, LOG)
    LOG.info("End: Update.")
示例#5
0
def property_cards_update():
    """Refresh the production RLID assessor property card repository from staging.

    Only `.pdf` files in the staging repository are considered. Each is compared
    with its RLID document path; those flagged as changed are updated, and the
    resulting states are tallied & logged.
    """
    LOG.info("Start: Update RLID assessor property card repository.")
    start_time = datetime.datetime.now()
    staging_paths = document.repository_file_paths(
        REPO_PATH["property-card-staging"], file_extensions=[".pdf"]
    )
    share = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    count = Counter()
    with share:
        for staging_path in staging_paths:
            file_name = os.path.basename(staging_path)
            rlid_path = rlid_document_path(file_name, document_type="property-card")
            # Unchanged documents are skipped (and not counted).
            if not document.changed(rlid_path, staging_path):
                continue
            count[document.update_document(staging_path, rlid_path)] += 1
    LOG.info("End: Update.")
    document.log_state_counts(count, documents_type="property cards")
    elapsed(start_time, LOG)
示例#6
0
def tax_maps_update():
    """Update the RLID tax map repository from the staging repository.

    While connected to the RLID data share, attaches a yearly logfile handler to
    `LOG`, then runs `update_tax_map` (archiving the previous document) for every
    file name in the release map built from the current "Tax Maps" currency
    date. If anything was counted as "updated", the currency date is advanced to
    the latest release date in that map.
    """
    start_time = datetime.datetime.now()
    share = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    with share:
        # Attach logfile handler for repository update logfile.
        log_name = "Tax_Map_Update_{}.log".format(start_time.year)
        handler = logging.FileHandler(os.path.join(REPO_PATH["tax-map"], log_name))
        handler.setLevel(logging.INFO)
        handler.setFormatter(LOGFILE_FORMATTER)
        LOG.addHandler(handler)
        LOG.info("START SCRIPT: Update RLID tax map repository from staging.")
        currency_date = rlid_data_currency("Tax Maps")
        file_name_release_date = tax_map_file_name_release_map(
            start_datetime=currency_date
        )
        count = Counter()
        # Walk the name/date map, adding, archiving & updating as needed.
        for file_name, release_date in file_name_release_date.items():
            production_path = rlid_document_path(file_name, document_type="tax-map")
            staged_path = rlid_document_path(
                file_name, document_type="tax-map-staging"
            )
            state = update_tax_map(
                staged_path, production_path, release_date, archive_previous=True
            )
            count[state] += 1
    document.log_state_counts(count, documents_type="tax maps")
    # Finally, update tax map repository currency date (if we placed any).
    if count["updated"]:
        latest_release = max(file_name_release_date.values())
        rlid_data_currency_setter("Tax Maps", latest_release)
    elapsed(start_time, LOG)
    LOG.info("END SCRIPT: Update")
示例#7
0
def missing_in_rlid_etl():
    """Write a CSV log of deeds & records documents missing from RLID.

    For every path yielded by `rlid_record_paths()` that does not exist on disk,
    a row of (document ID, document path, check time) is written to
    `Missing_in_RLID.csv` in the staging folder. The number of missing documents
    is logged at the end.
    """
    start_time = datetime.datetime.now()
    LOG.info(
        "Start: Compile table of deeds & records listed in Lane County records system,"
        " but not present in RLID repository."
    )
    conn = credential.UNCPathCredential(PATH["staging"], **credential.RLID_DATA_SHARE)
    csv_path = os.path.join(PATH["staging"], "Missing_in_RLID.csv")
    check_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    missing_count = 0
    # Binary mode is kept as-is: it is the Python 2 convention for csv files.
    with conn, open(csv_path, "wb") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["document_id", "document_path", "check_time"])
        for doc_path in rlid_record_paths():
            if os.path.exists(doc_path):
                continue
            # Document ID is the file name without its extension.
            doc_id, _ = os.path.splitext(os.path.basename(doc_path))
            writer.writerow((doc_id, doc_path, check_time))
            missing_count += 1
    LOG.info("Found %s missing documents.", missing_count)
    LOG.info("End: Compile.")
    elapsed(start_time, LOG)
示例#8
0
def tax_maps_not_in_source_etl():
    """Run ETL for log of tax map documents in RLID but not source repository.

    Compares the PDF file names (passed through `fixed_file_name`) found under
    the tax map staging repository ("County") against those in the RLID tax map
    repository ("RLID"), in both directions. For each direction a CSV named
    `In_{repo}_Not_{other}.csv` is written to the RLID tax map repository, with
    one (file name, check time) row per unmirrored document.

    We used to have an automatic check & retire for RLID tax maps that were no longer in
    the source repository. This pretty much retired the entire taxmap repository the
    night of 2015-05-07. This was because there appear to be times when the source
    repository is not reachable, and/or reports nothing in the source. For now, we will
    just log potential orphans.

    If you do need to "retire" a tax map no longer in use:
    1. Make an archive copy of the document with this function call:
        ```
        archive_tax_map(
            tax_map_path, archive_date=datetime.datetime.now(), is_replaced=False
        )
        ```
    2. Move the document file to the `RetiredNoReplacement` subfolder.
    3. Execute the following SQL statement:
        ```
        if exists (
            select 1 from RLID.dbo.Taxmap_Retired where image_filename = {file-name}
        ) begin;
            update RLID.dbo.Taxmap_Retired
            set date_retired = {same-date-as-archive-above}
            where image_filename = {file-name};
        end;
        else begin;
            insert into RLID.dbo.Taxmap_Retired(image_filename, date_retired)
            values ({file-name}, {same-date-as-archive-above});
        end;
        delete from RLID.dbo.Taxmap_Image where image_filename = {file-name};
        ```
    """
    start_time = datetime.datetime.now()
    LOG.info(
        "Start: Compile table of tax maps not mirrored between the Lane County & RLID"
        " repositories.\nAny tax maps in RLID not mirrored in the county repository are"
        " likely tax maps that no longer exist, and should be researched (and perhaps"
        " retired)."
    )
    conn = credential.UNCPathCredential(
        path.RLID_DATA_SHARE, **credential.RLID_DATA_SHARE
    )
    with conn:
        check_time = start_time.strftime("%Y-%m-%d %H:%M")
        file_names = {
            # Staging is walked recursively; it stands in for the county repository.
            "County": {
                fixed_file_name(name)
                for _, _, filenames in os.walk(REPO_PATH["tax-map-staging"])
                for name in filenames
                if name.lower().endswith(".pdf")
            },
            # Only the top level of the RLID repository is listed.
            "RLID": {
                fixed_file_name(name)
                for name in os.listdir(REPO_PATH["tax-map"])
                if name.lower().endswith(".pdf")
            },
        }
        # Both orderings: (County, RLID) and (RLID, County).
        for repo, other in permutations(["County", "RLID"]):
            LOG.info("Checking %s repository for tax maps not mirrored.", repo)
            unmirrored_file_names = sorted(file_names[repo] - file_names[other])
            csv_path = os.path.join(
                REPO_PATH["tax-map"], "In_{}_Not_{}.csv".format(repo, other)
            )
            # Binary mode is kept as-is: it is the Python 2 convention for csv files.
            with open(csv_path, "wb") as csv_file:
                csv_ = csv.writer(csv_file)
                csv_.writerow(("file_name", "check_time"))
                for file_name in unmirrored_file_names:
                    csv_.writerow((file_name, check_time))
            LOG.info(
                "Found %s tax maps in %s repository not mirrored in %s.",
                len(unmirrored_file_names),
                repo,
                other,
            )
    LOG.info("End: Compile.")
    elapsed(start_time, LOG)