Example #1
def create_marc_file(system, dst_fh, no_holdings_msg=None):
    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            marc = r.wchits.prepped_marc
            if marc:
                # delete unsupported headings
                # remove_unsupported_subject_headings(marc)
                # remove_unwanted_tags(marc)
                # remove_ebook_isbns(marc)
                # add barcode if added by user
                if r.barcode is not None:
                    for field in marc.get_fields("949"):
                        if field.indicators == [" ", "1"]:
                            field.add_subfield("i", r.barcode)
                if no_holdings_msg:
                    msg = "OCLC holdings not updated"
                    if system == "NYPL":
                        field = marc["901"]
                    elif system == "BPL":
                        field = marc["947"]
                    if "h" not in field:
                        field.add_subfield("h", msg)
                    else:
                        field["h"] = msg
                try:
                    write_marc21(dst_fh, marc)
                except TypeError:
                    module_logger.error(
                        "Unable to create marc file for record: "
                        "wchid: {}, oclcNo: {}".format(
                            r.wchits.wchid, r.wchits.match_oclcNo
                        )
                    )
                    raise

    if ".mrc" in dst_fh:
        dst_fh = dst_fh.replace(".mrc", ".csv")
    else:
        dst_fh = "{}.csv".format(dst_fh)
    header = ["position", "result", "title", "ISBN"]
    save2csv(dst_fh, header)

    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits")
        for r in recs:
            if r.selected and r.wchits.prepped_marc:
                result = "pass"
            else:
                result = "reject"
            try:
                row = [r.wchits.wchid, result, r.meta.title, r.meta.t020[0]]
            except IndexError:
                row = [r.wchits.wchid, result, r.meta.title, None]
            save2csv(dst_fh, row)
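All of these snippets rely on a project-specific session_scope() helper that is not shown here. A minimal sketch of the conventional SQLAlchemy pattern it presumably implements (the engine URL and session factory below are assumptions, not the project's actual configuration):

from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///datastore.db")  # hypothetical engine; the project wires up its own
Session = sessionmaker(bind=engine)

@contextmanager
def session_scope():
    # provide a transactional scope around a series of operations
    session = Session()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()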
Example #2
def save_stats():
    module_logger.debug('Saving batch stats.')
    batch = shelve.open(BATCH_META)
    timestamp = batch['timestamp']
    system = batch['system']
    library = batch['library']
    agent = batch['agent']
    file_qty = len(batch['file_names'])
    batch.close()

    try:
        df = reports.shelf2dataframe(BATCH_STATS, system)
    except ValueError:
        df = None

    if df is not None:
        stats = reports.create_stats(system, df)

        with session_scope() as session:
            # find out if timestamp already added
            # if not add records
            # add batch record
            record = insert_or_ignore(session,
                                      PVR_Batch,
                                      timestamp=timestamp,
                                      system=system,
                                      library=library,
                                      agent=agent,
                                      file_qty=file_qty)
            session.flush()
            bid = record.bid
            for row in stats.iterrows():
                name = row[1]['vendor']
                record = insert_or_ignore(session, Vendor, name=name)
                session.flush()
                vid = record.vid

                if system == 'nypl':
                    record = insert_or_ignore(session,
                                              PVR_File,
                                              bid=bid,
                                              vid=vid,
                                              new=row[1]['insert'],
                                              dups=row[1]['attach'],
                                              updated=row[1]['update'],
                                              mixed=row[1]['mixed'],
                                              other=row[1]['other'])
                else:
                    record = insert_or_ignore(session,
                                              PVR_File,
                                              bid=bid,
                                              vid=vid,
                                              new=row[1]['insert'],
                                              dups=row[1]['attach'],
                                              updated=row[1]['update'])
    else:
        module_logger.warning(
            'Unable to create a dataframe from BATCH_STATS.')
        raise OverloadError(
            'Encountered problems while trying to save statistics.')
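insert_or_ignore() is another datastore helper used throughout these examples. A plausible minimal implementation, assuming SQLAlchemy declarative models (the real helper may differ):

def insert_or_ignore(session, model, **kwargs):
    # return the existing row matching the keyword filters, otherwise add a new one
    instance = session.query(model).filter_by(**kwargs).first()
    if instance is None:
        instance = model(**kwargs)
        session.add(instance)
    return instance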
Example #3
def cumulative_bpl_stats(start_date, end_date):
    """
    Produces dataframe with cumulative statistics of
    processed BPL records
    """

    stmn = text(
        """
        SELECT pvr_batch.system, sum(pvr_file.new), sum(pvr_file.dups), sum(pvr_file.updated), vendor.name 
        FROM pvr_file 
        JOIN pvr_batch ON pvr_file.bid = pvr_batch.bid 
        JOIN vendor ON pvr_file.vid = vendor.vid 
        WHERE pvr_batch.system = "bpl" AND pvr_batch.timestamp>=:start_date AND pvr_batch.timestamp<:end_date 
        GROUP BY vendor.name
        """
    )

    stmn = stmn.bindparams(start_date=start_date, end_date=end_date)

    with session_scope() as session:
        results = session.execute(stmn)
        labels = ["system", "insert", "attach", "overlay", "vendor"]
        df = pd.DataFrame.from_records(results, columns=labels)
        df["total loaded"] = df["insert"] + df["attach"] + df["overlay"]
        df = df[["vendor", "insert", "attach", "overlay", "total loaded"]]
        return df
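A usage sketch, assuming batch timestamps are stored as ISO-formatted strings (the date range below is only an example):

df = cumulative_bpl_stats("2019-07-01", "2020-07-01")
print(df.to_string(index=False))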
Example #4
def store_connection(name, host, folder, user, password, system):
    if name == '':
        name = None
    if host == '':
        host = None
    if folder == '':
        folder = None
    if system == '':
        system = None
    if user == '':
        user = None
    else:
        user = base64.b64encode(user)
    if password == '':
        password = None
    else:
        password = base64.b64encode(password)

    try:
        with session_scope() as db_session:
            insert_or_ignore(db_session,
                             FTPs,
                             name=name,
                             host=host,
                             folder=folder,
                             user=user,
                             password=password,
                             system=system)
    except IntegrityError as e:
        module_logger.error('Unable to store FTP details. Error: {}'.format(e))
        raise OverloadError('Error. The name of the new connection is\n'
                            'already used or some of the required elements\n'
                            'are missing.')
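Note that base64.b64encode() accepts str only on Python 2; on Python 3 the credentials would have to be converted to bytes first, for example:

user = base64.b64encode(user.encode("utf-8")).decode("ascii")
password = base64.b64encode(password.encode("utf-8")).decode("ascii")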
Example #5
def create_report(query_date=None):
    if query_date is None:
        query_date = date.today().strftime('%Y-%m-%d')

    stmn = """SELECT timestamp, bid, bibs.title, bibs.b_call, copies.oid, copies.copies,
        code, description
        FROM tickets
        JOIN bibs ON tickets.bid = bibs.id
        JOIN copies ON tickets.id = copies.tid
        JOIN tick_conf_joiner ON tick_conf_joiner.tid = tickets.id
        JOIN conflicts ON tick_conf_joiner.cid = conflicts.id
        WHERE timestamp LIKE "{}%" """.format(query_date)

    msg = []
    msg.append('BPL QCbot report for day {}:'.format(query_date))
    msg.append('{}\t{}\t{}\t{}\t{}\t{}\t{}'.format(
        'bib id' + ' ' * 4, 'order id' + ' ' * 2, 'copies', 'title' + ' ' * 20,
        'call #' + ' ' * 19, 'error code', 'error description'))

    with session_scope() as session:
        results = run_query(session, stmn)
        for record in results:
            # shorten title if needed
            try:
                title = record.title[:25]
            except IndexError:
                title = record.title
            except TypeError:
                title = ''

            # shorten call number if needed
            try:
                callNo = record.b_call[:25]
            except IndexError:
                callNo = record.b_call
            except TypeError:
                callNo = ''

            # copies
            clen = 6 - len('{}'.format(record.copies))
            copies = '{}{}'.format(record.copies, ' ' * clen)

            # title
            tlen = 25 - len(title)
            title = '{}{}'.format(title, ' ' * tlen)

            # call number
            cnlen = 25 - len(callNo)
            callNo = '{}{}'.format(callNo, ' ' * cnlen)

            # error code
            code = '{}{}'.format(record.code, ' ' * 3)

            # create a new line with data
            msg.append('b{}a\to{}a\t{}\t{}\t{}\t{}\t{}'.format(
                record.bid, record.oid, copies, title, callNo, code,
                record.description))

    return '\n'.join(msg)
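The manual space-padding above can also be written with str.ljust(), which pads to the same fixed column widths:

copies = "{}".format(record.copies).ljust(6)
title = title.ljust(25)
callNo = callNo.ljust(25)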
Example #6
def update_template(otid, record):
    try:
        with session_scope() as session:
            update_nypl_template(session, otid, **record)
    except IntegrityError as e:
        module_logger.error('IntegrityError on template update: {}'.format(e))
        raise OverloadError('Duplicate/missing template name\n'
                            'or missing primary matchpoint')
Example #7
def save_template(record):
    try:
        with session_scope() as session:
            insert_or_ignore(session, NYPLOrderTemplate, **record)
    except IntegrityError as e:
        module_logger.error('IntegrityError on template save: {}'.format(e))
        raise OverloadError('Duplicate/missing template name\n'
                            'or missing primary matchpoint')
Example #8
def save_cart_info(sheet_id, system_id):
    with session_scope() as session:
        rec = insert(session,
                     Cart,
                     system_id=system_id,
                     shopping_cart_id=sheet_id)
        session.flush()
        return (rec.rid)
Example #9
def count_total():
    meta_ids = []
    with session_scope() as db_session:
        recs = retrieve_records(db_session, WCSourceMeta)
        total = 0
        for rec in recs:
            total += 1
            meta_ids.append(rec.wcsmid)
    return total, meta_ids
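retrieve_record() and retrieve_records() are thin query wrappers used across these snippets. A plausible sketch, again assuming SQLAlchemy (the actual helpers may add ordering or eager loading):

def retrieve_record(session, model, **kwargs):
    # first row matching the keyword filters, or None
    return session.query(model).filter_by(**kwargs).first()


def retrieve_records(session, model, **kwargs):
    # all rows matching the keyword filters
    return session.query(model).filter_by(**kwargs).all()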
Example #10
def populate_branch_tab(creds, system_id, sheet_id):
    data = []
    with session_scope() as session:
        branch_records = retrieve_records_ordered_by_code(session,
                                                          Branch,
                                                          system_id=system_id)
        for record in branch_records:
            if record.code:
                data.append([record.code])
    append2sheet(creds, sheet_id, 'branch codes', data)
Example #11
def delete_connection(name, system):
    with session_scope() as db_session:
        try:
            delete_record(db_session, FTPs, name=name, system=system)
        except Exception as exc:
            _, _, exc_traceback = sys.exc_info()
            tb = format_traceback(exc, exc_traceback)
            module_logger.error(
                'Unhandled error of deletion of FTP details. {}'.format(tb))
            raise OverloadError(exc)
Example #12
def remove_previous_process_data():
    module_logger.debug("Deleting previous process data.")
    with session_scope() as db_session:
        # deletes WCSourceBatch data and all related tables
        delete_all_table_data(db_session, WCSourceBatch)
        module_logger.debug("Data from previous run has been deleted.")

    try:
        os.remove(W2S_MULTI_ORD)
        os.remove(W2S_SKIPPED_ORD)
    except WindowsError:
        pass
Example #13
def cumulative_nypl_stats(start_date, end_date):
    stmn = text(
        """
        SELECT pvr_batch.system, pvr_batch.library, sum(pvr_file.new), sum(pvr_file.dups), sum(pvr_file.updated), sum(pvr_file.mixed), sum(pvr_file.other), vendor.name
        FROM pvr_file
        JOIN pvr_batch ON pvr_file.bid = pvr_batch.bid
        JOIN vendor ON pvr_file.vid = vendor.vid
        WHERE pvr_batch.system = "nypl" AND pvr_batch.timestamp>=:start_date AND pvr_batch.timestamp<:end_date
        GROUP BY pvr_batch.library, vendor.name
        """
    )

    stmn = stmn.bindparams(start_date=start_date, end_date=end_date)

    with session_scope() as session:
        results = session.execute(stmn)

        nypl_labels = [
            "system",
            "library",
            "insert",
            "attach",
            "overlay",
            "mixed",
            "other",
            "vendor",
        ]
        df = pd.DataFrame.from_records(results, columns=nypl_labels)
        bdf = df[df["library"] == "branches"]
        bdf = bdf[["vendor", "insert", "attach", "overlay", "mixed", "other"]]
        bdf["total loaded"] = bdf["insert"] + bdf["attach"] + bdf["overlay"]
        bdf.columns = [
            "vendor",
            "insert",
            "attach",
            "overlay",
            "mixed dups",
            "research dups",
            "total loaded",
        ]
        rdf = df[df["library"] == "research"]
        rdf = rdf[["vendor", "insert", "attach", "overlay", "mixed", "other"]]
        rdf["total loaded"] = rdf["insert"] + rdf["attach"] + rdf["overlay"]
        rdf.columns = [
            "vendor",
            "insert",
            "attach",
            "overlay",
            "mixed dups",
            "branches dups",
            "total loaded",
        ]
        return (bdf, rdf)
Example #14
def get_connection_details(name, system):
    with session_scope() as db_session:
        record = retrieve_record(db_session, FTPs, name=name, system=system)
        if record.user:
            user = base64.b64decode(record.user)
        else:
            user = ''
        if record.password:
            password = base64.b64decode(record.password)
        else:
            password = ''

        return (record.host, user, password, record.folder)
Example #15
def set_oclc_holdings(dst_fh):
    oclc_numbers = []
    hold_not_set = []
    with session_scope() as db_session:
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)
        for r in recs:
            if r.wchits.match_oclcNo:
                oclc_numbers.append(str(r.wchits.match_oclcNo))

        # update holdings
        batch_rec = retrieve_record(db_session, WCSourceBatch)
        creds = get_credentials(batch_rec.api)
        token = get_token(creds)

        with MetadataSession(credentials=token) as session:
            responses = session.holdings_set_batch(oclc_numbers)
            holdings = holdings_responses(responses)
            if holdings:
                for oclcNo, holding in holdings.items():
                    recs = retrieve_records(db_session, WCHit, match_oclcNo=oclcNo)

                    for rec in recs:
                        if holding[0] in ("set", "exists"):
                            holding_set = True
                        else:
                            holding_set = False
                        update_hit_record(
                            db_session,
                            WCHit,
                            rec.wchid,
                            holding_set=holding_set,
                            holding_status=holding[0],
                            holding_response=holding[1],
                        )

        db_session.commit()

        # verify all selected had holdings set
        recs = retrieve_related(db_session, WCSourceMeta, "wchits", selected=True)

        for r in recs:
            if not r.wchits.holding_set:
                hold_not_set.append(r.wchits.match_oclcNo)

    fh_csv = os.path.join(os.path.split(dst_fh)[0], "holdings-issues.csv")
    if hold_not_set:
        for oclcNo in hold_not_set:
            save2csv(fh_csv, [oclcNo])
        return False
    else:
        return True
Example #16
def persist_choice(meta_ids, selected, barcode_var=None):
    with session_scope() as db_session:
        for mid in meta_ids:
            if barcode_var:
                if barcode_var.get():
                    barcode = barcode_var.get()
                else:
                    barcode = None
            else:
                barcode = None

            update_meta_record(
                db_session, WCSourceMeta, mid, selected=selected, barcode=barcode
            )
Example #17
def set_new_branch(tabs, sheet_id):
    """
    Parses shopping cart with provided google sheet id and
    updates dst_branch_id column in hold table of the datastore based on staff
    selection
    args:
        tabs: list of str, list of names of google sheets in the spreadsheet
        sheet_id: str, google sheet id
    """
    creds = get_access_token()
    service = discovery.build('sheets', 'v4', credentials=creds)
    sheet = service.spreadsheets()
    for tab in tabs:
        results = sheet.values().get(
            spreadsheetId=sheet_id,
            range=tab).execute()

        values = results.get('values', [])
        for row in values[1:]:
            iid = None
            try:
                iid = int(row[6])

                # find row with actual data
                if iid:
                    try:
                        loc_code = row[7].strip()
                        dst_branch_id = BRANCH_CODES[loc_code][0]
                    except IndexError:
                        dst_branch_id = BRANCH_CODES[None][0]
                    # print(f'dst_id:{dst_branch_id}')
            except IndexError:
                # row with no data (example a section heading row)
                pass
            except ValueError:
                # log as error or warning
                pass

            if iid is not None:
                with session_scope() as session:
                    hold_rec = retrieve_record(
                        session, Hold, item_id=iid,
                        outstanding=False, issued=False)
                    if hold_rec:
                        update_record(
                            session, Hold, hold_rec.sid,
                            dst_branch_id=dst_branch_id,
                            issued=True)
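A usage sketch; the tab names and spreadsheet id below are hypothetical placeholders:

tabs = ["fiction", "non-fiction"]
sheet_id = "example-google-sheet-id"
set_new_branch(tabs, sheet_id)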
Example #18
def save2store(fh, system_id):
    data = sierra_export_reader(fh)
    with session_scope() as session:
        branch_idx = create_code_idx(session, Branch, system_id=system_id)
        mat_cat_idx = create_code_idx(session, MatCat, system_id=system_id)
        audn_idx = create_code_idx(session, Audience)
        lang_idx = create_code_idx(session, Language)
        itemtype_idx = create_code_idx(session, ItemType, system_id=system_id)

        for element in data:
            # parse source shelf code and store it
            shelfcode_id = get_shelfcode_id(
                session, element.location, system_id)

            overflow_item = dict(
                system_id=system_id,
                bib_id=prep_ids(element.bib_id),
                title=prep_title(element.title),
                author=prep_author(element.author),
                call_no=element.call_no.strip(),
                item_id=prep_ids(element.item_id),
                src_branch_id=determine_branch_id(
                    element.location, branch_idx),
                src_branch_shelf_id=shelfcode_id,
                pub_date=parse_pub_date(element.pub_info),
                bib_created_date=string2date(element.bib_created_date),
                item_created_date=string2date(element.item_created_date),
                mat_cat_id=get_mat_cat_id(
                    element.call_no, element.location, element.opac_msg,
                    system_id, mat_cat_idx),
                audn_id=get_audience_id(element.location, audn_idx),
                lang_id=get_language_id(element.call_no, lang_idx),
                item_type_id=get_itemtype_id(
                    element.item_type, itemtype_idx),
                last_out_date=string2date(element.last_out_date),
                total_checkouts=string2int(element.total_checkouts),
                total_renewals=string2int(element.total_renewals))

            insert(session, OverflowItem, **overflow_item)
Example #19
def issue_holds(api_url, sierra_key, sierra_secret, account_id, cart_id=None):
    if cart_id is None:
        cart_id = get_latest_cart_record().sid
    with SierraSession(api_url, sierra_key, sierra_secret) as ils_session:
        with session_scope() as db_session:
            recs = retrieve_records(db_session,
                                    Hold,
                                    cart_id=cart_id,
                                    outstanding=False,
                                    issued=True)

            for rec in recs:
                if rec.dst_branch_id != 1:
                    dst_branch = retrieve_record(db_session,
                                                 Branch,
                                                 sid=rec.dst_branch_id)
                    response = ils_session.hold_place_on_item(
                        account_id, rec.item_id, dst_branch.code)
                    print(
                        f'i{rec.item_id}a,{rec.dst_branch_id},{dst_branch.code},{response.status_code},{response.text}'
                    )
                else:
                    print(f'i{rec.item_id}a,{rec.dst_branch_id},None,,')
Example #20
def get_bib(meta_id):
    data = []
    with session_scope() as db_session:
        r = retrieve_one_related(db_session, WCSourceMeta, "wchits", wcsmid=meta_id)
        sierra_data = dict(
            title=r.meta.title,
            sierraId=r.meta.sierraId,
            oid=r.meta.oid,
            locs=r.meta.locs,
            venNote=r.meta.venNote,
            note=r.meta.note,
            intNote=r.meta.intNote,
            choice=r.selected,
            barcode=r.barcode,
        )
        if r.wchits.prepped_marc:
            worldcat_data = str(r.wchits.prepped_marc).splitlines()
        else:
            worldcat_data = None
        data.append((r.wchits.wchid, sierra_data, worldcat_data))
        db_session.expunge_all()

    return data
Example #21
def launch_process(
    source_fh,
    data_source,
    system,
    library,
    progbar1,
    progbar2,
    process_label,
    hits,
    nohits,
    skipped,
    meet_crit_counter,
    fail_user_crit_counter,
    fail_glob_crit_counter,
    action,
    encode_level,
    mat_type,
    cat_rules,
    cat_source,
    recap_range,
    id_type="ISBN",
    api=None,
):
    """
    work notes:
    1. iterate through the source files and extract bib/order metadata
    2. temporarily persist this data in local datastore
    3. iterate over the batch and find best hit for each
    4. persist in local store matched record as a pymarc object
    5. display results (with all data needed for Sierra import) to user
    6. allow user to decide what to write to final file

    args:
        source_fh: str, file path
        data_source: str, 'Sierra export' or 'IDs list'
        system: str, 'NYPL' or 'BPL'
        library: str, 'research' or 'branches'
        progbar1: tkinter widget, overall progressbar
        progbar2: tkinter widget, task progressbar
        process_label: tkinter StringVar, current task label
        hits: tkinter IntVar, hits counter
        nohits: tkinter IntVar, failed search counter
        skipped: tkinter IntVar, skipped records counter
        meet_crit_counter: tkinter IntVar, success match & eval counter
        fail_user_crit_counter: tkinter IntVar, failed user criteria counter
        fail_glob_crit_counter: tkinter IntVar, failed global criteria counter
        action: str, 'catalog' or 'upgrade'
        encode_level: str, 'any', ...
        mat_type: str, 'any', 'print', 'large print', 'dvd', 'bluray'
        cat_rules: str,  'any', 'RDA-only'
        cat_source: str, 'any', 'DLC'
        recap_range: list, upper and lower limits of ReCAP numbers
        id_type: str, 'ISBN', 'UPC', 'ISSN', 'LCCN', 'OCLC #'
        api: str, name of api to be used for queries
    """

    if mat_type == "":
        mat_type = None
    if cat_source == "":
        cat_source = None

    module_logger.debug(
        "Launching W2S process. "
        "Params: source_fh:{}, data_source:{}, system:{}, "
        "library:{}, action:{}, encode_level:{}, mat_type:{}, "
        "cat_rules:{}, cat_source:{}, recap_range:{}, id_type:{}, "
        "api:{}".format(
            source_fh,
            data_source,
            system,
            library,
            action,
            encode_level,
            mat_type,
            cat_rules,
            cat_source,
            recap_range,
            id_type,
            api,
        )
    )

    processed_counter = 0
    found_counter = 0
    not_found_counter = 0
    skipped_counter = 0

    remove_previous_process_data()

    # validate correctness of sierra export
    process_label.set("reading:")

    if data_source == "Sierra export":
        sierra_export_reader(source_fh, system, progbar1, progbar2)
    elif data_source == "IDs list":
        id_list_reader(source_fh, progbar1, progbar2)

    # keep track of recap call numbers
    if recap_range:
        recap_no = recap_range[0]
    else:
        recap_no = None

    with session_scope() as db_session:
        # create batch record
        batch_rec = insert_or_ignore(
            db_session,
            WCSourceBatch,
            file=source_fh,
            system=system,
            library=library,
            action=action,
            api=api,
            data_source=data_source,
            encode_level=encode_level,
            mat_type=mat_type,
            cat_rules=cat_rules,
            cat_source=cat_source,
            id_type=id_type,
        )
        db_session.flush()
        batch_id = batch_rec.wcsbid

        # parse depending on the data source
        if data_source == "IDs list":
            with open(source_fh, "r") as file:
                reader = csv.reader(file)
                # skip header
                next(reader)
                if id_type == "ISBN":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system, dstLibrary=library, t020=[parse_isbn(row[0])]
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                elif id_type == "UPC":
                    raise OverloadError("Not implemented.")
                    pass

                    # will be implemented later
                    # for row in reader:
                    #     meta = BibOrderMeta(
                    #         system=system,
                    #         dstLibrary=library,
                    #         t024=[parse_upc(row[0])])

                elif id_type == "OCLC #":
                    for row in reader:
                        meta = BibOrderMeta(
                            system=system, dstLibrary=library, t001=row[0]
                        )
                        insert_or_ignore(
                            db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                        )
                        update_progbar(progbar1)
                        update_progbar(progbar2)
                else:
                    raise OverloadError("Not implemented.")

        elif data_source == "Sierra export":
            data = sierra_export_data(source_fh, system, library)
            for meta, single_order in data:
                if single_order is None:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_SKIPPED_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                elif single_order is False:
                    row = ["b{}a".format(meta.sierraId), meta.title]
                    skipped_counter += 1
                    skipped.set(skipped_counter)
                    save2csv(W2S_MULTI_ORD, row)
                    progbar1["maximum"] = progbar1["maximum"] - 3
                else:
                    insert_or_ignore(
                        db_session, WCSourceMeta, wcsbid=batch_id, meta=meta
                    )
                    update_progbar(progbar1)
                update_progbar(progbar2)

        creds = get_credentials(api)
        wskey = creds["key"]
        db_session.commit()

        # query Worldcat
        process_label.set("querying:")
        # reset progbar2
        progbar2["value"] = 0
        metas = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)
        with SearchSession(credentials=wskey) as session:
            for m in metas:
                module_logger.debug(m.meta)
                hit = False
                if m.meta.t001:
                    query = construct_sru_query(
                        m.meta.t001,
                        keyword_type="OCLC #",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("OCLC# request: {}".format(res.url))

                    hit = interpret_search_response(res, db_session, m.wcsmid)

                    if hit:
                        found_counter += 1

                if m.meta.t010 and not hit:
                    query = construct_sru_query(
                        m.meta.t010,
                        keyword_type="LCCN",
                        mat_type=mat_type,
                        cat_source=cat_source,
                    )
                    res = session.sru_query(query=query)
                    module_logger.debug("LCCN request: {}".format(res.url))

                    hit = interpret_search_response(res, db_session, m.wcsmid)

                    if hit:
                        found_counter += 1

                if m.meta.t020 and not hit:
                    # will iterate over all ISBNs if no hits
                    for isbn in m.meta.t020:
                        query = construct_sru_query(
                            isbn,
                            keyword_type="ISBN",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("ISBN request: {}".format(res.url))

                        hit = interpret_search_response(res, db_session, m.wcsmid)

                        if hit:
                            found_counter += 1
                            break  # stop searching

                if m.meta.t024 and not hit:
                    for upc in m.meta.t024:
                        query = construct_sru_query(
                            upc,
                            keyword_type="UPC",
                            mat_type=mat_type,
                            cat_source=cat_source,
                        )
                        res = session.sru_query(query=query)
                        module_logger.debug("UPC request: {}".format(res.url))

                        hit = interpret_search_response(res, db_session, m.wcsmid)

                        if hit:
                            found_counter += 1
                            break  # stop searching

                if not hit:
                    not_found_counter += 1
                    module_logger.debug(
                        "Unable to find any matches in Worldcat for {}.".format(m.meta)
                    )
                    interpret_search_response(None, db_session, m.wcsmid)

                hits.set(found_counter)
                nohits.set(not_found_counter)

                update_progbar(progbar1)
                update_progbar(progbar2)
                processed_counter += 1

        db_session.commit()

        # check if meet criteria
        process_label.set("analyzing:")
        progbar2["value"] = 0
        rows = retrieve_records(db_session, WCHit, hit=True)
        for row in rows:
            results = row.query_results
            recs = results2record_list(results)
            for xml_record in recs:
                fulfills = False
                fail_types = []
                if meets_upgrade_criteria(xml_record):
                    if meets_user_criteria(
                        xml_record, encode_level, mat_type, cat_rules, cat_source
                    ):
                        fulfills = True
                        if action == "upgrade":
                            meet_crit_counter.set(meet_crit_counter.get() + 1)

                            oclcNo = get_oclcNo(xml_record)
                            update_hit_record(
                                db_session, WCHit, row.wchid, match_oclcNo=oclcNo
                            )

                            update_progbar(progbar1)
                            update_progbar(progbar2)
                            break

                        elif action == "catalog":
                            if meets_catalog_criteria(xml_record, library):
                                fulfills = True
                                meet_crit_counter.set(meet_crit_counter.get() + 1)
                                oclcNo = get_oclcNo(xml_record)
                                update_hit_record(
                                    db_session, WCHit, row.wchid, match_oclcNo=oclcNo
                                )

                                update_progbar(progbar1)
                                update_progbar(progbar2)
                                break
                            else:
                                fail_types.append("global")
                    else:
                        fail_types.append("user")
                else:
                    fail_types.append("global")

            if not fulfills:
                if "user" in fail_types:
                    fail_user_crit_counter.set(fail_user_crit_counter.get() + 1)
                else:
                    fail_glob_crit_counter.set(fail_glob_crit_counter.get() + 1)

        db_session.commit()

        # download and prep
        process_label.set("downloading:")
        # reset progbar2
        progbar2["value"] = 0

        # obtain access token
        token = get_token(creds)
        if token.token_str is None:
            module_logger.error(
                "Worldcat token not obtained. Error: {}.".format(token.server_response)
            )
        else:
            module_logger.debug("Worldcat token obtained.")

        # open Metadata API session
        with MetadataSession(credentials=token) as session:
            metas = retrieve_related(
                db_session, WCSourceMeta, "wchits", wcsbid=batch_id
            )
            for m in metas:
                if m.wchits.match_oclcNo:
                    xml_record = request_record(session, m.wchits.match_oclcNo)
                    if xml_record is not None:
                        update_hit_record(
                            db_session, WCHit, m.wchits.wchid, match_marcxml=xml_record
                        )
                update_progbar(progbar1)
                update_progbar(progbar2)

        db_session.commit()

        # prepare MARC files
        process_label.set("prepping:")
        progbar2["value"] = 0

        # check if Sierra bib # provided and use
        # for overlay command line
        rows = retrieve_records(db_session, WCSourceMeta, wcsbid=batch_id)

        for row in rows:
            # initial workflow shared by upgrade functionality
            xml_record = row.wchits.match_marcxml
            if xml_record is not None:
                marc_record = marcxml2array(xml_record)[0]
                remove_unsupported_subject_headings(system, marc_record)
                remove_unwanted_tags(marc_record)
                remove_ebook_isbns(marc_record)
                marc_record.remove_fields("901", "907", "945", "949", "947")
                initials = create_initials_field(system, library, "W2Sbot")
                marc_record.add_ordered_field(initials)

                if data_source == "Sierra export":
                    order_data = row.meta
                    if order_data.sierraId:
                        overlay_tag = create_target_id_field(
                            system, order_data.sierraId
                        )
                        marc_record.add_ordered_field(overlay_tag)

                if system == "NYPL":
                    marc_record.remove_fields("001", "910")
                    tag_001 = nypl_oclcNo_field(xml_record)
                    marc_record.add_ordered_field(tag_001)

                    # add Sierra bib code 3 and default location
                    if library == "branches":
                        defloc = NBIB_DEFAULT_LOCATIONS["branches"]
                    elif library == "research":
                        defloc = NBIB_DEFAULT_LOCATIONS["research"]

                    tag_949 = create_command_line_field("*b3=h;bn={};".format(defloc))
                    marc_record.add_ordered_field(tag_949)

                if action == "catalog":
                    # add call number & persist
                    if data_source == "Sierra export":
                        order_data = row.meta

                        local_fields = create_local_fields(
                            xml_record,
                            system,
                            library,
                            order_data=order_data,
                            recap_no=recap_no,
                        )

                    else:
                        # data source a list of IDs
                        local_fields = create_local_fields(
                            xml_record, system, library, recap_no=recap_no
                        )

                    if local_fields:
                        for field in local_fields:
                            if field is not None:
                                marc_record.add_ordered_field(field)
                        if system == "NYPL" and library == "research":
                            recap_no += 1

                update_hit_record(
                    db_session, WCHit, row.wchits.wchid, prepped_marc=marc_record
                )

            update_progbar(progbar1)
            update_progbar(progbar2)

            # make sure W2S stays within assigned Recap range
            if system == "NYPL" and library == "research":
                if action == "catalog":
                    if recap_no > recap_range[1]:
                        raise OverloadError(
                            "Used all available ReCAP call numbers " "assigned for W2S."
                        )

    # show completed
    progbar1["value"] = progbar1["maximum"]
    progbar2["value"] = progbar2["maximum"]
Example #22
def get_ftp_connections(system):
    with session_scope() as db_session:
        names = retrieve_records(db_session, FTPs, system=system)
        return [x.name for x in names]
Example #23
def audience_label_idx():
    with session_scope() as session:
        audn_recs = retrieve_records(session, Audience)
        audn_idx = {a.label: a.rid for a in audn_recs}

        return audn_idx
Example #24
def cumulative_vendor_stats(start_date, end_date):
    """
    Produces dataframe of vendor statistics during span of time
    """

    with session_scope() as session:
        query = session.query(
            PVR_Batch.system,
            PVR_Batch.library,
            func.sum(PVR_File.new),
            func.sum(PVR_File.dups),
            func.sum(PVR_File.updated),
            func.sum(PVR_File.mixed),
            func.sum(PVR_File.other),
            Vendor.name,
        )
        query = query.join(PVR_File, PVR_File.vid == Vendor.vid)

        nypl_br_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "nypl",
                PVR_Batch.library == "branches",
            )
            .group_by(Vendor.name)
            .all()
        )
        nypl_rl_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "nypl",
                PVR_Batch.library == "research",
            )
            .group_by(Vendor.name)
            .all()
        )
        bpl_results = (
            query.filter(
                PVR_Batch.timestamp >= start_date,
                PVR_Batch.timestamp < end_date,
                PVR_Batch.system == "bpl",
            )
            .group_by(Vendor.name)
            .all()
        )
    labels = [
        "system",
        "library",
        "insert",
        "attach",
        "overlay",
        "mixed",
        "other",
        "vendor",
    ]
    nbdf = pd.DataFrame.from_records(nypl_br_results, columns=labels)
    nrdf = pd.DataFrame.from_records(nypl_rl_results, columns=labels)
    bdf = pd.DataFrame.from_records(bpl_results, columns=labels)
    nbdf["total loaded"] = nbdf["insert"] + nbdf["attach"] + nbdf["overlay"]
    nbdf = nbdf[
        ["vendor", "insert", "attach", "overlay", "total loaded", "mixed", "other"]
    ]
    nrdf["total loaded"] = nrdf["insert"] + nrdf["attach"] + nrdf["overlay"]
    nrdf = nrdf[
        ["vendor", "insert", "attach", "overlay", "total loaded", "mixed", "other"]
    ]
    bdf["total loaded"] = bdf["insert"] + bdf["attach"] + bdf["overlay"]
    bdf = bdf[["vendor", "insert", "attach", "overlay", "total loaded"]]

    return nbdf, nrdf, bdf
Example #25
def get_latest_cart_record():
    # retrieve last entered cart
    with session_scope() as session:
        rec = retrieve_last_record(session, Cart)
        session.expunge(rec)
        return rec
Example #26
def analize(report_fh=None):

    # clean up Bibs and Orders tables to prep datastore for a new set
    with session_scope() as session:
        delete_table_data(session, Orders)
        delete_table_data(session, Bibs)

    fetched = False
    if report_fh is None:
        s = shelve.open(SETTINGS, flag='r')
        host = s['ftp_host']
        user = s['ftp_user']
        passw = s['ftp_pass']
        ret = s['orders_retention']
        # fetch the latest Sierra report
        fetched = ftp_download(host, user, passw, 'bpl')
        s.close()
        if fetched:
            data_generator = report_data(DATA, ret)
        else:
            main_logger.warning('No new Sierra report - skipping analysis')

        # perform ftp maintenance
        ftp_maintenance(host, user, passw, 'bpl')

    else:
        s = shelve.open(SETTINGS, flag='r')
        ret = s['orders_retention']
        data_generator = report_data(report_fh, ret)
        fetched = True

    if fetched:
        # since bibs and orders are somewhat vetted by the sierra_parser
        # it's OK to add them in bulk to datastore
        # if any exception is encountered, the whole batch will be rolled back!
        try:
            with session_scope() as session:
                for record in data_generator:
                    bib = record[0]
                    order = record[1]
                    insert_or_ignore(session, Bibs, **bib)
                    insert_or_ignore(session, Orders, **order)
        except Exception as e:
            main_logger.critical('Unable to add data to datastore. '
                                 'Error: {}'.format(e))

        # update conflicts table and prepare queries
        # this time enter each conflict in its own session
        # so well formed queries can be used
        queries = dict()
        conflicts = conflict2dict()

        # update Conflict table in datastore
        for conflict in conflicts:
            queries[conflict['id']] = conflict['query']
            conflict.pop('query', None)
            try:
                with session_scope() as session:
                    insert_or_update(session, Conflicts, **conflict)
            except Exception as e:
                main_logger.critical(
                    'unable to add data to datastore: {}, error: {}'.format(
                        conflict, e))

        # run conflict queries and save errors in the datastore
        for cid, query in queries.items():
            try:
                with session_scope() as session:
                    results = run_query(session, query)
                    for row in results:
                        tic = dict(bid=row.bid, title=row.title)
                        ticket = insert_or_ignore(session, Tickets, **tic)
                        # flush session so ticket obj gets id needed for joiner
                        session.flush()
                        joiner = dict(tid=ticket.id, cid=cid)
                        insert_or_ignore(session, TickConfJoiner, **joiner)
                        cop = dict(tid=ticket.id,
                                   oid=row.oid,
                                   copies=row.copies)
                        insert_or_ignore(session, Copies, **cop)
            except Exception as e:
                # think about better logging here
                main_logger.critical('Unable to add data to datastore, error: '
                                     '{}, {}: {}'.format(e, row, cid))

    # email error report

    service = create_gmail_service()

    report = create_report()
    addresses = get_addresses()
    to = ','.join(addresses['to'])
    sender = addresses['from']
    subject = 'BPL QC Report for {}'.format(date.today().strftime('%Y-%m-%d'))
    msg = create_message(sender, to, subject, report)

    # send message
    send_message(service, 'me', msg)
Example #27
def get_template_names(agent):
    # agent arg must be 3 letter code
    with session_scope() as session:
        values = retrieve_records(session, NYPLOrderTemplate, agent=agent)
        return [x.tName for x in values]
Example #28
def delete_template(otid):
    with session_scope() as session:
        delete_record(session, NYPLOrderTemplate, otid=otid)
Example #29
def get_batch_criteria_record():
    with session_scope() as db_session:
        rec = retrieve_record(db_session, WCSourceBatch)
        db_session.expunge_all()
        return rec
Example #30
def get_total_number_of_branches(system_id):
    with session_scope() as session:
        branch_count = count_records(session, Branch, system_id=system_id)
        return branch_count