Example #1
def create_title_bool_and(record: Dict[str, str]) -> str:
    """Build a Boolean AND query string from a record's main title and subtitle."""
    if 'Subtitle' in record and record['Subtitle'] not in ('N/A', ''):
        query_str = '(' + ')+AND+('.join([
            normalize(str(record['Main Title'])),
            normalize(str(record['Subtitle']))
        ]) + ')'
    else:
        query_str = normalize(record['Main Title'])

    # logger.debug(f'Title Boolean phrase or string: {query_str}')
    return query_str
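
normalize() is not defined in these examples. A minimal sketch, assuming it only lowercases the text and strips punctuation so the value is safe to embed in a query string (the real helper may do more, e.g. Unicode folding):

import string

def normalize(text: str) -> str:
    # Hypothetical helper: lowercase, drop punctuation, collapse whitespace.
    cleaned = text.lower().translate(str.maketrans('', '', string.punctuation))
    return ' '.join(cleaned.split())
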
Example #2
def look_up_book_in_worldcat(book_dict: Dict[str, str]) -> pd.DataFrame:
    # Generate query string
    full_title = create_full_title(book_dict)
    logger.info(f'Looking for "{full_title}" in WorldCat...')

    # The data currently holds a single author last name; otherwise we would
    # build the query as in the commented line below, or handle the
    # one-to-many author relationship.
    # query_author = normalize(f"{book_dict['Author_First']} {book_dict['Author_Last']}")
    # Replace apostrophes because they break the query string when present.
    query_author = book_dict['Author_Last'].replace("'", " ")
    query_title = normalize(full_title)
    query_str = f'srw.ti all "{query_title}" and srw.au all "{query_author}"'
    logger.debug(query_str)
    params = {
        'wskey': WC_API_KEY,
        'query': query_str,
        'maximumRecords': 100,
        'frbrGrouping': 'off'
    }
    result = make_request_using_cache(WC_BIB_BASE_URL, params)

    if not result:
        return pd.DataFrame({})

    records = parse_marcxml(result)
    records_df = pd.DataFrame(records)
    logger.info(f'Number of WorldCat records found: {len(records_df)}')
    logger.debug(records_df.head(10))
    return records_df
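
make_request_using_cache() is not shown either. A minimal sketch, assuming it wraps requests.get with a simple in-memory cache keyed on the URL and query parameters (the real helper likely persists the cache to disk and handles errors more carefully):

import requests

_CACHE: dict = {}

def make_request_using_cache(base_url: str, params: dict) -> str:
    # Hypothetical helper: return the cached body if we already made this call.
    key = (base_url, tuple(sorted(params.items())))
    if key in _CACHE:
        return _CACHE[key]
    response = requests.get(base_url, params=params)
    if response.status_code != 200:
        return ''
    _CACHE[key] = response.text
    return response.text
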
Example #3
def main():
    if len(sys.argv) != 3:
        print("Usage:\npython extract_pov.py [SOURCE FILE/FOLDER] [SAVE_DIR]")
        sys.exit(1)
    else:
        source = sys.argv[1]
        save_dir = sys.argv[2]

        if not os.path.exists(save_dir):
            print("Creating directory %s." % save_dir)
            os.makedirs(save_dir)

        if os.path.isdir(source):
            print("Reading from directory %s." % source)
            # Walk through the directory
            for root, _, files in os.walk(source):
                for f in files:
                    name, ext = os.path.splitext(f)
                    if ext == '.pcd':
                        print("Reading from file %s." % f)
                        pts = load_point_cloud(os.path.join(root, f))
                        x, y, z = normalize(pts)

                        # Create folder in save_dir
                        if not os.path.exists(os.path.join(save_dir, name)):
                            os.makedirs(os.path.join(save_dir, name))

                        save_point_cloud(
                            pts,
                            os.path.join(save_dir, name,
                                         name + '_normalized.pcd'))

                        # Generate VTK file for reconstructed surface
                        render3D(os.path.join(save_dir, name,
                                              name + '_normalized.pcd'),
                                 show=False)

                        subprocess.check_call([
                            './extract_pov',
                            os.path.join(save_dir, name,
                                         name + '_normalized_output.vtk'),
                            os.path.join(save_dir, name) + '/',
                            *map(str, (x[0], x[1], x[2],
                                       y[0], y[1], y[2],
                                       z[0], z[1], z[2]))
                        ])
                        print('Extracted PoV images to %s' %
                              os.path.join(save_dir, name))
        else:
            root, fname = os.path.split(source)
            name, ext = os.path.splitext(fname)
            if ext != '.pcd':
                print('Invalid file.')
                sys.exit(1)
            print("Reading from file %s." % source)
            pts = load_point_cloud(source)
            x, y, z = normalize(pts)

            # Create folder in save_dir
            if not os.path.exists(os.path.join(save_dir, name)):
                os.makedirs(os.path.join(save_dir, name))

            save_point_cloud(
                pts, os.path.join(save_dir, name, name + '_normalized.pcd'))

            # Generate VTK file for reconstructed surface
            render3D(os.path.join(save_dir, name, name + '_normalized.pcd'),
                     show=False)

            subprocess.check_call([
                './extract_pov',
                os.path.join(save_dir, name, name + '_normalized_output.vtk'),
                os.path.join(save_dir, name) + '/',
                *map(str, (x[0], x[1], x[2],
                           y[0], y[1], y[2],
                           z[0], z[1], z[2]))
            ])
            print('Extracted PoV images to %s' % os.path.join(save_dir, name))
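
load_point_cloud() and save_point_cloud() are not shown. A minimal sketch, assuming the .pcd files are read and written with open3d (the actual helpers could just as well use PCL bindings or a hand-rolled parser):

import numpy as np
import open3d as o3d

def load_point_cloud(path: str) -> np.ndarray:
    # Read a .pcd file and return its points as an (N, 3) array.
    pcd = o3d.io.read_point_cloud(path)
    return np.asarray(pcd.points)

def save_point_cloud(pts: np.ndarray, path: str) -> None:
    # Wrap the array back into a point cloud object and write it out.
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(pts)
    o3d.io.write_point_cloud(path, pcd)
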
Example #4
def look_up_book_in_resource(book_dict: Dict[str, str]) -> pd.DataFrame:
    # Generate query string
    # logger.info(f'Looking for {book_dict["Main Title"]} in Harvard LibraryCloud...')

    query_author = normalize(f"{book_dict['Author 1 Given']} {book_dict['Author 1 Initial']} {book_dict['Author 1 Family']}")
    # query_author = book_dict['authorLast']
    # Replace apostrophes because they break the query string; str.replace
    # returns a new string, so the result must be reassigned.
    query_author = query_author.replace("'", " ")

    title_bool_and = create_title_bool_and(book_dict)
    params = {
        'title': title_bool_and,
        'name': query_author,
        'limit': 10,
        'publisher': book_dict['Publisher']
    }

    query_str = '&'.join(f'{k}={v}' for k, v in params.items())
    # logger.debug(query_str)
    records = {}

    result = make_request_using_cache(BIB_BASE_URL, params)
    if result:
        records.update(parse_modsxml(result, book_dict))
        if not records:
            # Retry without the publisher constraint if nothing matched.
            params.pop('publisher')
            result = make_request_using_cache(BIB_BASE_URL, params)
            if result:
                records.update(parse_modsxml(result, book_dict))

    if book_dict['Publisher'] != book_dict['Copyright Holder']:
        # Also try the copyright holder in the publisher field.
        params['publisher'] = book_dict['Copyright Holder']
        second_result = make_request_using_cache(BIB_BASE_URL, params)
        if second_result:
            second_records = parse_modsxml(second_result, book_dict)
            if second_records:
                records.update(second_records)

    # print(records)
    # records.update(use_isbnlib({book_dict['ID']:book_dict}))

    # Records count as categorized when at least one carries a non-empty
    # format-specific ISBN, unless some record still holds a long run of
    # uncategorized ISBNs.
    categorized = False
    for r in records.values():
        for col in ['ebook ISBN', 'paper ISBN', 'hardcover ISBN']:
            if col in r and len(r[col]) > 0:
                categorized = True
    for r in records.values():
        if len(str(r['Uncategorized ISBN'])) > 40:
            categorized = False
    # if not categorized:
    #     records.update(use_isbnlib(records))

    if not records:
        return pd.DataFrame({})

    # records.update(use_isbnlib(records))

    records_df = pd.DataFrame.from_dict(records, orient='index')
    # logger.info(f'Number of records found: {len(records_df)}')
    # logger.debug(records_df.head(10))
    return records_df
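
For reference, a hypothetical call showing the input keys the function reads (the field names come from the lookups above; the values here are invented):

book = {
    'Main Title': 'The Idea of the Library',
    'Subtitle': 'N/A',
    'Author 1 Given': 'Jane',
    'Author 1 Initial': 'Q',
    'Author 1 Family': 'Scholar',
    'Publisher': 'Example University Press',
    'Copyright Holder': 'Example University Press',
}
matches = look_up_book_in_resource(book)
print(matches.head())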