Exemplo n.º 1
0
    format='%(asctime)s - %(name)s - %(levelname)s:  %(message)s',
    level=logging.INFO
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Page size for search requests; sent as "per_page" in DEFAULT_QUERY_PARAM.
LIMIT = 500
# Delay handed to DelayedRequester below (presumably seconds — confirm
# against DelayedRequester).
DELAY = 1.0
# Not referenced in the visible part of this module; presumably the maximum
# number of request retries — verify against the callers.
RETRIES = 3
# Provider identifier passed to ImageStore below.
PROVIDER = prov.NYPL_DEFAULT_PROVIDER
# NYPL repo API URLs (item search and per-item details).
# NOTE(review): both use plain http — confirm whether https is available.
BASE_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/search"
METADATA_ENDPOINT = "http://api.repo.nypl.org/api/v1/items/item_details/"
# API key read from the NYPL_API_KEY environment variable at import time.
# NOTE(review): os.getenv returns None when the variable is unset, in which
# case TOKEN becomes the literal string "Token token=None".
NYPL_API = os.getenv("NYPL_API_KEY")
# Value used for the "Authorization" header in HEADERS.
TOKEN = f"Token token={NYPL_API}"

# Shared helper instances, created once at import time.
delay_request = DelayedRequester(delay=DELAY)
image_store = ImageStore(provider=PROVIDER)

# Default query-string parameters: starts at page 1 with LIMIT items per page.
# Presumably "q"/"field" restrict results to CC0-licensed items — confirm
# against the NYPL API documentation.
DEFAULT_QUERY_PARAM = {
    "q": "CC_0",
    "field": "use_rtxt_s",
    "page": 1,
    "per_page": LIMIT
}

# Request headers carrying the API token.
HEADERS = {
    "Authorization": TOKEN
}

# Single-letter codes; presumably image size/derivative identifiers used in
# NYPL image URLs — meaning and ordering are not visible here, TODO confirm.
IMAGE_URL_DIMENSIONS = [
    "g", "v", "q", "w", "r"
]
Exemplo n.º 2
0
import time
import logging
import re
import json
from urllib.parse import urlparse, parse_qs

# Module-level configuration for the RawPixel provider script. Everything
# below runs at import time.

DELAY = 1.0  # time delay (in seconds); passed to DelayedRequester below
# Provider name passed to ImageStore.
PROVIDER = 'rawpixel'
# Output file name, suffixed with the current Unix time (whole seconds) taken
# when this module is imported.
FILE = 'rawpixel_{}.tsv'.format(int(time.time()))

# Configure root logging at INFO level with a RawPixel-specific line format.
logging.basicConfig(
    format='%(asctime)s: [%(levelname)s - RawPixel API] =======> %(message)s',
    level=logging.INFO)

# Shared helper instances, created once at import time.
delayed_requester = DelayedRequester(DELAY)
image_store = ImageStore(provider=PROVIDER, output_file=FILE)


def sanitizeString(_data):
    if _data is None:
        return ''
    else:
        _data = str(_data)

    _data = _data.strip()
    _data = _data.replace('"', "'")
    _data = re.sub(r'\n|\r', ' ', _data)
    #_data      = re.escape(_data)

    backspaces = re.compile('\b+')
    _data = backspaces.sub('', _data)