Пример #1
0
def get_goodreads_details(url):
    """Fetch basic bibliographic details for a Goodreads book page.

    The book's XML representation is requested by appending ``.xml`` and
    the API key from ``GOODREADS['KEY']`` to ``url``, which is expected to
    look something like
    http://www.goodreads.com/book/show/40694.Ray_Bradbury_s_Fahrenheit_451

    Args:
        url: URL of a Goodreads book page. Any query string or fragment is
            dropped before the ``.xml`` suffix is appended.

    Returns:
        A dict that always contains ``'format'`` (``'book'``) and, when the
        response provides them, ``'title'``, ``'creator'``, ``'pub_date'``,
        ``'isbn'`` and ``'measurement_page_numeric'``. Apart from
        ``'format'`` and ``'pub_date'`` the values are the objectified XML
        elements themselves, not plain strings.

    Raises:
        Whatever ``urllib2.urlopen`` / ``objectify.fromstring`` raise on
        network or parse failures, and ``AttributeError`` if the parsed
        response has no ``book`` element.
    """
    #TODO: we should do some validation/error handling here

    # A query string or fragment would otherwise end up in front of the
    # '.xml' suffix and produce a URL the API cannot serve.
    page_url = url.split('#', 1)[0].split('?', 1)[0]
    query = urllib.urlencode({'key': GOODREADS['KEY']})
    request_url = page_url + '.xml' + '?' + query

    response = urllib2.urlopen(urllib2.Request(request_url))
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    root = objectify.fromstring(raw_response)
    book = root.book

    details = {'format': 'book'}

    if hasattr(book, 'title'):
        details['title'] = book.title

    # Walk nested elements with getattr defaults so that a missing parent
    # (<authors> or <work>) skips the field instead of raising.
    author = getattr(getattr(book, 'authors', None), 'author', None)
    if hasattr(author, 'name'):
        details['creator'] = author.name

    work = getattr(book, 'work', None)
    if hasattr(work, 'original_publication_year'):
        details['pub_date'] = get_year_from_raw_date(
            work.original_publication_year)

    if hasattr(book, 'isbn'):
        details['isbn'] = book.isbn

    # An empty <num_pages/> element has .text of None (or ''); a plain
    # truth test on the text covers both without calling len(None).
    if hasattr(book, 'num_pages') and book.num_pages.text:
        details['measurement_page_numeric'] = book.num_pages

    return details
Пример #2
0
def get_goodreads_details(url):
    """Given a Goodreads book URL, get title, creator, etc.

    Requests ``<url>.xml?key=<GOODREADS['KEY']>`` and reads the fields of
    interest out of the ``book`` element of the XML response. ``url``
    should look something like
    http://www.goodreads.com/book/show/40694.Ray_Bradbury_s_Fahrenheit_451

    Args:
        url: Goodreads book page URL. A trailing query string or fragment
            is ignored.

    Returns:
        dict with ``'format'`` set to ``'book'`` plus any of ``'title'``,
        ``'creator'``, ``'pub_date'``, ``'isbn'`` and
        ``'measurement_page_numeric'`` that the response contains. Fields
        other than ``'format'`` and ``'pub_date'`` hold the objectified
        XML elements as returned by the parser.

    Raises:
        Errors from ``urllib2.urlopen`` and ``objectify.fromstring`` are
        propagated; ``AttributeError`` is raised when the response has no
        ``book`` element.
    """
    #TODO: we should do some validation/error handling here

    # Strip '?...' and '#...' first: appending '.xml' after them would
    # build an invalid request URL.
    clean_url = url.split('#', 1)[0]
    clean_url = clean_url.split('?', 1)[0]
    url_params = {'key': GOODREADS['KEY']}
    request_url = clean_url + '.xml' + '?' + urllib.urlencode(url_params)

    req = urllib2.Request(request_url)
    response = urllib2.urlopen(req)
    try:
        raw_response = response.read()
    finally:
        # Always close the HTTP response, including on read errors.
        response.close()

    book = objectify.fromstring(raw_response).book

    details = {}
    details['format'] = 'book'

    if hasattr(book, 'title'):
        details['title'] = book.title

    # <authors>/<author> may be missing entirely; resolve each level with
    # a None default so the hasattr check below cannot raise.
    authors = getattr(book, 'authors', None)
    first_author = getattr(authors, 'author', None)
    if hasattr(first_author, 'name'):
        details['creator'] = first_author.name

    # Same defensive lookup for the optional <work> element.
    work = getattr(book, 'work', None)
    if hasattr(work, 'original_publication_year'):
        details['pub_date'] = get_year_from_raw_date(
            work.original_publication_year)

    if hasattr(book, 'isbn'):
        details['isbn'] = book.isbn

    if hasattr(book, 'num_pages'):
        num_pages = book.num_pages
        # .text is None for an empty element, so test truthiness rather
        # than len(), which would raise TypeError on None.
        if num_pages.text:
            details['measurement_page_numeric'] = num_pages

    return details
Пример #3
0
def get_amazon_details(url):
    """Look up title, creator, etc. for the item behind an Amazon URL.

    The ASIN is extracted from ``url`` and looked up with
    ``bottlenose.Amazon.ItemLookup`` (``ResponseGroup="Large"``) using the
    credentials stored in ``AMZ``.

    Args:
        url: Amazon product URL containing a path segment made of exactly
            ten upper-case letters/digits (the ASIN), followed by ``/`` or
            the end of the string.

    Returns:
        A dict holding whichever of these the response provides:
        ``'creator'`` (Author, else Artist, else Director), ``'title'``,
        ``'isbn'``, ``'measurement_page_numeric'``,
        ``'measurement_height_numeric'``, ``'format'`` and ``'pub_date'``.

    Raises:
        ValueError: if no ASIN can be found in ``url``.
        AttributeError: if the response lacks the
            ``Items/Item/ItemAttributes`` element chain.
    """
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url)
    if matches is None:
        # Fail with a clear message before touching credentials or the
        # network, instead of an AttributeError on None.group().
        raise ValueError('No ASIN found in Amazon URL: %r' % (url,))
    asin = matches.group(1)

    amazon = bottlenose.Amazon(AMZ['KEY'], AMZ['SECRET_KEY'],
                               AMZ['ASSOCIATE_TAG'])
    response = amazon.ItemLookup(ItemId=asin,
                                 ResponseGroup="Large",
                                 IdType="ASIN")

    root = objectify.fromstring(response)
    attrs = root.Items.Item.ItemAttributes

    details = {}

    # The first available role wins: Author, then Artist, then Director.
    if hasattr(attrs, 'Author'):
        details['creator'] = attrs.Author
    elif hasattr(attrs, 'Artist'):
        details['creator'] = attrs.Artist
    elif hasattr(attrs, 'Director'):
        details['creator'] = attrs.Director

    if hasattr(attrs, 'Title'):
        details['title'] = attrs.Title
    if hasattr(attrs, 'ISBN'):
        details['isbn'] = attrs.ISBN.text
    if hasattr(attrs, 'NumberOfPages'):
        details['measurement_page_numeric'] = attrs.NumberOfPages

    if (hasattr(attrs, 'PackageDimensions')
            and hasattr(attrs.PackageDimensions, 'Length')):
        amz_length = int(attrs.PackageDimensions['Length'])
        # NOTE(review): Length is presumably in hundredths of an inch, so
        # /100 * 2.54 yields centimetres (not inches, as the old local
        # name suggested) -- confirm against the API response units.
        # NOTE(review): '.2g' keeps two significant digits and switches to
        # scientific notation (e.g. '1e+02') once the value rounds to 100
        # or more -- confirm consumers of this field accept that.
        height_text = '{0:.2g}'.format((amz_length / 100.0) * 2.54)
        details['measurement_height_numeric'] = height_text

    if hasattr(attrs, 'ProductGroup'):
        product_group = attrs.ProductGroup.text
        if product_group == 'Music':
            details['format'] = 'Sound Recording'
        elif product_group == 'DVD':
            details['format'] = 'Video/Film'
        else:
            # Every other product group is treated as a book.
            details['format'] = 'book'

    if hasattr(attrs, 'PublicationDate'):
        details['pub_date'] = get_year_from_raw_date(
            attrs.PublicationDate.text)

    return details
Пример #4
0
def get_amazon_details(url):
    """Given an Amazon URL, get title, creator, etc. for the item.

    The ten-character ASIN found in ``url`` is passed to
    ``bottlenose.Amazon.ItemLookup`` (``ResponseGroup="Large"``), with the
    API credentials taken from ``AMZ``.

    Args:
        url: Amazon product URL. It must contain ``/<ASIN>`` where the ASIN
            is ten upper-case letters/digits followed by ``/`` or the end
            of the string.

    Returns:
        dict with any of ``'creator'``, ``'title'``, ``'isbn'``,
        ``'measurement_page_numeric'``, ``'measurement_height_numeric'``,
        ``'format'`` and ``'pub_date'`` that the response provides.

    Raises:
        ValueError: when ``url`` contains no ASIN.
        AttributeError: when the response has no
            ``Items/Item/ItemAttributes`` element chain.
    """
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url)
    if matches is None:
        # Explicit error instead of "'NoneType' has no attribute 'group'".
        raise ValueError('No ASIN found in Amazon URL: %r' % (url,))
    asin = matches.group(1)

    aws_key = AMZ['KEY']
    aws_secret_key = AMZ['SECRET_KEY']
    aws_associate_tag = AMZ['ASSOCIATE_TAG']

    amazon = bottlenose.Amazon(aws_key, aws_secret_key, aws_associate_tag)
    response = amazon.ItemLookup(ItemId=asin, ResponseGroup="Large",
                                 IdType="ASIN")

    item_attributes = objectify.fromstring(response).Items.Item.ItemAttributes

    details = {}

    # Creator is the first of these roles present in the response.
    for role in ('Author', 'Artist', 'Director'):
        if hasattr(item_attributes, role):
            details['creator'] = getattr(item_attributes, role)
            break

    if hasattr(item_attributes, 'Title'):
        details['title'] = item_attributes.Title
    if hasattr(item_attributes, 'ISBN'):
        details['isbn'] = item_attributes.ISBN.text
    if hasattr(item_attributes, 'NumberOfPages'):
        details['measurement_page_numeric'] = item_attributes.NumberOfPages

    dimensions = getattr(item_attributes, 'PackageDimensions', None)
    if hasattr(dimensions, 'Length'):
        amz_length = int(dimensions['Length'])
        # NOTE(review): assuming Length is in hundredths of an inch, the
        # expression below converts to centimetres; the previous local
        # name ("height_in_inches") did not match that -- confirm units.
        # NOTE(review): '.2g' emits scientific notation (e.g. '1e+02') for
        # results that round to 100 or more -- verify downstream parsing.
        details['measurement_height_numeric'] = '{0:.2g}'.format(
            (amz_length / 100.0) * 2.54)

    if hasattr(item_attributes, 'ProductGroup'):
        # Anything that is not Music or DVD is reported as a book.
        format_by_group = {'Music': 'Sound Recording', 'DVD': 'Video/Film'}
        details['format'] = format_by_group.get(
            item_attributes.ProductGroup.text, 'book')

    if hasattr(item_attributes, 'PublicationDate'):
        details['pub_date'] = get_year_from_raw_date(
            item_attributes.PublicationDate.text)

    return details
Пример #5
0
def get_musicbrainz_details(url):
    """Fetch title, creator and release year for a MusicBrainz URL.

    The resource type and id are taken from ``url`` and looked up through
    the MusicBrainz XML web service (version 2) with ``inc=artists``.

    Expected URLs look like
    http://musicbrainz.org/release-group/20e845b1-c6b5-44f7-ab7e-cbc0e33767b5
    or http://musicbrainz.org/release/b21c3cb7-a82d-4d9d-aef7-56569ca734be
    See http://musicbrainz.org/doc/XML_Web_Service/Version_2#Lookups

    Args:
        url: A URL containing ``musicbrainz.org/<resource>/<id>``.

    Returns:
        A dict that always contains ``'format'`` (``'Sound Recording'``)
        and, when the matching elements exist in the response,
        ``'title'``, ``'creator'`` and ``'pub_date'``.

    Raises:
        ValueError: if ``url`` does not contain a MusicBrainz resource/id.
        Errors from ``urllib2.urlopen`` and the XML parser are propagated.
    """
    matches = re.search(r'musicbrainz\.org/([a-zA-Z\-]+)/([0\w\-]+)', url)
    if matches is None:
        # Fail early and clearly instead of crashing on None.group().
        raise ValueError('Not a recognizable MusicBrainz URL: %r' % (url,))
    mb_resource, mb_id = matches.groups()

    query = urllib.urlencode({'inc': 'artists'})
    lookup_url = ('http://musicbrainz.org/ws/2/' + mb_resource + '/' + mb_id
                  + '?' + query)

    response = urllib2.urlopen(urllib2.Request(lookup_url))
    try:
        raw_response = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    tree = ElementTree(fromstring(raw_response))
    namespace = "{http://musicbrainz.org/ns/mmd-2.0#}"
    title_element = tree.find('*/{0}title'.format(namespace))
    name_element = tree.find(
        '*/{0}artist-credit/{0}name-credit/{0}artist/{0}name'.format(
            namespace))
    date_element = tree.find('*/{0}date'.format(namespace))

    # If we don't find the date (date element only occurs in the release
    # type), try the first-release-date.
    # TODO: There's probably a clever xpath way to do this
    if date_element is None:
        date_element = tree.find('*/{0}first-release-date'.format(namespace))

    details = {'format': 'Sound Recording'}

    # find() returns None for absent elements; omit those fields rather
    # than raising AttributeError on ``None.text``.
    if title_element is not None:
        details['title'] = title_element.text
    if name_element is not None:
        details['creator'] = name_element.text
    if date_element is not None:
        details['pub_date'] = get_year_from_raw_date(date_element.text)

    return details
Пример #6
0
def get_musicbrainz_details(url):
    """Given a MusicBrainz URL, get title, creator, etc.

    Extracts ``<resource>/<id>`` from ``url``, requests
    ``http://musicbrainz.org/ws/2/<resource>/<id>?inc=artists`` and reads
    the title, first credited artist name and (first) release date from
    the XML response.

    Args:
        url: Something like
            http://musicbrainz.org/release-group/20e845b1-c6b5-44f7-ab7e-cbc0e33767b5
            or
            http://musicbrainz.org/release/b21c3cb7-a82d-4d9d-aef7-56569ca734be
            (see http://musicbrainz.org/doc/XML_Web_Service/Version_2#Lookups).

    Returns:
        dict with ``'format'`` set to ``'Sound Recording'`` plus
        ``'title'``, ``'creator'`` and ``'pub_date'`` for each element that
        was found in the response.

    Raises:
        ValueError: when ``url`` has no ``musicbrainz.org/<resource>/<id>``
            part.
        Network and XML parsing errors are propagated unchanged.
    """
    matches = re.search(r'musicbrainz\.org/([a-zA-Z\-]+)/([0\w\-]+)', url)
    if matches is None:
        # Explicit error instead of "'NoneType' has no attribute 'group'".
        raise ValueError('Not a recognizable MusicBrainz URL: %r' % (url,))

    mb_resource = matches.group(1)
    mb_id = matches.group(2)

    url_params = {'inc': 'artists'}
    request_url = 'http://musicbrainz.org/ws/2/' + mb_resource + '/' + mb_id
    request_url = request_url + '?' + urllib.urlencode(url_params)

    req = urllib2.Request(request_url)
    response = urllib2.urlopen(req)
    try:
        raw_response = response.read()
    finally:
        # Always close the HTTP response, including on read errors.
        response.close()

    tree = ElementTree(fromstring(raw_response))
    namespace = "{http://musicbrainz.org/ns/mmd-2.0#}"

    def find_text_source(path_template):
        # Return the first element matching the namespaced path, or None.
        return tree.find(path_template.format(namespace))

    title_element = find_text_source('*/{0}title')
    name_element = find_text_source(
        '*/{0}artist-credit/{0}name-credit/{0}artist/{0}name')

    # The date element only occurs in the release type; fall back to
    # first-release-date otherwise.
    # TODO: There's probably a clever xpath way to do this
    date_element = find_text_source('*/{0}date')
    if date_element is None:
        date_element = find_text_source('*/{0}first-release-date')

    details = {}
    details['format'] = 'Sound Recording'

    # Any of the lookups may come back as None (e.g. a resource type
    # without these elements); skip the field instead of dereferencing None.
    if title_element is not None:
        details['title'] = title_element.text
    if name_element is not None:
        details['creator'] = name_element.text
    if date_element is not None:
        details['pub_date'] = get_year_from_raw_date(date_element.text)

    return details