Example #1
@classmethod
def setUpClass(cls):
    config = {}
    try:
        config['access_key'] = os.environ['AWS_ACCESS_KEY_ID']
        config['secret_key'] = os.environ['AWS_SECRET_ACCESS_KEY']
        config['associate_tag'] = os.environ['AWS_ASSOCIATE_TAG']
    except KeyError:
        raise AssertionError('''
            The following environment variables must be set:
                "AWS_ACCESS_KEY_ID"
                "AWS_SECRET_ACCESS_KEY"
                "AWS_ASSOCIATE_TAG"
        ''')
    cls.amzn = AmazonScraper(MaxQPS=0.5, **config)
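For context, a minimal self-contained test case built on the same pattern might look like the sketch below; the ASIN used in the lookup is a placeholder, not one taken from the original snippet.

import os
import unittest

from amazon_scraper import AmazonScraper


class AmazonScraperTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # Same pattern as above: credentials come from the environment.
        cls.amzn = AmazonScraper(
            os.environ['AWS_ACCESS_KEY_ID'],
            os.environ['AWS_SECRET_ACCESS_KEY'],
            os.environ['AWS_ASSOCIATE_TAG'],
            MaxQPS=0.5)

    def test_lookup_returns_title(self):
        # 'B00FLIJJSA' is a placeholder ASIN.
        item = self.amzn.lookup(ItemId='B00FLIJJSA')
        self.assertTrue(item.title)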
Example #2
def initialize(prodId):
    # NOTE: access_key, secret_key and customer_tag must be defined at
    # module level before this is called.
    amzn = AmazonScraper(access_key, secret_key, customer_tag, Region='IN')
    p = amzn.lookup(ItemId=prodId)
    rs = amzn.reviews(ItemId=prodId)
    reviews, reviews_title = [], []
    for i, r in enumerate(rs, start=1):
        fr = r.full_review()
        print_review(fr.title, fr.text, i)
        reviews.append(fr.text)
        reviews_title.append(fr.title)
    # Replace '.' and '/' in the product title with '-'.
    prodName = p.title.replace('.', '-').replace('/', '-')
    return reviews, reviews_title, prodName
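A hedged usage sketch of the function above; the ASIN is a placeholder, and the module-level credentials and print_review() must already be defined.

reviews, reviews_title, prodName = initialize('B00EXAMPLE')
print('Fetched %d reviews for %s' % (len(reviews), prodName))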
Example #3
        return True


auth_args = [AMZ_ACCESS_KEY, AMZ_SECRET_KEY, AMZ_ASSOC_TAG]
auth_kwargs = {
    'Region': 'CN',
    'MaxQPS': 0.9,
    'Timeout': 5.0,
    'ErrorHandler': error_handler}


# region_options = bottlenose.api.SERVICE_DOMAINS.keys()

amz_product = AmazonAPI(*auth_args, **auth_kwargs)

amz_scraper = AmazonScraper(*auth_args, **auth_kwargs)

amz_nose = bottlenose.Amazon(
    Parser=lambda text: BeautifulSoup(text, 'xml'),
    *auth_args,
    **auth_kwargs)


def print_products(products):
    # product.features: a list holding the product details.

    with open('result.txt', 'w') as f:
        for i, product in enumerate(products):
            line = "{0}. '{1}'".format(i, product.title.encode('utf8'))
            print(line)
            f.write(line + '\n')
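A possible way to drive print_products(), reusing the amz_scraper client above; the keywords and search index are placeholders, following the search call pattern seen in the other examples.

import itertools

products = itertools.islice(
    amz_scraper.search(Keywords='python', SearchIndex='Books'), 10)
print_products(products)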
Example #4
def main(num_items, heading_level, args):
    """Main routine"""

    # Retrieve the contents of the API key file
    apikey = get_api_config('.amznrc')

    # Create AmazonScraper object using API key
    amznscpr = AmazonScraper(*apikey)

    # Check keyword list entered on the command line
    if len(args) < 1:
        print('Missing search terms. For usage help: python amznsrch.py -h')
        sys.exit(1)

    # Loop through quoted lists of search terms from command line arguments
    for arg in args:

        # Print search terms as a markdown heading
        srch_terms = str(arg)
        if 0 < heading_level < 7:
            print('\n' + '#' * heading_level + ' ' + srch_terms + '\n')

        # Fetch and return results
        for item in itertools.islice(
                amznscpr.search(Keywords=srch_terms, SearchIndex='Books'),
                num_items):

            # Skip if no title, else encode, remove parenthetical text, & quote
            if not item.title:
                continue
            else:
                bktitle = item.title.encode('utf8')
                bktitle = re.sub(r'\s*[(\[].*[)\]]', '', bktitle)
                bktitlesrch = urllib.quote_plus('"' + bktitle + '"')

            # Encode author, if present, and format for printing
            if not item.author:
                bkauthor = ''
            else:
                bkauthor = 'by ' + item.author.encode('utf8')

            # Add associate tag to item URL
            bkurl = str(item.url) + '/?tag=' + apikey[2]

            # Construct links as desired
            amzn = '[AMZN](' + bkurl + ')'
            goog = ('[GOOG]' + '(https://www.google.com/' +
                    'search?tbo=p&tbm=bks&q=intitle:' + bktitlesrch +
                    '&num=10&gws_rd=ssl)')
            spl = ('[SPL](https://seattle.bibliocommons.com/search?' +
                   't=title&search_category=title&q=' + bktitlesrch +
                   '&commit=Search)')
            uwl = ('[UW](http://alliance-primo.hosted.exlibrisgroup.com/' +
                   'primo_library/libweb/action/search.do?fn=search&' +
                   'ct=search&vid=UW&vl%28753972432UI0%29=title&' +
                   'vl%281UIStartWith0%29=starts+with&vl%28freeText0%29=' +
                   bktitlesrch + '&Submit=Search)')
            # Searching UW Libraries through WorldCat to be deprecated 2015-09
            #uwl = ('[UW](http://uwashington.worldcat.org' +
            #       '/search?q=ti%3A' + bktitlesrch + '&qt=advanced)')

            # Print markdown for title, author, and links as bulleted list item
            print('- _' + bktitle + '_ ' + bkauthor + ' ( ' + goog + ' | ' +
                  amzn + ' | ' + spl + ' | ' + uwl + ' )')
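get_api_config() is not shown in this snippet. One plausible implementation, assuming '.amznrc' stores the access key, secret key and associate tag as whitespace-separated tokens, is:

def get_api_config(path):
    # Hypothetical helper: the real file format is not part of the
    # snippet. Assumes three whitespace-separated tokens: access key,
    # secret key, associate tag.
    with open(path) as f:
        return f.read().split()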
Example #5
        time.sleep(random.expovariate(0.1))
        return True


# Amazon API credentials.
AUTH_ARGS = [AMZ_ACCESS_KEY, AMZ_SECRET_KEY, AMZ_ASSOC_TAG]
# Amazon API request settings.
AUTH_KWARGS = {
    'Region': 'CN',
    'MaxQPS': 0.9,
    'Timeout': 5.0,
    'ErrorHandler': error_handler
}

amz_product = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
amz_scraper = AmazonScraper(*AUTH_ARGS, **AUTH_KWARGS)
amz_nose = bottlenose.Amazon(Parser=lambda text: BeautifulSoup(text, 'xml'),
                             *AUTH_ARGS,
                             **AUTH_KWARGS)


class AmazonLookupItem(object):
    # Wraps the useful parts of the AmazonAPI item and adds a few helpers.
    def __init__(self, asin):
        amz = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
        print('\n>>> Parsing item %s from api...' % asin)
        self.item_api = amz.lookup(ItemId=asin)
        print('Done.\n')

    @property
    def is_prime(self):
Example #6
def main():
    validate_args()
    scraper = AmazonScraper('products.yml')
    print('===============================START===============================')
    process_search(scraper)
    print('===============================END===============================')
Example #7
import requests, time, bottlenose, math, urllib, csv
from bs4 import BeautifulSoup
from amazon_scraper import AmazonScraper  # https://github.com/adamlwgriffiths/amazon_scraper
from decimal import Decimal
from app.categories import *
from app_config import *

# Amazon scraper + Amazon API wrapper
amazon = AmazonScraper(app.config['AMZ_API_KEY'], app.config['AMZ_API_SECRET'],
                       app.config['AMZ_ASSOCIATE'])

# Access Raw Amazon XML Response
amazon_raw = bottlenose.Amazon(app.config['AMZ_API_KEY'],
                               app.config['AMZ_API_SECRET'],
                               app.config['AMZ_ASSOCIATE'])


def upc_to_asin(upc):
    # Throttle requests to stay under the API rate limit.
    time.sleep(1)
    p = amazon.lookup(ItemId=upc, IdType='UPC', SearchIndex='All')
    # lookup() returns a single item or a list, depending on the query.
    if not isinstance(p, list):
        return [p.asin]
    return [item.asin for item in p]
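An example call; the UPC below is a made-up placeholder.

print(upc_to_asin('012345678905'))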

Example #8
import requests
import re
import json
from textblob import TextBlob
import itertools

import pickle

# Disable insecure-request warnings.
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

from amazon_scraper import AmazonScraper
from flask import Flask
from flaskext.mysql import MySQL

# Credentials redacted; supply your own access key, secret key and
# associate tag here.
amzn = AmazonScraper('******', '******', '******', Region='IN',
                     MaxQPS=0.9, Timeout=5.0)

app = Flask(__name__)
mysql = MySQL()
app.config['MYSQL_DATABASE_USER'] = '******'
app.config['MYSQL_DATABASE_PASSWORD'] = '******'
app.config['MYSQL_DATABASE_DB'] = 'review_data'
app.config['MYSQL_DATABASE_HOST'] = 'localhost'
mysql.init_app(app)
conn = mysql.connect()
cursor = conn.cursor()
asin_regex = r'/([A-Z0-9]{10})'
isbn_regex = r'/([0-9]{10})'


def get_amazon_item_id(url):
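    # The original snippet is cut off here. A plausible completion (an
    # assumption, not the original code): pull a 10-character ASIN or
    # ISBN out of the product URL using the regexes defined above.
    match = re.search(asin_regex, url) or re.search(isbn_regex, url)
    return match.group(1) if match else None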
Example #9
    def setUp(self):

        args = {'asin': 'a'}

        self.amazon_scraper = AmazonScraper(**args)
Example #10
resp = requests.get('http://icanhazip.com')
print "My current IP address:", resp.content.strip()

AUTH = requests.auth.HTTPProxyAuth('******', '******')
PROXIES = {'http': 'http://us-dc.proxymesh.com:31280'}
resp = requests.get('http://icanhazip.com',
                    proxies=PROXIES,
                    auth=AUTH,
                    verify=False)
print "My new IP address via ProxyMesh:", resp.content.strip()

AMAZON_ACCESS_KEY = "AMAZON_ACCESS_KEY"
AMAZON_SECRET_KEY = "AMAZON_SECRET_KEY"
AMAZON_ASSOCIATE_TAG = "AMAZON_ASSOCIATE_TAG"

amzn = AmazonScraper(AMAZON_ACCESS_KEY, AMAZON_SECRET_KEY,
                     AMAZON_ASSOCIATE_TAG)
# Three things are needed for the keys above: an AWS account (the first
# two values), an Amazon Associates account (the associate tag), and a
# Product Advertising API signup within the Associates account.

filename = "reviews_allinfo.csv"
filename2 = "reviews_notext.csv"

save_path = 'c:/output/'

with open('product_ids.csv', 'rb') as f:
    csv_f = csv.reader(f)
    items = [row[0].strip() for row in csv_f]

for number in items: