def home(request):
    """Render ``home.html``, looking up a product when a URL is POSTed.

    On a POST, the submitted ``url`` field (or a hard-coded toysrus.com
    product URL when the field is absent) is passed to ``diffbot.product``.
    The first entry of the response's ``objects`` list is exposed to the
    template as ``product``.  The value of the URL's ``productId=``
    parameter is exposed as ``product_id``; when the URL has no such
    parameter the key is simply left out of the context instead of
    raising ``IndexError``.

    Args:
        request: The incoming request; ``request.method`` and
            ``request.POST`` are read.

    Returns:
        Whatever ``render(request, 'home.html', data)`` returns.
    """
    data = {'url': ''}

    if request.method == 'POST':
        data['url'] = request.POST.get(
            'url',
            'http://www.toysrus.com/product/index.jsp?productId=24447876',
        )
        # Debug output, kept from the original; the single-argument
        # parenthesised form prints the same text under Python 2.
        print(data['url'])

        json_result = diffbot.product(data['url'], token=settings.DIFFBOT_TOKEN)
        print(json_result)

        # NOTE(review): raises KeyError/IndexError when the response carries
        # no 'objects' entries -- confirm how diffbot reports failures.
        data['product'] = json_result['objects'][0]

        # The product id is whatever follows 'productId=' up to the next '&'.
        # The URL is user input, so the parameter may be missing entirely.
        url_parts = data['url'].split('productId=')
        if len(url_parts) > 1:
            data['product_id'] = url_parts[1].split('&')[0]

    return render(request, 'home.html', data)
def handle(self, *args, **options):
    """Export the reviews of every requested product id.

    For each id in ``options['pid']`` the product is looked up through
    ``diffbot.product`` to obtain its SKU, the numbered review pages are
    downloaded from toysrus.com until one cannot be fetched or parsed (or
    is empty), and the collected reviews are handed to ``export_to_file``
    together with ``options['format']`` and the product id.

    Args:
        *args: Unused.
        **options: Must contain ``pid`` (an iterable of product ids) and
            ``format`` (the error message names ``json`` and ``xml``).

    Raises:
        CommandError: If the ``pid`` or ``format`` option is ``None``.
    """
    # Both options are mandatory.
    if options['pid'] is None:
        raise CommandError("Option `--pid=...` must be specified.")
    if options['format'] is None:
        raise CommandError("Option `--format=...` must be specified (json or xml).")

    # Review page URL, e.g.
    # http://www.toysrus.com/pwr/content/10/07/24447876-en_US-12-reviews.js
    base_url = 'http://www.toysrus.com/pwr/content/%s/%s-en_US-%s-reviews.js'

    for pid in options['pid']:
        # diffbot: only the SKU of the product is needed for the export.
        url = 'http://www.toysrus.com/product/index.jsp?productId=%s' % pid
        json_result = diffbot.product(url, token=settings.DIFFBOT_TOKEN)
        data_product = json_result['objects'][0]
        sku = data_product['specs']['sku']

        # pid encoding: sum r * (255 - r) over the character codes, reduce
        # modulo 1023, zero-pad to four digits and split as "ab/cd".
        # The id is formatted with %s (as in the URLs) so that a
        # non-string id can be hashed as well.
        pid_text = '%s' % (pid,)
        checksum = sum(ord(char) * (255 - ord(char)) for char in pid_text) % 1023
        digits = '%04d' % checksum
        path = digits[:2] + "/" + digits[2:]
        # end pid encoding

        exportable_data = []
        review_count = 0
        page = 1
        while True:
            review_page_url = base_url % (path, pid, page)
            try:
                response = urllib2.urlopen(review_page_url)
                try:
                    review_data_raw = response.read()
                finally:
                    response.close()

                # The payload is a JavaScript assignment: keep what follows
                # the first ' = ' and drop only the trailing ';' so that
                # semicolons inside review text survive.
                review_data = review_data_raw.split(' = ', 1)[1]
                review_data = review_data.rstrip().rstrip(';')
                decoded_data = demjson.decode(review_data)

                for entry in decoded_data:
                    review = entry['r']
                    review_count += 1
                    exportable_data.append(
                        {
                            'sku': sku,
                            'title': review['h'],
                            'rating': review['r'],
                            'text': review['p'],
                            'submissionTime': review['db'],
                            'displayName': review['n'],
                            'externalId': review['id'],
                            # Running review number, kept from the original.
                            'emailAddress': review_count,
                        }
                    )
            except Exception:
                # Best effort: the first page that cannot be fetched or
                # parsed ends the pagination.  ``Exception`` rather than a
                # bare ``except`` so Ctrl-C / SystemExit still abort the run.
                break

            if not decoded_data:
                # An empty page means there is nothing more to read; without
                # this check the loop would never terminate.
                break
            page += 1

        export_to_file(options['format'], exportable_data, pid)