示例#1
0
 def parse_element(script):
     """Split the leading element off a hex-encoded script.

     The first byte of ``script`` decides how the element is read:

     * ``0x00``-``0x4B``: the byte itself is the data length in bytes.
     * ``0x4C`` / ``0x4D`` / ``0x4E``: the data length is stored in the next
       1 / 2 / 4 bytes; the multi-byte forms are passed through
       ``CT.big_or_little`` before being parsed as hexadecimal.
     * ``0x4F``: the element is the integer ``-1``.
     * ``0x51``-``0x60``: the element is the integer ``byte - 0x50``.

     Any other first byte yields ``None`` and leaves the script untouched.

     Returns:
         A ``(content, remaining_script)`` tuple; data content is returned as
         a hex string, exactly as it appears in ``script``.
     """
     lead = unhexlify(script)[0]

     # Elements that carry no data bytes.
     if 0x4F == lead:
         return -1, script[2:]
     if 0x51 <= lead <= 0x60:
         return lead - 0x50, script[2:]
     if lead > 0x4E:
         return None, script

     # Data elements: ``header`` counts hex characters taken by the marker and
     # the optional length field, ``size`` counts data bytes.
     if lead <= 0x4B:
         header, size = 2, lead
     elif 0x4C == lead:
         header, size = 4, int(script[2:4], 16)
     elif 0x4D == lead:
         header, size = 6, int(CT.big_or_little(script[2:6]), 16)
     else:
         header, size = 10, int(CT.big_or_little(script[2:10]), 16)

     end = header + size * 2
     return script[header:end], script[end:]
示例#2
0
 def solveReviewerBack(self):
     """Write the profile of every listed reviewer to ``reviewer.txt``.

     The file lives under ``../data/<fetchDate>/`` and starts with the
     reviewer table head.  Empty reviewer ids are skipped.
     """
     outPath = "../data/" + self.fetchDate.isoformat() + "/reviewer.txt"
     with open(outPath, "w") as fout:
         ct = CommonTool(fout)
         ct.writeln(Reviewer.Reviewer.tableHead)
         for position, reviewerID in enumerate(self.reviewerList):
             if reviewerID != '':
                 print('solve reviewer {0} of {1}: {2}'.format(
                     position, len(self.reviewerList), reviewerID))
                 Reviewer.loadReviewer(reviewerID).saveProfileUp(ct)
示例#3
0
 def printReviews(self, ct=None):
     """Print every review in ``self.reviewList`` and insert it into its table.

     Args:
         ct: writer used by ``review.printData``.  When omitted, a
             ``CommonTool`` is created that writes to
             ``../data/<fetchDate>/review/review.txt``; the table head is
             written first if that file did not exist yet, otherwise the
             file is appended to.

     A file opened here is always closed again, also when loading or
     printing a review raises.  A caller-supplied ``ct`` is left untouched.
     """
     fout = None
     try:
         if ct is None:
             reviewDir = "../data/" + self.fetchDate.isoformat() + "/review/"
             reviewPath = reviewDir + "review.txt"
             if not os.path.exists(reviewDir):
                 os.makedirs(reviewDir)
             ct = CommonTool()
             if not os.path.exists(reviewPath):
                 fout = open(reviewPath, "w")
                 print("writing reviews in new file")
                 ct.setFout(fout)
                 ct.writeln(Review.Review.tableHead)
             else:
                 fout = open(reviewPath, "a")
                 ct.setFout(fout)
         for reviewID in self.reviewList:
             print("printing reviews")
             review = Review.loadReview(reviewID, self.fetchDate)
             review.printData(ct)
             review.insertReviewDataIntoTable()
     finally:
         # Only close what this call opened itself.
         if fout is not None:
             fout.close()
示例#4
0
 def solveRightCol(self):
     """Read the list price from the right-hand buy box into ``self.listPrice``.

     ``self.listPrice`` ends up as the parsed price when the first price
     entry of the buy box is labelled ``'List Price:'``; in every other case,
     including any error while walking the page, it is set to 0.
     """
     listPrice = 0
     try:
         rightCol = self.html.xpath(".//div[@id='rightCol']")[0]
         buyBox = rightCol.xpath(".//div[@id='buyBoxInner']")[0]
         entries = buyBox.xpath("./div/div[2]/ul/li/span")
         if len(entries) != 0:
             # First inner span is the label, second one the amount.
             label = entries[0].xpath("./span")[0].text.strip()
             amount = entries[0].xpath("./span")[1].text.strip()
             if label == 'List Price:':
                 listPrice = CommonTool.strToFloat(amount)
     except Exception as e:
         print('RightCol error: %s' % str(e))
         listPrice = 0
     self.listPrice = listPrice
示例#5
0
    def getProductLinksUsingThreads(self,productList):
        """Scrape every review-listing page in ``productList``.

        For each page the product rows are collected and one
        ``[link, rate, reviewID, label]`` entry per row is appended to
        ``self.allProductLinks``.  ``label`` is ``'1'`` for books (Paperback
        or Hardcover, whose id is also appended to
        ``self.reviewedBookList``), ``'0'`` when the row carries no
        parenthesised format, and the raw format text otherwise.  Numeric
        rates are accumulated in ``self.sum`` / ``self.counter``.

        The page equal to ``self.firstLink`` provides ``self.fRevTime`` (taken
        from its last row) and the page equal to ``self.lastLink`` provides
        ``self.lRevTime`` (taken from its first row).  ``self.page`` is
        incremented after every page.

        Args:
            productList: iterable of page URLs to fetch.
        """

        def reviewTime(rowObj):
            # The element following a product row holds the review date in a
            # <nobr> tag; 'N/A' is used when either piece is missing.
            siblings = rowObj.xpath("./following-sibling::*")
            if len(siblings) == 0:
                return 'N/A'
            nobrs = siblings[0].xpath(".//nobr")
            if len(nobrs) == 0:
                return 'N/A'
            return CommonTool.strToDate(nobrs[0].text.strip())

        print("getting products using thread method")
        for link in productList:
            linkPage = MyHtml.getHtml(
                link, self.rID + "_AllProductLinks_" + str(self.page))
            trObjs = linkPage.xpath(
                ".//body/table[2]/tr[1]/td[2]/table[2]/tr[@valign='top']")

            # Compare by value: two equal URL strings need not be the same
            # object, so an identity test can silently never match.
            if link == self.firstLink:
                try:
                    firstTimeObj = trObjs[-1]
                except LookupError:
                    # A first page without rows stops the whole run.
                    print(link)
                    exit(0)
                self.fRevTime = reviewTime(firstTimeObj)

            if link == self.lastLink:
                self.lRevTime = reviewTime(trObjs[0])

            for trObj in trObjs:
                # Product link; stays empty when the row has no product table
                # (previously the name was unbound or kept the previous row's
                # value in that case).
                aLink = ''
                tableObj = trObj.xpath(
                    "./td[@class='small'][3]/table[@class='small']")
                if len(tableObj) != 0:
                    anchors = tableObj[0].xpath(".//a")
                    if len(anchors) != 0:
                        aLink = anchors[0].attrib['href']

                rate = 'N/A'
                reviewID = ''
                label = ''
                rateObj = trObj.xpath("./following-sibling::*")
                # Without a following element there is nothing to read the
                # rate, review id or label from; keep the defaults instead of
                # failing on rateObj[0].
                if len(rateObj) != 0:
                    detailObj = rateObj[0]

                    # rate
                    try:
                        title = detailObj.xpath(".//img")[0].attrib['title']
                        rate = title.split("out")[0].strip()
                        self.sum = self.sum + float(rate)
                        self.counter = self.counter + 1
                    except Exception as e:
                        sys.stderr.write(str(e) + ' rate Exception\n')

                    # reviewID
                    rIDObj = detailObj.xpath(".//a")
                    if len(rIDObj) != 0:
                        reviewID = rIDObj[0].attrib['name']

                    # label "The review is from"
                    labelObj = detailObj.xpath(".//div[@class='tiny']")
                    if len(labelObj) != 0:
                        # verified purchase + the review is from
                        aObj = labelObj[-1].xpath(".//a")
                        if len(aObj) != 0:
                            label = filtTag.filter_tags(
                                etree.tostring(aObj[0]).strip())

                # 1---book 0---product
                parLeft = label.find('(')
                parRight = label.find(')')
                if parLeft == -1 and parRight == -1:
                    label = '0'
                elif label[-1] == ')':
                    # Keep only the text inside the last pair of parentheses.
                    label = label.split('(')[-1][:-1]
                    if (label.find('Paperback') != -1) or (label.find('Hardcover') != -1):
                        label = '1'
                        bookID = aLink.replace('/ref=cm_cr-mr-title', '')
                        bookID = bookID[-10:]
                        self.reviewedBookList.append(bookID)

                # Separate name so the ``productList`` argument being
                # iterated is no longer rebound and deleted inside the loop.
                self.allProductLinks.append([aLink, rate, reviewID, label])

            self.page = self.page + 1
示例#6
0
    async def crawl(self):
        """Continuously sync blocks above the stored state height.

        Resumes at ``get_state() + 1`` and then loops forever.  Whenever the
        height reported by ``get_block_count()`` is above ``self.start``, a
        batch of at most ``self.max_tasks`` heights is cached, the vins, vouts
        and claims of the cached blocks are written, the blocks themselves are
        stored and the state is advanced to the highest height of the batch.
        Otherwise the loop sleeps for half a second and polls again.

        Raises:
            Exception: when the heights present in ``self.cache`` differ from
                the heights that were requested for the batch.
        """
        self.start = await self.get_state()
        self.start += 1

        while True:
            current_height = await self.get_block_count()
            time_a = CT.now()
            if self.start >= current_height:
                await asyncio.sleep(0.5)
                continue

            stop = min(self.start + self.max_tasks, current_height)
            self.processing.extend(range(self.start, stop))
            max_height = max(self.processing)
            min_height = self.processing[0]

            # asyncio.wait() rejects bare coroutines on Python 3.11+, so every
            # coroutine is wrapped in a task before being waited on.
            await asyncio.wait([
                asyncio.ensure_future(self.cache_block(h))
                for h in self.processing
            ])
            if self.processing != sorted(self.cache.keys()):
                msg = 'cache != processing'
                logger.error(msg)
                raise Exception(msg)
            await self.update_sys_fee(min_height)

            # Flatten the cached blocks into [item, txid, height] work items.
            vins = []
            vouts = []
            claims = []
            for block in self.cache.values():
                height = block['index']
                for tx in block['tx']:
                    txid = tx['txid']
                    for vin in tx['vin']:
                        vins.append([vin, txid, height])
                    for vout in tx['vout']:
                        vouts.append([vout, txid, height])
                    if 'claims' in tx:
                        for claim in tx['claims']:
                            claims.append([claim, txid, height])

            # Each kind is finished before the next one starts.
            if vins:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_vin(*vin))
                    for vin in vins
                ])
            if vouts:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_vout(*vout))
                    for vout in vouts
                ])
            if claims:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_claim(*claim))
                    for claim in claims
                ])

            # cache update addresses: only when the batch is a single block
            # that reaches the current height.
            if stop == current_height and 1 == len(self.processing):
                vinas = await asyncio.gather(
                    *[self.get_address_from_vin(vin[0]) for vin in vins])
                voutas = [vout[0]['address'] for vout in vouts]
                uas = list(set(vinas + voutas))
                await self.update_addresses(max_height, uas)

            time_b = CT.now()
            logger.info(
                'reached %s ,cost %.6fs to sync %s blocks ,total cost: %.6fs'
                % (max_height, time_b - time_a, stop - self.start,
                   time_b - START_TIME))
            await asyncio.wait([
                asyncio.ensure_future(self.update_block(block))
                for block in self.cache.values()
            ])
            await self.update_state(max_height)
            self.start = max_height + 1
            self.processing = []
            self.cache = {}
示例#7
0
                    % (max_height, time_b - time_a, stop - self.start,
                       time_b - START_TIME))
                await asyncio.wait([
                    self.update_block(block) for block in self.cache.values()
                ])
                await self.update_state(max_height)
                self.start = max_height + 1
                del self.processing
                del self.cache
                self.processing = []
                self.cache = {}
            else:
                await asyncio.sleep(0.5)


# Script entry point: build a Crawler from the configured settings and drive
# its crawl() coroutine on the event loop.
if __name__ == "__main__":
    # Module-level start timestamp; crawl() subtracts it from the current time
    # when logging the total cost.
    START_TIME = CT.now()
    logger.info('STARTING...')
    mongo_uri = C.get_mongo_uri()
    neo_uri = C.get_neo_uri()
    mongo_db = C.get_mongo_db()
    tasks = C.get_tasks()
    loop = asyncio.get_event_loop()
    crawler = Crawler(mongo_uri, mongo_db, neo_uri, loop, tasks)
    try:
        loop.run_until_complete(crawler.crawl())
    except Exception as e:
        # Any exception escaping crawl() is logged here rather than re-raised.
        logger.error('LOOP EXCEPTION: %s' % e)
    finally:
        # The loop is closed on both the normal and the error path.
        loop.close()
示例#8
0
    async def crawl(self):
        """Continuously sync per-transaction history above the stored height.

        Resumes at ``get_history_state() + 1`` and then loops forever.  For
        every batch of at most ``self.max_tasks`` heights the blocks and the
        outputs referenced by their inputs are cached.  Inputs and outputs of
        each transaction are summed per ``asset_address`` key; a transaction
        with a single key whose input and output totals are equal is skipped.
        An input total that exceeds the output total on the same key is
        reduced by that output, which is then dropped.  The resulting records
        are written with ``update_a_vin`` / ``update_a_vout`` and the history
        state is advanced.  When there is nothing new the loop sleeps for half
        a second.

        The process exits with status 1 when the cached blocks or cached
        outputs do not match what was requested.
        """
        self.start = await self.get_history_state()
        self.start += 1

        while True:
            current_height = await self.get_block_count()
            time_a = CT.now()
            if self.start >= current_height:
                await asyncio.sleep(0.5)
                continue

            stop = min(self.start + self.max_tasks, current_height)
            self.processing.extend(range(self.start, stop))
            max_height = max(self.processing)

            # asyncio.wait() rejects bare coroutines on Python 3.11+, so every
            # coroutine is wrapped in a task before being waited on.
            await asyncio.wait([
                asyncio.ensure_future(self.cache_block(h))
                for h in self.processing
            ])
            if self.processing != sorted(self.cache.keys()):
                msg = 'cache != processing'
                logger.error(msg)
                sys.exit(1)

            # Collect the distinct transactions whose outputs are spent here.
            txids = list({
                vin['txid']
                for block in self.cache.values()
                for tx in block['tx']
                for vin in tx['vin']
            })
            if txids:
                await asyncio.wait([
                    asyncio.ensure_future(self.cache_utxo_vouts(txid))
                    for txid in txids
                ])
            if sorted(txids) != sorted(self.cache_utxo.keys()):
                msg = 'cache utxo error'
                logger.error(msg)
                sys.exit(1)

            vins = []
            vouts = []
            for block in self.cache.values():
                block_time = block['time']
                for tx in block['tx']:

                    # Spent outputs, summed per asset/address.
                    utxo_dict = {}
                    for vin in tx['vin']:
                        utxo = self.cache_utxo[vin['txid']][vin['vout']]
                        key = utxo['asset'] + '_' + utxo['address']
                        if key in utxo_dict:
                            utxo_dict[key]['value'] = CT.sci_to_str(
                                str(
                                    D(utxo_dict[key]['value']) +
                                    D(utxo['value'])))
                        else:
                            utxo_dict[key] = utxo

                    # New outputs, summed per asset/address.
                    vout_dict = {}
                    for vout in tx['vout']:
                        key = vout['asset'] + '_' + vout['address']
                        if key in vout_dict:
                            vout_dict[key]['value'] = CT.sci_to_str(
                                str(
                                    D(vout_dict[key]['value']) +
                                    D(vout['value'])))
                        else:
                            vout_dict[key] = vout

                    # Same single key and equal totals on both sides:
                    # nothing to record for this transaction.
                    if 1 == len(utxo_dict) == len(
                            vout_dict) and utxo_dict.keys(
                            ) == vout_dict.keys():
                        key = list(utxo_dict.keys())[0]
                        if utxo_dict[key]['value'] == vout_dict[key][
                                'value']:
                            continue

                    for i, utxo in enumerate(list(utxo_dict.values())):
                        key = utxo['asset'] + '_' + utxo['address']
                        if key in vout_dict:
                            # Only a larger input total is netted against the
                            # output on the same key.
                            if D(utxo['value']) > D(
                                    vout_dict[key]['value']):
                                utxo['value'] = CT.sci_to_str(
                                    str(
                                        D(utxo['value']) -
                                        D(vout_dict[key]['value'])))
                                del vout_dict[key]
                        vins.append([utxo, tx['txid'], i, block_time])

                    for k, vout in enumerate(list(vout_dict.values())):
                        vouts.append([vout, tx['txid'], k, block_time])

            if vins:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_vin(*vin))
                    for vin in vins
                ])
            if vouts:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_vout(*vout))
                    for vout in vouts
                ])

            time_b = CT.now()
            logger.info(
                'reached %s ,cost %.6fs to sync %s blocks ,total cost: %.6fs'
                % (max_height, time_b - time_a, stop - self.start,
                   time_b - START_TIME))
            await self.update_history_state(max_height)
            self.start = max_height + 1
            self.processing = []
            self.cache = {}
            self.cache_utxo = {}
示例#9
0
def solveReviewPage(asin, rank, url, fetchDate, bookPublishDate):
    hlre = re.compile(
        r'^(\d+) of (\d+) people found the following review helpful')
    html = MyHtml.getHtml(url)
    print "solving Review Page"
    countOfReviews=int(html.xpath('.//div[@id="cm_cr-product_info"]/div/div[1]/div[2]/span')[0].text.strip())
    if countOfReviews>0:
        
        divWholeReviewList = html.xpath('.//div[@id="cm_cr-review_list"]')[0]
        divReviewList = divWholeReviewList.xpath('./div[@id]')     
        print divReviewList
        for divReview in divReviewList:
            
            aReview = Review()
            rank += 1
            aReview.helpfulRank = rank
            aReview.asin = asin
            aReview.reviewID = divReview.attrib['id']

            # helpful line
            parentNode=divReview.xpath('.//span[@class="a-size-base cr-vote"]/span[1]/span[1]')
            print "helpful Match",parentNode
            helpfulMatch=None
            if parentNode  is not None: 
                match1 = divReview.xpath('.//span[@class="a-size-base cr-vote"]/span[1]/span[1]')
                if match1 is not None and len(match1)>0:
                    if match1[0].text is not None:
                        helpfulMatch=hlre.match(match1[0].text.strip())
                else:
                    
                    match2=divReview.xpath('.//span[@class="a-size-base cr-vote"]/span[1]/span[1]/span[1]')
                    if match2 is not None and len(match2)>0:
                        if match2[0].text is not None:
                            helpfulMatch=hlre.match(match2[0].text.strip())
                        
                print helpfulMatch
                if helpfulMatch:
                    aReview.helpful = int(helpfulMatch.group(1))
                    aReview.total = int(helpfulMatch.group(2))
                    aReview.helpfulness = aReview.helpful * \
                                100 / aReview.total / 100.0
                else:
                    aReview.helpful=0
                    aReview.total=0
                    aReview.helpfulness=0
                del helpfulMatch

                print "getting rate data"
                
                rateData = divReview.xpath('.//span[@class="a-icon-alt"]')[0].text.strip()
                aReview.rate=rateData.split(' ')[0].strip()
                aReview.title = divReview.xpath('.//a[@class="a-size-base a-link-normal review-title a-color-base a-text-bold"]')[0].text.strip()
            
                print "getting reviewerId"
               
                reviewer = divReview.xpath('.//span[@class="a-size-base a-color-secondary review-byline"]/a')
                if reviewer is not None and len(reviewer)>0:
                    aReview.reviewerID=reviewer[0].attrib['href'].split('/')[4].split('?')[0]
                print "reviewerId",aReview.reviewerID
                
            aReview.date = CommonTool.strToDate(divReview.xpath('./div[@class="a-row"]/span[4]')[0].text.strip())
            aReview.elapsedDate = (fetchDate - aReview.date).days
            print bookPublishDate
            if bookPublishDate=='N/A':
                aReview.reviewBookDate='N/A'
            else:
                aReview.reviewBookDate = (aReview.date - (bookPublishDate)).days
                
    
            # format line
            try:
                strFormat = divReview[3].xpath('./a[1]')[0].text.strip()
                aReview.fromFormat = strFormat.split(' ')[1]
            except IndexError:
                aReview.fromFormat = ''
            spanVerifiedPurchase = divReview.xpath('.//span[@class="a-size-mini a-color-state a-text-bold"]')
            if spanVerifiedPurchase:
                spanVerifiedPurchase=spanVerifiedPurchase[0].text.strip()
                if spanVerifiedPurchase=="Verified Purchase":
                    aReview.verified = 1
                else:
                    aReview.verified = 0
    
                # review text line
            divReviewText = divReview.xpath('.//div[@class="a-row review-data"]/span')[0]
            aReview.description = filtTag.filter_tags(
                etree.tostring(divReviewText).strip()).strip()
            aReview.description = aReview.description.replace('\n', '<br />')
            del divReviewText
   
   
                # review comments line
            aReview.numOfComments = CommonTool.strToInt(
                divReview.xpath('.//div[@class="a-row a-spacing-top-small review-comments"]/div/a/span/span[1]')[0].text.strip())
            aReview.getComments()
            saveReview(review=aReview)
    #             try:
    #                 reviewer = Reviewer.loadReviewer(aReview.reviewerID)
    #                 aReview.lastReviewRank = reviewer.getPreBookReviewRanking(
    #                     aReview.reviewID)
    #             except Exception, e:
    #                 sys.stderr.write(str(e) + '\n')
    #                 sys.stderr.write('lastReviewRank not found! url: {0} id: {1} \
    #                 reviewerID: {2}\n'.format(
    #                     url, aReview.reviewID, aReview.reviewerID))
    #                 import traceback
    #                 traceback.print_exc()
            
        # end of for
    # end of else
    return rank
示例#10
0
    async def crawl(self):
        """Continuously sync asset registrations above the stored height.

        Resumes at ``get_asset_state() + 1`` and then loops forever.  For
        every batch of at most ``self.max_tasks`` heights the blocks are
        cached and scanned for two kinds of transactions:

        * ``RegisterTransaction``: its ``asset`` entry, stamped with the
          block time, is stored under the transaction id.
        * ``InvocationTransaction`` with a ``sys_fee`` of at least 490 whose
          script ends with the contract-create marker: the script is parsed
          with ``parse_script`` and the result is stored under its
          ``contract`` key.  Scripts that fail to parse are reported and
          skipped.

        The collected assets are written with ``update_a_global_asset`` /
        ``update_a_nep5_asset`` and the asset state is advanced.  When there
        is nothing new the loop sleeps for half a second.

        The process exits with status 1 when the cached blocks do not match
        the requested heights.
        """
        self.start = await self.get_asset_state()
        self.start += 1

        while True:
            current_height = await self.get_block_count()
            time_a = CT.now()
            if self.start >= current_height:
                await asyncio.sleep(0.5)
                continue

            stop = min(self.start + self.max_tasks, current_height)
            self.processing.extend(range(self.start, stop))
            max_height = max(self.processing)

            # asyncio.wait() rejects bare coroutines on Python 3.11+, so every
            # coroutine is wrapped in a task before being waited on.
            await asyncio.wait([
                asyncio.ensure_future(self.cache_block(h))
                for h in self.processing
            ])
            if self.processing != sorted(self.cache.keys()):
                msg = 'cache != processing'
                logger.error(msg)
                sys.exit(1)

            global_assets = {}
            nep5_assets = {}
            for block in self.cache.values():
                block_time = block['time']
                for tx in block['tx']:
                    if 'RegisterTransaction' == tx['type']:
                        global_assets[tx['txid']] = tx['asset']
                        global_assets[tx['txid']]['time'] = block_time
                    if 'InvocationTransaction' == tx[
                            'type'] and 490 <= int(float(tx['sys_fee'])):
                        if tx['script'].endswith(
                                '68134e656f2e436f6e74726163742e437265617465'
                        ):
                            try:
                                asset = self.parse_script(tx['script'])
                            except Exception as e:
                                print('parse error:', e)
                                continue
                            asset['time'] = block_time
                            nep5_assets[asset['contract']] = asset

            if global_assets:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_global_asset(*i))
                    for i in global_assets.items()
                ])
            if nep5_assets:
                await asyncio.wait([
                    asyncio.ensure_future(self.update_a_nep5_asset(*i))
                    for i in nep5_assets.items()
                ])

            time_b = CT.now()
            logger.info(
                'reached %s ,cost %.6fs to sync %s blocks ,total cost: %.6fs'
                % (max_height, time_b - time_a, stop - self.start,
                   time_b - START_TIME))
            await self.update_asset_state(max_height)
            self.start = max_height + 1
            self.processing = []
            self.cache = {}
示例#11
0
 self.paperbackPrice = 0
 priceList = divCenterCol.find(".//div[@id='tmmSwatches']/ul")
 if priceList is None:
     priceList = divCenterCol.xpath(
         ".//div[@id='twister']/div/span[@class='a-declarative']/table/tr\
         ")
     for price in priceList:
         try:
             priceType = price.xpath(
                 './td[@class="dp-title-col"]/*[@class="title-text"]/span\
                 ')[0].text.strip()
             priceValue = price.xpath(
                 "./td[@class='a-text-right dp-price-col']//span")[0]\
                 .text.strip()
             if cmp(priceType, 'Kindle') == 0:
                 self.kindlePrice = CommonTool.strToFloat(priceValue)
             elif cmp(priceType, 'Hardcover') == 0:
                 self.hardcoverPrice = CommonTool.strToFloat(priceValue)
             elif cmp(priceType, 'Paperback') == 0:
                 self.paperbackPrice = CommonTool.strToFloat(priceValue)
         except Exception, e:
             pass
 else:
     priceList = priceList.xpath(".//li")
     for priceLi in priceList:
         spans = priceLi.xpath("./span/span/span/a/span")
         priceType = spans[0].text.strip()
         priceValue = spans[1]
         if priceValue.find("./span") is not None:
             priceValue = priceValue.find("./span").text.strip()
         else:
示例#12
0
 def printData(self):
     """Dump one combined row per review into ``dataAll.txt``.

     The file is created under ``../data/<fetchDate>/`` and starts with the
     tab-joined ``self.tableHeadList``.  For every non-empty review id the
     review, its book and its reviewer are loaded and their fields are
     written in the fixed column order spelled out below.
     """
     # Column order, grouped by the object each run of columns is read from.
     reviewKeyFields = ('asin', 'reviewerID')
     reviewerBadgeFields = (
         'rName', 'tRev1', 'tRev10', 'tRev50', 'tRev100', 'tRev500',
         'tRev1000', 'tRevHall', 'vVoice')
     # top1Percent / top5Percent / top10Percent and isQuoteTable are not
     # exported.
     reviewFields = (
         'verified', 'rate', 'title', 'date', 'fetchDate', 'reviewBookDate',
         'elapsedDate', 'helpful', 'total', 'helpfulness', 'helpfulRank',
         'timeRank', 'description', 'numOfComments', 'comment',
         'lastReviewRank')
     # hasQuoteTable is not exported.
     bookFields = (
         'url', 'tag', 'allowPreview', 'binding', 'publishDate', 'author',
         'authorInfo', 'rate', 'numOfReviews', 'kindlePrice',
         'hardcoverPrice', 'paperbackPrice', 'bookDsc', 'listPrice', 'pages',
         'isbn10', 'isbn13', 'subrank', 'hasEditorialReview',
         'editorialReview')
     reviewerStatFields = (
         'email', 'webPage', 'hasPhoto', 'rNum', 'helpRate', 'hVote', 'tVote',
         'avgRate', 'fRevTime', 'lRevTime', 'duration')
     # Written as 0 when the value is "N/A" and 1 otherwise.
     reviewerPresenceFields = ('rReal', 'location', 'aboutMe', 'interest')

     outPath = "../data/" + self.fetchDate.isoformat() + "/dataAll.txt"
     with open(outPath, "w") as fout:
         ct = CommonTool(fout)
         fout.write("\t".join(self.tableHeadList))
         fout.write('\n')
         fout.flush()
         for position, reviewID in enumerate(self.reviewList):
             if reviewID == '':
                 continue
             print('solve review {0} of {1}: {2}'.format(
                 position, len(self.reviewList), reviewID))
             review = Review.loadReview(reviewID)
             book = Book.loadBookByAsin(review.asin)
             reviewer = Reviewer.loadReviewer(review.reviewerID)

             ct.write(reviewID)
             columnRuns = (
                 (review, reviewKeyFields),
                 (reviewer, reviewerBadgeFields),
                 (review, reviewFields),
                 (book, bookFields),
                 (reviewer, reviewerStatFields),
             )
             for source, fieldNames in columnRuns:
                 for fieldName in fieldNames:
                     ct.write(getattr(source, fieldName))
             for fieldName in reviewerPresenceFields:
                 ct.write(0 if getattr(reviewer, fieldName) == "N/A" else 1)
             # The format is written as the raw text.
             ct.write(review.fromFormat)
             ct.write(0 if reviewer.rRank == "N/A" else reviewer.rRank)
             ct.writeln(book.rank)
示例#13
0
 def script_to_hash(unhex):
     """Hash raw script bytes with SHA-256 followed by RIPEMD-160.

     The RIPEMD-160 digest is hex-encoded, decoded to an ASCII string and
     passed through ``CT.big_or_little`` before being returned.
     """
     sha_digest = hashlib.sha256(unhex).digest()
     ripemd = hashlib.new('ripemd160')
     ripemd.update(sha_digest)
     hex_text = hexlify(ripemd.digest()).decode('ascii')
     return CT.big_or_little(hex_text)
示例#14
0
 def hex_to_num_str(cls, hs):
     """Convert a hex string into a number string scaled down by 10**8.

     The hex text is turned into bytes, converted with ``cls.bytes_to_num``,
     divided by 100000000 as a ``D`` value (presumably ``Decimal`` - confirm
     against the imports) and rendered through ``CT.sci_to_str``.
     """
     raw = unhexlify(hs)
     scaled = D(cls.bytes_to_num(raw)) / 100000000
     return CT.sci_to_str(str(scaled))
示例#15
0
 def getPreviousReviewedBook(self,reviewerId):
     """Crawl the reviewer's member-review pages and derive review statistics.

     Pages of ``http://www.amazon.com/gp/cdp/member-reviews/<reviewerId>``
     are fetched through ``MyHtml.getHtml`` with ``sortBy=MostRecentReview``.
     Nothing is returned; results are stored on ``self``:

     * ``allRevLink`` -- URL of the first review page.
     * ``counter`` -- reset to 0.
     * ``previousBookReviewDate`` / ``previousBookPublishDate`` -- taken
       from the first row whose format label contains "Edition".
     * ``lRevTime`` / ``fRevTime`` -- date found next to the first row of
       page 1 / the last row of the final page; only filled while the
       attribute still equals ``''``.
     * ``sum`` -- incremented by every star rating that is found.
     * ``avgRate`` -- ``sum / rNum`` rounded to two decimals; left untouched
       when ``rNum`` is 0.
     * ``duration`` -- whole days between ``fRevTime`` and ``lRevTime``, or
       0 when either is still ``''``.

     ``sum``, ``rNum``, ``lRevTime`` and ``fRevTime`` are read before being
     written, so they must already exist on the instance.

     Debug output uses single-argument ``print(...)`` so the emitted text is
     the same under Python 2 and Python 3.

     :param reviewerId: reviewer id appended to the member-reviews URL.
     """
     def nobrAfter(row):
         # <nobr> nodes inside the element that directly follows `row`;
         # an empty list when `row` has no following sibling at all.
         siblings = row.xpath('./following-sibling::*')
         if not siblings:
             return []
         return siblings[0].xpath('.//nobr')

     memberUrl = "http://www.amazon.com/gp/cdp/member-reviews/" + reviewerId
     self.allRevLink = memberUrl
     html = MyHtml.getHtml(memberUrl)
     ftable = html.xpath('.//body/table[2]')[0]

     # The last pagination link carries the page count; fall back to a
     # single page when the link (or its text) is missing.
     pages = ftable.xpath('./tr/td[2]/table[1]/tr[1]/td[2]/b/a[last()]')
     if pages and pages[0].text is not None:
         totalPages = pages[0].text.strip()
     else:
         totalPages = 1
     strPages = str(totalPages)
     print(strPages)
     if "-" in strPages:
         # A "first-last" range label: keep the part after the dash.
         totalPages = totalPages.split('-')
         print("totalPages %s" % (totalPages,))
         totalPages = totalPages[1]
     print("%s totalPages" % (totalPages,))

     sortBy = 'MostRecentReview'
     self.counter = 0
     foundPrevious = False
     lastPage = int(totalPages)
     for j in range(1, lastPage + 1):
         pageUrl = memberUrl + \
             '?pageNumber={}&sortBy={}'.format(str(j), sortBy)
         print("%s %s %s" % (pageUrl, j, totalPages))
         html = MyHtml.getHtml(pageUrl)
         ftable = html.xpath('.//body/table[2]')[0]
         mainTable = ftable.xpath('./tr/td[2]/table[2]/tr[@valign="top"]')
         for row in mainTable:
             # Only the first row labelled with an "... Edition" format is
             # treated as the previously reviewed book.
             isBook = row.xpath('./td[5]/table/tr[2]/td/b')
             if isBook and isBook[0].text is not None and not foundPrevious:
                 edition = isBook[0].text.strip()
                 if "Edition" in edition:
                     print("got the previous book")
                     foundPrevious = True
                     cells = nobrAfter(row)
                     if cells and cells[0].text is not None:
                         reviewdate = cells[0].text.strip()
                         print("got the reviewDate %s" % (reviewdate,))
                         self.previousBookReviewDate = \
                             CommonTool.strToDate(reviewdate)

                     # Link of the previously reviewed book.
                     url = row.xpath(
                         './td[5]/table/tr[1]/td/b/a')[0].attrib['href']
                     asin = Book.getAsinFromUrl(url)
                     previousBook = Book.loadBookByAsin(asin)
                     print("asinofPrevious %s" % (asin,))
                     print("previousBook %s" % (previousBook,))
                     self.previousBookPublishDate = previousBook.publishDate

             # Pages are sorted most-recent-first, so the first row of page 1
             # and the last row of the final page bound the review period.
             if j == 1 and self.lRevTime == '':
                 cells = nobrAfter(mainTable[0])
                 print("%s lRevtime" % (cells,))
                 if cells and cells[0].text is not None:
                     self.lRevTime = CommonTool.strToDate(
                         cells[0].text.strip())
             print("value of j %s" % (j,))
             if j == lastPage and self.fRevTime == '':
                 print("inside frevtime loop")
                 cells = nobrAfter(mainTable[-1])
                 print("%s fRevtime" % (cells,))
                 if cells and cells[0].text is not None:
                     self.fRevTime = CommonTool.strToDate(
                         cells[0].text.strip())

             # Star rating: the number before "out" in the title of the
             # first image of the following element. Rows without such an
             # image are skipped instead of raising IndexError/KeyError.
             following = row.xpath("./following-sibling::*")
             if following:
                 images = following[0].xpath(".//img")
                 title = images[0].attrib.get('title') if images else None
                 if title is not None:
                     rate = title.split("out")[0].strip()
                     self.sum = self.sum + float(rate)

     print("sum %s" % (self.sum,))
     # Value comparison: `is not 0` tested object identity, which only
     # happens to work for CPython's cached small ints.
     if self.rNum != 0:
         self.avgRate = round(self.sum / self.rNum, 2)
     if self.lRevTime == '' or self.fRevTime == '':
         duration = 0
     else:
         duration = (self.lRevTime - self.fRevTime).days
     self.duration = int(duration)
示例#16
0
    def solveProfileUp(self):
        """Populate reviewer attributes from the profile header in ``self.html``.

        Works on the first ``<div class='a-row profile-details'>``; when the
        page has none the method returns immediately and sets nothing.

        Attributes written (``'N/A'`` / ``0`` mark missing data):

        * ``rName`` -- reviewer name from the page heading.
        * ``rRank`` -- reviewer ranking via ``CommonTool.strToInt``, or
          ``'N/A'``.  Left untouched when the fallback text has no ``#``.
        * ``tRev1`` .. ``tRev1000``, ``tRevHall`` -- 0/1 badge flags; at most
          one is set.  ``rReal`` is reset to 0.
        * ``interest`` -- interests text with newlines turned into ``<br>``.
        * ``aboutMe`` -- all "About" paragraphs concatenated, newlines turned
          into ``<br />``; only written when paragraphs are found.
        * ``email`` / ``webPage`` -- 1 when the respective link exists.
        * ``rNum`` -- number taken from the parentheses of the "Reviews"
          label.
        * ``helpRate`` -- helpfulness percentage as a fraction (0.0-1.0).
        * ``hVote`` / ``tVote`` -- the two numbers of the "(x of y)" label.

        Debug output uses single-argument ``print(...)`` so the emitted text
        is the same under Python 2 and Python 3.
        """
        profile = self.html.xpath("//div[@class='a-row profile-details']")
        if not profile:
            return
        profile = profile[0]

        # Reviewer Name ("N/A" when the heading is missing or blank)
        heading = self.html.xpath(".//div[@class='a-section']/h1")
        name = heading[0].text if heading else None
        self.rName = (name or '').strip() or "N/A"

        # Reviewer Ranking
        infoBlocks = profile.xpath(".//div[@class='profile-info']")
        if not infoBlocks:
            # Previously unreachable: the 'N/A' default lived inside the loop
            # below, which never runs for an empty list.
            self.rRank = 'N/A'
        for block in infoBlocks:
            # Each block is judged on its own nodes; a later block may
            # overwrite the value found in an earlier one.
            badge = block.xpath(
                ".//div[@class='a-row']/span[@class='a-size-large a-text-bold']")
            if badge:
                rankText = (badge[0].text or '').strip()
                if rankText:
                    self.rRank = CommonTool.strToInt(rankText.replace('#', ''))
                else:
                    self.rRank = 'N/A'
            else:
                print("entered else in ranking")
                smallSpans = profile.xpath(
                    ".//span[@class='a-size-small a-color-secondary']")
                print(smallSpans)
                # Prefer the (last) span mentioning "ranking"; otherwise keep
                # the whole list so its first span is inspected below.
                candidate = smallSpans
                for span in smallSpans:
                    if span.text is not None and "ranking" in span.text:
                        candidate = span

                print("aranking  %s" % (candidate,))
                # `candidate` is either the span list or one span element; in
                # the latter case [0] addresses the span's first child.
                if len(candidate) > 0 and candidate[0].text is not None:
                    rankText = candidate[0].text.strip()
                    if "#" in rankText:
                        rankParts = rankText.split('#')
                        print("rank %s" % (rankParts,))
                        self.rRank = CommonTool.strToInt(rankParts[1])
                else:
                    self.rRank = 'N/A'

        # Top Reviewer flags (Vine Voice detection is currently disabled)
        self.tRev1 = 0
        self.tRev10 = 0
        self.tRev50 = 0
        self.tRev100 = 0
        self.tRev500 = 0
        self.tRev1000 = 0
        self.tRevHall = 0
        self.rReal = 0

        tRev = profile.xpath(
            ".//span[@class='a-color-link pr-c7y-badge a-text-bold']")
        if tRev and tRev[0].text is not None:
            badgeText = tRev[0].text.strip()
            # Checked in this order; only the first matching badge is flagged.
            badgeFlags = (
                ('#1 REVIEWER', 'tRev1'),
                ('TOP 10 REVIEWER', 'tRev10'),
                ('TOP 50 REVIEWER', 'tRev50'),
                ('TOP 100 REVIEWER', 'tRev100'),
                ('TOP 500 REVIEWER', 'tRev500'),
                ('TOP 1000 REVIEWER', 'tRev1000'),
                ('HALL OF FAME', 'tRevHall'),
            )
            for label, attrName in badgeFlags:
                if label in badgeText:
                    setattr(self, attrName, 1)
                    break

        # INTEREST
        self.interest = 'N/A'
        interestRows = profile.xpath(
            ".//div[@class='a-row a-spacing-medium profile-interests']")
        if interestRows:
            interestSpans = interestRows[0].xpath(
                "./div/span[@class='a-size-small']")
            if interestSpans and interestSpans[0].text is not None:
                interestText = interestSpans[0].text.strip()
                interestText = interestText.replace('\r', '')
                self.interest = interestText.replace('\n', '<br>')

        # ABOUT ME
        abtMe = profile.xpath(
            ".//span/text()[normalize-space(.)='About']/parent::*/\
            following-sibling::div/div[1]/span/p")
        print("%s abtme" % (abtMe,))
        if abtMe:
            # Join every paragraph: resetting the accumulator inside the loop
            # used to keep only the last one.  Text-less <p> nodes are skipped.
            self.aboutMe = ''.join(
                p.text.strip().replace('\r', '').replace('\n', '<br />')
                for p in abtMe if p.text is not None)

        # Email && webpage (presence flags only)
        link1 = profile.xpath(".//div[@class='a-row break-word pr-link']/a")
        self.email = 1 if link1 else 0

        link2 = profile.xpath(
            ".//div[@class='a-row customer-website pr-link']/a/span")
        self.webPage = 1 if link2 else 0

        # rNum
        rNumSpans = profile.xpath(
            ".//div[@class='a-column a-span7 pr-link']/a/span")
        if not rNumSpans:
            self.rNum = 0
        else:
            label = (rNumSpans[0].text or '').strip()
            count = '0'
            if "Reviews" in label:
                print(label)
                # The count sits between the first "(" and the next ")";
                # a label without parentheses counts as zero reviews.
                if '(' in label:
                    count = label.split('(')[1].split(')')[0]
            self.rNum = CommonTool.strToInt(count)

        # helpRate
        self.helpRate = 0.0
        helpful = profile.xpath(".//div[@class='a-row customer-helpfulness']")
        if helpful:
            rateSpans = helpful[0].xpath(
                ".//span[@class='a-size-large a-text-bold']")
            if rateSpans:
                rateText = rateSpans[0].text.strip()
                # Drop the trailing character (the percent sign) and scale.
                self.helpRate = int(rateText[:-1]) / 100.0

        # hVote && tVote
        self.hVote = 0
        self.tVote = 0
        votes = profile.xpath(
            "./span/div/div/div/span[@class='a-size-small a-color-secondary']")
        if votes and votes[0].text is not None:
            votesList = votes[0].text.strip().split(' of ')
            # Expected shape "(x of y)"; anything else keeps both at 0.
            if len(votesList) >= 2:
                self.hVote = CommonTool.strToInt(votesList[0][1:].strip())
                self.tVote = CommonTool.strToInt(votesList[1][:-1].strip())
示例#17
0
 def parse_return_type(cls, mark):
     """Resolve a return-type marker to its name via ``cls.get_arg_name``.

     A ``str`` marker is first passed through ``CT.big_or_little`` and parsed
     as a base-16 integer; an ``int`` marker is used as is.

     :param mark: marker given either as hex text or as an integer.
     :raises ValueError: when the marker is (or converts to) something that
         is not an ``int``.
     """
     code = mark
     if isinstance(code, str):
         code = int(CT.big_or_little(code), 16)
     if not isinstance(code, int):
         raise ValueError('wrong type for return {}'.format(code))
     return cls.get_arg_name(code)