Пример #1
0
def doubanBookXML():
    """Download the Douban book-info XML for every book with a usable ISBN.

    Iterates over all ``Book`` rows. For each ISBN longer than 9 characters
    it requests ``doubanXML_URL + ISBN + apikey`` through
    ``utility.downloadUrl``, which returns a ``(success, rawdata)`` pair.

    * On success the payload is re-encoded as UTF-8 and written to
      ``BOOKINFO_ISBN_<ISBN>.XML`` under ``xmlFilePath``.
    * On failure the ISBN is appended, one per line, to ``usFilePath``.

    Sleeps two seconds after every request. Books whose ISBN is 9
    characters or shorter are skipped without a request or a sleep.
    """
    doubanXML_URL = "http://api.douban.com/book/subject/isbn/"
    xmlFilePath = "/Users/wangjz/Desktop/down/books_xml/"
    usFilePath = "/Users/wangjz/Desktop/down/usFile.txt"
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    apikey = "?apikey=0ff0dcf23a3d64d02fd296d6aeb82e70"

    for abook in Book.objects.all():
        isbn = abook.ISBN
        if len(isbn) <= 9:
            continue

        print("bookISBN: " + isbn)
        success, rawdata = utility.downloadUrl(
            doubanXML_URL + isbn + apikey, 'api.douban.com')

        if success:
            data = utility.zh2unicode(rawdata).encode('utf-8')
            fileName = xmlFilePath + "BOOKINFO_ISBN_" + isbn + ".XML"
            # Binary mode: ``data`` is already UTF-8 encoded bytes, so no
            # text-layer translation should be applied on the way out.
            with open(fileName, "wb") as bookxml:
                bookxml.write(data)
        else:
            # Append mode with a newline separator. The previous "w+" mode
            # truncated the file on every failure, so only the last failed
            # ISBN was ever kept.
            with open(usFilePath, "a") as unsuccessFile:
                unsuccessFile.write(isbn + "\n")

        time.sleep(2)
Пример #2
0
def doubanBookComments():
    """Download the first page of Douban reviews for every book with a usable ISBN.

    Iterates over all ``Book`` rows. For each ISBN longer than 9 characters
    it requests ``comentsUrlStart + ISBN + comentsUrlEnd + apikey`` through
    ``utility.downloadUrl``, which returns a ``(success, rawdata)`` pair.
    The query string asks for results starting at index 1 with at most 20
    entries.

    * On success the payload is re-encoded as UTF-8 and written to
      ``COMMENTS_ISBN_<ISBN>.XML`` under ``commentsFilePath``.
    * On failure the ISBN is appended, one per line, to ``usCommentsFile``.

    Sleeps two seconds after every request. Books whose ISBN is 9
    characters or shorter are skipped without a request or a sleep.
    """
    comentsUrlStart = "http://api.douban.com/book/subject/isbn/"
    comentsUrlEnd = "/reviews?start-index=1&max-results=20"
    commentsFilePath = "/Users/wangjz/Desktop/down/comments/"
    usCommentsFile = "/Users/wangjz/Desktop/down/usCommentsFile.txt"
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    apikey = "&apikey=0ff0dcf23a3d64d02fd296d6aeb82e70"

    for abook in Book.objects.all():
        isbn = abook.ISBN
        if len(isbn) <= 9:
            continue

        print("bookISBN: " + isbn)
        success, rawdata = utility.downloadUrl(
            comentsUrlStart + isbn + comentsUrlEnd + apikey,
            'api.douban.com')

        if success:
            data = utility.zh2unicode(rawdata).encode('utf-8')
            fileName = commentsFilePath + "COMMENTS_ISBN_" + isbn + ".XML"
            # Binary mode: ``data`` is already UTF-8 encoded bytes.
            with open(fileName, "wb") as commentxml:
                commentxml.write(data)
        else:
            # Append mode with a newline separator. The previous "w+" mode
            # truncated the file on every failure, so only the last failed
            # ISBN was ever kept.
            with open(usCommentsFile, "a") as ucf:
                ucf.write(isbn + "\n")

        time.sleep(2)
Пример #3
0
def doubanBookComments():
    """Download the first page of Douban reviews for every book with a usable ISBN.

    Iterates over all ``Book`` rows. For each ISBN longer than 9 characters
    it requests ``comentsUrlStart + ISBN + comentsUrlEnd + apikey`` through
    ``utility.downloadUrl``, which returns a ``(success, rawdata)`` pair.

    * On success the payload is re-encoded as UTF-8 and written to
      ``COMMENTS_ISBN_<ISBN>.XML`` under ``commentsFilePath``.
    * On failure the ISBN is appended, one per line, to ``usCommentsFile``.

    Sleeps two seconds after every request. Books whose ISBN is 9
    characters or shorter are skipped without a request or a sleep.
    """
    comentsUrlStart = "http://api.douban.com/book/subject/isbn/"
    comentsUrlEnd = "/reviews?start-index=1&max-results=20"
    commentsFilePath = "/Users/wangjz/Desktop/down/comments/"
    usCommentsFile = "/Users/wangjz/Desktop/down/usCommentsFile.txt"
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    apikey = "&apikey=0ff0dcf23a3d64d02fd296d6aeb82e70"

    for abook in Book.objects.all():
        isbn = abook.ISBN
        if len(isbn) <= 9:
            continue

        print("bookISBN: " + isbn)
        requestUrl = comentsUrlStart + isbn + comentsUrlEnd + apikey
        success, rawdata = utility.downloadUrl(requestUrl, 'api.douban.com')

        if success:
            data = utility.zh2unicode(rawdata).encode('utf-8')
            fileName = commentsFilePath + "COMMENTS_ISBN_" + isbn + ".XML"
            # Binary mode: ``data`` is already UTF-8 encoded bytes.
            with open(fileName, "wb") as commentxml:
                commentxml.write(data)
        else:
            # Append mode with a newline separator. The previous "w+" mode
            # truncated the file on every failure, so only the last failed
            # ISBN was ever kept.
            with open(usCommentsFile, "a") as ucf:
                ucf.write(isbn + "\n")

        time.sleep(2)
Пример #4
0
def doubanBookXML():
    """Download the Douban book-info XML for every book with a usable ISBN.

    Iterates over all ``Book`` rows. For each ISBN longer than 9 characters
    it requests ``doubanXML_URL + ISBN + apikey`` through
    ``utility.downloadUrl``, which returns a ``(success, rawdata)`` pair.

    * On success the payload is re-encoded as UTF-8 and written to
      ``BOOKINFO_ISBN_<ISBN>.XML`` under ``xmlFilePath``.
    * On failure the ISBN is appended, one per line, to ``usFilePath``.

    Sleeps two seconds after every request. Books whose ISBN is 9
    characters or shorter are skipped without a request or a sleep.
    """
    doubanXML_URL = "http://api.douban.com/book/subject/isbn/"
    xmlFilePath = "/Users/wangjz/Desktop/down/books_xml/"
    usFilePath = "/Users/wangjz/Desktop/down/usFile.txt"
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    apikey = "?apikey=0ff0dcf23a3d64d02fd296d6aeb82e70"

    for abook in Book.objects.all():
        isbn = abook.ISBN
        if len(isbn) <= 9:
            continue

        print("bookISBN: " + isbn)
        requestUrl = doubanXML_URL + isbn + apikey
        success, rawdata = utility.downloadUrl(requestUrl, 'api.douban.com')

        if success:
            data = utility.zh2unicode(rawdata).encode('utf-8')
            fileName = xmlFilePath + "BOOKINFO_ISBN_" + isbn + ".XML"
            # Binary mode: ``data`` is already UTF-8 encoded bytes.
            with open(fileName, "wb") as bookxml:
                bookxml.write(data)
        else:
            # Append mode with a newline separator. The previous "w+" mode
            # truncated the file on every failure, so only the last failed
            # ISBN was ever kept.
            with open(usFilePath, "a") as unsuccessFile:
                unsuccessFile.write(isbn + "\n")

        time.sleep(2)
Пример #5
0
def getImageURL():
    """Fetch each book's page and store the large-cover image URL found in it.

    For every ``Book`` row the page at ``abook.ddbookurl`` is downloaded via
    ``utility.downloadUrl``, which returns a ``(success, rawdata)`` pair.
    The page is re-encoded as UTF-8 and searched for the ``largePic``
    ``<img>`` tag; the tag's ``src`` value is assigned to ``abook.imageurl``
    and the row is saved.

    Download failures, pages that do not match the pattern, and save errors
    are reported with ``print`` and the loop moves on to the next book.
    Sleeps two seconds after every book.
    """
    # Bytes pattern, because it is matched against UTF-8 encoded data below.
    # \xef\xbc\x9a is the UTF-8 byte sequence of the full-width colon that
    # follows "I S B N" on the page. Group 1 is the image URL; group 2 (the
    # text after the colon) is captured but not used here.
    patternStarlevel = re.compile(
        br"""__bigpic_pub"><img src="(.+?)" alt="" id="largePic".+?I S B N\xef\xbc\x9a(.+?)</s""",
        re.DOTALL)

    for abook in Book.objects.all():
        success, rawdata = utility.downloadUrl(abook.ddbookurl, 'api.douban.com')
        if not success:
            print('unsuccess with url' + abook.ddbookurl)
        else:
            data = utility.zh2unicode(rawdata).encode('utf-8')
            found = patternStarlevel.search(data)
            if found is None:
                # Previously matches[0][0] raised IndexError here and aborted
                # the whole run on the first page with unexpected markup.
                print('no image url found in ' + abook.ddbookurl)
            else:
                # NOTE(review): on Python 3 this is a bytes object; decode it
                # before storing if the model field expects text - confirm.
                imageUrl = found.group(1)
                print(imageUrl)
                abook.imageurl = imageUrl
                try:
                    abook.save()
                except Exception as e:
                    # Best effort: report the failure and keep going.
                    print('savlindbdis body error--- %s' % (e,))
        time.sleep(2)