def parse(self, response):
    """Build one ``videoItem`` per video found in ``response``.

    Four parallel lists (name, view count, publish time, length) are
    extracted with XPath and combined position by position.  The error
    message suggests this callback targets Tudou pages.

    Args:
        response: The response object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``videoItem`` objects with the keys ``name``,
        ``viewNum``, ``pubTime`` and ``length`` populated.

    Raises:
        NameError: If the extracted lists do not all have the same length,
            which means the page layout did not match the XPath queries.
    """
    hxs = HtmlXPathSelector(response)
    names = hxs.select('//div/div/div/div/h1/a/text()').extract()
    view_nums = hxs.select('//ul[@class = "info"]/li[3]/text()').extract()
    pub_times = hxs.select('//ul[@class = "info"]/li[2]/text()').extract()
    lengths = hxs.select('//ul[@class = "info"]/li[1]/text()').extract()

    # Every column must contain exactly one entry per video name.  The
    # publish-time list is checked explicitly: measuring the view-count
    # list twice would let a short publish-time list slip through and fail
    # later with an IndexError instead of this fetch error.
    expected = len(names)
    columns = (view_nums, pub_times, lengths)
    if any(len(column) != expected for column in columns):
        raise NameError('fetch failed for tudou')

    # Lengths are verified equal, so zip() pairs the fields without loss.
    items = []
    for name, view_num, pub_time, length in zip(
            names, view_nums, pub_times, lengths):
        item = videoItem()
        item['name'] = name
        item['viewNum'] = view_num
        item['pubTime'] = pub_time
        item['length'] = length
        items.append(item)
    return items
def parse(self, response):
    """Build one ``videoItem`` per video found in ``response``.

    Four parallel lists (name, view count, publish time, length) are
    extracted with XPath and combined position by position.  The error
    message suggests this callback targets Tudou pages.

    Args:
        response: The response object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``videoItem`` objects with the keys ``name``,
        ``viewNum``, ``pubTime`` and ``length`` populated.

    Raises:
        NameError: If the extracted lists do not all have the same length,
            which means the page layout did not match the XPath queries.
    """
    hxs = HtmlXPathSelector(response)
    names = hxs.select('//div/div/div/div/h1/a/text()').extract()
    view_nums = hxs.select('//ul[@class = "info"]/li[3]/text()').extract()
    pub_times = hxs.select('//ul[@class = "info"]/li[2]/text()').extract()
    lengths = hxs.select('//ul[@class = "info"]/li[1]/text()').extract()

    # Every column must contain exactly one entry per video name.  The
    # publish-time list is checked explicitly: measuring the view-count
    # list twice would let a short publish-time list slip through and fail
    # later with an IndexError instead of this fetch error.
    expected = len(names)
    columns = (view_nums, pub_times, lengths)
    if any(len(column) != expected for column in columns):
        raise NameError('fetch failed for tudou')

    # Lengths are verified equal, so zip() pairs the fields without loss.
    items = []
    for name, view_num, pub_time, length in zip(
            names, view_nums, pub_times, lengths):
        item = videoItem()
        item['name'] = name
        item['viewNum'] = view_num
        item['pubTime'] = pub_time
        item['length'] = length
        items.append(item)
    return items
def parse(self, response):
    """Build one ``videoItem`` per video found in ``response``.

    Three parallel lists (name, view count, publish time) are extracted
    with XPath and combined position by position.  The error message
    suggests this callback targets Youku pages.

    Args:
        response: The response object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``videoItem`` objects with the keys ``name``,
        ``viewNum`` and ``pubTime`` populated.

    Raises:
        NameError: If the extracted lists do not all have the same length,
            which means the page layout did not match the XPath queries.
    """
    # inspect_response(response)  # debugging aid, left disabled
    hxs = HtmlXPathSelector(response)
    names = hxs.select('//div[2]/div/div[2]/div/ul/li/a/text()').extract()
    view_nums = hxs.select('//ul/li/span[2]/text()').extract()
    pub_times = hxs.select(
        '//div[2]/div/ul/li[@class = "v_pub"]/span/text()').extract()
    # NOTE(review): the original comment here described the format of the
    # extracted text but was garbled; presumably the values contain CJK
    # characters -- confirm against a live page.

    # Every column must contain exactly one entry per video name.  The
    # publish-time list is checked explicitly: measuring the view-count
    # list twice would let a short publish-time list slip through and fail
    # later with an IndexError instead of this fetch error.
    expected = len(names)
    if len(view_nums) != expected or len(pub_times) != expected:
        raise NameError('fetch failed for youku')

    # Lengths are verified equal, so zip() pairs the fields without loss.
    items = []
    for name, view_num, pub_time in zip(names, view_nums, pub_times):
        item = videoItem()
        item['name'] = name
        item['viewNum'] = view_num
        item['pubTime'] = pub_time
        items.append(item)
    return items
def parse(self, response):
    """Build one ``videoItem`` per video found in ``response``.

    Three parallel lists (name, view count, publish time) are extracted
    with XPath and combined position by position.  The error message
    suggests this callback targets Youku pages.

    Args:
        response: The response object handed to ``HtmlXPathSelector``.

    Returns:
        A list of ``videoItem`` objects with the keys ``name``,
        ``viewNum`` and ``pubTime`` populated.

    Raises:
        NameError: If the extracted lists do not all have the same length,
            which means the page layout did not match the XPath queries.
    """
    # inspect_response(response)  # debugging aid, left disabled
    hxs = HtmlXPathSelector(response)
    names = hxs.select('//div[2]/div/div[2]/div/ul/li/a/text()').extract()
    view_nums = hxs.select('//ul/li/span[2]/text()').extract()
    pub_times = hxs.select(
        '//div[2]/div/ul/li[@class = "v_pub"]/span/text()').extract()
    # NOTE(review): the original comment here described the format of the
    # extracted text but was garbled; presumably the values contain CJK
    # characters -- confirm against a live page.

    # Every column must contain exactly one entry per video name.  The
    # publish-time list is checked explicitly: measuring the view-count
    # list twice would let a short publish-time list slip through and fail
    # later with an IndexError instead of this fetch error.
    expected = len(names)
    if len(view_nums) != expected or len(pub_times) != expected:
        raise NameError('fetch failed for youku')

    # Lengths are verified equal, so zip() pairs the fields without loss.
    items = []
    for name, view_num, pub_time in zip(names, view_nums, pub_times):
        item = videoItem()
        item['name'] = name
        item['viewNum'] = view_num
        item['pubTime'] = pub_time
        items.append(item)
    return items