Exemplo n.º 1
0
    def readDetailHost(self, response):
        """Handle a host detail response: record the host and queue its image.

        Generator callback. Reads ``response.meta["item"]`` (passed to
        ``Host.parse``) and, once a row has been inserted,
        ``response.meta["cityName"]`` (used as the key of the per-city counter
        kept in ``self.state``).

        Yields:
            1. The return value of ``self.checkNextPage(meta)``; this is
               yielded for every response, before the existence check.
            2. ``{'image_urls': [<profile image URL>]}``, only when
               ``item.checkExisted()`` is falsy.
        """
        meta = response.meta
        raw_item = meta["item"]
        item = Host()
        item.parse(raw_item)

        # Parenthesised single-argument form prints the same text on
        # Python 2 (print statement) and Python 3 (print function).
        print("===https://vinalo.com/%s-%s" % (item.alias, item.crawler))

        yield self.checkNextPage(meta)

        if item.checkExisted():
            return

        image_url = item.image_profile
        yield {'image_urls': [image_url]}

        # hashlib hashes bytes. Encode text explicitly so a non-ASCII URL does
        # not raise UnicodeEncodeError (Python 2) and a str URL does not raise
        # TypeError (Python 3). For pure-ASCII URLs the digest is unchanged.
        url_bytes = image_url
        if not isinstance(url_bytes, bytes):
            url_bytes = url_bytes.encode('utf-8')
        # NOTE(review): presumably this mirrors the file name produced by the
        # image pipeline consuming 'image_urls' -- confirm against its config.
        item.image_profile = '%s.jpg' % hashlib.sha1(url_bytes).hexdigest()

        item.parseContent(response)
        host_id = item.insertDB()
        if host_id > 0:
            # Count one more inserted host for this city.
            cityName = meta["cityName"]
            self.state[cityName] = self.state.get(cityName, 0) + 1

            # Link the inserted host to each of its tags with a positive id.
            object_tag = ObjectTag()
            for tag_id in item.listTagId:
                if tag_id > 0:
                    object_tag.insertNewObjectTag(host_id, tag_id)
Exemplo n.º 2
0
    def readDetailHost(self, response):
        """Handle a host detail response and count newly inserted hosts.

        Builds a ``Host`` from ``response.meta["item"]``. If
        ``item.checkExisted()`` is truthy the method returns immediately.
        Otherwise the response is passed to ``item.parseContent`` and
        ``item.insertDB()`` is called; when the returned id is positive, the
        counter stored in ``self.state`` under ``response.meta["cityName"]``
        is incremented by one.
        """
        meta = response.meta
        payload = meta["item"]
        item = Host()
        item.parse(payload)

        # Nothing more to do when checkExisted() reports true.
        if item.checkExisted():
            return

        item.parseContent(response)
        id = item.insertDB()
        if id > 0:
            cityName = meta["cityName"]
            inserted_so_far = self.state.get(cityName, 0)
            self.state[cityName] = inserted_so_far + 1