def _process_item(self, item, spider):
     """Store one item as a single row via ``self.client.mutateRow``.

     The column names, cell values and key come from
     ``self.item_key(item, spider)``. All three are printed, one
     ``Mutation`` is built per (column, value) pair, and the row is written
     to ``self.tableName`` under the row key ``confUtil.getMd5(key)``.

     Returns the ``item`` that was passed in.
     """
     columns, values, raw_key = self.item_key(item, spider)

     # Debug output. The parenthesised single-argument form prints exactly
     # what the Python 2 ``print`` statement prints.
     print(columns)
     print(values)
     print(raw_key)

     # zip() pairs columns with values positionally and stops at the
     # shorter of the two sequences.
     row_mutations = []
     for column_name, cell_value in zip(columns, values):
         row_mutations.append(Mutation(column=column_name, value=cell_value))

     row_key = confUtil.getMd5(raw_key)
     # NOTE(review): the trailing None is presumably the optional
     # "attributes" argument of the HBase Thrift mutateRow call - confirm.
     self.client.mutateRow(self.tableName, row_key, row_mutations, None)
     return item
    def hbase_tables(self):
        """Print the client's table names, then write one hard-coded row.

        The row consists of seven fixed ``detail:*`` cells and is written to
        ``self.tableName`` under the row key ``confUtil.getMd5(key)``, where
        the key is a fixed article URL. The row key is printed before the
        write. Nothing is returned.

        NOTE(review): this looks like a manual smoke test left in the class
        rather than production logic - confirm before relying on it.
        """
        table_names = self.client.getTableNames()
        print(table_names)

        sample_key = "http://news.sina.com.cn/c/2015-03-10/023931587440.shtml"

        # (column, value) pairs, in the same order as the original parallel
        # column/value lists.
        sample_cells = (
            ('detail:publish_time', '2015-03-10 02:39'),
            ('detail:site_source', 'news.sina.com.cn'),
            ('detail:site_type', 'news'),
            ('detail:site_url',
             'http://news.sina.com.cn/c/2015-03-10/023931587440.shtml'),
            ('detail:task_id', '-1'),
            # The author name is encoded to UTF-8 bytes before being stored.
            ('detail:author', u'\u4eac\u534e\u65f6\u62a5'.encode("utf-8")),
            ('detail:catch_date', '2015-03-27'),
        )

        print(confUtil.getMd5(sample_key))

        row_mutations = []
        for column_name, cell_value in sample_cells:
            row_mutations.append(Mutation(column=column_name, value=cell_value))

        # NOTE(review): the trailing None is presumably the optional
        # "attributes" argument of the HBase Thrift mutateRow call - confirm.
        self.client.mutateRow(
            self.tableName, confUtil.getMd5(sample_key), row_mutations, None)
 def _process_item(self, item, spider):
     """Write ``item`` as one row through ``self.client.mutateRow``.

     ``self.item_key(item, spider)`` supplies the column names, the cell
     values and the key. Each of the three is printed, the columns and
     values are paired into ``Mutation`` objects, and the row is stored in
     ``self.tableName`` under ``confUtil.getMd5(key)``.

     Returns the ``item`` argument.
     """
     column_names, cell_values, item_key = self.item_key(item, spider)

     # Debug output. For a single argument, print(x) and the Python 2
     # statement ``print x`` produce the same text.
     for debug_value in (column_names, cell_values, item_key):
         print(debug_value)

     # Positional pairing: zip() stops at the shorter sequence.
     pending = []
     for name, value in zip(column_names, cell_values):
         pending.append(Mutation(column=name, value=value))

     hashed_key = confUtil.getMd5(item_key)
     # NOTE(review): the final None is presumably the optional "attributes"
     # argument of the HBase Thrift mutateRow call - confirm.
     self.client.mutateRow(self.tableName, hashed_key, pending, None)
     return item
    def hbase_tables(self):
        """List the client's tables on stdout and store one fixed sample row.

        Prints the result of ``self.client.getTableNames()``, prints the
        row key ``confUtil.getMd5(key)`` for a fixed article URL, then
        writes seven fixed ``detail:*`` cells to ``self.tableName`` under
        that row key. Nothing is returned.

        NOTE(review): the values are all literals, so this appears to be a
        manual connectivity/write check - confirm its intended use.
        """
        known_tables = self.client.getTableNames()
        print(known_tables)

        article_url = "http://news.sina.com.cn/c/2015-03-10/023931587440.shtml"

        # Column/value pairs in the original column order.
        fixed_row = [
            ('detail:publish_time', '2015-03-10 02:39'),
            ('detail:site_source', 'news.sina.com.cn'),
            ('detail:site_type', 'news'),
            ('detail:site_url',
             'http://news.sina.com.cn/c/2015-03-10/023931587440.shtml'),
            ('detail:task_id', '-1'),
            # Stored as UTF-8 encoded bytes rather than a unicode object.
            ('detail:author', u'\u4eac\u534e\u65f6\u62a5'.encode("utf-8")),
            ('detail:catch_date', '2015-03-27'),
        ]

        print(confUtil.getMd5(article_url))

        cell_writes = []
        for qualifier, content in fixed_row:
            cell_writes.append(Mutation(column=qualifier, value=content))

        # NOTE(review): the final None is presumably the optional
        # "attributes" argument of the HBase Thrift mutateRow call - confirm.
        self.client.mutateRow(
            self.tableName, confUtil.getMd5(article_url), cell_writes, None)