def _process_item(self, item, spider):
    """Store one item as a single row via ``self.client.mutateRow``.

    Column names, values and the key come from
    ``self.item_key(item, spider)``. Each column/value pair becomes one
    ``Mutation``, and the row key handed to ``mutateRow`` is
    ``confUtil.getMd5(key)``.

    Args:
        item: The item to store; forwarded to ``self.item_key``.
        spider: The spider associated with the item; forwarded to
            ``self.item_key``.

    Returns:
        The ``item`` that was passed in.
    """
    cols, vals, key = self.item_key(item, spider)

    # Debug output. The parenthesised single-argument form prints the same
    # text under the Python 2 print statement and the Python 3 print function.
    print(cols)
    print(vals)
    print(key)

    # zip() stops at the shorter sequence, so any unpaired column or value
    # is silently left out of the write.
    mutations = [
        Mutation(column=col, value=val) for col, val in zip(cols, vals)
    ]

    # NOTE(review): the trailing None is mutateRow's fourth argument,
    # presumably the Thrift "attributes" map -- confirm against the client.
    self.client.mutateRow(
        self.tableName, confUtil.getMd5(key), mutations, None)
    return item
def hbase_tables(self):
    """Print the client's table names and write one hard-coded sample row.

    Calls ``self.client.getTableNames()`` and prints the result, then
    builds a fixed set of ``detail:*`` column/value pairs and writes them
    with ``self.client.mutateRow`` into ``self.tableName``. The row key is
    ``confUtil.getMd5`` of a fixed URL and is printed before the write.

    NOTE(review): this looks like a manual connectivity/smoke check rather
    than production logic -- confirm before relying on it.

    Returns:
        None.
    """
    tables = self.client.getTableNames()
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(tables)

    cols = [
        'detail:publish_time',
        'detail:site_source',
        'detail:site_type',
        'detail:site_url',
        'detail:task_id',
        'detail:author',
        'detail:catch_date',
    ]
    vals = [
        '2015-03-10 02:39',
        'news.sina.com.cn',
        'news',
        'http://news.sina.com.cn/c/2015-03-10/023931587440.shtml',
        '-1',
        # Non-ASCII author name, encoded to UTF-8 bytes before being stored.
        u'\u4eac\u534e\u65f6\u62a5'.encode("utf-8"),
        '2015-03-27',
    ]
    key = "http://news.sina.com.cn/c/2015-03-10/023931587440.shtml"

    # Compute the row key once and reuse it for both the debug print and
    # the write (assumes getMd5 is deterministic for a given key).
    row_key = confUtil.getMd5(key)
    print(row_key)

    mutations = [
        Mutation(column=col, value=val) for col, val in zip(cols, vals)
    ]
    # NOTE(review): the trailing None is mutateRow's fourth argument,
    # presumably the Thrift "attributes" map -- confirm against the client.
    self.client.mutateRow(self.tableName, row_key, mutations, None)
def _process_item(self, item, spider):
    """Store one item as a single row via ``self.client.mutateRow``.

    Column names, values and the key come from
    ``self.item_key(item, spider)``. Each column/value pair becomes one
    ``Mutation``, and the row key handed to ``mutateRow`` is
    ``confUtil.getMd5(key)``.

    Args:
        item: The item to store; forwarded to ``self.item_key``.
        spider: The spider associated with the item; forwarded to
            ``self.item_key``.

    Returns:
        The ``item`` that was passed in.
    """
    cols, vals, key = self.item_key(item, spider)

    # Debug output. The parenthesised single-argument form prints the same
    # text under the Python 2 print statement and the Python 3 print function.
    print(cols)
    print(vals)
    print(key)

    # zip() stops at the shorter sequence, so any unpaired column or value
    # is silently left out of the write.
    mutations = [
        Mutation(column=col, value=val) for col, val in zip(cols, vals)
    ]

    # NOTE(review): the trailing None is mutateRow's fourth argument,
    # presumably the Thrift "attributes" map -- confirm against the client.
    self.client.mutateRow(
        self.tableName, confUtil.getMd5(key), mutations, None)
    return item
def hbase_tables(self):
    """Print the client's table names and write one hard-coded sample row.

    Calls ``self.client.getTableNames()`` and prints the result, then
    builds a fixed set of ``detail:*`` column/value pairs and writes them
    with ``self.client.mutateRow`` into ``self.tableName``. The row key is
    ``confUtil.getMd5`` of a fixed URL and is printed before the write.

    NOTE(review): this looks like a manual connectivity/smoke check rather
    than production logic -- confirm before relying on it.

    Returns:
        None.
    """
    tables = self.client.getTableNames()
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(tables)

    cols = [
        'detail:publish_time',
        'detail:site_source',
        'detail:site_type',
        'detail:site_url',
        'detail:task_id',
        'detail:author',
        'detail:catch_date',
    ]
    vals = [
        '2015-03-10 02:39',
        'news.sina.com.cn',
        'news',
        'http://news.sina.com.cn/c/2015-03-10/023931587440.shtml',
        '-1',
        # Non-ASCII author name, encoded to UTF-8 bytes before being stored.
        u'\u4eac\u534e\u65f6\u62a5'.encode("utf-8"),
        '2015-03-27',
    ]
    key = "http://news.sina.com.cn/c/2015-03-10/023931587440.shtml"

    # Compute the row key once and reuse it for both the debug print and
    # the write (assumes getMd5 is deterministic for a given key).
    row_key = confUtil.getMd5(key)
    print(row_key)

    mutations = [
        Mutation(column=col, value=val) for col, val in zip(cols, vals)
    ]
    # NOTE(review): the trailing None is mutateRow's fourth argument,
    # presumably the Thrift "attributes" map -- confirm against the client.
    self.client.mutateRow(self.tableName, row_key, mutations, None)