def process_comments(self, dbpost_ids, kind='question'):
    """Download the comments of one or more posts and store them.

    Pages through the ``/2.2/questions/<ids>/comments`` or
    ``/2.2/answers/<ids>/comments`` endpoint, builds one ``Comments`` row
    per returned item, adds it to ``self.session`` and bumps
    ``self.total_comments``. Author ids not seen before are appended to
    ``self.user_ids_comments``.

    Args:
        dbpost_ids: Post identifier(s). The value is passed through
            ``str()`` and placed verbatim in the URL path.
            NOTE(review): presumably a single id or a ';'-separated id
            list as accepted by the API -- confirm against the callers.
        kind: ``'question'`` or ``'answer'``; selects both the endpoint and
            which identifier column of the row receives ``post_id``.

    Returns:
        None. If a response has no ``items`` field the error is logged and
        the method returns early.

    Raises:
        ValueError: if ``kind`` is neither ``'question'`` nor ``'answer'``.
        RuntimeError: if a response has no ``has_more`` field.
    """
    # kind -> (endpoint path, attribute of the row that stores the post id)
    targets = {
        'question': ('/2.2/questions/', 'question_identifier'),
        'answer': ('/2.2/answers/', 'answer_identifier'),
    }
    if kind not in targets:
        # Fail before any network/DB work instead of hitting an unbound
        # ``base_url`` further down.
        raise ValueError(
            "kind must be 'question' or 'answer', got %r" % (kind,))
    endpoint, post_id_attr = targets[kind]

    base_url = self.url + endpoint + str(dbpost_ids) + '/comments?'
    base_url += ('order=desc&sort=creation&site=stackoverflow&key='
                 + self.api_key)
    base_url += '&' + 'pagesize=' + str(self.pagesize)

    logging.debug("Getting comments for %s", dbpost_ids)

    page = 1
    has_more = True
    while has_more:
        url = base_url + "&page=" + str(page)
        if self.debug:
            data = StackSampleData.comments
        else:
            data = self._get_api_data(url)

        parser = JSONParser(unicode(data))
        parser.parse()
        response = parser.data
        # Expected top-level keys:
        # [u'has_more', u'items', u'quota_max', u'quota_remaining']
        if 'has_more' not in response:
            logging.error("No has_more in JSON response. Exiting.")
            logging.error(response)
            raise RuntimeError("No has_more in JSON response")

        # In debug mode the same canned sample is served for every page, so
        # never ask for a second one regardless of what the sample says.
        has_more = response['has_more'] and not self.debug
        page += 1

        if 'items' not in response:
            logging.error("No items in comments")
            logging.error(response)
            return

        for comment in response['items']:
            dbcomment = Comments()
            # question or answer identifier
            setattr(dbcomment, post_id_attr, comment['post_id'])
            if 'body' in comment:
                dbcomment.body = comment['body']

            # The API may omit the owner; treat that as "no author".
            owner = comment.get('owner') or {}
            if 'user_id' in owner:
                dbcomment.user_identifier = owner['user_id']
                if dbcomment.user_identifier not in self.user_ids_comments:
                    self.user_ids_comments.append(
                        dbcomment.user_identifier)

            # NOTE(review): fromtimestamp() converts to the machine's local
            # time zone -- confirm that is what the schema expects.
            cdate = datetime.datetime.fromtimestamp(
                int(comment['creation_date']))
            dbcomment.submitted_on = cdate.strftime('%Y-%m-%d %H:%M:%S')

            self.session.add(dbcomment)
            self.total_comments += 1

        # Commit once per page so an early return on a later page does not
        # leave already-fetched comments uncommitted.
        self.session.commit()