def process_users(self, users_ids):
    """Fetch Discourse user profiles for *users_ids* and store them.

    Skips ids that are blacklisted or already present in the People
    table.  Ids whose JSON cannot be fetched or parsed are added to
    ``self.users_blacklist`` so they are not retried later.

    :param users_ids: iterable of Discourse usernames; ``None`` is a no-op
    """
    if users_ids is None:
        return
    for user_id in users_ids:
        if user_id in self.users_blacklist:
            continue
        # Skip users already imported in a previous run.
        user = self.session.query(People).filter(
            People.username == user_id).first()
        if user is not None:
            continue
        url = self.url + "/users/" + user_id + ".json"
        logging.info("Getting user " + user_id)
        logging.info(url)
        # NOTE(review): verify=False disables TLS certificate checks;
        # confirm this is intentional for the target deployment.
        stream = requests.get(url, verify=False)
        try:
            parser = JSONParser(unicode(stream.text))
            parser.parse()
        # FIX: narrowed from a bare ``except:`` so KeyboardInterrupt
        # and SystemExit are no longer silently swallowed.
        except Exception:
            logging.error("Can't get " + user_id + " data")
            self.users_blacklist.append(user_id)
            continue
        user = parser.data['user']
        dbuser = People()
        dbuser.username = user['username']
        dbuser.reputation = user['trust_level']
        dbuser.avatar = user['uploaded_avatar_id']
        dbuser.last_seen_at = user['last_posted_at']
        dbuser.joined_at = user['created_at']
        dbuser.identifier = user['id']
        self.session.add(dbuser)
        self.total_users += 1
    self.session.commit()
    return
def process_users(self, users_ids):
    """Fetch StackExchange user profiles for *users_ids* and store them.

    Pages through the ``/2.2/users`` API endpoint until the response
    reports ``has_more`` is false, adding one People row per user.

    :param users_ids: ";"-separated list of user ids; ``None`` is a no-op
    :raises Exception: if more ids than ``self.pagesize`` are passed, or
        the API response lacks the ``has_more`` field
    """
    if users_ids is None:
        return
    if len(users_ids.split(";")) > self.pagesize:
        logging.error("Max ids overcome in process_users " + users_ids)
        raise Exception("Max ids overcome in process_users")
    base_url = self.url + '/2.2/users/' + str(users_ids) + '?'
    base_url += 'order=desc&sort=reputation&site=stackoverflow&key=' + self.api_key
    base_url += '&' + 'pagesize=' + str(self.pagesize)
    has_more = True
    page = 1
    while has_more:
        url = base_url + "&page=" + str(page)
        if not self.debug:
            data = self._get_api_data(url)
        else:
            # Canned sample payload: single pass only in debug mode.
            data = StackSampleData.users
            has_more = False
        parser = JSONParser(unicode(data))
        parser.parse()
        # Response fields: has_more, items, quota_max, quota_remaining
        if 'has_more' not in parser.data:
            logging.error("No has_more in JSON response")
            logging.error(parser.data)
            # FIX: a bare ``raise`` with no active exception fails with
            # TypeError; raise an explicit exception instead.
            raise Exception("No has_more in JSON response")
        has_more = parser.data['has_more']
        data = parser.data['items']
        for user in data:
            dbuser = People()
            dbuser.username = user['display_name']
            dbuser.reputation = user['reputation']
            if 'profile_image' in user:
                dbuser.avatar = user['profile_image']
            dbuser.last_seen_at = datetime.datetime.fromtimestamp(
                int(user['last_access_date'])).strftime(
                '%Y-%m-%d %H:%M:%S')
            dbuser.joined_at = datetime.datetime.fromtimestamp(
                int(user['creation_date'])).strftime('%Y-%m-%d %H:%M:%S')
            dbuser.identifier = user['user_id']
            self.session.add(dbuser)
        # FIX: page was never advanced, so a multi-page response would
        # loop forever re-fetching page 1.
        page += 1
    self.session.commit()
    return
def process_users(self, users_ids):
    """Store the Discourse profile of every id in *users_ids*.

    Blacklisted ids and ids already in the People table are skipped.
    An id whose profile cannot be downloaded or parsed is appended to
    ``self.users_blacklist`` so future calls do not retry it.

    :param users_ids: iterable of Discourse usernames; ``None`` is a no-op
    """
    if users_ids is None:
        return
    for user_id in users_ids:
        if user_id in self.users_blacklist:
            continue
        # Do not re-import users already stored.
        user = self.session.query(People).filter(
            People.username == user_id).first()
        if user is not None:
            continue
        url = self.url + "/users/" + user_id + ".json"
        logging.info("Getting user " + user_id)
        logging.info(url)
        # NOTE(review): verify=False skips TLS certificate validation;
        # verify this is deliberate for this instance.
        stream = requests.get(url, verify=False)
        try:
            parser = JSONParser(unicode(stream.text))
            parser.parse()
        # FIX: was a bare ``except:``, which also caught
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        except Exception:
            logging.error("Can't get " + user_id + " data")
            self.users_blacklist.append(user_id)
            continue
        user = parser.data['user']
        dbuser = People()
        dbuser.username = user['username']
        dbuser.reputation = user['trust_level']
        dbuser.avatar = user['uploaded_avatar_id']
        dbuser.last_seen_at = user['last_posted_at']
        dbuser.joined_at = user['created_at']
        dbuser.identifier = user['id']
        self.session.add(dbuser)
        self.total_users += 1
    self.session.commit()
    return
def process_users(self, users_ids):
    """Store a People row for each StackExchange user in *users_ids*.

    Walks the paginated ``/2.2/users`` endpoint until ``has_more``
    becomes false (a single pass in debug mode, which uses sample data).

    :param users_ids: ";"-separated list of user ids; ``None`` is a no-op
    :raises Exception: when the id count exceeds ``self.pagesize`` or
        the API response does not contain ``has_more``
    """
    if users_ids is None:
        return
    if len(users_ids.split(";")) > self.pagesize:
        logging.error("Max ids overcome in process_users " + users_ids)
        raise Exception("Max ids overcome in process_users")
    base_url = self.url + '/2.2/users/' + str(users_ids) + '?'
    base_url += 'order=desc&sort=reputation&site=stackoverflow&key=' + self.api_key
    base_url += '&' + 'pagesize=' + str(self.pagesize)
    has_more = True
    page = 1
    while has_more:
        url = base_url + "&page=" + str(page)
        if not self.debug:
            data = self._get_api_data(url)
        else:
            # Debug mode reads the bundled sample and stops after it.
            data = StackSampleData.users
            has_more = False
        parser = JSONParser(unicode(data))
        parser.parse()
        # Expected response keys: has_more, items, quota_max, quota_remaining
        if 'has_more' not in parser.data:
            logging.error("No has_more in JSON response")
            logging.error(parser.data)
            # FIX: a bare ``raise`` outside an except block raises
            # TypeError, masking the real problem; raise explicitly.
            raise Exception("No has_more in JSON response")
        has_more = parser.data['has_more']
        data = parser.data['items']
        for user in data:
            dbuser = People()
            dbuser.username = user['display_name']
            dbuser.reputation = user['reputation']
            if 'profile_image' in user:
                dbuser.avatar = user['profile_image']
            dbuser.last_seen_at = datetime.datetime.fromtimestamp(
                int(user['last_access_date'])).strftime(
                '%Y-%m-%d %H:%M:%S')
            dbuser.joined_at = datetime.datetime.fromtimestamp(
                int(user['creation_date'])).strftime('%Y-%m-%d %H:%M:%S')
            dbuser.identifier = user['user_id']
            self.session.add(dbuser)
        # FIX: advance the page counter; without this a multi-page
        # response would spin forever on page 1.
        page += 1
    self.session.commit()
    return