def from_json(cls, js):
    """
    Builds the job object from its JSON representation
    :param js:
    :return:
    """
    tj = cls()
    tj.url = js['url']
    tj.type = js['type']
    tj.fail_cnt = js['fail_cnt']
    tj.last_fail = js['last_fail']
    tj.priority = utils.defvalkey(js, 'priority', 0)
    tj.time_added = utils.defvalkey(js, 'time_added', 0)
    tj.meta = utils.defvalkey(js, 'meta', None)

    if 'user_id' in js:
        user_url = js['user_url'] if 'user_url' in js else None
        # tj.user = GitHubUser(user_id=js['user_id'], user_name=js['user_name'],
        #                      user_type=js['user_type'], user_url=user_url)

    return tj
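
# Illustrative only: a hypothetical input dict for from_json, inferred from the
# fields the method reads above. The concrete values (and the DownloadJob class
# name in the commented call) are assumptions, not taken from the project.
_example_job_js = {
    'url': 'https://api.github.com/users/octocat',
    'type': 1,              # job type constant (placeholder)
    'fail_cnt': 0,
    'last_fail': 0,
    'priority': 0,
    'time_added': 1475342704,
    'meta': {'user': 'octocat', 'page': 1},
}
# job = DownloadJob.from_json(_example_job_js)  # assuming the class is DownloadJob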
def process_record(self, idx, record):
    """
    Processes the current certificate record
    :param idx:
    :param record:
    :return:
    """
    record['id'] = self.ctr
    self.last_record_seen = record
    raw = utils.defvalkey(record, 'raw')
    parsed = utils.defvalkey(record, 'parsed')

    # Process server cert
    if parsed is None:
        self.not_parsed += 1
        # TODO: parse raw cert if needed
        return

    try:
        ret = collections.OrderedDict()
        if 'rsa_public_key' not in parsed['subject_key_info']:
            self.not_rsa += 1
            return

        mod16 = base64.b16encode(
            base64.b64decode(parsed['subject_key_info']['rsa_public_key']['modulus']))
        if not self.fmagic.test16(mod16):
            return

        self.num_found += 1
        ret['id'] = self.ctr
        ret['fprint256'] = utils.defvalkey(parsed, 'fingerprint_sha256')
        self.fill_cn_src(ret, parsed)
        self.fill_rsa_ne(ret, parsed)
        self.fill_cert_info(ret, parsed, record)

        if raw is not None:
            rawb = base64.b64decode(raw)
            ret['fprint'] = hashlib.sha1(rawb).hexdigest()
            ret['raw'] = raw

        self.last_record_flushed = record
        if not self.is_dry():
            self.file_leafs_fh.write(json.dumps(ret) + '\n')

    except Exception as e:
        logger.warning('Certificate processing error %s : %s' % (self.ctr, e))
        self.trace_logger.log(e)
        self.not_cert_ok += 1
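
# A minimal standalone sketch of the modulus handling above: the base64-encoded
# modulus from the parsed certificate is decoded to raw bytes and re-encoded as
# upper-case hex (base16) before being handed to the fingerprint test.
# The modulus bytes here are a made-up placeholder.
import base64

_b64_modulus = base64.b64encode(b'\x00\xc3\x9f\x12')          # placeholder modulus
_mod16 = base64.b16encode(base64.b64decode(_b64_modulus))
# _mod16 == b'00C39F12' -> this is what gets passed to self.fmagic.test16(mod16)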
def process_user(self, job, js, headers, raw_response):
    """
    Process user detail data
    :param job:
    :param js:
    :param headers:
    :param raw_response:
    :return:
    """
    if 'id' not in js:
        logger.error('Field ID not found in user')
        return

    s = self.session()
    try:
        user_id = int(js['id'])
        dbu = s.query(GitHubUserDetails).filter(GitHubUserDetails.id == user_id).one_or_none()

        is_new = False
        if dbu is None:
            is_new = True
            dbu = GitHubUserDetails()
            dbu.id = user_id

        dbu.date_last_check = salch.func.now()
        dbu.username = js['login']
        dbu.name = utils.utf8ize(utils.defvalkey(js, 'name'))
        dbu.company = utils.utf8ize(utils.defvalkey(js, 'company'))
        dbu.blog = utils.defvalkey(js, 'blog')
        dbu.email = utils.defvalkey(js, 'email')
        dbu.bio = utils.utf8ize(utils.defvalkey(js, 'bio'))
        dbu.usr_type = utils.defvalkey(js, 'type')

        dbu.public_repos = js['public_repos']
        dbu.public_gists = js['public_gists']
        dbu.followers = js['followers']
        dbu.following = js['following']

        dbu.created_at = utils.dt_norm(utils.try_parse_timestamp(utils.defvalkey(js, 'created_at')))
        dbu.updated_at = utils.dt_norm(utils.try_parse_timestamp(utils.defvalkey(js, 'updated_at')))

        if is_new:
            s.add(dbu)
        else:
            s.merge(dbu)

        s.commit()
        s.flush()
        s.expunge_all()

    except Exception as e:
        logger.error('Exception storing user details: %s: %s' % (js['id'], e))
        logger.debug(traceback.format_exc())

    finally:
        utils.silent_close(s)
def process_roots(self, idx, record, server_cert):
    """
    Process root certificates
    :param idx:
    :param record:
    :param server_cert:
    :return:
    """
    chains_ctr = []
    try:
        if 'chain' not in server_cert:
            return chains_ctr

        for cert in server_cert['chain']:
            self.chain_ctr += 1

            if 'parsed' not in cert:
                continue

            parsed = cert['parsed']
            fprint = parsed['fingerprint_sha256']
            if fprint in self.chain_cert_db:
                chains_ctr.append(self.chain_cert_db[fprint])
                continue

            ret = collections.OrderedDict()
            is_rsa = parsed['subject_key_info']['key_algorithm']['name'].lower() == 'rsa'
            if not is_rsa:
                self.not_rsa += 1

            ret['id'] = self.chain_ctr
            ret['count'] = 1
            ret['chain'] = 1
            ret['valid'] = utils.defvalkeys(parsed, ['signature', 'valid'])
            ret['ssign'] = utils.defvalkeys(parsed, ['signature', 'self_signed'])
            ret['fprint'] = fprint
            ret['fprint1'] = utils.defvalkey(parsed, 'fingerprint_sha1')
            self.fill_cn_src(ret, parsed)
            if is_rsa:
                self.fill_rsa_ne(ret, parsed)
            ret['raw'] = cert['raw']

            if not self.is_dry():
                self.file_roots_fh.write(json.dumps(ret) + '\n')

            self.chain_cert_db[fprint] = self.chain_ctr
            chains_ctr.append(self.chain_ctr)

    except Exception as e:
        logger.warning('Chain processing error %s : %s' % (self.chain_ctr, e))
        logger.debug(traceback.format_exc())
        self.not_chain_ok += 1

    return chains_ctr
def process_colab(self, job, js, headers, raw_response):
    """
    Process collaborators for org-owned repos
    :param job:
    :param js:
    :param headers:
    :param raw_response:
    :return:
    """
    for colab in js:
        if 'id' not in colab:
            logger.error('Field ID not found in colab')
            continue

        s = self.session()
        try:
            # delete first - avoid excs
            s.query(GitHubRepoColab)\
                .filter(GitHubRepoColab.user_name == colab['login'])\
                .filter(GitHubRepoColab.repo_name == job.meta['repo'])\
                .delete()

            dbu = GitHubRepoColab()
            dbu.repo_name = job.meta['repo']
            dbu.user_name = colab['login']
            dbu.can_pull = colab['permissions']['pull']
            dbu.can_push = colab['permissions']['push']
            dbu.can_admin = colab['permissions']['admin']

            s.add(dbu)
            s.commit()
            s.flush()
            s.expunge_all()

        except Exception as e:
            logger.error('Exception storing colab details: %s:%s: %s'
                         % (colab['login'], job.meta['repo'], e))
            logger.debug(traceback.format_exc())

        finally:
            utils.silent_close(s)

    if len(js) == 0:
        return

    # Load next page
    cur_page = utils.defvalkey(job.meta, 'page', 1)
    new_url = (self.ORG_REPO_COLAB_URL % (job.meta['repo'])) + ('?page=%s' % (cur_page + 1))
    new_meta = dict(job.meta)
    new_meta['page'] = cur_page + 1

    job = DownloadJob(url=new_url, jtype=DownloadJob.TYPE_REPO_COLAB, meta=new_meta)
    self.link_queue.put(job)
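
# A simplified standalone sketch of the page-walking pattern used by the handlers
# above: fetch one page of a GitHub listing, stop once an empty page comes back
# (mirroring the `len(js) == 0` check), otherwise bump the page counter and fetch
# again. It uses the plain `requests` library instead of the project's DownloadJob
# queue; the endpoint choice and the unauthenticated call are illustrative assumptions.
import requests

def fetch_all_pages(repo_full_name):
    results, page = [], 1
    while True:
        resp = requests.get(
            'https://api.github.com/repos/%s/collaborators' % repo_full_name,
            params={'page': page})
        batch = resp.json()
        if not isinstance(batch, list) or len(batch) == 0:
            break                      # empty (or error) page -> no more data
        results.extend(batch)
        page += 1
    return results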
def store_users_list(self, users):
    """
    Stores all users in the list
    :param users:
    :return:
    """
    # Handle gaps in the user ID space. With the user-only optimization the same
    # ID can show up more than once, so already processed IDs are filtered out.
    reduced_by = 0
    with self.processed_user_set_lock:
        ids = [user.user_id for user in users]
        ids_ok = []
        for uid in ids:
            if uid in self.processed_user_set:
                reduced_by += 1
                continue
            self.processed_user_set.add(uid)
            ids_ok.append(uid)
        users = [user for user in users if user.user_id in ids_ok]

    # Bulk user load
    s = self.session()
    id_list = sorted([user.user_id for user in users])
    db_users = s.query(GitHubUserDb).filter(GitHubUserDb.id.in_(id_list)).all()
    db_user_map = {user.id: user for user in db_users}

    for user in users:
        self.new_users_events.insert()

        # Store user to the DB
        try:
            db_user = utils.defvalkey(db_user_map, key=user.user_id)
            self.store_user(user, s, db_user=db_user, db_user_loaded=True)

        except Exception as e:
            logger.warning('[%02d] Exception in storing user %s' % (self.local_data.idx, e))
            self.trace_logger.log(e)
            logger.info('[%02d] idlist: %s' % (self.local_data.idx, id_list))
            self.trigger_quit()
            break

    try:
        s.commit()
        # logger.info('[%02d] Commited, reduced by: %s' % (self.local_data.idx, reduced_by))

    except Exception as e:
        logger.warning('[%02d] Exception in storing bulk users' % self.local_data.idx)
        logger.warning(traceback.format_exc())
        logger.info('[%02d] idlist: %s' % (self.local_data.idx, id_list))
        self.trigger_quit()

    finally:
        utils.silent_close(s)
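
# A minimal self-contained sketch of the bulk-load pattern above: fetch all rows
# whose IDs appear in the batch with a single IN() query, then index them by ID so
# each incoming user can be matched without a per-user round trip. The model and
# the in-memory SQLite engine are illustrative (not the project's), assuming
# SQLAlchemy 1.4+.
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class UserRow(Base):
    __tablename__ = 'users'
    id = sa.Column(sa.Integer, primary_key=True)
    login = sa.Column(sa.String(255))

engine = sa.create_engine('sqlite://')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)

s = Session()
id_list = [1, 2, 3]
db_user_map = {u.id: u for u in s.query(UserRow).filter(UserRow.id.in_(id_list)).all()}
s.close()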
def process_assignee(self, job, js, headers, raw_response):
    """
    Process assignees for org-owned repos
    :param job:
    :param js:
    :param headers:
    :param raw_response:
    :return:
    """
    for assignee in js:
        if 'id' not in assignee:
            logger.error('Field ID not found in assignees')
            continue

        s = self.session()
        try:
            # delete first - avoid excs
            s.query(GitHubRepoAssignee)\
                .filter(GitHubRepoAssignee.user_name == assignee['login'])\
                .filter(GitHubRepoAssignee.repo_name == job.meta['repo'])\
                .delete()

            dbu = GitHubRepoAssignee()
            dbu.repo_name = job.meta['repo']
            dbu.user_name = assignee['login']

            s.add(dbu)
            s.commit()
            s.flush()
            s.expunge_all()

        except Exception as e:
            logger.error('Exception storing assignee details: %s:%s: %s'
                         % (assignee['login'], job.meta['repo'], e))
            logger.debug(traceback.format_exc())

        finally:
            utils.silent_close(s)

    if len(js) == 0:
        return

    # Load next page
    cur_page = utils.defvalkey(job.meta, 'page', 1)
    new_url = (self.ORG_REPO_ASSIGNEES_URL % (job.meta['repo'])) + ('?page=%s' % (cur_page + 1))
    new_meta = dict(job.meta)
    new_meta['page'] = cur_page + 1

    job = DownloadJob(url=new_url, jtype=DownloadJob.TYPE_REPO_ASSIGNEE, meta=new_meta)
    self.link_queue.put(job)
def fill_cert_info(self, ret, parsed, rec):
    """
    Fills in certificate metadata - CA flag, self-signed flag, DNs and validity info.
    :param ret:
    :param parsed:
    :param rec:
    :return:
    """
    ret['ca'] = utils.defvalkeys(parsed, ['extensions', 'basic_constraints', 'is_ca'])

    issuer = utils.defvalkey(parsed, 'issuer')
    subject = utils.defvalkey(parsed, 'subject')
    ret['ss'] = issuer == subject

    ret['subject_dn'] = utils.defvalkey(parsed, 'subject_dn')
    ret['issuer_dn'] = utils.defvalkey(parsed, 'issuer_dn')

    ret['parents'] = utils.defvalkey(rec, 'parents')
    ret['crt_src'] = utils.defvalkey(rec, 'source')
    ret['seen_in_scan'] = utils.defvalkey(rec, 'seen_in_scan')
    ret['valid_nss'] = utils.defvalkey(rec, 'valid_nss')
    ret['was_valid_nss'] = utils.defvalkey(rec, 'was_valid_nss')
    ret['current_valid_nss'] = utils.defvalkey(rec, 'current_valid_nss')
def process_record(self, idx, line):
    """
    Processes one record from the PGP dump
    :param idx:
    :param line:
    :return:
    """
    rec = json.loads(line)
    master_key_id = int(utils.defvalkey(rec, 'key_id', '0'), 16)
    master_fingerprint = utils.defvalkey(rec, 'fingerprint')
    flat_keys = [rec]
    user_names = []

    # Phase 1 - info extraction
    if 'packets' in rec:
        for packet in rec['packets']:
            if packet['tag_name'] == 'User ID':
                utils.append_not_none(user_names, utils.defvalkey(packet, 'user_id'))
            elif packet['tag_name'] == 'Public-Subkey':
                flat_keys.append(packet)

    # Test all keys
    self.test_flat_keys(flat_keys, user_names, master_key_id, master_fingerprint, rec)

    if time.time() - self.last_report > self.report_time:
        per_second = (idx - self.last_report_idx) / float(self.report_time)
        logger.debug(' .. report idx: %s, per second: %2.2f, found: %s, '
                     'num_master: %s, num_sub: %s, ratio: %s, cur key: %016X '
                     % (idx, per_second, self.found, self.num_master_keys, self.num_sub_keys,
                        float(self.num_sub_keys) / self.num_master_keys, master_key_id))

        self.last_report = time.time()
        self.last_report_idx = idx
def key_exp(self, rec=None):
    """
    Returns the public exponent from the record
    :param rec:
    :return:
    """
    if rec is None:
        return False

    e = utils.defvalkey(rec, 'e')
    if e is None:
        return False

    e = e.strip()
    e = utils.strip_hex_prefix(e)
    return int(e, 16)
def test_key(self, rec=None):
    """
    Tests the key modulus with the fingerprinting detector
    :param rec:
    :return:
    """
    if rec is None:
        return False

    n = utils.defvalkey(rec, 'n')
    if n is None:
        return False

    n = n.strip()
    n = utils.strip_hex_prefix(n)

    x = self.fmagic.magic16([n])
    return len(x) > 0
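
# A standalone sketch of the hex handling key_exp and test_key rely on, under the
# assumption that utils.strip_hex_prefix simply drops a leading '0x'/'0X'.
def _parse_hex_field(value):
    value = value.strip()
    if value.lower().startswith('0x'):
        value = value[2:]
    return int(value, 16)

assert _parse_hex_field(' 0x10001 ') == 65537   # typical RSA public exponent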
def test_flat_keys(self, flat_keys, user_names, master_key_id, master_fingerprint, rec):
    """
    Tests all keys in the array
    :param flat_keys:
    :return:
    """
    if flat_keys is None or len(flat_keys) == 0:
        return

    self.num_master_keys += 1
    self.num_sub_keys += len(flat_keys) - 1

    rsa_keys = ['n' in x and len(x['n']) > 0 for x in flat_keys]
    self.num_master_keys_rsa += rsa_keys[0]
    self.num_sub_keys_rsa += sum(rsa_keys[1:])

    self.key_counts[len(flat_keys)] += 1

    key_sizes = [self.key_size(x) for x in flat_keys]
    for x in key_sizes:
        self.key_sizes[x] += 1

    # benchmarking
    if self.args.bench:
        for rec in flat_keys:
            n = self.key_mod(rec)
            if n is None or n == 0:
                continue
            self.bench_mods.append('%x' % n)

    # 1 Nov 2015 to 19 Apr 2017
    bnd_a = datetime.datetime(year=2015, month=11, day=1)
    bnd_b = datetime.datetime(year=2017, month=4, day=19, hour=23, minute=59, second=59)

    in_time = ['creation_time' in rec and utils.time_between(
        datetime.datetime.utcfromtimestamp(rec['creation_time']), bnd_a, bnd_b)
        for rec in flat_keys]

    rsa_in_time = ['n' in rec and len(rec['n']) > 0 and in_time[idx]
                   for idx, rec in enumerate(flat_keys)]

    self.num_total_keys_date += sum(in_time)
    self.num_total_master_keys_date += in_time[0]
    self.num_rsa_keys_date += sum(rsa_in_time)
    self.num_rsa_master_keys_date += rsa_in_time[0]

    # key testing
    tested = [self.test_key(x) for x in flat_keys]

    # classification
    if self.classif_file is not None:
        for idx, rec in enumerate(flat_keys):
            if 'n' not in rec:
                continue

            js = OrderedDict()
            ctime = datetime.datetime.utcfromtimestamp(rec['creation_time']).strftime('%Y-%m-%d') \
                if 'creation_time' in rec else ''
            cname = user_names[0].encode('utf8').replace(';', '_') if len(user_names) > 0 else ''

            js['source'] = [cname, ctime]
            js['size'] = self.key_size(rec)
            js['msb'] = '0x%x' % self.key_msb(rec)
            js['sub'] = int(idx != 0)
            js['master_id'] = utils.format_pgp_key(master_key_id)
            js['sec'] = int(tested[idx])
            js['tot'] = len(flat_keys)
            js['e'] = '0x%x' % self.key_exp(rec)
            js['n'] = '0x%x' % self.key_mod(rec)
            self.classif_file.write('%s\n' % json.dumps(js))

    # Key detection and store
    if any(tested):
        flat_key_ids = [int(utils.defvalkey(x, 'key_id', '0'), 16) for x in flat_keys]
        keys_hex = [utils.format_pgp_key(x) for x in flat_key_ids]
        det_key_ids = [x for _idx, x in enumerate(flat_key_ids) if tested[_idx]]

        logger.info('------- interesting map: %s for key ids %s' % (tested, keys_hex))

        js = OrderedDict()
        js['detection'] = tested
        js['key_ids'] = keys_hex
        js['names'] = user_names
        js['master_key_id'] = utils.format_pgp_key(master_key_id)
        js['master_key_fprint'] = master_fingerprint
        # js['pgp'] = rec

        self.dump_file.write(json.dumps(js) + '\n')
        self.dump_file.flush()

        self.found_no_master_key += not tested[0]
        self.found_master_key += tested[0]
        self.found_sub_key += sum(tested[1:])
        self.found += sum(tested)
        self.found_entities += 1
        self.found_entities_keynum += len(tested)
        self.found_master_not_rsa += not rsa_keys[0]
        self.found_key_counts[len(flat_keys)] += 1

        for x in det_key_ids:
            self.flat_key_ids.add(x)

        for idx, x in enumerate(key_sizes):
            if tested[idx]:
                self.found_key_sizes[x] += 1

        for idx, x in enumerate(tested):
            if not tested[idx]:
                continue

            # 2012-04-30; rsa_bit_length; subkey_yes_no; email; MSB(modulus); modulus;
            rec = flat_keys[idx]
            res = []
            res.append(datetime.datetime.utcfromtimestamp(rec['creation_time']).strftime('%Y-%m-%d')
                       if 'creation_time' in rec else '')
            res.append(self.key_size(rec))
            res.append(int(idx == 0))
            res.append(user_names[0].encode('utf8').replace(';', '_') if len(user_names) > 0 else '')
            res.append('%x' % self.key_msb(rec))
            res.append('%x' % self.key_mod(rec))
            self.found_info.append(res)
def continue_leafs(self, name):
    """
    Continues processing of the leafs. Finds the last record - returns this also.
    Truncates the rest of the file.
    :param name:
    :return: last record loaded
    """
    fsize = os.path.getsize(name)
    pos = 0

    # If the file is too big (over 2 GB), seek forward so only roughly the last 1.5 GB is re-read
    if fsize > 1024 * 1024 * 1024 * 2:
        pos = fsize - 1024 * 1024 * 1024 * 1.5
        logger.info('Leafs file too big: %s, skipping to %s' % (fsize, pos))
        self.file_leafs_fh.seek(pos)
        x = self.file_leafs_fh.next()  # skip unfinished record
        pos += len(x)

    record_from_state_found = False
    terminate_with_record = False
    last_record = None
    last_id_seen = None

    for line in self.file_leafs_fh:
        ln = len(line)
        try:
            last_record = json.loads(line)
            last_id_seen = last_record['id']
            self.state_loaded_ips.add(last_record['ip'])
            self.ctr = max(self.ctr, last_record['id'])
            pos += ln

            if self.last_record_flushed is not None \
                    and self.last_record_flushed['ip'] == last_record['ip']:
                logger.info('Found last record flushed in data file, ip: %s' % last_record['ip'])
                record_from_state_found = True
                break

        except Exception as e:
            terminate_with_record = True
            break

    logger.info('Operation resumed at leaf ctr: %s, last ip: %s'
                % (self.ctr, utils.defvalkey(last_record, 'ip')))

    if self.last_record_flushed is not None and not record_from_state_found:
        logger.warning('Could not find the record from the state in the data file. '
                       'Some data may be missing.')
        logger.info('Last record from state id: %s, last record data file id: %s'
                    % (self.last_record_resumed['id'], last_id_seen))
        raise ValueError('Incomplete data file')

    if terminate_with_record:
        logger.info('Leaf: Invalid record detected, position: %s' % pos)
        if not self.is_dry():
            self.file_leafs_fh.seek(pos)
            self.file_leafs_fh.truncate()
            self.file_leafs_fh.flush()

    return last_record
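
# A simplified standalone sketch of the resume-and-truncate pattern implemented
# above: read a JSON-lines file, remember the byte offset after the last fully
# parsable line, and truncate anything past it (a partially written record from an
# interrupted run). The file path and helper name are illustrative.
import json

def truncate_after_last_valid(path):
    pos = 0
    with open(path, 'r+b') as fh:
        for line in fh:
            try:
                json.loads(line)
                pos += len(line)        # line parsed fine, keep it
            except ValueError:
                break                   # first broken line - cut from here
        fh.seek(pos)
        fh.truncate()
    return pos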
def process_record(self, idx, record):
    """
    Current record
    {"e":"0x10001","count":1,"source":["COMMON_NAME","NOT_BEFORE_2010-11-19"],
     "id":32000000,"cn":"COMMON_NAME","n":"0x...","timestamp":1475342704760}
    :param idx:
    :param record:
    :return:
    """
    record['id'] = self.ctr
    ip = utils.defvalkey(record, 'ip')
    domain = utils.defvalkey(record, 'domain')
    timestamp_fmt = utils.defvalkey(record, 'timestamp')
    self.last_record_seen = record

    if not self.is_record_tls(record):
        self.not_tls += 1
        return

    server_cert = record['data']['tls']['server_certificates']
    if 'validation' not in server_cert or 'certificate' not in server_cert:
        self.not_cert_ok += 1
        return

    # Process chains anyway as we may be interested in them even though the server is not RSA
    chains_roots = self.process_roots(idx, record, server_cert)

    # Process server cert
    trusted = utils.defvalkey(server_cert['validation'], 'browser_trusted')
    matches = utils.defvalkey(server_cert['validation'], 'matches_domain')

    cert_obj = server_cert['certificate']
    if 'parsed' not in cert_obj:
        self.not_parsed += 1
        return

    parsed = cert_obj['parsed']
    try:
        ret = collections.OrderedDict()
        if parsed['subject_key_info']['key_algorithm']['name'].lower() != 'rsa':
            self.not_rsa += 1
            return

        ret['id'] = self.ctr
        ret['ip'] = ip
        ret['count'] = 1
        ret['fprint'] = utils.defvalkey(parsed, 'fingerprint_sha256')
        ret['fprint1'] = utils.defvalkey(parsed, 'fingerprint_sha1')
        utils.set_nonempty(ret, 'dom', domain)

        tstamp = utils.try_parse_timestamp(timestamp_fmt)
        ret['timestamp'] = utils.unix_time(tstamp)
        utils.set_nonempty(ret, 'trust', trusted)
        utils.set_nonempty(ret, 'match', matches)
        utils.set_nonempty(ret, 'valid', utils.defvalkeys(parsed, ['signature', 'valid']))
        utils.set_nonempty(ret, 'ssign', utils.defvalkeys(parsed, ['signature', 'self_signed']))

        self.fill_cn_src(ret, parsed)
        self.fill_rsa_ne(ret, parsed)
        ret['chains'] = chains_roots

        self.last_record_flushed = record
        if not self.is_dry():
            self.file_leafs_fh.write(json.dumps(ret) + '\n')

    except Exception as e:
        logger.warning('Certificate processing error %s : %s' % (self.ctr, e))
        logger.debug(traceback.format_exc())
        self.not_cert_ok += 1
def process_repo(self, job, js, headers, raw_response, from_user):
    """
    Process repo list page
    :param job:
    :param js:
    :param headers:
    :param raw_response:
    :param from_user:
    :return:
    """
    for repo in js:
        if 'id' not in repo:
            logger.error('Field ID not found in repos')
            continue

        s = self.session()
        try:
            repo_id = int(repo['id'])
            dbe = s.query(GitHubRepo).filter(GitHubRepo.id == repo_id).one_or_none()

            dbu = GitHubRepo()
            dbu.id = repo_id
            dbu.user_repo = from_user

            if from_user:
                dbu.username = job.meta['user']
            else:
                dbu.org_name = job.meta['org']

            if 'owner' in repo:
                dbu.owner_id = repo['owner']['id']
                dbu.owner_login = repo['owner']['login']

            dbu.repo_name = repo['full_name']
            dbu.repo_stars = repo['stargazers_count']
            dbu.repo_forks = repo['forks']
            dbu.repo_watchers = repo['watchers']
            dbu.repo_is_fork = repo['fork']
            dbu.repo_size = repo['size']
            dbu.repo_homepage = utils.defvalkey(repo, 'homepage')
            dbu.repo_language = utils.defvalkey(repo, 'language')
            dbu.created_at = utils.dt_norm(utils.try_parse_timestamp(utils.defvalkey(repo, 'created_at')))
            dbu.updated_at = utils.dt_norm(utils.try_parse_timestamp(utils.defvalkey(repo, 'updated_at')))
            dbu.pushed_at = utils.dt_norm(utils.try_parse_timestamp(utils.defvalkey(repo, 'pushed_at')))
            dbu.repo_description = utils.utf8ize(repo['description'])
            dbu.repo_stargazers_url = repo['stargazers_url']
            dbu.repo_forks_url = repo['forks_url']

            if not from_user and repo['stargazers_count'] > 100:
                new_meta = dict(job.meta)
                new_meta['page'] = 1
                new_meta['repo'] = repo['full_name']
                new_meta['owner'] = repo['owner']['login']

                # Colab fetch - skip, no auth
                job = DownloadJob(url=self.ORG_REPO_COLAB_URL % (repo['full_name']),
                                  jtype=DownloadJob.TYPE_REPO_COLAB, meta=new_meta)

                # Assignee fetch
                job = DownloadJob(url=self.ORG_REPO_ASSIGNEES_URL % (repo['full_name']),
                                  jtype=DownloadJob.TYPE_REPO_ASSIGNEE, meta=dict(new_meta))
                self.link_queue.put(job)

            # DB save
            if dbe is None:
                s.add(dbu)
            else:
                if dbe.username != dbu.username:
                    logger.warning('Username does not match for %s %s %s'
                                   % (repo_id, dbe.username, dbu.username))
                if dbe.org_name != dbu.org_name:
                    logger.warning('org_name does not match for %s %s %s'
                                   % (repo_id, dbe.org_name, dbu.org_name))
                if dbe.owner_login != dbu.owner_login:
                    logger.warning('owner_login does not match for %s %s %s'
                                   % (repo_id, dbe.owner_login, dbu.owner_login))

            s.commit()
            s.flush()
            s.expunge_all()

        except Exception as e:
            logger.error('Exception storing repo details: %s:%s meta: %s, url: %s, exc: %s'
                         % (repo['id'], repo['full_name'], json.dumps(job.meta), job.url, e))
            logger.debug(traceback.format_exc())

        finally:
            utils.silent_close(s)

    if len(js) == 0:
        return

    # Load next page
    cur_page = utils.defvalkey(job.meta, 'page', 1)
    new_meta = dict(job.meta)
    new_meta['page'] = cur_page + 1

    if from_user:
        new_url = (self.USER_REPOS_URL % job.meta['user']) + ('?page=%s' % (cur_page + 1))
        job = DownloadJob(url=new_url, jtype=DownloadJob.TYPE_REPOS_USER, meta=new_meta)
    else:
        new_url = (self.ORG_REPOS_URL % job.meta['org']) + ('?page=%s' % (cur_page + 1))
        job = DownloadJob(url=new_url, jtype=DownloadJob.TYPE_REPOS_ORG, meta=new_meta)

    self.link_queue.put(job)
def process_org(self, job, js, headers, raw_response):
    """
    Process user -> orgs data
    :param job:
    :param js:
    :param headers:
    :param raw_response:
    :return:
    """
    new_orgs = []
    for org in js:
        if 'id' not in org:
            logger.error('Field ID not found in orgs')
            continue

        s = self.session()
        try:
            org_id = int(org['id'])

            # delete first - avoid excs
            s.query(GitHubUserOrgs)\
                .filter(GitHubUserOrgs.org_id == org_id)\
                .filter(GitHubUserOrgs.username == job.meta['user'])\
                .delete()

            dbu = GitHubUserOrgs()
            dbu.username = job.meta['user']
            dbu.org_id = org['id']
            dbu.org_name = org['login']
            dbu.org_desc = utils.utf8ize(org['description'])
            new_orgs.append(org['login'])

            s.add(dbu)
            s.commit()
            s.flush()
            s.expunge_all()

        except Exception as e:
            logger.error('Exception storing user->org details: %s: %s' % (org['id'], e))
            logger.debug(traceback.format_exc())

        finally:
            utils.silent_close(s)

    if len(js) == 0:
        return

    # Load next page
    cur_page = utils.defvalkey(job.meta, 'page', 1)
    new_url = (self.USER_ORGS_URL % job.meta['user']) + ('?page=%s' % (cur_page + 1))
    new_meta = dict(job.meta)
    new_meta['page'] = cur_page + 1

    job = DownloadJob(url=new_url, jtype=DownloadJob.TYPE_ORG, meta=new_meta)
    self.link_queue.put(job)

    # Load repositories for new organisations
    not_loaded_orgs = None
    with self.orgs_loaded_lock:
        new_orgs_set = set(new_orgs)
        not_loaded_orgs = new_orgs_set - self.orgs_loaded_set
        for x in new_orgs:
            self.orgs_loaded_set.add(x)

    for x in not_loaded_orgs:
        new_meta = dict(job.meta)
        new_meta['page'] = 1
        new_meta['org'] = x
        job = DownloadJob(url=self.ORG_REPOS_URL % x, jtype=DownloadJob.TYPE_REPOS_ORG, meta=new_meta)
        self.link_queue.put(job)