def filter_queue(qname, fn):
    bakname = qname + '.bak'
    redis_shell.rename(qname, bakname)
    p = redis_shell.pipeline()
    for entry in redis_shell.lrange(bakname, 0, -1):
        # rpush preserves the original queue order (cf. fix_queue below);
        # lpush here would reverse it, since queues are consumed with rpop.
        p.rpush(qname, fn(entry))
    p.execute()
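# A minimal usage sketch for filter_queue; the queue name and the transform
# are hypothetical, and entries are assumed to be 'ip|name|cfg' strings as
# elsewhere in this code:
def _example_filter_srvq():
    def scrub(entry):
        ip, name, cfg = entry.split('|', 2)
        return '|'.join([ip, name, cfg.strip()])
    filter_queue('eu:srvq', scrub)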
def retire_lcs(name, ip,
               # It's safe to cache this because at least 24h will elapse
               # between the time a proxy is recycled (and thus new server IDs
               # can be entered for it) and the time it's destroyed. More
               # precisely, 24h must elapse from the time it's been _split_.
               # For this to work, it's crucial to remove the
               # /home/lantern/server_split flag file whenever we recycle
               # proxies.
               byip=util.Cache(timeout=60*60, update_fn=srv_cfg_by_ip)):
    cm = cm_by_name(name)
    region = region_by_name(name)
    srvs = byip.get().get(ip, (None, []))[1]
    txn = redis_shell.pipeline()
    if srvs:
        scores = [redis_shell.zscore(region + ':slices', srv) for srv in srvs]
        pairs = {"<empty:%s>" % int(score): score for score in scores if score}
        if pairs:
            txn.zadd(region + ":slices", **pairs)
        txn.zrem(region + ":slices", *srvs)
        txn.hdel('srv->cfg', *srvs)
        txn.incr('srvcount')
    else:
        print "No configs left to delete for %s." % name
    # Check whether this server is in the queue (because of recycling).
    for cfg in redis_shell.lrange(region + ':srvq', 0, -1):
        if cfg.split('|')[0] == ip:
            txn.lrem(region + ':srvq', cfg)
    txn.lrem(cm + ':vpss', name)
    txn.incr(cm + ':vpss:version')
    txn.execute()
def actually_retire_proxy(name, ip, srv=None, pipeline=None):
    """
    While retire_proxy just enqueues the proxy for retirement, this actually
    updates the redis tables.
    """
    name, ip, srv = nameipsrv(name=name, ip=ip, srv=srv)
    cm = cm_by_name(name)
    region = region_by_name(name)
    txn = pipeline or redis_shell.pipeline()
    if srv:
        actually_close_proxy(name, ip, srv, txn)
        txn.hdel('srv->cfg', srv)
        txn.hdel('server->config', name)
        txn.hdel('srv->name', srv)
        txn.hdel('srv->srvip', srv)
        txn.hdel('name->srv', name)
        txn.hdel('srvip->srv', ip)
        # For debugging purposes; we can delete these anytime if they're a
        # space problem.
        txn.hset('history:srv->name', srv, name)
        txn.hset('history:name->srv', name, srv)
        txn.hset('history:srv->srvip', srv, ip)
        # An IP may be used by multiple servers through history.
        txn.rpush('history:srvip->srv:%s' % ip, srv)
        txn.incr('srvcount')
    else:
        print "No configs left to delete for %s." % name
    # Check whether this server is in the queue (because of recycling).
    for cfg in redis_shell.lrange(region + ':srvq', 0, -1):
        if cfg.split('|')[0] == ip:
            txn.lrem(region + ':srvq', cfg)
    txn.lrem(cm + ':vpss', name)
    txn.incr(cm + ':vpss:version')
    if txn is not pipeline:
        txn.execute()
def run():
    cm = vps_util.my_cm()
    region = vps_util.my_region()
    print "Starting retire server at cloudmaster %s, region %s." % (cm, region)
    qname = cm + ":retireq"
    destroy_qname = cm + ":destroyq"
    q = redisq.Queue(qname, redis_shell, TIMEOUT)
    while True:
        task, remover = q.next_job()
        if task:
            name, ip = task.split('|')
            is_baked_in = redis_shell.sismember(region + ":bakedin-names", name)
            txn = redis_shell.pipeline()
            if is_baked_in:
                print "Not retiring baked-in server %s (%s)" % (name, ip)
            else:
                print "Retiring", name, ip
                # Pass the pipeline by keyword; passing it positionally would
                # bind it to the srv parameter of actually_retire_proxy.
                vps_util.actually_retire_proxy(name, ip, pipeline=txn)
            remover(txn)
            if not is_baked_in:
                # Introduce the job with the timestamp already filled in, so it
                # will only be pulled when it 'expires'. This effectively adds
                # a delay to give clients some time to move over to their new
                # server before we actually destroy the old one.
                txn.lpush(destroy_qname, "%s*%s" % (name, int(time.time())))
            txn.execute()
        else:
            time.sleep(10)
def retire_proxy(name=None, ip=None, srv=None, reason='failed checkfallbacks',
                 pipeline=None, offload=False):
    name, ip, srv = nameipsrv(name, ip, srv)
    region = region_by_name(name)
    if redis_shell.sismember(region + ':fallbacks', srv):
        print >> sys.stderr, "I'm *not retiring* %s (%s) because it is a fallback server for region '%s'." % (
            name, ip, region)
        print >> sys.stderr, "Please remove it as a fallback first."
        return
    if redis_shell.sismember(region + ':honeypots', srv):
        print >> sys.stderr, "I'm *not retiring* %s (%s) because it is a honeypot server for region '%s'." % (
            name, ip, region)
        print >> sys.stderr, "Please remove it as a honeypot first."
        return
    p = pipeline or redis_shell.pipeline()
    if offload:
        qname = '%s:offloadq' % region_by_name(name)
    else:
        qname = '%s:retireq' % cm_by_name(name)
    p.rpush(qname, '%s|%s' % (name, ip))
    log2redis({'op': 'retire',
               'name': name,
               'ip': ip,
               'srv': srv,
               'reason': reason},
              pipeline=p)
    if not pipeline:
        p.execute()
def actually_retire_proxy(name, ip, pipeline=None):
    """
    While retire_proxy just enqueues the proxy for retirement, this actually
    updates the redis tables.
    """
    name, ip, srv = nameipsrv(name=name, ip=ip)
    cm = cm_by_name(name)
    region = region_by_name(name)
    txn = pipeline or redis_shell.pipeline()
    if srv:
        actually_close_proxy(name, ip, srv, txn)
        txn.hdel('srv->cfg', srv)
        txn.hdel('server->config', name)
        txn.hdel('srv->name', srv)
        txn.hdel('srv->srvip', srv)
        txn.hdel('name->srv', name)
        txn.hdel('srvip->srv', ip)
        # For debugging purposes; we can delete these anytime if they're a
        # space problem.
        txn.hset('history:srv->name', srv, name)
        txn.hset('history:name->srv', name, srv)
        txn.hset('history:srv->srvip', srv, ip)
        # An IP may be used by multiple servers through history.
        txn.rpush('history:srvip->srv:%s' % ip, srv)
        txn.incr('srvcount')
    else:
        print "No configs left to delete for %s." % name
    # Check whether this server is in the queue (because of recycling).
    for cfg in redis_shell.lrange(region + ':srvq', 0, -1):
        if cfg.split('|')[0] == ip:
            txn.lrem(region + ':srvq', cfg)
    txn.lrem(cm + ':vpss', name)
    txn.incr(cm + ':vpss:version')
    if txn is not pipeline:
        txn.execute()
def enqueue_cfg(name, access_data, srvq):
    "Upload a config to a server queue."
    ip = access_data['addr'].split(':')[0]
    cfg = serialize_access_data(access_data, name)
    txn = redis_shell.pipeline()
    txn.hset('server->config', name, cfg)
    txn.lpush(srvq, "%s|%s|%s" % (ip, name, cfg))
    txn.execute()
def destroy_vps(name):
    vps_shell(dc_by_name(name)).destroy_vps(name)
    srv = redis_shell.hget('name->srv', name)
    if srv:
        txn = redis_shell.pipeline()
        txn.hdel('name->srv', name)
        txn.hdel('srv->name', srv)
        txn.execute()
def feed(src):
    p = redis_shell.pipeline(transaction=True)
    cfg = yaml.load(file(src))
    cfg['client']['frontedservers'] = []
    cfg['client']['chainedservers'] = "<SERVER CONFIG HERE>"
    globalcfg = yaml.dump(cfg)
    p.set("globalcfg", globalcfg)
    p.set("globalcfgsha", hashlib.sha1(globalcfg).hexdigest())
    p.execute()
def new_vps_serial(prefix, cm=None, datestr=None):
    if cm is None:
        cm = my_cm()
    if datestr is None:
        datestr = todaystr()
    key = 'serial:%s:%s:%s' % (cm, prefix, datestr)
    p = redis_shell.pipeline()
    p.incr(key)
    p.expire(key, 25 * 60 * 60)
    return p.execute()[0]
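# Behavior sketch for new_vps_serial (the cm/datestr values are
# hypothetical): serials are scoped to a (cloudmaster, prefix, day) key, so
# repeated calls count up, and the 25h expiry lets each day's counter start
# over at 1.
def _example_new_vps_serial():
    print new_vps_serial('fp', cm='docm1', datestr='20160101')  # -> 1
    print new_vps_serial('fp', cm='docm1', datestr='20160101')  # -> 2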
def get_lcs_name():
    date = vps_util.todaystr()
    # Note: this check-and-reset is not atomic, so two concurrent callers
    # right after the date changes could both reset the serial.
    if redis_shell.get(CM + ':lcsserial:date') == date:
        serial = redis_shell.incr(CM + ':lcsserial')
    else:
        pipe = redis_shell.pipeline()
        pipe.set(CM + ':lcsserial:date', date)
        pipe.set(CM + ':lcsserial', 1)
        pipe.execute()
        serial = 1
    return 'fp-%s-%s-%03d' % (CM, date, serial)
def upload_cfg(name, access_data):
    ip = access_data['addr'].split(':')[0]
    # DRY: flashlight/genconfig/cloud.yaml.tmpl
    access_data.update(pipeline=True, trusted=True, qos=10, weight=1000000)
    cfg = "\n " + yaml.dump({'fallback-' + ip: access_data})
    txn = redis_shell.pipeline()
    txn.hset('server->config', name, cfg)
    txn.lpush(REGION + ":srvq", "%s|%s|%s" % (ip, name, cfg))
    txn.execute()
def get_lcs_name(req):
    date = vps_util.todaystr()
    if redis_shell.get(CM + ':lcsserial:date') == date:
        serial = redis_shell.incr(CM + ':lcsserial')
    else:
        pipe = redis_shell.pipeline()
        pipe.set(CM + ':lcsserial:date', date)
        pipe.set(CM + ':lcsserial', 1)
        pipe.execute()
        serial = 1
    type_prefix = 'obfs4' if 'obfs4_port' in req else 'https'
    return 'fp-%s-%s-%s-%03d' % (type_prefix, CM, date, serial)
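# Illustration (hypothetical values, assuming CM = 'docm1', today's date
# string '20160101', and a fresh serial): the first two calls would yield
#   get_lcs_name({'obfs4_port': 443})  ->  'fp-obfs4-docm1-20160101-001'
#   get_lcs_name({})                   ->  'fp-https-docm1-20160101-002'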
def close_server(msg):
    if os.path.exists(close_flag_filename):
        print "Not closing myself again."
        return
    txn = redis_shell.pipeline()
    vps_util.actually_close_proxy(name=instance_id, ip=ip, pipeline=txn)
    alert(type='proxy-closed',
          details={'reason': msg},
          text="*Closed* because I " + msg,
          color='good',
          pipeline=txn)
    txn.execute()
    flag_as_done(close_flag_filename)
def pull_from_srvq(prefix, refill=True):
    x = redis_shell.rpop(prefix + ':srvq')
    if x is None:
        raise RuntimeError("No servers to pull from the %s queue" % prefix)
    # Split at most twice: the cfg field is YAML and may itself contain '|'.
    ip, name, cfg = x.split('|', 2)
    srv = redis_shell.incr('srvcount')
    p = redis_shell.pipeline()
    if refill:
        p.lpush(prefix + ':srvreqq', srv)
    p.hset('server->config', name, cfg)
    p.hset('srv->cfg', srv, cfg)
    p.hset('srv->name', srv, name)
    p.hset('name->srv', name, srv)
    p.hset('srvip->srv', ip, srv)
    p.hset('srv->srvip', srv, ip)
    p.execute()
    return redis_util.nis(name, ip, srv)
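# Round-trip sketch (the names and access_data are made up): enqueue_cfg
# lpushes onto the head of a srvq and pull_from_srvq rpops from its tail,
# so servers are handed out in FIFO order. The unpacking below assumes
# redis_util.nis is a (name, ip, srv) tuple-like.
def _example_srvq_roundtrip():
    enqueue_cfg('fp-test-001', {'addr': '1.2.3.4:443'}, 'eu:srvq')
    name, ip, srv = pull_from_srvq('eu')
    print name, ip, srv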
def run():
    region = vps_util.my_region()
    print "Starting offload server for region %s." % region
    qname = region + ":offloadq"
    q = redisq.Queue(qname, redis_shell, TIMEOUT)
    while True:
        task, remover = q.next_job()
        if task:
            name, ip = task.split('|')
            print "Offloading users from %s (%s)" % (name, ip)
            txn = redis_shell.pipeline()
            vps_util.actually_offload_proxy(name, ip, pipeline=txn)
            remover(txn)
            cm = vps_util.cm_by_name(name)
            txn.lpush(cm + ':retireq', task)
            txn.execute()
        else:
            time.sleep(10)
def srvs_in_cfgbysrv(region, cfgbysrv):
    key = region + ":slices"
    issues = [(srv, score)
              for srv, score in redis_shell.zrangebyscore(key, "-inf", "+inf",
                                                          withscores=True)
              if not srv.startswith("<empty") and srv not in cfgbysrv]
    for srv, score in issues[:]:
        # Double-check to avoid race conditions.
        if redis_shell.hexists("srv->cfg", srv):
            issues.remove((srv, score))
        else:
            # Might as well fix it while we're at it!
            txn = redis_shell.pipeline()
            txn.zrem(key, srv)
            txn.zadd(key, "<empty:%s>" % score, score)
            txn.execute()
    return ["Server %s in %s's slice table but no config for it." % (srv, region)
            for srv, _ in issues]
def fix_queue(qname, fix_fn):
    # The backup is left around just in case you bork something.
    # Delete it manually or by calling `delete_q_backups` after
    # making sure everything went well.
    print "handling %s..." % qname
    bakname = qname + '.bak'
    redis_shell.rename(qname, bakname)
    p = redis_shell.pipeline()
    for entry in redis_shell.lrange(bakname, 0, -1):
        # Split at most twice: the cfg field is YAML and may contain '|'.
        ip, name, cfg = entry.split('|', 2)
        needs_fixing, good_cfg = fix_fn(cfg)
        if needs_fixing:
            print "enqueued proxy %s needs fixing" % name
            entry = '|'.join([ip, name, good_cfg])
        p.rpush(qname, entry)
    print "fixing enqueued proxies..."
    p.execute()
    print "Done with %s" % qname
def actually_close_proxy(name=None, ip=None, srv=None, pipeline=None):
    name, ip, srv = nameipsrv(name, ip, srv)
    region = region_by_name(name)
    slices_key = region + ':slices'

    def remove_if_there(k):
        # Replace the slice entry with an "<empty:score>" placeholder at the
        # same score, so the slot stays reserved for reuse.
        score = redis_shell.zscore(slices_key, k)
        if score is None:
            return False
        else:
            txn.zrem(slices_key, k)
            txn.zadd(slices_key, "<empty:%s>" % int(score), score)
            return True

    txn = pipeline or redis_shell.pipeline()
    remove_if_there(srv)
    # `it` is assumed to be the itertools module (imported elsewhere as
    # `import itertools as it`); walk srv|0, srv|1, ... until one is missing.
    c = it.count()
    while remove_if_there('%s|%s' % (srv, c.next())):
        pass
    if txn is not pipeline:
        txn.execute()
def run(): dc = os.getenv("DC") print "Using datacenter", dc qname = dc + ":retireq" destroy_qname = dc + ":destroyq" q = redisq.Queue(qname, redis_shell, TIMEOUT) while True: task, remover = q.next_job() if task: name, ip = task.split('|') print "Retiring", name, ip vps_util.retire_lcs(name, ip) txn = redis_shell.pipeline() remover(txn) # Introduce the job with the timestamp already filled in, so it will # only be pulled when it 'expires'. This effectively adds a delay to # give clients some time to move over to their new server before we # actually destroy the old one. txn.lpush(destroy_qname, "%s*%s" % (name, int(time.time()))) txn.execute() time.sleep(10)
def get_lcs_name(dc, redis_shell):
    if dc.startswith('vltok'):
        country = 'jp'
    elif dc.startswith('doams'):
        country = 'nl'
    else:
        assert False
    now = datetime.utcnow()
    date = "%d%02d%02d" % (now.year, now.month, now.day)
    if redis_shell.get(dc + ':lcsserial:date') == date:
        serial = redis_shell.incr(dc + ':lcsserial')
    else:
        pipe = redis_shell.pipeline()
        pipe.set(dc + ':lcsserial:date', date)
        pipe.set(dc + ':lcsserial', 1)
        pipe.execute()
        serial = 1
    return 'fp-%s-%s-%03d' % (country, date, serial)
def slice_srvs_in_srv2cfg(region, srv2cfg):
    key = region + ':slices'
    issues = [(k, score)
              for k, score in redis_shell.zrangebyscore(key, '-inf', '+inf',
                                                        withscores=True)
              if not k.startswith('<empty')
              and not k.startswith('<locked')
              and k.split('|')[0] not in srv2cfg]
    for k, score in issues[:]:
        # Double-check to avoid race conditions.
        if redis_shell.hexists('srv->cfg', k.split('|')[0]):
            issues.remove((k, score))
        else:
            # Might as well fix it while we're at it!
            txn = redis_shell.pipeline()
            txn.zrem(key, k)
            txn.zadd(key, '<empty:%s>' % int(score), score)
            txn.execute()
    return ["Key %s in %s's slice table but no config for it." % (k, region)
            for k, _ in issues]
def srvs_in_cfgbysrv(region, cfgbysrv):
    key = region + ':slices'
    issues = [(k, score)
              for k, score in redis_shell.zrangebyscore(key, '-inf', '+inf',
                                                        withscores=True)
              if not k.startswith('<empty')
              and not k.startswith('<locked')
              and k.split('|')[0] not in cfgbysrv]
    for k, score in issues[:]:
        # Double-check to avoid race conditions.
        if redis_shell.hexists('srv->cfg', k.split('|')[0]):
            issues.remove((k, score))
        else:
            # Might as well fix it while we're at it!
            txn = redis_shell.pipeline()
            txn.zrem(key, k)
            txn.zadd(key, '<empty:%s>' % int(score), score)
            txn.execute()
    return ["Key %s in %s's slice table but no config for it." % (k, region)
            for k, _ in issues]
def run():
    region = vps_util.my_region()
    print "Starting offload server for region %s." % region
    qname = region + ":offloadq"
    q = redisq.Queue(qname, redis_shell, TIMEOUT)
    while True:
        task, remover = q.next_job()
        if task:
            name, ip = task.split('|')
            print "Offloading users from %s (%s)" % (name, ip)
            txn = redis_shell.pipeline()
            try:
                vps_util.actually_offload_proxy(name, ip, pipeline=txn)
            except vps_util.ProxyGone:
                print >> sys.stderr, "Tried to offload no longer existing proxy %s (%s)" % (
                    name, ip)
                remover(redis_shell)
                continue
            remover(txn)
            cm = vps_util.cm_by_name(name)
            txn.lpush(cm + ':retireq', task)
            txn.execute()
        else:
            time.sleep(10)
def run(): qname = QPREFIX + ":srvreqq" print "Serving queue", qname, ", MAXPROCS:", repr(MAXPROCS) quarantine = CM + ":quarantined_vpss" reqq = redisq.Queue(qname, redis_shell, LAUNCH_TIMEOUT) procq = multiprocessing.Queue() pending = {} def kill_task(reqid): print "Killing timed out process and vps..." task = pending.pop(reqid) task['proc'].terminate() proc = multiprocessing.Process(target=vps_shell.destroy_vps, args=(task['name'],)) proc.daemon = True proc.start() while True: # If the request queue is totally empty (no tasks enqueued or even in # progress), flush the quarantine queue into the destroy queue. if redis_shell.llen(qname) == 1: # 1 for the redisq sentinel entry names = redis_shell.smembers(quarantine) if names: print "Flushing %s VPSs from quarantine." % len(names) p = redis_shell.pipeline() p.srem(quarantine, *names) p.lpush(CM + ":destroyq", *names) p.execute() while not procq.empty(): try: result = procq.get(False) print "Got result:", result task = pending.get(result['reqid']) if task and task['name'] == result['name']: p = redis_shell.pipeline() if result['blocked']: print "Quarantining %(name)s (%(ip)s)." % result p.sadd(quarantine, result['name']) p.incr(CM + ":blocked_vps_count") # stats # We'll remove the original request anyway because we # don't want it to stay around until timeout. Insert a # new one to replace it instead. reqid = redis_shell.incr('srvcount') p.lpush(qname, reqid) else: p.incr(CM + ":unblocked_vps_count") # stats del pending[result['reqid']] vps_util.enqueue_cfg(result['name'], result['access_data'], result['srvq']) register_vps(task['name']) task['remove_req'](p) p.execute() except Empty: print "Wat?" break if len(pending) < MAXPROCS: req_string, remover = reqq.next_job() if req_string: print "Got request", req_string req = json.loads(req_string) if isinstance(req, int): # Transition: support the old format while we are updating # the config server etc. req = {'id': req, 'srvq': QPREFIX + ':srvq'} req_string = json.dumps(req) reqid = req['id'] if reqid in pending: print "Killing task %s because of queue timeout" % reqid kill_task(reqid) name = new_proxy_name(req) proc = multiprocessing.Process(target=launch_one_server, args=(procq, reqid, name, req_string)) proc.daemon = True pending[reqid] = { 'name': name, 'proc': proc, 'starttime': time.time(), 'remove_req': remover} print "Starting process to launch", name proc.start() else: # Since we're not checking the queue when we've maxed out our # processes, we need to manually check for expired tasks. for reqid, d in pending.items(): if time.time() - d['starttime'] > LAUNCH_TIMEOUT: print "Killing task %s because of local timeout" % reqid kill_task(reqid) time.sleep(10)