Example No. 1
class TweetWorker(object):

    def __init__(self, n, callback):
        self.processing_users = []
        self.lock = RWLock()
        self.jobs = ThreadQueue.Queue()
        self.results = ThreadQueue.Queue()
        self.processes = []
        for _ in range(0, n):
            s = Thread(target=worker, args=(self.jobs, self.results))
            self.processes.append(s)
            s.daemon = True
            s.start()
        print('Started {} worker processes'.format(len(self.processes)))

        self.consumer = Thread(target=consumer_process, args=(self.results, callback))
        self.consumer.daemon = True
        self.consumer.start()
        print('Started consumer process')

    def get(self, username):
        if username is None:
            return

        self.lock.acquire_read()
        if username in self.processing_users:
            self.lock.release()
            return
        self.lock.release()

        self.lock.acquire_write()
        # Re-check under the write lock: another thread may have queued this
        # user after the read lock above was released.
        if username not in self.processing_users:
            self.processing_users.append(username)
            self.jobs.put(username)
        self.lock.release()
        return
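
All of the snippets on this page call acquire_read(), acquire_write() and release() on an RWLock imported from elsewhere; none of them show its implementation. For reference, a minimal readers-writer lock with that interface might look roughly like the sketch below (an illustrative stand-in, not the lock these projects actually import):

import threading

class RWLock(object):
    """Minimal readers-writer lock: many concurrent readers, one exclusive writer."""

    def __init__(self):
        self._cond = threading.Condition()
        self._readers = 0      # number of threads currently holding a read lock
        self._writer = False   # True while a thread holds the write lock

    def acquire_read(self):
        with self._cond:
            while self._writer:
                self._cond.wait()
            self._readers += 1

    def acquire_write(self):
        with self._cond:
            while self._writer or self._readers:
                self._cond.wait()
            self._writer = True

    def release(self):
        # A writer and readers never hold the lock at the same time,
        # so a single release() can tell which mode is being dropped.
        with self._cond:
            if self._writer:
                self._writer = False
            else:
                self._readers -= 1
            self._cond.notify_all()

This naive version can starve writers under a steady stream of readers, which is usually acceptable for the read-mostly structures used in these examples.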
Example No. 2
class TweetWorker(object):
    def __init__(self, n, callback):
        self.processing_users = []
        self.lock = RWLock()
        self.jobs = ThreadQueue.Queue()
        self.results = ThreadQueue.Queue()
        self.processes = []
        for _ in range(0, n):
            s = Thread(target=worker, args=(self.jobs, self.results))
            self.processes.append(s)
            s.daemon = True
            s.start()
        print('Started {} worker processes'.format(len(self.processes)))

        self.consumer = Thread(target=consumer_process,
                               args=(self.results, callback))
        self.consumer.daemon = True
        self.consumer.start()
        print('Started consumer process')

    def get(self, username):
        if username is None:
            return

        self.lock.acquire_read()
        if username in self.processing_users:
            self.lock.release()
            return
        self.lock.release()

        self.lock.acquire_write()
        # Re-check under the write lock: another thread may have queued this
        # user after the read lock above was released.
        if username not in self.processing_users:
            self.processing_users.append(username)
            self.jobs.put(username)
        self.lock.release()
        return
Example No. 3
class ChatMessageStore:
    def __init__(self):
        self.__id = 1
        self.__storage = []
        self.__rwlock = RWLock()

    def add_message(self, message):
        self.__rwlock.acquire_write()
        self.__storage.append((self.__id, message))
        self.__id += 1
        self.__rwlock.release()

    def get_messages(self, start_id=0):
        self.__rwlock.acquire_read()
        result = ([message for id_, message in self.__storage if id_ >= start_id], self.__id)
        self.__rwlock.release()

        return result
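
The store returns the matching messages together with the next message id, so a client can poll incrementally by feeding that id back as start_id. A small usage sketch (it assumes the ChatMessageStore above and an RWLock implementation are importable):

from threading import Thread

store = ChatMessageStore()  # the class defined above

def producer(tag):
    for i in range(5):
        store.add_message('%s-%d' % (tag, i))

threads = [Thread(target=producer, args=(t,)) for t in ('a', 'b', 'c')]
for t in threads:
    t.start()
for t in threads:
    t.join()

# First poll returns everything plus the id to resume from.
messages, next_id = store.get_messages()
print(len(messages), 'messages, resume from id', next_id)

# A later poll with start_id=next_id only returns messages added since then.
newer, next_id = store.get_messages(start_id=next_id)
print(len(newer), 'new messages')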
Example No. 4
class DeepDist:
    def __init__(self, model, master='127.0.0.1:5000', min_updates=0, max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model  = model
        self.lock   = RWLock()
        self.descent  = lambda model, gradient: model
        self.master   = master
        self.state    = 'serving'
        self.served   = 0
        self.received = 0
        #self.server   = None
        self.pmodel   = None
        self.min_updates = min_updates
        self.max_updates = max_updates

    def __enter__(self):
        Thread(target=self.start).start()
        # self.server = Process(target=self.start)
        # self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        # self.server.terminate()
        pass # need to shut down server here

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving' or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    print(self.model)
                    self.pmodel = pickle.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            return pmodel


        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = 'receiving'
            self.received += 1

            self.descent(self.model, gradient)

            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served   = 0
                self.state    = 'serving'
                self.pmodel = None

            self.lock.release()
            return 'OK'

        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master   # will be pickled
        if master is None:
            master = rdd.ctx._conf.get('spark.master')
        if master.startswith('local['):
            master = 'localhost:5000'
        else:
            if master.startswith('spark://'):
                master = '%s:5000' % urlparse.urlparse(master).netloc.split(':')[0]
            else:
                master = '%s:5000' % master.split(':')[0]
        print '\n*** Master: %s\n' % master

        self.descent = descent

        def mapPartitions(data):
            # Fetch the current model once per partition, compute the gradient
            # on the partition's data, and send it back to the master.
            return [send_gradient(gradient(fetch_model(master=master), data), master=master)]
        return rdd.mapPartitions(mapPartitions).collect()
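
Every DeepDist variant on this page ships gradient work to Spark executors through two helpers, fetch_model and send_gradient, that are not included in the listings. Judging from the Flask routes above, they presumably GET the pickled model from /model and POST a pickled gradient to /update; a rough, hypothetical sketch of such helpers using only the Python 3 standard library (not the project's actual implementation) would be:

import pickle
from urllib.request import Request, urlopen

def fetch_model(master='localhost:5000'):
    # GET the pickled model that the /model route above serves.
    with urlopen('http://%s/model' % master) as resp:
        return pickle.loads(resp.read())

def send_gradient(gradient, master='localhost:5000'):
    # POST the pickled gradient to the /update route above.
    req = Request('http://%s/update' % master,
                  data=pickle.dumps(gradient, -1),
                  method='POST')
    with urlopen(req) as resp:
        return resp.read()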
Example No. 5
class DeepDist:
    def __init__(self,
                 model,
                 master='127.0.0.1:5000',
                 min_updates=0,
                 max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model = model
        self.lock = RWLock()
        self.descent = lambda model, gradient: model
        self.master = master
        self.state = 'serving'
        self.served = 0
        self.received = 0
        #self.server   = None
        self.pmodel = None
        self.min_updates = min_updates
        self.max_updates = max_updates
        print("THIS IS THE MASTER")
        print(self.master)
        print("\n")
        print("THIS IS THE MODEL 1")
        print(self.model)
        print("\n")

    def __enter__(self):
        Thread(target=self.start).start()
        # self.server = Process(target=self.start)
        # self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        # self.server.terminate()
        pass  # need to shut down server here

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving'
                   or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    self.pmodel = pickleDumper.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            return pmodel

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = 'receiving'
            self.received += 1

            self.descent(self.model, gradient)

            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served = 0
                self.state = 'serving'
                self.pmodel = None

            self.lock.release()
            return 'OK'

        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master  # will be pickled
        print("MASTER ROUND 2")
        print(master)
        if master is None:
            master = rdd.ctx._conf.get('spark.master')
        if master.startswith('local['):
            master = 'localhost:5000'
        else:
            if master.startswith('spark://'):
                master = '%s:5000' % urlparse.urlparse(master).netloc.split(
                    ':')[0]
            else:
                master = '%s:5000' % master.split(':')[0]
        print '\n*** Master: %s\n' % master

        self.descent = descent

        def mapPartitions(data):
            return [
                send_gradient(gradient(fetch_model(master=master), data),
                              master=master)
            ]

        return rdd.mapPartitions(mapPartitions).collect()
Example No. 6
class DeepDist:
    def __init__(self,
                 model,
                 master='127.0.0.1:5000',
                 min_updates=0,
                 max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model = model
        self.lock = RWLock()
        self.descent = lambda model, gradient: model
        self.master = master
        self.state = 'serving'
        self.served = 0
        self.received = 0
        self.server = '0.0.0.0'
        self.pmodel = None
        self.min_updates = min_updates
        self.max_updates = max_updates

    def __enter__(self):
        Thread(target=self.start).start()
        # self.server = Process(target=self.start)
        # self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        url = "http://%s/shutdown" % self.master
        response = urllib2.urlopen(url, '{}').read()
        print("Exit requested...")

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving'
                   or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    self.pmodel = pickleDumper.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            return pmodel

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = 'receiving'
            self.received += 1

            self.descent(self.model, gradient)

            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served = 0
                self.state = 'serving'
                self.pmodel = None

            self.lock.release()
            return 'OK'

        @app.route('/shutdown', methods=['POST'])
        def shutdown():
            func = request.environ.get('werkzeug.server.shutdown')
            if func is None:
                raise RuntimeError('Not running with the Werkzeug Server')
            func()
            return 'Server shutting down...'

        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master  # will be pickled
        if master is None:
            master = rdd.ctx._conf.get('spark.master')
        if master.startswith('local['):
            master = 'localhost:5000'
        else:
            if master.startswith('spark://'):
                master = '%s:5000' % urlparse.urlparse(master).netloc.split(
                    ':')[0]
            else:
                master = '%s:5000' % master.split(':')[0]
        print '\n*** Master: %s\n' % master

        self.descent = descent

        def mapPartitions(data):
            return [
                send_gradient(gradient(fetch_model(master=master), data),
                              master=master)
            ]

        return rdd.mapPartitions(mapPartitions).collect()
Example No. 7
class DeepDist:
    def __init__(self,
                 model,
                 master='127.0.0.1:5000',
                 min_updates=0,
                 max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model = model
        self.lock = RWLock()
        self.descent = lambda model, gradient: model
        self.master = master
        self.state = 'serving'
        self.served = 0
        self.received = 0
        #self.server   = None
        self.pmodel = None
        self.min_updates = min_updates
        self.max_updates = max_updates

    def start_server(self):
        Thread(target=self.start).start()

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving'
                   or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    self.pmodel = pickle.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
                print "model replica weights were updated via /model"
            return pmodel

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = 'receiving'
            self.received += 1

            old_syn0, old_syn1 = self.model.syn0.copy(), self.model.syn1.copy()
            print "received gradient: " + str(gradient)

            self.descent(self.model, gradient)

            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served = 0
                self.state = 'serving'
                self.pmodel = None

            self.lock.release()
            print "server weights were updated by model replica"
            print "old weights: "
            print old_syn0[0:3, 0:3], old_syn1[
                0:3, 0:3]  #printing just the first few weights
            print "new weights: "
            print self.model.syn0[0:3, 0:3], self.model.syn1[0:3, 0:3]
            return 'OK'

        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master
        print '\n*** Master: %s\n' % master

        self.descent = descent

        def mapPartitions(data):
            return [
                send_gradient(gradient(fetch_model(master=master), data),
                              master=master)
            ]

        return rdd.mapPartitions(mapPartitions).collect()
Example No. 8
class Volumn(object):

    _rpc_methods = [
        'assign_volumn', 'store', 'replica', 'download', 'status', 'balance',
        'migrate_volumn_to', 'migrate_volumn_from', 'delete_file',
        'delete_volumn'
    ]

    def __init__(self, logger, host, port):
        self.logger = logger
        self.host = host
        self.port = port

        self.lock = RWLock()

        self.vdb = dict()
        self.fdb = dict()
        if os.path.isfile('vdb'):
            self.vdb = pickle.load(open('vdb', 'rb'))
        if os.path.isfile('fdb'):
            self.fdb = pickle.load(open('fdb', 'rb'))

        self.act_mst_serv = list()

        self.serv = ThreadXMLRPCServer((self.host, self.port),
                                       logRequests=True)

        for name in self._rpc_methods:
            self.serv.register_function(getattr(self, name))

    def _update_vdb(self):
        pickle.dump(self.vdb, open('vdb', 'wb'))

    def _update_fdb(self):
        pickle.dump(self.fdb, open('fdb', 'wb'))

    def update_master(self, masters):
        self.act_mst_serv = masters

    def get_master(self):
        return ServerProxy(random.choice(self.act_mst_serv))

    def assign_volumn(self, vid, size):
        path = 'data/%s' % vid

        if not os.path.isdir('data'):
            os.mkdir('data')

        try:
            # assign_volumn mutates the shared volume db, so take the write lock
            self.lock.acquire_write()

            with open(path, 'wb') as f:
                f.seek(size - 1)
                f.write(b'\0')

            vdoc = dict()
            vdoc['vid'] = vid
            vdoc['path'] = path
            vdoc['size'] = size
            vdoc['counter'] = 0

            self.vdb[vid] = vdoc
            self._update_vdb()

            return True
        except:
            return False
        finally:
            self.lock.release()

    def migrate_volumn_to(self, vid, to_addr):
        try:
            vdoc = self.vdb[vid]
            path = vdoc['path']

            s = ServerProxy(to_addr)

            with open(path, 'rb') as f:
                while True:
                    data = f.read(64 * 1024 * 1024)
                    if data:
                        s.migrate_volumn_from(vid, data, vdoc)
                        self.logger.info('Send data...')
                    else:
                        fdocs = {
                            k: v
                            for k, v in self.fdb.items()
                            if k.startswith('%d,' % vid)
                        }
                        self.logger.info('Send metadata')
                        s.migrate_volumn_from(vid, data, vdoc, fdocs, True)
                        break

            return True
        except:
            return False

    def migrate_volumn_from(self, vid, data, vdoc, fdocs=None, done=False):
        path = vdoc['path']

        if not os.path.isdir('data'):
            os.mkdir('data')

        if done:
            self.vdb[vid] = vdoc
            self._update_vdb()
            self.fdb = {**self.fdb, **fdocs}
            self._update_fdb()
            self.logger.info('Build replica %d success' % vid)
        else:
            with open(path, 'ab') as f:
                f.write(data.data)

        return True

    def store(self, fid, data):
        vid, _ = fid.split(',')
        vid = int(vid)

        try:
            self.replica(fid, data)
            master = self.get_master()
            volumns = master.find_writable_volumn(vid)

            if not volumns:
                return False

            for volumn in volumns:
                if volumn != 'http://%s:%d' % (self.host, self.port):
                    s = ServerProxy(volumn)
                    s.replica(fid, data)

            return True
        except Exception as e:
            self.logger.exception('Got an exception')
            return False

    def replica(self, fid, data):
        data = data.data
        vid, _ = fid.split(',')
        vid = int(vid)

        self.lock.acquire_write()

        vdoc = self.vdb[vid]
        path = vdoc['path']
        offset = vdoc['counter']

        size = len(data)
        vdoc['counter'] += size

        self.lock.release()

        with open(path, 'r+b') as f:
            f.seek(offset)
            f.write(data)

        fdoc = dict()
        fdoc['fid'] = fid
        fdoc['offset'] = offset
        fdoc['size'] = size
        fdoc['delete'] = False

        self.vdb[vid] = vdoc
        self._update_vdb()

        self.fdb[fid] = fdoc
        self._update_fdb()

        return True

    def update_file(self, fid, data):
        pass

    def delete_file(self, fid, sync=True):
        vid, _ = fid.split(',')
        vid = int(vid)

        try:
            if sync:
                master = self.get_master()
                volumns = master.find_writable_volumn(vid)

                if not volumns:
                    return False

                for volumn in volumns:
                    if volumn != 'http://%s:%d' % (self.host, self.port):
                        s = ServerProxy(volumn)
                        s.delete_file(fid, False)

            fdoc = self.fdb[fid]
            fdoc['delete'] = True

            self._update_fdb()

            return True
        except Exception as e:
            self.logger.exception('Got an exception')
            return False

    def delete_volumn(self, vid):
        pass

    def download(self, fid):
        vid, _ = fid.split(',')
        vid = int(vid)

        if vid not in self.vdb or fid not in self.fdb:
            return None

        try:
            self.lock.acquire_read()

            vdoc = self.vdb[vid]
            fdoc = self.fdb[fid]

            if fdoc['delete'] == True:
                return None

            path = vdoc['path']
            offset = fdoc['offset']
            size = fdoc['size']

            with open(path, 'rb') as f:
                f.seek(offset)
                data = f.read(size)

            return data
        except:
            return None
        finally:
            self.lock.release()

    def balance(self, vid):
        try:
            self.lock.acquire_write()

            vdoc = self.vdb[vid]
            fdocs = self.fdb

            tfdocs = fdocs.copy()

            tvdoc = vdoc.copy()
            tvdoc['counter'] = 0

            path = vdoc['path']
            size = vdoc['size']

            with open(path + '.tmp', 'wb') as f:
                f.seek(size - 1)
                f.write(b'\0')

            with open(path, 'r+b') as from_file, open(path + '.tmp',
                                                      'r+b') as to_file:
                to_file.seek(0)
                for fdoc in fdocs.values():
                    if fdoc['fid'].startswith(
                            '%d,' % vid) and fdoc['delete'] == False:
                        from_file.seek(fdoc['offset'])
                        data = from_file.read(fdoc['size'])
                        to_file.write(data)

                        tfdoc = fdoc.copy()
                        tfdoc['offset'] = tvdoc['counter']
                        tvdoc['counter'] += fdoc['size']
                        tfdocs[fdoc['fid']] = tfdoc

            os.remove(path)
            os.rename(path + '.tmp', path)

            self.vdb[vid] = tvdoc
            self.fdb = tfdocs

            return True
        except:
            self.logger.exception('Got an exception')
            return False
        finally:
            self.lock.release()

    def status(self):
        status = dict()
        total, used, free = shutil.disk_usage(__file__)
        status['total'] = str(total)
        status['used'] = str(used)
        status['free'] = str(free)
        status['vdb'] = {str(vid): vdoc for vid, vdoc in self.vdb.items()}
        return status

    def start(self):
        self.logger.info('Start serving at %s:%d' % (self.host, self.port))
        self.serv.serve_forever()
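
Example No. 8 registers its methods on a ThreadXMLRPCServer, so any XML-RPC client can drive it. A hedged usage sketch follows; the address, the volume id and the 'vid,key' file-id format are assumptions read off the code above, and replica() is called directly so the sketch does not depend on a master server:

from xmlrpc.client import ServerProxy, Binary

volumn = ServerProxy('http://127.0.0.1:9333')  # assumed address of a running Volumn server

# Pre-allocate a 1 MiB volume file (vid 1), then append a blob into it.
volumn.assign_volumn(1, 1024 * 1024)
volumn.replica('1,demo-key', Binary(b'hello world'))

# Read the blob back; download() returns the stored data, or None if deleted.
print(volumn.download('1,demo-key'))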
Example No. 9
class MSGControler:
	#def __init__(self, listen_ip, listen_port, log_file, debug):
	def __init__(self, configDic):
		"""
		Init Controler class
		"""
		self.running = True
		self.config = configDic
		self.log_file = self.config["log"]
		self.debug = self.config["debug"]
		self.security = self.config["security"]
		self.clients = {}
		self.lock = RWLock()
		## Start TCP comm server ##
		listen_ip = self.config["listen_ip"]
		listen_port = self.config["listen_port"]
		try:
			self.server = tcpServer((listen_ip,listen_port), handleConnection, self.clients, self.debug, self.security )
		except:
			self.log_error("Unable to bind TCP socket %s:%s !" % (listen_ip,listen_port))
			proc = subprocess.Popen(["ss", "-pant"], stdout=subprocess.PIPE)
			code = proc.wait()
			for aline in proc.stdout:
				if (str(listen_ip)+':'+str(listen_port)) in aline and "LISTEN" in aline:
					# Pull the "(...)" process field out of the ss output line
					tmpstr1 = re.sub(r'\).*', '', re.sub(r'.*\(', '', aline))
					pid = re.sub(r',.*', '', re.sub(r'.*pid=', '', tmpstr1))
					prog = re.sub(r'.*"', '', re.sub(r'",.*', '', aline))
					self.log_warning("Process %s, PID %s, is binding port %s. It will be killed." % (prog, pid, listen_port))
					os.system("kill -9 %s" % pid)
		
			time.sleep(10)
			self.log_info("Trying again to bind %s on %s." % (listen_port, listen_ip))
			self.server = tcpServer((listen_ip,listen_port), handleConnection, self.clients, self.debug, self.security )

		self.comm_thread = threading.Thread(target=self.server.serve_forever)
		self.comm_thread.daemon = True
		self.comm_thread.start()
		##### Send a keepalive message every minute (60 sec) ##
		self.keepalive = KeepAliveTimer(60, self.send_keepalive, ["KeepAliveTimer"])
		self.keepalive.start()
	
	def log_error(self, newline):
		self.log(newline, "ERROR")
	
	def log_warning(self, newline):
		self.log(newline, "WARNING")
	
	def log_info(self, newline):
		self.log(newline, "INFO")
	
	def log_event(self, newline):
		self.log(newline, "EVENT")
	
	def log_debug(self, newline):
		if self.debug:
			self.log(newline, "DEBUG")

	def log(self, newline, level="INFO"):
		LOG_SIZE = os.path.getsize(self.log_file)
		# if > 1M create a new file
		if LOG_SIZE > 1000000:
			# Rotate: drop .4, then shift .3 -> .4, .2 -> .3, .1 -> .2
			if os.path.exists(self.log_file+".4"):
				os.remove(self.log_file+".4")
			if os.path.exists(self.log_file+".3"):
				os.rename(self.log_file+".3", self.log_file+".4")
			if os.path.exists(self.log_file+".2"):
				os.rename(self.log_file+".2", self.log_file+".3")
			if os.path.exists(self.log_file+".1"):
				os.rename(self.log_file+".1", self.log_file+".2")

			os.rename(self.log_file, self.log_file+".1")
			if os.path.exists('/opt/virtualisation/openkvi/debug'):
				os.remove('/opt/virtualisation/openkvi/debug')
			logs = open(self.log_file,'w')

		else:
			logs = open(self.log_file,'a')

		timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
		logs.write(timestamp+"::["+level+"]::"+newline+"\n")
		logs.close()
	
	def print_debug(self, msg):
		if self.debug:
			self.log_debug(msg)
			print msg

	def tell_all(self, event, data):
		self.keepalive.stop()
		self.print_debug("telling all %s %s"% (event, data))
		line = event+";"+json.dumps(data)
		## Acquire the lock so that no messages are sent
		## simultaneously
		self.lock.acquire_write()
		res = self.server.writeToAll(line)
		## Wait 500 ms between two messages to prevent
		## clients from being overwhelmed
		time.sleep(0.5)
		self.lock.release()
		self.keepalive.start()
	
	def stop(self):
		self.print_debug("stop tcp server")
		self.keepalive.stop()
		self.server.socket.close()
	
	def send_keepalive(self):
		res = self.server.writeToAll("keep alive")
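
Example No. 9 relies on a KeepAliveTimer with start()/stop() that calls send_keepalive once a minute, but its definition is not part of the listing. One plausible shape for it, assuming the interface used above and treating the third constructor argument as a name for the background thread (purely illustrative), is:

import threading

class KeepAliveTimer(object):
    """Calls `function` every `interval` seconds until stop() is called."""

    def __init__(self, interval, function, name=None):
        self.interval = interval
        self.function = function
        # The controller passes a one-element list; treat it as the thread name.
        self.name = name[0] if isinstance(name, list) else name
        self._stop_event = threading.Event()
        self._thread = None

    def _run(self):
        # wait() returns False on timeout and True once stop() sets the event.
        while not self._stop_event.wait(self.interval):
            self.function()

    def start(self):
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._run, name=self.name)
        self._thread.daemon = True
        self._thread.start()

    def stop(self):
        self._stop_event.set()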
Example No. 10
class DeepDist:
    def __init__(self, model, master='127.0.0.1:5000', min_updates=0, max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model  = model
        self.lock   = RWLock()
        self.descent  = lambda model, gradient: model
        self.master   = master
        self.state    = 'serving'
        self.served   = 0
        self.received = 0
        #self.server   = None
        self.pmodel   = None
        self.min_updates = min_updates
        self.max_updates = max_updates

    def start_server(self):
        Thread(target=self.start).start()

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving' or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    self.pmodel = pickle.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
                print "model replica weights were updated via /model"
            return pmodel
    

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = 'receiving'
            self.received += 1
            
            old_syn0, old_syn1 = self.model.syn0.copy(), self.model.syn1.copy()
            print "received gradient: " + str(gradient) 
            
            self.descent(self.model, gradient)
            
            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served   = 0
                self.state    = 'serving'
                self.pmodel = None
            
            self.lock.release()
            print "server weights were updated by model replica"
            print "old weights: "
            print old_syn0[0:3, 0:3], old_syn1[0:3, 0:3] #printing just the first few weights
            print "new weights: "
            print self.model.syn0[0:3, 0:3], self.model.syn1[0:3, 0:3]
            return 'OK'
        
        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master
        print '\n*** Master: %s\n' % master

        self.descent = descent

        def mapPartitions(data):
            return [send_gradient(gradient(fetch_model(master=master), data), master=master)]
        
        return rdd.mapPartitions(mapPartitions).collect()
Example No. 11
class DeepDist:
    def __init__(self, model, master="127.0.0.1:5000", min_updates=0, max_updates=4096):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model = model
        self.lock = RWLock()
        self.descent = lambda model, gradient: model
        self.master = master
        self.state = "serving"
        self.served = 0
        self.received = 0
        # self.server   = None
        self.pmodel = None
        self.min_updates = min_updates
        self.max_updates = max_updates

    def __enter__(self):
        Thread(target=self.start).start()
        # self.server = Process(target=self.start)
        # self.server.start()
        return self

    def __exit__(self, type, value, traceback):
        # self.server.terminate()
        pass  # need to shut down server here

    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route("/")
        def index():
            return "DeepDist"

        @app.route("/model", methods=["GET", "POST", "PUT"])
        def model_flask():
            i = 0
            while (self.state != "serving" or self.served >= self.max_updates) and (i < 1000):
                time.sleep(1)
                i += 1

            # pickle on first read
            pmodel = None
            self.lock.acquire_read()
            if not self.pmodel:
                self.lock.release()
                self.lock.acquire_write()
                if not self.pmodel:
                    self.pmodel = pickle.dumps(self.model, -1)
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            else:
                self.served += 1
                pmodel = self.pmodel
                self.lock.release()
            return pmodel

        @app.route("/update", methods=["GET", "POST", "PUT"])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            if self.min_updates <= self.served:
                self.state = "receiving"
            self.received += 1

            self.descent(self.model, gradient)

            if self.received >= self.served and self.min_updates <= self.received:
                self.received = 0
                self.served = 0
                self.state = "serving"
                self.pmodel = None

            self.lock.release()
            return "OK"

        print "Listening to 0.0.0.0:5000..."
        app.run(host="0.0.0.0", debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master  # will be pickled
        if master is None:
            master = rdd.ctx._conf.get("spark.master")
        if master.startswith("local["):
            master = "localhost:5000"
        else:
            if master.startswith("spark://"):
                master = "%s:5000" % urlparse.urlparse(master).netloc.split(":")[0]
            else:
                master = "%s:5000" % master.split(":")[0]
        print "\n*** Master: %s\n" % master

        self.descent = descent

        def mapPartitions(data):
            return [send_gradient(gradient(fetch_model(master=master), data), master=master)]

        return rdd.mapPartitions(mapPartitions).collect()
Example No. 12
class DeepDist:
    def __init__(self, model, batch=None, master='127.0.0.1:5000'):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model  = model
        self.lock   = RWLock()
        self.descent  = lambda model, gradient: model
        self.master   = master
        self.state    = 'serving'
        self.served   = 0
        self.received = 0
        self.batch    = batch
        self.server   = None

    def __enter__(self):
        Thread(target=self.start).start()
        # self.server = Process(target=self.start)
        # self.server.start()
        return self
    
    def __exit__(self, type, value, traceback):
        # self.server.terminate()
        pass # need to shut down server here
        
    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving') and (i < 1000):
                time.sleep(1)
                i += 1

            self.lock.acquire_read()
            self.served += 1
            model = copy.deepcopy(self.model)
            self.lock.release()
            
            return pickle.dumps(model, -1)
    

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            self.state = 'receiving'
            self.received += 1
            
            self.descent(self.model, gradient)
            
            if self.received >= self.served:
                self.received = 0
                self.served   = 0
                self.state    = 'serving'
            
            self.lock.release()
            return 'OK'
        
        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        master = self.master   # will be pickled
        print 'master0: ', master
        if master is None:
            master = rdd.ctx._conf.get('spark.master')
        print 'master1: ', master
        if master.startswith('local['):
            master = 'localhost:5000'
        else:
            if master.startswith('spark://'):
                master = '%s:5000' % urlparse.urlparse(master).netloc.split(':')[0]
            else:
                master = '%s:5000' % master.split(':')[0]
        print '\n*** master: %s\n' % master

        self.descent = descent
        
        batch = self.batch
        
        def mapPartitions(data):
            # Feed the partition to the master in batches of `batch` items
            # (or all at once when batch is None), sending one gradient per batch.
            exhausted = [False]

            class Iter:
                def __iter__(self):
                    self.i = 0
                    return self

                def next(self):
                    if (batch is not None) and (self.i >= batch):
                        raise StopIteration
                    self.i += 1
                    try:
                        return data.next()
                    except StopIteration:
                        exhausted[0] = True
                        raise

            res = []
            while not exhausted[0]:
                res.append(send_gradient(gradient(fetch_model(master=master), Iter()), master=master))
            return res
        
        return rdd.mapPartitions(mapPartitions).collect()
Example No. 13
class DeepDist:
    def __init__(self, model, host='127.0.0.1:5000'):
        """DeepDist - Distributed deep learning.
        :param model: provide a model that can be trained in parallel on the workers
        """
        self.model  = model
        self.lock   = RWLock()
        self.descent  = lambda model, gradient: model
        self.host     = host
        self.state    = 'serving'
        self.served   = 0
        self.received = 0

    def __enter__(self):
        Thread(target=self.start).start()
        return self
    
    def __exit__(self, type, value, traceback):
        pass # need to shut down server here
        
    def start(self):
        from flask import Flask, request

        app = Flask(__name__)

        @app.route('/')
        def index():
            return 'DeepDist'

        @app.route('/model', methods=['GET', 'POST', 'PUT'])
        def model_flask():
            i = 0
            while (self.state != 'serving') and (i < 20):
                time.sleep(1)
                i += 1

            self.lock.acquire_read()
            self.served += 1
            model = copy.deepcopy(self.model)
            self.lock.release()
            
            return pickle.dumps(model, -1)
    

        @app.route('/update', methods=['GET', 'POST', 'PUT'])
        def update_flask():
            gradient = pickle.loads(request.data)

            self.lock.acquire_write()
            self.state = 'receiving'
            self.received += 1
            
            self.descent(self.model, gradient)
            
            if self.received >= self.served:
                self.received = 0
                self.served   = 0
                self.state    = 'serving'
            
            self.lock.release()
            return 'OK'
        
        print 'Listening to 0.0.0.0:5000...'
        app.run(host='0.0.0.0', debug=True, threaded=True, use_reloader=False)

    def train(self, rdd, gradient, descent):
        
        self.descent = descent
        
        host = self.host   # will be pickled by rdd.mapPartitions
        
        def mapPartitions(data):
            return [send_gradient(gradient(fetch_model(host=host), data), host=host)]
        
        return rdd.mapPartitions(mapPartitions).collect()
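
Taken together, these DeepDist variants are meant to be driven from a Spark program: the model server runs on the driver (via the context manager or start_server()), while train() pickles the gradient/descent closures out to the executors. A schematic driver, with placeholder gradient and descent functions in the style hinted at by Example No. 7's syn0/syn1 prints (the corpus path and gensim model are illustrative assumptions), might look like:

from pyspark import SparkContext
from gensim.models import Word2Vec

def gradient(model, sentences):
    # Placeholder: train a worker-side copy and return the weight delta.
    syn0, syn1 = model.syn0.copy(), model.syn1.copy()
    model.train(sentences)
    return {'syn0': model.syn0 - syn0, 'syn1': model.syn1 - syn1}

def descent(model, update):
    # Placeholder: fold the delta back into the driver-side model.
    model.syn0 += update['syn0']
    model.syn1 += update['syn1']

sc = SparkContext(appName='deepdist-demo')
corpus = sc.textFile('corpus.txt').map(lambda line: line.split())  # illustrative path

with DeepDist(Word2Vec(corpus.collect())) as dd:  # DeepDist as defined above
    dd.train(corpus, gradient, descent)
    print(dd.model.syn0)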