def __spider_data_source(self, config):
    datasource_address = config['datasource_address']
    datasource_keywords = config['datasource_keywards']
    datasource_params = {}
    if 'datasource_params' in config:
        datasource_params = config['datasource_params']
    datasource_params.update({'download_data_type': 'image'})

    if datasource_address not in self.keywords:
        self.keywords[datasource_address] = []
    if datasource_keywords in self.keywords[datasource_address]:
        logger.error('Duplicate keywords.')
        return
    self.keywords[datasource_address].append(datasource_keywords)

    if datasource_address == 'baidu':
        baidu_download(datasource_keywords, datasource_params, self.dir,
                       self.waiting_process_queue)
    elif datasource_address == 'bing':
        bing_download(datasource_keywords, datasource_params, self.dir,
                      self.waiting_process_queue)
    elif datasource_address == 'google':
        google_download(datasource_keywords, datasource_params, self.dir,
                        self.waiting_process_queue)
    elif datasource_address == 'vcg':
        vcg_download(datasource_keywords, datasource_params, self.dir,
                     self.waiting_process_queue)
def __init__(self, record_path, read_only=True):
    # db
    if os.path.exists(os.path.join(record_path, 'record')):
        record_path = os.path.join(record_path, 'record')

    self._record_path = os.path.join(record_path, 'data.db')
    self._db = _DB()
    try:
        self._db.load(self._record_path)
        # db attributes
        self._db_attrs = {}
        self.count = self._db.get('attrib-count')
        # attrib
        # it = self._db.iteritems()
        # it.seek('attrib-')
        # for attrib_item in it:
        #     key, value = attrib_item
        #     if key.startswith('attrib-'):
        #         key = key.replace('attrib-', '')
        #         value = value.replace('attrib-', '')
        #         self._db_attrs[key] = value
        #         setattr(self, key, value)
    except Exception:
        logger.error('could not open db')
def start(self):
    if PY3:
        cmd = input('antgo > ')
    else:
        cmd = raw_input('antgo > ')

    while cmd != 'quit':
        try:
            command = cmd.split(' ')
            assert (command[0] in [
                'task', 'dataset', 'experiment', 'apply', 'create', 'del',
                'add', 'update', 'upload', 'challenge', 'train'
            ])
            flags.cli_param_flags(command[1:])

            # process user command
            self.process_cmd(command[0])

            # clear flags
            flags.clear_cli_param_flags()
        except Exception:
            logger.error('invalid antgo command\n')

        if PY3:
            cmd = input('antgo > ')
        else:
            cmd = raw_input('antgo > ')
def qiniu_upload(file_path,
                 bucket='mltalker',
                 out_url_base='http://experiment.mltalker.com',
                 max_size=10):
    access_key = 'ZSC-X2p4HG5uvEtfmn5fsTZ5nqB3h54oKjHt0tU6'
    secret_key = 'Ya8qYwIDXZn6jSJDMz_ottWWOZqlbV8bDTNfCGO0'
    q = Auth(access_key, secret_key)

    if max_size is not None:
        # check file size (MB)
        fsize = os.path.getsize(file_path)
        fsize = fsize / float(1024 * 1024)
        if fsize > max_size:
            logger.error('file size is larger than limit (%dMB)' % max_size)
            return None

    key = file_path.split('/')[-1]
    token = q.upload_token(bucket, key, 3600)
    ret, info = put_file(token, key, file_path)
    if ret['key'] == key and ret['hash'] == etag(file_path):
        logger.info('success to upload')
        return 'qiniu:%s/%s' % (out_url_base, key)
    return None
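# A minimal usage sketch of qiniu_upload (the file path below is hypothetical;
# assumes valid qiniu credentials and network access):
#
#   url = qiniu_upload('/tmp/experiment.tar.gz', bucket='mltalker', max_size=10)
#   if url is not None:
#       logger.info('uploaded to %s' % url)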
def get(self, experiment_id):
    # check signature
    signature = self.get_argument('signature', '')
    if self.signature != signature:
        logger.error('signature not consistent %s' % signature)
        self.set_status(500)
        self.write(json.dumps({'code': 'InvalidSignature'}))
        self.finish()
        return

    if experiment_id not in self.experiment_records:
        logger.error('no experiment %s here' % experiment_id)
        self.set_status(404)
        self.write(
            json.dumps({
                'code': 'InvalidInput',
                'message': 'do not have experiment %s' % experiment_id
            }))
        self.finish()
        return

    record = self.experiment_records[experiment_id]
    address = record['address'] if 'address' in record else ''
    status = record['status']
    self.write(
        json.dumps({
            'code': 'Success',
            'address': address,
            'status': status
        }))
    self.finish()
def __init__(self, record_path, read_only=True):
    # db
    if os.path.exists(os.path.join(record_path, 'record')):
        record_path = os.path.join(record_path, 'record')

    try:
        opts = rocksdb.Options(create_if_missing=False if read_only else True)
        self._db = rocksdb.DB(record_path, opts, read_only=read_only)
        # db path
        self._record_path = record_path
        # db attributes
        self._db_attrs = {}

        # load every record stored under the 'attrib-' key prefix
        it = self._db.iteritems()
        it.seek('attrib-'.encode('utf-8'))
        for attrib_item in it:
            key, value = attrib_item
            key = key.decode('utf-8')
            value = value.decode('utf-8')
            if key.startswith('attrib-'):
                key = key.replace('attrib-', '')
                value = value.replace('attrib-', '')
                self._db_attrs[key] = value
                setattr(self, key, value)
    except Exception:
        logger.error('could not open rocksdb')
def post(self):
    if self.is_worker:
        self.set_status(500)
        self.write(
            json.dumps({
                'code': 'InvalidServer',
                'message': 'not a server'
            }))
        self.finish()
        return

    signature = self.get_argument('signature', '')
    if self.signature != signature:
        logger.error('signature not consistent %s' % signature)
        self.set_status(500)
        self.write(json.dumps({'code': 'InvalidSignature'}))
        self.finish()
        return

    running_experiments_str = self.get_argument('experiments', '')
    running_experiments = json.loads(running_experiments_str)
    self.client_socket.send_json({
        'cmd': 'suggestion/update',
        'experiments': running_experiments
    })
    yield self.client_socket.recv_json()
    self.finish()
def _query_is_legal(self, client_query):
    if 'CLIENT_ID' not in client_query:
        logger.error('client query must contain key "CLIENT_ID"')
        return False
    if 'QUERY' not in client_query:
        logger.error('client query must contain key "QUERY"')
        return False
    if client_query['QUERY'] not in ['START', 'NEXT']:
        logger.error('client query must be "START" or "NEXT"')
        return False

    if client_query['QUERY'] != 'START':
        if client_query['CLIENT_ID'] in self._client_response_record:
            record = self._client_response_record[client_query['CLIENT_ID']]

            # 1.step check query index consistent
            if int(client_query['QUERY_INDEX']) != record['QUERY_INDEX']:
                logger.error('client_id %s query index %d not consistent '
                             'with server query index %d' %
                             (client_query['CLIENT_ID'],
                              int(client_query['QUERY_INDEX']),
                              int(record['QUERY_INDEX'])))
                return False

            # 2.step check client session has been finished
            if record['QUERY_INDEX'] == len(record['ID']):
                logger.error('client_id %s session has been finished' %
                             client_query['CLIENT_ID'])
                return False

    return True
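# Illustrative client queries that satisfy the checks above (values are
# hypothetical; for 'NEXT' the QUERY_INDEX must match the server-side record):
#
#   {'CLIENT_ID': 'client-0', 'QUERY': 'START'}
#   {'CLIENT_ID': 'client-0', 'QUERY': 'NEXT', 'QUERY_INDEX': 0}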
def _gpu_info_func(pid):
    try:
        gpu_info = gpu_running_info(pid)
        if gpu_info is not None:
            assert (len(gpu_info['occupy_gpus']) <= len(gpu_info['gpu_mem_usage']))
            assert (len(gpu_info['occupy_gpus']) <= len(gpu_info['gpu_util']))
            assert (len(gpu_info['gpu_mem_usage']) == len(gpu_info['gpu_util']))

            if len(gpu_info['occupy_gpus']) == 0:
                return

            running_gpu_mem_usage.append([
                float(gpu_info['gpu_mem_usage'][int(index)])
                for index in gpu_info['occupy_gpus']
            ])
            running_gpu_util.append([
                float(gpu_info['gpu_util'][int(index)])
                for index in gpu_info['occupy_gpus']
            ])
            running_gpu_occupy.append(gpu_info['occupy_gpus'])

            if len(gpu_model) == 0:
                gpu_model.append(gpu_info['gpus'])
            if len(gpu_driver_version) == 0:
                gpu_driver_version.append(gpu_info['driver-version'])
    except Exception:
        logger.error('error happened while sampling gpu state')
def post(self):
    if self.is_worker:
        self.set_status(500)
        self.write(
            json.dumps({
                'code': 'InvalidServer',
                'message': 'not a server'
            }))
        self.finish()
        return

    signature = self.get_argument('signature', '')
    if self.signature != signature:
        logger.error('signature not consistent %s' % signature)
        self.set_status(500)
        self.write(json.dumps({'code': 'InvalidSignature'}))
        self.finish()
        return

    study_name = self.get_argument('study_name', '')
    trail_name = self.get_argument('trail_name', None)
    objective_value = self.get_argument('objective_value', -1.0)
    created_time = self.get_argument('created_time', None)
    updated_time = self.get_argument('updated_time', None)

    self.client_socket.send_json({
        'cmd': 'suggestion/make',
        'study_name': study_name,
        'trail_name': trail_name,
        'objective_value': objective_value,
        'created_time': created_time,
        'updated_time': updated_time,
    })
    server_response = yield self.client_socket.recv_json()
    self.write(json.dumps(server_response))
def process_cmd(self, command):
    try:
        if command == 'task':
            self.process_task_command()
        elif command == 'experiment':
            self.process_experiment_command()
        elif command == 'dataset':
            self.process_dataset_command()
        elif command == 'apply':
            self.process_apply_command()
        elif command == 'create':
            self.process_create_command()
        elif command == 'add':
            self.process_add_command()
        elif command == 'del':
            self.process_del_command()
        elif command == 'update':
            self.process_update_command()
        elif command == 'upload':
            self.process_upload_command()
        elif command == 'challenge':
            self.process_challenge_command()
        elif command == 'train':
            self.process_train_command()
    except Exception:
        logger.error('error response from server')
def thread_main(self, sess):
    stop = False
    while not stop:
        self.datasource._reset_iteration_state()
        iterator = self.datasource.iterator_value()
        for data in iterator:
            try:
                # wait until the queue has room
                while self.queue_size.eval(session=sess) == self.max_queue_size:
                    if self.coord.should_stop():
                        self.queue.close()
                        break
                    time.sleep(self.wait_time)

                if self.coord.should_stop():
                    stop = True
                    self.queue.close()
                    break

                feed_dict = {}
                if type(data) == list or type(data) == tuple:
                    for i in range(len(data)):
                        feed_dict.update({self.sample_placeholder[i]: data[i]})
                else:
                    feed_dict = {self.sample_placeholder[0]: data}

                sess.run(self.enqueue, feed_dict=feed_dict)
            except Exception:
                logger.error('could not feed data into tensorflow pipeline')
def _cpu_info_func(pid):
    try:
        cpu_info = cpu_running_info(pid)
        mem_usage = cpu_info['cpu_mem_usage']
        cpu_util = cpu_info['cpu_util']
        cpu_occupy = cpu_info['occupy_cpus']
        running_mem_usage.append(mem_usage)
        running_cpu_util.append(cpu_util)
        running_cpu_occupy.append(cpu_occupy)
    except Exception:
        logger.error('error happened while sampling cpu state')
def iterator_value(self):
    while True:
        try:
            self._value = self._evaluate()
            yield self._value
            self._force_inputs_dirty()
        except StopIteration:
            return
        except Exception:
            logger.error('data flow pipeline error')
            return
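# Consumption sketch: iterator_value() re-evaluates the node and yields each
# value until an upstream input raises StopIteration, e.g. (node is any object
# exposing this method):
#
#   for value in node.iterator_value():
#       handle(value)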
def __init__(self, record_path):
    self._record_path = os.path.join(record_path, 'data.db')
    self._db = _DB()
    try:
        count = self._db.get('attrib-count')
        if count is None:
            self._db.set('attrib-count', str(0))
    except Exception as e:
        print(e)
        logger.error('could not open db')
def reorganize_numeric_data(self, data):
    data_x, data_y = data
    try:
        data_x = float(data_x)
    except Exception:
        logger.error('Channel X must be scalar data')
    try:
        data_y = float(data_y)
    except Exception:
        logger.error('Channel Y must be scalar data')
    return (data_x, data_y)
def __init__(self, record_path):
    self._record_path = record_path
    try:
        opts = rocksdb.Options()
        opts.create_if_missing = True
        self._db = rocksdb.DB(record_path, opts)

        count = self._db.get('attrib-count'.encode('utf-8'))
        if count is None:
            self._db.put('attrib-count'.encode('utf-8'), b'0')
    except Exception:
        logger.error('could not open rocksdb')
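# A minimal round-trip sketch of the 'attrib-' key convention shared by this
# writer and the reader above (record path is illustrative; assumes
# python-rocksdb is installed):
#
#   db = rocksdb.DB('/tmp/record', rocksdb.Options(create_if_missing=True))
#   db.put(b'attrib-count', b'3')
#   assert db.get(b'attrib-count') == b'3'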
def __bing_find_and_download(waiting_process_queue,
                             search_url,
                             session,
                             dir,
                             max_page_num=50):
    t = 0
    num = 0
    while t < max_page_num:
        url = search_url.format(t * 35 + 1)
        t = t + 1
        try:
            result = session.get(url, timeout=7, allow_redirects=False)
        except BaseException:
            # on request failure, skip far ahead
            t = t + 60
            continue
        else:
            # extract image urls with a regular expression
            pic_url = re.findall('src="(.*?)"', result.text, re.S)
            for each in pic_url:
                logger.info("Downloading(%d) %s." % (num + 1, str(each)))
                try:
                    if each is not None:
                        pic = requests.get(each, timeout=7)
                    else:
                        continue
                except BaseException:
                    logger.error("Could not download %s." % each)
                    continue
                else:
                    # assign a unique file name
                    file_folder = os.path.join(dir, 'test')
                    if not os.path.exists(file_folder):
                        os.makedirs(file_folder)
                    file_path = os.path.join(file_folder,
                                             'bing_%s.jpg' % str(uuid.uuid4()))
                    with open(file_path, 'wb') as fp:
                        fp.write(pic.content)
                    num += 1
                    logger.info("Finish download %s ." % str(each))

                    # push into the waiting-process queue
                    if waiting_process_queue is not None:
                        waiting_process_queue.put(file_path)

    # end-of-stream marker
    if waiting_process_queue is not None:
        waiting_process_queue.put(None)
def _run_by_generator(self, data_generator, **kwargs):
    # bind data
    with self.graph.as_default():
        feed_dict = {}
        if self._has_model_input and len(self.clones) > 1:
            logger.error('when clone number > 1, a different placeholder '
                         'must be set for every clone')
            exit(-1)

        if self._has_model_input:
            # generate data
            data = next(data_generator)
            for k, v in kwargs.items():
                if k not in self.cache:
                    placeholder_tensor = self.graph.get_tensor_by_name(
                        '{}/{}:0'.format('input', k))
                    self.cache[k] = placeholder_tensor

                feed_dict[self.cache[k]] = data[v] if (
                    type(data) == tuple or type(data) == list) else data
        else:
            # set a different placeholder for every clone
            for clone in self.clones:
                # generate data
                data = next(data_generator)
                for k, v in kwargs.items():
                    cache_name = '{}:0'.format(k)
                    if len(self.clones) > 1:
                        cache_name = '{}/{}:0'.format(clone[1][:-1], k)

                    if cache_name not in self.cache:
                        if len(self.clones) > 1:
                            placeholder_tensor = self.graph.get_tensor_by_name(
                                '{}/{}:0'.format(clone[1][:-1], k))
                        else:
                            placeholder_tensor = self.graph.get_tensor_by_name(
                                '{}:0'.format(k))
                        self.cache[cache_name] = placeholder_tensor

                    feed_dict[self.cache[cache_name]] = data[v] if (
                        type(data) == tuple or type(data) == list) else data

        return self._run_by_feed(feed_dict=feed_dict, **kwargs)
def post(self, experiment_id):
    # check signature
    signature = self.get_argument('signature', '')
    if self.signature != signature:
        logger.error('signature not consistent %s' % signature)
        self.set_status(500)
        self.write(json.dumps({'code': 'InvalidSignature'}))
        self.finish()
        return

    if experiment_id not in self.experiment_records:
        logger.error('no experiment %s here' % experiment_id)
        self.set_status(404)
        self.write(
            json.dumps({
                'code': 'InvalidInput',
                'message': 'do not have experiment %s' % experiment_id
            }))
        self.finish()
        return

    self.experiment_records[experiment_id]['status'] = 'running'

    # update model evaluation value
    evaluation_val = self.get_argument('evaluation_value', None)
    if evaluation_val is not None:
        self.experiment_records[experiment_id]['evaluation_value'].append(
            evaluation_val)
        self.experiment_records[experiment_id]['evaluation_time'].append(
            time.time())

    address = self.get_argument('address', None)
    if address is not None:
        self.experiment_records[experiment_id]['address'] = address

    # update status; release occupied devices once the experiment stops
    status = self.get_argument('status', None)
    if status is not None:
        self.experiment_records[experiment_id]['status'] = status
        if status == 'stop':
            free_devices = self.experiment_records[experiment_id]['devices']
            self.server_records['occupied_devices'] = [
                n for n in self.server_records['occupied_devices']
                if n not in free_devices
            ]
def reorganize_histogram_data(self, data):
    data_x, data_y = data
    try:
        data_x = float(data_x)
    except Exception:
        logger.error('Channel X must be scalar data')

    try:
        data_y = data_y.flatten()
        bins = 10  # default bins
        if 'BINS' in self.params:
            bins = self.params['BINS']
        data_y = np.histogram(data_y, bins)
    except Exception:
        logger.error('Channel Y must be a numpy array')

    return (data_x, data_y)
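# Note: np.histogram returns a (counts, bin_edges) tuple, which is what data_y
# becomes above, e.g. with the default 10 bins:
#
#   counts, edges = np.histogram(np.random.randn(1000), 10)
#   # counts.shape == (10,), edges.shape == (11,)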
def __vcg_img_download(waiting_process_queue, save_dir, img_url, keyword, count):
    try:
        logger.info("Downloading(%d) %s." % (count + 1, img_url))
        pic = requests.get(img_url, timeout=7)
    except BaseException:
        logger.error("Could not download %s." % img_url)
        return
    else:
        # download_data_type ('image' or 'video') is expected to be defined
        # at module level
        file_prefix = 'VCG_' + keyword + '_' + str(count)
        file_name = file_prefix + '.jpg' if download_data_type == 'image' \
            else file_prefix + '.mp4'
        file_path = os.path.join(save_dir, file_name)
        with open(file_path, 'wb') as fp:
            fp.write(pic.content)
        logger.info("Finish download %s ." % img_url)

        if waiting_process_queue is not None:
            waiting_process_queue.put(file_path)
def tftool_visualize_pb(pb_path):
    if not os.path.exists(pb_path):
        logger.error('pb model file does not exist')
        return

    logger.info('load model pb')
    graph = tf.get_default_graph()
    graphdef = graph.as_graph_def()
    graphdef.ParseFromString(gfile.FastGFile(pb_path, "rb").read())
    _ = tf.import_graph_def(graphdef, name="")

    logger.info('start model FLOPs statistic')
    flops = tf.profiler.profile(
        graph, options=tf.profiler.ProfileOptionBuilder.float_operation())
    logger.info('model FLOPs: {}'.format(flops.total_float_ops))

    logger.info('generate visualization data')
    summary_writer = tf.summary.FileWriter("./", graph)
    logger.info('run: tensorboard --logdir=.')
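# A minimal usage sketch (the model path is hypothetical):
#
#   tftool_visualize_pb('./frozen_model.pb')
#   # then inspect the graph after running: tensorboard --logdir=.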
def _key_params(self):
    # related parameters
    # 0.step token
    token = FLAGS.token()
    if not PY3 and token is not None:
        token = unicode(token)
    token = self.app_token if token is None else token

    # 1.step check name; if None, set it to the current time automatically
    name = FLAGS.name()
    if name is None:
        name = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    if not PY3:
        name = unicode(name)

    # 2.step check main folder (all related model code, including main_file
    # and main_param)
    main_folder = FLAGS.main_folder()
    if main_folder is None:
        main_folder = os.path.abspath(os.curdir)

    main_file = FLAGS.main_file()
    if main_file is None or not os.path.exists(
            os.path.join(main_folder, main_file)):
        logger.error('main executing file does not exist')
        return

    # 3.step check dump dir (all running data is stored here)
    dump_dir = FLAGS.dump()
    if dump_dir is None:
        dump_dir = os.path.join(os.path.abspath(os.curdir), 'dump')
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    # 4.step which task
    task = FLAGS.task()

    # 5.step model params
    main_param = FLAGS.main_param()

    return token, name, main_file, main_folder, dump_dir, main_param, task
def process_add_command(self):
    task_type = FLAGS.task_type()
    task_measure = FLAGS.task_measure()
    if task_type is None or task_measure is None:
        logger.error('need to set task_type and task_measure simultaneously')
        return

    task_measures = task_measure.split(',')
    task_measures = json.dumps(task_measures)

    remote_api = 'hub/api/terminal/task/type/%s' % task_type
    response = self.remote_api_request(
        remote_api, action='post', data={'task-measures': task_measures})

    if response is None:
        logger.error('fail to add task type')
        return

    logger.info('success to add task type')
    print(response)
def spider_api_vcg():
    FLAGS = flags.AntFLAGS
    datasource_name = 'vcg'
    datasource_type = ''
    datasource_keyword = ''
    for p in FLAGS.param().split(','):
        k, v = p.split(':')
        if k == 'type':
            datasource_type = v
        elif k == 'keyword':
            datasource_keyword = v

    # replace '/' with ','
    datasource_keyword = datasource_keyword.replace('/', ',')

    if datasource_name not in ['baidu', 'google', 'bing', 'vcg']:
        logger.error('Only support datasource baidu/google/bing/vcg')
        return
    if datasource_type not in ['image', 'video']:
        logger.error('Only support datasource type image/video')
        return
    if datasource_keyword == '':
        logger.error('Must set keyword')
        return

    time_stamp = int(time.time())
    if not os.path.exists('./spider_%d' % time_stamp):
        os.makedirs('./spider_%d' % time_stamp)

    vcg_download(datasource_keyword,
                 {'download_data_type': datasource_type},
                 './spider_%d' % time_stamp)
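# Illustrative CLI parameter for the parser above ('/' in the keyword is
# rewritten to ',' so several keywords can be passed in a single flag):
#
#   --param=type:image,keyword:cat/dog
#   # -> datasource_type = 'image', datasource_keyword = 'cat,dog'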
def _evaluate(self):
    """Calculate the value for the Node.

    Calls the action of the Node using values from the inputs of the Node.
    Returns the result of the action function.

    This function can also be overridden in subclasses if a class-based
    approach to creating Node actions is preferred.
    """
    if not self._action:
        raise NotImplementedError('You must define the action= argument '
                                  'when instantiating the Node')

    try:
        positional_values = [i.get_value() for i in self._positional_inputs]
        keyword_values = {
            name: i.get_value()
            for name, i in items(self._keyword_inputs)
        }
        # clear reset state
        self._iteration_reset_state = False
    except StopIteration:
        # reset all inputs
        self._reset_iteration_state()
        raise StopIteration
    except Exception:
        info = sys.exc_info()
        logger.error('%s:%s' % (info[0], info[1]))
        exit(-1)

    value = self._action(*positional_values, **keyword_values)
    # if getattr(self._action, 'output_type', None) is not None:
    #     # Output type checking has been enabled, and the node's action
    #     # does specify the expected output type. Check that the calculated
    #     # value matches that type.
    #     self._verify_output_type(value)
    return value
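# A rough sketch of the action-based Node contract assumed by _evaluate
# (the constructor signature here is illustrative, not the actual API):
#
#   node = Node(action=lambda a, b: a + b)  # a, b supplied by input nodes
#   node._evaluate()  # calls get_value() on each input, then the action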
def activelearning_api_download():
    FLAGS = flags.AntFLAGS
    # service ip:port
    host_ip = FLAGS.host_ip()
    host_port = FLAGS.host_port()

    round = None
    for p in FLAGS.param().split(','):
        k, v = p.split(':')
        if k == 'round':
            round = int(v)

    if round is None:
        logger.error('Need to set param (eg. --param=round:0)')
        return

    download_url = 'http://%s:%d/activelearning/download/' % (host_ip, host_port)
    down_res = requests.get(url=download_url, params={'round': round})
    file_name = get_file_name(down_res.headers)
    with open(file_name, "wb") as code:
        code.write(down_res.content)
def start(self):
    try:
        # 1.step get template resource folder
        file_folder = os.path.dirname(__file__)
        parent_folder = '/'.join(file_folder.split('/')[:-1])
        template_file_folder = os.path.join(parent_folder, 'resource',
                                            'templates')

        # 2.step copy main_file.py
        main_file = 'task_main_file.py' if self.main_file is None else self.main_file
        shutil.copy(
            os.path.join(template_file_folder, 'task_main_file.py'),
            os.path.join(self.dump_dir, main_file))

        # 3.step copy main_param.yaml
        main_param = 'task_main_param.yaml' if self.main_param is None else self.main_param
        shutil.copy(
            os.path.join(template_file_folder, 'task_main_param.yaml'),
            os.path.join(self.dump_dir, main_param))

        logger.info('execute template command')
    except Exception:
        logger.error('failed to execute template command')
        traceback.print_exc()
def reorganize_image_data(self, data):
    data_x, data_y = data
    try:
        data_x = float(data_x)
    except Exception:
        logger.error('Channel X must be scalar data')
        return None

    try:
        if len(data_y.shape) != 2 and len(data_y.shape) != 3:
            logger.error('Channel Y must be 2 or 3 dimensional')
            return None
        if len(data_y.shape) == 3:
            if data_y.shape[2] != 3:
                logger.error('Channel Y must possess 3 or 1 channels')
                return None

        # resize so that the short side is 50 pixels
        allowed_size = 50.0
        height, width = data_y.shape[:2]
        min_scale = allowed_size / np.minimum(height, width)
        new_height = int(height * min_scale)
        new_width = int(width * min_scale)
        resized_img = scipy.misc.imresize(data_y, (new_height, new_width))

        if resized_img.dtype == np.uint8:
            return (data_x,
                    base64.b64encode(png_encode(resized_img)).decode('utf-8'))

        # rescale non-uint8 images to [0, 255]
        max_val = np.max(resized_img.flatten())
        min_val = np.min(resized_img.flatten())
        if len(data_y.shape) == 3:
            resized_img = ((resized_img - np.tile(min_val, (1, 1, 3))) /
                           np.tile(max_val, (1, 1, 3))) * 255
        else:
            resized_img = (resized_img - min_val) / max_val * 255
        resized_img = resized_img.astype(np.uint8)

        return (data_x,
                base64.b64encode(png_encode(resized_img)).decode('utf-8'))
    except Exception:
        logger.error('Channel Y must be a numpy array')