class Core(Request, threading.Thread):

    def __init__(self, queue=None):
        Request.__init__(self)
        threading.Thread.__init__(self)
        self.logger = Logger().logger
        self.config = Config().config
        self.proxy_pool = ProxyPool()
        self.__queue = queue
        self.db = DB().db
        self.proxy_ip, self.proxy_port = None, None
        self.proxy()

    def proxy(self):
        if self.config.get('PROXY', 'use') == 'TRUE':
            if not self.proxy_ip:
                self.proxy_ip, self.proxy_port = self.proxy_pool.get_proxy(
                    type=self.config.get('PROXY', 'type'),
                    seed_num=int(self.config.get('PROXY', 'seed_num')),
                    distinct=self.config.get('PROXY', 'distinct') == 'TRUE'
                )
            else:
                # drop the current proxy before asking the pool for a fresh one
                self.proxy_pool.delete_proxy(
                    ip=self.proxy_ip, type=self.config.get('PROXY', 'type'))
                self.proxy_ip, self.proxy_port = self.proxy_pool.get_proxy(
                    type=self.config.get('PROXY', 'type'),
                    seed_num=int(self.config.get('PROXY', 'seed_num')),
                    distinct=self.config.get('PROXY', 'distinct') == 'TRUE'
                )
        else:
            self.proxy_ip, self.proxy_port = None, None

    @use_proxy
    @load_params
    def get(self, url, params=None, **kwargs):
        return super().get(url, params, **kwargs)

    @use_proxy
    @load_params
    def post(self, url, data, **kwargs):
        return super().post(url, data, **kwargs)

    def task(self):
        # overridden (or bound at runtime) with the actual job to execute
        pass

    def run(self):
        self.logger.info('go')
        try:
            self.task()
            self.logger.info('ok')
        except Exception as e:
            self.logger.info('something wrong: {}'.format(e))
        finally:
            if self.__queue:
                assert isinstance(self.__queue, queue.Queue)
                self.__queue.task_done()
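# --- Usage sketch (illustrative, not part of the original module) ---
# Core is a Thread whose run() executes self.task(); the simplest way to use it
# directly is to subclass and override task(). CrawlExample and the URL below
# are hypothetical names used only for this sketch.
class CrawlExample(Core):

    def task(self):
        # get() runs through the @use_proxy / @load_params decorators above
        res = self.get('http://example.com')
        self.logger.info('fetched example page: {}'.format(res))

# job_queue lets run() call task_done() when the task finishes:
# job_queue = queue.Queue()
# job_queue.put('crawl')
# worker = CrawlExample(job_queue)
# worker.start()
# job_queue.join()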
import pandas as pd
from scipy import sparse
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
import matplotlib.pyplot as plt
import seaborn as sns

from src.runner import Runner
from src.util import Logger
from src.model_NB import ModelMultinomialNB

logger = Logger()


def makefig(result):
    sns.set_style("whitegrid")
    ax = sns.boxenplot(data=result, width=0.4)
    ax.set_ylabel('Accuracy', size=14)
    ax.tick_params(labelsize=14)
    plt.savefig(f'../model/tuning/{NAME}-NB.png', dpi=300)


if __name__ == '__main__':
    base_params = {'alpha': 1.0, 'fit_prior': True, 'class_prior': None}
    params_NB = dict(base_params)
    param_grid_ = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
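    # --- Illustrative sketch (not part of the original script): how param_grid_
    # could drive a grid search with the imported f1_score / make_scorer. The
    # real script presumably goes through Runner / ModelMultinomialNB, whose API
    # is not shown here, so sklearn's MultinomialNB and placeholder arrays
    # (tr_x, tr_y) stand in for it.
    #
    # from sklearn.naive_bayes import MultinomialNB
    # scorer = make_scorer(f1_score, average='macro')
    # search = GridSearchCV(MultinomialNB(**base_params), param_grid=param_grid_,
    #                       scoring=scorer, cv=5)
    # search.fit(tr_x, tr_y)
    # print('best alpha:', search.best_params_)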
import sys

sys.path.append('../')

import numpy as np
import pandas as pd
from src.model_SVC import ModelSVC
from scipy import sparse
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations

from src.util import Logger
from src.runner import Runner

logger = Logger()

if __name__ == '__main__':
    params = {'kernel': 'linear', 'gamma': 0.001}
    params_SVC = dict(params)
    param_grid_ = [{
        'n_components': [10, 30, 50, 100],
        'n_iter': [8, 16],
        'C': [1, 10, 100, 1000]
    }, {
        'apply_svd': [False],
        'C': [1, 10, 100, 1000]
    }]
    feature = [["mfcc", "delta", "power"]]
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.model_selection import LeaveOneGroupOut
from typing import Callable, List, Optional, Tuple, Union
from sklearn.model_selection import learning_curve
from scipy import sparse
from scipy.sparse import load_npz
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy

from src.model import Model
from src.util import Logger, Util

sns.set()
logger = Logger()


class RunnerLeaveOneOut:

    def __init__(self, run_name: str, model_cls: Callable[[str, dict], Model],
                 features: str, params: dict):
        """Constructor.

        :param run_name: name of the run
        :param model_cls: model class
        :param features: list of features
        :param params: hyperparameters
        """
        self.run_name = run_name
        self.model_cls = model_cls
        self.features = features
        self.params = params
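# --- Illustrative sketch (not part of the original module): how LeaveOneGroupOut
# splits data, which is the CV scheme this runner presumably wraps. The names
# demo_x / demo_y / demo_groups are placeholders used only for this example.
if __name__ == '__main__':
    import numpy as np

    demo_x = np.arange(12).reshape(6, 2)
    demo_y = np.array([0, 1, 0, 1, 0, 1])
    demo_groups = np.array([0, 0, 1, 1, 2, 2])  # e.g. one group per recording session
    for fold, (tr_idx, va_idx) in enumerate(
            LeaveOneGroupOut().split(demo_x, demo_y, demo_groups)):
        # each fold holds out exactly one whole group as the validation set
        print(fold, 'train:', tr_idx, 'valid:', va_idx)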
class ProxyPool(Request):
    _instance_lock = threading.Lock()

    def __new__(cls, *args, **kwargs):
        # thread-safe singleton
        if not hasattr(ProxyPool, "_instance"):
            with ProxyPool._instance_lock:
                if not hasattr(ProxyPool, "_instance"):
                    ProxyPool._instance = object.__new__(cls)
        return ProxyPool._instance

    def __init__(self):
        super(ProxyPool, self).__init__()
        self.config = Config().config
        self.default_db = DB().default_db
        self.proxy_db = DB().proxy_db
        self.logger = Logger().logger

    def _fetch_proxy_http_1(self):
        url = 'http://www.ip3366.net/free/?stype=1'
        soup = self.get_soup(url)
        for tr in soup.find_all('tr')[1:]:
            ip, port = map(lambda x: x.text, tr.find_all('td')[0:2])
            self.default_db.hset('HTTP', ip, port)

    def _fetch_proxy_http_2(self):
        url = 'https://www.xicidaili.com/wt/'
        soup = self.get_soup(url)
        for tr in soup.find_all('tr')[1:]:
            ip, port = map(lambda x: x.text, tr.find_all('td')[1:3])
            self.default_db.hset('HTTP', ip, port)

    def _fetch_proxy_https_1(self):
        url = 'http://www.ip3366.net/free/?stype=2'
        html = self.get(url)
        soup = self.res2soup(html)
        for tr in soup.find_all('tr')[1:]:
            ip, port = map(lambda x: x.text, tr.find_all('td')[0:2])
            self.default_db.hset('HTTPS', ip, port)

    def _fetch_proxy_https_2(self):
        url = 'https://www.xicidaili.com/wn/'
        soup = self.get_soup(url)
        for tr in soup.find_all('tr')[1:]:
            ip, port = map(lambda x: x.text, tr.find_all('td')[1:3])
            self.default_db.hset('HTTPS', ip, port)

    @pool_lock
    def delete_backup_proxy(self):
        for key in self.default_db.hkeys(name='HTTP'):
            self.default_db.hdel('HTTP', key)
        for key in self.default_db.hkeys(name='HTTPS'):
            self.default_db.hdel('HTTPS', key)

    @pool_lock
    def delete_valid_proxy(self):
        for key in self.proxy_db.keys():
            self.proxy_db.delete(key)

    @pool_lock
    def _delete_all_proxy(self):
        for key in self.default_db.hkeys(name='HTTP'):
            self.default_db.hdel('HTTP', key)
        for key in self.default_db.hkeys(name='HTTPS'):
            self.default_db.hdel('HTTPS', key)
        for key in self.proxy_db.keys():
            self.proxy_db.delete(key)

    def _fetch_proxy(self, type):
        if type == 'HTTP':
            self._fetch_proxy_http_1()
            self._fetch_proxy_http_2()
        if type == 'HTTPS':
            self._fetch_proxy_https_1()
            self._fetch_proxy_https_2()

    def _check_proxy(self, ip, port, timeout=3):
        if not ip or not port:
            return False
        http_url = 'http://{}:{}'.format(ip, port)
        https_url = 'https://{}:{}'.format(ip, port)
        try:
            self.get(url="http://icanhazip.com/",
                     timeout=timeout,
                     proxies={'http': http_url, 'https': https_url})
            return True
        except Exception:
            return False

    def _get_backup_proxy(self, type='HTTPS'):
        import random
        ip = random.choice(self.default_db.hkeys(type))
        port = self.default_db.hget(type, ip)
        return ip, port

    def _get_valid_proxy(self, type='HTTPS', seed_num=1):
        for _ in range(len(self.default_db.hkeys(type))):
            ip, port = self._get_backup_proxy(type)
            if self._check_proxy(ip, port):
                self.logger.info('backup ip {} valid, fetch'.format(ip))
                self.proxy_db.set(ip, port, ex=3600)
                if len(self.proxy_db.keys()) >= seed_num:
                    break
            else:
                self.logger.info('backup ip {} invalid, delete'.format(ip))
                self.default_db.hdel(type, ip)
        else:
            self.logger.info('not enough valid backup ips (need {})'.format(seed_num))

    @pool_lock
    def get_proxy(self, type='HTTPS', seed_num=1, distinct=False):
        if len(self.proxy_db.keys()) == 0:
            self.logger.info('no valid proxy in proxy pool')
            if not self.default_db.exists(type) or not len(self.default_db.hkeys(type)):
                self.logger.info('no backup {} proxy, fetch from web'.format(type))
                self._fetch_proxy(type)
                self.logger.info('fetch ok, {} items in total'.format(
                    len(self.default_db.hkeys(type))))
            self.logger.info('fetch valid proxy from backup proxy pool')
            self._get_valid_proxy(type, seed_num)
            assert len(self.proxy_db.keys()) != 0
            self.logger.info('fetch valid proxy ok')
        ip = self.proxy_db.keys()[0]
        port = self.proxy_db.get(ip)
        if distinct:
            # a distinct proxy is handed out once and then removed from both pools
            self.proxy_db.delete(ip)
            self.default_db.hdel(type, ip)
        return ip, port

    @pool_lock
    def delete_proxy(self, ip, type='HTTPS'):
        self.proxy_db.delete(ip)
        self.default_db.hdel(type, ip)
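# --- Usage sketch (illustrative, not part of the original module) ---
# ProxyPool is a singleton, so repeated construction returns the same object.
# get_proxy() refills the pool from the free-proxy sites above when it is empty
# and hands back one (ip, port) pair; delete_proxy() discards a proxy that
# turned out to be dead so the next call picks a different one.
#
# pool = ProxyPool()
# ip, port = pool.get_proxy(type='HTTPS', seed_num=2, distinct=False)
# ... use the proxy; if requests through it start failing:
# pool.delete_proxy(ip=ip, type='HTTPS')
# ip, port = pool.get_proxy(type='HTTPS', seed_num=2)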
from hyperopt import fmin, tpe, STATUS_OK, Trials
# split the training data into a training part and a validation part
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
import seaborn as sns

from src.util import Logger
from src.model_MLP import ModelMLP
from src.runner import Runner
import gc

gc.collect()
logger = Logger()


# objective function
def objective(params):
    global base_params
    # update the base parameters with the parameters being searched
    base_params.update(params)
    # build the model and train it
    model = ModelMLP("MLP", **base_params)
    model.train(tr_x, tr_y, va_x, va_y)
    # predict on the validation data and score it
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    # hyperopt minimises the returned loss
    return {'loss': score, 'status': STATUS_OK}
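# --- Illustrative sketch (not part of the original script): how objective() is
# typically handed to hyperopt's fmin. The search space below (hidden units,
# dropout) and max_evals are assumptions for illustration only; the script's
# real space and base_params are defined elsewhere.
#
# from hyperopt import hp
# space = {
#     'units': hp.choice('units', [32, 64, 128]),
#     'dropout': hp.uniform('dropout', 0.0, 0.5),
# }
# trials = Trials()
# best = fmin(fn=objective, space=space, algo=tpe.suggest,
#             max_evals=50, trials=trials)
# print('best params:', best)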
def main(args):
    np.random.seed(1)
    random.seed(1)
    ray.init(num_cpus=20)

    output_path = args.output_path
    data_dir = args.data_dir

    ## Select dataset
    dataset = args.dataset

    # ### Clustering options
    cluster_option = args.cluster_option

    ## plotting options
    plot_option_clusters_vs_lambda = args.plot_option_clusters_vs_lambda
    plot_option_fairness_vs_clusterE = args.plot_option_fairness_vs_clusterE
    plot_option_balance_vs_clusterE = args.plot_option_balance_vs_clusterE
    plot_option_convergence = args.plot_option_convergence

    # ### Data load
    savepath_compare = osp.join(data_dir, dataset + '.npz')
    if not os.path.exists(savepath_compare):
        X_org, demograph, K = read_dataset(dataset)
        np.savez(savepath_compare, X_org=X_org, demograph=demograph, K=K)
    else:
        datas = np.load(savepath_compare)
        X_org = datas['X_org']
        demograph = datas['demograph']
        K = datas['K'].item()

    log_path = osp.join(data_dir, dataset + '_' + cluster_option, '_log.txt')
    sys.stdout = Logger(log_path)

    print('Cluster number for dataset {}'.format(K))

    V_list = [np.array(demograph == j) for j in np.unique(demograph)]
    V_sum = [x.sum() for x in V_list]
    print('Balance of the dataset {}'.format(min(V_sum) / max(V_sum)))

    # J = len(V_sum)
    N, D = X_org.shape

    # demographic probability for each V_j
    u_V = [x / N for x in V_sum]  # proportional

    # Normalize Features
    X = normalizefea(X_org)

    #############################################################################
    ######################## Run Fair clustering ###############################
    #############################################################################
    #
    fairness = True  # Setting False only runs unfair clustering

    elapsetimes = []
    avg_balance_set = []
    min_balance_set = []
    fairness_error_set = []
    E_cluster_set = []
    E_cluster_discrete_set = []

    bestacc = 1e10
    best_avg_balance = -1
    best_min_balance = -1

    if args.lmbda is None:
        lmbdas = np.arange(45, 50, 2).tolist()
    else:
        lmbdas = [args.lmbda]

    length_lmbdas = len(lmbdas)

    l = None

    if ('A' not in locals()) and cluster_option == 'ncut':
        alg_option = 'flann' if N > 50000 else 'None'
        affinity_path = osp.join(data_dir, dataset + '_affinity_ncut_final.npz')
        knn = 20
        if not os.path.exists(affinity_path):
            A = util.create_affinity(X, knn, savepath=affinity_path, alg=alg_option)
        else:
            A = util.create_affinity(X, knn, W_path=affinity_path)

    init_C_path = osp.join(data_dir, dataset + '_init_{}_{}_final.npy'.format(cluster_option, K))

    for count, lmbda in enumerate(lmbdas):
        print('Inside Lambda ', lmbda)

        if not os.path.exists(init_C_path):
            print('Generating initial seeds')
            C_init, _ = km_init(X, K, 'kmeans_plus')
            np.save(init_C_path, C_init)
        else:
            C_init = np.load(init_C_path)  # Load initial seeds

        if cluster_option == 'ncut':
            C, l, elapsed, S, E = fair_clustering(X, K, u_V, V_list, lmbda, fairness,
                                                  cluster_option, C_init, A=A)
        else:
            C, l, elapsed, S, E = fair_clustering(X, K, u_V, V_list, lmbda, fairness,
                                                  cluster_option, C_init)

        min_balance, avg_balance = get_fair_accuracy(u_V, V_list, l, N, K)
        fairness_error = get_fair_accuracy_proportional(u_V, V_list, l, N, K)

        print('lambda = {}, \n fairness_error {: .2f} and \n avg_balance = {: .2f} \n min_balance = {: .2f}'.format(
            lmbda, fairness_error, avg_balance, min_balance))

        # Plot the figure with clusters
        if dataset in ['Synthetic', 'Synthetic-unequal'] and plot_option_clusters_vs_lambda == True:
            filename = osp.join(output_path, 'cluster_output',
                                'cluster-plot_fair_{}-{}_lambda_{}.png'.format(cluster_option, dataset, lmbda))
            plot_clusters_vs_lambda(X_org, l, filename, dataset, lmbda,
                                    min_balance_set, avg_balance_set, fairness_error)
        #
        if avg_balance > best_avg_balance:
            best_avg_balance = avg_balance
            best_lambda_avg_balance = lmbda

        if min_balance > best_min_balance:
            best_min_balance = min_balance
            best_lambda_min_balance = lmbda

        if fairness_error < bestacc:
            bestacc = fairness_error
            best_lambda_acc = lmbda

        if plot_option_convergence == True:
            filename = osp.join(output_path, 'Fair_{}_convergence_{}.png'.format(cluster_option, dataset))
            E_fair = E['fair_cluster_E']
            plot_convergence(cluster_option, filename, E_fair)

        print('Best fairness_error %0.4f' % bestacc, '|Error lambda = ', best_lambda_acc)
        print('Best Avg balance %0.4f' % best_avg_balance, '| Avg Balance lambda = ', best_lambda_avg_balance)
        print('Best Min balance %0.4f' % best_min_balance, '| Min Balance lambda = ', best_lambda_min_balance)

        elapsetimes.append(elapsed)
        avg_balance_set.append(avg_balance)
        min_balance_set.append(min_balance)
        fairness_error_set.append(fairness_error)
        E_cluster_set.append(E['cluster_E'][-1])
        E_cluster_discrete_set.append(E['cluster_E_discrete'][-1])

    avgelapsed = sum(elapsetimes) / len(elapsetimes)
    print('avg elapsed ', avgelapsed)
    ray.shutdown()

    if plot_option_fairness_vs_clusterE == True and length_lmbdas > 1:
        savefile = osp.join(data_dir, 'Fair_{}_fairness_vs_clusterEdiscrete_{}.npz'.format(cluster_option, dataset))
        filename = osp.join(output_path, 'Fair_{}_fairness_vs_clusterEdiscrete_{}.png'.format(cluster_option, dataset))
        plot_fairness_vs_clusterE(cluster_option, savefile, filename, lmbdas,
                                  fairness_error_set, min_balance_set, avg_balance_set,
                                  E_cluster_discrete_set)

    if plot_option_balance_vs_clusterE == True and length_lmbdas > 1:
        savefile = osp.join(data_dir, 'Fair_{}_balance_vs_clusterEdiscrete_{}.npz'.format(cluster_option, dataset))
        filename = osp.join(output_path, 'Fair_{}_balance_vs_clusterEdiscrete_{}.png'.format(cluster_option, dataset))
        plot_balance_vs_clusterE(cluster_option, savefile, filename, lmbdas,
                                 fairness_error_set, min_balance_set, avg_balance_set,
                                 E_cluster_discrete_set)
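# --- Illustrative entry point (not part of the original script): main(args) reads
# the attributes below from argparse, so a minimal driver might look as follows.
# The defaults shown here are assumptions, not values from the original code.
#
# import argparse
# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(description='Fair clustering')
#     parser.add_argument('--dataset', type=str, default='Synthetic')
#     parser.add_argument('--cluster_option', type=str, default='ncut')
#     parser.add_argument('--data_dir', type=str, default='./data')
#     parser.add_argument('--output_path', type=str, default='./output')
#     parser.add_argument('--lmbda', type=float, default=None)
#     parser.add_argument('--plot_option_clusters_vs_lambda', action='store_true')
#     parser.add_argument('--plot_option_fairness_vs_clusterE', action='store_true')
#     parser.add_argument('--plot_option_balance_vs_clusterE', action='store_true')
#     parser.add_argument('--plot_option_convergence', action='store_true')
#     main(parser.parse_args())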
def main(args, k):
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)

    ## Options
    dataset = args.dataset
    cluster_option = args.cluster_option
    data_dir = osp.join(args.data_dir, dataset)
    output_path = data_dir
    if not osp.exists(data_dir):
        os.makedirs(data_dir)

    ## plotting options
    plot_option_clusters_vs_lambda = args.plot_option_clusters_vs_lambda
    plot_option_fairness_vs_clusterE = args.plot_option_fairness_vs_clusterE
    plot_option_balance_vs_clusterE = args.plot_option_balance_vs_clusterE
    plot_option_convergence = args.plot_option_convergence
    plot_option_K_vs_clusterE = args.plot_option_K_vs_clusterE

    # ### Data load
    dir_path = osp.join(data_dir, cluster_option + "_" + str(k))
    savepath_compare = osp.join(dir_path, dataset + '.npz')

    if not os.path.exists(savepath_compare):
        X_org, demograph, K = read_dataset(dataset, data_dir, k)
        if X_org.shape[0] > 200000:
            np.savez_compressed(savepath_compare, X_org=X_org, demograph=demograph, K=K)
        else:
            os.mkdir(dir_path)
            np.savez(savepath_compare, X_org=X_org, demograph=demograph, K=K)
    else:
        datas = np.load(savepath_compare)
        X_org = datas['X_org']
        demograph = datas['demograph']
        K = datas['K'].item()

    log_path = osp.join(data_dir, cluster_option + '_iot_log_' + str(k) + '.txt')
    sys.stdout = Logger(log_path)

    # Scale and Normalize Features
    X_org = scale(X_org, axis=0)
    X = normalizefea(X_org)

    N, D = X.shape

    print('Cluster number for dataset {} is {}'.format(dataset, K))

    V_list = [np.array(demograph == j) for j in np.unique(demograph)]
    V_sum = [x.sum() for x in V_list]
    print('Balance of the dataset {}'.format(min(V_sum) / max(V_sum)))
    print('Number of points in the dataset {}'.format(N))

    # J = len(V_sum)
    # demographic probability for each V_j
    u_V = [x / N for x in V_sum]  # proportional
    print('Demographic-probabilities: {}'.format(u_V))
    print('Demographic-numbers per group: {}'.format(V_sum))

    #############################################################################
    ######################## Run Fair clustering ###############################
    #############################################################################
    #
    fairness = True  # Setting False only runs unfair clustering

    elapsetimes = []
    avg_balance_set = []
    min_balance_set = []
    fairness_error_set = []
    E_cluster_set = []
    E_cluster_discrete_set = []

    bestacc = 1e10
    best_avg_balance = -1
    best_min_balance = -1

    if args.lmbda_tune:
        print('Lambda tune is true')
        lmbdas = np.arange(0, 10000, 100).tolist()
    else:
        lmbdas = [args.lmbda]

    length_lmbdas = len(lmbdas)

    l = None

    dir_path = osp.join(data_dir, cluster_option + "_" + str(k))

    if ('A' not in locals()) and cluster_option == 'ncut':
        alg_option = 'flann' if N > 50000 else 'None'
        affinity_path = osp.join(dir_path, dataset + '_affinity_ncut.npz')
        knn = 20
        if not osp.exists(affinity_path):
            A = util.create_affinity(X, knn, savepath=affinity_path, alg=alg_option)
        else:
            A = util.create_affinity(X, knn, W_path=affinity_path)

    init_C_path = osp.join(dir_path, '{}_init_{}_{}.npz'.format(dataset, cluster_option, K))
    if not osp.exists(init_C_path):
        print('Generating initial seeds')
        C_init, l_init = km_init(X, K, 'kmeans_plus')
        np.savez(init_C_path, C_init=C_init, l_init=l_init)
    else:
        temp = np.load(init_C_path)
        C_init = temp['C_init']  # Load initial seeds
        l_init = temp['l_init']

    for count, lmbda in enumerate(lmbdas):
        print('Inside Lambda ', lmbda)

        if cluster_option == 'ncut':
            C, l, elapsed, S, E = fair_clustering(X, K, u_V, V_list, lmbda, fairness,
                                                  cluster_option, C_init=C_init,
                                                  l_init=l_init, A=A)
        else:
            C, l, elapsed, S, E = fair_clustering(X, K, u_V, V_list, lmbda, fairness,
                                                  cluster_option, C_init=C_init,
                                                  l_init=l_init)

        min_balance, avg_balance = get_fair_accuracy(u_V, V_list, l, N, K)
        fairness_error = get_fair_accuracy_proportional(u_V, V_list, l, N, K)

        print('lambda = {}, \n fairness_error {: .2f} and \n avg_balance = {: .2f} \n min_balance = {: .2f}'.format(
            lmbda, fairness_error, avg_balance, min_balance))

        # Plot the figure with clusters
        if dataset in ['Synthetic', 'Synthetic-unequal', 'Sensor'] and plot_option_clusters_vs_lambda == True:
            cluster_plot_location = osp.join(output_path, 'cluster_output')
            if not osp.exists(cluster_plot_location):
                os.makedirs(cluster_plot_location)
            filename = osp.join(cluster_plot_location,
                                'cluster-plot_fair_{}-{}_lambda_{}.png'.format(cluster_option, dataset, lmbda))
            plot_clusters_vs_lambda(X_org, demograph, l, filename, dataset, lmbda,
                                    fairness_error, cluster_option)
        #
        if avg_balance > best_avg_balance:
            best_avg_balance = avg_balance
            best_lambda_avg_balance = lmbda

        if min_balance > best_min_balance:
            best_min_balance = min_balance
            best_lambda_min_balance = lmbda

        if fairness_error < bestacc:
            bestacc = fairness_error
            best_lambda_acc = lmbda

        if plot_option_convergence == True and count == 0:
            filename = osp.join(output_path, 'Fair_{}_convergence_{}.png'.format(cluster_option, dataset))
            E_fair = E['fair_cluster_E']
            plot_convergence(cluster_option, filename, E_fair)

        print('Best fairness_error %0.4f' % bestacc, '|Error lambda = ', best_lambda_acc)
        print('Best Avg balance %0.4f' % best_avg_balance, '| Avg Balance lambda = ', best_lambda_avg_balance)
        print('Best Min balance %0.4f' % best_min_balance, '| Min Balance lambda = ', best_lambda_min_balance)

        elapsetimes.append(elapsed)
        avg_balance_set.append(avg_balance)
        min_balance_set.append(min_balance)
        fairness_error_set.append(fairness_error)
        E_cluster_set.append(E['cluster_E'][-1])
        E_cluster_discrete_set.append(E['cluster_E_discrete'][-1])

    avgelapsed = sum(elapsetimes) / len(elapsetimes)
    print('avg elapsed ', avgelapsed)

    if plot_option_fairness_vs_clusterE == True and length_lmbdas > 1:
        savefile = osp.join(dir_path, 'Fair_{}_fairness_vs_clusterEdiscrete_{}.npz'.format(cluster_option, dataset))
        filename = osp.join(output_path, 'Fair_{}_fairness_vs_clusterEdiscrete_{}.png'.format(cluster_option, dataset))
        plot_fairness_vs_clusterE(cluster_option, savefile, filename, lmbdas,
                                  fairness_error_set, min_balance_set, avg_balance_set,
                                  E_cluster_discrete_set)

    if plot_option_balance_vs_clusterE == True and length_lmbdas > 1:
        savefile = osp.join(dir_path, 'Fair_{}_balance_vs_clusterEdiscrete_{}.npz'.format(cluster_option, dataset))
        filename = osp.join(output_path, 'Fair_{}_balance_vs_clusterEdiscrete_{}.png'.format(cluster_option, dataset))
        plot_balance_vs_clusterE(cluster_option, savefile, filename, lmbdas,
                                 fairness_error_set, min_balance_set, avg_balance_set,
                                 E_cluster_discrete_set)

    if plot_option_K_vs_clusterE == True and length_lmbdas > 1:
        savefile = osp.join(dir_path, 'Fair_{}_K_vs_clusterE_{}.npz'.format(cluster_option, dataset))
        filename = osp.join(output_path, 'Fair_{}_K_vs_clusterE_{}.png'.format(cluster_option, dataset))
        # TODO: set correct set of params
        plot_K_vs_clusterE(cluster_option, savefile, filename, range(K),
                           E_cluster_set, E_cluster_discrete_set, save=True)
def main(args):
    if args.seed is not None:
        np.random.seed(args.seed)
        random.seed(args.seed)

    # pdb.set_trace()
    dataset = args.dataset
    data_dir = './data/'

    # SLK options BO/MS/Means
    SLK_option = args.SLK_option

    # Save?
    mode_images = args.mode_images  # save mode images in a directory?
    saveresult = args.saveresult  # save results?

    log_path = os.path.join(data_dir, SLK_option + '_' + dataset + '_log_.txt')
    sys.stdout = Logger(log_path)

    # Give the data matrix in samples-by-features format (N x D)
    X, gnd_labels, K, sigma, X_org, knn = read_dataset(dataset, data_dir)

    # Normalize Features
    X = normalizefea(X)
    N, D = X.shape

    ##### Validation set for tuning lambda and the initial K-means++ seed. #####
    # However, you can set the value of lambda and the initial seed empirically
    # and skip the validation set.
    val_path = data_dir + dataset + '_val_set.npz'
    if not os.path.exists(val_path):
        X_val, gnd_val, val_ind, imbalance = util.validation_set(X, gnd_labels, K, 0.1)
        np.savez(val_path, X_val=X_val, gnd_val=gnd_val, val_ind=val_ind)
    else:
        data_val = np.load(val_path)
        X_val = data_val['X_val']
        gnd_val = data_val['gnd_val']
        val_ind = data_val['val_ind']

    ##
    # Build the knn kernel
    start_time = timeit.default_timer()
    aff_path = data_dir + 'W_' + str(knn) + '_' + dataset + '.npz'

    alg = None
    if N > 100000:
        alg = "flann"

    if not os.path.exists(aff_path):
        W = util.create_affinity(X, knn, scale=None, alg=alg, savepath=aff_path, W_path=None)
    else:
        W = util.create_affinity(X, knn, W_path=aff_path)

    elapsed = timeit.default_timer() - start_time
    print(elapsed)

    ###### Run SLK #################################
    bound_ = args.bound  # Setting False only runs K-modes
    bound_it = 1000

    if sigma is None:
        sigma = util.estimate_sigma(X, W, knn, N)
        # sigma = util.estimate_median_sigma(X, knn)  # Or this

    # Initial seed path from kmeans++ seed
    init_C_path = data_dir + dataset + '_C_init.npy'
    if not os.path.exists(init_C_path):
        C_init, _ = km_init(X, K, 'kmeans_plus')
        np.save(init_C_path, C_init)
    else:
        C_init = np.load(init_C_path)  # Load initial seeds

    if args.lmbda_tune:
        lmbdas = np.arange(0.1, 10, 0.3).tolist()
    else:
        lmbdas = [args.lmbda]

    if args.lmbda_tune == True:
        elapsetimes = []
        bestnmi = -1
        bestacc = -1
        t = len(lmbdas)
        trivial = [0] * t  # Keep count of any missing cluster

        for count, lmbda in enumerate(lmbdas):
            print('Inside Lambda ', lmbda)
            print('Inside Sigma ', sigma)

            if N <= 5000:
                _, l, elapsed, mode_index, z, _, ts = SLK_iterative(
                    X, sigma, K, W, bound_, SLK_option, C_init,
                    bound_lambda=lmbda, bound_iterations=bound_it)
            else:
                _, l, elapsed, mode_index, z, _, ts = SLK(
                    X, sigma, K, W, bound_, SLK_option, C_init,
                    bound_lambda=lmbda, bound_iterations=bound_it)

            if ts:
                trivial[count] = 1
                continue

            # Evaluate the performance on the validation set
            current_nmi = nmi(gnd_val, l[val_ind])
            acc, _ = get_accuracy(gnd_val, l[val_ind])

            print('lambda = ', lmbda, ' : NMI= %0.4f' % current_nmi)
            print('accuracy %0.4f' % acc)

            if current_nmi > bestnmi:
                bestnmi = current_nmi
                best_lambda_nmi = lmbda

            if acc > bestacc:
                bestacc = acc
                best_lambda_acc = lmbda

            print('Best result: NMI= %0.4f' % bestnmi, '|NMI lambda = ', best_lambda_nmi)
            print('Best Accuracy %0.4f' % bestacc, '|Acc lambda = ', best_lambda_acc)
            elapsetimes.append(elapsed)

        avgelapsed = sum(elapsetimes) / len(elapsetimes)
        print('avg elapsed ', avgelapsed)
    else:
        best_lambda_acc = args.lmbda

    ### Run with the best lambda and assess accuracy over the whole dataset
    best_lambda = best_lambda_acc  # or best_lambda_nmi
    if N >= 5000:
        C, l, elapsed, mode_index, z, _, _ = SLK(
            X, sigma, K, W, bound_, SLK_option, C_init,
            bound_lambda=best_lambda, bound_iterations=bound_it)
    else:
        C, l, elapsed, mode_index, z, _, _ = SLK_iterative(
            X, sigma, K, W, bound_, SLK_option, C_init,
            bound_lambda=best_lambda, bound_iterations=bound_it)

    # Evaluate the performance on the full dataset
    print('Elapsed time for SLK = %0.5f seconds' % elapsed)
    nmi_ = nmi(gnd_labels, l)
    acc_, _ = get_accuracy(gnd_labels, l)
    print('Result: NMI= %0.4f' % nmi_)
    print('Accuracy %0.4f' % acc_)

    best_lambda = best_lambda_acc

    if saveresult:
        saveresult_path = data_dir + 'Result_' + dataset + '.mat'
        sio.savemat(saveresult_path, {'lmbda': best_lambda, 'l': l, 'C': C, 'z': z})

    if mode_images and X_org is not None:
        if SLK_option == 'BO':
            mode_images_path = data_dir + dataset + '_modes'
            original_image_size = (28, 28)
            util.mode_nn(mode_index, X, K, C, l, 6, X_org, mode_images_path, original_image_size)
        else:
            print('\n For mode images change the option to BO and provide image intensities X_org')
class Brisk(Request):

    def __init__(self):
        super(Brisk, self).__init__()
        self.config = Config().config
        self.logger = Logger().logger
        self.proxy_manager = ProxyPool()
        self.db = DB().db

        self.__proxy_status = self.config.get('PROXY', 'empty')
        if self.__proxy_status == 'TRUE':
            self.proxy_manager.delete_valid_proxy()

        self.__hook_name = 'hook'
        self.__walk_name = 'walk'
        self.__flow_name = 'flow'
        self.__brisk_type = self.config.get('RUN', 'type')
        # helpers copied onto workers: anything not starting with '__', 'hook', 'walk' or 'flow'
        self.__func_filter = lambda m: (not m.startswith("__")
                                        and not m.startswith(self.__hook_name)
                                        and not m.startswith(self.__walk_name)
                                        and not m.startswith(self.__flow_name))
        self.__flow_num = int(self.config.get('RUN', 'num'))
        self.__hook = None
        self.__flow_queue = queue.Queue()
        self.__walk_queue = queue.Queue()
        self.__go_init()

    def __go_init(self):
        # bind the first hook* method to a dedicated Core worker
        for method_name in list(
                filter(lambda m: m.startswith(self.__hook_name) and callable(getattr(self, m)),
                       dir(self))):
            method = self.__class__.__dict__[method_name]
            obj = Core()
            obj.task = types.MethodType(method, obj)
            for func_name in filter(self.__func_filter, self.__class__.__dict__):
                func = self.__class__.__dict__[func_name]
                setattr(obj, func_name, types.MethodType(func, obj))
            self.__hook = obj
            break

        if self.__brisk_type == 'WALK':
            for method_name in list(
                    filter(lambda m: m.startswith(self.__walk_name) and callable(getattr(self, m)),
                           dir(self))):
                self.__walk_queue.put(method_name)

        if self.__brisk_type == 'FLOW':
            for method_name in list(
                    filter(lambda m: m.startswith(self.__flow_name) and callable(getattr(self, m)),
                           dir(self))):
                self.__flow_queue.put(method_name)

    def go(self):
        self.logger.info('brisk go')
        self.logger.info('brisk create {} task(s)'.format(self.__flow_queue.qsize()))

        # attributes the hook creates at runtime are copied onto every worker
        self.__hook_attr = []
        if self.__hook:
            self.__hook_attr_base = dir(self.__hook)
            self.logger.info('brisk create hook')
            self.__hook.start()
            self.__hook.join()
            self.logger.info('brisk complete hook')

            self.__hook: Core
            for method_name in dir(self.__hook):
                if method_name not in self.__hook_attr_base:
                    self.__hook_attr.append(method_name)

        # WALK tasks run one after another
        while not self.__walk_queue.empty():
            method_name = self.__walk_queue.get()
            method = self.__class__.__dict__[method_name]
            t = Core(self.__walk_queue)
            for attr_name in self.__hook_attr:
                setattr(t, attr_name, self.__hook.__dict__[attr_name])
            t.task = types.MethodType(method, t)
            for func_name in filter(self.__func_filter, self.__class__.__dict__):
                func = self.__class__.__dict__[func_name]
                setattr(t, func_name, types.MethodType(func, t))
            if self.__hook:
                t.make(self.__hook.headers, self.__hook.cookies)
            t.start()
            t.join()
        self.__walk_queue.join()

        # FLOW tasks run concurrently, at most __flow_num threads at a time
        while not self.__flow_queue.empty():
            if (threading.activeCount() - 1) < self.__flow_num:
                method_name = self.__flow_queue.get()
                method = self.__class__.__dict__[method_name]
                t = Core(self.__flow_queue)
                for attr_name in self.__hook_attr:
                    setattr(t, attr_name, self.__hook.__dict__[attr_name])
                t.task = types.MethodType(method, t)
                for func_name in filter(self.__func_filter, self.__class__.__dict__):
                    func = self.__class__.__dict__[func_name]
                    setattr(t, func_name, types.MethodType(func, t))
                if self.__hook:
                    t.make(self.__hook.headers, self.__hook.cookies)
                t.start()
        self.__flow_queue.join()

        self.logger.info('brisk ok')
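# --- Usage sketch (illustrative, not part of the original module) ---
# A Brisk subclass declares a hook_* method for login/session setup, walk_*
# methods for sequential tasks and flow_* methods for concurrent tasks; plain
# helper methods are copied onto every worker. The class and URLs below are
# hypothetical names used only for this sketch.
#
# class DemoSpider(Brisk):
#
#     def hook_login(self):
#         # runs first; the headers/cookies obtained here are handed to every worker
#         self.get('http://example.com/login')
#
#     def flow_page(self):
#         res = self.get('http://example.com/page')
#         self.logger.info('flow fetched: {}'.format(res))
#
# With [RUN] type = FLOW and num = <max concurrent threads> in the config:
# DemoSpider().go()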
                    type=int,
                    required=False,
                    help="Max number of iterations of length [--minutes]")
args = parser.parse_args()

# Verbose set up
v = args.verbose

# Log Setup
logs = args.logDirectory + '/'
if not os.path.exists(logs):
    os.makedirs(logs)
logfile = logs + datetime.now().strftime("%m-%d-%Y_%H:%M:%S") + ".txt"
logger = Logger(logfile, v)
if v:
    logger.logAndPrint("Verbosity turned on")

# Configurations
conf = Conf(logger)
camera = conf.camera

# Output Setup
output = args.outputDirectory + "/"
if not os.path.exists(output):
    os.makedirs(output)

# Iterations
it = args.maxIterations
i = 0