def getSample(self):
    uptime_info = {}
    nprocs = 0
    try:
        # Count CPUs: /proc/cpuinfo separates per-CPU stanzas with blank lines
        if util.is_exist('/proc/cpuinfo'):
            with open('/proc/cpuinfo') as f:
                for line in f:
                    if not line.strip():
                        nprocs += 1
        # Read uptime and idle time (both in seconds)
        if util.is_exist('/proc/uptime'):
            with open('/proc/uptime') as f:
                for line in f:
                    if line.strip():
                        fields = line.split(' ')
                        if len(fields) == 2:
                            uptime_info['uptime'] = {'volume': float(fields[0].strip()), 'unit': 's'}
                            uptime_info['idletime'] = {'volume': float(fields[1].strip()), 'unit': 's'}
                            uptime_info['cpu_num'] = {'volume': nprocs, 'unit': ''}
                            # Idle time accumulates across all CPUs, so
                            # normalize by cpu_num * uptime to get a percentage
                            uptime_info['idle_rate'] = {
                                'volume': (uptime_info['idletime']['volume'] * 100)
                                          / (uptime_info['cpu_num']['volume'] * uptime_info['uptime']['volume']),
                                'unit': '%'}
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        return uptime_info
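# A quick sanity check of the idle_rate formula above, with made-up numbers:
# 1000 s of wall-clock uptime on 4 CPUs gives 4000 CPU-seconds in total, so
# 3000 s of accumulated idle time means the machine was 75% idle:
#
#   (3000.0 * 100) / (4 * 1000.0)  # == 75.0 (%)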
def build_graph(ids, vocabs, pmi, tfidf):
    if util.is_exist("graph.pkl"):
        print("loaded")
        return util.load_pickle("graph.pkl")
    G = networkx.Graph()
    G.add_nodes_from(ids)
    G.add_nodes_from(vocabs)
    cn2 = lambda x: x * (x - 1) // 2
    print("Calculating word_word edges")
    for (i, w1), (j, w2) in tqdm(combinations(enumerate(vocabs), 2), total=cn2(len(vocabs))):
        if pmi[i][j] > 0:
            G.add_edge(w1, w2, weight=pmi[i][j])
    print("Calculating doc_word edges")
    for i, review_id in tqdm(enumerate(ids), total=len(ids)):
        for j, word in enumerate(vocabs):
            G.add_edge(review_id, word, weight=tfidf[i][j])
    print("Calculating doc_doc edges")
    for review_id in tqdm(ids, total=len(ids)):
        G.add_edge(review_id, review_id, weight=1)
    util.save_pickle(G, "graph.pkl")
    return G
def load_data(file, max_num=None):
    if util.is_exist("data.pkl"):
        print("loaded")
        return util.load_pickle("data.pkl")
    labels = []
    reviews = []
    ids = []
    with open(file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        max_line = len(lines) if max_num is None else max_num + 1
        # Skip the header line
        for line in lines[1:max_line]:
            id, label, review = line.split("\t")
            labels.append(label)
            # strip the trailing newline, then the surrounding quotation marks
            review = review.strip()[1:-1]
            reviews.append(review)
            # strip the surrounding quotation marks
            id = id[1:-1]
            ids.append(id)
    reviews = [cleanText(r) for r in reviews]
    util.save_pickle((ids, labels, reviews), "data.pkl")
    return ids, labels, reviews
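# A hedged sketch of the expected input format, inferred from the parsing
# above (contents are made up): a tab-separated file with a header row, in
# which id and review are wrapped in double quotes:
#
#   id	sentiment	review
#   "5814_8"	1	"With all this stuff going down at the moment..."
#
# Hypothetical usage ("reviews.tsv" is a placeholder file name):
#   ids, labels, reviews = load_data("reviews.tsv", max_num=1000)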
def preprocess():
    if util.is_exist("preprocessed.pkl"):
        print("loading")
        return util.load_pickle("preprocessed.pkl")
    _, labels, _ = util.load_pickle("data.pkl")
    labels = np.array(labels, dtype=np.int32)
    G = util.load_pickle("graph.pkl")
    print("calc adjacent matrix")
    A = networkx.to_numpy_matrix(G, weight="weight")
    print("calc degree matrix")
    degrees = [d ** -0.5 if d != 0 else 0 for _, d in G.degree]
    print("normalize adjacent matrix")
    # Equivalent to:
    #   degrees = np.diag(degrees)
    #   A_hat = degrees @ A @ degrees
    # but done in place to avoid allocating a second dense matrix
    A_hat = A
    for i in tqdm(range(A.shape[0]), total=A.shape[0]):
        for j in range(A.shape[1]):
            A_hat[i, j] *= degrees[i] * degrees[j]
    print("calc feature matrix")
    X = np.eye(G.number_of_nodes())  # features are just the identity matrix
    util.save_pickle((X, A_hat, labels), "preprocessed.pkl")
    return X, A_hat, labels
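# A vectorized sketch of the same normalization, A_hat = D^-1/2 @ A @ D^-1/2,
# assuming a second dense array of A's size fits in memory (exactly the
# allocation the in-place loop above avoids). Broadcasting scales row i by
# degrees[i] and column j by degrees[j]:
import numpy as np

def normalize_adjacency(A, degrees):
    # degrees[i] is deg(i) ** -0.5, with 0 for isolated nodes, as computed above
    d = np.asarray(degrees)
    return np.asarray(A) * d[:, None] * d[None, :]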
def __init__(self, model_type, input_shape, look_back, look_forward, dataset, label, save_path, weight_path=None):
    """
    :param model_type: type of model to build
    :param input_shape: shape of the input data
    :param look_back: sequence length (when predicting the future from the
                      last N observations, this is N)
    :param look_forward: prediction horizon (how far into the future to predict)
    :param dataset: the full dataset, before the train/validation/test split
    :param label: dataset label (sst, u10, v10, ...)
    :param save_path: root folder for saving weights, loss curves, and
                      prediction results during training
    :param weight_path: path of weights to load; if None, train from scratch
    """
    self.model_type = model_type
    self.input_shape = input_shape
    self.image_shape = input_shape[-3:]
    self.look_back = look_back
    self.model = self.build_model()
    self.model.summary()
    self.dataset_arr = dataset_split(dataset, look_back, look_forward)
    self.max_list = np.max(self.get_data('train')[0], axis=(0, 1, 2, 3))
    self.min_list = np.min(self.get_data('train')[0], axis=(0, 1, 2, 3))
    self.save_path = save_path
    self.label = label
    plot_model(self.model, os.path.join(self.save_path, 'model.png'), show_shapes=True)
    if weight_path is not None:
        is_exist(weight_path, 'weight file')
        print("Load weight from {}".format(weight_path))
        self.model.load_weights(weight_path)
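# A hypothetical instantiation sketch (the class name is assumed to match the
# OceanPredictor import in the driver script below; model_type, shapes, and
# paths are illustrative placeholders only):
#
#   predictor = OceanPredictor(model_type='convlstm',
#                              input_shape=(None, 10, 64, 64, 1),
#                              look_back=10, look_forward=1,
#                              dataset=dataset, label='sst',
#                              save_path='./exp', weight_path=None)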
def calc_tf_idf(reviews, min_df=0.01):
    if util.is_exist("tf-idf.pkl"):
        print("loaded")
        return util.load_pickle("tf-idf.pkl")
    vectorizer = TfidfVectorizer(input="content",
                                 stop_words=stopwords.words("english"),
                                 min_df=min_df, max_df=0.5)
    vectorizer.fit(reviews)
    tfidf = vectorizer.transform(reviews).toarray()
    vocab = vectorizer.get_feature_names()
    util.save_pickle((tfidf, vocab), "tf-idf.pkl")
    return tfidf, vocab
def getSample(self):
    mem_info = OrderedDict()
    try:
        if util.is_exist('/proc/meminfo'):
            # no explicit close needed; the with-statement handles it
            with open('/proc/meminfo') as f:
                for line in f:
                    tmp = line.split(':')
                    if len(tmp) == 2:
                        mem_info[tmp[0].strip()] = tmp[1].strip()
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        return mem_info
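# For reference, /proc/meminfo lines have the form "Key:  value [unit]", e.g.
#
#   MemTotal:        8010936 kB
#   HugePages_Total:       0
#
# Splitting on ':' yields the key and a "value unit" pair (the unit may be
# absent), which is what the parser above and the unit-aware variant below
# rely on.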
def _read_proc_stat(self):
    cpu_line = OrderedDict()
    f = None
    try:
        if util.is_exist('/proc/stat'):
            f = open('/proc/stat')
            lines = f.readlines()
            for line in lines:
                if line.startswith('cpu'):
                    tmp = line.strip().split()
                    # tmp[0] is 'cpu' or 'cpuN'; the rest are jiffy counters
                    cpu_line[tmp[0]] = tmp[1:]
    except:
        print "Unexpected error: ", sys.exc_info()[1]
    finally:
        if f:
            f.close()
        return cpu_line
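# A hedged sketch of how the jiffy counters collected by _read_proc_stat are
# commonly turned into a utilization percentage: take two samples some time
# apart and compare the idle delta with the total delta. Per proc(5), the
# counters after the 'cpu' label are user, nice, system, idle, iowait, ...,
# so idle is index 3.
def cpu_usage_percent(prev, curr, key='cpu'):
    prev_vals = [float(v) for v in prev[key]]
    curr_vals = [float(v) for v in curr[key]]
    total = sum(curr_vals) - sum(prev_vals)
    idle = curr_vals[3] - prev_vals[3]
    if total <= 0:
        return 0.0
    return 100.0 * (1.0 - idle / total)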
def getSample(self):
    mem_info = OrderedDict()
    try:
        if util.is_exist('/proc/meminfo'):
            # no explicit close needed; the with-statement handles it
            with open('/proc/meminfo') as f:
                for line in f:
                    tmp = line.split(':')
                    if len(tmp) == 2:
                        vol_unit = tmp[1].strip().split(' ')
                        if len(vol_unit) == 2:
                            # value with a unit, e.g. "8010936 kB": normalize to MB
                            tmp_value = self._changeUnit(value=long(vol_unit[0]), force_unit='MB')
                        elif len(vol_unit) == 1:
                            # bare counter without a unit
                            tmp_value = {'volume': long(vol_unit[0]), 'unit': ''}
                        else:
                            continue
                        mem_info[tmp[0].strip()] = tmp_value
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        return mem_info
def getSample(self):
    net_state = OrderedDict()
    title = OrderedDict()
    total_item = 0
    try:
        if util.is_exist('/proc/net/dev'):
            with open('/proc/net/dev') as f:
                for line in f:
                    if line.strip().startswith('Inter'):
                        # first header line: column group names (Receive/Transmit)
                        tmp = line.strip().split('|')
                        for i in range(1, len(tmp)):
                            title[tmp[i].strip()] = []
                    elif line.strip().startswith('face'):
                        # second header line: per-group column names
                        tmp = line.strip().split('|')
                        for i in range(1, len(tmp)):
                            title[title.items()[i - 1][0]] = tmp[i].strip().split()
                            total_item += len(title.items()[i - 1][1])
                    else:
                        # interface line: "<iface>: <values...>"
                        tmp = line.strip().split(':')
                        tmp_data = OrderedDict()
                        value = tmp[1].strip().split()
                        if len(value) == total_item:
                            cnt = 0
                            for t_item in title.items():
                                tmp_data[t_item[0]] = {}
                                for it in t_item[1]:
                                    tmp_data[t_item[0]][it] = value[cnt]
                                    cnt += 1
                        else:
                            print 'number of items error'
                        net_state[tmp[0]] = tmp_data
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        return net_state
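# For reference, /proc/net/dev starts with a two-line header whose column
# groups are separated by '|', followed by one line per interface, e.g.
#
#   Inter-|   Receive                                               |  Transmit
#    face |bytes    packets errs drop fifo frame compressed multicast|bytes ...
#       lo: 2776770   11307    0    0    0     0          0         0 ...
#
# The parser above reads the group names from the first header line, the
# per-group column names from the second, and then matches each interface
# line's values against that layout.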
def getSample(self):
    load_stat = {}
    load_info = None
    f = None
    try:
        if util.is_exist('/proc/loadavg'):
            f = open('/proc/loadavg')
            load_info = f.read().split()
            if load_info and len(load_info) == 5:
                load_stat['load_1_min'] = {'volume': float(load_info[0]), 'unit': ''}
                load_stat['load_5_min'] = {'volume': float(load_info[1]), 'unit': ''}
                load_stat['load_15_min'] = {'volume': float(load_info[2]), 'unit': ''}
                load_stat['nr_thread'] = load_info[3]
                load_stat['last_pid'] = load_info[4]
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        if f:
            f.close()
        return load_stat
def getSample(self):
    load_stat = {}
    load_info = None
    f = None
    try:
        if util.is_exist('/proc/loadavg'):
            f = open('/proc/loadavg')
            load_info = f.read().split()
            if load_info and len(load_info) == 5:
                load_stat['load_1_min'] = load_info[0]
                load_stat['load_5_min'] = load_info[1]
                load_stat['load_15_min'] = load_info[2]
                load_stat['nr_thread'] = load_info[3]
                load_stat['last_pid'] = load_info[4]
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        if f:
            f.close()
        return load_stat
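# For reference, /proc/loadavg holds exactly five fields, e.g.
#
#   0.20 0.18 0.12 1/80 11206
#
# the 1-, 5-, and 15-minute load averages, the runnable/total thread counts,
# and the PID of the most recently created process; hence the len == 5 check
# in both variants above.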
def calc_pmi(reviews, vocabs, window_size=10):
    if util.is_exist("pmi.pkl"):
        print("loaded")
        return util.load_pickle("pmi.pkl")
    word2index = {word: index for index, word in enumerate(vocabs)}
    W = 1  # start at 1 to avoid probabilities of exactly 1
    W_i = np.zeros(len(vocabs), dtype=np.int32)
    W_ij = np.identity(len(vocabs), dtype=np.int32)
    vocabs = set(vocabs)
    words_list = [[w for w in r.split() if w in vocabs] for r in reviews]
    for word_seq in tqdm(words_list, total=len(words_list)):
        for i in range(max(len(word_seq) - window_size, 1)):
            W += 1
            word_set = set(word_seq[i:i + window_size])
            for w in word_set:
                W_i[word2index[w]] += 1
            for w1, w2 in combinations(word_set, 2):
                i1 = word2index[w1]
                i2 = word2index[w2]
                W_ij[i1][i2] += 1
                W_ij[i2][i1] += 1
    p_i = W_i / W
    p_ij = W_ij / W
    # PMI(i, j) = log(p_ij / (p_i * p_j)); left at zero where undefined
    val = np.zeros(p_ij.shape, dtype=np.float64)
    for i in range(len(p_i)):
        for j in range(len(p_i)):
            if p_ij[i, j] != 0 and p_i[i] * p_i[j] != 0:
                val[i, j] = math.log(p_ij[i, j] / (p_i[i] * p_i[j]))
    util.save_pickle(val, "pmi.pkl")
    return val
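# A minimal end-to-end sketch tying the functions above together, assuming
# they live in one module; the file name and max_num are illustrative
# placeholders, and each step caches its result as a pickle, so reruns are cheap:
if __name__ == "__main__":
    ids, labels, reviews = load_data("reviews.tsv", max_num=1000)
    tfidf, vocabs = calc_tf_idf(reviews)
    pmi = calc_pmi(reviews, vocabs)
    G = build_graph(ids, vocabs, pmi, tfidf)
    X, A_hat, y = preprocess()  # consumes the data.pkl and graph.pkl saved above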
def getSample(self):
    cpu_info = OrderedDict()
    proc_info = OrderedDict()
    nprocs = 0
    try:
        if util.is_exist('/proc/cpuinfo'):
            with open('/proc/cpuinfo') as f:
                for line in f:
                    if not line.strip():
                        cpu_info['proc%s' % nprocs] = proc_info
                        nprocs += 1
                        proc_info = OrderedDict()
                    else:
                        if len(line.split(':')) == 2:
                            proc_info[line.split(':')[0].strip()] = line.split(':')[1].strip()
                        else:
                            proc_info[line.split(':')[0].strip()] = ''
    except:
        print "Unexpected error:", sys.exc_info()[1]
    finally:
        return cpu_info
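# For reference, /proc/cpuinfo is a sequence of "key : value" stanzas, one
# per logical CPU, separated by blank lines, e.g.
#
#   processor   : 0
#   model name  : Intel(R) ...
#   cpu MHz     : 2400.000
#
#   processor   : 1
#   ...
#
# which is why the parser above flushes proc_info into cpu_info['proc<n>']
# each time it reaches a blank line.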
from ocean_predictor import OceanPredictor
from keras.optimizers import Adam
from util import load_data, is_exist
import numpy as np
import json
import os
import time
import shutil

if __name__ == '__main__':
    hyperparameter_json_path = './hyperparameters.json'
    is_exist(hyperparameter_json_path, 'hyperparameter json file')
    with open(hyperparameter_json_path) as json_file:
        """
        Note: JSON strings cannot use single quotes ('); only double quotes (") are valid.
        Hyperparameters are set inside the JSON file at hyperparameter_json_path.

        Keys that may be given as null (null becomes None when loaded in Python):
            exp_name    => if null, the current time is used as the root folder name
            weight_path => if null, training starts from scratch
            random_seed => if null, weight initialization differs on every run

        Each training run creates the following files under exp_name:
            exp_name
            ---- | SST
                 ---- | graphs comparing predictions and ground truth for the
                        train and test datasets
            ---- | SWH
                 ---- | graphs comparing predictions and ground truth for the
                        train and test datasets
            ---- | ... (other label data)
            ---- | ... (other label data)
            ---- | ... (other label data)
        """
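# A minimal, hypothetical hyperparameters.json; only exp_name, weight_path,
# and random_seed are documented above, so the values here are purely
# placeholders (note the double quotes and null, as required by JSON):
#
#   {
#       "exp_name": null,
#       "weight_path": null,
#       "random_seed": 42
#   }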