示例#1
0
    def getSample(self):
        """Collect uptime, idle time, CPU count and idle rate from /proc.

        Returns:
            dict: metric name -> ``{'volume': value, 'unit': unit}``.
            ``uptime``, ``idletime`` (unit ``'s'``) and ``cpu_num`` are set
            when a two-field line is read from ``/proc/uptime``.
            ``idle_rate`` (unit ``'%'``) is added only when those values are
            available and the CPU count and uptime are non-zero. The dict is
            empty when nothing could be read.
        """
        uptime_info = {}
        nprocs = 0

        try:
            # Get cpu number: every blank line of /proc/cpuinfo is counted
            # (presumably one blank line terminates each processor section).
            if util.is_exist('/proc/cpuinfo'):
                with open('/proc/cpuinfo') as f:
                    for line in f:
                        if not line.strip():
                            nprocs += 1

            # Get uptime and idletime
            if util.is_exist('/proc/uptime'):
                with open('/proc/uptime') as f:
                    for line in f:
                        if not line.strip():
                            continue
                        fields = line.split(' ')
                        if len(fields) == 2:
                            uptime_info['uptime'] = {
                                'volume': float(fields[0].strip()),
                                'unit': 's',
                            }
                            uptime_info['idletime'] = {
                                'volume': float(fields[1].strip()),
                                'unit': 's',
                            }
                            uptime_info['cpu_num'] = {
                                'volume': nprocs,
                                'unit': '',
                            }

            # Compute idle rate only when every input is present and the
            # denominator is non-zero, instead of relying on a KeyError or
            # ZeroDivisionError being swallowed.
            if 'uptime' in uptime_info:
                capacity = (uptime_info['cpu_num']['volume'] *
                            uptime_info['uptime']['volume'])
                if capacity:
                    uptime_info['idle_rate'] = {
                        'volume': (uptime_info['idletime']['volume'] * 100) / capacity,
                        'unit': '%',
                    }
        except Exception as err:
            # Best effort: report the problem and return whatever was
            # collected. KeyboardInterrupt/SystemExit are no longer swallowed.
            print("Unexpected error: %s" % (err,))
        return uptime_info
示例#2
0
def build_graph(ids, vocabs, pmi, tfidf):
    """Return the document/word graph, building and caching it when needed.

    When ``util.is_exist("graph.pkl")`` is true the pickled graph is loaded
    and returned unchanged. Otherwise an undirected graph is created whose
    nodes are ``ids`` followed by ``vocabs`` and whose edges are:

    * word-word: one edge per vocabulary pair whose ``pmi[i][j]`` is positive,
      weighted by that value;
    * doc-word: one edge per (document, word) pair, weighted by
      ``tfidf[i][j]``;
    * doc-doc: a self-loop of weight 1 on every document.

    The new graph is saved to ``graph.pkl`` before being returned.
    """
    cache_path = "graph.pkl"
    if util.is_exist(cache_path):
        print("loaded")
        return util.load_pickle(cache_path)

    graph = networkx.Graph()
    graph.add_nodes_from(ids)
    graph.add_nodes_from(vocabs)

    print("Calculating word_word edges")
    vocab_size = len(vocabs)
    # Number of unordered vocabulary pairs, used only for the progress bar.
    pair_count = vocab_size * (vocab_size - 1) // 2
    word_pairs = combinations(enumerate(vocabs), 2)
    for (row, left_word), (col, right_word) in tqdm(word_pairs,
                                                    total=pair_count):
        score = pmi[row][col]
        if score > 0:
            graph.add_edge(left_word, right_word, weight=score)

    print("Calculating doc_word edges")
    doc_count = len(ids)
    for row, review_id in tqdm(enumerate(ids), total=doc_count):
        for col, word in enumerate(vocabs):
            graph.add_edge(review_id, word, weight=tfidf[row][col])

    print("Calculating doc_doc edges")
    for review_id in tqdm(ids, total=doc_count):
        # Self-loop on each document node.
        graph.add_edge(review_id, review_id, weight=1)

    util.save_pickle(graph, cache_path)
    return graph
示例#3
0
def load_data(file, max_num=None):
    """Load ids, labels and cleaned reviews from a tab-separated file.

    The first line of ``file`` is treated as a header and skipped. Every
    other line must contain exactly three tab-separated fields
    (id, label, review); the id and review are expected to be wrapped in one
    quotation mark on each side, which is removed.

    :param file: path of the UTF-8 encoded TSV file.
    :param max_num: maximum number of data lines to read; ``None`` reads all.
    :return: ``(ids, labels, reviews)`` where ``labels`` holds the raw label
        strings and ``reviews`` has been passed through ``cleanText``.
    :raises ValueError: if a line does not split into exactly three fields.

    NOTE: the result is cached in ``data.pkl``; an existing cache is returned
    regardless of the ``file`` / ``max_num`` arguments.
    """
    if util.is_exist("data.pkl"):
        print("loaded")
        return util.load_pickle("data.pkl")
    labels = []
    reviews = []
    ids = []
    with open(file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    max_line = len(lines) if max_num is None else max_num + 1
    # Ignore Header Line
    for line in lines[1:max_line]:
        # Drop the line terminator first: otherwise the slice below removes
        # the newline instead of the closing quotation mark of the review.
        review_id, label, review = line.rstrip("\n").split("\t")
        labels.append(label)
        # remove quotation marks around
        reviews.append(review[1:-1])
        # remove quotation marks around
        ids.append(review_id[1:-1])

    reviews = [cleanText(r) for r in reviews]

    util.save_pickle((ids, labels, reviews), "data.pkl")
    return ids, labels, reviews
示例#4
0
def preprocess():
    """Return ``(X, A_hat, labels)``, computing and caching them if needed.

    A cached ``preprocessed.pkl`` is returned directly when present.
    Otherwise labels are read from ``data.pkl`` and the graph from
    ``graph.pkl``; the weighted adjacency matrix is scaled entry by entry with
    ``deg(i) ** -0.5 * deg(j) ** -0.5`` (0 for zero-degree nodes), the feature
    matrix is an identity matrix with one row per node, and the triple is
    written to ``preprocessed.pkl``.
    """
    cache_path = "preprocessed.pkl"
    if util.is_exist(cache_path):
        print("loading")
        return util.load_pickle(cache_path)

    _ids, raw_labels, _reviews = util.load_pickle("data.pkl")
    labels = np.array(raw_labels, dtype=np.int32)
    G = util.load_pickle("graph.pkl")

    print("calc adjacent matrix")
    A = networkx.to_numpy_matrix(G, weight="weight")

    print("calc degree matrix")
    inv_sqrt_degree = []
    for _node, degree in G.degree:
        if degree != 0:
            inv_sqrt_degree.append(degree**-0.5)
        else:
            inv_sqrt_degree.append(0)

    print("normalize adjacent matrix")
    # Equivalent dense formulation (not used, it allocates two more matrices):
    #     D = np.diag(inv_sqrt_degree)
    #     A_hat = D @ A @ D
    # decrease memory allocation: A_hat aliases A and is scaled in place
    A_hat = A
    n_rows, n_cols = A.shape[0], A.shape[1]
    for row in tqdm(range(n_rows), total=n_rows):
        for col in range(n_cols):
            A_hat[row, col] *= inv_sqrt_degree[row] * inv_sqrt_degree[col]

    print("calc feature matrix")
    # Features are just identity matrix
    X = np.eye(G.number_of_nodes())
    util.save_pickle((X, A_hat, labels), cache_path)
    return X, A_hat, labels
    def __init__(self,
                 model_type,
                 input_shape,
                 look_back,
                 look_forward,
                 dataset,
                 label,
                 save_path,
                 weight_path=None):
        """
        Build the model, split the dataset, record train min/max statistics,
        plot the model and optionally load existing weights.

        :param model_type: kind of model to create
        :param input_shape: size of the input data
        :param look_back: sequence length (when predicting the future from a number of past samples, the length of that 'number of past samples')
        :param look_forward: range of values to predict (when predicting the future from a number of past samples, the length of that 'future')
        :param dataset: the whole dataset (not split into train, validation and test)
        :param label: dataset label (sst, u10, v10 ...)
        :param save_path: root folder for the weights, loss curve and prediction results saved during training
        :param weight_path: path of the weights to load - if omitted, training starts from scratch
        """
        self.model_type = model_type
        self.input_shape = input_shape
        # Last three entries of the input shape.
        self.image_shape = input_shape[-3:]
        self.look_back = look_back
        # build_model() is defined elsewhere; presumably it reads the
        # attributes assigned above, so keep it after them - TODO confirm.
        self.model = self.build_model()
        self.model.summary()
        self.dataset_arr = dataset_split(dataset, look_back, look_forward)
        # Max / min of the first element returned by get_data('train'),
        # reduced over its first four axes. get_data() is defined elsewhere
        # and presumably reads self.dataset_arr - TODO confirm.
        self.max_list = np.max(self.get_data('train')[0], axis=(0, 1, 2, 3))
        self.min_list = np.min(self.get_data('train')[0], axis=(0, 1, 2, 3))
        self.save_path = save_path
        self.label = label

        # Write a diagram of the model to <save_path>/model.png.
        plot_model(self.model,
                   os.path.join(self.save_path, 'model.png'),
                   show_shapes=True)

        if weight_path is not None:
            # is_exist() is a project helper; presumably it validates that
            # the path exists - verify against its definition.
            is_exist(weight_path, 'weight file')
            print("Load weight from {}".format(weight_path))
            self.model.load_weights(weight_path)
示例#6
0
def calc_tf_idf(reviews, min_df=0.01):
    """Return the dense TF-IDF matrix and vocabulary for ``reviews``.

    A cached ``tf-idf.pkl`` is returned directly when present. Otherwise a
    ``TfidfVectorizer`` (English stop words, ``max_df=0.5``) is fitted on
    ``reviews`` and the result is cached.

    :param reviews: iterable of review strings.
    :param min_df: minimum document frequency passed to the vectorizer.
    :return: ``(tfidf, vocab)`` - the dense TF-IDF array and the list of
        feature names, in column order.
    """
    if util.is_exist("tf-idf.pkl"):
        print("loaded")
        return util.load_pickle("tf-idf.pkl")
    vectorizer = TfidfVectorizer(input="content",
                                 stop_words=stopwords.words("english"),
                                 min_df=min_df,
                                 max_df=0.5)
    # fit_transform is documented as equivalent to fit followed by transform.
    tfidf = vectorizer.fit_transform(reviews).toarray()
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when available and keep returning a plain list.
    if hasattr(vectorizer, "get_feature_names_out"):
        vocab = vectorizer.get_feature_names_out().tolist()
    else:
        vocab = vectorizer.get_feature_names()
    util.save_pickle((tfidf, vocab), "tf-idf.pkl")
    return tfidf, vocab
示例#7
0
    def getSample(self):
        """Read /proc/meminfo into an ordered mapping.

        Returns:
            OrderedDict: field name -> raw value string (both stripped), one
            entry per line that contains exactly one ``:``. Empty when the
            file is missing or cannot be read.
        """
        mem_info = OrderedDict()

        try:
            if util.is_exist('/proc/meminfo'):
                # the with-block closes the file, no explicit close needed
                with open('/proc/meminfo') as f:
                    for line in f:
                        tmp = line.split(':')
                        if len(tmp) == 2:
                            mem_info[tmp[0].strip()] = tmp[1].strip()
        except Exception as err:
            # Best effort: report and return what was collected so far.
            # Unlike a bare except with return-in-finally, this lets
            # KeyboardInterrupt/SystemExit propagate.
            print("Unexpected error: %s" % (err,))
        return mem_info
示例#8
0
    def getSample(self):
        """Read /proc/meminfo into an ordered mapping.

        Returns:
            OrderedDict: field name -> raw value string (both stripped), one
            entry per line that contains exactly one ``:``. Empty when the
            file is missing or cannot be read.
        """
        mem_info = OrderedDict()

        try:
            if util.is_exist("/proc/meminfo"):
                # the with-block closes the file, no explicit close needed
                with open("/proc/meminfo") as f:
                    for line in f:
                        tmp = line.split(":")
                        if len(tmp) == 2:
                            mem_info[tmp[0].strip()] = tmp[1].strip()
        except Exception as err:
            # Best effort: report and return what was collected so far.
            # Unlike a bare except with return-in-finally, this lets
            # KeyboardInterrupt/SystemExit propagate.
            print("Unexpected error: %s" % (err,))
        return mem_info
示例#9
0
 def _read_proc_stat(self):
     """Read the ``cpu*`` lines of /proc/stat.

     Returns:
         OrderedDict: first token of each line starting with ``cpu`` ->
         list of the remaining whitespace-separated tokens (strings).
         Empty when the file is missing or cannot be read.
     """
     cpu_line = OrderedDict()

     try:
         if util.is_exist('/proc/stat'):
             # with-block replaces the manual open/close bookkeeping
             with open('/proc/stat') as f:
                 for line in f:
                     if line.startswith('cpu'):
                         tmp = line.strip().split()
                         cpu_line[tmp[0]] = tmp[1:]
     except Exception as err:
         # The original subscripted sys.exc_info itself (missing call
         # parentheses), which raised inside the handler; report the
         # caught exception directly instead.
         print("Unexpected error:  %s" % (err,))
     return cpu_line
示例#10
0
    def _read_proc_stat(self):
        """Read the ``cpu*`` lines of /proc/stat.

        Returns:
            OrderedDict: first token of each line starting with ``cpu`` ->
            list of the remaining whitespace-separated tokens (strings).
            Empty when the file is missing or cannot be read.
        """
        cpu_line = OrderedDict()

        try:
            if util.is_exist('/proc/stat'):
                # with-block replaces the manual open/close bookkeeping
                with open('/proc/stat') as f:
                    for line in f:
                        if line.startswith('cpu'):
                            tmp = line.strip().split()
                            cpu_line[tmp[0]] = tmp[1:]
        except Exception as err:
            # The original subscripted sys.exc_info itself (missing call
            # parentheses), which raised inside the handler; report the
            # caught exception directly instead.
            print("Unexpected error:  %s" % (err,))
        return cpu_line
示例#11
0
 def getSample(self):
     """Read /proc/meminfo and convert each value.

     For every line with exactly one ``:``, the value part is split on
     single spaces:

     * two tokens (number and unit): the number is passed to
       ``self._changeUnit(value=..., force_unit='MB')`` and its result is
       stored;
     * one token: stored as ``{'volume': <number>, 'unit': ''}``;
     * anything else: the line is skipped.

     Returns:
         OrderedDict: field name -> converted value. Holds the entries
         parsed before any error; empty when the file is missing.
     """
     mem_info = OrderedDict()

     try:
         if util.is_exist('/proc/meminfo'):
             # the with-block closes the file, no explicit close needed
             with open('/proc/meminfo') as f:
                 for line in f:
                     tmp = line.split(':')
                     if len(tmp) != 2:
                         continue
                     vol_unit = tmp[1].strip().split(' ')
                     # `long` is the Python 2 builtin this file relies on.
                     if len(vol_unit) == 2:
                         tmp_value = self._changeUnit(value=long(vol_unit[0]), force_unit='MB')
                     elif len(vol_unit) == 1:
                         tmp_value = {'volume': long(vol_unit[0]), 'unit': ''}
                     else:
                         # Unrecognised layout: skip it rather than store
                         # the previous line's value (or hit an unbound
                         # name on the first line).
                         continue
                     mem_info[tmp[0].strip()] = tmp_value
     except Exception as err:
         # Best effort: report and return what was collected so far.
         print("Unexpected error: %s" % (err,))
     return mem_info
示例#12
0
 def getSample(self):
     """Parse /proc/net/dev into per-interface counter groups.

     The line starting with ``Inter`` supplies the group names (the
     ``|``-separated sections after the first one). The line starting with
     ``face`` supplies the column names of each group. Every other line is
     read as ``<interface>: <values...>``.

     Returns:
         OrderedDict: interface name -> OrderedDict of
         ``group -> {column: value string}``. An interface whose value count
         does not match the header is stored with an empty mapping after
         printing ``number of items error``. Lines without ``:`` are skipped.
     """
     net_state = OrderedDict()
     title = OrderedDict()
     total_item = 0

     try:
         if util.is_exist('/proc/net/dev'):
             with open('/proc/net/dev') as f:
                 for line in f:
                     stripped = line.strip()
                     # Read items
                     if stripped.startswith('Inter'):
                         for group in stripped.split('|')[1:]:
                             title[group.strip()] = []
                     elif stripped.startswith('face'):
                         sections = stripped.split('|')[1:]
                         # Pair each known group with its column section,
                         # in header order.
                         for group, section in zip(list(title), sections):
                             title[group] = section.strip().split()
                             total_item += len(title[group])
                     else:
                         tmp = stripped.split(':')
                         if len(tmp) < 2:
                             # Blank or malformed line: skip it instead of
                             # aborting the whole parse with an IndexError.
                             continue
                         tmp_data = OrderedDict()

                         value = tmp[1].strip().split()
                         if len(value) == total_item:
                             cnt = 0
                             for group, columns in title.items():
                                 tmp_data[group] = {}
                                 for column in columns:
                                     tmp_data[group][column] = value[cnt]
                                     cnt += 1
                         else:
                             print('number of items error')

                         net_state[tmp[0]] = tmp_data
     except Exception as err:
         # Best effort: report and return what was collected so far.
         print("Unexpected error: %s" % (err,))
     return net_state
示例#13
0
 def getSample(self):
     """Read /proc/loadavg.

     Returns:
         dict: when the file holds exactly five whitespace-separated
         fields, ``load_1_min``, ``load_5_min`` and ``load_15_min`` map to
         ``{'volume': float, 'unit': ''}`` while ``nr_thread`` and
         ``last_pid`` hold the raw strings of the fourth and fifth field.
         Otherwise (or on error) the entries set so far, possibly none.
     """
     load_stat = {}

     try:
         if util.is_exist('/proc/loadavg'):
             # with-block replaces the manual open/close bookkeeping
             with open('/proc/loadavg') as f:
                 load_info = f.read().split()
             if len(load_info) == 5:
                 load_stat['load_1_min'] = {'volume': float(load_info[0]), 'unit': ''}
                 load_stat['load_5_min'] = {'volume': float(load_info[1]), 'unit': ''}
                 load_stat['load_15_min'] = {'volume': float(load_info[2]), 'unit': ''}
                 load_stat['nr_thread'] = load_info[3]
                 load_stat['last_pid'] = load_info[4]
     except Exception as err:
         # Best effort: report and return what was collected so far.
         print("Unexpected error: %s" % (err,))
     return load_stat
示例#14
0
    def getSample(self):
        """Read /proc/loadavg.

        Returns:
            dict: when the file holds exactly five whitespace-separated
            fields, ``load_1_min``, ``load_5_min``, ``load_15_min``,
            ``nr_thread`` and ``last_pid`` map to the raw field strings, in
            file order. Empty otherwise or on error.
        """
        load_stat = {}

        try:
            if util.is_exist('/proc/loadavg'):
                # with-block replaces the manual open/close bookkeeping
                with open('/proc/loadavg') as f:
                    load_info = f.read().split()
                if len(load_info) == 5:
                    load_stat['load_1_min'] = load_info[0]
                    load_stat['load_5_min'] = load_info[1]
                    load_stat['load_15_min'] = load_info[2]
                    load_stat['nr_thread'] = load_info[3]
                    load_stat['last_pid'] = load_info[4]
        except Exception as err:
            # Best effort: report and return what was collected so far.
            print("Unexpected error: %s" % (err,))
        return load_stat
示例#15
0
def calc_pmi(reviews, vocabs, window_size=10):
    """Compute the pointwise mutual information matrix of the vocabulary.

    A cached ``pmi.pkl`` is returned directly when present. Otherwise every
    review is reduced to its in-vocabulary words and scanned with a sliding
    window of ``window_size`` words; a review no longer than the window
    contributes a single window. Word and word-pair occurrences are counted
    once per window and
    ``val[i, j] = log(p_ij / (p_i * p_j))`` is computed wherever both the
    joint and the marginal probabilities are non-zero (0 elsewhere).

    :param reviews: iterable of whitespace-tokenisable review strings.
    :param vocabs: sequence of vocabulary words; position defines the index.
    :param window_size: number of words per sliding window.
    :return: ``len(vocabs) x len(vocabs)`` float64 array of PMI values.
    """
    if util.is_exist("pmi.pkl"):
        print("loaded")
        return util.load_pickle("pmi.pkl")

    word2index = {word: index for index, word in enumerate(vocabs)}

    W = 1  # start the window count at 1 to prevent the p = 1 case
    W_i = np.zeros(len(vocabs), dtype=np.int32)
    # Pair counts start from the identity matrix (diagonal preset to 1).
    W_ij = np.identity(len(vocabs), dtype=np.int32)

    words_list = [[w for w in r.split() if w in word2index] for r in reviews]

    for word_seq in tqdm(words_list, total=len(words_list)):
        # A sequence of n words holds n - window_size + 1 full windows; the
        # "+ 1" keeps the final window (and thus the last word) from being
        # dropped. Shorter sequences are treated as one window.
        window_count = max(len(word_seq) - window_size + 1, 1)
        for start in range(window_count):
            W += 1
            word_set = set(word_seq[start:start + window_size])
            for w in word_set:
                W_i[word2index[w]] += 1

            for w1, w2 in combinations(word_set, 2):
                i1 = word2index[w1]
                i2 = word2index[w2]

                W_ij[i1][i2] += 1
                W_ij[i2][i1] += 1

    p_i = W_i / W
    p_ij = W_ij / W
    val = np.zeros(p_ij.shape, dtype=np.float64)

    for i in range(len(p_i)):
        for j in range(len(p_i)):
            joint = p_ij[i, j]
            independent = p_i[i] * p_i[j]
            if joint != 0 and independent != 0:
                val[i, j] = math.log(joint / independent)
    util.save_pickle(val, "pmi.pkl")
    return val
示例#16
0
    def getSample(self):
        """Parse /proc/cpuinfo into one mapping per blank-line-terminated section.

        Each non-blank line is split at its first ``:`` into a stripped key
        and a stripped value (``''`` when the line has no ``:``). Each blank
        line closes the current section, which is stored as ``proc<N>`` with
        ``N`` counting from 0.

        Returns:
            OrderedDict: ``'proc0'``, ``'proc1'``, ... -> OrderedDict of the
            section's key/value strings. A trailing section that is not
            followed by a blank line is not stored. Empty when the file is
            missing or cannot be read.
        """
        cpu_info = OrderedDict()
        proc_info = OrderedDict()

        nprocs = 0

        try:
            if util.is_exist('/proc/cpuinfo'):
                with open('/proc/cpuinfo') as f:
                    for line in f:
                        if not line.strip():
                            cpu_info['proc%s' % nprocs] = proc_info
                            nprocs += 1
                            proc_info = OrderedDict()
                        else:
                            # Split once at the first colon so a value that
                            # itself contains ':' is kept instead of being
                            # replaced by an empty string.
                            key, _, value = line.partition(':')
                            proc_info[key.strip()] = value.strip()
        except Exception as err:
            # Best effort: report and return what was collected so far.
            print("Unexpected error: %s" % (err,))
        return cpu_info
示例#17
0
    def getSample(self):
        """Parse /proc/cpuinfo into one mapping per blank-line-terminated section.

        Each non-blank line is split at its first ``:`` into a stripped key
        and a stripped value (``''`` when the line has no ``:``). Each blank
        line closes the current section, which is stored as ``proc<N>`` with
        ``N`` counting from 0.

        Returns:
            OrderedDict: ``'proc0'``, ``'proc1'``, ... -> OrderedDict of the
            section's key/value strings. A trailing section that is not
            followed by a blank line is not stored. Empty when the file is
            missing or cannot be read.
        """
        cpu_info = OrderedDict()
        proc_info = OrderedDict()

        nprocs = 0

        try:
            if util.is_exist('/proc/cpuinfo'):
                with open('/proc/cpuinfo') as f:
                    for line in f:
                        if not line.strip():
                            cpu_info['proc%s' % nprocs] = proc_info
                            nprocs += 1
                            proc_info = OrderedDict()
                        else:
                            # Split once at the first colon so a value that
                            # itself contains ':' is kept instead of being
                            # replaced by an empty string.
                            key, _, value = line.partition(':')
                            proc_info[key.strip()] = value.strip()
        except Exception as err:
            # Best effort: report and return what was collected so far.
            print("Unexpected error: %s" % (err,))
        return cpu_info
示例#18
0
from ocean_predictor import OceanPredictor
from keras.optimizers import Adam
from util import load_data, is_exist
import numpy as np
import json
import os
import time
import shutil


if __name__ == '__main__':
    hyperparameter_json_path = './hyperparameters.json'
    is_exist(hyperparameter_json_path, 'hyperparameter json file')

    with open(hyperparameter_json_path) as json_file:
        """
        주의 - json은 문자를 표현할 때 작은 따옴표(')를 못 씀, 큰 따옴표(")만 사용 가능함 
        
        Hyperparameter는 hyperparameter_json_path에 위치한 json파일 내부 변수에서 세팅하면 됨

        null로 주어져도 되는 것들 (python에서 로드될 때는 null이 None으로 변환됨)
            exp_name => null로 주어질 경우 현재 시간을 root 폴더 이름으로 사용함 
            weight_path => null로 주어질 경우 학습을 처음부터 수행함
            random_seed => null로 주어질 경우 weight initialize가 매 학습마다 다르게 됨
        
        학습 할 때마다 exp_name 내부에 다음과 같은 파일들이 만들어짐
            exp_name ---- | SST ---- | train, test 데이터셋에 대한 예측값, 실제값 비교 그래프 
                     ---- | SWH ---- | train, test 데이터셋에 대한 예측값, 실제값 비교 그래프 
                     ---- | ... ( 다른 label 데이터들 )
                     ---- | ... ( 다른 label 데이터들 )
                     ---- | ... ( 다른 label 데이터들 )