Example #1
    def user_relation(self, user):
        self.logger.info('========> relation')
        try:
            uri = '/uplus-api/meow/import/relation'
            er_list = []
            for er in self.usa_session.execute('SELECT * FROM cb.cb_er_dt WHERE follower_id=%s;' % user[0]):
                er_list.append({
                                "fromUserId":str(user[0]),
                                "toUserId":str(er.followee_id),
                                "isLiked":"1",
                                "createTime": time.mktime(time.strptime(str(er.dt)[0:18], '%Y-%m-%d %H:%M:%S'))*1000
                         })
            if 0 < len(er_list):  # skip the request when there are no relations
                # split into chunks
                s_list = util.split_list(er_list, 100)
                for sublist in s_list:
                    payload = {
                               "list":sublist,
                               "uid":str(user[0]),
                               "type":"0"
                           }
                    self.logger.info(simplejson.dumps(payload))
                    self.api_request(uri=uri, body=simplejson.dumps(payload))
        except Exception as ex:
            self.logger.warn('Exception %s' % str(ex))
        
        try:
            uri = '/uplus-api/meow/import/relation'
            ee_list = []
            for ee in self.usa_session.execute('SELECT * FROM cb.cb_ee_dt WHERE followee_id=%s;' % user[0]):
                ee_list.append({
                                "fromUserId":str(ee.follower_id),
                                "toUserId":str(user[0]),
                                "isLiked":"1",
                                "createTime": time.mktime(time.strptime(str(ee.dt)[0:18], '%Y-%m-%d %H:%M:%S'))*1000
                         })
            
            if 0 < len(ee_list):  # skip the request when there are no relations

                # split into chunks
                s_ee_list = util.split_list(ee_list, 100)
                for sublist in s_ee_list:
                    payload = {
                               "list":sublist,
                               "uid":str(user[0]),
                               "type":"1"
                           }
                    self.logger.info(simplejson.dumps(payload))
                    self.api_request(uri=uri, body=simplejson.dumps(payload))
        except Exception as ex:
            self.logger.warn('Exception %s' % str(ex))
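Note: util.split_list(er_list, 100) is used here to break the relation payload into batches of at most 100 entries per request. The helper itself is not shown in this example; a minimal sketch of a chunking function with that calling convention might look like the following (the name and the chunk-size semantics are assumptions inferred from the call site, not the project's actual implementation):

# Hypothetical sketch: split a list into consecutive chunks of at most `chunk_size` items.
def split_list(items, chunk_size):
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]

For instance, split_list(list(range(5)), 2) would yield [[0, 1], [2, 3], [4]], which matches how the example batches er_list before calling api_request.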
Example #2
def is_safe_cfgval(key, cfg_str, new, safe):
    new = list(set(split_list(new)))  # remove list duplicates
    keys = CTK.cfg.keys(key)
    cfg = [CTK.cfg.get_val(cfg_str % key) for key in keys]
    old = [x for sublist in map(split_list, cfg) for x in sublist]

    old.sort()
    dupes = list(set(old) & set(new))
    if safe:  # Do not worry about safe values
        safe = split_list(safe)
        dupes = list(set(dupes) - set(safe))

    if dupes:
        raise ValueError, '%s: %s' % (_('Already in use'), ', '.join(dupes))

    return ','.join(new)
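Note: the split_list used in this CTK validation helper operates on a comma-separated configuration string rather than on a Python list. Its implementation is not included in the example; assuming it splits on commas and surrounding whitespace and discards empty entries, a sketch might be:

import re

# Hypothetical sketch: turn a comma/whitespace separated config string into a clean list.
def split_list(value):
    if not value:
        return []
    return [item for item in re.split(r'[,\s]+', value.strip()) if item]

Under that assumption, split_list(' a, b ,,c ') would return ['a', 'b', 'c'], which is consistent with the duplicate and safe-value checks above.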
Example #3
    def build_classifier(self, extract_features_func):
        self.__parse_learn_data()

        random.shuffle(self.learn_data_array)
        train_data, test_data = util.split_list(self.learn_data_array, 75) # percentage split for train/test data

        self.features = self.__get_word_features(train_data)
        self.bi_features = self.__get_bigram_features(train_data)

        train_set = nltk.classify.apply_features(\
            functools.partial(extract_features_func, self.features,\
                self.bi_features), train_data)
        test_set  = nltk.classify.apply_features(\
            functools.partial(extract_features_func, self.features,\
                self.bi_features), test_data)

        print "Total set length", len(self.learn_data_array)
        print "Train set length", len(train_set)
        print "Test set length",  len(test_set)

        classifier = nltk.NaiveBayesClassifier.train(train_set)

        print "Test set accuracy",\
            nltk.classify.accuracy(classifier, test_set)

        self.classifier = classifier
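Note: here util.split_list(self.learn_data_array, 75) is used as a percentage split that returns a (train, test) pair, a different contract from the fixed-size chunking seen earlier. A minimal sketch under that assumption (not the project's actual code):

# Hypothetical sketch: split a list into two parts,
# the first receiving roughly `percentage` percent of the items.
def split_list(items, percentage):
    cut = len(items) * percentage // 100
    return items[:cut], items[cut:]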
Example #4
def is_safe_cfgval (key, cfg_str, new, safe):
    new  = list(set(split_list (new))) # remove list duplicates
    keys = CTK.cfg.keys (key)
    cfg  = [CTK.cfg.get_val(cfg_str % key) for key in keys]
    old  = [x for sublist in map(split_list, cfg) for x in sublist]

    old.sort()
    dupes = list (set(old) & set(new))
    if safe: # Do not worry about safe values
        safe  = split_list (safe)
        dupes = list (set(dupes) - set(safe))

    if dupes:
        raise ValueError, '%s: %s' %(_('Already in use'), ', '.join(dupes))

    return ','.join(new)
Example #5
    def _create_filter_ev_mb(self):
        self.show_object = {}
        self.obj_cat_list = {}
        self.filter_ev_mb.delete(1, END)
        for obj_type in self.object_types:
            self.show_object[obj_type] = IntVar()
            self.obj_cat_list[obj_type] = util.split_list(
                util.grep_list(self.ev_object_listing, obj_type + ':'), ':', 1)
            self.filter_ev_mb.add_checkbutton(label=obj_type,
                                              variable=self.show_object[obj_type],
                                              command=self.apply_filter)
Example #6
def get_score(rets):
    by_opt = lambda ret: ret['optimizee_name']
    splits, opt_names = util.split_list(rets, by_opt)

    scores = {}

    for opt_name in opt_names:
        losses = [ret['loss'] for ret in splits[opt_name]]
        scores[opt_name] = -np.mean(losses)

    return scores
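Note: in this project split_list takes a key function and apparently returns both the grouped items and the distinct key values, since get_score indexes splits[opt_name] and iterates over opt_names. A sketch consistent with that call site (the return types are assumptions, not the library's documented API):

# Hypothetical sketch: group items by key_func,
# returning (dict of groups, list of distinct keys in first-seen order).
def split_list(items, key_func):
    groups, keys = {}, []
    for item in items:
        key = key_func(item)
        if key not in groups:
            groups[key] = []
            keys.append(key)
        groups[key].append(item)
    return groups, keys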
Example #7
    def get_mine_count_in_neighboord(self, xCoord, yCoord):
        boxes_matrix = list(util.split_list(
            self.boxes, self.level.column_count))
        neighboord = util.get_neighboord(
            xCoord, yCoord, len(boxes_matrix), len(boxes_matrix[0])
        )
        mine_count = 0
        for neighbor in neighboord:
            box = self.__get_box(neighbor[0], neighbor[1])
            if box is not None and box.get_is_mine():
                mine_count += 1
        return mine_count
Example #8
def convert_to_table(boxes, level):
    table = []
    table_header = [" "]
    for x in range(1, level.column_count+1):
        table_header.append(str(x))
    table.append(table_header)
    boxes_matrix = list(util.split_list(boxes, level.row_count))

    for row in range(len(boxes_matrix)):
        row_data = [str(row + 1)]
        for col in range(len(boxes_matrix[0])):
            row_data.append(box_text(boxes_matrix[row][col]))
        table.append(row_data)
    return table
Example #9
    def get_full_cmd_embeds(self):
        embeds = []

        description = "Prefix: {}\nCommand names are not case sensitive.\n" \
                      "'()' means optional.\n'(-letter/letter)' means optional options, " \
                      "can be combined if specified, slash isn't included in the command.\n" \
                      "'[]' means required.".format(self.parent_client.prefix)

        for fields in util.split_list(self.handler.get_cmd_inlines(), 25):
            embeds.append(
                util.make_embed(colour=util.colour_royal_purple,
                                description=description,
                                author_name="Help",
                                author_icon_url=util.image_question_mark,
                                fields=fields))

        return embeds
Example #10
    async def get_full_birthday_embeds(self, message: discord.Message):
        embeds = []
        description = ""

        if len(self.get_admin_inlines(message)) == 0:
            embeds.append(
                util.make_embed(colour=util.colour_admin, description="No one is in this list!",
                                author_name="Admin List", author_icon_url=util.image_admin_lock))

        if self.has_wanted_argument(message, "c"):
            description = self.get_count_description(message)

        for fields in util.split_list(self.get_admin_inlines(message), 25):
            embeds.append(
                util.make_embed(colour=util.colour_admin, description=description, author_name="Admin List",
                                author_icon_url=util.image_admin_lock, fields=fields))

        return embeds
Example #11
def is_list (value):
    tmp = split_list (value)
    if not tmp:
        return ''
    return ','.join(tmp)
Example #12
def plot_training_results(flags, experiment_path, results):
    by_opt = lambda ret: ret['optimizee_name']
    train_results, test_results = results

    train_results_splits, opts = util.split_list(train_results, by_opt)
    test_results_splits, _ = util.split_list(test_results, by_opt)

    for opt_name, rets in train_results_splits.items():
        print("{}: {} iterations".format(opt_name, len(rets)))

    fig, axes = plt.subplots(nrows=len(opts), figsize=(15, 12))

    if len(opts) == 1:
        axes = (axes, )

    alpha = 1.0
    if flags.plot_moving:
        alpha = 0.5

    for i, opt_name in enumerate(opts):
        ax = axes[i]

        losses_train = [ret['loss'] for ret in train_results_splits[opt_name]]
        try:
            losses_test = [
                ret['loss'] for ret in test_results_splits[opt_name]
            ]
        except:
            losses_test = []

        l_train = int(len(losses_train) * (1. - flags.frac))
        l_test = int(len(losses_test) * (1. - flags.frac))

        if flags.plot_moving:
            moving_train = util.get_moving(losses_train, mu=0.95)[l_train:]
            try:
                moving_test = util.get_moving(losses_test, mu=0.95)[l_test:]
            except:
                moving_test = []

        losses_train = losses_train[l_train:]
        losses_test = losses_test[l_test:]

        if len(losses_test):
            s = len(losses_train) // len(losses_test)
            lt = list(range(0, len(losses_train), s))
            lt = lt[:len(losses_test)]
        else:
            lt = []

        p_train = ax.plot(losses_train, label='train', alpha=alpha)
        p_test = ax.plot(lt, losses_test, label='test', alpha=alpha)

        if flags.plot_moving:
            ax.plot(moving_train,
                    label='moving train',
                    color=p_train[-1].get_color())
            ax.plot(lt,
                    moving_test,
                    label='moving test',
                    color=p_test[-1].get_color())

        ax.set_title(opt_name)
        ax.set_ylabel('loss')
        ax.set_xlabel('iteration number')
        ax.legend(loc='best')

    fig.tight_layout()

    save_figure(fig, filename=experiment_path / 'training')
Example #13
def is_ip_or_netmask_list (value):
    re = []
    for e in split_list(value):
        re.append(is_ip_or_netmask(e))
    return ','.join(re)
Example #14
def main():
    global parent_pid
    parent_pid = os.getpid()
    args_info = ArgsProcessing(sys.argv[1:])  # parse the command-line arguments
    work_mode = args_info.current_mode  # work mode
    log_file_list = args_info.log_file_list  # all log files
    signal.signal(signal.SIGINT, signal_handler)  # SIGINT is the signal sent by Ctrl+C, value 2
    start_time = time.time()
    # Mode 1: analyze a single transaction; Mode 2: analyze all transactions; Mode 3: analyze a single block; Mode 4: analyze all blocks
    if work_mode == 1:
        tx_hash = args_info.tx_hash  # transaction hash
        # fetch all transaction data as dictionaries
        all_tx_dict_list = get_all_log_dict(log_file_list, 'transaction')
        overall_earliest_msg, overall_latest_msg = retrieve_earliest_latest_msg(
            all_tx_dict_list, tx_hash)
        if overall_earliest_msg and overall_latest_msg:
            print('Earliest: %s' % overall_earliest_msg)
            print('Latest: %s' % overall_latest_msg)
            interval_time = millisecond2time_format(
                calc_millisecond_interval(overall_latest_msg[0],
                                          overall_earliest_msg[0]))
            print('Interval: %s' % interval_time)
        else:
            print('The transaction %s was not found in log file!' % tx_hash)
    elif work_mode == 2:
        all_tx_dict_list = get_all_log_dict(log_file_list, 'transaction')
        all_tx_hash = []  # all transaction hashes
        for tx_dict in all_tx_dict_list:
            all_tx_hash.extend(list(tx_dict.keys()))
        # remove duplicate elements
        all_tx_hash = list(set(all_tx_hash))
        broadcasting_time_queue = Queue()  # queue that stores the broadcasting times
        processes = []
        # get the number of CPU cores
        processor_num = cpu_count()
        # split all transaction hashes into parts as even as possible, one per CPU core
        split_all_tx_hash = split_list(all_tx_hash, processor_num)
        for work_list in split_all_tx_hash:
            # spawn one child process per chunk (i.e. per CPU core) to compute broadcasting times
            p = Process(target=calc_broadcasting_time,
                        args=(work_list, broadcasting_time_queue,
                              all_tx_dict_list))
            p.start()
            processes.append(p)
        for process in processes:
            # wait for all child processes to finish
            process.join()
        broadcasting_time_list = []
        while True:
            # collect each child process's results into a list in the parent process
            broadcasting_time_list.append(broadcasting_time_queue.get())
            if broadcasting_time_queue.empty():
                break
        # merge the sorted lists produced by the child processes with a min-heap
        broadcasting_time_list = list(heapq.merge(*broadcasting_time_list))
        # compute the average and the median of the broadcasting times
        average, median = get_average_median(broadcasting_time_list)
        print('Shortest time: %s' % millisecond2time_format(broadcasting_time_list[0]))
        print('Longest time:  %s' % millisecond2time_format(broadcasting_time_list[-1]))
        print('Average:       %s' % millisecond2time_format(average))
        print('Median:        %s' % millisecond2time_format(median))
    elif work_mode == 3:
        height = args_info.height  # block height
        all_block_dict_list = get_all_log_dict(log_file_list, 'block')
        overall_earliest_msg, overall_latest_msg = retrieve_earliest_latest_msg(
            all_block_dict_list, height)
        if overall_earliest_msg and overall_latest_msg:
            print('Earliest: %s' % overall_earliest_msg)
            print('Latest: %s' % overall_latest_msg)
            interval_time = millisecond2time_format(
                calc_millisecond_interval(overall_latest_msg[0],
                                          overall_earliest_msg[0]))
            print('Interval: %s' % interval_time)
        else:
            print('The block height %s was not found in log file!' % height)
    elif work_mode == 4:
        all_block_dict_list = get_all_log_dict(log_file_list, 'block')
        all_block_height = []
        for block_dict in all_block_dict_list:
            all_block_height.extend(list(block_dict.keys()))
        all_block_height = list(set(all_block_height))
        broadcasting_time_queue = Queue()
        processes = []
        processor_num = cpu_count()
        split_all_block_height = split_list(all_block_height, processor_num)
        for work_list in split_all_block_height:
            p = Process(target=calc_broadcasting_time,
                        args=(work_list, broadcasting_time_queue,
                              all_block_dict_list))
            p.start()
            processes.append(p)
        for process in processes:
            process.join()
        broadcasting_time_list = []
        while True:
            broadcasting_time_list.append(broadcasting_time_queue.get())
            if broadcasting_time_queue.empty():
                break
        broadcasting_time_list = list(heapq.merge(*broadcasting_time_list))
        average, median = get_average_median(broadcasting_time_list)
        print('Shortest time: %s' % millisecond2time_format(broadcasting_time_list[0]))
        print('Longest time:  %s' % millisecond2time_format(broadcasting_time_list[-1]))
        print('Average:       %s' % millisecond2time_format(average))
        print('Median:        %s' % millisecond2time_format(median))

    print('Analysis time:', time.time() - start_time)
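Note: unlike the fixed-size batching in Example #1, split_list(all_tx_hash, processor_num) here divides the work into a given number of roughly equal parts, one per CPU core. A sketch of such an N-way split (assumed semantics, not this script's actual helper):

# Hypothetical sketch: split items into `num_parts` parts whose sizes differ by at most one.
def split_list(items, num_parts):
    base, extra = divmod(len(items), num_parts)
    parts, start = [], 0
    for i in range(num_parts):
        end = start + base + (1 if i < extra else 0)
        parts.append(items[start:end])
        start = end
    return parts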
Example #15
def is_safe_id_list (value):
    ids = []
    for id in split_list (value):
        ids.append(is_safe_id (id))
    return ','.join(ids)
Example #16
def is_ip_list (value):
    re = []
    for e in split_list(value):
        re.append(is_ip(e))
    return ','.join(re)
Example #17
File: eval.py  Project: atpcurr/atpcurr
def eval_mpi(args, evaldir, modelfile, model_index):

    from mpi4py import MPI as mpi
    rank = mpi.COMM_WORLD.Get_rank()
    all = mpi.COMM_WORLD.Get_size()

    if modelfile is None:
        model = None
    else:
        if args.model_type == "ppo2":
            model = PPO2.load(modelfile)
        elif args.model_type == "ppo1":
            model = PPO1.load(modelfile)
    env = ProofEnv.ProofEnv(args)
    env.set_model(model)

    dirparts = evaldir.split("/")
    if dirparts[-1] == "":
        dirname = dirparts[-2]
    else:
        dirname = dirparts[-1]

    evalprefix = "eval_{}_{}_{}_{}".format(model_index, dirname, args.evaltype,
                                           args.evalcount)

    proofs_found = 0
    det_proofs_found = 0
    proofs_tried = 0
    len_sum = 0.0
    attempts_sum = 0.0
    prove.guidance_time = 0

    filenames_original = sorted(util.list_problems(evaldir))
    chunks = util.split_list(filenames_original, all, extensible=False)
    chunk = chunks[rank]
    for index in range(len(chunks[0])):
        if index >= len(chunk):
            tried, success, prooflen, attempts, det_success = (0, 0, 0, 0, 0)
        else:
            filepath = chunk[index]
            print("\n\nTrying to find proof for {}".format(filepath))
            success, prooflen, attempts, evaltype = find_one_proof(
                args, model, env, filepath)
            tried = 1
            det_success = success * (evaltype == "det")
        results = mpi.COMM_WORLD.gather(
            (tried, success, prooflen, attempts, det_success), root=0)
        if rank == 0:
            # print(results)
            for i in range(len(results)):
                proofs_tried += results[i][0]
                succ = results[i][1]
                if succ == 1:
                    proofs_found += 1
                    len_sum += results[i][2]
                    attempts_sum += results[i][3]
                det_succ = results[i][4]
                if det_succ:
                    det_proofs_found += 1
            logger.record_tabular("update_no", proofs_tried)
            logger.record_tabular("{}_proofs_found".format(evalprefix),
                                  proofs_found)
            logger.record_tabular("{}_det_proofs_found".format(evalprefix),
                                  det_proofs_found)
            logger.record_tabular("{}_found".format(evalprefix),
                                  util.safediv(proofs_found, proofs_tried))
            logger.record_tabular("{}_avg_prooflen".format(evalprefix),
                                  util.safediv(len_sum, proofs_found))
            logger.record_tabular("{}_avg_attempts".format(evalprefix),
                                  util.safediv(attempts_sum, proofs_found))
            logger.dumpkvs()
            print("Found: {}({})/{} proofs".format(proofs_found,
                                                   det_proofs_found,
                                                   proofs_tried))
            sys.stdout.flush()

    print("\n\nEVALUATION {}".format(rank))
    print("   evaltime: {}".format(args.evaltime))
    print("   evaldir: {}".format(dirname))
    print("   model_index: {}".format(model_index))
    print("   evaltype: {}".format(args.evaltype))
    print("   evalcount: {}".format(args.evalcount))
    print("   FOUND: {}/{}".format(proofs_found, proofs_tried))
    print("   DET FOUND: {}/{}".format(det_proofs_found, proofs_tried))
    print("   Avg proof length: {}".format(util.safediv(len_sum,
                                                        proofs_found)))
    print("   Avg attempts: {}".format(util.safediv(attempts_sum,
                                                    proofs_found)))
    print("   Avg step time: {}".format(env.backend.step_time /
                                        env.backend.step_count))

    env.close()
    del env
    del model
Example #18
def collect_data(db):
    """should be ideally run in background"""
    # the names in interested_clusters are not arbitrary, it has to match the
    # clusternames in static/xml/clusters.xml, e.g. scinet is not the same as
    # Scinet, SciNet, or Sci Net

    # be sure to use config var INTERESTED_CLUSTERS on heroku
    # here, just use scinet is for local testing,
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca").split()

    # interested_clusters = ["orcinus"]

    # sort of variables initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:
        for ic in interested_clusters:                 # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            created = datetime.datetime.now()
            if raw_xml:
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # having such error for scinet and nestor,
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached,
                # using system memory instead for now 2012-06-12
                # MEMC.set("RAW_XML", raw_xml_cache)

            # rcu, qcu: running & queuing core usages
            rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

            # 1. generate reports and cache it
            reports = MEMC.get("REPORTS")
            if not reports:                               # meaning: first time
                reports = {}
            report = ic_obj.gen_report(rcu, qcu, usermap, created)
            reports[ic_obj.clustername] = report
            MEMC.set("REPORTS", reports)

            # 2. insert to database
            insert2db(rcu, qcu, ic_obj, created, db)

            # 3. cache usage data for later plotting
            # dur_queries = [last_day_data, last_week_data, last_month_data,
            # last_year_data]
            dur_queries = prepare_data_for_plotting(ic, created, db)

            # this is for /.json kind of url
            figs_data_dict[ic] = {i:j for i, j in zip(durations, dur_queries)}
            MEMC.set("FIGS_DATA", figs_data_dict)

            # ldd:last_day_data;    lwd:last_week_data
            # lmd:last_month_data;  lyd:last_year_data
            ldd, lwd, lmd, lyd = dur_queries
            total_sec_to_now = (
                lyd[0][-1] - THE_VERY_BEGINNING).total_seconds()

            # inte_coresec: integrate core seconds
            usage_frac_dict[ic] = {
                'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
                }
            MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # 1). usage vs. time
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur,
                                      figs_data_dict, usage_frac_dict)

                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)                            # clear up the memory

                # figure naming pattern should be systematically redesigned
                # when # gets large
                ident = str('_'.join([dur, str(index)]))
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # 2). histogram plotting
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]

        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.                         # the width of the bars
            ind = np.arange(0, N, width)       # the x locations for the groups

            keys = usage_frac_dict_by_dur[dur].keys()
            keys.sort(key=lambda k:usage_frac_dict_by_dur[dur][k], reverse=True)
            # make sure the order is right
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]

            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(bottom=0.2) # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)

            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)

            ylim = list(ax.get_ylim())
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05

            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N*width])
            ax.set_ylim(ylim)

            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur, size=40, family="monospace",
                         bbox={'facecolor':'red', 'alpha':0.5})
            ax.title.set_y(1.02)                            # offset title position
            ax.set_xticks(ind+width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")

            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)

            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()

            db = update_the_figure(db, Figure, ident, fig_content, created)
        db.session.commit()

        # when at last, maybe 10min is too frequent, think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)                                 # sleep 10 min
        else:
            time.sleep(float(dt))
Example #19
def is_ip_list(value):
    re = []
    for e in split_list(value):
        re.append(is_ip(e))
    return ','.join(re)
Example #20
def is_path_list(value):
    re = []
    for p in split_list(value):
        re.append(is_path(p))
    return ','.join(re)
Example #21
def collect_data(db):
    """should be ideally run in background"""
    # the names in interested_clusters are not arbitrary, it has to match the
    # clusternames in static/xml/clusters.xml, e.g. scinet is not the same as
    # Scinet, SciNet, or Sci Net

    # be sure to use config var INTERESTED_CLUSTERS on heroku
    # here, just use scinet is for local testing,
    interested_clusters = os.getenv(
        "INTERESTED_CLUSTERS",
        "scinet mp2 colosse guillimin lattice nestor parallel orcinus orca"
    ).split()

    # interested_clusters = ["orcinus"]

    # sort of variables initialization
    figs_data_dict, usage_frac_dict = {}, {}
    usermap = util.user_mapping()
    delta_ts, resolutions = get_delta_ts_and_resolutions()
    durations = DURATIONS
    while True:
        for ic in interested_clusters:  # ic: interested cluster
            ic_obj = util.gen_cluster_obj_from_clustername(ic)
            raw_xml = ic_obj.fetch_raw_xml()
            created = datetime.datetime.now()
            if raw_xml:
                global RAW_XML
                RAW_XML[ic] = raw_xml
                # having such error for scinet and nestor,
                # MemcachedError: error 37 from memcached_set: SUCCESS
                # guess those xml data may be too big for memcached,
                # using system memory instead for now 2012-06-12
                # MEMC.set("RAW_XML", raw_xml_cache)

            # rcu, qcu: running & queuing core usages
            rcu, qcu = ic_obj.process_raw_xml(usermap, raw_xml)

            # 1. generate reports and cache it
            reports = MEMC.get("REPORTS")
            if not reports:  # meaning: first time
                reports = {}
            report = ic_obj.gen_report(rcu, qcu, usermap, created)
            reports[ic_obj.clustername] = report
            MEMC.set("REPORTS", reports)

            # 2. insert to database
            insert2db(rcu, qcu, ic_obj, created, db)

            # 3. cache usage data for later plotting
            # dur_queries = [last_day_data, last_week_data, last_month_data,
            # last_year_data]
            dur_queries = prepare_data_for_plotting(ic, created, db)

            # this is for /.json kind of url
            figs_data_dict[ic] = {i: j for i, j in zip(durations, dur_queries)}
            MEMC.set("FIGS_DATA", figs_data_dict)

            # ldd:last_day_data;    lwd:last_week_data
            # lmd:last_month_data;  lyd:last_year_data
            ldd, lwd, lmd, lyd = dur_queries
            total_sec_to_now = (lyd[0][-1] -
                                THE_VERY_BEGINNING).total_seconds()

            # inte_coresec: integrate core seconds
            usage_frac_dict[ic] = {
                'day': inte_coresec(*ldd) / (ic_obj.quota * 24 * 3600),
                'week': inte_coresec(*lwd) / (ic_obj.quota * 7 * 24 * 3600),
                'month': inte_coresec(*lmd) / (ic_obj.quota * 30 * 24 * 3600),
                'year': inte_coresec(*lyd) / (ic_obj.quota * total_sec_to_now),
            }
            MEMC.set("USAGE_FRAC", usage_frac_dict)

        # 4. Now let's do the real plotting, first: usage vs. time, then: histogram
        # 1). usage vs. time
        keys = sorted(figs_data_dict.keys())
        for index, key_group in enumerate(util.split_list(keys, step=4)):
            figs, axes = {}, {}
            for dur in durations:
                figs[dur] = plt.figure(figsize=(24, 13.5))
                axes[dur] = figs[dur].add_subplot(111)
                fig, ax = figs[dur], axes[dur]
                fig = do_fig_plotting(fig, ax, key_group, dur, figs_data_dict,
                                      usage_frac_dict)

                canvas = FigureCanvas(fig)
                png_output = StringIO.StringIO()
                canvas.print_png(png_output)
                plt.close(fig)  # clear up the memory

                # figure naming pattern should be systematically redesigned
                # when # gets large
                ident = str('_'.join([dur, str(index)]))
                fig_content = png_output.getvalue()
                db = update_the_figure(db, Figure, ident, fig_content, created)
            db.session.commit()

        # 2). histogram plotting
        usage_frac_dict_by_dur = {}
        for dur in durations:
            usage_frac_dict_by_dur[dur] = {}
        for ic in usage_frac_dict:
            for dur in usage_frac_dict[ic]:
                usage_frac_dict_by_dur[dur][ic] = usage_frac_dict[ic][dur]

        for dur in usage_frac_dict_by_dur:
            N = len(usage_frac_dict_by_dur[dur])
            width = 1.  # the width of the bars
            ind = np.arange(0, N, width)  # the x locations for the groups

            keys = usage_frac_dict_by_dur[dur].keys()
            keys.sort(key=lambda k: usage_frac_dict_by_dur[dur][k],
                      reverse=True)
            # make sure the order is right
            durMeans = [usage_frac_dict_by_dur[dur][k] for k in keys]

            fig = plt.figure(figsize=(16, 10))
            fig.subplots_adjust(
                bottom=0.2)  # otherwise, xticklabels cannot be shown fully
            ax = fig.add_subplot(111)

            for i, d in zip(ind, durMeans):
                # 'g': green; 'r': red
                col = 'g' if d > 1 else 'r'
                ax.bar(i, d, width, color=col)

            ylim = list(ax.get_ylim())
            ylim[1] = ylim[1] * 1.1 if ylim[1] > 1 else 1.05

            ax.plot([0, 100], [1, 1], 'k--')
            ax.set_xlim([0, N * width])
            ax.set_ylim(ylim)

            ax.set_ylabel('Usage', labelpad=40)
            ax.set_title(dur,
                         size=40,
                         family="monospace",
                         bbox={
                             'facecolor': 'red',
                             'alpha': 0.5
                         })
            ax.title.set_y(1.02)  # offset title position
            ax.set_xticks(ind + width / 2.)
            ax.set_xticklabels(keys, size=25, rotation=45)
            ax.grid(b=True, which="both")

            canvas = FigureCanvas(fig)
            png_output = StringIO.StringIO()
            canvas.print_png(png_output)
            plt.close(fig)

            ident = "histo_{0}".format(dur)
            fig_content = png_output.getvalue()

            db = update_the_figure(db, Figure, ident, fig_content, created)
        db.session.commit()

        # when at last, maybe 10min is too frequent, think about 30 min
        dt = os.environ.get('DELTAT')
        if not dt:
            time.sleep(600)  # sleep 10 min
        else:
            time.sleep(float(dt))
Example #22
def is_extension_list(value):
    re = []
    for p in split_list(value):
        re.append(is_extension(p))
    return ','.join(re)
Example #23
def main():

    args = get_configuration()
    args.state_dim = util.get_state_dim(args)

    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir, exist_ok=True)

    if args.graph_embedding:

        class MyPolicy(EmbeddingPolicy):
            def __init__(self,
                         sess,
                         ob_space,
                         ac_space,
                         n_env,
                         n_steps,
                         n_batch,
                         reuse=True,
                         **_kwargs):
                super().__init__(sess,
                                 ob_space,
                                 ac_space,
                                 n_env,
                                 n_steps,
                                 n_batch,
                                 args,
                                 reuse=reuse,
                                 **_kwargs)
    else:

        class MyPolicy(EnigmaPolicy):
            def __init__(self,
                         sess,
                         ob_space,
                         ac_space,
                         n_env,
                         n_steps,
                         n_batch,
                         reuse=True,
                         **_kwargs):
                super().__init__(sess,
                                 ob_space,
                                 ac_space,
                                 n_env,
                                 n_steps,
                                 n_batch,
                                 args,
                                 reuse=reuse,
                                 **_kwargs)

    t0 = time.time()

    from mpi4py import MPI as mpi
    comm = mpi.COMM_WORLD
    rank = comm.Get_rank()
    all = comm.Get_size()

    gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(',')
    gpu_count = len(gpus)
    gpu = gpus[rank % gpu_count]
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    print("My rank is {} out of {}, using GPU {}".format(rank, all, gpu))

    if args.model_type == "ppo2":
        from stable_baselines import PPO2 as PPO
        env = SubprocVecEnv([(lambda: ProofEnv.ProofEnv(args))
                             for _ in range(args.parallel_envs)
                             ])  #, start_method="spawn")
    elif args.model_type == "ppo1":
        args.parallel_envs = 1
        env = DummyVecEnv([lambda: ProofEnv.ProofEnv(args)])
        # from stable_baselines import PPO1 as PPO
        from ppo import PPO1 as PPO

    if args.saved_model == None:
        myPolicy = MyPolicy
        if args.model_type == "ppo2":
            model = PPO(
                policy=myPolicy,
                env=env,
                n_steps=args.actorbatch,
                # nminibatches=args.optim_stepsize,
                lam=0.95,
                gamma=args.gamma,
                noptepochs=4,
                ent_coef=args.entcoeff,
                learning_rate=lambda f: f * 2.5e-4,
                cliprange=lambda f: f * 0.1,
                verbose=1)
        elif args.model_type == "ppo1":
            model = PPO(myPolicy,
                        env,
                        verbose=2,
                        timesteps_per_actorbatch=args.actorbatch,
                        schedule=args.lr_schedule,
                        optim_stepsize=args.optim_stepsize,
                        entcoeff=args.entcoeff,
                        optim_batchsize=args.optim_batchsize,
                        gamma=args.gamma)
    else:
        print("Loading model from {}".format(args.saved_model))
        model = PPO.load(args.saved_model)
        model.set_env(env)

    counter = 0

    for ind in range(args.parallel_envs):
        env.env_method("set_model",
                       model,
                       indices=list(range(args.parallel_envs)))

    modelfiles = []
    for train_timestep, train_dir in zip(args.train_timesteps,
                                         args.train_dirs):
        problem_files = sorted(util.list_problems(train_dir))
        problem_files = util.split_list(problem_files, all)[rank]
        problem_files_splitted = util.split_list(problem_files,
                                                 args.parallel_envs,
                                                 extensible=False)

        if args.add_repeating_pretraining:
            for ind in range(args.parallel_envs):
                env.env_method("set_source",
                               problem_files_splitted[ind],
                               indices=[ind],
                               generator_type="repeating")
            # all_thread_timestep = train_timestep * all
            print("PRETRAINING")
            model.learn(total_timesteps=train_timestep)
            print("Pretraining on {} finished in {}".format(
                train_dir, util.format_time(time.time() - t0)))

        for ind in range(args.parallel_envs):
            env.env_method("set_source",
                           problem_files_splitted[ind],
                           indices=[ind])
        # all_thread_timestep = train_timestep * all
        model.learn(total_timesteps=train_timestep)

        modelfile = "{}/ppo1_fcop_train_{}".format(args.outdir, counter)
        modelfiles.append(modelfile)
        if rank == 0:
            model.save(modelfile)
            # logger.logkv("finished_train_problems", counter)
        counter += 1

        print("Training on {} finished in {}".format(
            train_dir, util.format_time(time.time() - t0)))
        statistics_list = env.get_attr("statistics",
                                       indices=list(range(args.parallel_envs)))
        blacklist_list = env.get_attr("blacklist",
                                      indices=list(range(args.parallel_envs)))
        for i, statistics in enumerate(statistics_list):
            print("ENV {} - {} - blacklist: {}\n".format(
                rank, i, blacklist_list[i])),
            util.print_problemdict(statistics, rank)

            # for f in statistics:
            #     statistics[f]["mcts"].display_tree([0])

        # util.print_problemdict(env.envs[0].statistics)

    if len(args.train_dirs) > 0 and len(
            args.train_timesteps) > 0:  # we did training
        print("We have finished training, rank {}".format(rank))

        # for p in problem_files:
        #     vis_policy.vis_policy(env.envs[0], model, p)

        env.close()
        del env
        del model

    # here we wait for everyone
    comm.Barrier()
    print("We have started evaluation, rank {}".format(rank))

    # evaluation without training
    if (args.saved_model is not None) and (len(
            args.train_dirs) == 0):  # no training, just evaluation
        modelfiles = [args.saved_model]

    for evaldir in args.evaldirs:
        for model_index, modelfile in enumerate(modelfiles):
            eval.eval_mpi(args, evaldir, modelfile, model_index)

            # here we wait for everyone
            comm.Barrier()
Example #24
def is_safe_id_list(value):
    ids = []
    for id in split_list(value):
        ids.append(is_safe_id(id))
    return ','.join(ids)
Example #25
def generateHeaderForCppFile(fn):
    cur_mtime = os.path.getmtime(fn)
    old_mtime = file_data_cache[fn]['mtime'] if fn in file_data_cache else 0

    if cur_mtime > old_mtime:
        members = []
        logging.debug(fn)

        tfn = fn.replace('Private', 'Public').replace('.cpp', '.h')
        if not os.path.isdir(os.path.dirname(tfn)): os.makedirs(os.path.dirname(tfn))

        className = re.search('([A-Z0-9a-z]+)\\.cpp', fn).group(1)

        extends = []
        isClassFile = False
        classMods = ''
        isInFunction = False
        currentFunction = None
        is_enum_file = False
        enum_values = []

        try:
            with open(fn) as f:
                for l in f:
                    m = re.match('extends\\s*\\(([^)]+)\\)', l)
                    if m:
                        if m.group(1)[0] in ['U', 'E', 'F', 'T', 'A']:
                            className = m.group(1)[0] + className
                        extends = m.group(1).split(' ')
                        isClassFile = True

                    if isClassFile:
                        if (re.match("\\S.+", l)):
                            newMember = ClassMember.createFromLine(l)

                            if newMember:
                                members.append(newMember)

                                if newMember.type == 'FUNCTION': currentFunction = newMember

                        if l.startswith('}'):
                            logging.debug("End of function")
                            currentFunction = None

                        if 'Super::' in l and currentFunction:
                            logging.debug("Found Super::")
                            currentFunction.isOverride = True

                        #
                        m = re.match('blueprintEvent\\((?P<event>[^()]+)(\\((?P<args>[^)]+)\\))?\\)', l)
                        if m:
                            members.append(ClassMember('FUNCTION', cppType='void', name=m.group('event'), access='public', isConst=False, mods='BlueprintImplementableEvent', args=m.group('args')))

                        m = re.match('classMods\\((?P<mods>.+)\\)', l)
                        if m:
                            classMods = util.join_list(util.split_list(m.group('mods')))

                    m = re.match('enumValue\\((?P<value>[^)]+)\\)', l)
                    if m:
                        is_enum_file = True
                        enum_values.append(m.group('value'))
        except Exception as ex:
            logging.error("Error parsing CPP: " + str(ex) + '\n' + traceback.format_exc())

        file_data_cache[fn] = {
            'members': members,
            'className': className,
            'extends': extends,
            'classMods': classMods,
            'enum_values': enum_values,
            'mtime': cur_mtime,
            'isClassFile': isClassFile,
            'is_enum_file': is_enum_file,
            'tfn': tfn,
        }

    d = file_data_cache[fn]


    if d['isClassFile']: return classfileheadergen.generate_class_file_header(fn, list(d['members']), d['tfn'], d['className'], d['extends'], d['classMods'])
    if d['is_enum_file']: return enumfileheadergen.generate_enum_file_header(fn, list(d['enum_values']), d['tfn'], d['className'], d['classMods'])

    logging.debug("Cannot generate header for " + fn)

    return None
Example #26
def is_ip_or_netmask_list(value):
    re = []
    for e in split_list(value):
        re.append(is_ip_or_netmask(e))
    return ','.join(re)
Example #27
def is_extension_list (value):
    re = []
    for p in split_list(value):
        re.append(is_extension(p))
    return ','.join(re)
Example #28
def is_list(value):
    tmp = split_list(value)
    if not tmp:
        return ''
    return ','.join(tmp)
Example #29
def is_path_list (value):
    re = []
    for p in split_list(value):
        re.append(is_path(p))
    return ','.join(re)