Example #1
def __init__(self, workdir, language, sourcefile, testfile):
    self._path = path.dirname(path.abspath(__file__))
    self.workdir = workdir
    self.language = language
    self.sourcefile = sourcefile
    self.testfile = testfile
    self.tmpltdir = path.join(self._path, "templates")
    self.judger = Judger()
Example #2
def lambda_handler(event, context):
    print(event)
    env = event.get('env')
    twitter = Twitter(env=env)
    trends = twitter.get_trends(id=WOEID)
    judger = Judger()
    for trend in trends:
        should_tweet = judger.judge_whether_tweet(trend)
        if should_tweet:
            twitter.post_tweet(trend.get('name'))
            time.sleep(SLEEP_TIME)
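The handler above relies on two module-level constants that the snippet does not show. A sketch of what they might hold (the values are illustrative assumptions, not taken from the source):

# Hypothetical module constants assumed by lambda_handler above:
WOEID = 23424856   # a Yahoo! Where-On-Earth ID (this one is Japan); the real value is not shown
SLEEP_TIME = 5     # seconds to pause between consecutive tweets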
Example #3
def play(policy_number):
    player1 = HumanPlayer()
    player2 = Player(epsilon=0, symbol=-1)
    player2.load_policy(policy_number)
    while True:
        judger = Judger(player1, player2)
        winner = judger.play()
        if winner == player2.symbol:
            print("You lose!")
        elif winner == player1.symbol:
            print("You win!")
        else:
            print("It is a tie!")
Example #4
def train(epochs, print_every_n=500):
    with open('app/saves/metrics_all.csv', "w") as file:
        writer = csv.writer(file)
        writer.writerow(['win_rate1', 'win_rate2', 'draw_rate'])
    with open('app/saves/metrics_first.csv', "w") as file:
        writer = csv.writer(file)
        writer.writerow(['td_error'])
    with open('app/saves/metrics_second.csv', "w") as file:
        writer = csv.writer(file)
        writer.writerow(['td_error'])

    epsilon = 1
    epsilon_decay = 0.999
    epsilon_min = 0.01

    player1 = Player(epsilon=epsilon, symbol=1)
    player2 = Player(epsilon=epsilon, symbol=-1)
    judger = Judger(player1, player2)
    player1_win = 0.0
    player2_win = 0.0
    for i in range(1, epochs + 1):
        winner = judger.play(train=True, print_state=False)
        if winner == 1:
            player1_win += 1
        if winner == -1:
            player2_win += 1

        win_rate1 = player1_win / i
        win_rate2 = player2_win / i
        draw_rate = (i - (player1_win + player2_win)) / i

        with open('app/saves/metrics_all.csv', "a") as metrics_file:
            writer = csv.writer(metrics_file)
            writer.writerow([win_rate1, win_rate2, draw_rate])

        if i % print_every_n == 0:
            print(
                'Epoch %d, player 1 winrate: %.02f, player 2 winrate: %.02f, draw rate: %.02f'
                % (i, win_rate1, win_rate2, draw_rate))

            player1.save_policy(i)
            player2.save_policy(i)

        epsilon = max(epsilon_min, epsilon * epsilon_decay)
        player1.set_epsilon(epsilon)
        player2.set_epsilon(epsilon)
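With epsilon_decay = 0.999 and epsilon_min = 0.01, the schedule above stops decaying once 0.999**n falls below 0.01, i.e. after roughly ln(0.01)/ln(0.999) epochs; a quick check:

import math

# Epochs until the decaying epsilon in train() reaches epsilon_min:
n = math.log(0.01) / math.log(0.999)
print(round(n))  # 4603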
Example #5
def judge():
    data = request.json
    logging.info(f"recieve{data}")
    submit_id = data['submit_id']
    problem_id = data['problem_id']
    logging.info(f"run problem id: {problem_id}")
    source = data['source']
    judge_dir = os.path.join(TMP_DIR,
                             str(submit_id))  # temp directory for running
    data_dir = os.path.join(
        BASE_DIR, str(problem_id))  # standard input output file, read only
    if os.path.exists(judge_dir):
        shutil.rmtree(judge_dir)
    os.makedirs(judge_dir)
    with open(os.path.join(judge_dir, data['src']),
              mode='w+',
              encoding='utf-8') as f:
        f.write(source)
    compiler = Compiler(data['compile_command'], judge_dir)
    spj = False
    if os.path.exists(os.path.join(data_dir, "spj")) or \
            os.path.exists(os.path.join(data_dir, "spj.py")):
        spj = True
    judger = Judger(data['max_cpu_time'], data['max_memory'],
                    data['run_command'], data.get('seccomp_rule'), judge_dir,
                    1 if data.get('memory_limit_check_only') else 0, data_dir,
                    submit_id, spj)
    judge_pool.apply_async(run, (judger, compiler), callback=callback)
    return "success"
Example #6
def compete(player1, turns, policy_number):
    player2 = Player(epsilon=0, symbol=-1)
    player2.load_policy(policy_number)
    judger = Judger(player1, player2)
    player1_win = 0.0
    player2_win = 0.0
    for _ in range(turns):
        winner = judger.play()
        if winner == 1:
            player1_win += 1
        if winner == -1:
            player2_win += 1

    draw_rate = (turns - (player1_win + player2_win)) / turns

    print(
        '%d turns, player 1 winrate: %.02f, player 2 winrate: %.02f, draw rate: %.02f'
        % (turns, player1_win / turns, player2_win / turns, draw_rate))
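compete expects an already-constructed first player and reloads the second from a saved policy; an invocation might look like this (the turn count and policy number are arbitrary):

# Hypothetical call to compete() above:
compete(Player(epsilon=0, symbol=1), turns=1000, policy_number=10000)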
Example #7
class Checker:
    def __init__(self, workdir, language, sourcefile, testfile):
        self._path = path.dirname(path.abspath(__file__))
        self.workdir = workdir
        self.language = language
        self.sourcefile = sourcefile
        self.testfile = testfile
        self.tmpltdir = path.join(self._path, "templates")
        self.judger = Judger()

    @property
    def testcases(self):
        testpath = path.join(self.workdir, self.testfile)
        with open(testpath, "r") as f:
            return json.load(f)

    @property
    def languages(self):
        return listdir(self.tmpltdir)

    def _loadModule(self):
        tmpltpath = path.join(self.tmpltdir, self.language)
        loader = SourceFileLoader(self.language, tmpltpath)
        module = loader.load_module()
        module.workdir = self.workdir
        return module

    def check(self):
        if self.language not in self.languages:
            raise ValueError("Language %s is not supported" % self.language)

        module = self._loadModule()
        codepath = path.join(self.workdir, self.sourcefile)
        if zipfile.is_zipfile(codepath):
            with zipfile.ZipFile(codepath, "r") as zpf:
                zpf.extractall(self.workdir)
            self.sourcefile = zpf.namelist()

        self.judger.judge(module, self.sourcefile, self.testcases, timeout=3)
        return self.judger.result

    def _export_result(self, results):
        with open(path.join(self.workdir, "result.json"), "w") as f:
            json.dump(results, f)
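A minimal invocation sketch for Checker, assuming a work directory that holds the solution and its test definitions (all file names here are illustrative):

# Hypothetical usage of the Checker class above:
checker = Checker(workdir='/tmp/job-1', language='python',
                  sourcefile='solution.py', testfile='tests.json')
result = checker.check()   # raises ValueError for an unsupported language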
Example #8
    def init_game(self):
        ''' Initialize the game of Limit Texas Hold'em

        This version supports two-player limit texas hold'em

        Returns:
            (tuple): Tuple containing:

                (dict): The first state of the game
                (int): Current player's id
        '''
        # Initialize a dealer that can deal cards
        self.dealer = Dealer()

        # Initialize two players to play the game
        self.players = [
            Player(i, self.init_chips) for i in range(self.num_players)
        ]

        # Initialize a judger class which will decide who wins in the end
        self.judger = Judger()

        # Deal cards to each player to prepare for the first round
        for i in range(self.num_players):
            self.players[i].hand.append(self.dealer.deal_card())

        # Initialize public cards
        self.public_cards = []

        # Randomly choose a big blind and a small blind
        s = np.random.randint(0, self.num_players)
        b = (s + 1) % self.num_players
        self.players[b].in_chips = self.big_blind
        self.players[s].in_chips = self.small_blind

        # The player next to the small blind plays first
        self.game_pointer = (b + 1) % self.num_players

        # Initialize a betting round; in the first round, the big blind and the small blind need to
        # be passed to the round for processing.
        self.round = Round(self.num_players, self.big_blind)

        self.round.start_new_round(game_pointer=self.game_pointer,
                                   raised=[p.in_chips for p in self.players])

        # Count the round. There are 4 rounds in each game.
        self.round_counter = 0

        # Save the history for stepping back to the last state.
        self.history = []
        self.action_history = []
        for i in range(2):
            self.action_history.append([])
        state = self.get_state(self.game_pointer)

        return state, self.game_pointer
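The returned tuple seeds the game loop; a caller would start a hand like this (game is assumed to be an instance of the class this method belongs to):

# Hypothetical usage of init_game() above:
state, player_id = game.init_game()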
Example #9
def __init__(self, name, url_queue, url_list, url_in_queue, Flock, home_urls, tem_siteID=[0], continue_run=[True]):
    '''
    name
    url_queue       URLs assigned by the master server
    url_list        local duplicate check
    url_in_queue    newly parsed URLs; one UrlQueue is allocated per site
    Flock
    home_urls       used to test whether a URL belongs to the crawl set
    tem_conn        the initial DNS cache
    is_new_task     passed by reference, modified by the communicator to signal whether an update is needed
    tem_home_url
    old_home_url    passed by reference
    continue_run[]  flag indicating whether to keep running
    '''
    threading.Thread.__init__(self, name=name)
    # Local URL de-duplication: a URL that is a local duplicate is discarded;
    # otherwise it goes into a temporary queue to be checked later by the central server.
    # Each site gets its own list object so URLs are told apart per site.
    self.__url_list = url_list
    self.__url_queue = url_queue
    # By default each site gets one in-queue: a local temporary queue.
    # URLs that pass the url_list duplicate check go into in_queue
    # and are handed to the central server once enough have accumulated.
    # Queue()
    self.__url_in_queue = url_in_queue
    #----------------------------------------------------------------
    self.__Flock = Flock
    self.__home_urls = home_urls
    # force a DNS refresh
    self.__tem_siteID = None
    # passed by reference for easy comparison
    self.__tem_siteID = tem_siteID
    #----------------------------------------------------------------
    self.__Flock = Flock
    self.__htmlparser = HtmlParser()
    self.__picparser = PicParser()
    self.__judger = Judger(self.__home_urls)
    # init temporary home_url and siteID,
    # both used to decide whether to refresh the DNS cache
    self.__dbsource = DBSource()
    self.__collector = Collector(home_urls)
    # continue run
    self.__continue_run = continue_run
Example #10
def test_tweet_is_correct():
    judger = Judger()
    # dummy blacklist class injection
    judger.blacklists = DummyBlacklists()
    for trend in sample_trends:
        should_tweet = judger.judge_whether_tweet(trend)

        if trend.get('name') == 'trend_A':
            assert should_tweet is False
        elif trend.get('name') == 'trend_B':
            assert should_tweet is False
        elif trend.get('name') == 'trend_C':
            assert should_tweet is True
        elif trend.get('name') == 'trend_D':
            assert should_tweet is True
        elif trend.get('name') == 'trend_E':
            assert should_tweet is True
        elif trend.get('name') == 'trend_F':
            assert should_tweet is False
        elif trend.get('name') == 'trend_blacklisted_A':
            assert should_tweet is False
Example #11
import os
import sys
import time
import logging
import numpy as np
sys.path.append('..')
from judger import Judger

strtime = time.strftime("%Y-%m-%d-%H-%M", time.localtime())
log_name = "./" + strtime + "ensemble.txt"
logging.basicConfig(handlers=[logging.FileHandler(log_name, 'w+', 'utf-8')],
                    format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)

accusation_path = 'accu.txt'
law_path = 'law.txt'
judger = Judger(accusation_path, law_path)

marked_labels_list = np.load('../accu/accu_labels.npy')
scores_path = '../accu/'
scores_names = [
    'accu_lstm89.1.npy', 'accu_gruaug88.3.npy', 'accu_gru87.9.npy',
    'accu_grubigaug87.3.npy', 'accu_rcnn87.npy', 'accu_rcnnaug86.84.npy',
    'accu_cnn86.77.npy', 'accu_fasttextaug76.14.npy'
]
scores_name_num = len(scores_names)


def sigmoid_(inputs):
    """

    Calculate the sigmoid for the given inputs (array)
Example #12
        'tfidf__norm': ('l1', 'l2'),
        'clf__alpha': (0.00001, 0.000005, 0.000001),
        'clf__penalty': ('l2', 'elasticnet'),
        #'clf__n_iter': (10, 50, 80),
    }
    sgd_param_list = parse_params(SGD_parameters)

    law_model = None
    accu_model = None
    time_model = None
    '''
    test_fold = np.zeros((train_docs_num + val_docs_num), dtype='int')
    test_fold[:train_docs_num] = -1
    ps = PredefinedSplit(test_fold = test_fold)
    '''
    judge = Judger("../baseline/accu.txt", "../baseline/law.txt")

    parameters = SVC_parameters
    param_list = parse_params(parameters)

    def pipeline_train(pid):
        clf = None
        param = None
        if clf_name == 'SVC':
            param = svc_param_list[pid]
            clf = LinearSVC(**param['clf'])
        elif clf_name == 'SGD':
            param = sgd_param_list[pid]
            clf = SGDClassifier(**param['clf'])
        elif clf_name == 'LR':
            param = lr_param_list[pid]
Example #13
class Evaluator(object):
    def __init__(self, predictor, input_path='./input', output='./out'):
        self.predictor = predictor
        self.input_path = input_path
        self.output_path = output
        self.judger = Judger('./data/accu.txt', './data/law.txt')
        self.cnt = 0

    def format_result(self, result):
        rex = {"accusation": [], "articles": [], "imprisonment": -3}

        res_acc = []
        for x in result["accusation"]:
            if x is not None:
                res_acc.append(int(x))
        rex["accusation"] = res_acc

        if result["imprisonment"] is not None:
            rex["imprisonment"] = int(result["imprisonment"])
        else:
            rex["imprisonment"] = -3

        res_art = []
        for x in result["articles"]:
            if x is not None:
                res_art.append(int(x))
        rex["articles"] = res_art

        return rex

    def get_batch(self):
        v = self.predictor.batch_size
        if not isinstance(v, int) or v <= 0:
            raise NotImplementedError

        return v

    def solve(self, fact):
        result = self.predictor.predict(fact)

        for a in range(0, len(result)):
            result[a] = self.format_result(result[a])

        return result

    def output_result(self, file_name):
        # context managers close both files, including the previously leaked input handle
        with open(os.path.join(self.input_path, file_name), "r") as inf, \
                open(os.path.join(self.output_path, file_name), "w") as ouf:
            fact = []

            for line in inf:
                fact.append(json.loads(line)["fact"])
                if len(fact) == self.get_batch():
                    result = self.solve(fact)
                    self.cnt += len(result)
                    for x in result:
                        print(json.dumps(x), file=ouf)
                    fact = []

            if len(fact) != 0:
                result = self.solve(fact)
                self.cnt += len(result)
                for x in result:
                    print(json.dumps(x), file=ouf)
                fact = []

    def scoring(self, file_name):
        result = self.judger.test(self.input_path, self.output_path, file_name)
        return self.judger.get_score(result)
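A minimal driver sketch for Evaluator, assuming a predictor object exposing the batch_size attribute and predict() method used above (the file name is illustrative):

# Hypothetical usage of the Evaluator class above:
evaluator = Evaluator(predictor, input_path='./input', output='./out')
evaluator.output_result('test.json')    # writes one JSON result per input line
print(evaluator.scoring('test.json'))   # scores the output against the truth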
Example #14
class Reptile(threading.Thread):
    '''
    A single crawler thread
    '''
    def __init__(self, name, url_queue, url_list, url_in_queue, Flock, home_urls, tem_siteID=[0], continue_run=[True]):
        '''
        name
        url_queue       URLs assigned by the master server
        url_list        local duplicate check
        url_in_queue    newly parsed URLs; one UrlQueue is allocated per site
        Flock
        home_urls       used to test whether a URL belongs to the crawl set
        tem_conn        the initial DNS cache
        is_new_task     passed by reference, modified by the communicator to signal whether an update is needed
        tem_home_url
        old_home_url    passed by reference
        continue_run[]  flag indicating whether to keep running
        '''
        threading.Thread.__init__(self, name=name)
        # Local URL de-duplication: a URL that is a local duplicate is discarded;
        # otherwise it goes into a temporary queue to be checked later by the central server.
        # Each site gets its own list object so URLs are told apart per site.
        self.__url_list = url_list
        self.__url_queue = url_queue
        # By default each site gets one in-queue: a local temporary queue.
        # URLs that pass the url_list duplicate check go into in_queue
        # and are handed to the central server once enough have accumulated.
        # Queue()
        self.__url_in_queue = url_in_queue
        #----------------------------------------------------------------
        self.__Flock = Flock
        self.__home_urls = home_urls
        # force a DNS refresh
        self.__tem_siteID = None
        # passed by reference for easy comparison
        self.__tem_siteID = tem_siteID
        #----------------------------------------------------------------
        self.__Flock = Flock
        self.__htmlparser = HtmlParser()
        self.__picparser = PicParser()
        self.__judger = Judger(self.__home_urls)
        # init temporary home_url and siteID,
        # both used to decide whether to refresh the DNS cache
        self.__dbsource = DBSource()
        self.__collector = Collector(home_urls)
        # continue run
        self.__continue_run = continue_run
    #------------------------------------------------------
    @dec
    def init(self, siteID):
        console('self.init()')
        self.siteID = -1
        self.__tem_siteID[0] = siteID
        self.__dbsource.init(siteID)
        self.__url_queue.init(siteID)
        netloc = self.transNetloc(self.__home_urls[siteID])
        print 'get netloc',netloc
        self.__conn = httplib.HTTPConnection(netloc, 80, timeout = 10)
    @dec 
    def conn(self):
        '''
        Includes the DNS-refresh logic:
        siteID is passed in by reference to detect DNS changes
        '''
        if self.siteID != self.__tem_siteID[0]:
            '''
            refresh DNS
            '''
            self.siteID = self.__tem_siteID[0]
            #netloc = (urlparse.urlsplit(self.__home_urls[self.__tem_siteID[0]])).netloc
            netloc = self.transNetloc(self.__home_urls[self.__tem_siteID[0]])
            print 'netloc',netloc
            self.__conn = httplib.HTTPConnection(netloc, 80, timeout = 10)
        return self.__conn
    
    def transcode(self, source):
        '''
        Detect the encoding and convert to utf8 automatically
        '''
        res = chardet.detect(source)
        confidence = res['confidence']
        encoding = res['encoding']
        p = re.compile("&#(\S+);")
        source = p.sub("",source)
        print 'transcode', res
        if encoding == 'utf-8':
            return source
        if confidence < 0.6:
            return False
        else:
            return unicode(source, encoding, 'ignore')
    @dec
    def transPath(self, page_url, path):
        '''
        Convert an arbitrary link into a path
        '''
        url = self.__judger.transToStdUrl(page_url, path)
        return urlparse.urlsplit(url).path
    @dec
    def transNetloc(self, url):
        '''
        Takes an absolute url
        '''
        return urlparse.urlsplit(url).netloc
    #-------------------------------------------------------------
    @dec       
    def run(self):
        '''
        Main run loop
        '''
        console('self.run()')
        self.conn()
        home_url = self.__home_urls[self.siteID]
        print 'home_url',home_url
        while(True):
            # Flag passed in from outside: whether to keep running,
            # enabling interruption or shutdown
            if not self.__continue_run[0]:
                return 
            #[title, path]
            urlinfo = self.getAUrl()
            print 'get urlinfo ',urlinfo
            if not urlinfo:
                print "No Task\nqueue is empty!"
                return
            # global page info
            page_path = urlinfo[1]
            page_url = self.__judger.transToStdUrl(home_url, page_path)
            print 'page_path',page_path
            source = self.getPage(home_url, page_path)
            # check whether the source is html
            if not self.__htmlparser.init(source):
                '''
                Images and other files are handled separately;
                no parsing is done here
                '''
                continue
            # get the absolute address
            #url = self.__judger.transToStdUrl(home_url, page_path)
            # urls are stored uniformly as absolute addresses
            #save html source
            print 'saveHtml'+'-'*200
            self.saveHtml(page_url, urlinfo[0])
            imgsrcs = self.getImgSrcs()
            #save images
            self.saveImgList(page_url, imgsrcs)
            newurls = self.__htmlparser.getALinkText_List()
            self.addNewInQueue(page_url, newurls)
    @dec 
    def requestSource(self, path):
        '''
        page_url    a sub-page, e.g. ./index.html
        url: an absolute url (including home_url) may be passed in directly;
        parsing happens internally
        '''
        conn = self.conn()
        conn.request("GET", path)
        #print self.__conn
        r1 = conn.getresponse()
        #print r1
        print r1.status
        data = r1.read()
        '''
        if r1.status != 'OK':
            print 'status is ',r1.status
            print 'status not OK'
            print r1.reason
            return False
        data = r1.read()
        if not len(data):
            print 'length of data is 0'
            return False
        '''
        return data
    @dec 
    def getPage(self,page_url, url):
        '''
        Takes any url, converts it to a path automatically,
        then calls the low-level requestSource()
        '''
        console('self.getPage()')
        path = self.transPath(page_url, url)
        data = self.requestSource(path)
        print 'page_url: url',page_url, url
        if len(data):
            data = self.transcode(data)
            #print 'data',data
            if not len(data):
                return False
            if not self.__collector.init(data):
                print 'collector.init',
                return False
            #self.__htmlparser.init(data)
            self.__htmlparser = self.__collector.htmlparser
        return data
    @dec    
    def getImg(self,page_url, url):
        '''
        path
        img_path    './img/1.jpg'
        returns [absolute url, source]
        '''
        url = self.transPath(page_url, url)
        return [url, self.requestSource(url)]
    @dec    
    def getAUrl(self):
        return self.__url_queue.get(timeout = 3)
    @dec 
    def getUrls(self):
        '''
        Fetch the urls
        and run the checks
        '''
        return self.__htmlparser.getALink_list()
    @dec 
    def getImgSrcs(self):
        '''
        parse html source and return src_list
        '''
        return self.__htmlparser.getPicSrcs_List()
    @dec
    def addNewQueue(self, path_list):
        '''
        External: new paths sent from the control server
        url_list = [
            ['cau','path'],
        ]
        '''
        # controlled refresh
        for url in path_list:
            self.__url_queue.put(url)
    @dec    
    def addNewInQueue(self, page_url, url_list):
        '''
        The url here is the raw url and needs no further processing;
        add each new_url to the corresponding queue
        '''
        for urlinfo in url_list:
            # convert to an absolute url
            url = self.__judger.transToStdUrl(page_url, urlinfo[1])
            siteID = self.__judger.judgeUrl(page_url, url)
            path = urlparse.urlsplit(url).path
            # check whether the url belongs to this platform
            if siteID != -1:
                if not self.__url_list.find(siteID, path):
                    '''
                    not duplicate in url_list
                    '''
                    # trim down the urls
                    self.__url_in_queue.put(siteID, urlinfo[0], path)
        self.__url_in_queue.show()
    @dec
    def saveHtml(self, url, title):
        '''
        Save source and parsed source to the database
        '''
        # get the absolute url
        assert self.siteID != -1
        #url = self.__judger.transToStdUrl(self.__home_urls[self.siteID], path)
        today = datetime.date.today()
        info = {
            'title' :   title,
            'url':      url,
            'date':     datetime.date.isoformat(today)
        }
        self.__dbsource.saveHtml(info, self.__collector.html, self.__collector.transXml_Str(url))

    def saveImg(self, url, source):
        imgsource = self.__picparser.getCompressedPic()
        size = imgsource['size']
        source = imgsource['source']
        #print 'source',source
        info = {
            'url':url,
            'width':size[0],
            'height':size[1]
        }
        self.__dbsource.saveImg(info, source)

    def saveImgList(self, page_url, srcs):
        '''
        Takes absolute srcs
        and stores the whole series
        '''
        for src in srcs:
            imgsource = self.getImg(page_url, src)
            url = imgsource[0]
            source = imgsource[1]
            self.__picparser.init(source)
            self.saveImg(url, source)
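A minimal start-up sketch for the thread above; the queue, list, and lock objects are assumptions matching the constructor's parameters, and init() must run before start() because run() reads self.siteID:

# Hypothetical start-up of a Reptile worker (Python 2, like the class itself):
reptile = Reptile('reptile-0', url_queue, url_list, url_in_queue, Flock, home_urls)
reptile.init(0)     # select site 0 and open its HTTP connection
reptile.start()     # threading.Thread entry point; invokes run()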
Example #15
def test_tweet_volume_larger_than_threshold():
    trend = {'name': 'dummy', 'tweet_volume': THRETHOLD + 1}
    judger = Judger()
    judger.blacklists = DummyBlacklists()
    assert judger.judge_whether_tweet(trend)
Example #16
def test_tweet_volume_equals_threshold():
    trend = {'name': 'dummy', 'tweet_volume': THRETHOLD}
    judger = Judger()
    judger.blacklists = DummyBlacklists()
    assert not judger.judge_whether_tweet(trend)
    del judger
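Read together, Examples #15 and #16 pin down the boundary: a trend is tweetable only when its volume strictly exceeds THRETHOLD (sic) and it passes the blacklist. A minimal rule consistent with the tests (a sketch, not the project's actual code; the blacklist method name is an assumption):

# Sketch of judge_whether_tweet implied by the tests above:
def judge_whether_tweet(self, trend):
    if self.blacklists.contains(trend.get('name')):       # hypothetical blacklist API
        return False
    return (trend.get('tweet_volume') or 0) > THRETHOLD   # equality is rejected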
Example #17
    def judge(args, ip):
        with InitIsolateEnv() as box_id:
            compile_config = languages[args['language_name']]['compile']
            run_config = languages[args['language_name']]['run']
            src_name = compile_config['src_name']
            time_limit = args['time_limit'] / 1000.0
            if args['language_name'] == 'java':
                memory_limit = 512 * 1024
            else:
                memory_limit = args['memory_limit']
            test_case_id = args['test_case_id']
            submission_id = args['submission_id']
            logger.exception(test_case_id)

            path = os.path.join(JUDGE_DEFAULT_PATH, str(box_id))
            host_name = socket.gethostname()
            is_spj = bool(args.get('spj_code'))

            # write source code into file
            try:
                src_path = os.path.join(path, 'box', src_name)
                with open(src_path, "w") as f:
                    f.write(args['src_code'].encode("utf8"))
            except Exception as e:
                logger.exception(e)
                raise JudgeServerError('unable to write code to file')
            # write spj code into file
            if is_spj:
                spj_src_path = os.path.join(path, 'box', 'spj.c')
                with open(spj_src_path, "w") as f:
                    f.write(args['spj_code'].encode("utf8"))

            update_submission_status(ip, submission_id, 'compiling')

            # compile
            compiler = Compiler(compile_config=compile_config, box_id=box_id)
            compiler.compile()
            # compile spj code
            if is_spj:
                spj_config = languages['c++']['compile']
                spj_config['src_name'] = 'spj.c'
                spj_config['exe_name'] = 'spj'
                spj_compiler = Compiler(compile_config=spj_config,
                                        box_id=box_id)
                spj_compiler.compile()

            update_submission_status(ip, submission_id, 'running & judging')

            # run
            judger = Judger(run_config=run_config,
                            max_cpu_time=time_limit,
                            max_memory=memory_limit,
                            test_case_id=test_case_id,
                            box_id=box_id,
                            server_ip=ip,
                            submission_id=submission_id,
                            is_spj=is_spj)
            result = judger.run()
            judge_result = {
                "status": RESULT["accepted"],
                "info": result,
                "time": None,
                "memory": None,
                "server": host_name
            }
            for item in judge_result["info"]:
                if item["status"] != RESULT['accepted']:
                    judge_result["status"] = item["status"]
                    break
            else:
                st = sorted(result, key=lambda k: k['info']['time'])
                judge_result["time"] = st[-1]['info']["time"] * 1000
                # TODO I don't know why the memory only matches reality after dividing by 10
                # 2017.04.06 update:
                # VSS - Virtual Set Size: virtual memory used (includes shared libraries)
                # RSS - Resident Set Size: physical memory actually used (includes shared libraries)
                # PSS - Proportional Set Size: physical memory used (shared-library memory split proportionally)
                # USS - Unique Set Size: physical memory used exclusively by the process (excludes shared libraries)
                # so far it seems roughly rss/10 = uss;
                # testing suggests POJ uses uss while HDU uses rss
                judge_result["memory"] = st[-1]['info']["max-rss"]

            judge_result["status"] = RE_RESULT[judge_result["status"]]
            for item in judge_result["info"]:
                item["status"] = RE_RESULT[item["status"]]

            return judge_result
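For reference, the fields judge() reads from args suggest a request of roughly this shape (keys from the code above; values are illustrative):

# Hypothetical args dict consumed by judge() above:
args = {
    'language_name': 'c++',
    'time_limit': 1000,        # milliseconds; converted to seconds above
    'memory_limit': 65536,     # ignored for Java, which is pinned to 512 * 1024
    'test_case_id': 'a1b2c3d4',
    'submission_id': 42,
    'src_code': '#include <cstdio>\nint main() { return 0; }',
    # 'spj_code': '...',       # optional; a non-empty value enables special judging
}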
Example #18
def __init__(self, homeurls):
    self.htmlparser = HtmlParser()
    self.judger = Judger(homeurls)
Example #19
            # pdb.set_trace()
            if set(one_tags) == set(predic_labels_names):
                all_qual_num = all_qual_num + 1
            # pdb.set_trace()
    result_file.write(
        "true_count={},predict_count={},all_qual_num={}\n".format(
            true_tags_count, predic_tags_count, all_qual_num))
    # pdb.set_trace()
    outf_path = '../output/'
    out_filename = "{}_output.json".format(task_type_name)
    outf_file = os.path.join(outf_path, out_filename)
    inf_path = os.path.join(labor_data_path, data_filename)
    generate_pred_file(labor_tags_list, labor_preds, inf_path, outf_file)

    # Evaluate the results
    judger_labor = Judger(tag_path=labor_tag_file)
    result_labor = judger_labor.test(truth_path=inf_path, output_path=outf_file)
    score_labor = judger_labor.gen_score(result_labor)
    result_file.write('score_{}={}\n\n'.format(model_filename, score_labor))

    exit()

    # Generate the prediction file for the divorce domain
    print('predict_divorce...')
    tags_list = []
    with open('../../data/divorce/tags.txt', 'r', encoding='utf-8') as tagf:
        for line in tagf.readlines():
            tags_list.append(line.strip())
    prd = Predictor('model_divorce/')
    inf_path = '../../data/divorce/data_small_selected.json'
    outf_path = '../../output/divorce_output.json'
Example #20
class Collector:
    '''
    Extracts the relevant tag content from the html,
    combines it into a fixed format, and stores it;
    the main job is conversion to xml format
    '''
    def __init__(self, homeurls):
        self.htmlparser = HtmlParser()
        self.judger = Judger(homeurls)

    def init(self, html):
        '''
        Explicitly refresh the cached content
        '''
        self.html = html
        # escape quotes: str.replace returns a new string, so the result must be assigned
        self.html = self.html.replace('"', "'")
        self.html = self.html.replace("'", "''")
        if not self.htmlparser.init(html):
            return False
        self.d = self.htmlparser.d
        self.d=pq(html)
        self.d('script').remove()
        self.d('SCRIPT').remove()
        self.d('style').remove()
        self.d('STYLE').remove()
        print '-'*200
        print self.html
        return True
        
    def clear_other_node(self):
        '''
        Remove useless tags
        '''
        self.d('head').remove()
        self.d('h1').remove()
        self.d('h2').remove()
        self.d('h3').remove()
        self.d('b').remove()
        self.d('a').remove()
        
    def getTitleText(self):
        '''
        Extract the title
        '''
        return self.d('title').text()

    def getNodes(self,tag_name):
        '''
        get a list of certain tag nodes
        '''
        return self.d(tag_name)
        
    
    def __xmlAppendNodesTextList(self, xmlnode, tagname):
        '''
        Appends a record under the xml node for each element in the list.
        Note: links must be converted to absolute links beforehand.
        e.g. <b>
            <item>hello</item>
            <item>world</item>
          </b>
        '''
        html_node_text_list = self.d(tagname)
        print html_node_text_list
        childnode = self.dd.createElement(tagname)
        print childnode
        for i in range(len(html_node_text_list)):
            '''
            add an item for each element
            '''
            text_node = self.dd.createElement('item')
            text_node.setAttribute('text', html_node_text_list.eq(i).text())
            childnode.appendChild(text_node)
        xmlnode.appendChild(childnode)

    def transXml_Str(self,url):
        '''
        Returns the xml source; pages are stored in this format
        '''
        strr='<html></html>'
        titleText = self.getTitleText()
        self.dd = dom.parseString(strr)
        html = self.dd.firstChild
        html.setAttribute('url', url)
        # create records for the following tags
        for tag in ['title','b', 'h1', 'h2', 'h3']:
            self.__xmlAppendNodesTextList(html, tag)
        # generate <a> entries
        aa=self.htmlparser.getALink_list()
        a=self.dd.createElement('a')
        for u in aa:
            #i=self.transurl.trans_d(i)  # convert the url to a standard absolute address
            aindex=self.dd.createElement('item')
            aindex.setAttribute('title',u[0])
            #aindex.setAttribute('href',self.a_trav(aa[i]))
            aindex.setAttribute('href',self.judger.transToStdUrl(url, u[1]))
            a.appendChild(aindex)
        html.appendChild(a)
        # add content
        #htmltext=self.d.html().decode('gbk','ignore').encode('utf-8')
        #ht=pq(htmltext)
        # Bug note:
        # mind the html special characters here (&# entities and the like);
        # they are handled separately during tokenization
        content=self.d.text()
        cc=self.dd.createElement('content')
        ctext=self.dd.createTextNode(content)
        cc.appendChild(ctext)
        html.appendChild(cc)
        #print self.dd.toprettyxml()
        return html.toxml()
Example #21
def __init__(self, predictor, input_path='./input', output='./out'):
    self.predictor = predictor
    self.input_path = input_path
    self.output_path = output
    self.judger = Judger('./data/accu.txt', './data/law.txt')
    self.cnt = 0
Example #22
    def run(self):
        with open(self.__SampleListFile, 'w', encoding='utf-8') as fp:
            scaned_files, sampled_files, err_counters = 0, 0, [
                0, 0, 0, 0, 0, 0
            ]
            for initial_path in self.__InitialPaths:
                for dir_path, dir_names, file_names in os.walk(initial_path):
                    if False in [
                            not match(excluded_path, dir_path)
                            for excluded_path in self.__ExcludedPaths
                    ]:  # skip excluded directories
                        dir_names[:] = []  # and skip their subdirectories too
                        continue
                    if not os.access(dir_path,
                                     os.X_OK | os.R_OK):  # the loop below cannot catch some directories!
                        log.warning('[Permission Denied:] ' + dir_path)
                        continue
                    for dir_name in dir_names:  # remove subdirectories we may not enter from the scan list and log a warning
                        dir_fullname = os.path.join(dir_path, dir_name)
                        if not os.access(dir_fullname, os.X_OK | os.R_OK):
                            dir_names.remove(dir_name)
                            log.warning('[Permission denied:] ' + dir_fullname)
                    if len(file_names
                           ) > self.__MaxFiles:  # a directory with this many files is most likely a data-file directory
                        log.warning('[Too Many Files]( ' +
                                    str(len(file_names)) + '), Ignoring:' +
                                    dir_path)
                        continue

                    timer = time.time()
                    for file_name in file_names:
                        try:
                            scaned_files += 1
                            if scaned_files % 1000 == 0:
                                log.info(
                                    'Files scanned:[%d], error[%d], inactive[%d], small[%d], wrong-type[%d], non-text[%d], candidate[%d]\t%s'
                                    %
                                    (scaned_files, err_counters[0],
                                     err_counters[1], err_counters[2],
                                     err_counters[3], err_counters[4] +
                                     err_counters[5], sampled_files, dir_path))
                                if time.time(
                                ) - timer > self.__MaxSeconds:  # Too slow to scan a folder
                                    log.warning(
                                        '[Too slow to scan, Ignoring:]( ' +
                                        dir_path)
                                    break
                                time.sleep(self.__SleepSeconds)  # avoid hogging system resources

                            file_fullname = os.path.join(dir_path, file_name)
                            rc = Judger.filter(file_fullname)
                            if type(rc) is int:  # the file is not a candidate log, no need to sample it
                                err_counters[rc] += 1
                                continue
                            print(file_fullname, file=fp)
                            sampled_files += 1
                        except Exception as err:  # garbled directory/file names have caused charset errors when writing to fp
                            log.error(str(err))

        log.info(
            'Finished scan:[%d], error[%d], inactive[%d], small[%d], wrong-type[%d], non-text[%d], candidate[%d]'
            % (scaned_files, err_counters[0], err_counters[1], err_counters[2],
               err_counters[3], err_counters[4] + err_counters[5],
               sampled_files))
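The loop above treats an int returned by Judger.filter as a rejection code indexing the six err_counters buckets, and any other return value as a candidate log file. A stand-in illustrating that contract (the thresholds and bucket choices here are invented):

import os

def filter_stub(file_fullname):
    # returns an int in range(6) for rejected files, anything else for candidates
    try:
        size = os.path.getsize(file_fullname)
    except OSError:
        return 0                 # 'error' bucket, judging by the log messages above
    if size < 1024:
        return 2                 # 'small' bucket
    return file_fullname         # candidate: the caller writes it to the sample list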
Example #23
    #accu = train_SVC(vec, accu_label)
    print('law SVC')
    sys.stdout.flush()
    #law = train_SVC(vec, law_label)
    print('time SVC')
    sys.stdout.flush()
    #time = train_SVC(vec, time_label)

    #test
    print('predict')
    sys.stdout.flush()
    predictor = PredictorLocal(tfidf, accu, law, time)
    test_label, test_predict = predictor.predict_file(test_filename)

    #metrics
    judge = Judger("../baseline/accu.txt", "../baseline/law.txt")
    result = judge.test2(test_label, test_predict)
    print(result)
    rst = judge.get_score(result)

    print(rst)
    rstr = "ACCU:(%.4f, %.4f, %.4f); LAW:(%.4f, %.4f, %.4f) TIME: %.4f"% \
            (rst[0][0], rst[0][1], rst[0][2], rst[1][0], rst[1][1], rst[1][2], rst[2])

    sinfo = 'Prog:%s TrainFile:%s Seg:%s DIM:%s NGRAM:%d RESULT: %s' % (
        sys.argv[0], train_fname, seg_method, dim, ngram, rstr)
    logger.info(sinfo)

    print('begin test model:')
    print('saving model')
    joblib.dump(tfidf, 'predictor/model/tfidf.model')
Example #24
def __init__(self, homeurls):
    self.htmlparser = HtmlParser()
    self.judger = Judger(homeurls)
Example #25
class Collector:
    '''
    Extracts the relevant tag content from the html,
    combines it into a fixed format, and stores it;
    the main job is conversion to xml format
    '''
    def __init__(self, homeurls):
        self.htmlparser = HtmlParser()
        self.judger = Judger(homeurls)

    def init(self, html):
        '''
        Explicitly refresh the cached content
        '''
        self.html = html
        # escape quotes: str.replace returns a new string, so the result must be assigned
        self.html = self.html.replace('"', "'")
        self.html = self.html.replace("'", "''")
        if not self.htmlparser.init(html):
            return False
        self.d = self.htmlparser.d
        self.d = pq(html)
        self.d('script').remove()
        self.d('SCRIPT').remove()
        self.d('style').remove()
        self.d('STYLE').remove()
        print '-' * 200
        print self.html
        return True

    def clear_other_node(self):
        '''
        Remove useless tags
        '''
        self.d('head').remove()
        self.d('h1').remove()
        self.d('h2').remove()
        self.d('h3').remove()
        self.d('b').remove()
        self.d('a').remove()

    def getTitleText(self):
        '''
        Extract the title
        '''
        return self.d('title').text()

    def getNodes(self, tag_name):
        '''
        get a list of certain tag nodes
        '''
        return self.d(tag_name)

    def __xmlAppendNodesTextList(self, xmlnode, tagname):
        '''
        Appends a record under the xml node for each element in the list.
        Note: links must be converted to absolute links beforehand.
        e.g. <b>
            <item>hello</item>
            <item>world</item>
          </b>
        '''
        html_node_text_list = self.d(tagname)
        print html_node_text_list
        childnode = self.dd.createElement(tagname)
        print childnode
        for i in range(len(html_node_text_list)):
            '''
            add an item for each element
            '''
            text_node = self.dd.createElement('item')
            text_node.setAttribute('text', html_node_text_list.eq(i).text())
            childnode.appendChild(text_node)
        xmlnode.appendChild(childnode)

    def transXml_Str(self, url):
        '''
        Returns the xml source; pages are stored in this format
        '''
        strr = '<html></html>'
        titleText = self.getTitleText()
        self.dd = dom.parseString(strr)
        html = self.dd.firstChild
        html.setAttribute('url', url)
        # create records for the following tags
        for tag in ['title', 'b', 'h1', 'h2', 'h3']:
            self.__xmlAppendNodesTextList(html, tag)
        # generate <a> entries
        aa = self.htmlparser.getALink_list()
        a = self.dd.createElement('a')
        for u in aa:
            #i=self.transurl.trans_d(i)  # convert the url to a standard absolute address
            aindex = self.dd.createElement('item')
            aindex.setAttribute('title', u[0])
            #aindex.setAttribute('href',self.a_trav(aa[i]))
            aindex.setAttribute('href', self.judger.transToStdUrl(url, u[1]))
            a.appendChild(aindex)
        html.appendChild(a)
        # add content
        #htmltext=self.d.html().decode('gbk','ignore').encode('utf-8')
        #ht=pq(htmltext)
        # Bug note:
        # mind the html special characters here (&# entities and the like);
        # they are handled separately during tokenization
        content = self.d.text()
        cc = self.dd.createElement('content')
        ctext = self.dd.createTextNode(content)
        cc.appendChild(ctext)
        html.appendChild(cc)
        #print self.dd.toprettyxml()
        return html.toxml()
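A short usage sketch for Collector (Python 2, like the class itself): init() must succeed before transXml_Str() is called, since the latter reads the cached parse state:

# Hypothetical usage of the Collector class above:
collector = Collector(home_urls)
if collector.init(html_source):
    xml_str = collector.transXml_Str('http://example.com/index.html')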
Example #26
def evaluate():
    accu_pred, law_pred = [], []
    ground_truth = []
    count = 0
    for batch in batches_val:
        count += 1
        feed_dict = get_feed_dict(batch)
        law_score, law_pred_b, accu_pred_b, loss = sess.run(
            [
                train_model.law_score, train_model.law_prediction,
                train_model.prediction, train_model.loss
            ],
            feed_dict=feed_dict)
        if count % 100 == 0:
            print('valid_step:', count, 'valid loss:', loss)
        # accu_pred+= [[accu_class[j] for j in i] for i in utils.index_to_label(accu_pred_b, model_config.batch_size)][:len(batch)]
        accu_pred += [
            [j + 1 for j in i]
            for i in utils.index_to_label(accu_pred_b, model_config.batch_size)
        ][:len(batch)]
        law_pred += law_pred_b.tolist()
        ground_truth += list(
            zip(feed_dict[train_model.label].tolist(),
                feed_dict[train_model.law_label].tolist()))
        # if count%10==0:
        #     break
        if count == val_step_per_epoch:
            break

    with open('data/valid_label.txt', 'w', encoding='utf-8') as f:
        for each in ground_truth:
            for i in range(len(each[0])):
                if each[0][i] == 1:
                    f.write(str(accu_class[i]))
            for i in range(len(each[1])):
                if each[1][i] == 1:
                    f.write(', ' + str(law_class[i]))
            f.write('\n')

    with open('data/data_valid_predict.json', 'w', encoding='utf-8') as f:
        for i in range(len(accu_pred)):
            rex = {"accusation": [], "articles": [], "imprisonment": 0}
            rex["accusation"] = accu_pred[i]
            for each in law_pred[i]:
                # each is the index of law predicted in law_class
                if each > 0:
                    rex["articles"].append(file_order[law_class[int(each)]])
            print(json.dumps(rex, ensure_ascii=False), file=f)
            # print(rex)
            # f.write('{{"accusation": [0], "articles": {}, "imprisonment": 0}}'.format(law_pred[i]))
    J = Judger('data/accu.txt', 'data/law.txt')
    res = J.test('data/data_valid.json', 'data/data_valid_predict.json')
    total_score = 0
    scores = []
    for task_idx in range(2):
        TP_micro = 0
        FP_micro = 0
        FN_micro = 0
        f1 = []
        for class_idx in range(len(res[task_idx])):
            if res[task_idx][class_idx]["TP"] == 0:
                f1.append(0)
                continue
            TP_micro += res[task_idx][class_idx]["TP"]
            FP_micro += res[task_idx][class_idx]["FP"]
            FN_micro += res[task_idx][class_idx]["FN"]
            precision = res[task_idx][class_idx]["TP"] * 1.0 / (
                res[task_idx][class_idx]["TP"] +
                res[task_idx][class_idx]["FP"])
            recall = res[task_idx][class_idx]["TP"] * 1.0 / (
                res[task_idx][class_idx]["TP"] +
                res[task_idx][class_idx]["FN"])
            f1.append(2 * precision * recall / (precision + recall))
        precision_micro = TP_micro * 1.0 / (TP_micro + FP_micro + 1e-6)
        recall_micro = TP_micro * 1.0 / (TP_micro + FN_micro + 1e-6)
        F1_micro = 2 * precision_micro * recall_micro / (precision_micro +
                                                         recall_micro + 1e-6)
        F1_macro = np.sum(f1) / len(f1)
        total_score += 100.0 * (F1_micro + F1_macro) / 2
        print(
            'task id: {}, F1_micro: {}, F1_macro: {}, final score: {}'.format(
                task_idx + 1, F1_micro, F1_macro,
                100.0 * (F1_micro + F1_macro) / 2))
        scores.append([F1_micro, F1_macro])
    total_score += res[2]['score'] / res[2]['cnt'] * 100
    print('task id: 3, score:{}'.format(res[2]['score'] / res[2]['cnt'] * 100))
    print('total score:', total_score)
    return total_score, scores
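The per-task score assembled above follows the CAIL convention: each task contributes 100 * (F1_micro + F1_macro) / 2. The same rule shown standalone (a sketch mirroring the epsilon guards in the code):

def task_score(tp, fp, fn, per_class_f1):
    # micro-averaged precision/recall over all classes, then the CAIL blend
    eps = 1e-6
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f1_micro = 2 * precision * recall / (precision + recall + eps)
    f1_macro = sum(per_class_f1) / len(per_class_f1)
    return 100.0 * (f1_micro + f1_macro) / 2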
def get_batch(data_path, batch_id):
    """get a batch from data_path"""
    new_batch = np.load(data_path + str(batch_id) + '.npz')
    X_batch = new_batch['X']
    y_batch = new_batch['y']
    return [X_batch, y_batch]
import logging
import time
import sys
import os
from judger import Judger
if __name__ == '__main__':
    accusation_path = '../cail_0518/accu.txt'
    law_path = '../cail_0518/law.txt'
    judger = Judger(accusation_path, law_path)
    marked_labels_list = list()
    a = []
    strtime = time.strftime("%Y-%m-%d-%H-%M", time.localtime())
    log_name = "../logs/"+strtime+".txt"
    logging.basicConfig(handlers=[logging.FileHandler(log_name, 'w+', 'utf-8')], format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    batchpath = '../data_old/predictbatch/accu/'
    tr_batches = os.listdir(batchpath)  # list of batch file names
    n_tr_batches = len(tr_batches)
    X = []
    y = []
    maxitem = 0
    allindex = 0
    count = 0
    threshold = []