Пример #1
0
    def __train_init(cls, data_lines):
        """
        Estimate the initial-state log-probabilities and store them.

        Counts how often each character opens a (stripped) line, keeping only
        first characters accepted by ``is_chinese``, then inserts one ``Init``
        row per character with ``log(count / len(data_lines))``. The
        denominator is the total number of input lines, skipped ones included.

        Args:
             data_lines(list): content list of training data
        """
        print('Begin training init parameter...')
        total_lines = len(data_lines)
        bar = LineProgress(title='Training init')
        first_char_count = dict()
        for seen, raw in enumerate(data_lines, start=1):
            bar.update(seen * 100 / total_lines)
            text = raw.strip()
            # Blank lines and lines whose first character fails is_chinese
            # contribute nothing.
            if len(text) == 0 or not is_chinese(text[0]):
                continue
            head = text[0]
            first_char_count[head] = first_char_count.get(head, 0) + 1

        print('\nInserting into database...')
        for head, occurrences in first_char_count.items():
            log_prob = float(np.log(occurrences / total_lines))
            # noinspection PyTypeChecker
            cls.insert(Init, character=head, prob=log_prob)
        print('Done training init parameter')
Пример #2
0
    def __train_emission(cls, data_lines):
        """
        Train emission parameter

        For every non-empty line accepted by ``is_chinese``, each character is
        paired with the readings returned for it by
        ``pinyin(line, style=NORMAL, heteronym=True)`` and every reading is
        counted once. One ``Emission`` row is then inserted per
        (character, reading) pair with the log of that reading's share of the
        character's total count.

        Args:
            data_lines(list): content list of training data
        """
        print('Begin training emission parameter...')
        length = len(data_lines)
        progress = LineProgress(title='Training emission')
        char_pinyin_prob = dict()
        for count, line in enumerate(data_lines, start=1):
            progress.update(count * 100 / length)
            line = line.strip()
            if len(line) == 0:
                continue
            if not is_chinese(line):
                continue
            pinyin_list = pinyin(line, style=NORMAL, heteronym=True)
            for character, pinyin_s in zip(line, pinyin_list):
                pinyin_prob = char_pinyin_prob.setdefault(character, dict())
                for pin_yin in pinyin_s:
                    pinyin_prob[pin_yin] = pinyin_prob.get(pin_yin, 0) + 1

        print('\nInserting into database...')
        for character, pinyin_prob in char_pinyin_prob.items():
            # The normaliser is the same for every reading of this character,
            # so compute it once instead of once per inserted row.
            total = sum(pinyin_prob.values())
            for pin_yin, prob in pinyin_prob.items():
                # noinspection PyTypeChecker
                cls.insert(Emission, character=character, pin_yin=pin_yin,
                           prob=float(np.log(prob / total)))
        print('Done training emission parameter')
Пример #3
0
    def __train_transition(cls, data_lines):
        """
        Train transition parameter

        For every line of at least two characters accepted by ``is_chinese``,
        counts each adjacent (previous, next) character pair. One
        ``Transition`` row is then inserted per pair with the log of the
        pair's share among all successors seen for the previous character.

        Args:
            data_lines(list): content list of training data
        """
        print('Begin training transition parameter...')
        length = len(data_lines)
        progress = LineProgress(title='Training transition')
        prev_next_prob = dict()
        for count, line in enumerate(data_lines, start=1):
            progress.update(count * 100 / length)
            line = line.strip()
            # A single character has no successor to learn from.
            if len(line) <= 1:
                continue
            if not is_chinese(line):
                continue
            for prev_char, next_char in zip(line[:-1], line[1:]):
                next_prob = prev_next_prob.setdefault(prev_char, dict())
                next_prob[next_char] = next_prob.get(next_char, 0) + 1

        print('\nInserting into database...')
        for prev_char, next_prob in prev_next_prob.items():
            # The normaliser is shared by every successor of prev_char, so
            # compute it once instead of once per inserted row.
            total = sum(next_prob.values())
            for next_char, prob in next_prob.items():
                # noinspection PyTypeChecker
                cls.insert(Transition, prev_char=prev_char, next_char=next_char,
                           prob=float(np.log(prob / total)))
        print('Done training transition parameter')
Пример #4
0
 def accuracy(cls, path='./Data/test.txt', size=1000, rounds=5):
     """
     Test the model accuracy

     Each round draws ``size`` random lines (with replacement) from the test
     file, takes the first whitespace-separated token of each line as the
     reference word, converts it to pinyin, feeds that to
     ``HMModel.translate`` and scores the fraction of positions at which the
     prediction matches the word. The average time per round and the overall
     accuracy are printed.

     Args:
         path(str): path of test data
         size(int): size of test data
         rounds(int): rounds of test

     Raises:
         ValueError: if ``size`` is larger than the number of lines in the file
     """
     # Only the line list is needed; do not keep the file open while testing.
     with open(path, 'r', encoding='utf-8') as file:
         lines = file.readlines()
     if size > len(lines):
         print("The size is too large")
         raise ValueError("The size is too large")

     accuracy_count = list()
     start = time.time()
     for i in range(rounds):
         progress = LineProgress(title='Round ' + str(i + 1))
         positives = np.random.randint(0, len(lines), size)
         words = [lines[pos].split()[0] for pos in positives]
         accurate_num = 0
         for index, word in enumerate(words):
             progress.update(index / size * 100)
             pin_yin = ' '.join([py_list[0] for py_list in pinyin(word, style=NORMAL)])
             prediction = HMModel.translate(pin_yin)
             # zip stops at the shorter sequence, so a prediction longer than
             # the reference word can no longer index past the end of it.
             matched = sum(1 for guess, truth in zip(prediction, word) if guess == truth)
             accurate_num += matched / len(word)
         accuracy_count.append(accurate_num)
     print("Avg. time consumption per round: {} s".format((time.time() - start) / rounds))
     print("The test accuracy is: {:.2f}%".format(sum(accuracy_count) / (size * rounds) * 100))
Пример #5
0
def downloadTxtNovel(url, curChaptersNum=0):
    """Download the chapters of a novel and return them as one text blob.

    The index page at ``url`` supplies the cover image URL, the book title
    (used as the progress-bar title) and the chapter list. Every chapter from
    index ``curChaptersNum`` onwards is fetched from ``http://www.xbiquge.la/``
    plus the chapter link, and its title and content text are appended.

    Args:
        url: address of the novel's index page.
        curChaptersNum: index of the first chapter to download.

    Returns:
        A tuple ``(coverUrl, chapter_count, data)`` where ``chapter_count`` is
        the total number of chapters listed on the index page and ``data`` is
        the concatenated text of the downloaded chapters.
    """
    response = requests.get(url)
    response.encoding = "utf-8"

    html = etree.HTML(response.text)
    coverUrl = html.xpath("//*[@id='fmimg']/img/@src")[0]

    bookTitle = html.xpath("//*[@id='info']/h1/text()")[0]
    chaptersHtml = html.xpath("//*[@id='list']/dl/dd")

    progress = LineProgress(total=100, title=bookTitle)
    total = len(chaptersHtml) - curChaptersNum
    # Collect the pieces and join once at the end instead of growing a string
    # with += for every chapter.
    parts = ["\n"]
    for index in range(curChaptersNum, len(chaptersHtml)):
        chapter = chaptersHtml[index]
        # Drop any trailing annotation that starts with "【" or "(".
        chapterTitle = chapter.xpath("./a/text()")[0].split("【")[0].split("(")[0]
        parts.append("\n\n" + chapterTitle + "\n\n")
        response = requests.get("http://www.xbiquge.la/" +
                                chapter.xpath("./a/@href")[0])
        response.encoding = "utf-8"

        chapterHtml = etree.HTML(response.text)
        content = "".join(chapterHtml.xpath("//*[@id='content']/text()"))
        parts.append(content.replace("\xa0", " ").replace("\r", "\n"))
        progress.update(int((index - curChaptersNum + 1) / total * 100))
    return coverUrl, len(chaptersHtml), "".join(parts)
Пример #6
0
def test_accuracy(path='./Data/dict.txt', size=1000, loop=5):
    """
    Function for testing the model's accuracy

    Each loop draws ``size`` random lines (with replacement) from the test
    file, takes the first whitespace-separated token of each line as the
    reference word, converts it to pinyin, feeds that to ``Model.predict``
    and scores the fraction of positions at which the prediction matches the
    word. The overall accuracy is printed.

    Args:
        path(str): The path of test file
        size(int): The size of test data
        loop(int): The times of test

    Raises:
        ValueError: if ``size`` is larger than the number of lines in the file
    """
    # Only the line list is needed; do not keep the file open while testing.
    with open(path, 'r', encoding='utf-8') as file:
        lines = file.readlines()
    if size > len(lines):
        print("The size is too large")
        raise ValueError("The size is too large")

    accurate_nums = list()
    for i in range(loop):
        progress = LineProgress(title='Loop ' + str(i + 1))
        test_pos_list = np.random.randint(0, len(lines), size)
        test_words = [lines[pos].split()[0] for pos in test_pos_list]
        accurate_num = 0
        for index, test_word in enumerate(test_words):
            progress.update(index / size * 100)
            pin_yin = ' '.join([
                py_list[0] for py_list in pinyin(test_word, style=NORMAL)
            ])
            prediction = Model.predict(pin_yin)
            # zip stops at the shorter sequence, so a prediction longer than
            # the reference word can no longer index past the end of it.
            matched = sum(
                1 for guess, truth in zip(prediction, test_word)
                if guess == truth)
            accurate_num += matched / len(test_word)
        accurate_nums.append(accurate_num)
    print("The test accuracy is: {:.2f}%".format(
        sum(accurate_nums) / (size * loop) * 100))
Пример #7
0
class Progress:
    """Counter-driven wrapper around a ``LineProgress`` bar.

    Every call to :meth:`show_progress` counts one finished unit of work and
    redraws the bar with the percentage of ``total`` reached so far.
    """

    def __init__(self, title, total):
        """Create the bar.

        Args:
            title: text passed to ``LineProgress`` as its title.
            total: number of ``show_progress`` calls that make up 100%.
        """
        self._counter = 0
        self.total = total
        self.progress = LineProgress(title=title)

    def show_progress(self):
        """Count one more completed unit and update the bar."""
        self._counter += 1
        if self.total > 0:
            # Extra calls beyond ``total`` must not push the bar past 100%.
            percent = min(100, int(self._counter / self.total * 100))
        else:
            # Nothing to do means the work is already complete; this also
            # avoids dividing by zero.
            percent = 100
        self.progress.update(percent)
Пример #8
0
 def minMaxSearch(self, board, color, nSteps, alpha, beta):
     """Search for the best move within ``nSteps`` plies using min-max with alpha-beta cuts.

     Args:
         board: board grid indexed as ``board[i][j]``; cells greater than 64
             are treated as BLACK pieces and cells strictly between 0 and 64
             as WHITE pieces (see the piece filter in the loop below).
         color: side to move at this node (compared against WHITE / BLACK).
         nSteps: remaining search depth.
         alpha: best value found so far for the maximizing side.
         beta: best value found so far for the minimizing side.

     Returns:
         ``1e9`` / ``-1e9`` when ``self.endBoard`` reports a finished board for
         WHITE / BLACK, the ``ChessJudger().evaluate`` score at depth 0,
         ``alpha`` or ``beta`` when a cut happens, otherwise the best value
         found for the side to move.

     Side effects:
         At the top layer (``nSteps == self.nSteps``) on the maximizing side,
         ``self.bestMove`` is set to ``[i, j, x, y]`` and a "FIND:" line is
         printed each time a strictly better move is found.
     """
     # Terminal boards short-circuit the search.
     # NOTE(review): presumably endBoard(board, WHITE) means WHITE has lost,
     # since it scores +1e9 for the maximizing side -- confirm.
     if self.endBoard(board, WHITE):
         return 1e9
     elif self.endBoard(board, BLACK):
         return -1e9
     # Depth exhausted: fall back to the static evaluation.
     if nSteps == 0:
         return ChessJudger().evaluate(board)
     # Start from the worst possible value for the side to move.
     if color == WHITE:  # enemy, min
         ret = 1e9
     else:  # ai side, max
         ret = -1e9
     if nSteps == self.nSteps:
         # NOTE(review): the progress bar is created but never updated (the
         # update calls further down are commented out), so displayer and
         # nCounter are effectively unused.
         nCounter = len(np.where(board > 64)[0])
         displayer = LineProgress(total=nCounter)
         nCounter = 0
     # Row order depends on the side to move; columns are visited from the
     # centre outwards.
     # NOTE(review): iTurn stays unbound if color is neither WHITE nor BLACK.
     if color == WHITE:
         iTurn = list(range(8))
     elif color == BLACK:
         iTurn = list(range(7, -1, -1))
     jTurn = [3, 4, 2, 5, 1, 6, 0, 7]
     for i in iTurn:
         for j in jTurn:
             # Only expand squares holding a piece of the side to move.
             if (color == BLACK and board[i][j] > 64) or (
                     color == WHITE and board[i][j] > 0
                     and board[i][j] < 64):  # black
                 tempJudger = ChessJudger()
                 validMoves = tempJudger.validMoves(board, [i, j])
                 # Target squares are the cells marked 1 in validMoves.
                 positionsX, positionsY = np.where(validMoves == 1)
                 rearrangedPairs = self.rearrangePositions(
                     color, positionsX, positionsY)
                 for (x, y) in rearrangedPairs:
                     newBoard = tempJudger.movedSituation(
                         board, [i, j], [x, y])
                     # NOTE(review): `not color` flips the side only if WHITE
                     # and BLACK are the booleans True/False -- confirm.
                     playoutPotential = self.minMaxSearch(
                         newBoard, not color, nSteps - 1, alpha, beta)
                     if color == WHITE:  # enemy, min
                         ret = min(ret, playoutPotential)
                         beta = min(beta, ret)
                         if beta <= alpha:
                             # print("ALPHA CUT!")
                             return alpha
                     else:
                         if nSteps == self.nSteps:
                             # displayer.update(nCounter+1)
                             # nCounter+=1
                             if ret < playoutPotential:  # TOP LAYER, upgrade best move
                                 self.bestMove = [i, j, x, y]
                                 print("FIND:", self.bestMove,
                                       playoutPotential)
                         ret = max(ret, playoutPotential)
                         alpha = max(alpha, ret)
                         if beta <= alpha:
                             # print("BETA PRUNED!")
                             return beta
     return ret
Пример #9
0
    def __train_emission(cls, char_lines):
        """
        Train emission parameter

        Every input line is expected to hold two whitespace-separated
        columns: the text and a data type. Text of type ``'S'`` is segmented
        with ``jieba.cut`` and each segment is paired with its joined pinyin;
        other text is paired character by character with ``pinyin(...)``.
        One ``Emission`` row is inserted per (key, pinyin) pair with the log
        of that pinyin's share of the key's total count. Lines with fewer
        than two columns are skipped.

        Args:
            char_lines(list): content list of training data
        """
        print('Begin training emission parameter...')
        length = len(char_lines)
        progress = LineProgress(title='Training emission')
        char_pinyin_prob = dict()
        for count, line in enumerate(char_lines, start=1):
            progress.update(count * 100 / length)
            # Split once. Blank lines and lines without a type column carry
            # no usable sample and used to crash on the [1] lookup.
            fields = line.split()
            if len(fields) < 2:
                continue
            line, data_type = fields[0], fields[1]
            if data_type == 'S':
                line = list(jieba.cut(line))
            if len(line) == 0:
                continue
            if not is_chinese(line):
                continue
            if data_type == 'S':
                pinyin_list = [pinyin(word, style=NORMAL) for word in line]
            else:
                pinyin_list = pinyin(line, style=NORMAL)
            for character, pinyin_s in zip(line, pinyin_list):
                pinyin_prob = char_pinyin_prob.get(character, dict())
                # NOTE(review): for 'S' data py[0] looks like the reading of
                # one character of the segment; for other data pinyin_s seems
                # to be a flat list of strings, so py[0] would be only the
                # first letter of each reading -- confirm this is intended.
                pin_yin = ' '.join([py[0] for py in pinyin_s])
                pinyin_prob[pin_yin] = pinyin_prob.get(pin_yin, 0) + 1
                if data_type == 'W':
                    for pin_yin in pinyin_s:
                        pinyin_prob[pin_yin] = pinyin_prob.get(pin_yin, 0) + 1
                char_pinyin_prob[character] = pinyin_prob

        print('\nInserting into database...')
        for character, pinyin_prob in char_pinyin_prob.items():
            # The normaliser is the same for every pinyin of this key, so
            # compute it once instead of once per inserted row.
            total = sum(pinyin_prob.values())
            for pin_yin, prob in pinyin_prob.items():
                # noinspection PyTypeChecker
                cls.insert(Emission, character=character, pin_yin=pin_yin,
                           prob=float(np.log(prob / total)))
        print('Done training emission parameter')
Пример #10
0
 def addLineProgress(self, progressID, progressName):
     """Create a ``LineProgress`` titled ``progressName`` and register it under ``progressID``.

     The bar uses this object's configured symbol and width. It is stored in
     ``self.childProgress`` and handed to ``self.__MultiProgress__.put``.
     """
     bar = LineProgress(
         symbol=self.__progressSymbol__,
         width=self.__progressWidth__,
         title=progressName,
     )
     self.childProgress[progressID] = bar
     self.__MultiProgress__.put(progressID, bar)
Пример #11
0
 def __init__(self, title, total):
     """Start with a zeroed counter, remember ``total`` and create a ``LineProgress`` titled ``title``."""
     self._counter, self.total = 0, total
     self.progress = LineProgress(title=title)
Пример #12
0
    # print('线程%s已完成' % threading.current_thread().name)

t1 = threading.Thread(target=test_po, name='t1')
t2 = threading.Thread(target=test_po, name='t2')
t1.start()
t2.start()
t1.join()
t2.join()
test_po()
test_po()'''

from eprogress import LineProgress, MultiProgressManager
import threading, time


def li_po(proess_mager, sleep_time):
    """Report progress 1..100 to ``proess_mager`` under the current thread's name.

    After each update the function sleeps for ``sleep_time`` seconds.
    """
    step = 0
    while step < 100:
        step += 1
        proess_mager.update(threading.current_thread().name, step)
        time.sleep(sleep_time)

# Demo: two worker threads drive two progress bars through one shared manager.
proess_mager = MultiProgressManager()
# The thread names ('1', '2') double as the bar keys: li_po reports under
# threading.current_thread().name, so they must match the keys used in put().
t1 = threading.Thread(target=li_po, args=(proess_mager, 0.5), name='1')
t2 = threading.Thread(target=li_po, args=(proess_mager, 1), name='2')
# Register the bars before the threads start updating them.
proess_mager.put(str(1), LineProgress(total=100, title='1 Thread'))
proess_mager.put(str(2), LineProgress(total=100, title='2 Thread'))
t1.start()
t2.start()
# Block until both workers have finished.
t1.join()
t2.join()

Пример #13
0
def CreateProgress(title, max_l):
    """Show a demo line progress bar that fills up over roughly five seconds.

    Args:
        title: caption displayed next to the bar.
        max_l: not used by the current implementation; kept so existing
            callers keep working.
    """
    # Use the caller's title instead of a hard-coded placeholder.
    line_progress = LineProgress(title=title, total=100, symbol='#')
    for i in range(101):
        # i * 2 climbs to 200 while the bar's total is 100; cap it so the bar
        # never reports more than 100%.
        line_progress.update(min(i * 2, 100))
        time.sleep(0.05)
Пример #14
0
def data_analysis(fund_with_achievement, choice_return_this, choice_time_this):
    """
    Select the funds that satisfy the given screening strategy.

    Writes a header line to ``fund_choice_filename`` followed by every record
    of ``fund_with_achievement`` (its first line is skipped) that passes both
    the return-rate check and the tenure check. Any exception raised while
    filtering is printed rather than propagated.

    :param fund_with_achievement: name of the file holding all fund records
    :param choice_return_this: required return rates; its values are compared,
        in order, with the record's one_month, three_month, six_month,
        one_year, three_year, from_st and this_return columns
    :param choice_time_this: required tenure; its values are compared, in
        order, with the record's this_tenure_time and all_tenure_time columns
    """
    # Start from a clean result file.
    try:
        os.remove(fund_choice_filename)
    except FileNotFoundError:
        pass

    try:
        # Keep the result file open for the whole run instead of reopening it
        # in append mode for every accepted record.
        with open(fund_choice_filename, 'w') as result_file:
            if fund_with_achievement == all_index_fund_with_msg_filename:
                result_file.write(header_index_fund)
            else:
                result_file.write(header_guaranteed_fund)

            print('筛选基金。。。')
            with open(fund_with_achievement, 'r') as f:
                all_lines = f.readlines()[1:]
            len_of_lines = len(all_lines)
            line_progress = LineProgress(title='爬取进度')

            for count, record in enumerate(all_lines, start=1):
                # Split the record into its return-rate and tenure columns.
                (_, _, one_month, three_month, six_month, one_year,
                 three_year, from_st, _, this_tenure_time, this_return,
                 all_tenure_time, _) = record.split(',')

                # Records with unknown data or no one-month figure are rejected.
                keep = not (one_month == '??' or one_month == '--')

                # Return-rate screening: every available figure must reach
                # the required value.
                if keep:
                    return_all = [
                        one_month, three_month, six_month, one_year,
                        three_year, from_st, this_return
                    ]
                    for required, actual in zip(choice_return_this.values(),
                                                return_all):
                        if actual == '--':
                            continue
                        # The last character is dropped before conversion
                        # (presumably a trailing '%').
                        if float(actual[:-1]) < required:
                            keep = False
                            break

                # Tenure screening: compare component by component; the first
                # differing component decides.
                if keep:
                    time_all = [this_tenure_time, all_tenure_time]
                    for required_parts, actual_text in zip(
                            choice_time_this.values(), time_all):
                        for needed, actual in zip(
                                required_parts,
                                get_time_from_str(actual_text)):
                            if actual > needed:
                                break
                            elif actual == needed:
                                continue
                            else:
                                keep = False
                                break

                if keep:
                    result_file.write(record)
                # Update for every record, including rejected ones, so the
                # bar always reaches 100%.
                line_progress.update(count * 100 // len_of_lines)

    except Exception as e:
        print(e)
Пример #15
0
def get_past_performance(source_file):
    """
    Crawl detailed information for every fund listed in a simple fund directory.

    One thread running ``thread_get_past_performance`` is started per
    ``code,name`` entry. The number of threads kept alive is bounded by the
    module-level ``thread_pool``, which this function adjusts as results
    arrive. Results collected in the index-fund and guaranteed-fund queues
    are appended to their result files.

    :param source_file: path of the fund directory file to crawl (the file is
        deleted once it has been read), or an already loaded sequence of
        ``'code,name'`` strings
    :return: list of the entries left in the give-up queue (funds whose crawl
        failed), or ``None`` when the result files cannot be opened
    """
    global thread_pool
    # Make sure the result files can be opened and write the column headers.
    try:
        if source_file == all_fund_filename:
            with open(all_index_fund_with_msg_filename, 'w') as f:
                f.write(header_index_fund)
                f.write('\n')
            with open(all_guaranteed_fund_with_msg_filename, 'w') as f:
                f.write(header_guaranteed_fund)
                f.write('\n')
    except IOError:
        print('文件' + all_fund_filename + '无法打开')
        return

    if isinstance(source_file, str):
        with open(source_file, 'r') as f:
            fund_list = f.readlines()
        os.remove(source_file)
    else:
        fund_list = source_file

    # Progress bar.
    line_progress = LineProgress(title='爬取进度')

    # Worker threads that are still running.
    thread = list()
    # Queues receiving what the workers crawl.
    queue_index_fund = Queue()
    queue_guaranteed_fund = Queue()
    queue_other_fund = Queue()
    queue_give_up = Queue()
    lock_thread_pool = threading.Lock()

    last_queue_num = 0
    fund_list_length = len(fund_list)
    ture_done_num = 0

    def save_file():
        """Drain the two result queues into their files."""
        with open(all_index_fund_with_msg_filename, 'a') as f:
            while not queue_index_fund.empty():
                row = queue_index_fund.get()
                f.write(''.join(field + ',' for field in row))
                f.write('\n')

        with open(all_guaranteed_fund_with_msg_filename, 'a') as f:
            while not queue_guaranteed_fund.empty():
                row = queue_guaranteed_fund.get()
                f.write(''.join(field + ',' for field in row))
                f.write('\n')

    def drop_finished_threads():
        """Forget workers that have ended.

        The list is rebuilt in place: removing items from a list while
        iterating over it skips the element after each removal.
        """
        thread[:] = [worker for worker in thread if worker.is_alive()]

    for i in fund_list:
        # Number of funds whose result is currently waiting in a queue.
        done_num = (queue_index_fund.qsize() + queue_guaranteed_fund.qsize() +
                    queue_other_fund.qsize() + queue_give_up.qsize())
        try:
            code, name = i.split(',')
            # Drop the last character of the name column (presumably the
            # trailing newline).
            name = name[:-1]
        except ValueError:
            # Not exactly two comma-separated columns: skip the entry.
            continue

        # Crawl this fund in its own thread.
        t = threading.Thread(target=thread_get_past_performance,
                             args=(code, name, queue_index_fund,
                                   queue_guaranteed_fund, queue_other_fund,
                                   queue_give_up, lock_thread_pool),
                             name=code + ',' + name)
        thread.append(t)
        t.start()
        drop_finished_threads()

        # Too many live threads: grow the limit by the number of results that
        # arrived since the last check, then wait until below the limit.
        if len(thread) > thread_pool:
            thread_pool += done_num - last_queue_num
            last_queue_num = done_num
            while len(thread) > thread_pool:
                time.sleep(random.random())
                drop_finished_threads()

        line_progress.update(
            (ture_done_num + done_num) * 100 // fund_list_length)
        # Flush to disk after a batch of results. Crawling slows down over
        # time; num_save_file and the pause below can be tuned so that
        # crawling happens in bursts.
        if done_num >= num_save_file:
            time.sleep(5)
            thread_pool += done_num - last_queue_num
            last_queue_num = 0
            ture_done_num += (queue_index_fund.qsize() +
                              queue_guaranteed_fund.qsize())
            save_file()

    # Wait for the remaining workers to finish.
    while len(thread) > 0:
        drop_finished_threads()
        process = (queue_index_fund.qsize() + queue_guaranteed_fund.qsize() +
                   queue_other_fund.qsize() + queue_give_up.qsize() +
                   ture_done_num) * 100 // fund_list_length
        line_progress.update(process)
        time.sleep(random.random())

    save_file()
    print('\n基金信息爬取完成,其中处于封闭期或已终止的基金有' + str(queue_other_fund.qsize()) +
          '个,爬取失败的有' + str(queue_give_up.qsize()) + '个')
    return [queue_give_up.get() for _ in range(queue_give_up.qsize())]
Пример #16
0
def get_past_performance(all_fund_generator_or_list, first_crawling=True):
    """
    Crawl detailed information for every fund listed in a simple fund directory.

    One thread running ``thread_get_past_performance`` is started per
    ``code,name`` entry while a background saver thread, woken through an
    event, appends the collected index-fund and guaranteed-fund results to
    their files and halves the thread limit.

    :param all_fund_generator_or_list: fund directory to crawl, either a
        generator or a list of ``'code,name'`` strings
    :param first_crawling: whether this is the first crawl; if so the result
        files are truncated and the column headers are written
    :return: list of the entries left in the give-up queue (funds whose crawl
        failed), or ``None`` when the result files cannot be opened
    :raises AttributeError: if the input is neither a list nor a generator
    """
    import types  # local import: only needed for the generator check below

    maximum_of_thread = 1
    # Make sure the result files can be opened and write the column headers.
    try:
        if first_crawling:
            with open(all_index_fund_with_msg_filename, 'w') as f:
                f.write(header_index_fund)
            with open(all_guaranteed_fund_with_msg_filename, 'w') as f:
                f.write(header_guaranteed_fund)
    except IOError:
        print('文件' + all_fund_filename + '无法打开')
        return

    # Turn list input into a generator so both kinds are consumed alike.
    if isinstance(all_fund_generator_or_list, list):
        all_fund_generator_or_list = (i for i in all_fund_generator_or_list)
    elif not isinstance(all_fund_generator_or_list, types.GeneratorType):
        raise AttributeError('expected a list or a generator of fund entries')

    # Progress bar.
    line_progress = LineProgress(title='爬取进度')

    # Worker threads that are still running.
    thread = list()
    # Queues receiving what the workers crawl.
    queue_index_fund = Queue()
    queue_guaranteed_fund = Queue()
    queue_other_fund = Queue()
    queue_give_up = Queue()

    num_of_previous_completed = 0
    num_of_last_addition_of_completed_fund_this_time = 0
    num_of_last_addition_give_up_fund = 0
    num_of_last_addition_other_fund = 0
    need_to_save_file_event = threading.Event()

    def save_file():
        """Saver loop: on every event, halve the thread limit and flush the queues."""
        nonlocal maximum_of_thread, num_of_last_addition_of_completed_fund_this_time, num_of_previous_completed, \
            num_of_last_addition_give_up_fund, num_of_last_addition_other_fund
        while True:
            need_to_save_file_event.wait()
            maximum_of_thread = (maximum_of_thread // 2) + 1
            num_of_last_addition_of_completed_fund_this_time = 0
            num_of_previous_completed += (queue_index_fund.qsize() +
                                          queue_guaranteed_fund.qsize() +
                                          queue_other_fund.qsize() +
                                          queue_give_up.qsize() -
                                          num_of_last_addition_give_up_fund -
                                          num_of_last_addition_other_fund)
            num_of_last_addition_give_up_fund = queue_give_up.qsize()
            with open(all_index_fund_with_msg_filename, 'a') as f:
                while not queue_index_fund.empty():
                    row = queue_index_fund.get()
                    f.write(''.join(field + ',' for field in row))
                    f.write('\n')

            with open(all_guaranteed_fund_with_msg_filename, 'a') as f:
                while not queue_guaranteed_fund.empty():
                    row = queue_guaranteed_fund.get()
                    f.write(''.join(field + ',' for field in row))
                    f.write('\n')
            need_to_save_file_event.clear()

    def drop_finished_threads():
        """Forget workers that have ended.

        The list is rebuilt in place: removing items from a list while
        iterating over it skips the element after each removal.
        """
        thread[:] = [worker for worker in thread if worker.is_alive()]

    def wait_for_saver():
        """Sleep until the saver has cleared the event (or has died)."""
        while need_to_save_file_event.is_set() and saver.is_alive():
            time.sleep(0.01)

    saver = threading.Thread(target=save_file, daemon=True)
    saver.start()

    for i in all_fund_generator_or_list:
        try:
            code, name = i.split(',')
            # Drop the last character of the name column (presumably the
            # trailing newline).
            name = name[:-1]
        except ValueError:
            # Not exactly two comma-separated columns: skip the entry.
            continue

        num_of_completed_this_time = (queue_index_fund.qsize() +
                                      queue_guaranteed_fund.qsize() +
                                      queue_other_fund.qsize() +
                                      queue_give_up.qsize() -
                                      num_of_last_addition_give_up_fund -
                                      num_of_last_addition_other_fund)

        # Crawl this fund in its own thread.
        t = threading.Thread(target=thread_get_past_performance,
                             args=(code, name, queue_index_fund,
                                   queue_guaranteed_fund, queue_other_fund,
                                   queue_give_up, need_to_save_file_event),
                             name=code + ',' + name)
        thread.append(t)
        t.start()
        drop_finished_threads()

        if len(thread) > maximum_of_thread:
            time.sleep(random.random())
            if need_to_save_file_event.is_set():
                # A save is in progress: wait for it without spinning the CPU.
                wait_for_saver()
            else:
                maximum_of_thread += num_of_completed_this_time - num_of_last_addition_of_completed_fund_this_time
                num_of_last_addition_of_completed_fund_this_time = num_of_completed_this_time

            # Let the pool drain to half of the limit before starting more.
            drop_finished_threads()
            while len(thread) > maximum_of_thread // 2:
                time.sleep(0.01)
                drop_finished_threads()

        line_progress.update(
            (num_of_previous_completed + num_of_completed_this_time) *
            100 // sum_of_fund)

    # Wait for the remaining workers to finish.
    while len(thread) > 0:
        line_progress.update((sum_of_fund - len(thread)) * 100 // sum_of_fund)
        time.sleep(random.random())
        drop_finished_threads()

    line_progress.update(99)
    need_to_save_file_event.set()
    # The saver is a daemon thread: give it the chance to flush the last
    # results before returning, otherwise they can be lost at interpreter exit.
    wait_for_saver()
    line_progress.update(100)
    print('\n基金信息爬取完成,其中处于封闭期或已终止的基金有' + str(queue_other_fund.qsize()) +
          '个,爬取失败的有' + str(queue_give_up.qsize()) + '个')
    return [queue_give_up.get() for _ in range(queue_give_up.qsize())]
Пример #17
0
    which = int(which)

    thread_pool = []
    if which == 1:
        # circle
        circle_progress = CircleProgress(title='circle loading')
        # circle_thread = threading.Thread(target=mock_single_progress, args=(circle_progress, 0.1))
        # thread_pool.append(circle_thread)
        # circle_thread.start()
        for i in range(1, 101):
            circle_progress.update(i)
            time.sleep(0.1)
    elif which == 2:
        # line
        line_progress = LineProgress(title='line progress')
        for i in range(1, 101):
            line_progress.update(i)
            time.sleep(0.05)
        # line_thread = threading.Thread(target=mock_single_progress, args=(line_progress, 0.05))
        # thread_pool.append(line_thread)
        # line_thread.start()
    elif which == 3:
        # multi line
        progress_manager = MultiProgressManager()

        thread1 = threading.Thread(target=mock_multi_progress,
                                   args=(progress_manager, 0.05),
                                   name=str(1001))
        thread2 = threading.Thread(target=mock_multi_progress,
                                   args=(progress_manager, 0.2),
Пример #18
0
from eprogress import LineProgress, CircleProgress, MultiProgressManager
import time
# Demo 1: circle progress indicator, updated with 1..100 at 0.1 s per step.
circle_progress = CircleProgress(title='circle loading')
for i in range(1, 101):
    circle_progress.update(i)
    time.sleep(0.1)

# Demo 2: line progress bar, updated with 1..100 at 0.05 s per step.
line_progress = LineProgress(title='line progress')
for i in range(1, 101):
    line_progress.update(i)
    time.sleep(0.05)
Пример #19
0
def main(LIMT=20):
    """Show the start-up progress animations, then run the live status screen.

    After the loading animations the window is set up with ``set_win`` and a
    static layout (banner, labels) is drawn. The screen is then refreshed
    about twice per second with CPU / memory figures, the elapsed time,
    randomly advancing step counters and the counts returned by ``ReadNum``
    for the four result files. Ctrl-C ends the loop; ``unset_win`` is always
    called on the way out.

    Args:
        LIMT: value shown on the "TIMING LIMIT" line, in minutes.
    """
    circle_progress = CircleProgress(title='Initialize loading')
    for i in range(1, 10):
        circle_progress.update(i)
        time.sleep(0.1)

    # Three cosmetic "scan" bars that differ only in title and speed.
    for scan_title, delay in (('Scan System Information', 0.02),
                              ('Scan Patch', 0.03),
                              ('Scan Model', 0.03)):
        line_progress = LineProgress(title=scan_title)
        for i in range(1, 101):
            line_progress.update(i)
            time.sleep(delay)

    MODLENUM = 0
    FORMATS = 0
    STACKS = 0
    REGSS = 0
    RWS = 0

    start = timeit.default_timer()
    set_win()
    try:
        display_info(" BJTU ZXY MAKARA ", 0, 0, 1)
        display_info(" Version: Beta 0.1 ", 17, 0, 2)
        display_info(" Gitee: https://gitee.com/zeroaone/makara \n", 36, 0, 3)
        # Backslashes are written as "\\" so the banner no longer relies on
        # invalid escape sequences; the resulting text is unchanged.
        str1 = (
            " __  __    _    _  __    _    ____      _\n"
            "|  \\/  |  / \\  | |/ /   / \\  |  _ \\    / \\\n"
            "| |\\/| | / _ \\ | ' /   / _ \\ | |_) |  / _ \\\n"
            "| |  | |/ ___ \\| . \\  / ___ \\|  _ <  / ___ \\\n"
            "|_|  |_/_/   \\_\\_|\\_\\/_/   \\_\\_| \\_\\/_/   \\_\\\n"
            "\n"
        )
        display_info_flash(str1, 0, 1)
        display_info("[CPU:     %]", 0, 7, 4)
        display_info("[CPU-USER-TIME:          ]", 0, 8, 4)
        display_info("[MEM:     %]", 0, 9, 4)
        display_info("[ PROCESS TIMING ] \n", 0, 10, 1)
        display_info("TIMING LIMIT :" + str(LIMT) + " MIN", 0, 11, 1)
        display_info("TIMING RUNING:", 0, 12, 1)
        display_info("[ SCAN BUG STEP  ] \n", 0, 13, 2)
        display_info("MODEL     NUM:", 0, 14, 4)
        display_info("FORMAT  STEPS:", 0, 15, 4)
        display_info("STACK   STEPS:", 0, 16, 4)
        display_info("REGS    STEPS:", 0, 17, 4)
        display_info("RW      STEPS:", 0, 18, 4)
        display_info("[ SCAN BUG NUM  ] \n", 0, 19, 2)
        display_info("TOTAL    NUMS:", 0, 20, 4)
        display_info("FORMAT   NUMS:", 0, 21, 4)
        display_info("STACK    NUMS:", 0, 22, 4)
        display_info("REGS     NUMS:", 0, 23, 4)
        display_info("RW       NUMS:", 0, 24, 4)

        while True:
            # The step counters advance by random amounts on every refresh.
            MODLENUM = MODLENUM + random.randint(30, 50)
            FORMATS = FORMATS + random.randint(0, 20)
            STACKS = STACKS + random.randint(0, 20)
            REGSS = REGSS + random.randint(0, 20)
            RWS = RWS + random.randint(0, 20)

            FORMATN = ReadNum("fmt.json")
            STACKN = ReadNum("stack.json")
            REGN = ReadNum("regs.json")
            RWN = ReadNum("ArbRW.json")
            TOTALNUM = FORMATN + STACKN + REGN + RWN

            end = timeit.default_timer()
            time.sleep(0.5)
            mem = psutil.virtual_memory().percent
            cpu = psutil.cpu_percent()
            cpuuser = psutil.cpu_times().user
            display_info(str(cpu), 6, 7, 5)
            display_info(str(cpuuser), 16, 8, 5)
            display_info(str(mem), 6, 9, 5)
            display_info(str(end - start), 14, 12, 5)

            display_info(str(MODLENUM), 15, 14, 5)
            display_info(str(FORMATS), 15, 15, 5)
            display_info(str(STACKS), 15, 16, 5)
            display_info(str(REGSS), 15, 17, 5)
            display_info(str(RWS), 15, 18, 5)
            display_info(str(TOTALNUM), 15, 20, 5)
            display_info(str(FORMATN), 15, 21, 5)
            display_info(str(STACKN), 15, 22, 5)
            display_info(str(REGN), 15, 23, 5)
            display_info(str(RWN), 15, 24, 5)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to leave the status screen.
        pass
    finally:
        # Restore the window on any exit, not only on Ctrl-C.
        unset_win()
Пример #20
0
def crawling_fund(fund_list_class: GetFundList, first_crawling=True):
    """
    Crawl the information of every fund, starting from the simple fund directory.

    A crawling core is started here; this function feeds it ``(url, FundInfo)``
    items through an input queue and drains its results from a result queue,
    routing each result according to ``FundInfo.next_step``.

    :param fund_list_class: object providing the directory of funds to crawl
        (its ``get_fund_list()`` and ``sum_of_fund`` are used)
    :param first_crawling: whether this is the first crawl; it is passed to
        ``write_to_file`` and, per the original note, decides whether the save
        file is rewritten (cleared and the column header written)
    :return: the original note promises a list of failed 'fund code,fund name'
        entries. NOTE(review): no ``return`` statement is visible in this
        function, so as written it appears to return None -- confirm.
    """
    # Progress bar and the number of funds finished so far
    line_progress = LineProgress(title='爬取进度')
    cur_process = 0
    # Crawler input and output queues, the "input finished" event, the
    # network-status event, and the crawling core itself
    input_queue = Queue()
    result_queue = Queue()
    finish_sign = Event()
    network_health = Event()
    crawling_core = GetPageByWebWithAnotherProcessAndMultiThreading(
        input_queue, result_queue, finish_sign, network_health, TIMEOUT)
    crawling_core.start()

    fund_list = fund_list_class.get_fund_list()
    num_of_fund = fund_list_class.sum_of_fund
    having_fund_need_to_crawl = True

    # There are plans to split the parsing part out in the future
    fund_web_page_parse = parse_fund_info()
    manager_web_page_parse = parse_manager_info()
    write_file = write_to_file(first_crawling)
    # Advance each of the three objects once with next() before the first
    # send() call below
    next(fund_web_page_parse)
    next(manager_web_page_parse)
    next(write_file)
    if_first_show_network_problem = True
    while True:
        # Print the network warning once each time the event becomes set, and
        # re-arm the warning once the event is cleared again.
        # NOTE(review): presumably the crawling core sets this event when it
        # detects a network problem -- confirm against its implementation.
        if network_health.is_set():
            if if_first_show_network_problem:
                print('如果此条提示持续出现,请检查当前的网络状态')
                if_first_show_network_problem = False
        elif not if_first_show_network_problem:
            if_first_show_network_problem = True

        # Relies on short-circuit evaluation: first whether any funds remain
        # to be crawled, then how much data is waiting to be parsed (to bound
        # memory use), and only last the state of the input queue
        while having_fund_need_to_crawl and result_queue.qsize(
        ) < 100 and input_queue.qsize() < 10:
            try:
                code, name = next(fund_list).split(',')
            except StopIteration:
                having_fund_need_to_crawl = False
                break
            tem_fund_info = FundInfo()
            tem_fund_info.set_fund_info('基金名称', name)
            tem_fund_info.set_fund_info('基金代码', code)
            input_queue.put(
                ('http://fund.eastmoney.com/' + code + '.html', tem_fund_info))

        # Refilling the input queue takes priority to keep crawling fast; only
        # then is the data waiting to be parsed processed
        while (input_queue.qsize() > 5
               or not having_fund_need_to_crawl) and result_queue.qsize():
            a_result = result_queue.get()
            # If the previous crawl failed, retry it; items that keep failing
            # are never excluded
            if a_result[0] == 'error':
                input_queue.put(a_result[1:])
            else:
                # a_result[2] is the FundInfo; its next_step selects the stage
                if a_result[2].next_step == 'parsing_fund':
                    new_fund_info: FundInfo = fund_web_page_parse.send(
                        a_result[1:])
                    if new_fund_info.next_step == 'parsing_manager':
                        # Queue the URL of the last pending manager entry
                        input_queue.put(
                            (new_fund_info.manager_need_process_list[-1][0],
                             new_fund_info))
                    else:
                        # Put it back on the result queue so a later pass of
                        # this loop routes it by its new next_step
                        result_queue.put((None, None, new_fund_info))
                elif a_result[2].next_step == 'parsing_manager':
                    new_fund_info: FundInfo = manager_web_page_parse.send(
                        a_result[1:])
                    if new_fund_info.next_step == 'parsing_manager':
                        input_queue.put(
                            (new_fund_info.manager_need_process_list[-1][0],
                             new_fund_info))
                    else:
                        result_queue.put((None, None, new_fund_info))

                elif a_result[2].next_step == 'writing_file':
                    write_file.send(a_result[2])
                    cur_process += 1
                    line_progress.update(100 * cur_process / num_of_fund)
                else:
                    print(
                        f'请检查FundInfo的next_step(此处为{a_result[2].next_step})设置,出现了未知的参数'
                    )

        # Check whether all work is done: both queues must be empty now and
        # still empty after waiting TIMEOUT seconds
        if not having_fund_need_to_crawl and input_queue.qsize(
        ) == 0 and result_queue.qsize() == 0:
            time.sleep(TIMEOUT)
            if not having_fund_need_to_crawl and input_queue.qsize(
            ) == 0 and result_queue.qsize() == 0:
                break

    # Set the "input finished" event that was handed to the crawling core
    finish_sign.set()
Пример #21
0

def main(args):
    """Forward ``args`` to ``FindBUG.main``; the result of that call is not returned."""
    FindBUG.main(args)


if __name__ == '__main__':
    # Start-up animation: one short spinner, then three progress bars that
    # each step from 1 to 100 with a fixed pause between steps.
    global filename

    circle_progress = CircleProgress(title='Initialize loading')
    for i in range(1, 10):
        circle_progress.update(i)
        time.sleep(0.1)

    # (bar title, seconds to pause after each step)
    for stage_title, step_delay in (
            ('Scan System Information', 0.2),
            ('Scan Patch', 0.5),
            ('Scan Model', 0.5),
    ):
        line_progress = LineProgress(title=stage_title)
        for i in range(1, 101):
            line_progress.update(i)
            time.sleep(step_delay)

    # print("\033[34mSuixinBlog: https://suixinblog.cn\033[0m")
Пример #22
0
def main(LIMT=20):
    """Play the start-up animation, then redraw a simulated dashboard until Ctrl-C.

    Every changing figure on the dashboard (step counters, bug counts, CPU and
    memory readings) comes from ``random.randint`` or a plain counter; nothing
    is measured from the machine. The system-information rows and the log
    lines are fixed strings.

    Args:
        LIMT: Number shown after "TIMING LIMIT :" (labelled "MIN"). It is only
            displayed; the loop does not stop when that time has elapsed.
    """
    circle_progress = CircleProgress(title='Initialize loading')
    for step in range(1, 10):
        circle_progress.update(step)
        time.sleep(0.1)

    # (bar title, seconds to pause after each step)
    for stage_title, step_delay in (
            ('Scan System Information', 0.02),
            ('Scan Patch', 0.03),
            ('Scan Model', 0.03),
    ):
        line_progress = LineProgress(title=stage_title)
        for percent in range(1, 101):
            line_progress.update(percent)
            time.sleep(step_delay)

    # ASCII-art banner. Every backslash is escaped explicitly: the text is
    # unchanged, but the literal no longer relies on invalid escape sequences
    # such as "\/" or "\_", which newer Python versions warn about.
    banner = (" __  __    _    _  __    _    ____      _\n"
              "|  \\/  |  / \\  | |/ /   / \\  |  _ \\    / \\\n"
              "| |\\/| | / _ \\ | ' /   / _ \\ | |_) |  / _ \\\n"
              "| |  | |/ ___ \\| . \\  / ___ \\|  _ <  / ___ \\\n"
              "|_|  |_/_/   \\_\\_|\\_\\/_/   \\_\\_| \\_\\/_/   \\_\\\n"
              "\n")

    # Placeholder log lines; built once instead of on every refresh.
    log_pool = (
        "adasdasdasdasdasdasdasd",
        "sfasgea",
        "asasdasdasd",
        "ftrythfgh",
        "sadf34q",
        "vbvnvcbnbvnvb",
        "wrwersdf",
        "3454356ydh",
        "hdfhdfg",
        "eqwrsdfasdf",
        "cvbnvcb",
        "adhxcncvnvbxdfgsdgrsdrg",
        "fdsadfzxdvzxvd",
        "vzxcvzxcvzxcvzxvzxcvzxc",
    )

    cpuuser = 0
    format_steps = stack_steps = regs_steps = rw_steps = 0
    format_bugs = stack_bugs = regs_bugs = rw_bugs = 0

    start = timeit.default_timer()
    set_win()
    # NOTE(review): set_win()/unset_win() look like a screen set-up/teardown
    # pair -- confirm. The try/finally makes sure the teardown runs however
    # the drawing code exits, not only on Ctrl-C.
    try:
        # Static frame. The numeric arguments are passed exactly as before;
        # presumably (column, row, colour) -- confirm against display_info.
        display_info(" BJTU ZXY MAKARA ", 0, 0, 1)
        display_info(" Version: Beta 0.1 ", 17, 0, 2)
        display_info(" Gitee: https://gitee.com/zeroaone/makara \n", 36, 0, 3)
        display_info_flash(banner, 0, 1)
        display_info("[CPU:     %]", 0, 7, 4)
        display_info("[CPU-USER-TIME:          ]", 0, 8, 4)
        display_info("[MEM:     %]", 0, 9, 4)
        display_info("[ PROCESS TIMING ] \n", 0, 10, 1)
        display_info("TIMING LIMIT :" + str(LIMT) + " MIN", 0, 11, 1)
        display_info("TIMING RUNING:", 0, 12, 1)
        display_info("[ SCAN BUG STEP  ] \n", 0, 13, 2)
        display_info("MODEL     NUM:", 0, 14, 4)
        display_info("FORMAT  STEPS:", 0, 15, 4)
        display_info("STACK   STEPS:", 0, 16, 4)
        display_info("REGS    STEPS:", 0, 17, 4)
        display_info("RW      STEPS:", 0, 18, 4)
        display_info("[ SCAN BUG NUM  ] \n", 0, 19, 2)
        display_info("TOTAL    NUMS:", 0, 20, 4)
        display_info("FORMAT   NUMS:", 0, 21, 4)
        display_info("STACK    NUMS:", 0, 22, 4)
        display_info("REGS     NUMS:", 0, 23, 4)
        display_info("RW       NUMS:", 0, 24, 4)
        display_info("[ SYSTEM  INFO  ] \n", 0, 25, 2)
        display_info("CPU      NAME:", 0, 26, 4)
        display_info("CPU      ARCH:", 0, 27, 4)
        display_info("IP     ADDRES:", 0, 28, 4)
        display_info("OS       INFO:", 0, 29, 4)
        display_info("[LOG INFO LATEST 10] \n", 0, 30, 2)

        while True:
            # Simulated step counters; the model number is their sum.
            format_steps += random.randint(0, 10)
            stack_steps += random.randint(0, 20)
            regs_steps += random.randint(0, 20)
            rw_steps += random.randint(0, 20)
            model_num = format_steps + stack_steps + regs_steps + rw_steps

            # Simulated bug counters; the total is their sum.
            format_bugs += random.randint(0, 2)
            stack_bugs += random.randint(0, 2)
            regs_bugs += random.randint(0, 2)
            rw_bugs += random.randint(0, 2)
            total_bugs = format_bugs + stack_bugs + regs_bugs + rw_bugs

            # Elapsed time is sampled before the pause, as before.
            end = timeit.default_timer()
            time.sleep(1.5)
            mem = random.randint(0, 100)
            cpu = random.randint(0, 100)
            cpuuser += 1

            # CPU / memory readings and running time
            display_info(str(cpu), 6, 7, 5)
            display_info(str(cpuuser), 16, 8, 5)
            display_info(str(mem), 6, 9, 5)
            display_info(str(end - start), 14, 12, 5)

            # Scan step counters
            display_info(str(model_num), 15, 14, 5)
            display_info(str(format_steps), 15, 15, 5)
            display_info(str(stack_steps), 15, 16, 5)
            display_info(str(regs_steps), 15, 17, 5)
            display_info(str(rw_steps), 15, 18, 5)

            # Bug counters
            display_info(str(total_bugs), 15, 20, 5)
            display_info(str(format_bugs), 15, 21, 5)
            display_info(str(stack_bugs), 15, 22, 5)
            display_info(str(regs_bugs), 15, 23, 5)
            display_info(str(rw_bugs), 15, 24, 5)

            # System information (fixed strings)
            display_info('Qualcomm Technologies, Inc SDM660', 15, 26, 5)
            display_info('ARM aarch64', 15, 27, 5)
            display_info('192.168.31.106', 15, 28, 5)
            display_info('Android 9', 15, 29, 5)

            # Log area: rows 31..41 each get a random placeholder line.
            # random.choice covers the whole pool; the previous hard-coded
            # randint(0, 12) could never select the final entry.
            for row in range(31, 42):
                display_info(random.choice(log_pool), 0, row, 5)
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to leave the dashboard.
        pass
    finally:
        unset_win()
Пример #23
0
def get_room_search_result(min_lng,
                           max_lng,
                           min_lat,
                           max_lat,
                           zoom,
                           order_by,
                           sort_flag,
                           room_filter_args,
                           court_filter_args=None,
                           min_price='',
                           max_price='',
                           feature='',
                           leasetype='',
                           tag='',
                           resblock_id='',
                           transport='',
                           minute=''):
    """
    Crawl every page of the search results and collect the rooms that pass
    the configured filters. For the meaning of the search parameters, see the
    description of the url at the top of the file.

    Side effect: when ``court_filter_args`` is falsy, the module-level flag
    ``IF_FILTER_COURT`` is set to False. It is not set back to True here.

    :param room_filter_args: passed to ``RoomFilterByBothPriceAndArea``
    :param court_filter_args: passed to ``CourtFilterByAge``
    :return list of ARoomResult, total_num of room (the total reported by the
        first page, before filtering). If the first page yields nothing
        usable, an empty result list and 0 are returned.
    """
    global IF_FILTER_COURT
    if not court_filter_args:
        IF_FILTER_COURT = False

    results = ResultListWithFilter(
        RoomFilterByBothPriceAndArea(room_filter_args),
        CourtFilterByAge(court_filter_args))

    def fetch_page(page_num):
        # One place builds the request, so every page is fetched with the
        # same filters. Previously pages after the first were requested
        # without `transport` and `minute`.
        return get_result_from_one_page(min_lng, max_lng, min_lat, max_lat,
                                        zoom, page_num, order_by, sort_flag,
                                        min_price, max_price, feature,
                                        leasetype, tag, resblock_id,
                                        transport, minute)

    first_result = fetch_page(1)
    # Guard before subscripting: the original tested `first_result` only
    # after it had already been indexed.
    if not first_result:
        return results.get_result_list(), 0

    total_room_num = first_result['total']
    print(f'过滤前的搜索到的房间总数为{total_room_num}')
    # Set up the progress bar (skipped when LineProgress is falsy)
    line_progress = LineProgress(
        total=total_room_num) if LineProgress else None

    def collect_rooms(page_result):
        # Parse each room on the page, fetch its court details, and offer the
        # pair to the filtered result list.
        for raw_room in page_result['rooms']:
            room_base_info = RoomBaseInfo.parse(raw_room)
            court_info = CourtDetail.parse(
                get_court_info(room_base_info.court_id))
            results.append(RoomDetail(room_base_info, court_info))
            if line_progress:
                line_progress.update_by_cur_num(results.count)

    collect_rooms(first_result)

    # NOTE(review): assumes 'pages' is the count of 1-based pages, so the
    # last page number equals that count -- confirm against the API. The
    # previous range(2, pages) never requested the final page.
    total_page_num = int(first_result['pages'])
    for page_num in range(2, total_page_num + 1):
        page_result = fetch_page(page_num)
        if page_result:
            collect_rooms(page_result)

    return results.get_result_list(), total_room_num
Пример #24
0
import urllib.request as rq
import os
import json
from eprogress import LineProgress

# Download every icon listed under the "fix" key of bilibili's index-icon
# JSON into `save_dir`, showing a progress bar while doing so.
url = "https://www.bilibili.com/index/index-icon.json"
save_dir = "bilibili_images/"

# The with-block closes the HTTP response as soon as its body has been read.
with rq.urlopen(url) as res:
    json_str = json.loads(res.read())

# exist_ok=True replaces the exists()/mkdir() pair and its check-then-create race.
os.makedirs(save_dir, exist_ok=True)

images = json_str["fix"]
total = len(images)

progress = LineProgress(title='total ' + str(total) + ' images, downloading progress')

for index, im in enumerate(images):
    title, icon_url = im["title"], im["icon"]

    # The file keeps the extension of the remote icon. "http:" is prepended
    # as before (presumably the URLs are scheme-relative -- confirm).
    extension = icon_url.split('.')[-1]
    rq.urlretrieve("http:" + icon_url, filename=os.path.join(save_dir, title + "." + extension))

    progress.update(int((index + 1) / total * 100))
print("\ndone.")