def __init__(self):
     """Create the article parser and the initial, empty crawl settings.

     Sets four attributes: ``parser`` (a new ``ArticleParser``), ``category``
     (Korean category label -> integer code), ``selected_category`` (an empty
     list) and ``date`` (a dict whose fields all start at 0).
     """
     self.parser = ArticleParser()

     # Korean label -> integer code. The labels read, in order: politics,
     # economy, society, living/culture, world, IT/science.
     # NOTE(review): what the codes identify is not visible from here --
     # presumably section ids used by the data source; confirm.
     labelled_codes = (
         ('정치', 100),
         ('경제', 101),
         ('사회', 102),
         ('생활문화', 103),
         ('세계', 104),
         ('IT과학', 105),
     )
     self.category = dict(labelled_codes)

     # Nothing is selected until the caller chooses categories.
     self.selected_category = []

     # Every date field starts out as 0.
     self.date = dict.fromkeys(('start_year', 'end_year', 'end_month'), 0)
示例#2
0
    def __init__(self):
        """Create the article parser and the initial, empty crawl settings.

        Sets ``parser`` (a new ``ArticleParser``), ``categories`` (Korean
        sport label -> string identifier), ``selected_categories`` (an empty
        list), ``date`` (start/end year, month and day, all 0) and
        ``user_operating_system`` (the value of ``platform.system()``).
        """
        self.parser = ArticleParser()

        # Korean sport label -> string identifier. The labels read, in order:
        # baseball, overseas baseball, football, overseas football,
        # basketball, volleyball, golf, general, e-sports.
        sport_identifiers = (
            ('야구', "kbaseball"),
            ('해외야구', "wbaseball"),
            ('축구', "kfootball"),
            ('해외축구', "wfootball"),
            ('농구', "basketball"),
            ('배구', "volleyball"),
            ('골프', "golf"),
            ('일반', "general"),
            ('e스포츠', "esports"),
        )
        self.categories = dict(sport_identifiers)

        # Nothing is selected until the caller chooses categories.
        self.selected_categories = []

        # Both ends of the date range start with every field set to 0.
        date_fields = (
            'start_year', 'start_month', 'start_day',
            'end_year', 'end_month', 'end_day',
        )
        self.date = dict.fromkeys(date_fields, 0)

        # Name of the host operating system as reported by the platform module.
        self.user_operating_system = str(platform.system())
示例#3
0
 def __init__(self):
     """Create the article parser and the initial, empty crawl settings.

     Sets ``parser`` (a new ``ArticleParser``), ``categories`` (category
     label -> integer code, with both Korean and English labels),
     ``selected_categories`` (an empty list) and ``date`` (a dict whose
     fields all start at 0).
     """
     self.parser = ArticleParser()

     # The same five codes are reachable through a Korean label or an English
     # alias; the Korean entries come first in the resulting dict.
     # NOTE(review): code 104 has no entry in this table -- confirm that the
     # omission is intentional.
     korean_labels = {
         '정치': 100,
         '경제': 101,
         '사회': 102,
         '생활문화': 103,
         'IT과학': 105,
     }
     english_aliases = {
         'politics': 100,
         'economy': 101,
         'society': 102,
         'living_culture': 103,
         'IT_science': 105,
     }
     combined = dict(korean_labels)
     combined.update(english_aliases)
     self.categories = combined

     # Nothing is selected until the caller chooses categories.
     self.selected_categories = []

     # Every date field starts out as 0.
     self.date = dict.fromkeys(('start_year', 'end_year', 'end_month'), 0)
示例#4
0
# Configure the root logger: records at INFO level and above go to
# web_parser.log, formatted as "[time] # LEVEL    [source file] message"
# (the level name is left-aligned and padded to 8 characters).
logging.basicConfig(
    filename="web_parser.log",
    level=logging.INFO,
    format=u'[%(asctime)s] # %(levelname)-8s [%(filename)s] %(message)s',
)


def readBaseDataSetFromFile(path):
    """Load and return the JSON document stored at *path*.

    The file is opened as UTF-8 text and parsed with ``json.load``; the
    decoded object is returned unchanged. Errors raised by ``open`` or by
    the JSON parser are not caught here.
    """
    with open(path, encoding='UTF8') as source:
        return json.load(source)


if __name__ == "__main__":
    # from nltk.tokenize import sent_tokenize # разбивает на предложения

    # logging.info('Program started')
    article_parser = ArticleParser("mongodb://*****:*****@185.246.152.112/daryana")

    # article_parser.createDefaultSet(readBaseDataSetFromFile("meanings.json"))

    # article_parser.selfTeaching(2000, 200)
    # article_parser.resetMeanings()
    # article_parser.setMeanings()
    article_parser.classify()
    generateImgs()
    # generateImgs("first_def_set.json", "first_")

    # pikabu_urls = [
    #     "https://pikabu.ru/tag/iphone",
    #     "https://pikabu.ru/tag/apple",
    #     "https://pikabu.ru/tag/ios",
    #     "https://pikabu.ru/tag/macos",