def __init__(self, config_file):
    """Load notification and audience settings from a JSON config file.

    The program is terminated through ``sys.exit()`` when ``config_file``
    does not exist or when it specifies no notifications.

    Args:
        config_file: Path of the JSON configuration file to read.

    Attributes set:
        notifications: ``OrderedDict`` mapping each notify name to the
            object returned by ``Notification.create``.
        audiences: Result of ``Audience.create`` for the audience section
            (``None`` is passed when the section is absent).
    """
    import json

    if not os.path.exists(config_file):
        logging.warning('[config] config file "%s" does not exist, program exit...', config_file)
        # NOTE(review): sys.exit() without an argument reports exit status 0
        # even though this is a failure path; kept as-is for compatibility.
        sys.exit()

    # JSON text is UTF-8; be explicit so decoding does not depend on the
    # platform's locale default.
    with open(config_file, encoding='utf-8') as config_fp:
        config_data = json.load(config_fp, object_pairs_hook=collections.OrderedDict)

    self.notifications = collections.OrderedDict()  # key: notify_name, value: Notification
    notifications = config_data[NOTIFY_KEY] if NOTIFY_KEY in config_data else None
    if not notifications:
        logging.warning("[config] no notification specified, program exit...")
        sys.exit()

    # Path adjustment and the import do not depend on the loop variable, so
    # they are done once here instead of once per notification.
    # Presumably adjust_path() makes the `util` package importable -- confirm
    # against wbnt_path.
    from wbnt_path import adjust_path
    adjust_path()
    from util.setting.notify import Notification

    for notify_name in notifications:
        self.notifications[notify_name] = Notification.create(
            notify_name, notifications[notify_name])

    from util.setting.audience import Audience
    audiences = config_data[AUDIENCE_KEY] if AUDIENCE_KEY in config_data else None
    self.audiences = Audience.create(audiences)
def setUp(self):
    """Test set-up hook: invoke ``wbnt_path.adjust_path()``."""
    import wbnt_path

    wbnt_path.adjust_path()
if __name__ == '__main__':
    # Command-line test utility: read an HTML file, optionally strip its
    # hyperlink tags, run it through HTMLParser with the requested settings
    # and print every retrieved item prefixed with its index.
    from argparse import ArgumentParser

    arg_parser = ArgumentParser(description="Test utility for HTMLParser")
    arg_parser.add_argument('htm_file')
    # The flags use store_const without a default, so an omitted flag yields
    # None (not False) in the parsed namespace.
    arg_parser.add_argument("-s", "--strip", dest="strip",
                            action="store_const", const=True,
                            help="strip (default: '%s')" % False)
    arg_parser.add_argument("-c", "--combine_br", dest="combine_br",
                            action="store_const", const=True,
                            help="combine <br> (default: '%s')" % False)
    arg_parser.add_argument("-rmw", "--remove_ws", dest="remove_ws",
                            action="store_const", const=True,
                            help="remove whitespace (default: '%s')" % False)
    arg_parser.add_argument("-rmh", "--remove_hyperlink", dest="remove_hyperlink",
                            action="store_const", const=True,
                            help="remove hyperlink tag (default: '%s')" % False)
    args = arg_parser.parse_args()

    if not args.htm_file or not os.path.exists(args.htm_file):
        # Report the problem instead of exiting silently; error() prints the
        # usage line plus this message to stderr and exits with status 2.
        arg_parser.error('htm file "%s" does not exist' % args.htm_file)

    with open(args.htm_file, encoding='utf8') as fd:
        web_content = fd.read()

    if args.remove_hyperlink:
        web_content = HtmAnalyzer.remove_hyperlink_tag(web_content)

    # The project imports below are placed after adjust_path(), as in the
    # original ordering; presumably they rely on it -- confirm against wbnt_path.
    from wbnt_path import adjust_path
    adjust_path()
    from util.setting.parser import (HtmParseSetting, STRIP_KEY, REMOVE_WS_KEY,
                                     REMOVE_LIST_KEY, COMBINE_BR_KEY)

    setting = HtmParseSetting({
        STRIP_KEY: args.strip,
        REMOVE_WS_KEY: args.remove_ws,
        REMOVE_LIST_KEY: None,  # no remove-list is supplied by this utility
        COMBINE_BR_KEY: args.combine_br,
    })
    content = HTMLParser().parse_and_retrieve(web_content, setting=setting)

    for i, line in enumerate(content):
        print(i, "=>", line)