Example #1
    @classmethod
    def setUpClass(cls):
        JsonConfig.set_data('TESTING', True)
        cls.u1 = User(email='*****@*****.**',
                      nickname='gureuso01',
                      password=SHA256.encrypt('1234'))
        cls.u2 = User(email='*****@*****.**',
                      nickname='gureuso02',
                      password=SHA256.encrypt('1234'))
        db.session.add(cls.u1)
        db.session.add(cls.u2)
        db.session.commit()
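
For context, a hedged sketch of the test class a setUpClass like this typically lives in; the app, db, model, and SHA256 imports here are assumptions, not part of the original example:

import unittest2

from app import db            # assumed application package
from app.models import User   # assumed model import
from app.utils import SHA256  # assumed hashing helper
from config import JsonConfig


class UserTestCase(unittest2.TestCase):
    """Hypothetical enclosing class for the setUpClass above; pair it
    with the tearDownClass shown in Example #6 below."""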
Example #2
def test():
    """test code"""
    JsonConfig.set_data('TESTING', True)

    loader = unittest2.TestLoader()
    start_dir = '{0}/apps'.format(Config.ROOT_DIR)
    suite = loader.discover(start_dir)

    runner = unittest2.TextTestRunner()
    r = runner.run(suite)

    JsonConfig.set_data('TESTING', False)

    if r.wasSuccessful():
        print('success')
    else:
        print('fail')
        exit(1)
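
These examples only ever call JsonConfig.set_data and JsonConfig.get_data with a key and a value. A minimal sketch of such a class-level store, assuming the data is persisted to a JSON file next to the module (the path and file layout are assumptions):

import json
import os


class JsonConfig:
    # Hypothetical minimal version of the JsonConfig used above;
    # the real class may cache or store data differently.
    _path = os.path.join(os.path.dirname(__file__), 'config.json')

    @classmethod
    def _load(cls):
        if not os.path.exists(cls._path):
            return {}
        with open(cls._path) as f:
            return json.load(f)

    @classmethod
    def get_data(cls, key):
        return cls._load().get(key)

    @classmethod
    def set_data(cls, key, value):
        data = cls._load()
        data[key] = value
        with open(cls._path, 'w') as f:
            json.dump(data, f)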
Example #3
        }
    },
    "Misc" : {
        "type": "menu",
        "info": "select command",
        "items": {
            "facebook": {
                "type": "command",
                "value": "xdg-open http://www.facebook.com"
            }
        }
    }
}
'''

import os
import sys
from config import JsonConfig
from commands import Menu

if __name__ == "__main__":
    if len(sys.argv) == 1:
        script_dir = os.path.dirname(os.path.realpath(__file__))
        config_file_path = os.path.join(script_dir, 'ninj_conf.json')
    else:
        config_file_path = sys.argv[1]
    
    top_menu = Menu("Ninj", "A bash helper for people with bad memory.", 
        exclude_back_option=True)
    json_config = JsonConfig()
    json_config.build_menu(top_menu, config_file_path)
    top_menu.enter()
Example #4
def get_model(model):
    if JsonConfig.get_data('TESTING'):
        return model.test_model
    return model
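
A hedged usage sketch: assuming each model class carries a test_model twin (implied by the attribute access above) and a Flask-SQLAlchemy-style query interface, call sites can route reads through get_model so tests hit the test table:

# Hypothetical call site; User is assumed to be a Flask-SQLAlchemy model
# importable from the application package.
def find_user_by_email(email):
    model = get_model(User)  # resolves to User.test_model while TESTING is set
    return model.query.filter_by(email=email).first()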
Example #5
    def __init__(self,
                 cfg_file_path,
                 json_file_path,
                 site_index,
                 shall_resume,
                 daemonize,
                 library_mode=False):
        # Set up logging before the config file is loaded; once the config
        # is read, this level is overwritten and the other levels are
        # emitted as configured there.
        configure_logging({"LOG_LEVEL": "CRITICAL"})
        self.log = logging.getLogger(__name__)

        self.cfg_file_path = cfg_file_path
        self.json_file_path = json_file_path
        self.site_number = int(site_index)
        self.shall_resume = shall_resume \
            if isinstance(shall_resume, bool) else literal_eval(shall_resume)
        self.daemonize = daemonize \
            if isinstance(daemonize, bool) else literal_eval(daemonize)

        # set up the config file
        self.cfg = CrawlerConfig.get_instance()
        self.cfg.setup(self.cfg_file_path)
        self.log.debug("Config initialized - Further initialisation.")

        self.cfg_crawler = self.cfg.section("Crawler")

        # Load the URL input JSON file, or, in library mode, treat
        # json_file_path itself as the site information (somewhat hacky).
        if not library_mode:
            self.json = JsonConfig.get_instance()
            self.json.setup(self.json_file_path)
            sites = self.json.get_site_objects()
            site = sites[self.site_number]
        else:
            sites = [json_file_path]
            site = json_file_path

        if "ignore_regex" in site:
            ignore_regex = "(%s)|" % site["ignore_regex"]
        else:
            ignore_regex = "(%s)|" % \
                           self.cfg.section('Crawler')['ignore_regex']

        # Get the default crawler. The crawler can be overwritten by fallbacks.
        if "additional_rss_daemon" in site and self.daemonize:
            self.crawler_name = "RssCrawler"
        elif "crawler" in site:
            self.crawler_name = site["crawler"]
        else:
            self.crawler_name = self.cfg.section("Crawler")["default"]
        # Get the real crawler-class (already "fallen back")
        crawler_class = self.get_crawler(self.crawler_name, site["url"])

        if not self.cfg.section('Files')['relative_to_start_processes_file']:
            relative_to_path = os.path.dirname(self.cfg_file_path)
        else:
            # absolute dir this script is in
            relative_to_path = os.path.dirname(__file__)

        news_item_class_name = self.cfg.section("Scrapy").get(
            "item_class", None)
        if not news_item_class_name:
            news_item_class = NewscrawlerItem
        else:
            news_item_class = ClassLoader.from_string(news_item_class_name)
            if not issubclass(news_item_class, NewscrawlerItem):
                raise ImportError(
                    "ITEM_CLASS must be a subclass of NewscrawlerItem")

        self.helper = Helper(self.cfg.section('Heuristics'),
                             self.cfg.section("Files")["local_data_directory"],
                             relative_to_path,
                             self.cfg.section('Files')['format_relative_path'],
                             sites, crawler_class, news_item_class,
                             self.cfg.get_working_path())

        self.__scrapy_options = self.cfg.get_scrapy_options()

        self.update_jobdir(site)

        # make sure the crawler does not resume crawling
        # if not stated otherwise in the arguments passed to this script
        self.remove_jobdir_if_not_resume()

        self.load_crawler(crawler_class, site["url"], ignore_regex)

        # Start the job. In library mode, do not stop the reactor after this
        # job has finished, so that further jobs can be executed. The job also
        # needs to run in a thread, since reactor.run does not seem to return.
        # Scrapy will attempt to start a new reactor, which fails with an
        # exception, but the code continues to run; we catch this exception
        # in the function 'start_process'.
        if library_mode:
            start_new_thread(start_process, (
                self.process,
                False,
            ))
        else:
            self.process.start()
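
Both CrawlerConfig.get_instance() and JsonConfig.get_instance() point to a classic singleton accessor. A minimal sketch of that pattern, with setup() and get_site_objects() stubs matching the calls above; the internal field names and the input-file layout are assumptions:

import json


class JsonConfig(object):
    # Hypothetical singleton matching the get_instance()/setup() calls above.
    _instance = None

    @classmethod
    def get_instance(cls):
        # Create the shared instance on first access, reuse it afterwards.
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def setup(self, json_file_path):
        # Read the URL input file once and keep it in memory.
        with open(json_file_path) as f:
            self._data = json.load(f)

    def get_site_objects(self):
        # Assumed file layout: {"base_urls": [{"url": ...}, ...]}
        return self._data.get('base_urls', [])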
Example #6
    @classmethod
    def tearDownClass(cls):
        Tag.query.delete()
        Post.query.delete()
        User.query.delete()
        db.session.commit()
        JsonConfig.set_data('TESTING', False)
Example #7
    def __init__(self, cfg_directory_path, is_resume, is_reset_elasticsearch,
                 is_reset_json, is_reset_mysql, is_reset_postgresql,
                 is_no_confirm, library_mode=False):
        """
        The constructor of the main class, thus the real entry point to the tool.

        :param cfg_directory_path:
        :param is_resume:
        :param is_reset_elasticsearch:
        :param is_reset_json:
        :param is_reset_mysql:
        :param is_reset_postgresql:
        :param is_no_confirm:
        :param library_mode:
        """
        configure_logging({"LOG_LEVEL": "ERROR"})
        self.log = logging.getLogger(__name__)

        # other parameters
        self.shall_resume = is_resume
        self.no_confirm = is_no_confirm
        self.library_mode = library_mode

        # Sets an environment variable called 'CColon', so scripts can import
        # modules of this project relative to this script's directory,
        # for example: sitemap_crawler can import UrlExtractor via
        #   from newsplease.helper_classes.url_extractor import UrlExtractor
        os.environ['CColon'] = os.path.abspath(os.path.dirname(__file__))

        # set stop handlers
        self.set_stop_handler()

        # threading
        self.thread_event = threading.Event()

        # Get & set CFG and JSON locally.
        if cfg_directory_path:
            # if a path was given by the user
            self.cfg_directory_path = self.get_expanded_path(cfg_directory_path)
        else:
            # if no path was given by the user, use default
            self.cfg_directory_path = self.get_expanded_path(self.config_directory_default_path)
        # init cfg path if empty
        self.init_config_file_path_if_empty()
        self.cfg_file_path = self.cfg_directory_path + self.config_file_default_name

        # config
        self.cfg = CrawlerConfig.get_instance()
        self.cfg.setup(self.cfg_file_path)
        self.mysql = self.cfg.section("MySQL")
        self.postgresql = self.cfg.section("Postgresql")
        self.elasticsearch = self.cfg.section("Elasticsearch")

        # perform reset if given as parameter
        if is_reset_mysql:
            self.reset_mysql()
        if is_reset_postgresql:
            self.reset_postgresql()
        if is_reset_json:
            self.reset_files()
        if is_reset_elasticsearch:
            self.reset_elasticsearch()
        # close the process
        if is_reset_elasticsearch or is_reset_json or is_reset_mysql or is_reset_postgresql:
            sys.exit(0)

        self.json_file_path = self.cfg_directory_path + self.cfg.section('Files')['url_input_file_name']

        self.json = JsonConfig.get_instance()
        self.json.setup(self.json_file_path)

        self.crawler_list = self.CrawlerList()
        self.daemon_list = self.DaemonList()

        self.__single_crawler = self.get_abs_file_path("single_crawler.py", True, False)

        self.manage_crawlers()