def test_bad_project(self):
    # A Project rooted at this test module's directory is expected to
    # report that it is not inside any project and to expose no config.
    outside = Project(path=os.path.dirname(__file__))

    self.assertFalse(outside.inside_project)
    # Attributes are read one at a time, in the same order as asserted.
    for attr_name in ('cfg_path', 'cfg', 'project_dir'):
        self.assertIsNone(getattr(outside, attr_name))

    # No module settings are loaded and the data dir keeps its default.
    self.assertDictEqual(outside.module_settings.values, {})
    self.assertEqual(outside.data_dir, '.crawlmi')
def test_dummy_project(self):
    # A Project built with path=None is expected to look exactly like a
    # project-less environment: nothing resolved, default data dir.
    dummy = Project(path=None)

    # project detection
    self.assertFalse(dummy.inside_project)

    # configuration and location are all unset
    self.assertIsNone(dummy.cfg_path)
    self.assertIsNone(dummy.cfg)
    self.assertIsNone(dummy.project_dir)

    # settings are empty, data dir is the default
    self.assertDictEqual(dummy.module_settings.values, {})
    self.assertEqual(dummy.data_dir, '.crawlmi')
def test_data_dir(self):
    # Exercises set_data_dir(): default value, a custom relative name
    # (asserted to resolve under the sample project directory), directory
    # creation through data_path(), and resetting with None.
    default_dir = '.crawlmi'
    custom_name = 'crawlmi_data'

    proj = Project(path=sample_project_dir)
    self.assertEqual(proj.data_dir, default_dir)

    # Switching to a custom dir only changes the path; nothing is created.
    proj.set_data_dir(custom_name)
    self.assertEqual(proj.data_dir, join(sample_project_dir, custom_name))
    self.assertFalse(exists(proj.data_dir))

    # Asking for a data path with create_dir=True materializes the dir.
    proj.data_path('.', create_dir=True)
    self.assertTrue(exists(proj.data_dir))
    os.rmdir(proj.data_dir)  # clean up what the test created

    # None restores the default.
    proj.set_data_dir(None)
    self.assertEqual(proj.data_dir, default_dir)
def execute(argv=None):
    '''Run a crawlmi command given on the command line.

    `argv` is the full argument list, program name first; it defaults to
    `sys.argv`. The command name is popped from `argv`, the remaining
    arguments are parsed with the command's own options, an engine is built
    and set up, and finally the command is run.

    Exits with status 0 after printing the command list when no command
    name is given, and with status 2 when the command is unknown.
    '''
    if argv is None:
        argv = sys.argv

    project = Project()
    settings = EngineSettings(module_settings=project.module_settings)
    inside_project = project.inside_project
    cmds = get_commands(settings, inside_project)

    cmd_name = pop_command_name(argv)
    if not cmd_name:
        print_commands(cmds, inside_project)
        sys.exit(0)
    elif cmd_name not in cmds:
        print_unknown_command(cmd_name, inside_project)
        sys.exit(2)

    # initialize the command
    cmd = cmds[cmd_name]()

    # initialize parser
    parser = optparse.OptionParser(
        formatter=optparse.TitledHelpFormatter(),
        conflict_handler='resolve')
    parser.usage = 'crawlmi %s %s' % (cmd_name, cmd.syntax())
    parser.description = cmd.short_desc()
    cmd.add_options(parser)
    options, args = parser.parse_args(args=argv[1:])

    # initialize custom settings
    custom_settings = run_print_help(parser, cmd.get_settings, args, options)
    settings.custom_settings = custom_settings

    # initialize engine
    engine = Engine(settings, project, command_invoked=cmd_name)
    spider = run_print_help(parser, cmd.get_spider, engine, args, options)
    engine.set_spider(spider)
    # set project's data dir. It has to be when all the settings are known.
    project.set_data_dir(engine.settings.get('DATA_DIR'))
    engine.setup()
    cmd.set_engine(engine)

    # save pidfile
    pidfile = getattr(options, 'pidfile', None)
    if pidfile:
        # Text mode with '\n' lets the I/O layer emit the platform line
        # ending. Writing a str plus os.linesep to a file opened in 'wb'
        # raises TypeError on Python 3, while this form produces the same
        # bytes on both Python 2 and Python 3.
        with open(pidfile, 'w') as f:
            f.write('%d\n' % os.getpid())

    # run command
    run_print_help(parser, cmd.run, args, options)
def test_good_project(self):
    # Both the sample project root and its 'crawlmi_project' subdirectory
    # are expected to resolve to the same project, with the TEST setting
    # readable as the integer 42.
    nested_dir = join(sample_project_dir, 'crawlmi_project')

    for start_dir in (sample_project_dir, nested_dir):
        found = Project(path=start_dir)
        self.assertTrue(found.inside_project)
        self.assertEqual(found.project_dir, sample_project_dir)
        self.assertEqual(found.module_settings.get_int('TEST'), 42)
def get_engine(custom_settings=None, **kwargs):
    '''Return the engine initialized with the custom settings.

    `custom_settings` is an optional mapping of setting names to values;
    any extra keyword arguments are merged on top of it and win on
    conflict. The mapping passed by the caller is left untouched.

    The returned engine uses a project created with `path=None`, a fresh
    `Clock`, a `BaseSpider` named 'dummy', and has `stop_if_idle` disabled.
    '''
    # Merge into a copy: updating the caller's dict in place would leak the
    # keyword overrides into every later use of that same dict.
    merged_settings = dict(custom_settings or {})
    merged_settings.update(kwargs)

    settings = EngineSettings(custom_settings=merged_settings)
    engine = Engine(settings, Project(path=None), clock=Clock())
    engine.set_spider(BaseSpider('dummy'))
    engine.stop_if_idle = False
    # it is common to use stats and signals in unittests, without full
    # initialization of the engine
    engine.stats = MemoryStats(engine)
    engine.signals = SignalManager(engine)
    return engine
def test_data_path(self):
    # Checks data_path() for a relative path (asserted to resolve under the
    # data dir, created only on request) and for an absolute path (asserted
    # to be returned unchanged).
    proj = Project(path=sample_project_dir)
    proj.set_data_dir('crawlmi_data')

    data_root = join(sample_project_dir, 'crawlmi_data')
    sub_path = join('a', 'b')

    # relative path
    expected = join(data_root, 'a', 'b')
    resolved = proj.data_path(sub_path, create_dir=False)
    self.assertEqual(resolved, expected)
    self.assertFalse(exists(expected))

    # create dir
    proj.data_path(sub_path, create_dir=True)
    self.assertTrue(exists(expected))
    # remove the created directories innermost first
    os.rmdir(join(data_root, 'a', 'b'))
    os.rmdir(join(data_root, 'a'))

    # absolute path
    expected = os.path.abspath(__file__)
    resolved = proj.data_path(expected, create_dir=False)
    self.assertEqual(resolved, expected)

    # finally drop the now-empty data dir itself
    os.rmdir(proj.data_dir)