def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler: scrape patterns, auth cookie headers, base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is a list of a request path, a regular-expression
    pattern and an integer (presumably the capture group to keep -- confirm
    against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/userRpm/StatusRpm.htm'
    # Raw string: \d and \. must reach the regex engine untouched and are
    # invalid escape sequences in a normal string literal.
    self.res['dns'] = [
        status_page,
        r'var wanPara = new Array(.+?)"([\d\.]+? , [\d\.]+?)"',
        2,
    ]
    self.res['firmware'] = [
        status_page,
        'var statusPara = new Array.+?"(.+?)"',
        1,
    ]
    self.res['hardware'] = [
        status_page,
        'var statusPara = new Array.+?".+?".+?"(.+?)"',
        1,
    ]

    # The credentials are sent base64-encoded inside the Cookie header.
    # NOTE(review): b64encode() on a text string only works on Python 2;
    # Python 3 requires bytes -- confirm the target interpreter.
    auth_cookie = base64.b64encode(self.username + ':' + self.password)
    self.headers = {
        b'Cookie': 'tLargeScreenP=1; subType=pcSub; Authorization=Basic ' + auth_cookie,
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its XML-tag patterns and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    dns_rule = ['<wan_dns>(.+?)</wan_dns>', 1]
    firmware_rule = ['<fw_ver>(.+?)</fw_ver>', 1]
    hardware_rule = ['<hw_ver>(.+?)</hw_ver>', 1]
    self.res['dns'] = dns_rule
    self.res['firmware'] = firmware_rule
    self.res['hardware'] = hardware_rule

    # The URL is built from the attributes available after the base
    # initialiser has run, not from the local arguments.
    self.url = ''.join(['http://', self.addr, ':', str(self.port)])
def __init__(self, addr, port, username, password, session, debug):
    """Register the XML-tag extraction rules and compute the base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` value is a ``[pattern, n]`` pair; ``n`` is presumably
    the capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.res["dns"] = ["<wan_dns>(.+?)</wan_dns>", 1]
    self.res["firmware"] = ["<fw_ver>(.+?)</fw_ver>", 1]
    self.res["hardware"] = ["<hw_ver>(.+?)</hw_ver>", 1]

    # Uses self.addr / self.port, i.e. the values present on the instance
    # after the base initialiser has run.
    url_parts = ["http://", self.addr, ":", str(self.port)]
    self.url = "".join(url_parts)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with a DNS scrape rule, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    The ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # Only a DNS rule is registered here; no firmware/hardware rule is set.
    dns_rule = ['/Comm/Status.js', 'var va_DNSServer.+?(".+?".+?".+?")', 1]
    self.res['dns'] = dns_rule

    headers = {}
    headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    headers[b'Accept-Language'] = b'en-US'
    headers[b'Referer'] = ''
    self.headers = headers

    self.url = ''.join(['http://', self.addr, ':', str(port)])
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with DNS/firmware rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/Status_Router.asp'
    self.res['dns'] = [status_page, 'share.dns.+?<B>(.+?)<', 1]
    # Raw string: \d is a regex class, not a valid string escape.
    self.res['firmware'] = [status_page, r'share.firmwarever.+?(v\d.+?)<', 1]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler and register its three scrape rules.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``). The firmware
    and hardware rules use an empty path.
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # Raw string: \d and \. are regex escapes, not valid string escapes.
    self.res['dns'] = [
        '/userRpm/StatusRpm.htm',
        r'var wanPara = new Array(.+?)"([\d\.]+? , [\d\.]+?)"',
        2,
    ]
    self.res['firmware'] = ['', 'openAboutWindow.+?>(.+?)</a>"', 1]
    # Kept as a normal string so "\n" stays a real newline character; the
    # backslash before "(" is doubled to avoid an invalid string escape while
    # producing exactly the same pattern text.
    self.res['hardware'] = [
        '',
        '>Capture\\(status_router.sys_model.+?\n(.+?) ',
        1,
    ]
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with DNS/firmware rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/status.asp'
    self.res['dns'] = [status_page, 'temp_dns1="(.+?)";', 1]
    # Raw string: \( and \) match literal parentheses and are not valid
    # string escapes.
    self.res['firmware'] = [
        status_page,
        r'dw\(FirmwareVersion\)</script></td>.+?>(.+?)</td>',
        1,
    ]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # All three rules read the same page.
    device_page = '/eng/admin/st_device.cgi'
    self.res['dns'] = [device_page, '<dns1>(.+?)</dns1>', 1]
    self.res['firmware'] = [device_page, '<version>(.+?)</version>', 1]
    self.res['hardware'] = [device_page, '<product>(.+?)</product>', 1]

    headers = {}
    headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    headers[b'Accept-Language'] = b'en-US'
    headers[b'Referer'] = ''
    self.headers = headers

    self.url = ''.join(['http://', self.addr, ':', str(port)])
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    The firmware and hardware entries are ``[path, pattern, n]``; ``n`` is
    presumably the capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    device_page = '/Status/st_devic.htm'
    # NOTE(review): this entry has no trailing integer, unlike the two
    # below -- confirm the base class accepts the two-element form.
    self.res['dns'] = [device_page, 'priDns = "(.+?)".+?"(.+?)";']
    self.res['firmware'] = [device_page, 'firmareVersion = "(.+?)"', 1]
    self.res['hardware'] = [device_page, 'ModuleName="(.+?)"', 1]

    headers = {}
    headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    headers[b'Accept-Language'] = b'en-US'
    headers[b'Referer'] = ''
    self.headers = headers

    self.url = ''.join(['http://', self.addr, ':', str(port)])
def __init__(self, features=None, namespace='', plugin_places=None,
             options=None):
    """Initialise the crawler and load the host crawl plugins.

    Args:
        features: Feature names to crawl; defaults to ``['os', 'cpu']``.
        namespace: Stored on the instance as ``self.namespace``.
        plugin_places: Plugin locations; defaults to ``['plugins']``.
        options: Options handed to the plugin manager; defaults to ``{}``.
    """
    # Fresh objects per call: list/dict defaults in the signature would be
    # shared between every instance created without explicit arguments.
    if features is None:
        features = ['os', 'cpu']
    if plugin_places is None:
        plugin_places = ['plugins']
    if options is None:
        options = {}

    BaseCrawler.__init__(
        self, features=features, plugin_places=plugin_places)
    plugins_manager.reload_host_crawl_plugins(
        features, plugin_places, options)
    self.plugins = plugins_manager.get_host_crawl_plugins(
        features=features)
    self.namespace = namespace
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    device_page = '/st_device.html'
    # Raw string: \d and \. are regex escapes, not valid string escapes.
    self.res['dns'] = [
        device_page,
        r'DNS[^\.]+?([\d\.]+[\d\.]+[\d\.]+[\d\.]+)',
        1,
    ]
    # Deliberately NOT raw: \r\n must stay real CR/LF characters so the
    # pattern text is unchanged.
    self.res['firmware'] = [
        device_page,
        '</font>.+?font>\r\n(.+?)\r\n.+?((mon)|(tues)|(wed)|(thurs)|(fri))',
        1,
    ]
    self.res['hardware'] = [device_page, '<TITLE>(.+?)</TITLE>', 1]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/userRpm/StatusRpm.htm'
    # Raw string: \d and \. are regex escapes, not valid string escapes.
    self.res['dns'] = [
        status_page,
        r'var wanPara = new Array(.+?)"([\d\.]+? , [\d\.]+?)"',
        2,
    ]
    self.res['firmware'] = [
        status_page,
        'var statusPara = new Array.+?"(.+?)"',
        1,
    ]
    self.res['hardware'] = [
        status_page,
        'var statusPara = new Array.+?".+?".+?"(.+?)"',
        1,
    ]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    The firmware and hardware entries are ``[path, pattern, n]``; ``n`` is
    presumably the capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/RST_status.htm'
    # NOTE(review): this entry has no trailing integer, unlike the two
    # below -- confirm the base class accepts the two-element form.
    self.res['dns'] = [
        status_page,
        '<b>Domain Name Server</b>(.+?)</td></tr>',
    ]
    # Raw string: \d and \. are regex escapes, not valid string escapes.
    # NOTE(review): the pattern has no capture group although the entry's
    # integer is 1 -- confirm how BaseCrawler uses that value.
    self.res['firmware'] = [status_page, r'V\d\.[\d\._]+', 1]
    self.res['hardware'] = [
        status_page,
        '<META name="description" content="(.+?)">',
        1,
    ]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, features=None, namespace='', plugin_places=None,
             options=None):
    """Initialise the crawler and load the host crawl plugins.

    Args:
        features: Feature names to crawl; defaults to ``['os', 'cpu']``.
        namespace: Stored on the instance as ``self.namespace``.
        plugin_places: Plugin locations; defaults to ``['plugins']``.
        options: Options handed to the plugin manager; defaults to ``{}``.
    """
    # Build the defaults per call so no list/dict object is shared between
    # instances through the function signature.
    features = ['os', 'cpu'] if features is None else features
    plugin_places = ['plugins'] if plugin_places is None else plugin_places
    options = {} if options is None else options

    BaseCrawler.__init__(self, features=features, plugin_places=plugin_places)
    plugins_manager.reload_host_crawl_plugins(features, plugin_places, options)
    self.plugins = plugins_manager.get_host_crawl_plugins(features=features)
    self.namespace = namespace
def mery_crawler():
    """Run a ``BaseCrawler`` over pages 1-2 of the first listed category.

    The CSS-selector rules describe which elements hold an article's title,
    content, page views, keywords and category, plus the selector used to
    find article links.
    """
    root_url = 'http://mery.jp'
    categories = [
        'fashion', 'cosme', 'hairstyle', 'nail', 'beauty',
        'gourmet', 'travel', 'love', 'lifestyle',
    ]

    article_rules = {
        'title': ['h1'],
        'content': [
            '.articleArea p.article_text',
            '.articleArea p.article_image_desc',
            '.articleArea h2.article_headline',
        ],
        'pv': ['li.view span'],
        'keywords': ['li.tag a'],
        'category': ['ul.topBar_in li a span:nth-of-type(2)'],
    }
    rules = {'article': article_rules, 'links': 'h3.article_list_title a'}

    # NOTE(review): the slice limits the crawl to the first category only --
    # confirm this is intentional.
    selected = categories[:1]
    for category in selected:
        category_root_url = '/'.join([root_url, category])
        # NOTE(review): the keyword is spelled 'databse' -- confirm it matches
        # the parameter name in BaseCrawler's signature.
        crawler = BaseCrawler(
            root_url=category_root_url,
            rules=rules,
            databse='mery',
        )
        crawler.run(start_page=1, end_page=2)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its info page, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/cgi-bin/info.html'
    # Raw strings: \d, \. and \( are regex escapes, not valid string escapes.
    self.res['dns'] = r'([\d\.]+?)<br>([\d\.]+?)</td>'
    self.res['firmware'] = r'V[\d\._]+'
    self.res['hardware'] = r"document.write\(router_status_hwversion\).+?</script>(.+?)</td>"

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its info page, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/stsdev.htm'
    self.res['dns'] = 'DNS</TD><TD>(.+?)</TD>.+?DNS</TD><TD>(.+?)</TD></TR>'
    # Raw strings: \d, \. and \( are regex escapes, not valid string escapes.
    self.res['firmware'] = r'[\d\.]+ \(.+?\)'
    self.res['hardware'] = r'DCS-\d+'

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/Status_Router.asp'
    # Raw strings: \( and \) match literal parentheses and are not valid
    # string escapes.
    self.res['dns'] = [
        status_page,
        r'document.write\("DNS"\).+?<TD class=FUNFIELD><B>(.+?)</B>',
        1,
    ]
    self.res['firmware'] = [
        status_page,
        r'document.write\("Firmware Version"\).+?<TD class=FUNFIELD><B>(.+?)<',
        1,
    ]
    self.res['hardware'] = [status_page, '<TD class=MODELNAME>(.+?)</TD>', 1]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Register the DNS scrape rule, request headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    The ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_script = '/Comm/Status.js'
    dns_pattern = 'var va_DNSServer.+?(".+?".+?".+?")'
    self.res['dns'] = [status_script, dns_pattern, 1]

    request_headers = {}
    request_headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    request_headers[b'Accept-Language'] = b'en-US'
    request_headers[b'Referer'] = ''
    self.headers = request_headers

    url_parts = ['http://', self.addr, ':', str(port)]
    self.url = ''.join(url_parts)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its info page, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/RST_status.htm'
    self.res['dns1'] = 'var info_get_dns1="(.+?)";'
    self.res['dns2'] = 'var info_get_dns2="(.+?)";'
    # Raw string: \d and \. are regex escapes, not valid string escapes.
    self.res['firmware'] = r'<TD nowrap>V([\d\._]+?)</TD>'
    self.res['hardware'] = "var product_id='(.+?)';"

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler: scrape patterns, auth cookie headers, base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    upgrade_page = "/userRpm/SoftwareUpgradeRpm.htm"
    # Raw string: \(, \d and \. are regex escapes, not valid string escapes.
    self.res["dns"] = [
        "/userRpm/StatusRpm.htm",
        r'var wanPara = new Array\(.+?"([\d\.]+, [\d\.]+)"',
        1,
    ]
    self.res["firmware"] = [
        upgrade_page,
        'var softUpInf(.+?".+?".+?){2}.+?"(.+?)"',
        2,
    ]
    self.res["hardware"] = [
        upgrade_page,
        'var softUpInf(.+?".+?".+?){3}.+?"(.+?)"',
        2,
    ]

    # The credentials are sent base64-encoded inside the Cookie header.
    # NOTE(review): b64encode() on a text string only works on Python 2;
    # Python 3 requires bytes -- confirm the target interpreter.
    auth_cookie = base64.b64encode(self.username + ":" + self.password)
    self.headers = {
        b"Cookie": "tLargeScreenP=1; subType=pcSub; Authorization=Basic " + auth_cookie,
        b"User-Agent": b"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0",
        b"Accept-Language": b"en-US",
        b"Referer": "",
    }
    self.url = "http://" + self.addr + ":" + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler: scrape patterns, auth cookie headers, base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    status_page = '/userRpm/StatusRpm.htm'
    # Raw string: \d and \. are regex escapes, not valid string escapes.
    self.res['dns'] = [
        status_page,
        r'var wanPara = new Array(.+?)"([\d\.]+? , [\d\.]+?)"',
        2,
    ]
    self.res['firmware'] = [
        status_page,
        'var statusPara = new Array.+?"(.+?)"',
        1,
    ]
    self.res['hardware'] = [
        status_page,
        'var statusPara = new Array.+?".+?".+?"(.+?)"',
        1,
    ]

    # The credentials are sent base64-encoded inside the Cookie header.
    # NOTE(review): b64encode() on a text string only works on Python 2;
    # Python 3 requires bytes -- confirm the target interpreter.
    auth_cookie = base64.b64encode(self.username + ':' + self.password)
    self.headers = {
        b'Cookie': 'tLargeScreenP=1; subType=pcSub; Authorization=Basic ' + auth_cookie,
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its info page, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/Advanced_WAN_Content.asp'
    self.res['dns1'] = 'name="wan_dns1_x" value="(.+?)"'
    self.res['dns2'] = 'name="wan_dns2_x" value="(.+?)"'
    self.res['firmware'] = 'name="firmver" value="(.+?)">'
    # Raw string: \S is a regex class, not a valid string escape.
    self.res['hardware'] = r'RT-\S+'
    self.res['type'] = "wan_route_x = '(.+?)';"

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Set the info page, extraction patterns, request headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/stsdev.htm'
    self.res['dns'] = (
        'DNS</TD><TD>(.+?)</TD>.+?DNS</TD><TD>(.+?)</TD></TR>'
    )
    # Raw strings: \d, \. and \( are regex escapes, not valid string escapes.
    self.res['firmware'] = r'[\d\.]+ \(.+?\)'
    self.res['hardware'] = r'DCS-\d+'

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Register the DNS/firmware scrape rules, request headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.res['dns'] = ['/status.asp', 'temp_dns1="(.+?)";', 1]
    # Raw string: \( and \) match literal parentheses and are not valid
    # string escapes.
    self.res['firmware'] = [
        '/status.asp',
        r'dw\(FirmwareVersion\)</script></td>.+?>(.+?)</td>',
        1,
    ]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its info page, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string, and
    ``self.info_url`` names a single page (presumably the one all patterns
    are applied to -- confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/Status_Router.asp'
    # Raw strings: \[, \] and \S are regex escapes, not valid string escapes.
    self.res['dns1'] = r'dns\[0\] = (\S+)'
    self.res['dns2'] = r'dns\[1\] = (\S+)'
    self.res['firmware'] = r'firmware version = (\S+)'
    self.res['hardware'] = r'model name = (\S+)'

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its page paths, patterns, headers and URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    ``self.res`` maps a field name to a bare regular-expression string.
    Three page paths are stored (``info_url``, ``hardware_url``,
    ``firmware_url``); which pattern is applied to which page is decided
    elsewhere -- confirm against the code that consumes them.
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    self.info_url = '/tcpipwan.asp'
    self.hardware_url = '/lang.js'
    self.firmware_url = '/state.js'

    self.res['dns1'] = 'name="dns1" class="input" size="18" maxlength="15" value=(.*?)>'
    self.res['dns2'] = 'name="dns2" class="input" size="18" maxlength="15" value=(.*?)>'
    # Raw string: \(, \$, \d and \. are regex escapes, not valid string
    # escapes.
    self.res['firmware'] = r'showtext\(\$\("firmver"\), "([\d\.]+?)"\);'
    self.res['hardware'] = 'Web_Title="(.+?)"'

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self, features=None, user_list=None, host_namespace='',
             plugin_places=None, options=None):
    """Initialise the crawler and load the VM crawl plugins.

    Args:
        features: Feature names to crawl; defaults to ``['os', 'cpu']``.
        user_list: Stored on the instance as ``self.user_list``; defaults
            to a new empty list.
        host_namespace: Stored on the instance as ``self.host_namespace``.
        plugin_places: Plugin locations; defaults to ``['plugins']``.
        options: Options handed to the base class and the plugin manager;
            defaults to ``{}``.
    """
    # Fresh objects per call: ``user_list`` is kept on the instance, so a
    # list default in the signature would be shared by every instance.
    if features is None:
        features = ['os', 'cpu']
    if user_list is None:
        user_list = []
    if plugin_places is None:
        plugin_places = ['plugins']
    if options is None:
        options = {}

    BaseCrawler.__init__(self,
                         features=features,
                         plugin_places=plugin_places,
                         options=options)
    self.vms_list = []
    plugins_manager.reload_vm_crawl_plugins(features, plugin_places, options)
    self.plugins = plugins_manager.get_vm_crawl_plugins(features)
    self.host_namespace = host_namespace
    self.user_list = user_list
def __init__(self, addr, port, username, password, session, debug):
    """Initialise the crawler with its scrape rules, headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # All four rules read the same page.
    status_page = '/system_status.asp'
    self.res['dns1'] = [status_page, 'dns1[^"]+"(.+?)"', 1]
    self.res['dns2'] = [status_page, 'dns2[^"]+"(.+?)"', 1]
    self.res['firmware'] = [status_page, 'run_code_ver[^"]+"(.+?)"', 1]
    self.res['hardware'] = [status_page, 'hw_ver[^"]+"(.+?)"', 1]

    headers = {}
    headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    headers[b'Accept-Language'] = b'en-US'
    headers[b'Referer'] = ''
    # A language cookie is sent with every request.
    headers[b'Cookie'] = 'admin:language=en; language=en'
    self.headers = headers

    self.url = ''.join(['http://', self.addr, ':', str(port)])
def __init__(self, features=None, user_list=None, host_namespace='',
             plugin_places=None, options=None):
    """Initialise the crawler and load the VM crawl plugins.

    Args:
        features: Feature names to crawl; defaults to ``['os', 'cpu']``.
        user_list: Stored on the instance as ``self.user_list``; defaults
            to a new empty list.
        host_namespace: Stored on the instance as ``self.host_namespace``.
        plugin_places: Plugin locations; defaults to ``['plugins']``.
        options: Options handed to the base class and the plugin manager;
            defaults to ``{}``.
    """
    # Build the defaults per call so no list/dict object is shared between
    # instances through the function signature.
    features = ['os', 'cpu'] if features is None else features
    user_list = [] if user_list is None else user_list
    plugin_places = ['plugins'] if plugin_places is None else plugin_places
    options = {} if options is None else options

    BaseCrawler.__init__(
        self,
        features=features,
        plugin_places=plugin_places,
        options=options)
    self.vms_list = []
    plugins_manager.reload_vm_crawl_plugins(
        features, plugin_places, options)
    self.plugins = plugins_manager.get_vm_crawl_plugins(features)
    self.host_namespace = host_namespace
    self.user_list = user_list
def __init__(self, addr, port, username, password, session, debug):
    """Register the scrape rules, request headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    The firmware and hardware entries are ``[path, pattern, n]``; ``n`` is
    presumably the capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    page = '/Status/st_devic.htm'
    # NOTE(review): the DNS entry carries no trailing integer, unlike the
    # other two -- confirm the base class accepts the two-element form.
    dns_rule = [page, 'priDns = "(.+?)".+?"(.+?)";']
    firmware_rule = [page, 'firmareVersion = "(.+?)"', 1]
    hardware_rule = [page, 'ModuleName="(.+?)"', 1]
    self.res['dns'] = dns_rule
    self.res['firmware'] = firmware_rule
    self.res['hardware'] = hardware_rule

    request_headers = {}
    request_headers[b'User-Agent'] = b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0'
    request_headers[b'Accept-Language'] = b'en-US'
    request_headers[b'Referer'] = ''
    self.headers = request_headers

    url_parts = ['http://', self.addr, ':', str(port)]
    self.url = ''.join(url_parts)
def __init__(self, features=None, environment='cloudsight',
             user_list='ALL', host_namespace='', plugin_places=None,
             options=None):
    """Initialise the crawler and load the environment and container plugins.

    Args:
        features: Feature names to crawl; defaults to ``['os', 'cpu']``.
        environment: Passed to ``plugins_manager.reload_env_plugin`` and
            stored as ``self.environment``.
        user_list: Stored on the instance as ``self.user_list``.
        host_namespace: Stored on the instance as ``self.host_namespace``.
        plugin_places: Plugin locations; defaults to ``['plugins']``.
        options: Options handed to the base class and the plugin manager;
            defaults to ``{}``.
    """
    # Fresh objects per call: list/dict defaults in the signature would be
    # shared between every instance created without explicit arguments.
    if features is None:
        features = ['os', 'cpu']
    if plugin_places is None:
        plugin_places = ['plugins']
    if options is None:
        options = {}

    BaseCrawler.__init__(
        self,
        features=features,
        plugin_places=plugin_places,
        options=options)
    plugins_manager.reload_env_plugin(environment, plugin_places)
    plugins_manager.reload_container_crawl_plugins(
        features, plugin_places, options)
    self.plugins = plugins_manager.get_container_crawl_plugins(features)
    self.environment = environment
    self.host_namespace = host_namespace
    self.user_list = user_list
def __init__(self, addr, port, username, password, session, debug):
    """Register the scrape rules, request headers and base URL.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``).
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # Raw strings: \( and \) match literal parentheses and are not valid
    # string escapes.
    self.res['dns'] = [
        '/Status_Router.asp',
        r'document.write\("DNS"\).+?<TD class=FUNFIELD><B>(.+?)</B>',
        1,
    ]
    self.res['firmware'] = [
        '/Status_Router.asp',
        r'document.write\("Firmware Version"\).+?<TD class=FUNFIELD><B>(.+?)<',
        1,
    ]
    self.res['hardware'] = [
        '/Status_Router.asp',
        '<TD class=MODELNAME>(.+?)</TD>',
        1,
    ]

    self.headers = {
        b'User-Agent': b'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
        b'Accept-Language': b'en-US',
        b'Referer': '',
    }
    self.url = 'http://' + self.addr + ':' + str(port)
def __init__(self):
    """Run both base-class initialisers: ``CrawlSpider``, then ``BaseCrawler``."""
    # Each base is initialised explicitly, in this order.
    for base in (CrawlSpider, BaseCrawler):
        base.__init__(self)
def __init__(self, addr, port, username, password, session, debug):
    """Register the DNS, firmware and hardware scrape rules.

    Every argument is forwarded unchanged to ``BaseCrawler.__init__``.
    Each ``self.res`` entry is ``[path, pattern, n]``; ``n`` is presumably the
    capture group to keep (confirm against ``BaseCrawler``). The firmware
    and hardware rules use an empty path.
    """
    BaseCrawler.__init__(self, addr, port, username, password, session, debug)

    # Raw string: \d and \. are regex escapes, not valid string escapes.
    dns_pattern = r'var wanPara = new Array(.+?)"([\d\.]+? , [\d\.]+?)"'
    self.res['dns'] = ['/userRpm/StatusRpm.htm', dns_pattern, 2]
    self.res['firmware'] = ['', 'openAboutWindow.+?>(.+?)</a>"', 1]
    # Kept as a normal string so "\n" stays a real newline character; the
    # backslash before "(" is doubled to avoid an invalid string escape while
    # producing exactly the same pattern text.
    hardware_pattern = '>Capture\\(status_router.sys_model.+?\n(.+?) '
    self.res['hardware'] = ['', hardware_pattern, 1]