def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on jxndjy.jxau.edu.cn.

    The listing is rendered with PhantomJS and parsed with lxml; the
    ``response`` argument is not used. A notice matches when its title
    contains one of the keywords below and the first 10 characters of its
    date cell equal getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    page_url = 'http://jxndjy.jxau.edu.cn/module/jobfairs?type='
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
    try:
        driver.get(page_url)
        html = etree.HTML(driver.page_source)
    finally:
        # Always shut the headless browser down so its process is not leaked.
        driver.quit()

    titles = html.xpath('//div[@class="text-eps w240"]/@title')
    print(titles)
    dates = [
        text.strip()
        for text in html.xpath(
            '//table[@class="tb-pub-list"]/tbody/tr/td[2]/text()')
    ]
    today = getPresentTime()

    # zip() stops at the shorter list, so a title without a date cell is
    # skipped instead of raising IndexError.
    for title, raw_date in zip(titles, dates):
        published = raw_date[:10]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = page_url
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on zsjy.sru.jx.cn.

    The listing is rendered with PhantomJS and parsed with lxml; the
    ``response`` argument is not used. A notice matches when its title
    contains one of the keywords below and its date text equals
    getPresentTime() (defined elsewhere; presumably a 'YYYY-MM-DD'
    string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
    try:
        driver.get('http://zsjy.sru.jx.cn/html/srsfzscjyyw/index.html')
        html = etree.HTML(driver.page_source)
    finally:
        # Always shut the headless browser down so its process is not leaked.
        driver.quit()

    titles = html.xpath('//span[@class="a-box"]/ul/li/a/text()')
    print(titles)
    dates = html.xpath('//span[@class="a-box"]/ul/li/span/text()')
    links = html.xpath('//span[@class="a-box"]/ul/li/a/@href')
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://zsjy.sru.jx.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on jy.huaszj.cn.

    The listing is rendered with PhantomJS and parsed with lxml; the
    ``response`` argument is not used. A notice matches when its title
    contains one of the keywords below and the first 10 characters of its
    date text equal getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    driver = webdriver.PhantomJS(service_log_path=r'../watchlog.log')
    try:
        driver.get('http://jy.huaszj.cn/module/jobfairs')
        html = etree.HTML(driver.page_source)
    finally:
        # Always shut the headless browser down so its process is not leaked.
        driver.quit()

    titles = html.xpath('//ul[@id="data_html"]/li/div/div[2]/p[1]/a/@title')
    print(titles)
    dates = html.xpath('//ul[@id="data_html"]/li/div/div[3]/div/p[1]/text()')
    links = html.xpath('//ul[@id="data_html"]/li/div/div[2]/p[1]/a/@href')
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = raw_date[:10]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://jy.huaszj.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on zsjyw.lcvtc.edu.cn.

    Dates are pulled out of the cell text with a two-digit/two-digit
    pattern. A notice matches when its title contains one of the keywords
    below and that date equals getPresentTime()[5:] (getPresentTime is
    defined elsewhere; presumably 'YYYY-MM-DD', making the slice 'MM-DD'
    -- confirm).

    The stored ``publishDate`` is the matched date string.
    """
    keywords = ("招聘会", "双选会", "宣讲会", "供需见面会")
    date_pattern = re.compile(r'\d{2}-\d{2}')

    cells = response.xpath('//table[@id="__01"]/tr[2]/td[6]/table/tr[2]/td')
    print(cells)
    titles = cells.xpath('a/text()').extract()
    print(titles)
    # Keep only the text nodes that actually contain a date, and take the
    # first date found in each of them.
    found_per_node = (
        date_pattern.findall(text) for text in cells.xpath('text()').extract()
    )
    dates = [found[0] for found in found_per_node if found]
    links = cells.xpath('a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today[5:] == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://zsjyw.lcvtc.edu.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on job.ahau.edu.cn.

    A notice matches when its stripped title contains one of the keywords
    below and characters 1..5 of its stripped date text equal
    getPresentTime()[5:] (getPresentTime is defined elsewhere; presumably
    'YYYY-MM-DD', making the slice 'MM-DD' -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    listing = response.xpath('//ul[@class="list-unstyled"]')
    print(listing)
    titles = [text.strip() for text in listing.xpath('li/a/text()').extract()]
    print(titles)
    dates = [text.strip() for text in listing.xpath('li/span/text()').extract()]
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today[5:] == published[1:6]:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://job.ahau.edu.cn/tzgg/' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on jzit.91wllm.com.

    A notice matches when its title contains one of the keywords below and
    the first 10 characters of its stripped date text equal
    getPresentTime() (defined elsewhere; presumably a 'YYYY-MM-DD'
    string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    boxes = response.xpath('//div[@class="infoBox mt10 b"]')
    print(boxes)
    titles = boxes.xpath(
        'ul[@class="infoList jobfairList"]/li[1]/a/@title').extract()
    print(titles)
    dates = [
        text.strip()
        for text in boxes.xpath(
            'ul[@class="infoList jobfairList"]/li[5]/text()').extract()
    ]
    links = boxes.xpath(
        'ul[@class="infoList jobfairList"]/li[1]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = raw_date[:10]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://jzit.91wllm.com' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on yzu.91job.gov.cn.

    At most the first 20 titles, dates and links are considered. A notice
    matches when its title contains one of the keywords below and the
    first 10 characters of its date text equal getPresentTime() (defined
    elsewhere; presumably a 'YYYY-MM-DD' string -- confirm). The full,
    untruncated date text is stored on the item.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    listing = response.xpath('//div[@class="infoBox mt10"]/ul')
    print(listing)
    # NOTE(review): these paths start with '//', so they are evaluated
    # against the whole document for every node in `listing` rather than
    # relative to it; the [:20] slice caps the result. Confirm intended.
    titles = listing.xpath(
        '//ul[@class="infoList"]/li[@class="span7"]/a/text()').extract()[:20]
    print(titles)
    dates = listing.xpath(
        '//ul[@class="infoList"]/li[@class="span4"]/text()').extract()[:20]
    links = listing.xpath(
        '//ul[@class="infoList"]/li[@class="span7"]/a/@href').extract()[:20]
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published[:10]:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://yzu.91job.gov.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice listed on www.jxaevc.com.

    A notice matches when its title contains one of the keywords below.
    No date filter is applied, and ``publishDate`` and ``holdDate`` are
    stored as empty strings.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    titles = response.xpath(
        '//a[@class="listA" and @target="_self"]/@title').extract()
    print(titles)
    links = response.xpath(
        '//a[@class="listA" and @target="_self"]/@href').extract()

    # zip() stops at the shorter list, so a title without a link is
    # skipped instead of raising IndexError.
    for title, link in zip(titles, links):
        if any(word in title for word in keywords):
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = ''
            item['holdDate'] = ""
            item['url'] = 'http://www.jxaevc.com' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on hzau.91wllm.com.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    boxes = response.xpath('//div[@class="newsBox"]')
    print(boxes)
    titles = boxes.xpath('ul/li[2]/a/text()').extract()
    print(titles)
    dates = boxes.xpath('ul/li[1]/text()').extract()
    links = boxes.xpath('ul/li[2]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://hzau.91wllm.com' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www.aqvtc.edu.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime()[5:] (getPresentTime is defined
    elsewhere; presumably 'YYYY-MM-DD', making the slice 'MM-DD' --
    confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    table = response.xpath('//div[@id="wp_news_w71"]/table')
    print(table)
    titles = table.xpath('tr/td[2]/a[2]/text()').extract()
    print(titles)
    dates = table.xpath('tr/td[4]/text()').extract()
    links = table.xpath('tr/td[2]/a[2]/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today[5:] == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www.aqvtc.edu.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per recruitment notice dated today on jiuye.hebau.edu.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "校园招聘")

    info_list = response.xpath("//div[@class='info_list']")
    print(info_list)
    # The title path must be relative ('ol/...'), like the date and link
    # paths; an absolute '/ol/...' starts at the document root and finds
    # nothing.
    titles = info_list.xpath('ol/a/text()').extract()
    print(titles)
    dates = info_list.xpath('ol/span/text()').extract()
    links = info_list.xpath('ol/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://jiuye.hebau.edu.cn/news2/' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www.hetaodaxue.com.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm). The first five characters of each
    href are dropped before it is appended to the site prefix.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    listing = response.xpath('//div[@class="newsnr"]/div/ul')
    print(listing)
    titles = listing.xpath('li/a/text()').extract()
    print(titles)
    dates = listing.xpath('li/span/text()').extract()
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www.hetaodaxue.com/jyzdzx' + link[5:]
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on news.jhc.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    listing = response.xpath('//div[@id="wp_news_w6"]/ul')
    print(listing)
    titles = listing.xpath('li/div/span/a/text()').extract()
    print(titles)
    dates = listing.xpath('li/div[2]/span/text()').extract()
    links = listing.xpath('li/div/span/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://news.jhc.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on xsc.sxnu.edu.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm). The first two characters of each href
    are dropped before it is appended to the site prefix.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    table = response.xpath('//div[@class="list_right fr"]/table')
    print(table)
    titles = table.xpath('tr[@height="20"]/td[2]/a/@title').extract()
    print(titles)
    dates = table.xpath('tr[@height="20"]/td[3]/span/text()').extract()
    # .extract() is required here: the hrefs are sliced as strings below,
    # which is not possible on Selector objects.
    links = table.xpath('tr[@height="20"]/td[2]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        # NOTE(review): the comparison uses the full date text while the
        # stored value drops its last character -- confirm whether the
        # comparison should use the trimmed value as well.
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published[:-1]
            item['holdDate'] = ""
            item['url'] = 'http://xsc.sxnu.edu.cn' + link[2:]
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice listed on jyw.bzpt.edu.cn.

    A notice matches when its title contains one of the keywords below.
    No date filter is applied, and ``publishDate`` and ``holdDate`` are
    stored as empty strings.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    table = response.xpath('//div[@id="newslist"]/table')
    print(table)
    titles = table.xpath('tr/td[2]/table/tr/td/a/font/text()').extract()
    print(titles)
    links = table.xpath('tr/td[2]/table/tr/td/a/@href').extract()

    # zip() stops at the shorter list, so a title without a link is
    # skipped instead of raising IndexError.
    for title, link in zip(titles, links):
        if any(word in title for word in keywords):
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = ''
            item['holdDate'] = ""
            item['url'] = 'http://jyw.bzpt.edu.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on 218.5.241.22:8036.

    The publish date is rebuilt as '20' plus characters 1..8 of the date
    text. A notice matches when its title contains one of the keywords
    below and that rebuilt date equals getPresentTime() (defined
    elsewhere; presumably a 'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    listing = response.xpath('//div[@id="newlist"]/ul')
    print(listing)
    titles = listing.xpath('li/a/@title').extract()
    print(titles)
    dates = listing.xpath('li/span/text()').extract()
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = '20' + raw_date[1:9]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://218.5.241.22:8036' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on hnbemc.university-hr.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    table = response.xpath('//table[@class="table_style01"]')
    print(table)
    titles = table.xpath('tr[@class="trbg"]/td[2]/a/text()').extract()
    print(titles)
    dates = table.xpath('tr[@class="trbg"]/td[4]/text()').extract()
    links = table.xpath('tr[@class="trbg"]/td[2]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://hnbemc.university-hr.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice listed on www2.nynu.edu.cn.

    A notice matches when its title contains one of the keywords below.
    No date filter is applied; the scraped date text is stored on the
    item as-is.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    grid = response.xpath('//table[@id="GridView1"]')
    print(grid)
    titles = grid.xpath('tr/td/a/span/text()').extract()
    print(titles)
    dates = grid.xpath('tr/td/span/text()').extract()
    links = grid.xpath('tr/td/a/@href').extract()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords):
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www2.nynu.edu.cn/xzbm/jiuye/' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www.xyafu.edu.cn.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    # NOTE(review): this spider looks for "双选周" where its siblings use
    # "双选会" -- confirm that is deliberate for this site.
    keywords = ("招聘会", "双选周", "宣讲会", '供需见面会')

    listing = response.xpath('//div[@class="articleList articleList2"]/ul')
    print(listing)
    titles = listing.xpath('li/a/@title').extract()
    print(titles)
    dates = listing.xpath('li/span/text()').extract()
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = r'http://www.xyafu.edu.cn/jyxxw/tzgg/' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today in the "listbox" listing.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm). The scraped href is stored unchanged
    as the item URL.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    boxes = response.xpath('//div[@class="listbox"]')
    print(boxes)
    titles = boxes.xpath('div[@class="txt link_lan"]/h2/a/text()').extract()
    print(titles)
    dates = boxes.xpath('div[@class="txt link_lan"]/em[1]/text()').extract()
    links = boxes.xpath('div[@class="txt link_lan"]/h2/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on job.sqzy.edu.cn.

    A notice matches when its title contains one of the keywords below and
    the first 10 characters of its date text equal getPresentTime()
    (defined elsewhere; presumably a 'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    table = response.xpath('//table[@class="winstyle2456"]')
    print(table)
    titles = table.xpath('tr/td[1]/a/@title').extract()
    print(titles)
    dates = table.xpath('tr/td[2]/text()').extract()
    links = table.xpath('tr/td[1]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = raw_date[:10]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://job.sqzy.edu.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www.ndgzy.com.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    wrapper = response.xpath('//div[@class="artileListWraper"]')
    print(wrapper)
    titles = wrapper.xpath('div/h3/a/text()').extract()
    print(titles)
    dates = wrapper.xpath(
        'div/div[@class="m-news-data"]/span[1]/text()').extract()
    links = wrapper.xpath('div/h3/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www.ndgzy.com' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on dzujy.dzu.edu.cn.

    A notice matches when its title contains one of the keywords below and
    the first 10 characters of its date text equal getPresentTime()
    (defined elsewhere; presumably a 'YYYY-MM-DD' string -- confirm). The
    full, untruncated date text is stored on the item.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    listing = response.xpath('//div[@class="article-list floatL"]/ul')
    print(listing)
    titles = listing.xpath('li/a/span/text()').extract()
    print(titles)
    dates = listing.xpath('li/a/i/text()').extract()
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published[:10]:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://dzujy.dzu.edu.cn' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today in the "gov-main" listing.

    A notice matches when its title contains one of the keywords below and
    characters 1..10 of its date text equal getPresentTime() (defined
    elsewhere; presumably a 'YYYY-MM-DD' string -- confirm). The scraped
    href is stored unchanged as the item URL.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    listing = response.xpath('//div[@class="gov-main"]/div/ul')
    print(listing)
    titles = listing.xpath('li[@style=";"]/a/@title').extract()
    print(titles)
    dates = listing.xpath('li[@style=";"]/span/text()').extract()
    # Use the same <li> predicate as for titles and dates so that the
    # three lists describe the same rows and stay index-aligned.
    links = listing.xpath('li[@style=";"]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = raw_date[1:11]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on sxau.university-hr.com.

    A notice matches when its title contains one of the keywords below and
    its date text equals getPresentTime() (defined elsewhere; presumably a
    'YYYY-MM-DD' string -- confirm). Hrefs beginning with 'showarticle'
    are prefixed with the site root; any other href is stored unchanged.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    table = response.xpath('//div[@class="p2_m"]/div[2]/table')
    print(table)
    titles = table.xpath(
        'tr[@class="trbg"]/td[@align="left"]/a/text()').extract()
    print(titles)
    dates = table.xpath('tr[@class="trbg"]/td[4]/text()').extract()
    links = table.xpath('tr[@class="trbg"]/td[2]/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            if link.startswith('showarticle'):
                item['url'] = 'http://sxau.university-hr.com/' + link
            else:
                item['url'] = link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice from the current month in the "inf_lc" listing.

    A notice matches when its title contains one of the keywords below and
    its stripped date text equals getPresentTime()[:7] (getPresentTime is
    defined elsewhere; presumably 'YYYY-MM-DD', making the slice
    'YYYY-MM' -- confirm). The scraped href is stored unchanged as the
    item URL.
    """
    keywords = ("招聘会", "双选会", "宣讲会")

    listing = response.xpath('//ul[@class="inf_lc"]')
    print(listing)
    titles = listing.xpath('li/a[1]/@title').extract()
    print(titles)
    dates = [
        text.strip() for text in listing.xpath('li/h4/span/text()').extract()
    ]
    links = listing.xpath('li/a[1]/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today[:7] == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www6.hpu.edu.cn.

    A notice matches when its title contains one of the keywords below and
    its stripped date text, minus the first character, equals
    getPresentTime() (defined elsewhere; presumably a 'YYYY-MM-DD'
    string -- confirm). The first two characters of each href are dropped
    before it is appended to the site prefix.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    table = response.xpath('body/div[2]/table[2]/tr/td[3]/table[2]')
    print(table)
    titles = table.xpath('tr/td/a/text()').extract()
    print(titles)
    dates = [text.strip() for text in table.xpath('tr/td/text()').extract()]
    links = table.xpath('tr/td/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, raw_date, link in zip(titles, dates, links):
        published = raw_date[1:]
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www6.hpu.edu.cn/web5' + link[2:]
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice dated today on www.lcu.edu.cn.

    A notice matches when its stripped title contains one of the keywords
    below and its date text equals getPresentTime() (defined elsewhere;
    presumably a 'YYYY-MM-DD' string -- confirm).
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    listing = response.xpath('//div[@class="article cur02"]/ul')
    print(listing)
    titles = [text.strip() for text in listing.xpath('li/a/text()').extract()]
    print(titles)
    dates = listing.xpath('li/span/text()').extract()
    links = listing.xpath('li/a/@href').extract()
    today = getPresentTime()

    # zip() stops at the shortest list, so mismatched column lengths
    # cannot raise IndexError.
    for title, published, link in zip(titles, dates, links):
        if any(word in title for word in keywords) and today == published:
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = published
            item['holdDate'] = ""
            item['url'] = 'http://www.lcu.edu.cn/ztzx/ldyw/' + link
            yield item
        else:
            print('没有匹配')
def parse(self, response):
    """Yield a DoubleItem per job-fair notice listed on www.hnyzzy.com.

    A notice matches when its title contains one of the keywords below.
    No date filter is applied, and ``publishDate`` and ``holdDate`` are
    stored as empty strings.
    """
    keywords = ("招聘会", "双选会", "宣讲会", '供需见面会')

    titles = response.xpath(
        '//div[@class="tdtext1"]/table/tr/td/a/font/text()').extract()
    print(titles)
    links = response.xpath(
        '//div[@class="tdtext1"]/table/tr/td/a/@href').extract()

    # zip() stops at the shorter list, so a title without a link is
    # skipped instead of raising IndexError.
    for title, link in zip(titles, links):
        if any(word in title for word in keywords):
            print(title)
            # Build a new item per match; a single shared instance would be
            # overwritten after it had already been yielded.
            item = DoubleItem()
            item['title'] = title
            item['publishDate'] = ''
            item['holdDate'] = ""
            item['url'] = 'http://www.hnyzzy.com' + link
            yield item
        else:
            print('没有匹配')