def parse(self, response):
		"""Yield one follow-up request per conference entry on a listing page.

		For every ``li`` with class ``conference vevent`` an ``AfeventItem`` is
		pre-filled with location, title and date, while ``host``, ``time`` and
		``description`` are set to empty strings.  The item is attached to the
		request as ``request.meta['item']`` and the request is handled by
		``self.parse_url``.

		:param response: listing-page response; only ``response.xpath`` is used.
		:raises IndexError: if the page holds fewer titles, dates or links than
			conference entries.
		"""
		entries = response.xpath('//li[@class="conference vevent"]')

		# These expressions start with "//", so they match against the whole
		# document regardless of the node they are evaluated from.  Evaluate
		# them once and pair the results with the entries by position instead
		# of re-running them on every iteration.
		# NOTE(review): node-relative paths (".//h4/a/...") would be sturdier if
		# these elements are nested inside each <li> -- confirm against the page.
		titles = response.xpath('//h4/a/text()').extract()
		dates = response.xpath('//p[@class="date"]/abbr[1]/@title').extract()
		links = response.xpath('//h4/a/@href').extract()

		for index, entry in enumerate(entries):
			item = AfeventItem()
			item['location'] = entry.xpath('.//p[@class="location"]/a[3]/text()').extract_first()
			item['title'] = titles[index]
			item['date'] = dates[index]
			item['host'] = ''
			item['time'] = ''
			item['description'] = ''

			# The original built this request twice from the same href and
			# discarded the first copy; one request is enough.
			request = Request('http://lanyrd.com' + links[index], callback=self.parse_url)
			request.meta['item'] = item
			yield request
# Example no. 2
# 0
    def parser(self, response):
        """Yield one follow-up request per calendar link.

        For every ``a`` element under ``#CalendarContainer`` an ``AfeventItem``
        is filled with title, venue, date, time and url, attached to the
        request as ``request.meta['item']`` and sent to ``self.parse_url``.

        :param response: page response; only ``response.xpath`` is used.
        :raises IndexError: if any of the document-wide lookups returns fewer
            values than there are calendar links.
        """
        anchors = response.xpath('//*[@id="CalendarContainer"]/div/div/a')

        # All of these paths are absolute ("//..."), so they select from the
        # whole document; evaluate each once and index by position.
        # NOTE(review): the article paths look like they address a single
        # article, in which case any index above 0 raises IndexError -- confirm
        # against the page markup.
        titles = response.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/h1/text()'
        ).extract()
        venues = response.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/text()'
        ).extract()
        dates = response.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/span/text()'
        ).extract()
        times = response.xpath(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/span[2]/span/text()|//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[3]/span/text()[3]'
        ).extract()
        hrefs = response.xpath(
            '//*[@id="mainContent"]/section/div/div/nav/ul/li[4]/a/@href'
        ).extract()

        for index, _anchor in enumerate(anchors):
            item = AfeventItem()
            # Debug trace kept from the original; the parenthesised form prints
            # the same text on Python 2 and also parses on Python 3.
            print("response.xpath")
            item['title'] = titles[index]
            item['venue'] = venues[index]
            item['date'] = dates[index]
            item['time'] = times[index]
            item['url'] = hrefs[index]

            request = Request(
                'http://www.afconsult.com' + hrefs[index],
                callback=self.parse_url)
            request.meta['item'] = item

            # The original printed its counter right after incrementing it.
            print(index + 1)
            yield request
# Example no. 3
# 0
    def parse(self, response):
        """Yield one follow-up request per exhibit block on the listing page.

        Each ``div`` with class ``exhibit clearfix`` produces an ``AfeventItem``
        holding title, url, location and an ISO-style ``YYYY-MM-DD`` date.  The
        item is attached as ``request.meta['item']`` and the request is handled
        by ``self.parse_second``.

        :param response: listing-page response; only ``response.xpath`` is used.
        :raises IndexError: if the page has fewer titles, locations or dates
            than exhibit blocks, or a block has no link.
        :raises ValueError: if the month name is not a Swedish month name or
            the day is not numeric.
        """
        month_names = [
            "", "januari", "februari", "mars", "april", "maj", "juni",
            "juli", "augusti", "september", "oktober", "november",
            "december"
        ]
        exhibits = response.xpath('//div[@class="exhibit clearfix"]')

        # Absolute ("//...") paths match the whole document, so evaluate them
        # once and pair the results with the exhibits by position.
        titles = response.xpath(
            '//a/div/div[@class="header"]/text()').extract()
        locations = response.xpath('//span[@class="ort"]/text()').extract()
        date_texts = response.xpath('//span[@class="date"]/text()').extract()

        for index, exhibit in enumerate(exhibits):
            item = AfeventItem()
            item['title'] = titles[index]
            url = ('http://allamassor.se/' +
                   exhibit.xpath('.//a[@href]/@href').extract()[0])
            item['url'] = url

            # Strip every space and pipe character, as the original did.
            item['location'] = locations[index].replace(' ', '').replace('|', '')

            parts = date_texts[index].split()
            # A range such as "10 - 11 maj 2016" keeps the first day together
            # with the month and year that follow the range.
            if len(parts) > 3:
                parts = [parts[0], parts[3], parts[4]]

            # Zero-pad with a format string.  The original left the month as an
            # int for October-December, which made '-'.join() raise TypeError,
            # and turned an already padded day such as "05" into "005".
            month_number = month_names.index(parts[1].lower())
            item['date'] = '%s-%02d-%02d' % (
                parts[2], month_number, int(parts[0]))

            request = Request(url, callback=self.parse_second)
            request.meta['item'] = item
            yield request
# Example no. 4
# 0
    def parse(self, response):
        """Yield one ``AfeventItem`` per block under ``#content``.

        Title, url, description and date text are taken from document-wide
        lookups and matched to the blocks by position; ``location`` is set to
        an empty string and the date is rewritten as ``YYYY-MM-DD``.

        :param response: page response; only ``response.xpath`` is used.
        :raises IndexError: if a lookup returns fewer values than there are
            blocks, or a date text has fewer than three words.
        :raises ValueError: if the month name is not a Swedish month name or
            the day is not numeric.
        """
        month_names = [
            "", "januari", "februari", "mars", "april", "maj", "juni",
            "juli", "augusti", "september", "oktober", "november",
            "december"
        ]
        row_count = len(response.xpath('//*[@id="content"]/div/div'))

        # Absolute ("//...") paths match the whole document, so evaluate them
        # once instead of once per block.
        titles = response.xpath('//h2/a/text()').extract()
        links = response.xpath('//h2/a/@href').extract()
        descriptions = response.xpath(
            '//*[@id="content"]/div/div[1]/a[1]/p/text()').extract()
        date_texts = response.xpath('//p/text()').extract()

        # The print calls reproduce the original debug trace; the parenthesised
        # form prints the same text on Python 2 and also parses on Python 3.
        print(0)
        for index in range(row_count):
            print("IN FOR")
            item = AfeventItem()
            item['title'] = titles[index]
            item['url'] = links[index]
            item['location'] = ''
            item['description'] = descriptions[index]

            # Drop commas, then split into day, month name and year.
            parts = date_texts[index].replace(',', '').split()

            # Zero-pad with a format string.  The original left the month as an
            # int for October-December, which made '-'.join() raise TypeError,
            # and turned an already padded day such as "05" into "005".
            month_number = month_names.index(parts[1].lower())
            item['date'] = '%s-%02d-%02d' % (
                parts[2], month_number, int(parts[0]))

            print(index)
            print("I IF")
            print(row_count)
            yield item
	def parse(self, response):
		"""Yield one follow-up request per ``article-content`` block.

		Each block produces an ``AfeventItem`` with title, location, venue, url
		and an ISO-style ``YYYY-MM-DD`` date.  The item is attached as
		``request.meta['item']`` and the request is handled by
		``self.parse_url``.

		:param response: page response; only ``response.xpath`` is used.
		:raises IndexError: if a document-wide lookup returns fewer values than
			there are blocks, or a date text has too few words.
		:raises ValueError: if the month name is not a Swedish month name or
			the day is not numeric.
		"""
		month_names = [
			"", "januari", "februari", "mars", "april", "maj", "juni",
			"juli", "augusti", "september", "oktober", "november", "december",
		]
		articles = response.xpath('//div[@class="article-content"]')

		# Absolute ("//...") paths match the whole document, so evaluate them
		# once and pair the results with the blocks by position.
		titles = response.xpath('//h1[@class="h2 entry-title"]/text()').extract()
		locations = response.xpath('//*[@id]/div[3]/h2/text()').extract()
		venues = response.xpath('//*[@id]/div[3]/table/tr[1]/td[2]/text()').extract()
		date_texts = response.xpath('//*[@id]/div[3]/p/text()').extract()
		hrefs = response.xpath('//*[@id="main"]/div/div/a/@href').extract()

		for index, _article in enumerate(articles):
			item = AfeventItem()
			item['title'] = titles[index]
			item['location'] = locations[index]
			item['venue'] = venues[index]

			parts = date_texts[index].split()
			# A range such as "10 - 11 maj 2016" keeps the first day together
			# with the month and year that follow the range.
			if len(parts) > 3:
				parts = [parts[0], parts[3], parts[4]]

			# Zero-pad with a format string.  The original left the month as an
			# int for October-December, which made '-'.join() raise TypeError,
			# and turned an already padded day such as "05" into "005".
			month_number = month_names.index(parts[1].lower())
			item['date'] = '%s-%02d-%02d' % (parts[2], month_number, int(parts[0]))

			# The same href is stored on the item and used for the follow-up URL.
			href = hrefs[index]
			item['url'] = href
			request = Request('http://swedsoft.se/event-ovrigt/' + href, callback=self.parse_url)
			request.meta['item'] = item
			yield request
# Example no. 6
# 0
	def parse(self, response):
		"""Turn every table row into a follow-up request carrying its item.

		For each ``tr`` under a ``tbody`` the location, title, description,
		date, time, host and url are read with row-relative XPaths and stored
		in an ``AfeventItem``.  The item rides along in the request's ``meta``
		under the key ``'item'``; the request is handled by ``self.parse_url``.

		:param response: page response; only ``response.xpath`` is used.
		"""
		for row in response.xpath('//tbody/tr'):
			event = AfeventItem()

			location_text = row.xpath('./td[4]/text()').extract_first()
			event['location'] = location_text.strip()
			event['title'] = row.xpath('./td/div[@class = "event-title"]/span[@class = "link"]/text()').extract()[0]
			event['description'] = row.xpath('./td[3]/div[2]/div//text()').extract()[0]

			# The first cell holds the date and the time separated by a space.
			when = row.xpath('./td[1]/span/text()').extract()[0].split(' ')
			event['date'] = when[0]
			event['time'] = when[1]

			event['host'] = row.xpath('./td[5]/a/text()').extract()[0]

			detail_url = 'https://natverk.dfs.se' + row.xpath('./td[3]/div[2]/a/@href').extract()[0]
			event['url'] = detail_url

			yield Request(detail_url, callback=self.parse_url, meta={'item': event})
# Example no. 7
# 0
    def parser(self, response):
        """Build a single ``AfeventItem`` from the page and yield it.

        Title, date, time, url and description are each the concatenation of
        every text node matched by their XPath.  ``location`` is set to the
        last city from a fixed list whose name occurs in the description; it
        is left unset when none occurs.

        :param response: page response; only ``response.xpath`` is used.
        """
        body = response.xpath('//body')

        def joined(path):
            # Concatenate every string matched by ``path`` into one string.
            return ''.join(body.xpath(path).extract())

        event = AfeventItem()
        cities = [
            "Malmö", "Göteborg", "Stockholm", "Linköping", "Uppsala",
            "Helsingborg", "Enköping", "Jönköping", "Solna"
        ]

        title = joined(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/h1/text()'
        )
        date = joined(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/span/text()'
        )
        time = joined(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[2]/span[2]/span/text()|//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article/p[3]/span/text()[3]'
        )
        url = 'http://www.afconsult.com' + joined(
            '//*[@id="mainContent"]/section/div/div/nav/ul/li[4]/a/@href'
        )
        description = joined(
            '//*[@id="mainContent"]/main/section[3]/div[1]/div[1]/article//text()'
        )

        event['title'] = title
        event['url'] = url
        event['date'] = date
        event['time'] = time
        event['description'] = description

        # Later cities overwrite earlier ones, so the last match wins.
        for city in cities:
            if city in description:
                event['location'] = city

        yield event
# Example no. 8
# 0
    def parse(self, response):
        """Yield a follow-up request and the item itself for each ``article``.

        Each article produces an ``AfeventItem`` with title, url, description,
        location and an ISO-style ``YYYY-MM-DD`` date.  While the position
        counter is below the number of ``article`` elements with class
        ``item``, a request to ``self.parse_second`` carrying the item in
        ``request.meta['item']`` is yielded and the counter advances; the item
        itself is yielded on every iteration.

        :param response: page response; only ``response.xpath`` is used.
        :raises IndexError: if a document-wide lookup returns fewer values than
            the counter requires, an article has no link, or a date text has
            too few words.
        :raises ValueError: if the month name is not a Swedish month name or
            the day is not numeric.
        """
        month_names = [
            "", "januari", "februari", "mars", "april", "maj", "juni",
            "juli", "augusti", "september", "oktober", "november",
            "december"
        ]
        articles = response.xpath('//article')
        # Loop-invariant, so count once instead of on every iteration.
        item_article_count = len(response.xpath('//article[@class="item"]'))

        # Absolute ("//...") paths match the whole document, so evaluate them
        # once and index the results by the position counter.
        titles = response.xpath(
            '//div/div/div[1]/div[1]/a/*[self::h1 or self::h2]/text()'
        ).extract()
        descriptions = response.xpath(
            '//div/div/div[1]/div[2]/figure/div/p/text()').extract()
        locations = response.xpath(
            '//div/div/div[2]/section/ul/li[@class="schedule-where icon-where schedule__row"]/text()'
        ).extract()
        date_texts = response.xpath(
            '//div/div/div[2]/section/ul/li[@class="schedule-when icon-when schedule__row"]/text()'
        ).extract()

        position = 0
        for article in articles:
            item = AfeventItem()
            item['title'] = titles[position]
            url = ('http://iva.se' +
                   article.xpath('.//div/div/div[1]/div[1]/a/@href').extract()[0])
            item['url'] = url
            item['description'] = descriptions[position]
            item['location'] = locations[position].strip()

            parts = date_texts[position].split()
            # With more than three words, the third to fifth words are taken
            # as day, month and year.
            # NOTE(review): for "10 - 11 maj 2016" this picks the end day,
            # unlike the sibling spiders, which pick the first -- confirm.
            if len(parts) > 3:
                parts = [parts[2], parts[3], parts[4]]

            # Zero-pad with a format string.  The original left the month as an
            # int for October-December, which made '-'.join() raise TypeError,
            # and turned an already padded day such as "05" into "005".
            month_number = month_names.index(parts[1].lower())
            item['date'] = '%s-%02d-%02d' % (
                parts[2], month_number, int(parts[0]))

            request = Request(url, callback=self.parse_second)
            request.meta['item'] = item

            # NOTE(review): the guard counts article[@class="item"] although the
            # loop walks every <article>; once the counter reaches that count
            # it stops advancing and no more requests are sent.  Kept as-is.
            if position < item_article_count:
                position += 1
                yield request

            # NOTE(review): the item is also yielded directly, in addition to
            # travelling with the request -- confirm that both are wanted.
            yield item