示例#1
0
 def parseUrbanL(self, response):
     """Yield a ConcertItem for each headliner on The Urban Lounge listing.

     Artists, dates, images and ticket links are extracted as parallel lists
     and paired up by position. Each date string is split on ', ' and then
     on '/' to obtain a numeric month and day. The listing carries no year,
     so a date that falls before ``self.today`` within ``self.currentYear``
     is assigned to the following year.

     Raises:
         IndexError: if the date, image or ticket-link list is shorter than
             the artist list.
         ValueError: if a month or day is not an integer, or the resulting
             date is invalid (Feb 29 excepted, see below).
     """
     print("Inside parseUrban function")
     details = response.css(".list-view-details")
     artists = details.css(".headliners").css("a::text").extract()
     months = []
     days = []
     for date_text in details.css(".dates::text").extract():
         # Drop everything up to the first ', ', then split "month/day".
         month_text, _, day_text = date_text.partition(', ')[2].partition('/')
         months.append(int(month_text))
         days.append(int(day_text))
     images = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
     ticket_links = response.css('.ticket-price').css('a::attr(href)').extract()
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = "The Urban Lounge"
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item
示例#2
0
    def parseSR(self, response):
        """Yield a ConcertItem for each event title on The State Room page.

        Date information comes from the ``h3 span`` texts. Spans that mention
        'pm', consist of a single space, or contain no space at all are
        ignored. Each remaining span is split on ', ' (the right-hand part is
        collected as a year) and the left-hand part on its first space into
        a month name and a day. Month names are converted with
        ``self.changeMonthNameToNumber``; days and non-empty years are
        converted with ``int``. Image hrefs are prefixed with the site root.

        The month, day and year lists are filtered independently and then
        matched to artists by position; a list that ends up shorter than the
        artist list raises IndexError.
        """
        print("Inside parseSR function")
        venue = "The State Room"
        artists = response.css(".event_detail_title").css("span::text").extract()
        raw_spans = response.css("h3").css("span::text").extract()
        # Discard start times ('pm'), lone blanks and anything without a space.
        date_texts = [
            text for text in raw_spans
            if 'pm' not in text and text != " " and ' ' in text
        ]
        month_names = []
        day_texts = []
        year_texts = []
        for text in date_texts:
            month_day, _, year_text = text.partition(', ')
            year_texts.append(year_text)
            month_name, _, day_text = month_day.partition(' ')
            # A span starting with a space yields an empty month name; skip
            # it entirely (its day is not recorded either).
            if month_name == "":
                continue
            month_names.append(month_name)
            if day_text != " ":
                day_texts.append(day_text)
        # Convert everything to ints for the db.
        months = self.changeMonthNameToNumber(month_names)
        days = [int(text) for text in day_texts]
        years = [int(text) for text in year_texts if text != '']
        ticket_links = response.css(".ohanah-registration-link").css("a::attr(href)").extract()
        # Image hrefs need http://thestateroom.com put in front of them.
        images = response.css(".ohanah_modal::attr(href)").extract()

        for position, artist in enumerate(artists):
            item = ConcertItem()
            item['venue'] = venue
            item['artist'] = artist
            item['month'] = months[position]
            item['day'] = days[position]
            item['year'] = years[position]
            item['image'] = 'http://thestateroom.com' + images[position]
            item['ticket_link'] = ticket_links[position]
            yield item
示例#3
0
 def parseUnion(self, response):
     """Yield a ConcertItem for each upcoming event on The Union listing.

     Artists, months, days, images and ticket links are extracted as
     parallel lists and paired up by position. Months are converted with
     ``self.changeMonthNameToNumber`` and days with ``int``. When fewer
     ticket links than artists are found (the original notes this happens
     for sold-out shows), the list is padded at the end with the venue's
     upcoming-events URL. The listing carries no year, so a date that falls
     before ``self.today`` within ``self.currentYear`` is assigned to the
     following year.

     Raises:
         IndexError: if the month, day or image list is shorter than the
             artist list.
         ValueError: if a day is not an integer, or the resulting date is
             invalid (Feb 29 excepted, see below).
     """
     print("Inside parseUnion function")
     upcoming = response.css('.eventlist-event--upcoming')
     artists = upcoming.css('h1').css('a::text').extract()
     months = response.css(".eventlist-datetag-startdate--month::text").extract()
     days = response.css(".eventlist-datetag-startdate--day::text").extract()
     images = upcoming.css('img::attr(data-src)').extract()
     ticket_links = upcoming.css('.eventlist-column-info').css('.eventlist-excerpt').css('a::attr(href)').extract()
     months = self.changeMonthNameToNumber(months)
     days = [int(day_text) for day_text in days]
     # Links are matched by position, so padding goes at the end of the list.
     shortfall = len(artists) - len(ticket_links)
     if shortfall > 0:
         ticket_links.extend(["https://theunioneventcenter.com/upcomingevents/"] * shortfall)
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = "The Union"
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item
示例#4
0
 def parseKingsbury(self, response):
     """Yield a ConcertItem for each event heading on the Kingsbury listing.

     Venues, artists, months, days, images and ticket links are all read
     from ``.eq-ht`` elements as parallel lists and paired up by position.
     Months are converted with ``self.changeMonthNameToNumber`` and days
     with ``int``. The listing carries no year, so a date that falls before
     ``self.today`` within ``self.currentYear`` is assigned to the following
     year.

     Raises:
         IndexError: if any parallel list is shorter than the artist list.
         ValueError: if a day is not an integer, or the resulting date is
             invalid (Feb 29 excepted, see below).
     """
     print("Inside parseKingsbury function")
     cards = response.css(".eq-ht")
     venues = cards.css('.venue::text').extract()
     artists = cards.css('h3::text').extract()
     months = cards.css('.event-month::text').extract()
     days = cards.css('.event-day::text').extract()
     images = cards.css('img::attr(src)').extract()
     ticket_links = cards.css('a::attr(href)').extract()
     months = self.changeMonthNameToNumber(months)
     days = [int(day_text) for day_text in days]
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = venues[index]
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item
示例#5
0
 def parseKilby(self, response):
     """Yield a ConcertItem for each headliner on the Kilby Court listing.

     Artists, dates, images and ticket links are extracted as parallel lists
     and paired up by position. Each date string is split on ', ' and then
     on '/' into a month part and a day part. The month parts are passed to
     ``self.changeMonthNameToNumber`` and the day parts to ``int``. The
     listing carries no year, so a date that falls before ``self.today``
     within ``self.currentYear`` is assigned to the following year.

     NOTE(review): the helper is not visible here; confirm it accepts
     whatever form the month part takes in these date strings.

     Raises:
         IndexError: if the date, image or ticket-link list is shorter than
             the artist list.
         ValueError: if a day is not an integer, or the resulting date is
             invalid (Feb 29 excepted, see below).
     """
     print("Inside parseKilby function")
     artists = response.css(".headliners.summary").css('a::text').extract()
     month_parts = []
     day_parts = []
     for date_text in response.css(".dates::text").extract():
         # Drop everything up to the first ', ', then split "month/day".
         month_part, _, day_part = date_text.partition(', ')[2].partition('/')
         month_parts.append(month_part)
         day_parts.append(day_part)
     months = self.changeMonthNameToNumber(month_parts)
     days = [int(day_part) for day_part in day_parts]
     images = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
     ticket_links = response.css(".ticket-link").css("a::attr(href)").extract()
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = "Kilby Court"
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item
示例#6
0
 def parseVivint(self, response):
     """Yield a ConcertItem for each event title on the Vivint Arena page.

     Titles, day numbers and images are extracted as parallel lists and
     paired up by position. Ticket hrefs equal to '/tickets' are dropped;
     if fewer links than artists remain, the list is padded at the end with
     a generic Ticketmaster venue URL. The month is always stored as 0 and
     the year as ``self.currentYear``.

     A day or image list shorter than the artist list raises IndexError.
     """
     print("Inside the parseVivint function")
     artists = response.css(".title").css("h5::text").extract()
     # Day numbers arrive as strings; the db wants ints.
     days = [int(day_text) for day_text in response.css(".date").css("em::text").extract()]
     images = response.css(".synopsis").css("img::attr(src)").extract()
     # Hrefs come back as 'url', '/tickets', 'url', '/tickets', ... - keep
     # only the real URLs.
     hrefs = response.css(".tickets").css("a::attr(href)").extract()
     ticket_links = [href for href in hrefs if href != '/tickets']
     # Artist and ticket-link counts do not always match; fill the gap with
     # the generic link for buying tickets at Vivint Arena.
     shortfall = len(artists) - len(ticket_links)
     while shortfall >= 1:
         ticket_links.append('https://www.ticketmaster.com/new/venue/246072?_ga=2.28280443.630826876.1536686741-1322752007.1534960663&x-flag-desktop=true&m_efeat6690v1desktop&x-flag-desktop-ads-variant=3')
         shortfall -= 1
     for position, artist in enumerate(artists):
         item = ConcertItem()
         item['venue'] = "Vivint Arena"
         item['artist'] = artist
         item['month'] = 0
         item['day'] = days[position]
         item['year'] = self.currentYear
         item['image'] = images[position]
         item['ticket_link'] = ticket_links[position]
         yield item
示例#7
0
 def parseEgyptian(self, response):
     """Yield a ConcertItem for each event heading on The Egyptian's page.

     Only the artist, image and ticket link are scraped; month and day are
     stored as 0 and the year as ``self.currentYear``. Each ticket href is
     appended to the theatre's site root. The three extracted lists are
     paired by position, so an image or link list shorter than the artist
     list raises IndexError.
     """
     print("Inside parseEgyptian function")
     event_info = response.css(".event_info")
     artists = event_info.css("h2::text").extract()
     images = response.css('.flyer').css('a').css('img::attr(src)').extract()
     ticket_links = event_info.css("a::attr(href)").extract()
     for position, artist in enumerate(artists):
         item = ConcertItem()
         item['venue'] = "The Egyptian"
         item['artist'] = artist
         item['month'] = 0
         item['day'] = 0
         item['year'] = self.currentYear
         item['image'] = images[position]
         item['ticket_link'] = "https://www.egyptiantheatrecompany.org/" + ticket_links[position]
         yield item
示例#8
0
    def parseMaverik(self, response):
        """Yield a ConcertItem for each event heading on the Maverik Center page.

        Every other ``h5`` text (even positions) is treated as a date string
        and split as '<month> <day>, <year>'. Month names are converted with
        ``self.changeMonthNameToNumber``; days and years with ``int``.
        Likewise only every other button href (even positions) is kept as a
        ticket link, and the list is padded at the end with the venue's
        upcoming-events URL when it is shorter than the artist list.

        The lists are paired by position; a date or image list shorter than
        the artist list raises IndexError.
        """
        print("Inside parseMaverik function")
        info = response.css(".data-info")
        artists = info.css("h4::text").extract()
        h5_texts = info.css("h5::text").extract()
        month_names = []
        day_texts = []
        year_texts = []
        # Only the entries at even positions are dates.
        for date_text in h5_texts[::2]:
            month_name, _, remainder = date_text.partition(' ')
            day_text, _, year_text = remainder.partition(', ')
            month_names.append(month_name)
            day_texts.append(day_text)
            year_texts.append(year_text)
        # Everything is stored as ints in the db.
        months = self.changeMonthNameToNumber(month_names)
        days = [int(text) for text in day_texts]
        years = [int(text) for text in year_texts]
        images = response.css('.image').css('img::attr(src)').extract()
        button_hrefs = response.css(".buttons").css("a::attr(href)").extract()
        # Only the hrefs at even positions are kept as ticket links.
        ticket_links = button_hrefs[::2]
        shortfall = len(artists) - len(ticket_links)
        while shortfall >= 1:
            ticket_links.append("http://maverikcenter.com/events-tickets/upcoming-events/")
            shortfall -= 1
        for position, artist in enumerate(artists):
            item = ConcertItem()
            item['venue'] = "Maverik Center"
            item['artist'] = artist
            item['month'] = months[position]
            item['day'] = days[position]
            item['year'] = years[position]
            item['image'] = images[position]
            item['ticket_link'] = ticket_links[position]
            yield item
示例#9
0
    def parseComplex(self, response):
        """Yield a ConcertItem for each event heading on The Complex's page.

        Artists, images and ticket links are extracted as parallel lists and
        paired up by position. Every other ``h4`` text (even positions) is
        treated as a date string: its first word is discarded, the second
        word is the month name, and the first run of digits in the remainder
        is the day. Month names are converted with
        ``self.changeMonthNameToNumber`` and days with ``int``. The listing
        carries no year, so a date that falls before ``self.today`` within
        ``self.currentYear`` is assigned to the following year.

        Raises:
            IndexError: if a date string contains no digits after the month,
                or a parallel list is shorter than the artist list.
            ValueError: if the resulting date is invalid (Feb 29 excepted,
                see below).
        """
        print("Inside parseComplex function")
        content = response.css('.inner-box').css('.content')
        artists = content.css('h3::text').extract()
        images = response.css('.portfolio-item').css('.image-box').css('img::attr(src)').extract()
        ticket_links = content.css('a::attr(href)').extract()
        # The h4 texts hold the date at even positions; the original comment
        # says the other entries are the exact venue within The Complex.
        h4_texts = content.css('h4::text').extract()
        month_names = []
        day_texts = []
        for date_text in h4_texts[::2]:
            # Skip the first word, take the second as the month.
            month_name, _, remainder = date_text.partition(' ')[2].partition(' ')
            month_names.append(month_name)
            # Raw string: '\d' in a plain literal is an invalid escape
            # sequence. With the capturing group, element 1 of the split is
            # the first run of digits.
            day_texts.append(re.split(r'(\d+)', remainder)[1])
        months = self.changeMonthNameToNumber(month_names)
        days = [int(day_text) for day_text in day_texts]

        for index, artist in enumerate(artists):
            month = months[index]
            day = days[index]
            try:
                concert_date = datetime.date(self.currentYear, month, day)
            except ValueError:
                # Feb 29 cannot be built in a non-leap current year; such a
                # listing can only refer to the following year. Any other
                # invalid date is a genuine parsing problem, so let it surface.
                if (month, day) != (2, 29):
                    raise
                year = self.currentYear + 1
            else:
                # A date already behind today belongs to next year's calendar
                # (e.g. listed March 6th while today is Oct 3rd).
                year = self.currentYear + 1 if concert_date < self.today else self.currentYear
            item = ConcertItem()
            item['venue'] = "The Complex"
            item['artist'] = artist
            item['month'] = month
            item['day'] = day
            item['year'] = year
            item['image'] = images[index]
            item['ticket_link'] = ticket_links[index]
            yield item
示例#10
0
 def parseCommon(self, response):
     """Yield a ConcertItem for each headliner on The Commonwealth Room listing.

     Artists, dates, images and ticket links are extracted as parallel lists
     and paired up by position. Each date string has its first word dropped
     and the remainder split on '.' into a numeric month and day. When fewer
     ticket links than artists are found (the original notes the link
     disappears for sold-out shows), the list is padded at the end with the
     venue's Ticketfly listing URL. The listing carries no year, so a date
     that falls before ``self.today`` within ``self.currentYear`` is
     assigned to the following year.

     Raises:
         IndexError: if the date or image list is shorter than the artist
             list.
         ValueError: if a month or day is not an integer, or the resulting
             date is invalid (Feb 29 excepted, see below).
     """
     print("Inside parseCommon function")
     artists = response.css(".headliners.summary").css('a::text').extract()
     months = []
     days = []
     for date_text in response.css(".dates::text").extract():
         # Drop the first word, then split "month.day".
         month_text, _, day_text = date_text.partition(' ')[2].partition('.')
         months.append(int(month_text))
         days.append(int(day_text))
     images = response.css(".list-view-item").css('a').css('img::attr(src)').extract()
     ticket_links = response.css(".ticket-link").css("a::attr(href)").extract()
     # Links are matched by position, so padding goes at the end of the list.
     shortfall = len(artists) - len(ticket_links)
     if shortfall > 0:
         ticket_links.extend(["http://thecommonwealthroom.ticketfly.com/listing"] * shortfall)
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = "The Commonwealth Room"
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item
示例#11
0
 def parseMetro(self, response):
     """Yield a ConcertItem for each headliner on the Metro Music Hall listing.

     Artists, dates, images and ticket links are extracted as parallel lists
     and paired up by position. Each date string is split on ', ' and then
     on '/' into a month part and a day part. The month parts are passed to
     ``self.changeMonthNameToNumber`` and the day parts to ``int``. When
     fewer ticket links than artists are found, the list is padded at the
     end with the URL of the page being parsed, so a missing link no longer
     aborts the callback with 'list index out of range'. The listing carries
     no year, so a date that falls before ``self.today`` within
     ``self.currentYear`` is assigned to the following year.

     NOTE(review): the original comment describes dates like "9/19", yet
     the month goes through ``changeMonthNameToNumber`` (not visible here);
     confirm the helper accepts numeric strings.

     Raises:
         IndexError: if the date or image list is shorter than the artist
             list.
         ValueError: if a day is not an integer, or the resulting date is
             invalid (Feb 29 excepted, see below).
     """
     print("Inside parseMetro function")
     details = response.css('.list-view-details')
     artists = details.css('.headliners').css('a::text').extract()
     month_parts = []
     day_parts = []
     for date_text in details.css('.dates::text').extract():
         # Drop everything up to the first ', ', then split "month/day".
         month_part, _, day_part = date_text.partition(', ')[2].partition('/')
         month_parts.append(month_part)
         day_parts.append(day_part)
     months = self.changeMonthNameToNumber(month_parts)
     days = [int(day_part) for day_part in day_parts]
     images = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
     ticket_links = response.css('.ticket-link').css('.primary-link').css('a::attr(href)').extract()
     # Links are matched by position, so padding goes at the end of the
     # list; the listing page itself serves as the fallback link.
     shortfall = len(artists) - len(ticket_links)
     if shortfall > 0:
         ticket_links.extend([response.url] * shortfall)
     for index, artist in enumerate(artists):
         month = months[index]
         day = days[index]
         try:
             concert_date = datetime.date(self.currentYear, month, day)
         except ValueError:
             # Feb 29 cannot be built in a non-leap current year; such a
             # listing can only refer to the following year. Any other
             # invalid date is a genuine parsing problem, so let it surface.
             if (month, day) != (2, 29):
                 raise
             year = self.currentYear + 1
         else:
             # A date already behind today belongs to next year's calendar
             # (e.g. listed March 6th while today is Oct 3rd).
             year = self.currentYear + 1 if concert_date < self.today else self.currentYear
         item = ConcertItem()
         item['venue'] = "Metro Music Hall"
         item['artist'] = artist
         item['month'] = month
         item['day'] = day
         item['year'] = year
         item['image'] = images[index]
         item['ticket_link'] = ticket_links[index]
         yield item