def parseUrbanL(self, response):
    """Yield a ConcertItem for each headliner on The Urban Lounge listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseUrban function")

    details = response.css(".list-view-details")
    headliners = details.css(".headliners").css("a::text").extract()

    # Each entry is "<day of week>, <month>/<day>": keep what follows the
    # comma, then split that on the slash.
    date_parts = [
        text.partition(', ')[2].partition('/')
        for text in details.css(".dates::text").extract()
    ]
    # Month and day arrive as numeric strings; store them as ints.
    month_numbers = [int(parts[0]) for parts in date_parts]
    day_numbers = [int(parts[2]) for parts in date_parts]

    image_urls = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
    links = response.css('.ticket-price').css('a::attr(href)').extract()

    # Send data to the pipeline, one item per headliner.
    for idx, headliner in enumerate(headliners):
        show_date = datetime.date(self.currentYear, month_numbers[idx], day_numbers[idx])

        item = ConcertItem()
        item['venue'] = "The Urban Lounge"
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        item['year'] = self.currentYear + 1 if show_date < self.today else self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseSR(self, response):
    """Yield a ConcertItem for each event title on The State Room listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseSR function")
    venue_name = "The State Room"

    headliners = response.css(".event_detail_title").css("span::text").extract()
    span_texts = response.css("h3").css("span::text").extract()

    # The h3 spans mix dates with start times and blank filler. Keep only
    # texts that contain a space, are not a lone space, and lack 'pm'.
    date_texts = [
        text for text in span_texts
        if 'pm' not in text and text != " " and ' ' in text
    ]

    # Split "<month> <day>, <year>" so each part can be saved individually.
    month_names = []
    day_texts = []
    year_texts = []
    for text in date_texts:
        month_and_day, _, year_text = text.partition(', ')
        year_texts.append(year_text)
        month_name, _, day_text = month_and_day.partition(' ')
        # Some entries produce an empty month (e.g. ['August', '', ...]);
        # those contribute neither a month nor a day.
        if month_name == "":
            continue
        month_names.append(month_name)
        day_texts.append(day_text)

    # Month names -> numbers, days -> ints for the db.
    month_numbers = self.changeMonthNameToNumber(month_names)
    day_numbers = [int(day_text) for day_text in day_texts]
    # Years are strings too; drop the empty leftovers before converting.
    year_numbers = [int(year_text) for year_text in year_texts if year_text != '']

    links = response.css(".ohanah-registration-link").css("a::attr(href)").extract()
    # Image hrefs are site-relative; the host is prepended below.
    image_paths = response.css(".ohanah_modal::attr(href)").extract()

    for idx, headliner in enumerate(headliners):
        item = ConcertItem()
        item['venue'] = venue_name
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        item['year'] = year_numbers[idx]
        item['image'] = 'http://thestateroom.com' + image_paths[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseUnion(self, response):
    """Yield a ConcertItem for each upcoming event on The Union listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseUnion function")

    upcoming = response.css('.eventlist-event--upcoming')
    headliners = upcoming.css('h1').css('a::text').extract()
    month_names = response.css(".eventlist-datetag-startdate--month::text").extract()
    day_texts = response.css(".eventlist-datetag-startdate--day::text").extract()
    image_urls = upcoming.css('img::attr(data-src)').extract()
    links = (
        upcoming
        .css('.eventlist-column-info')
        .css('.eventlist-excerpt')
        .css('a::attr(href)')
        .extract()
    )

    # Month abbreviations -> numbers, day strings -> ints.
    month_numbers = self.changeMonthNameToNumber(month_names)
    day_numbers = [int(day_text) for day_text in day_texts]

    # Sold-out shows have no ticket link; top the list up with the venue's
    # events page so every artist has an entry.
    shortfall = len(headliners) - len(links)
    if shortfall > 0:
        links.extend(["https://theunioneventcenter.com/upcomingevents/"] * shortfall)

    # Send data to the pipeline, one item per artist.
    for idx, headliner in enumerate(headliners):
        show_date = datetime.date(self.currentYear, month_numbers[idx], day_numbers[idx])

        item = ConcertItem()
        item['venue'] = "The Union"
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        item['year'] = self.currentYear + 1 if show_date < self.today else self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseKingsbury(self, response):
    """Yield a ConcertItem for each event card on the Kingsbury listing.

    Unlike the single-venue parsers, the venue name is read from the page
    for every event.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseKingsbury function")

    cards = response.css(".eq-ht")
    venue_names = cards.css('.venue::text').extract()
    headliners = cards.css('h3::text').extract()
    month_names = cards.css('.event-month::text').extract()
    day_texts = cards.css('.event-day::text').extract()
    image_urls = cards.css('img::attr(src)').extract()
    links = cards.css('a::attr(href)').extract()

    # Month abbreviations -> numbers, day strings -> ints.
    month_numbers = self.changeMonthNameToNumber(month_names)
    day_numbers = [int(day_text) for day_text in day_texts]

    # Send data to the pipeline, one item per artist.
    for idx, headliner in enumerate(headliners):
        show_date = datetime.date(self.currentYear, month_numbers[idx], day_numbers[idx])

        item = ConcertItem()
        item['venue'] = venue_names[idx]
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        item['year'] = self.currentYear + 1 if show_date < self.today else self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseKilby(self, response):
    """Yield a ConcertItem for each headliner on the Kilby Court listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseKilby function")

    headliners = response.css(".headliners.summary").css('a::text').extract()

    # Separate month and day: take the text after ", " and split it on "/".
    date_parts = [
        text.partition(', ')[2].partition('/')
        for text in response.css(".dates::text").extract()
    ]
    # NOTE(review): the month fragment comes from a "<month>/<day>" split yet
    # is passed to changeMonthNameToNumber -- confirm the helper accepts the
    # form this page actually uses.
    month_numbers = self.changeMonthNameToNumber([parts[0] for parts in date_parts])
    # Day strings -> ints.
    day_numbers = [int(parts[2]) for parts in date_parts]

    image_urls = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
    links = response.css(".ticket-link").css("a::attr(href)").extract()

    # Data to send to pipelines.py, one item per artist.
    for idx, headliner in enumerate(headliners):
        show_date = datetime.date(self.currentYear, month_numbers[idx], day_numbers[idx])

        item = ConcertItem()
        item['venue'] = "Kilby Court"
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        item['year'] = self.currentYear + 1 if show_date < self.today else self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseVivint(self, response):
    """Yield a ConcertItem for each event title on the Vivint Arena listing.

    The month is always stored as 0 and the year as ``self.currentYear``;
    only the day is read from the page.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside the parseVivint function")

    headliners = response.css(".title").css("h5::text").extract()
    # Day strings -> ints for the db.
    day_numbers = [int(text) for text in response.css(".date").css("em::text").extract()]
    image_urls = response.css(".synopsis").css("img::attr(src)").extract()

    # Hrefs come back as 'url', '/tickets', 'url', '/tickets', ...; only the
    # real urls are wanted.
    links = [
        href
        for href in response.css(".tickets").css("a::attr(href)").extract()
        if href != '/tickets'
    ]

    # Artist and link counts sometimes differ; fill the gap with the generic
    # link for buying tickets at Vivint Arena.
    shortfall = len(headliners) - len(links)
    if shortfall > 0:
        links.extend(
            ['https://www.ticketmaster.com/new/venue/246072?_ga=2.28280443.630826876.1536686741-1322752007.1534960663&x-flag-desktop=true&m_efeat6690v1desktop&x-flag-desktop-ads-variant=3']
            * shortfall
        )

    # Build one item per artist.
    for idx, headliner in enumerate(headliners):
        item = ConcertItem()
        item['venue'] = "Vivint Arena"
        item['artist'] = headliner
        item['month'] = 0
        item['day'] = day_numbers[idx]
        item['year'] = self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseEgyptian(self, response):
    """Yield a ConcertItem for each event heading on The Egyptian listing.

    No date is read from the page: month and day are stored as 0 and the
    year as ``self.currentYear``.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseEgyptian function")

    event_info = response.css(".event_info")
    headliners = event_info.css("h2::text").extract()
    image_urls = response.css('.flyer').css('a').css('img::attr(src)').extract()
    link_paths = event_info.css("a::attr(href)").extract()

    for idx, headliner in enumerate(headliners):
        item = ConcertItem()
        item['venue'] = "The Egyptian"
        item['artist'] = headliner
        item['month'] = 0
        item['day'] = 0
        item['year'] = self.currentYear
        item['image'] = image_urls[idx]
        # The extracted href is appended to the site root.
        item['ticket_link'] = "https://www.egyptiantheatrecompany.org/" + link_paths[idx]
        yield item
def parseMaverik(self, response):
    """Yield a ConcertItem for each event title on the Maverik Center listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseMaverik function")

    data_info = response.css(".data-info")
    headliners = data_info.css("h4::text").extract()
    # Only every other h5 text (even positions) is treated as a date.
    date_texts = data_info.css("h5::text").extract()[::2]

    # Split "<month> <day>, <year>" into its three parts.
    month_names = []
    day_texts = []
    year_texts = []
    for text in date_texts:
        month_name, _, remainder = text.partition(' ')
        month_names.append(month_name)
        day_text, _, year_text = remainder.partition(', ')
        day_texts.append(day_text)
        year_texts.append(year_text)

    # Month name -> number, day and year strings -> ints for the db.
    month_numbers = self.changeMonthNameToNumber(month_names)
    day_numbers = [int(day_text) for day_text in day_texts]
    year_numbers = [int(year_text) for year_text in year_texts]

    image_urls = response.css('.image').css('img::attr(src)').extract()
    # Likewise only every other button href (even positions) is kept.
    links = response.css(".buttons").css("a::attr(href)").extract()[::2]

    # Fill any gap between artists and links with the venue's events page.
    shortfall = len(headliners) - len(links)
    if shortfall > 0:
        links.extend(["http://maverikcenter.com/events-tickets/upcoming-events/"] * shortfall)

    # Send data to the pipeline, one item per artist.
    for idx, headliner in enumerate(headliners):
        item = ConcertItem()
        item['venue'] = "Maverik Center"
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        item['year'] = year_numbers[idx]
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseComplex(self, response):
    """Yield a ConcertItem for each event heading on The Complex listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.

    Raises:
        IndexError: If a date text contains no digits for the day, or if an
            extracted list is shorter than the artist list.
    """
    print("Inside parseComplex function")

    content = response.css('.inner-box').css('.content')
    artists = content.css('h3::text').extract()
    images = response.css('.portfolio-item').css('.image-box').css('img::attr(src)').extract()
    ticket_links = content.css('a::attr(href)').extract()

    # The h4 texts carry month, day, and the exact room within The Complex;
    # only every other entry (even positions) is treated as a date.
    dateArray = content.css('h4::text').extract()[::2]

    # Raw string: '\d' in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions.
    digits = re.compile(r'(\d+)')

    monthArray = []
    dayArray = []
    for date in dateArray:
        # Skip the first word; the second word is the month and the rest
        # holds the day number.
        _, _, rest = date.partition(' ')
        month, _, day_text = rest.partition(' ')
        monthArray.append(month)
        # split() with a capturing group returns [before, digits, after, ...];
        # index 1 is the first run of digits.
        dayArray.append(digits.split(day_text)[1])

    # Month name -> number, day strings -> ints for the db.
    monthArray = self.changeMonthNameToNumber(monthArray)
    dayArray = [int(day) for day in dayArray]

    # Send data to the pipeline, one item per artist.
    for i, artist in enumerate(artists):
        concertDate = datetime.date(self.currentYear, monthArray[i], dayArray[i])

        item = ConcertItem()
        item['venue'] = "The Complex"
        item['artist'] = artist
        item['month'] = monthArray[i]
        item['day'] = dayArray[i]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        if concertDate < self.today:
            item['year'] = self.currentYear + 1
        else:
            item['year'] = self.currentYear
        item['image'] = images[i]
        item['ticket_link'] = ticket_links[i]
        yield item
def parseCommon(self, response):
    """Yield a ConcertItem for each headliner on The Commonwealth Room listing.

    Args:
        response: Page object exposing ``.css()`` selectors (presumably a
            Scrapy response -- confirm against the caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseCommon function")

    headliners = response.css(".headliners.summary").css('a::text').extract()

    # Separate the day and month: drop the first word, then split the rest
    # on the dot.
    date_parts = [
        text.partition(' ')[2].partition('.')
        for text in response.css(".dates::text").extract()
    ]
    # Both fragments are numeric strings; store them as ints.
    month_numbers = [int(parts[0]) for parts in date_parts]
    day_numbers = [int(parts[2]) for parts in date_parts]

    image_urls = response.css(".list-view-item").css('a').css('img::attr(src)').extract()
    links = response.css(".ticket-link").css("a::attr(href)").extract()

    # A sold-out show loses its ticket link; top the list up with the
    # venue's listing page so every artist has an entry.
    shortfall = len(headliners) - len(links)
    if shortfall > 0:
        links.extend(["http://thecommonwealthroom.ticketfly.com/listing"] * shortfall)

    # Send data to the pipeline, one item per artist.
    for idx, headliner in enumerate(headliners):
        show_date = datetime.date(self.currentYear, month_numbers[idx], day_numbers[idx])

        item = ConcertItem()
        item['venue'] = "The Commonwealth Room"
        item['artist'] = headliner
        item['month'] = month_numbers[idx]
        item['day'] = day_numbers[idx]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        item['year'] = self.currentYear + 1 if show_date < self.today else self.currentYear
        item['image'] = image_urls[idx]
        item['ticket_link'] = links[idx]
        yield item
def parseMetro(self, response):
    """Yield a ConcertItem for each headliner on the Metro Music Hall listing.

    Args:
        response: Page object exposing ``.css()`` selectors and a ``url``
            attribute (presumably a Scrapy response -- confirm against the
            caller).

    Yields:
        ConcertItem with venue, artist, month, day, year, image and
        ticket_link filled in; the extracted lists are matched by position.
    """
    print("Inside parseMetro function")

    details = response.css('.list-view-details')
    artists = details.css('.headliners').css('a::text').extract()
    # Each entry is the day of week followed by the date, e.g. "..., 9/19".
    dateArray = details.css('.dates::text').extract()

    # Separate month from day: take the text after ", " and split on "/".
    date_parts = [date.partition(', ')[2].partition('/') for date in dateArray]
    # NOTE(review): the month fragment comes from a "<month>/<day>" split yet
    # is passed to changeMonthNameToNumber -- confirm the helper accepts the
    # form this page actually uses.
    monthArray = self.changeMonthNameToNumber([parts[0] for parts in date_parts])
    # Day strings -> ints.
    dayArray = [int(parts[2]) for parts in date_parts]

    images = response.css('.list-view-item').css('a').css('img::attr(src)').extract()
    ticket_links = response.css('.ticket-link').css('.primary-link').css('a::attr(href)').extract()

    # The page sometimes has fewer ticket links than artists, which used to
    # end the scrape with "list index out of range". Pad the list the way the
    # other venue parsers do, using the scraped page itself as the fallback.
    missing = len(artists) - len(ticket_links)
    if missing > 0:
        ticket_links.extend([response.url] * missing)

    # Send data to the pipeline, one item per artist.
    for i, artist in enumerate(artists):
        concertDate = datetime.date(self.currentYear, monthArray[i], dayArray[i])

        item = ConcertItem()
        item['venue'] = "Metro Music Hall"
        item['artist'] = artist
        item['month'] = monthArray[i]
        item['day'] = dayArray[i]
        # The page lists no year. A date earlier than today (e.g. March 6th
        # when today is Oct 3rd) must belong to the upcoming year.
        if concertDate < self.today:
            item['year'] = self.currentYear + 1
        else:
            item['year'] = self.currentYear
        item['image'] = images[i]
        item['ticket_link'] = ticket_links[i]
        yield item