starttime = re.findall("[0-9]{1,2}:[0-9]{2}\s*[aApP][mM]", time)[0] #This extracts the time, including am/pm try: price = bsObj.find("h3", {"class":"price-range"}).get_text().strip() # Pulls the price, which could be a price range except: # Let's hope that it's free if it doesn't have an h3 w/ a class of "price-range" price = "Free!" price = price.replace("DONATIONS AT THE DOOR","Donations at the Door") artist = bsObj.find("h1", {"class":"headliners summary"}).get_text() # Event / top artist name artist = artist.replace("VINYL LOUNGE OPEN MIC", "Vinyl Lounge Open Mic") # Eliminate annoying all-caps, if applicable artist = artist.replace(", VINYL LOUNGE", "") # Eliminate 'bonus' info about artist being @ Vinyl artist = artist.replace("Gordon Sterling Presents:", "") if "gypsy sally's jam" in artist.lower(): genre = "Potpourri" if "closed" in artist.lower() in artist: # Skip closed private events continue localList = scraperLibrary.getLocalList() if scraperLibrary.compactWord(artist) in localList: local = "Yes" else: local = "" try: artistweb = bsObj.find("li", {"class":"web"}).find("a").attrs["href"] #THIS finds the first instance of a li with a class of "web", then digs deeper, finding the first instance w/in that li of a child a, and pulls the href. BUT - since some artists may not have link, using try/except except: artistweb = "" try: # There isn't always a description... description = bsObj.find("div", {"class":"bio"}).get_text() # Get the description, which does include a lot of breaks - will it be a mess? except: description = "" [description, readmore] = scraperLibrary.descriptionTrim(description, [], 800, artistweb, newhtml) descriptionJammed = description.replace(" ","") # Create a string with no spaces
'%Y-%m-%d').weekday() == 3: mrHenrys.genre = "Americana" #Mr. Henry's almost always has Americana on Thursdays... else: mrHenrys.genre = "Jazz & Blues" pageanddate.add( (newPage, mrHenrys.date, datetoday) ) # Add link to list, paired with event date and today's date mrHenrys.artist = bsObj.find( "h1", { "class": "tribe-events-single-event-title" }).get_text().strip() if mrHenrys.artist == "Smith Jackson": mrHenrys.artist = "SmithJackson" mrHenrys.artist = mrHenrys.artist.replace("Double Header", "") localList = scraperLibrary.getLocalList() if scraperLibrary.compactWord(mrHenrys.artist) in localList: mrHenrys.local = "Yes" else: mrHenrys.local = "" longtime = bsObj.find("span", { "class": "tribe-event-date-start" }).get_text().strip() starttime = re.findall("[0-9]{1,2}\:[0-5][0-9]\s[aApP][mM]", longtime)[0] if starttime == "6:00 pm": # events with 6:00 start time actually have 6:00 doors but 2 different event times mrHenrys.starttime = ["7:30 pm", "9:45 pm"] else: mrHenrys.starttime = [starttime] try: # Most events are free, but 1 or 2 events a month require tickets mrHenrys.price = bsObj.find("span", {