Example #1
from time import sleep
from urllib.request import Request, urlopen
import random
import string

import bs4 as bs


def extract(url):
    base = "https://www.active.com"
    sleep(0.2)  # throttle between requests
    try:
        # Prefix relative links from the listing page with the site base URL.
        full_url = url if url.startswith("http") else base + url
        req = Request(full_url, headers={'User-Agent': 'Mozilla/5.0'})
        s = urlopen(req).read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(id='body-container')
        #print("\n-----> Extracting: ", full_url)
        try:
            event = Event()
            # Random 14-character alphanumeric id for the DynamoDB record.
            event.id = ''.join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(14))

            event.title = info.h1.text
            event.link = full_url
            print("Title:", event.title)
            event.description = info.find(class_='asset-summary span8').text
            event.date = event.dateFinder(info.h5.text)
            location = info.find(class_='ed-address-text').text
            event.address, event.city, event.lat, event.lng = \
                event.addressFinder(location)
            if type(event.lat) != str:
                # Geocoding succeeded (lat/lng are numeric): store them
                # as strings and write the item.
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print("SUCCESS")
                table.put_item(Item=event.toJSON())
            else:
                # Retry with the simpler fallback address parser.
                event.address, event.city, event.lat, event.lng = \
                    event.addressFinderBasic(location)
                if type(event.lat) != str:
                    event.lat = str(event.lat)
                    event.lng = str(event.lng)
                    print("SUCCESS")
                    table.put_item(Item=event.toJSON())
                else:
                    print("address failure")
        except Exception as e:
            print("Event error:", e)
    except Exception as e:
        print("Page error", base + url, e)
Example #2
from datetime import datetime
from urllib.request import Request, urlopen
import random
import string

import bs4 as bs


def extract(url):
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        print("Extracting...")
        s = urlopen(req).read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(class_='mn-section mn-event-detail-listing')

        try:
            event = Event()
            # Random 14-character alphanumeric id for the DynamoDB record.
            event.id = ''.join(
                random.choice(string.ascii_letters + string.digits)
                for _ in range(14))

            event.title = info.find(class_='mn-event-content').text
            event.link = url
            description = info.find(itemprop='description').text
            event.description = description
            #event.short_description = description[:92] + "..."
            # Convert e.g. "January 05, 2021" to ISO "2021-01-05".
            event.date = datetime.strptime(
                info.find(class_='mn-event-day').text,
                '%B %d, %Y').strftime('%Y-%m-%d')
            #event.category = event.categoryFinder(description)
            location = info.find(itemprop='name').text
            event.address, event.city, event.lat, event.lng = \
                event.addressFinder(location)
            if type(event.lat) != str:
                # Geocoding succeeded (lat/lng are numeric): store them
                # as strings and write the item.
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print("SUCCESS\n")
                table.put_item(Item=event.toJSON())
            else:
                # Retry with the simpler fallback address parser.
                event.address, event.city, event.lat, event.lng = \
                    event.addressFinderBasic(location)
                if type(event.lat) != str:
                    event.lat = str(event.lat)
                    event.lng = str(event.lng)
                    print("SUCCESS\n")
                    table.put_item(Item=event.toJSON())
                else:
                    print("address failure\n")
        except Exception as e:
            print("Event error", url, e, "\n")
    except Exception as e:
        print("Page error", url, e, "\n")