Example #1
def extract(url):
    base = "https://www.active.com"
    sleep(0.2)
    try:
        # Scraped links may be relative paths; prefix the site root when needed
        if url.startswith("http"):
            req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        else:
            req = Request(base + url, headers={'User-Agent': 'Mozilla/5.0'})
        s = urlopen(req).read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(id='body-container')
        #print("\n-----> Extracting: ",base+url)
        try:
            event = Event()
            # Build a random 14-character alphanumeric id for the record
            myStr = ""
            for i in range(14):
                myStr += random.choice(string.ascii_letters + string.digits)
            event.id = myStr

            event.title = info.h1.text
            event.link = base + url
            print("Title: ", event.title)
            description = info.find(class_='asset-summary span8').text
            event.description = description
            date = info.h5.text
            event.date = event.dateFinder(date)
            location = info.find(class_='ed-address-text').text
            event.address, event.city, event.lat, event.lng = event.addressFinder(
                location)
            # A non-string lat/lng means geocoding succeeded; otherwise fall back
            # to the basic address finder before giving up
            if not isinstance(event.lat, str):
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print("SUCCESS")
                table.put_item(Item=event.toJSON())
            else:
                event.address, event.city, event.lat, event.lng = event.addressFinderBasic(
                    location)
                if not isinstance(event.lat, str):
                    event.lat = str(event.lat)
                    event.lng = str(event.lng)
                    print("Success")
                    table.put_item(Item=event.toJSON())
                else:
                    print("address failure")
        except Exception:
            print("Error")
    except Exception:
        print("Page error", base + url)
Example #2
def extract(url):
    sleep(0.2)
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        s = urlopen(req).read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(class_='full m-event-detail clearfix')
        #print("\n-----> Extracting: ",base+url)
        try:
            event = Event()
            myStr = ""
            for i in range(14):
                myStr += random.choice(string.ascii_letters + string.digits)
            event.id = myStr

            event.title = info.h1.text
            event.link = url
            description = info.find(class_='m-event-detail-description').text
            event.description = description
            date = info.find(class_="m-date__singleDate").text
            event.date = event.dateFinder(date)
            # Venue details are hard-coded (see the commented address below)
            #location = "18300 W Alameda Pkwy, Morrison, CO 80465"
            event.lng = "-105.2048546"
            event.lat = "39.6664666"
            event.city = "Morrison"
            event.address = "18300 W Alameda Pkwy"
            print("\n\n Item: ", event.title)
            table.put_item(Item=event.toJSON())
        except Exception:
            print("Info error", url)
    except Exception:
        print("Page error", url)
Example #3
def extract(url):
    try:
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        print("Extracting...")
        s = urlopen(req).read()
        soup = bs.BeautifulSoup(s, 'lxml')
        info = soup.find(class_='mn-section mn-event-detail-listing')

        try:
            event = Event()
            myStr = ""
            for i in range(14):
                myStr += random.choice(string.ascii_letters + string.digits)
            event.id = myStr

            event.title = info.find(class_='mn-event-content').text
            event.link = url
            description = info.find(itemprop='description').text
            event.description = description
            #event.short_description = description[:92]+"..."
            event.date = datetime.strptime(info.find(class_='mn-event-day').text, '%B %d, %Y').strftime('%Y-%m-%d')
            #event.category = event.categoryFinder(description)
            location = info.find(itemprop='name').text
            event.address, event.city, event.lat, event.lng = event.addressFinder(location)
            if not isinstance(event.lat, str):
                event.lat = str(event.lat)
                event.lng = str(event.lng)
                print("SUCCESS\n")
                table.put_item(Item=event.toJSON())
            else:
                event.address, event.city, event.lat, event.lng = event.addressFinderBasic(location)
                if not isinstance(event.lat, str):
                    event.lat = str(event.lat)
                    event.lng = str(event.lng)
                    print("Success\n")
                    table.put_item(Item=event.toJSON())
                else:
                    print("address failure\n")
        except Exception:
            print("Event error", url, "\n")
    except Exception:
        print("Page error", url, "\n")
Example #4
now = datetime.datetime.now()


def random_date(start, end):
    """Generate a random datetime between `start` and `end`"""
    return start + datetime.timedelta(
        # Get a random number of seconds between `start` and `end`
        seconds=random.randint(0, int((end - start).total_seconds())))


for i in range(50):
    event = Event()
    # Build a random 14-character id (use "_" so the outer loop index is not reused)
    myID = ""
    for _ in range(14):
        myID += random.choice(string.ascii_letters + string.digits)
    images = ["one.jpg", "two.jpg", "three.jpg", "four.jpg"]
    event.id = myID
    event.title = text.sentence()
    event.link = "https://google.com"
    event.description = text.quote()
    event.date = str(random_date(now, now + datetime.timedelta(600)))
    event.category = text.words(quantity=4)
    event.address = address.address()
    event.city = address.city()
    event.lat = str(round(random.uniform(37, 40.8), 7))
    event.lng = str(round(random.uniform(-108.9, -102.2), 7))
    event.image = random.choice(images)

    ans = event.toJSON()
    #print(ans)
    table.put_item(Item=ans)
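Example #4 seeds the table with 50 fake events, one put_item call each. When loading many items like this, boto3's batch_writer can group the writes into fewer requests; below is a sketch under the same assumptions (table is the boto3 DynamoDB table, and text/address are the same fake-data providers used above).

# Hypothetical variant of the seeding loop that batches the DynamoDB writes
with table.batch_writer() as batch:
    for _ in range(50):
        event = Event()
        event.id = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(14))
        event.title = text.sentence()
        event.link = "https://google.com"
        event.description = text.quote()
        event.date = str(random_date(now, now + datetime.timedelta(600)))
        event.category = text.words(quantity=4)
        event.address = address.address()
        event.city = address.city()
        event.lat = str(round(random.uniform(37, 40.8), 7))
        event.lng = str(round(random.uniform(-108.9, -102.2), 7))
        event.image = random.choice(["one.jpg", "two.jpg", "three.jpg", "four.jpg"])
        batch.put_item(Item=event.toJSON())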