def googleSearch(self):
    # The query can be made site-specific: "site:<domain> <word>"
    google = GoogleSearch(self.query)
    ct = google.count()  # reuse the search object instead of building a second one
    print("No. of Google results: %d" % ct)
    results = google.top_urls()
    return results
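As the comment above notes, a query can embed Google's site: operator to restrict results to one domain. A minimal sketch of that form (the domain and search terms here are placeholders, not from the original):

gs = GoogleSearch("site:ieeexplore.ieee.org deep learning")
for url in gs.top_urls():
    print(url)  # every hit should come from ieeexplore.ieee.org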
Example #2
def Search(paper):
    # Bias the search toward IEEE Xplore by appending its domain to the query.
    gs = GoogleSearch(paper + " ieeexplore.ieee.org")
    url = gs.top_urls()[0]
    # Xplore article URLs typically end in "arnumber=<id>"; keep the trailing id.
    artNumber = url.split("=")[-1]
    return artNumber
from openpyxl import load_workbook
from googlesearch import GoogleSearch

wb = load_workbook('Attendees.xlsx')
for ws in wb:  # iterating a workbook yields its worksheets directly
    for i in range(1, 290):
        nameCell = ws.cell(row=i, column=1)  # attendee name
        jobCell = ws.cell(row=i, column=3)   # job title
        search = str(nameCell.value) + " " + str(jobCell.value) + " email"
        gs = GoogleSearch(search)
        print(search)
        print(gs.top_urls()[0])
        print("\n")
Example #5
def url_search(query, lucky=True):
    gs = GoogleSearch(query)
    try:
        # "Lucky" mode mimics I'm Feeling Lucky: a one-element list with
        # the top hit; otherwise return the full list of top URLs.
        return [gs.top_url()] if lucky else gs.top_urls()
    except ProxyError:  # raised when the underlying HTTP proxy fails
        raise ValueError("search failed for query: %r" % query)
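A hedged usage sketch (the query text is arbitrary; note the function converts a proxy failure into ValueError):

try:
    top = url_search("openpyxl load_workbook")                # one-element list
    hits = url_search("openpyxl load_workbook", lucky=False)  # full list
except ValueError:
    print("search failed behind the proxy")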
Example #7
def get_url_by_name(name, elem):
    # elem selects the Goodreads namespace (e.g. 'book' or 'author'); the
    # site: filter keeps hits to goodreads.com/<elem>/show pages only.
    gs = GoogleSearch('site:goodreads.com/{0}/show {1}'.format(elem, name))
    for url in gs.top_urls():
        if 'goodreads.com/{0}/show'.format(elem) in url:
            return str(url)
    return 0  # sentinel: no matching URL found
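Usage sketch (the title is a placeholder; 'book' matches Goodreads' /book/show URL scheme):

url = get_url_by_name("The Pragmatic Programmer", "book")
if url == 0:
    print("no goodreads.com/book/show result found")
else:
    print(url)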
from googlesearch import GoogleSearch
from readability.readability import Document
from bs4 import BeautifulSoup
import sys
import urllib.request


def remove_non_ascii(text):
    # Strip every character outside the ASCII range.
    return ''.join(i for i in text if ord(i) < 128)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print('no urls found')
        sys.exit(0)
    # Join all command-line arguments into a single search query.
    query = ' '.join(sys.argv[1:])
    gs = GoogleSearch(query)
    urls = gs.top_urls()
    if len(urls) < 1:
        print('no urls found')
        sys.exit(0)
    # for url in urls:
    #     print(url)
    # Fetch the top hit, let readability pull out the main article,
    # and print the first 1000 ASCII characters of its text.
    html = urllib.request.urlopen(urls[0]).read()
    soup = BeautifulSoup(Document(html).summary(), "lxml")
    print(remove_non_ascii(soup.get_text()[0:1000]))
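Run it from a shell with the search terms as arguments, e.g. (the script filename is hypothetical):

    python lucky_search.py readability python library

which fetches the top hit and prints up to 1000 ASCII characters of the extracted article text.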