Example #1
def import_locations():
    params = {
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "api_key": "550793bf576080db90a6392a62372dc5134e77a6a91953e5447e31dbd774ecbc"
    }
    with open('hirings.csv', mode='r') as csv_file:
        hirings = list(csv.DictReader(csv_file, delimiter=';'))
    with open('7-usa.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        # header matches the four fields written per row below
        writer.writerow(['company', 'status', 'longitude', 'latitude'])
        for h in hirings:
            print(f"Searching for ... {h['company']}")
            params['q'] = h['company']
            client = GoogleSearchResults(params)
            local_map = client.get_dict().get('local_map')
            if local_map and local_map.get('gps_coordinates'):
                longitude = local_map['gps_coordinates']['longitude']
                latitude = local_map['gps_coordinates']['latitude']
                print(h['company'], h['status'], longitude, latitude)
                writer.writerow([h['company'], h['status'], longitude, latitude])
                time.sleep(1)  # rate-limit between API calls
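
This snippet assumes csv, time, and the SerpApi client are imported at module level; a minimal preamble (import path as shown in Example #25) would be:

    import csv
    import time
    from serpapi.google_search_results import GoogleSearchResults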
Example #2
 def test_get_json(self):
     client = GoogleSearchResults({
         "q": "Coffee",
         "engine": "google_scholar"
     })
     data = client.get_json()
     self.assertIsNotNone(data["organic_results"][0]["title"])
Example #3
 def test_get_html(self):
     client = GoogleSearchResults({
         "q": "Coffee",
         "location": "Austin,Texas"
     })
     data = client.get_html()
     self.assertGreater(len(data), 10)
Example #4
def search(keyword, date_start, date_end):
    """
	Function to find specific search result from Google.
	Using the already collated article data to seek wider breadth of information
	for the specific company at a specific time range

	Result from GoogleSearchResults returns a dictionary with the following fields in order:

	1. search_metadata
		- Includes "created_at" field useful for filtering results of specific date time
	2. search_parameters
	3. search_information
	4. organic_results
		- most relevant field
		- use "title" and "displayed_link" for extracting text data
	"""

    client = GoogleSearchResults({
        "q": keyword,
        "location": "Austin,Texas",
        "api_key": "demo",
        "created_at": "2019-04-05"
    })
    result = client.get_dict()

    return result
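
Following the docstring above, a minimal sketch of pulling the text fields out of the returned dict (the helper name is hypothetical; the field names come from the docstring):

    def extract_text_data(result):
        # collect (title, displayed_link) pairs from the organic results
        return [(r["title"], r["displayed_link"])
                for r in result.get("organic_results", [])]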
Example #5
def GetAnswer(return_text):
    params = {
      "api_key": "609d4beab3d238843bcf22588195a5b6db5ae8490a089215eac71985ba37f654",
      "engine": "google",
      "q": return_text,
      "location": "Los Angeles, CA, California, United States",
      "google_domain": "google.com",
      "gl": "us",
      "hl": "en",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    # print(results['knowledge_graph'])
    # Fall back through progressively less specific fields; more fallbacks can
    # be chained the same way to increase the accuracy of the answer.
    try:
        answer = results['knowledge_graph']['description']
    except KeyError:
        try:
            answer = results['answer_box']['type']
        except KeyError:
            try:
                answer = results['knowledge_graph']['title']
            except KeyError:
                answer = "I don't know, try asking me something else"
    print(answer)
    return answer
Example #6
 def test_get_dictionary(self):
     client = GoogleSearchResults({
         "q": "Coffee",
         "location": "Austin,Texas"
     })
     data = client.get_dictionary()
     self.assertIsNotNone(data.get('local_results'))
Example #7
 def test_get_json(self):
     client = GoogleSearchResults({"q": "Coffee", "location": "Austin,Texas"})
     data = client.get_json()
     self.assertEqual(data["search_metadata"]["status"], "Success")
     self.assertIsNotNone(data["search_metadata"]["google_url"])
     self.assertIsNotNone(data["search_metadata"]["id"])
     # pp = pprint.PrettyPrinter(indent=2)
     # pp.pprint(data['local_results'])
     self.assertIsNotNone(data['local_results']['places'][0])
Example #8
 def test_search_google_images(self):
     client = GoogleSearchResults({"q": "coffee", "tbm": "isch"})
     for image_result in client.get_json()['images_results']:
         link = image_result["original"]
         try:
             print("link: " + link)
             # wget.download(link, '.')
         except Exception:
             pass
Example #9
 def test_get_json(self):
     client = GoogleSearchResults({
         "q": "Coffee",
         "location": "Austin,Texas"
     })
     data = client.get_json()
     # pp = pprint.PrettyPrinter(indent=2)
     # pp.pprint(data['local_results'])
     self.assertIsNotNone(data['local_results']['places'][0])
Example #10
    def search(self, query: str) -> list:
        key = self.get_credentials()

        params = {'engine': 'google_scholar', 'q': query, 'api_key': key}

        client = GoogleSearchResults(params)
        res = client.get_dict()

        filtered_res = self.get_filtered_results(res['organic_results'])
        return filtered_res
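
get_credentials() and get_filtered_results() are methods of the enclosing class and are not shown; a hypothetical get_filtered_results that keeps just the fields of interest might look like:

    def get_filtered_results(self, organic_results):
        # hypothetical sketch: keep only title and link per result
        return [{'title': r.get('title'), 'link': r.get('link')}
                for r in organic_results]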
Example #11
 def test_search_google_shopping(self):
     client = GoogleSearchResults({
         "q": "coffee",  # search query
         "tbm": "shop",  # shopping results
         "tbs": "p_ord:rv",  # sort by review
         "num": 100
     })
     data = client.get_json()
     for shopping_result in data['shopping_results']:
         print(
             str(shopping_result['position']) + " - " +
             shopping_result['title'])
Example #12
 def test_search_by_location(self):
     for city in ["new york", "paris", "berlin"]:
         location = GoogleSearchResults({}).get_location(
             city, 1)[0]["canonical_name"]
         client = GoogleSearchResults({
             "q": "best coffee shop",  # search query
             "location": location,
             "num": 10,
             "start": 0
         })
         data = client.get_json()
         top_result = data['organic_results'][0]["title"]
         print("top coffee result for " + location + " is: " + top_result)
Example #13
 def test_search_google_news(self):
     client = GoogleSearchResults({
         "q": "coffee",  # search query
         "tbm": "nws",  # news results
         "tbs": "qdr:d",  # last 24h
         "num": 10
     })
     for offset in [0, 1, 2]:
         client.params_dict["start"] = offset * 10
         data = client.get_json()
         for news_result in data['news_results']:
             print(
                 str(news_result['position'] + offset * 10) + " - " +
                 news_result['title'])
Example #14
def pictureUrl(name):
    nameKeyword = "'" + name + " basketball'"  # phrase-quoted image query
    params = {
        "api_key": "5953e1e6264c6019fa48feea446a6ff68616adc5789be4e1fed149f11a89b020",
        "engine": "google",
        "ijn": "0",
        "q": nameKeyword,
        "google_domain": "google.com",
        "tbm": "isch",
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    url = results["images_results"][0]["original"]
    return url
Example #15
def visual(q):
    params = {
        "q": q,
        "tbm": "isch",
        "ijn": "0",
        "api_key": "872d5dde1c1b4ad04b1b288b3cc26683597993dd25a9eff1777c5c20a52b7d50"
    }
    client = GoogleSearchResults(params)
    results = client.get_dict()
    images_results = results['images_results']
    link = images_results[5]["original"]
    return link
Example #16
def fetch_serp_api_data(domain, keywords):
    results = []
    for keyword in keywords:
        keyword_text = keyword['keyword']
        print("Getting shopping results for: %s with keyword: %s..." %
              (domain, keyword_text))
        params = {
            "api_key": "bd9c6284ab9a91ffe763e700cfc6f8b0035b7db18b59591a2a6498723b79ad24",
            "engine": "google",
            "q": keyword_text,
            "google_domain": "google.com",
        }

        client = GoogleSearchResults(params)
        result = client.get_dict()
        results.append(result)
        time.sleep(1)
    return results
Example #17
def top_results(keyword):
    """Return list of (position, link) tuples for keyword results."""
    query_parameters = {
        "q": keyword,
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "num": N_RESULTS,
    }
    response = GoogleSearchResults(query_parameters).get_dict()

    return [(result["position"], result["link"])
            for result in response["organic_results"]]
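
N_RESULTS is a module-level constant in the original source; a hypothetical setup and call (the API key is assumed to be configured elsewhere):

    N_RESULTS = 10  # number of organic results to request
    for position, link in top_results("pythagorean theorem"):
        print(position, link)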
Example #18
 def test_get_search_archive(self):
     client = GoogleSearchResults({
         "q": "Coffee",
         "location": "Austin,Texas"
     })
     search_result = client.get_dictionary()
     search_id = search_result.get("search_metadata").get("id")
     archived_search_result = GoogleSearchResults({}).get_search_archive(
         search_id, 'json')
     self.assertEqual(
         archived_search_result.get("search_metadata").get("id"), search_id)
     GoogleSearchResults({}).get_search_archive(search_id, 'html')
Example #19
def query_serp(query: str, site: str, dates: list, num_results: int,
               paper_name: str) -> list:
    """
    Make dict for serp query using make_params()
    query serpAPI
    :param query: query string for google news
    :param site: site root url (eg. www.google.com)
    :param dates: list of dates
    :param num_results: number of results
    :param paper_name: name of paper
    :return: list of query dicts containing query information and sites list for the query
    """
    all_sites = []
    total_sites_count = 0

    for d in dates:
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results', [])

            count = 0
            sites_date = []
            # Loop until the results run out or an error is returned
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)

                # page forward through the results
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results', [])

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], len(sites_date)))

            # add list of sites to query dict
            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites
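
make_params() is defined elsewhere in the original project. A plausible sketch of what it might return, assuming standard SerpApi Google News parameters (the exact fields are an assumption, not the original helper):

    def make_params(query, site, date_start, date_end, num_results, paper):
        # hypothetical reconstruction of the missing helper
        query_r = {'query': query, 'paper': paper,
                   'date_start': date_start, 'date_end': date_end}
        params = {
            'engine': 'google',
            'q': '{} site:{}'.format(query, site),
            'tbm': 'nws',  # news results
            'tbs': 'cdr:1,cd_min:{},cd_max:{}'.format(date_start, date_end),
            'num': num_results,
            'api_key': 'YOUR_API_KEY',
        }
        return query_r, params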
Example #20
    def test_async(self):
        # store searches
        search_queue = Queue()

        # Serp API client
        client = GoogleSearchResults({
            "location": "Austin,Texas",
            "async": True
        })

        # loop through companies
        for company in ['amd', 'nvidia', 'intel']:
            print("execute async search: q = " + company)
            client.params_dict["q"] = company
            search = client.get_dict()
            print("add search to the queue where id: " +
                  search['search_metadata']['id'])
            # add search to the search_queue
            search_queue.put(search)

        print("wait until all search statuses are cached or success")

        # Create regular client
        client = GoogleSearchResults({"async": True})
        while not search_queue.empty():
            search = search_queue.get()
            search_id = search['search_metadata']['id']

            # retrieve search from the archive - blocking call
            print(search_id + ": get search from archive")
            search_archived = client.get_search_archive(search_id)
            print(search_id + ": status = " +
                  search_archived['search_metadata']['status'])

            # check status
            if re.search('Cached|Success',
                         search_archived['search_metadata']['status']):
                print(search_id + ": search done with q = " +
                      search_archived['search_parameters']['q'])
            else:
                # requeue search_queue
                print(search_id + ": requeue search")
                search_queue.put(search)

                # wait 1s
                time.sleep(1)

        print('all searches completed')
Example #21
def query_serp(query, site, dates, num_results, paper_name):
    all_sites = []
    total_sites_count = 0

    for d in dates:
        try:
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            client = GoogleSearchResults(params)
            results = client.get_dict()
            news_results = results.get('news_results', [])

            count = 0
            sites_date = []
            while news_results and 'error' not in results:
                sites = [news['link'] for news in news_results]
                sites_date.extend(sites)
                count += len(sites)

                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results', [])

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], len(sites_date)))

            query_r['sites'] = sites_date
            all_sites.append(query_r)
            total_sites_count += len(sites_date)
        except Exception as e:
            print(e)
            print(d)
            continue
    print('Total Sites: {}'.format(total_sites_count))
    return all_sites
Example #22
def query_serp_count(query, site, dates, num_results, paper_name):
    siteCounts = []
    totalSites = 0
    for i, d in enumerate(dates):
        try:
            # Get query dict and params dict
            query_r, params = make_params(query=query,
                                          site=site,
                                          date_start=d[0],
                                          date_end=d[1],
                                          num_results=num_results,
                                          paper=paper_name)
            # serpAPI query
            client = GoogleSearchResults(params)
            results = client.get_dict()

            news_results = results.get('news_results', [])

            count = 0
            while news_results and 'error' not in results:
                count += len(news_results)
                params['start'] = count
                client = GoogleSearchResults(params)
                results = client.get_dict()
                news_results = results.get('news_results', [])

            print('Date Range: {}-{}\tTotal Sites: {}'.format(
                d[0], d[1], count))
            query_r['site_count'] = count
            siteCounts.append(query_r)
            totalSites += count
        except Exception as e:
            print(e)
            print(d)
            continue
    print('\nTotal Sites: {}'.format(totalSites))
    return siteCounts
Example #23
 def test_get_account(self):
     client = GoogleSearchResults({})
     account = client.get_account()
     self.assertIsNotNone(account.get("account_id"))
     self.assertEqual(account.get("api_key"),
                      GoogleSearchResults.SERP_API_KEY)
Example #24
def main():
  GoogleSearchResults.SERP_API_KEY = "3d42b081b7807b6f9c6a37504613b07a02f5f9b00b6aed88731ff9483e161bc2"
  months = {'January':1, 'February':2, 'March':3, 'April':4, 'May':5, 'June':6, 'July':7,'August':8,'September':9, 'October':10, 'November':11,'December':12}
  platform_altnames = {'PS3':'PlayStation 3', 'PS4':'PlayStation 4', 'PSP':'PlayStation Portable'}

  sales = pd.read_csv("games_w_keywords.csv")
  sales.reset_index(drop=True, inplace=True)
  release_date = []
  new_sales = pd.DataFrame(columns=sales.columns)
  for g, game in enumerate(sales['Name']):
    try:
      page = wptools.page(game)
      infobox = page.get_parse().infobox  # parse once instead of re-fetching per access
      if 'released' in infobox:
        date_str = infobox['released']
      else:
        date_str = infobox['release']
    except Exception:  #wiki page not found or non-game wiki page detected
      try:
        google_qry = game + ' site:wikipedia.org'
        client = GoogleSearchResults({"q": google_qry, "serp_api_key": GoogleSearchResults.SERP_API_KEY})
        site = client.get_dict()['organic_results'][0]['link']
        wiki_qry = site.split('/')[-1]
        page = wptools.page(wiki_qry)
        infobox = page.get_parse().infobox
        if 'released' in infobox:
          date_str = infobox['released']
        else:
          date_str = infobox['release']
      except Exception:  #no results found or wiki infobox has no 'release' section
        continue
    date_str_split = date_str.split("|")

    #preprocess to narrow down dates to right platform and right region
    for i, q in enumerate(date_str_split):
      if sales['Platform'][g] in ''.join(date_str_split[:i]):
          date_str_split = date_str_split[i:]
          break
      elif sales['Platform'][g] in platform_altnames:
        if platform_altnames[sales['Platform'][g]] in ''.join(date_str_split[:i]):
          date_str_split = date_str_split[i:]
          break

    for i, q in enumerate(date_str_split):
      if 'NA' in date_str_split[i-1] or 'WW' in date_str_split[i-1]:
          date_str_split = [date_str_split[i]]
          break

    for i, q in enumerate(date_str_split):
      if 'JP' in date_str_split[i-1]:
        continue
      day, month, year = '', '', ''
      date = date_str_split[i].split()
      for mem in date:
          mem= re.sub('[(){}<>,]', '', mem)
          if mem in months:
              month = str(months[mem])
          if mem.isnumeric():
              if int(mem) < 32:
                  day = str(mem)
              else:
                  year = str(mem)
      try:
        int(year)
      except Exception:
        continue
      if (day != '' and month != '' and int(year) == int(sales['Year_of_Release'][g])):
          break
    try:
      int(year)
    except Exception:
      continue
    if day == '' or month == '' or year == '' or int(year) != int(sales['Year_of_Release'][g]):
      continue
    if month == '12' and int(year) == 2016:
      continue
    rd = month + '/' + day + '/' + year
    release_date.append(rd)
    new_sales = pd.concat([new_sales, sales.iloc[[g]]])  # DataFrame.append was removed in pandas 2.x
  
  new_sales['release date'] = release_date
  new_sales.to_csv('games_w_rldates.csv', index=False)
Example #25
from serpapi.google_search_results import GoogleSearchResults

params = {
    "q": "Pythagorean Theorem",
    "hl": "en",
    "gl": "us",
    "google_domain": "google.com",
    "api_key": "secret_api_key"
}

client = GoogleSearchResults(params)
results = client.get_dict()
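
get_dict() returns the parsed JSON response; a short sketch of reading it back out, assuming the standard response shape used throughout these examples:

    for r in results.get("organic_results", []):
        print(r["position"], r["title"], r["link"])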
Example #26
    for line in csv_file:
        new = line.strip()
        print("new:" + new)
        params = {
            "api_key": "5b4695dcbf33b844d77818346074c2268857ec9c42f24202d18cce2fd1709ba3",
            "engine": "google",
            "q": new,
            "location": "London, England, United Kingdom",
            "google_domain": "google.co.uk",
            "gl": "uk",
            "hl": "en",
            "no_cache": "true",
            "device": "mobile",
        }
        client = GoogleSearchResults(params)
        time.sleep(2)
        json_results = client.get_json()

        try:
            for position in json_results['ads']:
                data.append(new + ":" + position['displayed_link'] + ":" +
                            str(position['position']))
        except KeyError:
            # no ads returned for this query
            pass

        # with open('fileName.csv', "wb") as csv_file:
        #     writer = csv.writer(csv_file, delimiter=":")
        #     writer.writerow(data)
Example #27
 def test_get_location(self):
     client = GoogleSearchResults({"q": None, "async": True})
     location_list = client.get_location("Austin", 3)
     self.assertIsNotNone(location_list[0].get("id"))
     pp = pprint.PrettyPrinter(indent=2)
     pp.pprint(location_list)
Example #28
def _search(params):
    client = GoogleSearchResults(params)
    return client.get_dict()
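
A thin wrapper like this simply forwards a params dict; an illustrative call (parameter values are examples only):

    results = _search({"q": "Coffee", "location": "Austin,Texas",
                       "api_key": "secret_api_key"})
    print(results["search_metadata"]["status"])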
Example #29
url = ''  # Insert URL string here from GitHub

from serpapi.google_search_results import GoogleSearchResults
import pandas as pd
import csv

# Add your SerpApi key between the '' below
api_key = 'API_KEY_GOES_HERE'

df1 = pd.read_csv(url)  # Dataset is now stored in a pandas DataFrame

# Put every query from the first column into a queue
query_queue = []
i = 0
while i < len(df1):
    query = df1.iloc[i][0]
    query_queue.append(query)
    i += 1

# Search location and engine (example values; adjust as needed)
location = 'Austin,Texas'
engine = 'google'

for search_query in query_queue:
    query_params = {
        "location": location,
        "engine": engine,
        "api_key": api_key,
        "q": search_query
    }
    search_client = GoogleSearchResults(query_params)
    search_results = search_client.get_dict()
    for link in search_results['organic_results']:
        print(link['link'])