Пример #1
0
def GenerateAnkiCardsFromWikipediaCategory(url,deckName,user_id,api_key):
		cards=[]
		client = importio.importio(user_id=user_id,api_key=api_key , host="https://query.import.io")
		client.connect()
		global queryLatch
		queryLatch = latch.latch(1)
		client.query({
				"connectorGuids": [
						"68b4b6ac-25ce-434d-923d-7cc9661216ff"#7fc7daa2-25a4-4649-b48c-be1d7fd8756e
				],
				"input": {
						"webpage/url": url
				}
		}, callback)
		print "Queries dispatched, now waiting for results"
		queryLatch.await()
		print json.dumps(dataRows, indent = 4)
		#print(dataRows[0]["title"])
		queryLatch = latch.latch(len(dataRows))
		for data in dataRows :
			if('url' in data.keys()):
								client.query({
										"connectorGuids": [
						"7fc7daa2-25a4-4649-b48c-be1d7fd8756e"
										],
										"input": {
														"webpage/url": data['url']
										}
						}, callback2)

		queryLatch.await()
		print json.dumps(dataRows2, indent = 4)
		for d in dataRows2:
			if(all(x in d.keys() for x in ["title","first_par"])):
				cards.append(AnkiCard(d["title"],d["first_par"]))
		client.disconnect()
		reinitGlobalVariables()
		return cards
# OLD IMPORT IO SCRAPING SCRIPT FOR BBC OLD STYLE NEWS SITE ARTICLES 01/01/2010 - 20/08/2014

import logging, json, importio, latch

# NOTE(review): the user GUID and API key are hard-coded in source here; anyone who can
# read this file can use them. Consider loading them from configuration instead.
client = importio.importio(user_id="cf592fba-bd1f-4128-8e98-e729c2bb7dec", api_key="aledxqRLOCLFo9O7cYeeC58aotifmZbL2C57Mg1zicz6ZLVSY94xttvI9AjeV1Fw9DpBg2y/cbrNZXM23yiWBg==", host="https://query.import.io")
# The connection is opened as soon as this module-level code runs.
client.connect()
# Latch created with a count of 13441 -- presumably the number of queries the script
# expects to complete; TODO confirm against the code that dispatches them.
queryLatch = latch.latch(13441)
# Accumulates every result row received by callback().
dataRows = []
# Rebound by callback() to the results list of the latest data message.
d = ''

def callback(query, message):
    """Handle one import.io message: log it, store its rows, append URLs to urls.txt.

    For a data message (type ``"MESSAGE"`` without ``"errorType"``) the result
    rows are appended to the global ``dataRows``, the global ``d`` is rebound
    to that message's result list, and each row's ``"url"`` is appended to
    ``urls.txt`` as ``<url>,`` on its own line. Error and disconnect messages
    are only printed.

    Args:
        query: The query object the message belongs to (not used here).
        message: Dict with a ``"type"`` key and a ``"data"`` payload.

    Raises:
        KeyError: If a result row has no ``"url"`` key (same as before).
    """
    global dataRows
    global d

    if message["type"] == "DISCONNECT":
        print("Query in progress when library disconnected")
        print(json.dumps(message["data"], indent=4))

    if message["type"] == "MESSAGE":
        if "errorType" in message["data"]:
            print("Got an error!")
            print(json.dumps(message["data"], indent=4))
        else:
            print("Got data!")
            print(json.dumps(message["data"], indent=4))
            d = message["data"]["results"]
            dataRows.extend(d)
            # Open the file once per message rather than once per row; skip
            # the open entirely when there is nothing to write so no empty
            # file is created.
            if d:
                with open('urls.txt', 'a') as f:
                    f.writelines(row["url"] + ',\n' for row in d)
Пример #3
0
#proxies = { "http": "127.0.0.1:3128" }

# Then you can use the "proxies" variable when instantiating a new client library object
# For more details on this see below

# You have two choices for authenticating with the Python client: you can use your API key
# or your username and password. Username and password is quicker to get started with, but
# API key authentication will be more reliable for really large query volumes.
# If you need it, you can get YOUR_USER_GUID and YOUR_API_KEY from your account page, at
# http://import.io/data/account

# To use an API key for authentication, use the following code to initialise the library
# NOTE(review): the user GUID and API key are hard-coded in source; consider loading
# them from configuration instead.
# The user_id literal previously ended with a stray apostrophe inside the quotes,
# which made the GUID malformed; it has been removed.
client = importio.importio(
    user_id="fdcf7c1f-ee58-4644-9b55-0b660ef6bd2e",
    api_key=
    "YcSkwP9WeCxz1kbdy64oJcLQehYtw8RAlyy5vd+EtcfJVpe0ojkzgujNtQUPkDHlpizfGo76Zr6EOzpyjdG9DA=="
)
# If you want to use the client library with API keys and proxies, use this command:
#client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY", proxies=proxies)

# Once you have initialised the client, connect it to the server:
client.connect()

# If you wish to use username and password based authentication, first create a client:
#client = importio.importio()
# If you wish to use proxies with your username and password, then you can do so like this:
#client = importio.importio(proxies=proxies)

# Next you need to log in to import.io using your username and password, like so:
#client.login("YOUR_USERNAME", "YOUR_PASSWORD")
Пример #4
0
def clientGen():
    """Return a new import.io client configured from the environment.

    The user GUID is read from ``IIO_USER`` and the API key from ``IIO_API``;
    a variable that is not set is passed through as ``None``. The client is
    only constructed here, it is not connected.
    """
    environment = os.environ
    user_guid = environment.get('IIO_USER')
    key = environment.get('IIO_API')
    return importio.importio(
        user_id=user_guid,
        api_key=key,
        host="https://query.import.io",
    )
Пример #5
0
def extract(connector, urls):
    # To use an API key for authentication, use the following code:
    client = importio.importio(
        user_id="d133b9b6-1253-4568-b727-425c7181ed93",
        api_key=
        "xCSj76J7NK+PaXi5foAzbIjgyo+Y+Xpu1+oS+OpngOor8gYN/johObwTLAUaQSoGTGzmSCxVMJQU3mXbICU6SQ==",
        host="https://query.import.io",
        proxies={
            "http": "http://proxy.server:3128",
            "https": "http://proxy.server:3128"
        })

    client.connect()
    queryLatch = latch.latch(len(urls))

    def callback(query, message):
        global data

        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            data["log"].append("Query in progress when library disconnected")
            data["log"].append(json.dumps(message["data"], indent=4))

        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                data["log"].append("Got an error!")
                data["log"].append(json.dumps(message["data"], indent=4))
            else:
                # Save the data we got in our dataRows variable for later
                data[connector].extend(message["data"]["results"])

        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished(): queryLatch.countdown()

    for url in urls:
        client.query(
            {
                "connectorGuids": [connectors[connector]],
                "input": {
                    "webpage/url": url
                }
            }, callback)

    data["log"].append("Queries dispatched, now waiting for results")

    queryLatch. await ()

    data["log"].append("Latch has completed, all results returned")

    client.disconnect()

    # Now we can print out the data we got
    data["log"].append("All data received:")
    if connector == "fixture":
        for f in data[connector]:
            data["log"].append("%s vs %s" %
                               (f["hometeam/_title"], f["awayteam/_title"]))
    elif connector == "history":
        data["log"].extend(urls)
    else:
        data["log"].append(json.dumps(data[connector], indent=4))

    return data[connector]
Пример #6
0
from importio_login import GUID, API_key, Amaz_BestSeller_GUID, Amaz_BestSeller_URL
import logging, json, importio, latch


# You do not need to do this, but setting the logging level will reveal logs about
# what the import.io client is doing and will surface more information on errors
logging.basicConfig(level=logging.INFO)

# If you wish, you may configure HTTP proxies that Python can use to connect
# to import.io. If you need to do this, uncomment the following line and fill in the
# correct details to specify an HTTP proxy:

#proxies = { "http": "127.0.0.1:3128" }

# Credentials come from the importio_login module imported above.
client = importio.importio(user_id=GUID, api_key=API_key)
# The connection is opened as soon as this module-level code runs.
client.connect()

# Latch with a count of one.
queryLatch = latch.latch(1)

# Module-level list declared `global` inside callback().
dataRows = []

def callback(query, message):
    """Handle one message delivered for an import.io query.

    Prints a notice and the payload for ``"DISCONNECT"`` messages.

    NOTE(review): this snippet is cut off immediately after the ``"MESSAGE"``
    type check, so the handling of data messages is not visible here.
    """
    global dataRows
    
    # Disconnect messages happen if we disconnect the client library while a query is in progress
    if message["type"] == "DISCONNECT":
        print "Query in progress when library disconnected"
        print json.dumps(message["data"], indent = 4)

    # Check the message we receive actually has some data in it
    if message["type"] == "MESSAGE":
Пример #7
0
def importquery(
        conNum, ImportURL, filename
):  # To use an API key for authentication, use the following code:
    client = importio.importio(user_id=user_id_Value,
                               api_key=api_key_Value,
                               host="https://query.import.io")
    # Once we have started the client and authenticated, we need to connect it to the server:
    client.connect()

    # Because import.io queries are asynchronous, for this simple script we will use a "latch"
    # to stop the script from exiting before all of our queries are returned
    # For more information on the latch class, see the latch.py file included in this client library
    queryLatch = latch.latch(1)

    # Define here a global variable that we can put all our results in to when they come back from
    # the server, so we can use the data later on in the script
    # dataRows = []

    # In order to receive the data from the queries we issue, we need to define a callback method
    # This method will receive each message that comes back from the queries, and we can take that
    # data and store it for use in our app
    def callback(query, message):
        global dataRows

        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            print("Query in progress when library disconnected")
            ##print json.dumps(message["data"], indent = 4)

        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                print("Got an error!")
                #print json.dumps(message["data"], indent = 4)
            else:
                # We got a message and it was not an error, so we can process the data
                print("Got data!")
                #print json.dumps(message["data"], indent = 4)
                # Save the data we got in our dataRows variable for later
                dataRows = (message["data"]["results"])

        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished(): queryLatch.countdown()

    # Issue queries to your data sources and with your inputs
    # You can modify the inputs and connectorGuids so as to query your own sources
    # Query for tile Polycount_Freelance
    client.query(
        {
            "connectorGuids": [conNum],
            "input": {
                "webpage/url": ImportURL
            }
        }, callback)

    print("Checking " + filename + ", waiting for results")

    # Now we have issued all of the queries, we can "await" on the latch so that we know when it is all done
    queryLatch. await ()

    #print "Latch has completed, all results returned"

    # It is best practice to disconnect when you are finished sending queries and getting data - it allows us to
    # clean up resources on the client and the server
    client.disconnect()

    # Now we can print out the data we got
    open("sites\\" + filename + ".dat",
         'w').write(json.dumps(dataRows, indent=4))
    print(filename + " data received, file saved")
# If you wish, you may configure HTTP proxies that Python can use to connect
# to import.io. If you need to do this, uncomment the following line and fill in the
# correct details to specify an HTTP proxy:

#proxies = { "http": "127.0.0.1:3128" }

# Then you can use the "proxies" variable when instantiating a new client library object
# For more details on this see below

# You have two choices for authenticating with the Python client: you can use your API key
# or your username and password. Username and password is quicker to get started with, but
# API key authentication will be more reliable for really large query volumes.
# If you need it, you can get YOUR_USER_GUID and YOUR_API_KEY from your account page, at
# http://import.io/data/account

# To use an API key for authentication, use the following code to initialise the library
# NOTE: "YOUR_USER_GUID" and "YOUR_API_KEY" are placeholders; replace them with the
# values from your account page before running.
client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY")
# If you want to use the client library with API keys and proxies, use this command:
#client = importio.importio(user_id="YOUR_USER_GUID", api_key="YOUR_API_KEY", proxies=proxies)

# Once you have initialised the client, connect it to the server:
client.connect()

# If you wish to use username and password based authentication, first create a client:
#client = importio.importio()
# If you wish to use proxies with your username and password, then you can do so like this:
#client = importio.importio(proxies=proxies)

# Next you need to log in to import.io using your username and password, like so:
#client.login("YOUR_USERNAME", "YOUR_PASSWORD")

# Because import.io queries are asynchronous, for this simple script we will use a "latch"
Пример #9
0
import logging, json, importio, latch

# To use an API key for authentication, use the following code:
# NOTE(review): the user GUID and API key are hard-coded in source here; anyone who can
# read this file can use them. Consider loading them from configuration instead.
client = importio.importio(user_id="bf4b28b0-c952-47c2-a0c2-a615ae963b71", api_key="bf4b28b0c95247c2a0c2a615ae963b71d0de22db4cc190b6305176ed3aa35ef5c0765cb94923560fc48ce293ca722eee9578b2d55ac766852f751eb7e9818a1b495035b513f162f0d29ff242672209dc", host="https://query.import.io")

# Once we have started the client and authenticated, we need to connect it to the server:
client.connect()

# Because import.io queries are asynchronous, for this simple script we will use a "latch"
# to stop the script from exiting before all of our queries are returned
# For more information on the latch class, see the latch.py file included in this client library
# The count of 2 presumably matches the number of queries issued later -- TODO confirm.
queryLatch = latch.latch(2)

# Define here a global variable that we can put all our results in to when they come back from
# the server, so we can use the data later on in the script
dataRows = []

# In order to receive the data from the queries we issue, we need to define a callback method
# This method will receive each message that comes back from the queries, and we can take that
# data and store it for use in our app
def callback(query, message):
    """Handle one import.io message and record results keyed by page URL.

    For a data message (type ``"MESSAGE"`` without ``"errorType"``) the result
    list is stored in the module-level ``current_results`` dict under the
    message's ``"pageUrl"``. Error and disconnect messages are printed. When
    ``query.finished()`` is true the module-level ``queryLatch`` is counted
    down.

    The original indentation of this block was inconsistent and did not
    parse; the nesting below follows the structure of the sibling callbacks
    in this file.

    Args:
        query: Query object exposing ``finished()``.
        message: Dict with a ``"type"`` key and a ``"data"`` payload.
    """
    # Disconnect messages happen if we disconnect the client library while a query is in progress
    if message["type"] == "DISCONNECT":
        print("Query in progress when library disconnected")
        print(json.dumps(message["data"], indent=4))

    # Check the message we receive actually has some data in it
    if message["type"] == "MESSAGE":
        if "errorType" in message["data"]:
            print("Got an error!")
            print(json.dumps(message["data"], indent=4))
        else:
            # Not an error: save the rows in current_results for later.
            # The dict is only mutated, so no `global` statement is needed.
            current_results[message["data"]["pageUrl"]] = message["data"]["results"]

    # When the query is finished, count down the latch so the program can continue
    if query.finished():
        queryLatch.countdown()

# Initialise the library
# To use an API key for authentication, use the following code:
# NOTE(review): `user_id` and `api_key` are not assigned anywhere in the visible part of
# this file -- presumably they are defined before this point; confirm.
client = importio.importio(user_id=user_id, 
  api_key=api_key, 
  host="https://query.import.io")

# Open the connection before any query is issued.
client.connect()

# Now we are going to query the first extractor
print "Querying the first extractor:"
# If there is only a single input for the first extractor:
if isinstance(starting_query,list)==False:
    # Use a latch to stop the program from exiting
    queryLatch = latch.latch(1)
    current_results = {}

    # Querying extractor 1:
    client.query({
      "connectorGuids": [
Пример #11
0
import importio, latch, sys, uuid, time

# Retrieve the credentials from the command line
# Positional command-line arguments, in order:
# host, username, password, user GUID, API key.
host, username, password, userguid, api_key = (
    sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
'''
Test 1

Test that specifying incorrect username and password raises an exception
'''

client = importio.importio(host="http://query." + host)

# Two random UUIDs serve as credentials that cannot be valid, so login is
# expected to raise.
try:
    client.login(str(uuid.uuid4()),
                 str(uuid.uuid4()),
                 host="https://api." + host)
except Exception:
    print("Test 1: Success")
else:
    print("Test 1: Failed (did not throw exception)")
    sys.exit(1)

client.disconnect()
'''
Test 2

Test that providing an incorrect user GUID raises an exception
Пример #12
0
import importio, latch, sys, uuid, time

# Retrieve the credentials from the command line
# Positional command-line arguments, in order:
# host, username, password, user GUID, API key.
host, username, password, userguid, api_key = (
    sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])

'''
Test 1

Test that specifying incorrect username and password raises an exception
'''

client = importio.importio(host="http://query." + host)

# Two random UUIDs serve as credentials that cannot be valid, so login is
# expected to raise.
try:
    client.login(str(uuid.uuid4()), str(uuid.uuid4()), host="https://api." + host)
except Exception:
    print("Test 1: Success")
else:
    print("Test 1: Failed (did not throw exception)")
    sys.exit(1)

client.disconnect()

'''
Test 2

Test that providing an incorrect user GUID raises an exception
'''