def import_in_abc_list(in_abc_fp, out_abc_fp, latch_list, lut_list):
    """Parse a BLIF-style netlist produced by ABC.

    Parameters:
        in_abc_fp   -- readable file object positioned at the start of
                       the netlist.
        out_abc_fp  -- writable file object; the header (everything
                       before the first '.latch'/'.names' directive) is
                       copied to it verbatim.
        latch_list  -- list extended with one latch.latch object per
                       '.latch' line.
        lut_list    -- list extended with one lut.lut object per
                       '.names' block; the block's truth-table rows are
                       appended to that object's .entries.

    Fixes over the previous revision:
      * every loop stops at EOF (readline() returns ''), so a file
        without '.latch'/'.names' directives no longer loops forever;
      * a '.names' header split across any number of backslash-newline
        continuations is merged, not just a single continuation;
      * a '.names' directive on the very first line is no longer copied
        into the header output.
    """
    # Copy the header verbatim until the first section directive.
    line = in_abc_fp.readline()
    while line and not line.startswith(('.latch', '.names')):
        out_abc_fp.write(line)
        line = in_abc_fp.readline()
    # Read and process latch lines.
    while line and not line.startswith('.names'):
        if line.startswith('.latch'):
            latch_list.append(latch.latch(line))
        line = in_abc_fp.readline()
    # Read and process LUT blocks.
    while line:
        if line.startswith('.names'):
            # Merge backslash-continued header lines (any depth);
            # terminates at EOF because '' has no trailing '\\\n'.
            while line.endswith('\\\n'):
                line = line.replace('\\\n', in_abc_fp.readline())
            lut_list.append(lut.lut(line))
            line = in_abc_fp.readline()
            # Truth-table rows belong to the most recent LUT.
            while line and not line.startswith(('.names', '.end')):
                lut_list[-1].entries.append(line)
                line = in_abc_fp.readline()
        else:
            line = in_abc_fp.readline()
def import_in_abc_list(in_abc_fp, out_abc_fp, latch_list, lut_list):
    """Parse a BLIF-style netlist from *in_abc_fp*.

    Copies the header (everything before the first '.latch'/'.names'
    directive) verbatim to *out_abc_fp*, then appends one latch.latch
    object per '.latch' line to *latch_list* and one lut.lut object per
    '.names' block (with its truth-table rows in .entries) to
    *lut_list*.

    NOTE(review): if the input contains neither '.latch' nor '.names'
    lines, the leading loops never terminate -- readline() returns ''
    at EOF, which fails both startswith() tests, so the loop spins
    forever. Confirm inputs always contain one of these directives.
    """
    # Copy until reach the line of latch
    line = in_abc_fp.readline()
    while not line.startswith('.latch'):
        out_abc_fp.write(line)
        line = in_abc_fp.readline()
        if line.startswith('.names'):
            # Netlist has no latches; go straight to LUT parsing.
            break
    # Read and Process Latch Lines
    while not line.startswith('.names'):
        if line.startswith('.latch'):
            latch_list.append(latch.latch(line))
        line = in_abc_fp.readline()
    # Read and Process LUT Lines
    while len(line) != 0:  # readline() yields '' at EOF
        if line.startswith('.names'):
            if line.endswith('\\\n'):
                # Merge a backslash-continued '.names' header.
                # NOTE(review): only ONE continuation line is merged; a
                # header split over three or more physical lines would
                # not be fully reassembled -- confirm this cannot occur.
                line_p_2 = in_abc_fp.readline()
                line = line.replace(str('\\\n'), str(line_p_2))
            lut_list.append(lut.lut(line))
            line = in_abc_fp.readline()
            # Collect the truth-table rows belonging to this LUT.
            while len(line) != 0 and (not line.startswith('.names')) and (
                    not line.startswith('.end')):
                lut_list[len(lut_list) - 1].entries.append(line)
                line = in_abc_fp.readline()
        else:
            line = in_abc_fp.readline()
def GenerateAnkiCardsFromWikipediaCategory(url, deckName, user_id, api_key):
    # Build AnkiCard objects for articles found on a Wikipedia category
    # page, scraped through two import.io connectors (Python 2 code:
    # print statements and the latch .await() method).
    #
    # Flow: query the category connector for the article list, block on
    # the global queryLatch, then issue one query per article URL and
    # block again.  Results arrive via the module-level `callback` /
    # `callback2` functions into the globals `dataRows` / `dataRows2`.
    #
    # NOTE(review): `deckName` is accepted but never used in this body.
    # NOTE(review): the second latch is sized len(dataRows) but queries
    # are only issued for rows that HAVE a 'url' key -- if any row lacks
    # one, the latch can never count down to zero and await() hangs.
    cards = []
    client = importio.importio(user_id=user_id, api_key=api_key,
                               host="https://query.import.io")
    client.connect()
    global queryLatch
    queryLatch = latch.latch(1)
    client.query({
        "connectorGuids": [
            "68b4b6ac-25ce-434d-923d-7cc9661216ff"  # 7fc7daa2-25a4-4649-b48c-be1d7fd8756e
        ],
        "input": {
            "webpage/url": url
        }
    }, callback)
    print "Queries dispatched, now waiting for results"
    # Block until callback() counts the latch down to zero.
    queryLatch.await()
    print json.dumps(dataRows, indent = 4)
    #print(dataRows[0]["title"])
    # One latch count per article page we are about to fetch.
    queryLatch = latch.latch(len(dataRows))
    for data in dataRows:
        if ('url' in data.keys()):
            client.query({
                "connectorGuids": [
                    "7fc7daa2-25a4-4649-b48c-be1d7fd8756e"
                ],
                "input": {
                    "webpage/url": data['url']
                }
            }, callback2)
    queryLatch.await()
    print json.dumps(dataRows2, indent = 4)
    for d in dataRows2:
        # Only rows that scraped both a title and a first paragraph
        # become cards.
        if (all(x in d.keys() for x in ["title", "first_par"])):
            cards.append(AnkiCard(d["title"], d["first_par"]))
    client.disconnect()
    reinitGlobalVariables()
    return cards
# OLD IMPORT IO SCRAPING SCRIPT FOR BBC OLD STYLE NEWS SITE ARTICLES 01/01/2010 - 20/08/2014 import logging, json, importio, latch client = importio.importio(user_id="cf592fba-bd1f-4128-8e98-e729c2bb7dec", api_key="aledxqRLOCLFo9O7cYeeC58aotifmZbL2C57Mg1zicz6ZLVSY94xttvI9AjeV1Fw9DpBg2y/cbrNZXM23yiWBg==", host="https://query.import.io") client.connect() queryLatch = latch.latch(13441) dataRows = [] d = '' def callback(query, message): global dataRows global d if message["type"] == "DISCONNECT": print "Query in progress when library disconnected" print json.dumps(message["data"], indent = 4) if message["type"] == "MESSAGE": if "errorType" in message["data"]: print "Got an error!" print json.dumps(message["data"], indent = 4) else: print "Got data!" print json.dumps(message["data"], indent = 4) dataRows.extend(message["data"]["results"]) d = message["data"]["results"] for i in d: with open('urls.txt', 'a') as f: f.write(i["url"] + ',\n')
# Once you have initialised the client, connect it to the server: client.connect() # If you wish to use username and password based authentication, first create a client: #client = importio.importio() # If you wish to use proxies with your username and password, then you can do so like this: #client = importio.importio(proxies=proxies) # Next you need to log in to import.io using your username and password, like so: #client.login("YOUR_USERNAME", "YOUR_PASSWORD") # Because import.io queries are asynchronous, for this simple script we will use a "latch" # to stop the script from exiting before all of our queries are returned # For more information on the latch class, see the latch.py file included in this client library queryLatch = latch.latch(3) # Define here a global variable that we can put all our results in to when they come back from # the server, so we can use the data later on in the script dataRows = [] # In order to receive the data from the queries we issue, we need to define a callback method # This method will receive each message that comes back from the queries, and we can take that # data and store it for use in our app def callback(query, message): global dataRows # Disconnect messages happen if we disconnect the client library while a query is in progress if message["type"] == "DISCONNECT": print("Query in progress when library disconnected")
def extract(connector, urls):
    # Scrape every URL in *urls* through the named import.io
    # *connector*, accumulating result rows in the module-level
    # data[connector] and progress/log text in data["log"].
    # Returns the accumulated data[connector] list.
    #
    # NOTE(review): `data` and `connectors` are module-level globals not
    # visible in this chunk -- confirm their shape (dict of lists keyed
    # by connector name) against the rest of the file.
    # NOTE(review): `.await()` is a method of the bundled latch class;
    # `await` became a reserved word in Python 3.7, so this code targets
    # an older interpreter -- confirm.

    # To use an API key for authentication, use the following code:
    # NOTE(review): credentials and proxy addresses are hard-coded --
    # move them to configuration and revoke the committed key.
    client = importio.importio(
        user_id="d133b9b6-1253-4568-b727-425c7181ed93",
        api_key=
        "xCSj76J7NK+PaXi5foAzbIjgyo+Y+Xpu1+oS+OpngOor8gYN/johObwTLAUaQSoGTGzmSCxVMJQU3mXbICU6SQ==",
        host="https://query.import.io",
        proxies={
            "http": "http://proxy.server:3128",
            "https": "http://proxy.server:3128"
        })
    client.connect()
    # One latch count per URL; callback counts down as queries finish.
    queryLatch = latch.latch(len(urls))

    def callback(query, message):
        global data
        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            data["log"].append("Query in progress when library disconnected")
            data["log"].append(json.dumps(message["data"], indent=4))
        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                data["log"].append("Got an error!")
                data["log"].append(json.dumps(message["data"], indent=4))
            else:
                # Save the data we got in our dataRows variable for later
                data[connector].extend(message["data"]["results"])
        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished():
            queryLatch.countdown()

    for url in urls:
        client.query(
            {
                "connectorGuids": [connectors[connector]],
                "input": {
                    "webpage/url": url
                }
            }, callback)

    data["log"].append("Queries dispatched, now waiting for results")
    # Block until every query has counted the latch down.
    queryLatch.await()
    data["log"].append("Latch has completed, all results returned")
    client.disconnect()

    # Now we can print out the data we got
    data["log"].append("All data received:")
    if connector == "fixture":
        for f in data[connector]:
            data["log"].append("%s vs %s" %
                               (f["hometeam/_title"], f["awayteam/_title"]))
    elif connector == "history":
        data["log"].extend(urls)
    else:
        data["log"].append(json.dumps(data[connector], indent=4))
    return data[connector]
# You do not need to do this, but setting the logging level will reveal logs about # what the import.io client is doing and will surface more information on errors logging.basicConfig(level=logging.INFO) # If you wish, you may configure HTTP proxies that Python can use to connect # to import.io. If you need to do this, uncomment the following line and fill in the # correct details to specify an HTTP proxy: #proxies = { "http": "127.0.0.1:3128" } client = importio.importio(user_id=GUID, api_key=API_key) client.connect() queryLatch = latch.latch(1) dataRows = [] def callback(query, message): global dataRows # Disconnect messages happen if we disconnect the client library while a query is in progress if message["type"] == "DISCONNECT": print "Query in progress when library disconnected" print json.dumps(message["data"], indent = 4) # Check the message we receive actually has some data in it if message["type"] == "MESSAGE": if "errorType" in message["data"]: # In this case, we received a message, but it was an error from the external service
def importquery(conNum, ImportURL, filename):
    # Run one import.io query against connector *conNum* for the page
    # *ImportURL*, wait for the result, and dump it as JSON to
    # sites\<filename>.dat.
    #
    # NOTE(review): results travel through the module-level `dataRows`
    # global (the local initialiser below was deliberately commented
    # out) -- concurrent calls would race on it.
    # NOTE(review): `.await()` implies a pre-3.7 interpreter, where
    # `await` is not yet a keyword -- confirm target version.

    # To use an API key for authentication, use the following code:
    client = importio.importio(user_id=user_id_Value, api_key=api_key_Value,
                               host="https://query.import.io")
    # Once we have started the client and authenticated, we need to connect it to the server:
    client.connect()

    # Because import.io queries are asynchronous, for this simple script we will use a "latch"
    # to stop the script from exiting before all of our queries are returned
    # For more information on the latch class, see the latch.py file included in this client library
    queryLatch = latch.latch(1)

    # Define here a global variable that we can put all our results in to when they come back from
    # the server, so we can use the data later on in the script
    # dataRows = []

    # In order to receive the data from the queries we issue, we need to define a callback method
    # This method will receive each message that comes back from the queries, and we can take that
    # data and store it for use in our app
    def callback(query, message):
        global dataRows
        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            print("Query in progress when library disconnected")
            ##print json.dumps(message["data"], indent = 4)
        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                print("Got an error!")
                #print json.dumps(message["data"], indent = 4)
            else:
                # We got a message and it was not an error, so we can process the data
                print("Got data!")
                #print json.dumps(message["data"], indent = 4)
                # Save the data we got in our dataRows variable for later
                dataRows = (message["data"]["results"])
        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished():
            queryLatch.countdown()

    # Issue queries to your data sources and with your inputs
    # You can modify the inputs and connectorGuids so as to query your own sources
    # Query for tile Polycount_Freelance
    client.query(
        {
            "connectorGuids": [conNum],
            "input": {
                "webpage/url": ImportURL
            }
        }, callback)

    print("Checking " + filename + ", waiting for results")
    # Now we have issued all of the queries, we can "await" on the latch so that we know when it is all done
    queryLatch.await()
    #print "Latch has completed, all results returned"
    # It is best practice to disconnect when you are finished sending queries and getting data - it allows us to
    # clean up resources on the client and the server
    client.disconnect()

    # Now we can print out the data we got
    # NOTE(review): the file handle is never closed explicitly -- this
    # relies on CPython refcounting; a `with` block would be safer.
    open("sites\\" + filename + ".dat", 'w').write(json.dumps(dataRows, indent=4))
    print(filename + " data received, file saved")
def scrapeData(userid, network):
    # Scrape score data from an IIDX network via import.io templated
    # queries (Python 2: print statements, latch .await()).
    # *network* selects 'ps' (Programmed Sun) or 'pw' (Programmed
    # World); *userid* is a player id, or the sentinel 'refresh_music'
    # to fetch the full song list instead.
    # Returns the import.io result rows, or the string "ERROR".
    client = clientGen()
    client.connect()
    queryLatch = latch.latch(1)
    global target_url, connector_guid, short_url
    #if the userid is 'refresh_music', this method will update the music library, otherwise it will check the user's recently played
    #stuff for scraping programmed sun
    cookie = cookies[network]  # session cookie captured earlier for this network
    if(network == 'ps'):
        short_url = "webui.programmedsun.com"
        if userid == "refresh_music":
            print "refreshing PS song list..."
            target_url = "http://webui.programmedsun.com/iidx/0/music"
            connector_guid = "e53e03d2-1468-4ebb-8fe9-2ef64de33db2"
        else:
            print "refreshing PS player %s's tracklist..." % userid
            target_url = "http://webui.programmedsun.com/iidx/0/players/%s/scores" % userid
            connector_guid = "9247219f-a36f-4e6b-85b0-1956eff5836d"
    #stuff for scraping programmed world
    elif(network == 'pw'):
        short_url = "programmedworld.net"
        if(userid == "refresh_music"):
            print "refreshing PW song list..."
            target_url = "https://programmedworld.net/iidx/22/music"
            connector_guid = "7d120ee9-000f-43f1-961a-17e4ff45771e"
        else:
            print "refreshing PW player %s's tracklist..." % userid
            target_url = "https://programmedworld.net/iidx/22/players/%s/scores" % userid
            connector_guid = "329e12e0-85ea-4961-83b6-a1156e25d46a"

    #callback to export the returned data
    def callback(query, message):
        # Writes result rows (or "ERROR") into the module-level `data`.
        global data
        if message["type"] == "DISCONNECT":
            print "Query in progress when library disconnected"
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                #handle users with hidden accounts
                if "Not authorised" in message["data"]["error"]:
                    print "This user has a hidden profile!"
                    data = "ERROR"
                else:
                    print "An error occured."
                    print json.dumps(message, indent = 4)
                    data = "ERROR"
            else:
                #handle non-existant users
                if message["data"]["results"] == [] and userid != 'refresh_music':
                    print json.dumps(message, indent = 4)
                    print "Non-existent user."
                    data = "ERROR"
                else:
                    data = (message["data"]["results"])
        if query.finished():
            queryLatch.countdown()

    #import.io's template queries sure are awesome
    client.query({
        "connectorGuids": [
            connector_guid
        ],
        "input": {
            "webpage/url": target_url
        },
        "additionalInput": {
            connector_guid: {
                "cookies": [cookie]
            }
        }
    }, callback)
    # Block until the query's callback counts the latch down.
    queryLatch.await()
    client.disconnect()
    # NOTE(review): `data` here is the module-level global set by the
    # callback; concurrent calls to scrapeData would race on it.
    return data
def generateCookies():
    #makes a small call to import.io to just login and return a cookie without loading
    #anything else
    # NOTE(review): only 'ps' is iterated even though a 'pw' branch
    # exists below -- confirm whether PW cookies are intentionally
    # skipped.
    for network in ['ps']:
        client = clientGen()
        client.connect()
        queryLatch = latch.latch(1)
        global target_url, connector_guid, short_url, username, password
        if(network == 'ps'):
            print "getting PS cookie..."
            short_url = "webui.programmedsun.com"
            target_url = "http://webui.programmedsun.com/iidx/0/music"
            connector_guid = "e53e03d2-1468-4ebb-8fe9-2ef64de33db2"
            username = PS_USER
            password = PS_PWD
        elif(network == 'pw'):
            print "getting PW cookie..."
            short_url = "programmedworld.net"
            target_url = "https://programmedworld.net/iidx/22/music"
            connector_guid = "7d120ee9-000f-43f1-961a-17e4ff45771e"
            username = PW_USER
            password = PW_PWD

        #callback to export the returned data
        def callback(query, message):
            # Stores the first returned cookie into cookies[network].
            if message["type"] == "DISCONNECT":
                print "Query in progress when library disconnected"
            if message["type"] == "MESSAGE":
                if "errorType" in message["data"]:
                    print "Got an error!"
                    #handle users with hidden accounts
                    print "An error occured."
                    print json.dumps(message["data"], indent = 4)
                    # NOTE(review): dead store -- `data` is local here
                    # (no `global data` declaration) and is never read.
                    data = "ERROR"
                else:
                    cookies[network] = message["data"]["cookies"][0]
            if query.finished():
                queryLatch.countdown()

        #import.io's template queries sure are awesome
        # loginOnly makes the connector authenticate and return cookies
        # without scraping the page.
        client.query({
            "connectorGuids": [
                connector_guid
            ],
            "input": {
                "webpage/url": target_url
            },
            "loginOnly": 'true',
            "additionalInput": {
                connector_guid: {
                    "domainCredentials": {
                        short_url: {
                            "username": username,
                            "password": password
                        }
                    }
                }
            }
        }, callback)
        queryLatch.await()
        client.disconnect()
    # NOTE(review): original indentation was lost in this chunk; the
    # final print is assumed to sit after the loop -- confirm.
    print cookies
if query.finished(): queryLatch.countdown() # Initialise the library # To use an API key for authentication, use the following code: client = importio.importio(user_id=user_id, api_key=api_key, host="https://query.import.io") client.connect() # Now we are going to query the first extractor print "Querying the first extractor:" # If the input for the first extractor is onyl one: if isinstance(starting_query,list)==False: # Use a latch to stop the program from exiting queryLatch = latch.latch(1) current_results = {} # Querying extractor 1: client.query({ "connectorGuids": [ extractor_guid_1 ], "input": { input_first_extractor: starting_query } }, callback) # Wait until queries complete queryLatch.await()
# NOTE(review): chunk begins mid-call -- these keyword arguments close
# an importio.importio(...) constructor started above this chunk
# (test 3 deliberately uses a random, invalid API key).
                             user_id=userguid, api_key=str(uuid.uuid4()))
# Test 3: connecting with an invalid key is expected to raise.
try:
    client.connect()
    print("Test 3: Failed (did not throw exception)")
    sys.exit(3)
except Exception:
    print("Test 3: Success")

'''
Test 4
Test that querying a source that doesn't exist returns an error
'''
test4latch = latch.latch(1)
test4pass = False

def test4callback(query, message):
    # Passes test 4 iff the server reports ConnectorNotFoundException
    # for the deliberately nonexistent connector GUID.
    global test4pass
    if message["type"] == "MESSAGE" and "errorType" in message["data"]:
        if message["data"]["errorType"] == "ConnectorNotFoundException":
            test4pass = True
        else:
            print("Unexpected error: %s" % message["data"]["errorType"])
    if query.finished():
        test4latch.countdown()

# NOTE(review): chunk truncated mid-call -- the remaining constructor
# arguments continue past this chunk.
client = importio.importio(host="http://query." + host,
# NOTE(review): chunk begins mid-test -- the client used below was
# configured above this chunk (test 3 expects its connect() to fail).
try:
    client.connect()
    print("Test 3: Failed (did not throw exception)")
    sys.exit(3)
except Exception:
    print("Test 3: Success")

'''
Test 4
Test that querying a source that doesn't exist returns an error
'''
test4latch = latch.latch(1)
test4pass = False

def test4callback(query, message):
    # Passes test 4 iff the server reports ConnectorNotFoundException
    # for the deliberately nonexistent connector GUID.
    global test4pass
    if message["type"] == "MESSAGE" and "errorType" in message["data"]:
        if message["data"]["errorType"] == "ConnectorNotFoundException":
            test4pass = True
        else:
            print("Unexpected error: %s" % message["data"]["errorType"])
    if query.finished():
        test4latch.countdown()

# Reconnect with valid credentials, then query a random GUID that
# cannot correspond to a real connector.
client = importio.importio(host="http://query." + host,
                           user_id=userguid, api_key=api_key)
client.connect()
client.query({
    "input": {
        "query": "server"
    },
    "connectorGuids": [
        str(uuid.uuid4())
    ]
}, test4callback)