示例#1
0
def import_in_abc_list (in_abc_fp, out_abc_fp, latch_list, lut_list):
    """Parse a BLIF-style netlist from in_abc_fp.

    Copies the header (everything before the first '.latch'/'.names' line)
    to out_abc_fp, appends a latch.latch object per '.latch' line to
    latch_list, and appends a lut.lut object per '.names' section to
    lut_list, attaching the section's truth-table entry lines to the LUT.

    Args:
        in_abc_fp:  readable file object positioned at the netlist start.
        out_abc_fp: writable file object receiving the copied header lines.
        latch_list: list extended in place with latch.latch objects.
        lut_list:   list extended in place with lut.lut objects.
    """
    # Copy until reach the line of latch.
    # BUG FIX: all three loops now also stop at EOF.  readline() returns ''
    # at end of file, and ''.startswith(...) is always False, so without
    # the len() guard a file missing '.latch'/'.names' spins forever.
    line = in_abc_fp.readline()

    while len(line) != 0 and not line.startswith('.latch'):
        out_abc_fp.write(line)
        line = in_abc_fp.readline()
        if line.startswith('.names'):
            break

    # Read and Process Latch Lines
    while len(line) != 0 and not line.startswith('.names'):
        if line.startswith('.latch'):
            latch_list.append(latch.latch(line))
        line = in_abc_fp.readline()

    # Read and Process LUT Lines
    while len(line) != 0:
        if line.startswith('.names'):
            # A header ending in '\' continues on the next line; splice the
            # continuation in before parsing.
            # NOTE(review): only a single continuation line is handled, as
            # in the original — confirm headers never span three lines.
            if line.endswith('\\\n'):
                line = line.replace('\\\n', in_abc_fp.readline())
            lut_list.append(lut.lut(line))
            line = in_abc_fp.readline()
            # Truth-table entries belong to the current LUT until the next
            # '.names', a '.end', or EOF.
            while len(line) != 0 and not line.startswith('.names') and not line.startswith('.end'):
                lut_list[-1].entries.append(line)
                line = in_abc_fp.readline()
        else:
            line = in_abc_fp.readline()
示例#2
0
def import_in_abc_list(in_abc_fp, out_abc_fp, latch_list, lut_list):
    """Parse a BLIF-style netlist from in_abc_fp.

    Copies the header (everything before the first '.latch'/'.names' line)
    to out_abc_fp, appends a latch.latch object per '.latch' line to
    latch_list, and appends a lut.lut object per '.names' section to
    lut_list, attaching the section's truth-table entry lines to the LUT.
    """
    # Copy until reach the line of latch.
    # BUG FIX: every loop below now also terminates at EOF.  readline()
    # returns '' at end of file and ''.startswith(...) is always False, so
    # without the len() guard a malformed file loops forever.
    line = in_abc_fp.readline()

    while len(line) != 0 and not line.startswith('.latch'):
        out_abc_fp.write(line)
        line = in_abc_fp.readline()
        if line.startswith('.names'):
            break

    # Read and Process Latch Lines
    while len(line) != 0 and not line.startswith('.names'):
        if line.startswith('.latch'):
            latch_list.append(latch.latch(line))
        line = in_abc_fp.readline()

    # Read and Process LUT Lines
    while len(line) != 0:
        if line.startswith('.names'):
            # Splice in a single backslash-continued header line.
            if line.endswith('\\\n'):
                line_p_2 = in_abc_fp.readline()
                line = line.replace('\\\n', line_p_2)
            lut_list.append(lut.lut(line))
            line = in_abc_fp.readline()
            # Collect truth-table entries until the next section or EOF.
            while len(line) != 0 and (not line.startswith('.names')) and (
                    not line.startswith('.end')):
                lut_list[-1].entries.append(line)
                line = in_abc_fp.readline()
        else:
            line = in_abc_fp.readline()
示例#3
0
def GenerateAnkiCardsFromWikipediaCategory(url,deckName,user_id,api_key):
		"""Scrape a Wikipedia category page via import.io and build Anki cards.

		Queries the category page once, then queries each article URL found,
		and turns each (title, first_par) result pair into an AnkiCard.
		Relies on module-level globals: queryLatch, dataRows, dataRows2,
		callback, callback2, reinitGlobalVariables.

		NOTE(review): deckName is unused inside this function — confirm.
		"""
		cards=[]
		client = importio.importio(user_id=user_id,api_key=api_key , host="https://query.import.io")
		client.connect()
		# Latch blocks until the asynchronous category query has returned.
		global queryLatch
		queryLatch = latch.latch(1)
		client.query({
				"connectorGuids": [
						"68b4b6ac-25ce-434d-923d-7cc9661216ff"#7fc7daa2-25a4-4649-b48c-be1d7fd8756e
				],
				"input": {
						"webpage/url": url
				}
		}, callback)
		print "Queries dispatched, now waiting for results"
		queryLatch.await()
		print json.dumps(dataRows, indent = 4)
		#print(dataRows[0]["title"])
		# One follow-up query per article row; size the latch accordingly.
		queryLatch = latch.latch(len(dataRows))
		for data in dataRows :
			if('url' in data.keys()):
								client.query({
										"connectorGuids": [
						"7fc7daa2-25a4-4649-b48c-be1d7fd8756e"
										],
										"input": {
														"webpage/url": data['url']
										}
						}, callback2)

		queryLatch.await()
		print json.dumps(dataRows2, indent = 4)
		# Keep only rows that have both a title and a first paragraph.
		for d in dataRows2:
			if(all(x in d.keys() for x in ["title","first_par"])):
				cards.append(AnkiCard(d["title"],d["first_par"]))
		client.disconnect()
		reinitGlobalVariables()
		return cards
# OLD IMPORT IO SCRAPING SCRIPT FOR BBC OLD STYLE NEWS SITE ARTICLES 01/01/2010 - 20/08/2014

import logging, json, importio, latch

# Authenticated import.io client for the BBC scrape (hard-coded credentials).
client = importio.importio(user_id="cf592fba-bd1f-4128-8e98-e729c2bb7dec", api_key="aledxqRLOCLFo9O7cYeeC58aotifmZbL2C57Mg1zicz6ZLVSY94xttvI9AjeV1Fw9DpBg2y/cbrNZXM23yiWBg==", host="https://query.import.io")
client.connect()
# Countdown latch sized to the number of queries this script issues.
queryLatch = latch.latch(13441)
# dataRows accumulates all result rows; d holds the most recent batch.
dataRows = []
d = ''

def callback(query, message):
  """Handle one import.io message: log it and collect result rows.

  Extends the module-level dataRows with the results, keeps the latest
  batch in d, and appends each result's "url" field to urls.txt.

  NOTE(review): this callback never counts down queryLatch — confirm the
  latch is released elsewhere, or the script will block forever.
  """
  global dataRows
  global d

  if message["type"] == "DISCONNECT":
    print "Query in progress when library disconnected"
    print json.dumps(message["data"], indent = 4)

  if message["type"] == "MESSAGE":

    if "errorType" in message["data"]:
      print "Got an error!" 
      print json.dumps(message["data"], indent = 4)
    else:
      print "Got data!"
      print json.dumps(message["data"], indent = 4)
      dataRows.extend(message["data"]["results"])
      d = message["data"]["results"]
      # Persist every scraped article URL for later processing.
      for i in d:
        with open('urls.txt', 'a') as f:
          f.write(i["url"] + ',\n')
示例#5
0
# Once you have initialised the client, connect it to the server:
client.connect()

# If you wish to use username and password based authentication, first create a client:
#client = importio.importio()
# If you wish to use proxies with your username and password, then you can do so like this:
#client = importio.importio(proxies=proxies)

# Next you need to log in to import.io using your username and password, like so:
#client.login("YOUR_USERNAME", "YOUR_PASSWORD")

# Because import.io queries are asynchronous, for this simple script we will use a "latch"
# to stop the script from exiting before all of our queries are returned
# For more information on the latch class, see the latch.py file included in this client library
# (three queries are expected, hence the count of 3)
queryLatch = latch.latch(3)

# Define here a global variable that we can put all our results in to when they come back from
# the server, so we can use the data later on in the script
dataRows = []


# In order to receive the data from the queries we issue, we need to define a callback method
# This method will receive each message that comes back from the queries, and we can take that
# data and store it for use in our app
def callback(query, message):
    """Receive one message for an issued query.

    NOTE(review): this example is truncated here — only the DISCONNECT
    case is visible; the MESSAGE/data handling presumably follows.
    """
    global dataRows

    # Disconnect messages happen if we disconnect the client library while a query is in progress
    if message["type"] == "DISCONNECT":
        print("Query in progress when library disconnected")
示例#6
0
def extract(connector, urls):
    """Query the given import.io connector once per URL and return its rows.

    Args:
        connector: key into the module-level `connectors` guid map; results
            accumulate under the same key in the module-level `data` dict.
        urls: iterable of page URLs to query.

    Returns:
        data[connector], the list of result rows collected by the callback.

    Side effects: appends progress/log lines to data["log"].
    """
    # To use an API key for authentication, use the following code:
    client = importio.importio(
        user_id="d133b9b6-1253-4568-b727-425c7181ed93",
        api_key=
        "xCSj76J7NK+PaXi5foAzbIjgyo+Y+Xpu1+oS+OpngOor8gYN/johObwTLAUaQSoGTGzmSCxVMJQU3mXbICU6SQ==",
        host="https://query.import.io",
        proxies={
            "http": "http://proxy.server:3128",
            "https": "http://proxy.server:3128"
        })

    client.connect()
    # One latch count per URL so we wait for every query to finish.
    queryLatch = latch.latch(len(urls))

    def callback(query, message):
        global data

        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            data["log"].append("Query in progress when library disconnected")
            data["log"].append(json.dumps(message["data"], indent=4))

        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                data["log"].append("Got an error!")
                data["log"].append(json.dumps(message["data"], indent=4))
            else:
                # Save the data we got for later
                data[connector].extend(message["data"]["results"])

        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished():
            queryLatch.countdown()

    for url in urls:
        client.query(
            {
                "connectorGuids": [connectors[connector]],
                "input": {
                    "webpage/url": url
                }
            }, callback)

    data["log"].append("Queries dispatched, now waiting for results")

    # BUG FIX: `queryLatch. await ()` is a SyntaxError on Python 3.7+,
    # where `await` became a reserved keyword.  The latch method is still
    # named "await", so invoke it via getattr.
    getattr(queryLatch, "await")()

    data["log"].append("Latch has completed, all results returned")

    client.disconnect()

    # Now we can log the data we got
    data["log"].append("All data received:")
    if connector == "fixture":
        for f in data[connector]:
            data["log"].append("%s vs %s" %
                               (f["hometeam/_title"], f["awayteam/_title"]))
    elif connector == "history":
        data["log"].extend(urls)
    else:
        data["log"].append(json.dumps(data[connector], indent=4))

    return data[connector]
示例#7
0

# You do not need to do this, but setting the logging level will reveal logs about
# what the import.io client is doing and will surface more information on errors
# Verbose client-library logging helps diagnose query traffic and errors.
logging.basicConfig(level=logging.INFO)

# If you wish, you may configure HTTP proxies that Python can use to connect
# to import.io. If you need to do this, uncomment the following line and fill in the
# correct details to specify an HTTP proxy:

#proxies = { "http": "127.0.0.1:3128" }

# GUID and API_key are expected to be defined earlier in this script.
client = importio.importio(user_id=GUID, api_key=API_key)
client.connect()

# Exactly one query is issued, so the latch counts down from 1.
queryLatch = latch.latch(1)

# Shared accumulator filled by the callback below.
dataRows = []

def callback(query, message):
    """Receive one message for an issued query and collect its data.

    NOTE(review): truncated in this file — only the DISCONNECT case and
    the start of the MESSAGE/error handling are visible.
    """
    global dataRows

    # Disconnect messages happen if we disconnect the client library while a query is in progress
    if message["type"] == "DISCONNECT":
        print "Query in progress when library disconnected"
        print json.dumps(message["data"], indent = 4)

    # Check the message we receive actually has some data in it
    if message["type"] == "MESSAGE":
        if "errorType" in message["data"]:
            # In this case, we received a message, but it was an error from the external service
示例#8
0
def importquery(
        conNum, ImportURL, filename
):  # To use an API key for authentication, use the following code:
    """Run one import.io query and save its results as JSON.

    Args:
        conNum: connector GUID to query.
        ImportURL: page URL passed as the "webpage/url" input.
        filename: basename of the output file, written to sites\\<filename>.dat.

    Side effects: stores the results in the module-level `dataRows` and
    writes them, pretty-printed, to the output file.
    """
    client = importio.importio(user_id=user_id_Value,
                               api_key=api_key_Value,
                               host="https://query.import.io")
    # Once we have started the client and authenticated, we need to connect it to the server:
    client.connect()

    # Because import.io queries are asynchronous, for this simple script we will use a "latch"
    # to stop the script from exiting before all of our queries are returned
    # For more information on the latch class, see the latch.py file included in this client library
    queryLatch = latch.latch(1)

    # In order to receive the data from the queries we issue, we need to define a callback method
    # This method will receive each message that comes back from the queries, and we can take that
    # data and store it for use in our app
    def callback(query, message):
        global dataRows

        # Disconnect messages happen if we disconnect the client library while a query is in progress
        if message["type"] == "DISCONNECT":
            print("Query in progress when library disconnected")

        # Check the message we receive actually has some data in it
        if message["type"] == "MESSAGE":
            if "errorType" in message["data"]:
                # In this case, we received a message, but it was an error from the external service
                print("Got an error!")
            else:
                # We got a message and it was not an error, so we can process the data
                print("Got data!")
                # Save the data we got in our dataRows variable for later
                dataRows = (message["data"]["results"])

        # When the query is finished, countdown the latch so the program can continue when everything is done
        if query.finished(): queryLatch.countdown()

    # Issue the query to the data source with the given inputs
    client.query(
        {
            "connectorGuids": [conNum],
            "input": {
                "webpage/url": ImportURL
            }
        }, callback)

    print("Checking " + filename + ", waiting for results")

    # BUG FIX: `queryLatch. await ()` is a SyntaxError on Python 3.7+
    # (`await` is a reserved keyword); call the latch's "await" method
    # via getattr instead.
    getattr(queryLatch, "await")()

    # It is best practice to disconnect when you are finished sending queries and getting data - it allows us to
    # clean up resources on the client and the server
    client.disconnect()

    # BUG FIX: the original used open(...).write(...) which leaks the file
    # handle; the with-block guarantees the file is flushed and closed.
    with open("sites\\" + filename + ".dat", 'w') as out_file:
        out_file.write(json.dumps(dataRows, indent=4))
    print(filename + " data received, file saved")
示例#9
0
def scrapeData(userid, network):
	"""Scrape song or player data from a rhythm-game network via import.io.

	Args:
		userid: player id, or the literal 'refresh_music' to refresh the song list.
		network: 'ps' (programmed sun) or 'pw' (programmed world).

	Returns:
		The result rows from the query, or the string "ERROR" on failure
		(hidden profile, non-existent user, or server-side error).  The value
		is passed out through the module-level `data` global set in callback.
	"""
	client = clientGen()
	client.connect()
	queryLatch = latch.latch(1)

	# Shared with other functions in this module (e.g. generateCookies).
	global target_url, connector_guid, short_url

	#if the userid is 'refresh_music', this method will update the music library, otherwise it will check the user's recently played
	#stuff for scraping programmed sun
	cookie = cookies[network]
	if(network == 'ps'):
		short_url = "webui.programmedsun.com"
		if userid == "refresh_music":
			print "refreshing PS song list..."
			target_url = "http://webui.programmedsun.com/iidx/0/music"
			connector_guid = "e53e03d2-1468-4ebb-8fe9-2ef64de33db2"
		else:
			print "refreshing PS player %s's tracklist..." % userid
			target_url = "http://webui.programmedsun.com/iidx/0/players/%s/scores" % userid
			connector_guid = "9247219f-a36f-4e6b-85b0-1956eff5836d"
	#stuff for scraping programmed world
	elif(network == 'pw'):
		short_url = "programmedworld.net"
		if(userid == "refresh_music"):
			print "refreshing PW song list..."
			target_url = "https://programmedworld.net/iidx/22/music"
			connector_guid = "7d120ee9-000f-43f1-961a-17e4ff45771e"
		else:
			print "refreshing PW player %s's tracklist..." % userid
			target_url = "https://programmedworld.net/iidx/22/players/%s/scores" % userid
			connector_guid = "329e12e0-85ea-4961-83b6-a1156e25d46a"
	#callback to export the returned data
	def callback(query, message):
		global data
		if message["type"] == "DISCONNECT":
			print "Query in progress when library disconnected"
		if message["type"] == "MESSAGE":
			if "errorType" in message["data"]:
				#handle users with hidden accounts
				if "Not authorised" in message["data"]["error"]:
					print "This user has a hidden profile!"
					data = "ERROR"
				else:
					print "An error occured."
					print json.dumps(message, indent = 4)
					data = "ERROR"
			else:
				#handle non-existant users
				if message["data"]["results"] == [] and userid != 'refresh_music':
					print json.dumps(message, indent = 4)
					print "Non-existent user."
					data = "ERROR"
				else:
					data = (message["data"]["results"])

		if query.finished(): 
			queryLatch.countdown()

	#import.io's template queries sure are awesome
	# The connector-specific "additionalInput" passes the session cookie through.
	client.query({
		"connectorGuids":[
			connector_guid
		],
		"input": {
			"webpage/url": target_url
		},
		"additionalInput": {
			connector_guid: {
				"cookies": [cookie]
			}
		}
	}, callback)

	queryLatch.await()
	client.disconnect()

	return data
示例#10
0
def generateCookies():
	"""Log in to each configured network via import.io and store a session cookie.

	Fills the module-level `cookies` dict, keyed by network code.
	NOTE(review): the loop currently only covers 'ps', although the 'pw'
	branch below is fully set up — confirm whether that is intentional.
	"""
	#makes a small to import.io to just login and return a cookie without loading
	#anything else
	for network in ['ps']:
		client = clientGen()
		client.connect()
		queryLatch = latch.latch(1)
		global target_url, connector_guid, short_url, username, password

		if(network == 'ps'):
			print "getting PS cookie..."
			short_url = "webui.programmedsun.com"
			target_url = "http://webui.programmedsun.com/iidx/0/music"
			connector_guid = "e53e03d2-1468-4ebb-8fe9-2ef64de33db2"
			username = PS_USER
			password = PS_PWD
		elif(network == 'pw'):
			print "getting PW cookie..."
			short_url = "programmedworld.net"
			target_url = "https://programmedworld.net/iidx/22/music"
			connector_guid = "7d120ee9-000f-43f1-961a-17e4ff45771e"
			username = PW_USER
			password = PW_PWD
		#callback to export the returned data
		def callback(query, message):
			if message["type"] == "DISCONNECT":
				print "Query in progress when library disconnected"
			if message["type"] == "MESSAGE":
				if "errorType" in message["data"]:
					print "Got an error!" 
					#handle users with hidden accounts
					print "An error occured."
					print json.dumps(message["data"], indent = 4)
					data = "ERROR"
				else:
					# On success the first returned cookie is the session cookie.
					cookies[network] = message["data"]["cookies"][0]
			if query.finished(): 
				queryLatch.countdown()
		#import.io's template queries sure are awesome
		# "loginOnly" makes the connector authenticate without scraping the page.
		client.query({
			"connectorGuids":[
				connector_guid
			],
			"input": {
				"webpage/url": target_url
			},
			"loginOnly": 'true',
			"additionalInput": {
				connector_guid: {
					"domainCredentials":{
						short_url:{
							"username": username,
							"password": password
						}
					}
				}
			}
		}, callback)

		queryLatch.await()
		client.disconnect()
		print cookies
  if query.finished(): queryLatch.countdown()

# Initialise the library
# To use an API key for authentication, use the following code:
client = importio.importio(user_id=user_id, 
  api_key=api_key, 
  host="https://query.import.io")

client.connect()

# Now we are going to query the first extractor
print "Querying the first extractor:"
# If the input for the first extractor is only one (not a list of inputs):
if isinstance(starting_query,list)==False:
    # Use a latch to stop the program from exiting
    queryLatch = latch.latch(1)
    current_results = {}

    # Querying extractor 1:
    client.query({
      "connectorGuids": [
        extractor_guid_1
      ],
      "input": {
        input_first_extractor: starting_query
      }
    }, callback)

    # Wait until queries complete
    queryLatch.await()
示例#12
0
                           user_id=userguid,
                           api_key=str(uuid.uuid4()))

# Test 3: connect() is expected to raise because the credentials above are bogus.
try:
    client.connect()
    print("Test 3: Failed (did not throw exception)")
    sys.exit(3)
except Exception:
    print("Test 3: Success")
'''
Test 4

Test that querying a source that doesn't exist returns an error
'''

# test4latch is released by test4callback when the query completes;
# test4pass flips to True if the expected error type is observed.
test4latch = latch.latch(1)
test4pass = False


def test4callback(query, message):
    global test4pass
    if message["type"] == "MESSAGE" and "errorType" in message["data"]:
        if message["data"]["errorType"] == "ConnectorNotFoundException":
            test4pass = True
        else:
            print("Unexpected error: %s" % message["data"]["errorType"])

    if query.finished(): test4latch.countdown()


client = importio.importio(host="http://query." + host,
示例#13
0
# Test 3: connect() is expected to raise because the credentials are invalid.
try:
	client.connect()
	print("Test 3: Failed (did not throw exception)")
	sys.exit(3)
except Exception:
	print("Test 3: Success")


'''
Test 4

Test that querying a source that doesn't exist returns an error
'''

# test4latch is released by test4callback when the query completes;
# test4pass flips to True if the expected error type is observed.
test4latch = latch.latch(1)
test4pass = False

def test4callback(query, message):
	global test4pass
	if message["type"] == "MESSAGE" and "errorType" in message["data"]:
		if message["data"]["errorType"] == "ConnectorNotFoundException":
			test4pass = True
		else:
			print("Unexpected error: %s" % message["data"]["errorType"])

	if query.finished(): test4latch.countdown()

# Query a random (hence non-existent) connector GUID; the server should
# answer with ConnectorNotFoundException, which test4callback checks for.
client = importio.importio(host= "http://query." + host, user_id=userguid, api_key=api_key)
client.connect()
client.query({ "input":{ "query": "server" }, "connectorGuids": [ str(uuid.uuid4()) ] }, test4callback)