def test_encode(self):
    """Smoke test: a factory-encoded LinearRegression can be written
    through mrjob's JSONProtocol without raising.
    """
    linRegFactory = LinearRegressionFactory(11)  # 11 presumably the feature count — TODO confirm
    linReg = linRegFactory.get_instance()
    encoded = linRegFactory.encode(linReg)
    protocol = JSONProtocol()
    # Parenthesized print works on both Python 2 and Python 3; the bare
    # `print x` statement form is a SyntaxError under Python 3.
    print(protocol.write(0, encoded))
def test_encode(self):
    """Verify the algorithm is JSON-serializable (mrjob uses JSON as
    its internal protocol, so the encoded form must survive a write).
    """
    sizes = [3, 2, 1]
    factory = PredictionNNFactory(sizes)
    network = factory.get_instance()
    # Factory-encode the network, then push it through the protocol.
    payload = factory.encode(network)
    JSONProtocol().write("test_decode", payload)
def encode_node(node_id, links=None, score=1):
    """Render one graph node as a JSONProtocol line, newline-terminated.

    `links` is expected to be a dict of node_id -> weight (it is read
    via .items()); an empty/None value omits the 'links' entry entirely.
    """
    payload = {}
    if links:
        # sorted() makes the serialized link order deterministic.
        payload['links'] = sorted(links.items())
    payload['score'] = score
    protocol = JSONProtocol()
    return protocol.write(node_id, payload) + '\n'
def test_uses_json_format(self):
    """Round-trip a composite key/value pair through JSONProtocol and
    pin the exact tab-separated JSON wire format.
    """
    KEY = ['a', 1]
    VALUE = {'foo': {'bar': 3}, 'baz': None}
    ENCODED = '["a", 1]\t{"foo": {"bar": 3}, "baz": null}'
    decoded = JSONProtocol.read(ENCODED)
    self.assertEqual((KEY, VALUE), decoded)
    self.assertEqual(ENCODED, JSONProtocol.write(KEY, VALUE))
def test_numerical_keys_become_strs(self):
    # JSON object keys are always strings, so integer dict keys come
    # back from a write/read round trip as their str() forms.
    written = JSONProtocol.write({1: 2}, {3: 4})
    self.assertEqual(({'1': 2}, {'3': 4}), JSONProtocol.read(written))
def encode_node(node_id, links=None, score=1):
    """Emit a single node as a JSONProtocol record plus trailing newline.

    A truthy `links` (a dict read via .items()) is serialized first so
    the JSON key order matches the original writer: links, then score.
    """
    node = {'links': sorted(links.items())} if links else {}
    node['score'] = score
    return JSONProtocol().write(node_id, node) + '\n'
def test_decode(self):
    """Round-trip: factory-encode -> protocol write -> protocol read ->
    factory-decode, then check the decoded container and element types.
    """
    linRegFactory = LinearRegressionFactory(11)
    linReg = linRegFactory.get_instance()
    obj_encoded = linRegFactory.encode(linReg)
    protocol = JSONProtocol()
    json_encoded = protocol.write(0, obj_encoded)
    # read() yields a (key, value) pair; only the value is decodable.
    obj_encoded = protocol.read(json_encoded)
    linRegArr = linRegFactory.decode([obj_encoded[1]])
    # isinstance is the idiomatic check; type(x) == T rejects subclasses.
    assert isinstance(linRegArr, list), "decoded not as a list"
    assert isinstance(linRegArr[0], LinearRegression), "decoded not as LinearRegression"
def test_decode(self):
    """Encode a LinearRegression, serialize and deserialize it through
    JSONProtocol, and assert the factory reconstructs the right types.
    """
    linRegFactory = LinearRegressionFactory(11)
    linReg = linRegFactory.get_instance()
    obj_encoded = linRegFactory.encode(linReg)
    protocol = JSONProtocol()
    json_encoded = protocol.write(0, obj_encoded)
    obj_encoded = protocol.read(json_encoded)
    # Element [1] of the (key, value) pair holds the encoded payload.
    linRegArr = linRegFactory.decode([obj_encoded[1]])
    # Use isinstance rather than type(x) == T (subclass-safe, idiomatic).
    assert isinstance(linRegArr, list), "decoded not as a list"
    assert isinstance(linRegArr[0], LinearRegression), "decoded not as LinearRegression"
def encode_node(node_id, links=None, score=1):
    """Print out a node, in JSON format.

    :param node_id: unique ID for this node (any type is okay)

    :param links: a dict mapping *node_id* -> *weight*; each *node_id*
                  is the ID of a node to send score to, and *weight* is
                  a number between 0 and 1. Your weights should sum to 1
                  for each node, but if they sum to less than 1, the
                  algorithm will still converge.
                  (The code calls ``links.items()``, so a dict — not a
                  list of tuples — is required.)

    :type score: float
    :param score: initial score for the node. Defaults to 1. Ideally,
                  the average score of your nodes should be 1 (but if it
                  isn't, the algorithm will still converge).
    """
    node = {}
    # `is not None` (not truthiness): an explicitly-passed empty dict
    # still produces a 'links' entry, unlike the omitted default.
    if links is not None:
        node['links'] = sorted(links.items())
    node['score'] = score
    return JSONProtocol.write(node_id, node) + '\n'
def test_decode(self):
    '''
    Test whether algorithm can be json encoded
    (used as mrjob internal protocol)
    '''
    layerSizes = [3, 2, 1]
    nnFactory = PredictionNNFactory(layerSizes)
    nn = nnFactory.get_instance()
    # encode
    obj_encoded = nnFactory.encode(nn)
    # call json protocol
    protocol = JSONProtocol()
    json_encoded = protocol.write("test_decode", obj_encoded)
    obj_encoded = protocol.read(json_encoded)
    nnArr = nnFactory.decode([obj_encoded[1]])
    # isinstance over type(x) == T; the old failure message wrongly
    # said "LinearRegression" (copy-paste from the regression test).
    assert isinstance(nnArr, list), "decoded not as a list"
    assert isinstance(nnArr[0], MultilayerPerceptron), "decoded not as MultilayerPerceptron"
def test_numerical_keys_become_strs(self):
    # json.dumps coerces numeric dict keys to strings, so the
    # round-tripped dicts carry '1' and '3' rather than 1 and 3.
    round_tripped = JSONProtocol.read(JSONProtocol.write({1: 2}, {3: 4}))
    self.assertEqual(({'1': 2}, {'3': 4}), round_tripped)
def test_tuples_become_lists(self):
    # JSON has no tuple type: tuples serialize as arrays and
    # therefore deserialize as Python lists.
    key_out, value_out = JSONProtocol.read(JSONProtocol.write((1, 2), (3, 4)))
    self.assertEqual([1, 2], key_out)
    self.assertEqual([3, 4], value_out)
# Preprocess an edge-list file into JSONProtocol records of
# (adjacency_list, initial_pagerank) keyed by node id.

NUMBER_RE = re.compile(r"[-?\d']+")  # NOTE(review): unused in this chunk; kept in case other code references it

input_file = 'sample_input.txt'

# Each input line is "<src> [<dst>]"; a line with no <dst> marks a
# dangling node. (The old code bound the *input* handle to `out_file`,
# which was misleading.)
with open(input_file, 'r') as in_file:
    data = [x.split() for x in in_file.read().splitlines()]

# First pass: register every source node with an empty adjacency list.
nodes = {}
for line in data:
    nodes[int(line[0])] = []

# Second pass: collect outgoing edges. Dangling nodes already hold [],
# so no reset is needed — the old `nodes[...] = []` reset could clobber
# edges already collected for the same node.
for line in data:
    if len(line) > 1:
        nodes[int(line[0])].append(int(line[1]))

# Spread the initial PageRank mass uniformly over all known nodes.
unique_node_count = len(nodes)
initial_pagerank = 1 / unique_node_count

# One protocol instance suffices (the old code built a second, unused
# one before opening the output file).
j = JSONProtocol()
with open("preprocessed_" + input_file, "wb") as out_file:
    for _id, adj in nodes.items():
        out_file.write(j.write(_id, (adj, initial_pagerank)))
        out_file.write('\n'.encode('utf-8'))
# Iterative driver: feed the input file to the MRJob, re-run until the
# 'better found' counter reports no improvement. NOTE(review): Python 2
# syntax (`print` statement); `myP4`, `inputFileName`, `outputFileName`
# are defined earlier in the file, outside this chunk.
inF = open(inputFileName, 'r')
myP4.stdin = inF.readlines()  # seed the first iteration's input
inF.close()
# Variables for keeping track of convergence
converged = False  # keeps track of convergence
ctrValue = 0  # value of the counter (number of better paths found)
iteration = 0  # number of iterations
# Loops mrP4 until we have convergence
while not converged:
    with myP4.make_runner() as runner:  # make a runner
        runner.run()  # run job
        # Persist this iteration's output in protocol form so it can be
        # fed back in as the next iteration's stdin.
        nextIn = open(outputFileName, 'w')
        for line in runner.stream_output():
            key, value = myP4.parse_output_line(line)
            nextIn.write(JSONProtocol.write(key, value) + '\n')
            print '-> Output of MR Job is:', key, value
        nextIn.close()
        iteration += 1  # update number of iterations
        # Get counter
        ctr = runner.counters()
        ctrValue = ctr[0]['reducer'][
            'better found']  # Extract counter value
        if (ctrValue == 0):
            converged = True  # We have convergence
        # Get previous run's values
        with open(outputFileName, 'r') as nextIn:
            myP4.stdin = nextIn.readlines()
# Output file reorganization
s = []