def main():
    """Print judgments created after a fixed cut-off timestamp.

    Reads the file named by ``sys.argv[1]`` line by line, decoding every
    non-blank line as one JSON unit.  For each unit whose
    ``data['_golden']`` value is falsy, every judgment that has an
    ``'input'`` entry in its ``data`` and whose ``created_at`` compares
    greater than the cut-off is printed on its own line as: cleaned URL,
    input text, ``created_at`` and judgment id, joined by two tab characters.

    ``parse`` and ``clean_url`` are not defined here; they must be supplied
    by the enclosing module.
    """
    import json
    import sys

    # Only judgments created strictly after this instant are reported.
    threshold = parse('2015-08-14T11:57:00-05:00')

    with open(sys.argv[1]) as reader:
        for line in reader:
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a JSON
                # document and would make json.loads raise.
                continue
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            if data['data']['_golden']:
                continue
            for judgment in data['results']['judgments']:
                if 'input' not in judgment['data']:
                    continue
                if parse(judgment['created_at']) > threshold:
                    # A single parenthesised argument prints identically
                    # under the Python 2 statement and the Python 3 function.
                    print('\t\t'.join([
                        url,
                        judgment['data']['input'],
                        judgment['created_at'],
                        str(judgment['id']),
                    ]))
def main():
    """Record newly missed (url, input) pairs from a JSON results file.

    ``sys.argv[1]`` names a file holding one JSON unit per line.  Every
    judgment that has an ``'input'`` entry in its ``data`` and a truthy
    ``'missed'`` value contributes the pair (cleaned URL, input text).

    Pairs are compared case-insensitively against those already stored in
    ``<json path>.test-miss``.  Each pair not seen before is appended to
    that file as ``url<TAB>input`` and also written to
    ``<json path>.test-miss.delta``, which is truncated on every run and
    therefore lists only the pairs added by this run.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    import sys
    import os.path as osp
    import json

    json_path = sys.argv[1]
    save_path = json_path + '.test-miss'
    delta_path = save_path + '.delta'

    seen = set()
    if osp.exists(save_path):
        with open(save_path) as reader:
            for line in reader:
                # Remove only the line terminator and split on the first
                # tab: strip() would also swallow an empty or
                # whitespace-padded input, so the stored pair would no
                # longer match the one written below (or would leave a
                # single field and raise IndexError).
                fields = line.rstrip('\r\n').split('\t', 1)
                if len(fields) < 2:
                    continue  # blank or malformed line
                seen.add((fields[0].lower(), fields[1].lower()))

    with open(json_path) as reader, \
            open(save_path, 'a') as writer, \
            open(delta_path, 'w') as delta:
        for line in reader:
            if not line.strip():
                continue  # a blank line is not a JSON document
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' not in judgment['data']:
                    continue
                if not judgment.get('missed'):
                    continue
                text = judgment['data']['input']
                key = (url.lower(), text.lower())
                if key in seen:
                    continue
                # Remember the pair so a repeat later in this same run is
                # suppressed just like a repeat from an earlier run.
                seen.add(key)
                record = url + '\t' + text + '\n'
                writer.write(record)
                delta.write(record)
def main():
    """Report units and URLs that have no judgments and emit clean-up files.

    Command-line arguments:

    * ``sys.argv[1]`` -- file with one JSON unit per line.
    * ``sys.argv[2]`` -- CSV file (the "original upload") with ``_golden``
      and ``url1`` columns.
    * ``sys.argv[3]`` -- CSV file with a ``_unit_id`` column.

    A unit is "valid" when its ``results['judgments']`` list is non-empty;
    its id (as a string) and cleaned URL are then recorded.

    Two lines are printed, each holding three space-separated counts:
    total, total-minus-valid, and valid -- first for unit ids, then for
    URLs of upload rows whose ``_golden`` field is empty.

    Two files are written next to the upload CSV:

    * ``<upload>.delete.js`` -- one ``$.ajax`` DELETE call per unit id that
      is listed in the unit CSV but is not valid.
    * ``<upload>.rest`` -- one line per non-empty, non-golden upload URL
      that is not valid.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    # csv is imported locally like the other modules so the function does
    # not depend on a module-level import being present.
    import csv
    import json
    import sys

    json_path = sys.argv[1]
    org_upload = sys.argv[2]
    unit_urls = sys.argv[3]

    valid_units = set()
    valid_urls = set()
    with open(json_path) as reader:
        for line in reader:
            if not line.strip():
                continue  # a blank line is not a JSON document
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            if data['results']['judgments']:
                valid_units.add(str(data['id']))
                valid_urls.add(url)

    all_units = set()
    with open(unit_urls) as reader:
        for row in csv.DictReader(reader):
            all_units.add(row['_unit_id'])

    all_urls = set()
    with open(org_upload) as reader:
        for row in csv.DictReader(reader):
            if not row['_golden']:
                all_urls.add(clean_url(row['url1']))

    units_without_judgments = all_units - valid_units
    urls_without_judgments = all_urls - valid_urls

    # One pre-formatted argument prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('%d %d %d' % (len(all_units), len(units_without_judgments),
                        len(valid_units)))
    print('%d %d %d' % (len(all_urls), len(urls_without_judgments),
                        len(valid_urls)))

    with open(org_upload + '.delete.js', 'w') as writer:
        for unit in units_without_judgments:
            writer.write("$.ajax({url:'/jobs/761321/units', type:'DELETE', data:{'unit_ids[]':%s}});\n" % unit)

    with open(org_upload + '.rest', 'w') as writer:
        for url in urls_without_judgments:
            if url:
                writer.write('%s\n' % url)
def main():
    """Generate "forgive" and "notify" scripts for manually accepted sentences.

    Command-line arguments:

    * ``sys.argv[1]`` -- file with one JSON unit per line.
    * ``sys.argv[2]`` -- tab-separated file whose first two fields on each
      line are a cleaned URL and a sentence.

    Every judgment in the JSON file that has an ``'input'`` entry and a
    truthy ``'missed'`` value is indexed by (cleaned URL, input text); a
    later judgment with the same pair replaces an earlier one.  Each
    non-blank line of the second file is looked up in that index.

    Outputs (the suffix is appended directly to the second argument):

    * ``<argv[2]>forgive.js`` -- one JavaScript ``Request(...).put`` line
      per accepted sentence.
    * ``<argv[2]>notify.curl`` -- one ``curl`` command per worker, listing
      the sentences accepted for that worker.

    Raises:
        KeyError: a line of the second file does not match any missed
            judgment.
        IndexError: a non-blank line of the second file has no tab.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    import sys
    import json
    from collections import defaultdict
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    json_path = sys.argv[1]
    fn_file = sys.argv[2]
    forgive_path = fn_file + 'forgive.js'
    notify_path = fn_file + 'notify.curl'

    misses = {}
    job_id = None  # set to the job id of the last missed judgment seen
    with open(json_path) as reader:
        for line in reader:
            if not line.strip():
                continue  # a blank line is not a JSON document
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' in judgment['data'] and judgment.get('missed'):
                    text = judgment['data']['input']
                    misses[(url, text)] = (judgment['unit_id'],
                                           judgment['worker_id'])
                    job_id = judgment['job_id']

    forgive = []
    notify = defaultdict(list)
    with open(fn_file) as reader:
        for line in reader:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines instead of IndexError
            fields = stripped.split('\t')
            unit, worker = misses[(fields[0], fields[1])]
            forgive.append((unit, worker))
            notify[worker].append(fields[1])

    with open(forgive_path, 'w') as writer:
        for unit, worker in forgive:
            writer.write(
                "new Request({url: '/jobs/%s/workers/%s', onComplete: function(data) {console.log(JSON.decode(data).message);}}).put({forgive: %s}); \n"
                % (job_id, worker, unit))

    # NOTE(review): this credential is committed in source; it should be
    # rotated and supplied through configuration instead.
    api_key = 'E5FEx4v9LzGe4X1wKD2n'
    intro = 'We have manually reviewed your sentence(s) and accepted them for their good quality. Your accuracy will be corrected accordingly. We appreciate your high quality work! (The following sentences are accepted: '
    with open(notify_path, 'w') as writer:
        for idx, (worker, sents) in enumerate(notify.items()):
            msg = intro + ''.join(" '%s' " % sent for sent in sents) + ')'
            endpoint = 'https://api.crowdflower.com/v1/jobs/%s/workers/%s/notify.json?key=%s' % (
                job_id, worker, api_key)
            # The sentences are free text: shell-quote the whole argument so
            # characters such as " $ ` \ cannot terminate the string or
            # trigger expansion when the generated script is run.  The URL
            # is quoted too because it contains '?'.
            writer.write('curl -X POST --data-urlencode %s %s; echo %d\n' % (
                quote('message=' + msg), quote(endpoint), idx))
def main():
    """Dump every sentence judgment read from standard input as TSV.

    Each non-blank line of standard input is decoded as one JSON unit.  A
    header line is printed first, followed by one line per judgment that
    has an ``'input'`` entry in its ``data``: cleaned URL, input text,
    ``country`` and ``worker_id``, separated by single tabs.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    import json
    import sys

    # The header names all four columns that each row carries (the worker
    # id column was previously unlabelled).  A single parenthesised argument
    # prints identically under Python 2 and Python 3.
    print('gif\tsent\tcountry\tworker_id')
    for line in sys.stdin:
        if not line.strip():
            continue  # a blank line is not a JSON document
        data = json.loads(line)
        url = clean_url(data['data']['url1'])
        for judgment in data['results']['judgments']:
            if 'input' not in judgment['data']:
                continue
            print('\t'.join([
                url,
                judgment['data']['input'],
                judgment['country'],
                str(judgment['worker_id']),
            ]))
def main():
    """Write the forgive (JavaScript) and notify (curl) scripts.

    Command-line arguments:

    * ``sys.argv[1]`` -- file with one JSON unit per line.
    * ``sys.argv[2]`` -- tab-separated file; the first two fields of each
      line are a cleaned URL and a sentence.

    Judgments in the JSON file that have an ``'input'`` entry and a truthy
    ``'missed'`` value are indexed by (cleaned URL, input text); when the
    same pair occurs again the later judgment wins.  Each non-blank line of
    the second file is resolved through that index to a unit id and a
    worker id.

    Outputs (the suffix is concatenated to the second argument as-is):

    * ``<argv[2]>forgive.js`` -- one ``Request(...).put({forgive: ...})``
      line per resolved sentence.
    * ``<argv[2]>notify.curl`` -- one ``curl`` command per worker with a
      message listing that worker's accepted sentences.

    Raises:
        KeyError: a line of the second file matches no missed judgment.
        IndexError: a non-blank line of the second file contains no tab.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    import sys
    import json
    from collections import defaultdict
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    json_path = sys.argv[1]
    fn_file = sys.argv[2]
    forgive_path = fn_file + 'forgive.js'
    notify_path = fn_file + 'notify.curl'

    # (cleaned url, sentence) -> (unit id, worker id) of a missed judgment.
    misses = {}
    job_id = None  # job id of the most recently seen missed judgment
    with open(json_path) as reader:
        for line in reader:
            if not line.strip():
                continue  # a blank line is not a JSON document
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' not in judgment['data']:
                    continue
                if not judgment.get('missed'):
                    continue
                sentence = judgment['data']['input']
                misses[(url, sentence)] = (judgment['unit_id'],
                                           judgment['worker_id'])
                job_id = judgment['job_id']

    forgive = []
    notify = defaultdict(list)
    with open(fn_file) as reader:
        for line in reader:
            stripped = line.strip()
            if not stripped:
                continue  # skip blank lines rather than raise IndexError
            fields = stripped.split('\t')
            unit, worker = misses[(fields[0], fields[1])]
            forgive.append((unit, worker))
            notify[worker].append(fields[1])

    with open(forgive_path, 'w') as writer:
        for unit, worker in forgive:
            writer.write("new Request({url: '/jobs/%s/workers/%s', onComplete: function(data) {console.log(JSON.decode(data).message);}}).put({forgive: %s}); \n" % (job_id, worker, unit))

    # NOTE(review): API key is hard-coded in source; rotate it and load it
    # from configuration instead.
    api_key = 'E5FEx4v9LzGe4X1wKD2n'
    intro = 'We have manually reviewed your sentence(s) and accepted them for their good quality. Your accuracy will be corrected accordingly. We appreciate your high quality work! (The following sentences are accepted: '
    with open(notify_path, 'w') as writer:
        for idx, (worker, sents) in enumerate(notify.items()):
            listed = ''.join(" '%s' " % sent for sent in sents)
            msg = intro + listed + ')'
            endpoint = 'https://api.crowdflower.com/v1/jobs/%s/workers/%s/notify.json?key=%s' % (job_id, worker, api_key)
            # Sentences are free text, so the argument is shell-quoted:
            # inside the former double quotes a " would end the string and
            # $ or ` would be expanded by the shell.  The URL contains '?'
            # and is quoted as well.
            writer.write('curl -X POST --data-urlencode %s %s; echo %d\n' % (quote('message=' + msg), quote(endpoint), idx))
def main():
    """Convert JSON units on standard input to a tab-separated listing.

    Every non-blank input line is decoded as one JSON unit.  After a header
    line, one row is printed for each judgment whose ``data`` contains an
    ``'input'`` entry.  A row holds the cleaned URL, the input text, the
    judgment's ``country`` and its ``worker_id``, separated by tabs.

    ``clean_url`` is not defined here; it must be supplied by the enclosing
    module.
    """
    import json
    import sys

    # Rows carry four fields, so the header labels four columns (the worker
    # id column used to have no label).  print(...) with one argument
    # behaves the same on Python 2 and Python 3.
    print('gif\tsent\tcountry\tworker_id')
    for line in sys.stdin:
        if not line.strip():
            continue  # a blank line is not a JSON document
        data = json.loads(line)
        url = clean_url(data['data']['url1'])
        for judgment in data['results']['judgments']:
            if 'input' in judgment['data']:
                row = [
                    url,
                    judgment['data']['input'],
                    judgment['country'],
                    str(judgment['worker_id']),
                ]
                print('\t'.join(row))
def main():
    """List non-golden judgments submitted after a hard-coded cut-off.

    The file named by ``sys.argv[1]`` is read line by line and each
    non-blank line is decoded as one JSON unit.  Units whose
    ``data['_golden']`` value is truthy are ignored.  For the remaining
    units, each judgment with an ``'input'`` entry in its ``data`` and a
    ``created_at`` value that compares greater than the cut-off is printed
    as one line: cleaned URL, input text, ``created_at`` and judgment id,
    joined by two tab characters.

    ``parse`` and ``clean_url`` are not defined here; they must be supplied
    by the enclosing module.
    """
    import json
    import sys

    # Judgments created at or before this instant are not reported.
    threshold = parse('2015-08-14T11:57:00-05:00')

    with open(sys.argv[1]) as reader:
        for line in reader:
            if not line.strip():
                # json.loads raises on an empty document; skip blank lines.
                continue
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            if data['data']['_golden']:
                continue
            for judgment in data['results']['judgments']:
                has_input = 'input' in judgment['data']
                if has_input and parse(judgment['created_at']) > threshold:
                    fields = [
                        url,
                        judgment['data']['input'],
                        judgment['created_at'],
                        str(judgment['id']),
                    ]
                    # One parenthesised argument keeps the output identical
                    # on Python 2 while remaining valid Python 3.
                    print('\t\t'.join(fields))