Exemplo n.º 1
0
def main():
    """Print sentence judgments newer than a fixed cutoff for non-golden units.

    The file named by ``sys.argv[1]`` is read one JSON document per line.
    For every document whose ``data['_golden']`` value is falsy, each
    judgment that carries an ``'input'`` entry and whose ``created_at``
    value parses to something greater than the cutoff is printed as one
    row: cleaned url, input, ``created_at`` and judgment ``id``, joined by
    two tab characters.

    ``parse`` and ``clean_url`` are resolved from the enclosing module.
    """
    import json
    import sys

    # NOTE(review): `parse` is presumably a date/time parser such as
    # dateutil's -- confirm against the module imports.
    cutoff = parse('2015-08-14T11:57:00-05:00')

    with open(sys.argv[1]) as source:
        for raw in source:
            unit = json.loads(raw)
            payload = unit['data']
            # The url is cleaned before the golden check, for every unit.
            url = clean_url(payload['url1'])
            if payload['_golden']:
                continue
            for judgment in unit['results']['judgments']:
                if 'input' not in judgment['data']:
                    continue
                if not (parse(judgment['created_at']) > cutoff):
                    continue
                columns = [
                    url,
                    judgment['data']['input'],
                    judgment['created_at'],
                    str(judgment['id']),
                ]
                print('\t\t'.join(columns))
Exemplo n.º 2
0
def main():
    """Append newly seen missed (url, sentence) pairs to the ``.test-miss`` log.

    ``sys.argv[1]`` names a file holding one JSON document per line.  Every
    judgment that has an ``'input'`` entry and a truthy ``'missed'`` value
    yields a ``(url, input)`` pair.  Pairs not yet recorded in
    ``<argv[1]>.test-miss`` are appended to that file and are also written
    to ``<argv[1]>.test-miss.delta``, which is truncated on every run and so
    ends up holding only the pairs that were new in this run.

    Pairs are compared case-insensitively and ignoring surrounding
    whitespace; each distinct pair is written at most once per run.

    ``clean_url`` is resolved from the enclosing module.
    """
    import sys
    import os.path as osp
    import json

    json_path = sys.argv[1]
    save_path = sys.argv[1] + '.test-miss'
    delta = save_path + '.delta'

    def pair_key(url, sentence):
        # One normalisation shared by the read side and the compare side.
        # Previously only the stored line was stripped, so a sentence with
        # trailing whitespace never matched its own stored copy and was
        # appended again on every run.
        return (url.strip().lower(), sentence.strip().lower())

    seen = set()

    if osp.exists(save_path):
        with open(save_path) as reader:
            for line in reader:
                # Remove only the line terminator before splitting so that an
                # empty sentence column ("url<TAB>") still yields two fields.
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) < 2:
                    # Blank or malformed line: nothing to remember.
                    continue
                seen.add(pair_key(fields[0], fields[1]))

    with open(json_path) as reader, open(save_path, 'a') as writer, open(delta, 'w') as w:
        for line in reader:
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' not in judgment['data'] or not judgment.get('missed'):
                    continue
                sentence = judgment['data']['input']
                key = pair_key(url, sentence)
                if key in seen:
                    continue
                # Remember the pair immediately so a repeat later in the same
                # input is not written a second time.
                seen.add(key)
                record = url + '\t' + sentence + '\n'
                writer.write(record)
                w.write(record)
Exemplo n.º 3
0
def main():
    """List the units and urls that have no judgments and write follow-up files.

    Command-line arguments (``sys.argv``):
      1. file with one JSON document per line;
      2. CSV file with ``_golden`` and ``url1`` columns;
      3. CSV file with ``_unit_id`` and ``url1`` columns.

    A unit id / url counts as judged when its JSON document has a non-empty
    ``results.judgments`` list.  Two summary lines (total, unjudged, judged)
    are printed, first for unit ids and then for urls.  Afterwards
    ``<arg2>.delete.js`` receives one ``$.ajax`` DELETE line per unjudged
    unit id and ``<arg2>.rest`` receives every non-empty unjudged url, one
    per line.

    ``csv`` and ``clean_url`` are resolved from the enclosing module.
    """
    import json
    import sys

    json_path, org_upload, unit_urls = sys.argv[1], sys.argv[2], sys.argv[3]

    judged_units = set()
    judged_urls = set()
    with open(json_path) as handle:
        for raw in handle:
            record = json.loads(raw)
            url = clean_url(record['data']['url1'])
            if len(record['results']['judgments']) == 0:
                continue
            judged_units.add(str(record['id']))
            judged_urls.add(url)

    every_unit = set()
    # Filled for every row but not read again inside this function.
    url_by_unit = {}
    with open(unit_urls) as handle:
        for row in csv.DictReader(handle):
            every_unit.add(row['_unit_id'])
            url_by_unit[row['_unit_id']] = clean_url(row['url1'])

    every_url = set()
    with open(org_upload) as handle:
        for row in csv.DictReader(handle):
            # Only rows with an empty `_golden` column contribute a url.
            if len(row['_golden']) != 0:
                continue
            every_url.add(clean_url(row['url1']))

    unjudged_units = every_unit - judged_units
    unjudged_urls = every_url - judged_urls

    print('%d %d %d' % (len(every_unit), len(unjudged_units), len(judged_units)))
    print('%d %d %d' % (len(every_url), len(unjudged_urls), len(judged_urls)))

    with open(org_upload + '.delete.js', 'w') as out:
        for unit_id in unjudged_units:
            out.write("$.ajax({url:'/jobs/761321/units', type:'DELETE', data:{'unit_ids[]':%s}});\n" % unit_id)

    with open(org_upload + '.rest', 'w') as out:
        for pending in unjudged_urls:
            if len(pending) > 0:
                out.write('%s\n' % pending)
Exemplo n.º 4
0
def main():
    """Generate forgive and notify scripts for reviewed "missed" sentences.

    Command-line arguments (``sys.argv``):
      1. file with one JSON document per line; judgments that have an
         ``'input'`` entry and a truthy ``'missed'`` value are indexed by
         ``(cleaned url, input)``;
      2. tab-separated file whose first two columns (url, sentence) select
         entries from that index.

    Output files, both named by appending a suffix to argument 2:
      * ``<arg2>forgive.js``  -- one JavaScript ``Request(...).put(...)``
        line per selected judgment;
      * ``<arg2>notify.curl`` -- one ``curl`` command per (job, worker)
        listing that worker's selected sentences.

    Blank lines in argument 2 are ignored; lines that are malformed or do
    not match any indexed judgment are reported on stderr and skipped.

    ``clean_url`` is resolved from the enclosing module.
    """
    import sys
    import json
    from collections import defaultdict
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    json_path = sys.argv[1]
    fn_file = sys.argv[2]
    forgive_path = fn_file + 'forgive.js'
    notify_path = fn_file + 'notify.curl'

    # (url, sentence) -> (job_id, unit_id, worker_id).  The job id is stored
    # with each miss instead of in one variable overwritten per judgment, so
    # every generated line refers to the job its judgment came from.
    misses = {}

    with open(json_path) as reader:
        for line in reader:
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' in judgment['data'] and judgment.get('missed'):
                    key = (url, judgment['data']['input'])
                    misses[key] = (judgment['job_id'],
                                   judgment['unit_id'],
                                   judgment['worker_id'])

    forgive = []
    notify = defaultdict(list)
    with open(fn_file) as reader:
        for lineno, line in enumerate(reader, 1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 2:
                sys.stderr.write('%s:%d: expected url<TAB>sentence, skipped\n'
                                 % (fn_file, lineno))
                continue
            key = (fields[0], fields[1])
            if key not in misses:
                sys.stderr.write('%s:%d: no missed judgment matches, skipped\n'
                                 % (fn_file, lineno))
                continue
            job_id, unit, worker = misses[key]
            forgive.append((job_id, unit, worker))
            notify[(job_id, worker)].append(fields[1])

    with open(forgive_path, 'w') as writer:
        for job_id, unit, worker in forgive:
            writer.write(
                "new Request({url: '/jobs/%s/workers/%s', onComplete: function(data) {console.log(JSON.decode(data).message);}}).put({forgive: %s}); \n"
                % (job_id, worker, unit))

    # NOTE(review): hard-coded API credential.  It should be supplied from
    # outside the source tree (and this value rotated); left in place here so
    # the generated commands keep working.
    api_key = 'E5FEx4v9LzGe4X1wKD2n'
    with open(notify_path, 'w') as writer:
        for idx, ((job_id, worker), sents) in enumerate(notify.items()):
            msg = 'We have manually reviewed your sentence(s) and accepted them for their good quality. Your accuracy will be corrected accordingly. We appreciate your high quality work! (The following sentences are accepted: '
            msg += ''.join(" '%s' " % sent for sent in sents)
            msg += ')'
            endpoint = ('https://api.crowdflower.com/v1/jobs/%s/workers/%s/notify.json?key=%s'
                        % (job_id, worker, api_key))
            # The sentences are free text typed by workers.  Shell-quote the
            # whole argument so quotes, `$` or backticks inside a sentence
            # cannot break or hijack the generated command line.
            writer.write('curl -X POST --data-urlencode %s %s; echo %d\n'
                         % (quote('message=' + msg), quote(endpoint), idx))
Exemplo n.º 5
0
def main():
    """Dump every sentence judgment read from stdin as a tab-separated table.

    Each stdin line is parsed as one JSON document.  After a header row, one
    row is printed per judgment that has an ``'input'`` entry: cleaned url,
    input sentence, country and worker id.

    ``clean_url`` is resolved from the enclosing module.
    """
    import json
    import sys

    # The header names all four columns emitted below.  It used to list only
    # three, so header-aware readers mislabelled or shifted the columns.
    print('gif\tsent\tcountry\tworker_id')

    for line in sys.stdin:
        data = json.loads(line)
        url = clean_url(data['data']['url1'])
        for judgment in data['results']['judgments']:
            if 'input' not in judgment['data']:
                continue
            row = [
                url,
                judgment['data']['input'],
                judgment['country'],
                str(judgment['worker_id']),
            ]
            print('\t'.join(row))
Exemplo n.º 6
0
def main():
    """Write the forgive and notify scripts for manually accepted sentences.

    Command-line arguments (``sys.argv``):
      1. file with one JSON document per line; judgments that have an
         ``'input'`` entry and a truthy ``'missed'`` value are indexed by
         ``(cleaned url, input)``;
      2. tab-separated file whose first two columns (url, sentence) select
         entries from that index.

    Output files, both named by appending a suffix to argument 2:
      * ``<arg2>forgive.js``  -- one JavaScript ``Request(...).put(...)``
        line per selected judgment;
      * ``<arg2>notify.curl`` -- one ``curl`` command per (job, worker)
        listing that worker's selected sentences.

    Blank lines in argument 2 are ignored; lines that are malformed or do
    not match any indexed judgment are reported on stderr and skipped.

    ``clean_url`` is resolved from the enclosing module.
    """
    import sys
    import json
    from collections import defaultdict
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    json_path = sys.argv[1]
    fn_file = sys.argv[2]
    forgive_path = fn_file + 'forgive.js'
    notify_path = fn_file + 'notify.curl'

    # (url, sentence) -> (job_id, unit_id, worker_id).  Keeping the job id
    # per miss avoids relying on a single variable that was overwritten for
    # every judgment.
    misses = {}

    with open(json_path) as reader:
        for line in reader:
            data = json.loads(line)
            url = clean_url(data['data']['url1'])
            for judgment in data['results']['judgments']:
                if 'input' in judgment['data'] and judgment.get('missed'):
                    sentence = judgment['data']['input']
                    misses[(url, sentence)] = (judgment['job_id'],
                                               judgment['unit_id'],
                                               judgment['worker_id'])

    forgive = []
    notify = defaultdict(list)
    with open(fn_file) as reader:
        for lineno, line in enumerate(reader, 1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 2:
                sys.stderr.write('%s:%d: expected url<TAB>sentence, skipped\n'
                                 % (fn_file, lineno))
                continue
            key = (fields[0], fields[1])
            if key not in misses:
                sys.stderr.write('%s:%d: no missed judgment matches, skipped\n'
                                 % (fn_file, lineno))
                continue
            job_id, unit, worker = misses[key]
            forgive.append((job_id, unit, worker))
            notify[(job_id, worker)].append(fields[1])

    with open(forgive_path, 'w') as writer:
        for job_id, unit, worker in forgive:
            writer.write("new Request({url: '/jobs/%s/workers/%s', onComplete: function(data) {console.log(JSON.decode(data).message);}}).put({forgive: %s}); \n" % (job_id, worker, unit))

    # NOTE(review): hard-coded API credential.  It should be supplied from
    # outside the source tree (and this value rotated); left in place here so
    # the generated commands keep working.
    api_key = 'E5FEx4v9LzGe4X1wKD2n'
    with open(notify_path, 'w') as writer:
        for idx, ((job_id, worker), sents) in enumerate(notify.items()):
            msg = 'We have manually reviewed your sentence(s) and accepted them for their good quality. Your accuracy will be corrected accordingly. We appreciate your high quality work! (The following sentences are accepted: '
            msg += ''.join(" '%s' " % sent for sent in sents)
            msg += ')'
            endpoint = ('https://api.crowdflower.com/v1/jobs/%s/workers/%s/notify.json?key=%s'
                        % (job_id, worker, api_key))
            # Sentences are worker-entered free text: shell-quote the whole
            # argument so embedded quotes, `$` or backticks cannot break or
            # hijack the generated command line.
            writer.write('curl -X POST --data-urlencode %s %s; echo %d\n'
                         % (quote('message=' + msg), quote(endpoint), idx))
Exemplo n.º 7
0
def main():
    """Print one tab-separated row per sentence judgment found on stdin.

    Every stdin line is parsed as a JSON document.  A header row is printed
    first; then, for each judgment that has an ``'input'`` entry, a row with
    the cleaned url, the input sentence, the judgment's country and its
    worker id.

    ``clean_url`` is resolved from the enclosing module.
    """
    import json
    import sys

    # Four column names for the four fields written per row.  The header
    # previously stopped at 'country' although worker_id is also emitted.
    header = ['gif', 'sent', 'country', 'worker_id']
    print('\t'.join(header))

    for line in sys.stdin:
        data = json.loads(line)
        url = clean_url(data['data']['url1'])
        for judgment in data['results']['judgments']:
            if 'input' in judgment['data']:
                print('\t'.join([
                    url,
                    judgment['data']['input'],
                    judgment['country'],
                    str(judgment['worker_id']),
                ]))
Exemplo n.º 8
0
def main():
    """Report judgments newer than a hard-coded timestamp, skipping golden units.

    Reads the file named by ``sys.argv[1]``, one JSON document per line.
    Documents whose ``data['_golden']`` value is truthy are skipped.  For the
    rest, every judgment that has an ``'input'`` entry and a ``created_at``
    value that parses to something greater than the threshold is printed as
    cleaned url, input, ``created_at`` and judgment ``id``, separated by two
    tab characters.

    ``parse`` and ``clean_url`` are resolved from the enclosing module.
    """
    import json
    import sys

    def when(stamp):
        # NOTE(review): `parse` is presumably a date/time parser -- confirm.
        return parse(stamp)

    threshold = when('2015-08-14T11:57:00-05:00')

    def wanted(judgment):
        # True for judgments that carry an input and are newer than the threshold.
        return 'input' in judgment['data'] and when(judgment['created_at']) > threshold

    with open(sys.argv[1]) as handle:
        for text in handle:
            entry = json.loads(text)
            url = clean_url(entry['data']['url1'])
            if entry['data']['_golden']:
                continue
            for judgment in entry['results']['judgments']:
                if not wanted(judgment):
                    continue
                cells = (
                    url,
                    judgment['data']['input'],
                    judgment['created_at'],
                    str(judgment['id']),
                )
                print('\t\t'.join(cells))