Example #1
def main(argv):
    global dbg_editor, dbg_sql, dbg_track, hub, off_hub

    # Unicode I/O
    sys.stdin = codecs.getreader('UTF-8')(sys.stdin)
    sys.stdout = codecs.getwriter('UTF-8')(sys.stdout)
    sys.stderr = codecs.getwriter('UTF-8')(sys.stderr)

    if len(argv[1:]) > 0 and argv[1] in ('-h', '--help'):
        print >> sys.stderr, u'TransCenter Live Server'
        print >> sys.stderr, u'Usage: {0} [-p port=8080] [-t timeout=600] [--dbg-editor] [--dbg-sql] [--dbg-track] [--dbg-webpy]'.format(argv[0])
        print >> sys.stderr, u'(ctrl+c to stop server)'
        sys.exit(2)

    opts, argv = getopt.getopt(argv[1:], 'p:t:', ['dbg-editor', 'dbg-sql', 'dbg-track', 'dbg-webpy'])
    sys.argv = [sys.argv[0]]
    test_file = False
    timeout = 600
    for o, a in opts:
        if o == '-p':
            sys.argv.append(a)
        elif o == '-t':
            timeout = int(a)
        elif o == '--dbg-editor':
            dbg_editor = True
        elif o == '--dbg-sql':
            dbg_sql = True
            user_db.printing = dbg_sql
        elif o == '--dbg-track':
            dbg_track = True
        elif o == '--dbg-webpy':
            web.config['debug'] = True

    # Realtime hub
    io_extra.log(u'STATUS: starting Realtime hub...')
    hub = rt_hub.RealtimeHub(trans_db, db_write_lock, timeout=timeout) if not dbg_editor else None
    # [task][source] = target
    off_hub = {}
    # Init translators
    # TODO: change to on-demand/timeout
    for task in sorted((f for f in os.listdir(io_extra.TASK_DIR) if f.endswith('.data'))):
        config = io_extra.read_cfg(os.path.join(io_extra.TASK_DIR, task, 'config.txt'))
        if config['task'] in (track.REALTIME, track.REALTIME_STATIC):
            realtime_d = config['config']
            hub.manual_start(realtime_d)
        # Load offline data
        elif config['task'] == 'offline':
            src = codecs.open(os.path.join(io_extra.TASK_DIR, task, 'source.txt'), 'rb', 'UTF-8')
            tgt = codecs.open(os.path.join(io_extra.TASK_DIR, task, 'target.txt'), 'rb', 'UTF-8')
            off_hub[task] = dict((s.strip(), t.strip()) for (s, t) in itertools.izip(src, tgt))

    io_extra.log(u'STATUS: Realtime hub started.')

    # Database writer
    writer = threading.Thread(target=io_extra.run_database_writer, args=(trans_db, db_write_lock, ev_queue, ed_queue, queue_lock))
    writer.start()
    io_extra.log(u'STATUS: Database writing started.')

    # Start web app
    io_extra.log(u'STATUS: Main webpy app starting.')
    app.run()

    # Cleanup
    ev_queue.append(None)
    if not dbg_editor:
        hub.close()
    io_extra.log(u'STATUS: Realtime hub closed.')
    writer.join()
    io_extra.log(u'STATUS: All database writes finished.')
    session_store.cleanup(0)
    io_extra.log(u'STATUS: Ready to shutdown.')
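
# A minimal sketch of how this entry point might be launched. The module name
# transcenter.py and the __main__ guard below are assumptions; the flags come
# straight from the usage text printed above:
#
#   python transcenter.py -p 8080 -t 600 --dbg-sql
#
if __name__ == '__main__':
    main(sys.argv)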
Example #2
def reload_groups():
    global groups
    # Entries in groups.txt are stored as value: key, so invert them here
    groups = dict((v, k) for (k, v) in io_extra.read_cfg(os.path.join(io_extra.DATA_DIR, 'groups.txt')).iteritems())
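
# A small sketch of the inversion above, assuming io_extra.read_cfg parses
# groups.txt into a plain key/value dict; the sample entries are hypothetical.
cfg = {'groupA': 'alice', 'groupB': 'bob'}                  # pretend read_cfg output
groups_demo = dict((v, k) for (k, v) in cfg.iteritems())
assert groups_demo == {'alice': 'groupA', 'bob': 'groupB'}  # lookups now go value -> key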
Example #3
def report_edits_pauses(t_dir, r_dir):

    users = []  # Users in order
    user_dict = {}  # user_dict[user] = user_i
    user_trans = []  # user_trans[user_i][sent_i] = [inter1, inter2, ...]
                     # where each intermediate is [left, diff, right, op, opclass, time]
    base_trans = collections.defaultdict(dict)  # base_trans[user][sent] = base (MT) translation before editing

    # Get the directory name / database entry for this task
    task_id = os.path.basename(os.path.abspath(t_dir))

    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']

    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    c = conn.cursor()
    # Find users who finished this task
    i = 0
    for (u,) in c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC'''):
        users.append(u)
        user_dict[u] = i
        i += 1

    # Populate base translations (mt) for each user
    for user in users:
        for (sent, text) in c.execute('''SELECT sent, text FROM mt WHERE user=? ORDER BY sent ASC''', ('STATIC' if task == track.REALTIME_STATIC else user,)):
            base_trans[user][sent] = text
        # Each user starts with a list of intermediates, starting with the base translation
        user_trans.append([[['', t, '', -1, 'start', '']] for (sent, t) in sorted(base_trans[user].iteritems())])

    # Read user edits
    res = c.execute('''SELECT user, sent, caret, op, input, time FROM edits ORDER BY user ASC, sent ASC, time ASC''')

    # Trace edits, annotate with types, times
    for r in res:
        user_i = user_dict.get(r[0], -1)
        if user_i == -1:
            continue
        sent_i = r[1] - 1
        caret = r[2]
        op = r[3]
        diff = r[4]
        time = r[5]
        if user_trans[user_i][sent_i][-1][3] in (-1, track.INS):
            prev = u''.join(user_trans[user_i][sent_i][-1][:3])
        elif user_trans[user_i][sent_i][-1][3] == track.DEL:
            prev = u''.join((user_trans[user_i][sent_i][-1][0], user_trans[user_i][sent_i][-1][2]))
        else:
            io_extra.log(u'Unknown edit op, using empty string')
            prev = u''
        left = prev[:caret]
        # For deletes, diff overlaps prev, so cut out
        right = prev[caret + len(diff):] if op == track.DEL else prev[caret:]
        if op == track.INS:
            opclass = u'ins'
        elif op == track.DEL:
            opclass = u'del'
        else:
            # Only count inserts and deletes
            continue
        user_trans[user_i][sent_i].append([left, diff, right, op, opclass, unicode(time)])

    # Final outputs
    for trans in user_trans:
        for sent in trans:
            op = sent[-1][3]
            if op == -1:
                prev = sent[-1][1]
            elif op == track.INS:
                prev = u''.join(sent[-1][:3])
            else:
                prev = u''.join((sent[-1][0], sent[-1][2]))
            sent.append(['', prev, '', -1, 'end', ''])

    # Pull initial and final focus/blur times
    res = c.execute('''SELECT user, sent, time FROM events ORDER BY user ASC, sent ASC, time ASC''')
    prev_key = None
    for r in res:
        user_i = user_dict.get(r[0], -1)
        if user_i == -1:
            continue
        sent_i = r[1] - 1
        # First record for this (user, sentence): initial focus
        if (user_i, sent_i) != prev_key:
            user_trans[user_i][sent_i][0][5] = str(r[2])
        else:
            # Overwrite with each following record so the last one (final blur) sticks
            user_trans[user_i][sent_i][-1][5] = str(r[2])
        prev_key = (user_i, sent_i)

    # Write user-specific reports (CSV)
    for (i, u) in enumerate(users):
        csv_out = codecs.open(os.path.join(r_dir, 'edits.' + u + '.csv'), mode='wb', encoding='UTF-8')
        print >>csv_out, u'\t'.join(('Time', 'Operation', 'Left', 'Edit', 'Right'))
        for sent_edits in user_trans[i]:
            for edit in sent_edits:
                print >>csv_out, u'\t'.join((edit[5], edit[4], edit[0], edit[1], edit[2]))
            # "empty" line
            print >>csv_out, u'\t'.join(('', '', '', '', ''))
        csv_out.close()

    # Escape everything
    for trans in user_trans:
        for sent in trans:
            for edit in sent:
                edit[0] = io_extra.html_escape(edit[0])
                edit[1] = io_extra.html_escape(edit[1])
                edit[2] = io_extra.html_escape(edit[2])
    c.close()
    conn.close()

    # Write user-specific reports (HTML)
    for (i, u) in enumerate(users):
        # Write out report using template
        html_out = codecs.open(os.path.join(r_dir, 'edits.' + u + '.html'), mode='wb', encoding='UTF-8')
        html_out.write(unicode(render.report_edits(user_trans[i])))
        html_out.close()

    # Write user-specific pause reports (CSV only)
    for (i, u) in enumerate(users):
        csv_out = codecs.open(os.path.join(r_dir, 'pause.' + u + '.csv'), mode='wb', encoding='UTF-8')
        print >>csv_out, u'\t'.join(('ID', 'Initial', 'Final', 'Short', 'Medium', 'Long', 'Total Time', 'Pause Time', 'Words', 'APR', 'PWR'))
        for (j, sent_edits) in enumerate(user_trans[i]):
            # Count pauses (initial, final, short, medium, long)
            ip = 0
            fp = 0
            pause = {'s': 0, 'm': 0, 'l': 0, 't': 0}
            total = 0
            words = 0
            apr = 0
            pwr = 0
            def mark_pause(p):
                # Actually a pause
                if p >= PAUSE_SHORT:
                    if p >= PAUSE_LONG:
                        pause['l'] += 1
                    elif p >= PAUSE_MEDIUM:
                        pause['m'] += 1
                    else:
                        # p >= PAUSE_SHORT
                        pause['s'] += 1
                    pause['t'] += p
            # Initial pause:
            ip = 0
            try:
                ip = long(sent_edits[1][5]) - long(sent_edits[0][5])
            except (ValueError, IndexError):
                io_extra.log(u'Warning: cannot compute initial pause, setting to 0 for ({}, {})'.format(u, j + 1))
            mark_pause(ip)
            # If edited
            if len(sent_edits) > 2:
                for k in range(2, len(sent_edits) - 1):
                    p = long(sent_edits[k][5]) - long(sent_edits[k - 1][5])
                    mark_pause(p)
                # Final pause
                fp = long(sent_edits[-1][5]) - long(sent_edits[-2][5])
                mark_pause(fp)
            # Total time
            total = 0
            try:
                total = long(sent_edits[-1][5]) - long(sent_edits[0][5])
            except (ValueError, IndexError):
                io_extra.log(u'Warning: cannot compute total, setting to 0 for ({}, {})'.format(u, j + 1))
            # Words
            words = len(sent_edits[-1][1].split())
            # Average pause ratio
            allp = pause['s'] + pause['m'] + pause['l']
            try:
                apr = (float(pause['t']) / allp) / (float(total) / words)
            except ZeroDivisionError:
                # No pauses or no words
                apr = 0
            # Pause to word ratio
            pwr = float(allp) / words if words else 0
            print >>csv_out, u'\t'.join(str(n) for n in (j + 1, ip, fp, pause['s'], pause['m'], pause['l'], total, pause['t'], words, apr, pwr))
        csv_out.close()
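
# A worked sketch of the edit-trace slicing and the pause ratios above. The
# strings, times, and counts are made up; track.INS and the PAUSE_* thresholds
# are taken as given and not redefined here.

# Reconstructing one insert the way the trace loop does:
prev = u'the cat sat'                     # previous intermediate text
caret, diff = 4, u'black '                # caret position and inserted text
left, right = prev[:caret], prev[caret:]  # left = u'the ', right = u'cat sat'
assert left + diff + right == u'the black cat sat'

# Pause ratios with hypothetical counts (times in whatever unit the DB stores):
pause_time, n_pauses = 6000, 3            # summed pause duration, number of pauses
total, words = 30000, 10                  # total editing time, words in final text
apr = (float(pause_time) / n_pauses) / (float(total) / words)  # average pause / time per word
pwr = float(n_pauses) / words                                  # pauses per word
# apr == 2000.0 / 3000.0 == 0.666..., pwr == 0.3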
Example #4
def reload_config():
    global config
    config = io_extra.read_cfg(os.path.join(io_extra.DATA_DIR, 'config.txt'))
Example #5
def get_stats(t_dir):

    header = []  # Column headings
    static_data = []  # Columns that are the same for all users
    users = []  # Users in order
    user_dict = {}  # user_dict[user] = user_i
    user_data = []  # data[user_i][type_i] = [val1, val2, ...]
    col_avg = []  # Does column average (or is static)?
                  # True: average or use static, False: drop in average report

    # Get the directory name / database entry for this task
    task_id = os.path.basename(os.path.abspath(t_dir))

    # Read config
    config = io_extra.read_cfg(os.path.join(t_dir, 'config.txt'))
    task = config[u'task']

    # Populate left/right plus headers (including next header for user input)
    if task in (track.REALTIME, track.REALTIME_STATIC, track.OFFLINE):
        header.append(H_SOURCE)
        col_avg.append(True)
        static_data.append(io_extra.read_utf8(os.path.join(t_dir, 'source.txt')))
        header.append(H_MT)
        col_avg.append(False)
        header.append(H_POST_EDITED)
        col_avg.append(False)

    n_sent = len(static_data[0])  # Source length

    # Pre-pend sentence IDs
    header.insert(0, H_ID)
    static_data.insert(0, range(1, n_sent + 1))
    col_avg.insert(0, True)

    # User Data
    conn = sqlite3.connect(os.path.join(t_dir, 'data.db'))
    c = conn.cursor()

    # Find users who finished this task
    i = 0
    for (u,) in c.execute('''SELECT DISTINCT user FROM status WHERE status='finished' ORDER BY user ASC'''):
        users.append(u)
        user_data.append([])
        user_dict[u] = i
        i += 1

    # MT
    res = c.execute('''SELECT user, sent, text FROM mt ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent, mt=True, task=task)

    # Post-edited
    res = c.execute('''SELECT user, sent, text FROM translations ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent)

    # Compute HTER
    header.append(H_HTER)
    col_avg.append(True)
    for user in user_data:
        user.append(hter(user[0], user[1], norm=True))

    # Get user ratings
    header.append(H_RATING)
    col_avg.append(True)
    res = c.execute('''SELECT user, sent, rating FROM ratings ORDER BY user ASC, sent ASC, time ASC''')
    add_vals(user_dict, user_data, res, n_sent)

    # Key/mouse counts
    res = c.execute('''SELECT user, sent, op, count FROM counts ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_KEYPRESS)
    header.append(H_MOUSECLICK)
    col_avg.append(True)
    col_avg.append(True)
    add_km_sums(user_dict, user_data, res, n_sent)

    # User edit counts
    res = c.execute('''SELECT user, sent, caret, op, input FROM edits ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_EDITS)
    add_edits(user_dict, user_data, res, n_sent)
    col_avg.append(True)

    # Times from sentence focus/blur
    res = c.execute('''SELECT user, sent, op, time FROM events ORDER BY user ASC, sent ASC, time ASC''')
    header.append(H_TIME)
    add_times(user_dict, user_data, res, n_sent)
    col_avg.append(True)

    c.close()
    conn.close()

    return (config, header, col_avg, static_data, users, user_data)
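
# A sketch of how a caller might consume the parallel lists returned above.
# Column order follows the appends in get_stats (ID and source in static_data;
# MT, post-edited, HTER, rating, keypress, mouseclick, edits, time per user).
# The task path is hypothetical and the real report-writing code is not shown
# here, so treat this as illustrative only.
config, header, col_avg, static_data, users, user_data = get_stats('tasks/demo.data')
for (u_i, user) in enumerate(users):
    print u'== ' + user
    columns = static_data + user_data[u_i]      # static columns first, then this user's
    for sent_i in range(len(static_data[0])):   # one row per sentence
        row = [col[sent_i] for col in columns]  # header[k] labels row[k]; col_avg[k] says if it averages
        print u'\t'.join(unicode(v) for v in row)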