示例#1
0
文件: cut.py 项目: joyrexus/ldp
#!/usr/bin/env python 

'''
cut.py -- cut specified columns from a TSV-formatted file.

Assumes the file contains a header line containing column names, 
of which the columns to be cut are a subset.

Modify the columns variable below.

'''
import sys
from datastore.table import Reader

# Every command-line argument is taken to be a transcript file to process.
transcripts = sys.argv[1:]

# Whitespace-separated names of the columns to keep, in output order.
_column_spec = '''id subject session row time line key 
             p_utts_orig p_utts p_form p_lrb p_obj p_gloss p_orient p_mspd 
             c_utts_orig c_utts c_form c_lrb c_obj c_gloss c_orient c_mspd 
             context'''
columns = _column_spec.split()

def pprint(values):
    '''Print the given strings on a single line, separated by tabs.

    values -- an iterable of strings (str.join rejects non-string items).
    '''
    # Call form of print: same output under Python 2, and valid syntax under
    # Python 3, where the print statement no longer exists.
    print("\t".join(values))

# Header line first: the names of the selected columns.
pprint(columns)

# Then, for each transcript in turn, one output line per row holding only
# the selected columns.
for path in transcripts:
    reader = Reader(path)
    for selected in reader.values(*columns):
        pprint(selected)
示例#2
0
文件: merge.py 项目: joyrexus/ldp
                 (19,86), (20,90)])

speech = Reader('speech.xls')
ses = Reader('ses.xls')

# Column names used for the SES part of the output header.
ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split()

# Index the SES rows by their SUBJ value; a later row with the same SUBJ
# replaces an earlier one.
subjects = defaultdict(dict)
for row in ses:
    subject_id = row['SUBJ']
    subjects[subject_id] = row


# One record per (subject, session) pair, built from the speech rows.  Each
# record carries the session's age (looked up in sess_map) plus two word-type
# slots: CWT for rows whose speaker is "child", PWT for every other speaker.
visits = defaultdict(dict)
columns = 'subject session speaker word_types'.split()

for subj, sess, spkr, wt in speech.values(*columns):
    age = sess_map[int(sess)]
    # Membership test with `in` instead of dict.has_key(), which was removed
    # in Python 3.  Neither form triggers the defaultdict factory, so the
    # blank record is created exactly once per visit.
    if (subj, sess) not in visits:
        visits[subj, sess] = {'SUBJ': subj, 'SESS': sess, 'AGE': age,
                              'CWT': '', 'PWT': ''}
    if spkr == "child":
        visits[subj, sess]['CWT'] = wt
    else:
        visits[subj, sess]['PWT'] = wt

# Column names for the per-visit part of each output line.
viz_cols = 'SESS AGE PWT CWT'.split()

# Header line: SES columns followed by the per-visit columns.  The call form
# of print gives the same output under Python 2 and is valid Python 3 syntax.
print("\t".join(ses_cols + viz_cols))

for id, data in visits.items():
    subj, sess = id
示例#3
0
文件: merge.py 项目: joyrexus/ldp
from collections import defaultdict


ses = Reader('ses.xls')

# Column names for the SES fields.
ses_cols = 'SUBJ SEX EDU INC RACE ETHN'.split()

# Index the SES rows by their SUBJ value; a later row with the same SUBJ
# replaces an earlier one.
subjects = defaultdict(dict)
for row in ses:
    subject_id = row['SUBJ']
    subjects[subject_id] = row


# One record per subject, built from the outcome rows.  Every record starts
# with all VOCB*/READ* slots blank; slots are then filled in from individual
# rows depending on the row's session.
outcomes = Reader('outcomes.tsv')
out = defaultdict(dict)
columns = 'SUBJ SESS VOCAB READ_WJ READ_GM'.split()

for subj, sess, voc, rwj, rgm in outcomes.values(*columns):
    # Membership test with `in` instead of dict.has_key(), which was removed
    # in Python 3.  Neither form triggers the defaultdict factory, so the
    # blank record is created only for the first row of each subject.
    if subj not in out:
        out[subj] = {
                'SUBJ': subj,
                'VOCB1': '',
                'VOCB2': '', 
                'VOCB3': '', 
                'VOCB4': '', 
                'READ1': '', 
                'READ2': '', 
                'READ3': '', 
                'READ4': '', 
                'READ5': ''
                }
    # A non-empty vocabulary value from session "5" goes into the first slot.
    if sess == "5" and voc:
        out[subj]['VOCB1'] = voc
示例#4
0
文件: select.py 项目: joyrexus/ldp
from datastore.table import Reader

# Reader over 'ses.tsv'; the loop at the end of the script pulls the selected
# columns from it through r.values(...).
r = Reader('ses.tsv')

def pprint(args):
    '''Print the given strings on a single line, separated by tabs.

    args -- an iterable of strings (str.join rejects non-string items).
    '''
    # Call form of print: same output under Python 2, and valid syntax under
    # Python 3, where the print statement no longer exists.
    print("\t".join(args))

# Header line with the output column names.
header = 'SUBJ SEX EDU INC RACE ETHN'.split()
pprint(header)

# One output line per row, taking the source columns in the header's order.
source_columns = ('id', 'sex', 'edu', 'income', 'race', 'ethn')
for record in r.values(*source_columns):
    pprint(record)