예제 #1
0
    data = {}
    csv_obj = csv.reader(text, delimiter="\t", quotechar='"')

    rows = [x for x in csv_obj]

    variables = list(
        set([
            y[0] for x in rows
            for y in re.findall("%(\w+)(\([\w,]*\))?%", x[0])
        ]))

    data['length'] = len(rows)
    data['speech_acts'] = list(
        set([y for x in rows for y in x[1].split(',') if bool(y)]))
    data['num_speech_acts'] = len(data['speech_acts'])
    data['variables'] = variables

    # TODO parse text?

    return data


if __name__ == "__main__":
    # Script entry point for the TSV analysis: set up the run parameters and
    # hand everything to utils.standard_main together with extract_from_file.
    input_ext = ".tsv"
    output_ext = ".tsv_analysis"
    output_lists = []
    # A one-element list holding the data/tsv path.
    queue = [join("data", "tsv")]

    utils.standard_main(
        queue,
        input_ext,
        extract_from_file,
        output_lists,
        output_ext,
    )
def accum_final(data):
    """Finalise the accumulated data; currently a plain pass-through.

    The argument is handed back as-is: the very same object, with no copy
    made and no modification applied.
    """
    return data

if __name__ == "__main__":
    # Script entry point for the XML rule analysis: set up the run parameters
    # and the starting accumulator, then delegate to utils.standard_main.
    output_ext = ".xml_rule_analysis"
    input_ext = ".xml"
    output_lists = []
    # NOTE(review): queue is a single joined path string here, not a list of
    # paths -- confirm utils.standard_main accepts a bare string.
    queue = join("data", "xml", "CiFStates")

    # Starting accumulator: three empty "components" sets followed by four
    # empty "counts" dicts (key order kept as originally written).
    initial_accum = {
        '_cif_state_components': set(),
        '_prom_week_components': set(),
        '_cif_library_components': set(),
        '_cif_state_counts': {},
        '_prom_week_counts': {},
        '_cif_library_counts': {},
        '__all_counts': {},
    }

    utils.standard_main(
        queue, input_ext, extract_from_file, output_lists, output_ext,
        accumulator=accumulator,
        accumulator_final=accum_final,
        init_accum=initial_accum,
    )