예제 #1
0
def generate_features(input_data, mode, filename=None):
    """Produce features for every row of *input_data*, optionally saving them.

    Each row is indexed from the end: ``row[-3]`` is used as the document id,
    ``row[-2]`` as the sentence id and ``row[-1]`` as the sentence. All three,
    together with *mode*, are handed to
    ``Generator.produce_features_from_sentence``.

    Args:
        input_data: Iterable of indexable rows with at least three fields.
        mode: Passed through unchanged to the feature generator and echoed
            in the progress output.
        filename: If not ``None``, one ``doc_id,sentence_id,features`` line
            is appended to this file per row. The three values are joined
            with ``str.join``, so they must be ``str`` in that case.

    Returns:
        A list with the generator's result for each row, in input order.
    """
    # Function-scope imports, as in the original; ``datetime`` is imported
    # once here instead of on every loop iteration.
    import datetime

    from umls import UMLS
    from generate_features import Generator

    cui_retriever = UMLS()
    feature_generator = Generator(cui_retriever)

    def save_one_line(doc_id, sentence_id, features, filename):
        """Append a single comma-separated record to *filename*."""
        record = ','.join([doc_id, sentence_id, features]) + "\n"

        # The file is reopened in append mode for each record, so rows that
        # were already processed are on disk if a later row raises.
        with open(filename, 'a') as f:
            # ``write`` rather than ``writelines``: ``record`` is one string,
            # and ``writelines`` would iterate it character by character.
            f.write(record)

    new_data = []
    for i, line in enumerate(input_data):
        # Progress trace: row index, timestamp, mode and the raw row.
        print(i, datetime.datetime.now(), mode, line)

        sentence = line[-1]
        sentence_id = line[-2]
        doc_id = line[-3]
        features = feature_generator.produce_features_from_sentence(
            sentence, sentence_id, doc_id, mode)
        new_data.append(features)

        if filename is not None:
            save_one_line(doc_id, sentence_id, features, filename)

    return new_data