Python Datahelper: примеры использования

Язык программирования: Python

Пространство имен/Пакет: datahelper

Класс/Тип: Datahelper

Примеров на hotexamples.com: 13

Python Datahelper: найдено 13 примеров. Это лучшие примеры кода на Python для datahelper.Datahelper, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Datahelper(13)

get_normalised_phrase(6)

format_atc_code(2)

make_pheno_string(2)

format_digit_code(1)

get_excluded_words(1)

get_merge_key_list(1)

load_cls_phrases(1)

match_all_phrases(1)

Пример #1

Показать файл

def main():
    """Append an ATC level-3 description to each CSV line read from stdin.

    For every comma-separated input line, the fourth field is looked up as a
    ``level3`` code in the ``atc_classification`` table. When a row is found,
    the first four input fields, the lower-cased description passed through
    ``Datahelper.get_normalised_phrase`` and the fifth input field are
    printed as one comma-separated line.

    Connection parameters are read from the environment variables CHHOST,
    CHUSER, CHPWD, CHPORT and CHDB. If the connection cannot be opened the
    error is reported on stderr and the process exits with status 1.

    Returns:
        The total number of description rows found across all input lines.
    """
    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        # Report on stderr so the message does not end up in the CSV output.
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    # The code comes from stdin, so it is bound as a parameter rather than
    # interpolated into the SQL text.
    query = ("select level3_description from atc_classification "
             "where level3 = %s limit 1")

    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[3]
            rdata = data[:4]

            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    descr = dh.get_normalised_phrase(row[0].lower())
                    rdata.append(descr)
                    rdata.append(data[4])
                    print(','.join(rdata))
    finally:
        # Release the connection even if a malformed line raises.
        chembl.close()

    return count

Пример #2

Показать файл

def main(options):
    """Attach synonyms to each CSV line read from stdin.

    The synonym table is loaded from ``options.synfile`` with
    ``load_synonyms``. For each input line the second field is lower-cased
    and the keys returned by ``Datahelper.get_merge_key_list`` are tried in
    order; the first key present in the synonym table wins and its synonyms
    are printed pipe-separated after the id and phrase. Lines without a
    matching key are printed with the id and phrase only.

    Args:
        options: object with a ``synfile`` attribute naming the synonym file.

    Returns:
        A ``(count, mcount)`` tuple: lines read and lines with a match.
    """
    count = 0
    mcount = 0
    dh = Datahelper()

    try:
        # The context manager closes the file once the table is loaded.
        with open(options.synfile, "r") as fh:
            synonyms = load_synonyms(fh)
    except IOError as e:
        sys.stderr.write("I/O error({0}): {1}\n".format(e.errno, e.strerror))
        sys.exit(1)
    except TypeError as e:
        sys.stderr.write("Missing arguments  %s\n" % (e,))
        sys.exit(1)
    except Exception:
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        phrase = data[1].lower()
        for key in dh.get_merge_key_list(phrase):
            if key in synonyms:
                print("%s,%s,%s" % (data[0], phrase, '|'.join(synonyms[key])))
                mcount += 1
                break
        else:
            # No candidate key matched: echo the record without synonyms.
            print("%s,%s" % (data[0], phrase))

    return count, mcount

Пример #3

Показать файл

def main():
  """Append a combined ATC description to each CSV line read from stdin.

  The first field of every comma-separated input line is looked up as a
  ``level3`` code in the ``atc_classification`` table. When a row is found,
  a description of the form ``<level1>: <normalised level2 + level3>`` is
  appended to the input fields and the line is printed.

  Returns:
    The total number of description rows found across all input lines.
  """
  # NOTE(review): the connection setup was missing from this snippet (the
  # ``try`` body was empty, which is a syntax error). It is reconstructed
  # here from the sibling scripts that use the same table -- confirm the
  # environment variable names before relying on it.
  import os
  import pymysql

  dh = Datahelper()
  try:
    chembl = pymysql.connect(host=os.environ["CHHOST"],
                             user=os.environ["CHUSER"],
                             passwd=os.environ["CHPWD"],
                             port=int(os.environ["CHPORT"]),
                             db=os.environ["CHDB"])
  except Exception:
    # Report on stderr so the message does not end up in the CSV output.
    sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
    sys.exit(1)

  # The code comes from stdin, so it is bound as a parameter rather than
  # interpolated into the SQL text.
  query = ("select level1_description, level2_description, "
           "level3_description from atc_classification "
           "where level3 = %s limit 1")

  count = 0
  try:
    for line in sys.stdin:
      data = line.strip().split(',')
      atc_code = data[0]

      with chembl.cursor() as cursor:
        cursor.execute(query, (atc_code,))
        for row in cursor:
          count += 1
          descr = row[0].lower() + ": " + dh.get_normalised_phrase(
              row[1].lower() + " " + row[2].lower())
          data.append(descr)
          print(','.join(data))
  finally:
    # Release the connection even if a malformed line raises.
    chembl.close()

  return count

Пример #4

Показать файл

Файл: generate_bnf_medication_annotations.py Проект: spiros/ukbb-srmed

def main(options):
    """Print phenotype annotation rows for the codes read from stdin.

    A code lookup is loaded from ``options.bnfcodes`` with
    ``load_code_list``. After a fixed header line, every comma-separated
    input line whose first field is present in the lookup is printed with
    three extra fields: the code itself, the looked-up text passed through
    ``Datahelper.get_normalised_phrase`` and ``Datahelper.make_pheno_string``,
    and the literal ``BINARY``. Lines with unknown codes are skipped.

    Args:
        options: object with a ``bnfcodes`` attribute naming the code file.

    Returns:
        The total number of input lines that matched a code.
    """
    dh = Datahelper()
    try:
        # The context manager closes the file once the lookup is loaded.
        with open(options.bnfcodes, "r") as fh:
            code_lookup = load_code_list(fh)
    except Exception:
        # Report on stderr so the message does not end up in the CSV output.
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    print("pheno,PHENOTYPE,Category,type")

    count = 0
    for line in sys.stdin:
        data = line.strip().split(',')

        if data[0] in code_lookup:
            count += 1
            pheno_string = dh.get_normalised_phrase(code_lookup[data[0]])
            pheno_string = dh.make_pheno_string(pheno_string)
            data.append(data[0])
            data.append(pheno_string)
            data.append("BINARY")
            print(','.join(data))

    return count

Пример #5

Показать файл

def main():
    """Print the Datahelper excluded words, four per comma-separated line.

    The words come from ``Datahelper.get_excluded_words`` and are printed in
    iteration order; the final line may hold fewer than four words. If the
    helper cannot be constructed the error is reported on stderr and the
    process exits with status 1.
    """
    try:
        dh = Datahelper()
    except IOError as e:
        sys.stderr.write("I/O error({0}): {1}\n".format(e.errno, e.strerror))
        sys.stderr.write("I/O error: %s\n" % (sys.exc_info(),))
        sys.exit(1)
    except TypeError as e:
        sys.stderr.write("Missing arguments  %s\n" % (e,))
        sys.exit(1)
    except Exception:
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    # Materialise once so the words can be sliced into groups of four.
    words = list(dh.get_excluded_words())
    group_size = 4
    for start in range(0, len(words), group_size):
        print(",".join(words[start:start + group_size]))

    return

Пример #6

Показать файл

def main():
    """Collapse tab-separated synonym rows into one record per molregno.

    Requires the input to be sorted on the first field (the molregno), since
    a new group starts whenever that field changes. Each row contributes a
    ``<type>:<text>`` entry, where the text is the second field passed
    through ``Datahelper.get_normalised_phrase`` and the type is the third
    field. Duplicate entries within a group are dropped. Each group is
    printed as its entries joined by ``|`` followed by
    ``|MOLREGNO:<molregno>``.

    Nothing is printed when stdin is empty.
    """
    last_molno = ""
    related_synonyms = []
    dh = Datahelper()

    for line in sys.stdin:
        data = line.strip().split('\t')
        if data[0] != last_molno and last_molno != "":
            print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)
            related_synonyms = []
        last_molno = data[0]
        text = dh.get_normalised_phrase(data[1])
        stype = data[2]
        syn = stype + ":" + text
        if syn not in related_synonyms:
            related_synonyms.append(syn)

    # Output the last synonym group. The guard avoids emitting a bogus
    # "|MOLREGNO:" record when no molregno was ever read.
    if last_molno != "":
        print('|'.join(related_synonyms) + "|MOLREGNO:" + last_molno)

Пример #7

Показать файл

def main(options):
    """Reformat the ATC code and lower-case the text of each stdin CSV line.

    The first field is replaced by the result of
    ``Datahelper.format_atc_code`` called with ``int(options.codelen)``, the
    second field is lower-cased, and the line is printed comma-separated.

    Args:
        options: object with a ``codelen`` attribute convertible to int.
    """
    helper = Datahelper()

    for raw_line in sys.stdin:
        fields = raw_line.strip().split(',')
        code_length = int(options.codelen)
        fields[0] = helper.format_atc_code(fields[0], code_length)
        fields[1] = fields[1].lower()
        print(','.join(fields))

Пример #8

Показать файл

def main(options):
    """Reformat the ATC code in the third field of each stdin CSV line.

    The third field is replaced by the result of
    ``Datahelper.format_atc_code`` called with ``int(options.codelen)`` and
    the line is printed comma-separated.

    Args:
        options: object with a ``codelen`` attribute convertible to int.

    Returns:
        The number of lines processed.
    """
    processed = 0
    helper = Datahelper()

    for processed, raw_line in enumerate(sys.stdin, 1):
        fields = raw_line.strip().split(',')
        fields[2] = helper.format_atc_code(fields[2], int(options.codelen))
        print(','.join(fields))

    return processed

Пример #9

Показать файл

Файл: code_data_match.py Проект: PhilAppleby/ukb_data_parsing

def main(options):
  """Match medication coding records from stdin against classification codes.

  The classification phrases are loaded into a ``Datahelper`` from
  ``options.clsfile``. The first stdin line is treated as a header and
  discarded. For each following comma-separated record, the second field,
  plus the pipe-separated entries of the third field when the record has
  exactly three fields, are passed to ``Datahelper.match_all_phrases``.

  One output line is printed per matched code (there can be several per
  input record); records without any code are printed once with ``NA``
  placeholders and a trailing 0.

  SEE ALSO: datahelper.py

  Args:
    options: object with a ``clsfile`` attribute naming the codes file.

  Returns:
    A ``(count, match_count, miss_count)`` tuple: records read, records with
    at least one code, and records with none.
  """
  count = 0
  match_count = 0
  miss_count = 0

  # Try to load the classification system codes file.
  try:
    dh = Datahelper()
    # The context manager closes the file once the phrases are loaded.
    with open(options.clsfile, "r") as fh:
      dh.load_cls_phrases(fh)
  except IOError as e:
    sys.stderr.write("I/O error({0}): {1}\n".format(e.errno, e.strerror))
    sys.stderr.write("I/O error: %s\n" % (sys.exc_info(),))
    sys.exit(1)
  except TypeError as e:
    sys.stderr.write("Missing arguments  %s\n" % (e,))
    sys.exit(1)
  except Exception:
    sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
    sys.exit(1)

  # stdin carries the medications coding data; skip its header line.
  sys.stdin.readline()
  for line in sys.stdin:
    count += 1
    data = line.strip().split(',')
    all_phrases = [data[1]]
    if len(data) == 3:
      all_phrases += data[2].split('|')

    code_array, match_data, last_match, _ = dh.match_all_phrases(all_phrases)
    if len(code_array) > 0:
      # Current policy: output one line per code match (there can be
      # multiple per input record).
      for code_elem in code_array:
        code_data = code_elem.split("~")
        print("%s,%s,%s,%s,%s,%s,%d" % (
            data[0], data[1], last_match, '|'.join(match_data),
            code_data[1], code_data[0], len(code_array)))
      match_count += 1
    else:
      print("%s,%s,%s,%s,%s,%s,0" % (
          data[0], data[1], last_match, '|'.join(match_data), "NA", "NA"))
      miss_count += 1

  return count, match_count, miss_count

Пример #10

Показать файл

Файл: bnf_parse.py Проект: spiros/ukbb-srmed

def main():
    """Echo the stdin header, then reformat each following CSV line.

    The first field of every data line is replaced by the result of
    ``Datahelper.format_digit_code(field, 3)`` and the second field is
    lower-cased before the line is printed comma-separated.

    Returns:
        The number of data lines processed (the header is not counted).
    """
    processed = 0
    helper = Datahelper()

    header = sys.stdin.readline().strip()
    print(header)

    for processed, raw_line in enumerate(sys.stdin, 1):
        fields = raw_line.strip().split(',')
        fields[0] = helper.format_digit_code(fields[0], 3)
        fields[1] = fields[1].lower()
        print(','.join(fields))

    return processed

Пример #11

Показать файл

def main():
    """Swap the first two tab-separated fields of each line read from stdin.

    Each output line is the second input field, a tab, then the first input
    field; any further fields are dropped.
    """
    # The helper is not used below. The call is kept in case constructing it
    # has side effects this script relies on -- NOTE(review): confirm and
    # drop if not.
    Datahelper()

    for line in sys.stdin:
        data = line.strip().split('\t')
        print("%s\t%s" % (data[1], data[0]))

Пример #12

Показать файл

Файл: generate_atc_medication_annotations.py Проект: spiros/ukbb-srmed

def main(options):
    """Print phenotype annotation rows for the ATC codes read from stdin.

    Accesses the CHEMBL db for each input line and uses the description from
    the ATC level given by ``options.level``. After a fixed header line,
    every comma-separated input line whose first field is found in the
    ``level<N>`` column of ``atc_classification`` is printed with three extra
    fields: ``<code>_<pheno string>``, the pheno string, and the literal
    ``BINARY``. The pheno string is the description passed through
    ``Datahelper.get_normalised_phrase`` and ``Datahelper.make_pheno_string``.

    Connection parameters are read from the environment variables CHHOST,
    CHUSER, CHPWD, CHPORT and CHDB.

    Args:
        options: object with a ``level`` attribute convertible to int.

    Returns:
        The total number of description rows found across all input lines.
    """
    level = int(options.level)

    dh = Datahelper()
    try:
        chembl = pymysql.connect(host=os.environ["CHHOST"],
                                 user=os.environ["CHUSER"],
                                 passwd=os.environ["CHPWD"],
                                 port=int(os.environ["CHPORT"]),
                                 db=os.environ["CHDB"])
    except Exception:
        # Report on stderr so the message does not end up in the CSV output.
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    print("pheno,PHENOTYPE,Category,type")

    # Only the integer level is interpolated into the SQL text, because
    # column names cannot be bound. The ATC code comes from stdin and is
    # passed as a bound parameter (hence the escaped "%%s").
    query = ("select level%d_description from atc_classification "
             "where level%d = %%s limit 1" % (level, level))

    count = 0
    try:
        for line in sys.stdin:
            data = line.strip().split(',')
            atc_code = data[0]

            with chembl.cursor() as cursor:
                cursor.execute(query, (atc_code,))
                for row in cursor:
                    count += 1
                    pheno_string = dh.get_normalised_phrase(row[0])
                    pheno_string = dh.make_pheno_string(pheno_string)
                    data.append(data[0] + "_" + pheno_string)
                    data.append(pheno_string)
                    data.append("BINARY")
                    print(','.join(data))
    finally:
        # Release the connection even if a malformed line raises.
        chembl.close()

    return count

Пример #13

Показать файл

Файл: assign_codes_to_participant_data.py Проект: spiros/ukbb-srmed

def main(options):
    """Append classification codes to participant CSV records from stdin.

    A code lookup is loaded from ``options.codefile`` with ``load_cs_codes``.
    The stdin header line is printed with an extra ``cs_code`` column. Each
    following record is printed once per code found for its second field,
    with that code as the last column; records without a code are printed
    once with ``NA`` in the last column.

    Args:
        options: object with a ``codefile`` attribute naming the code file.

    Returns:
        A ``(count, match_count, miss_count)`` tuple: records read, output
        lines carrying a code, and records without any code.
    """
    count = 0
    match_count = 0
    miss_count = 0
    # The helper is not used below; the call is kept in case constructing it
    # has side effects -- NOTE(review): confirm and drop if not.
    Datahelper()

    try:
        # The context manager closes the file once the lookup is loaded.
        with open(options.codefile, "r") as fh:
            code_lookup = load_cs_codes(fh)
    except IOError as e:
        sys.stderr.write("I/O error({0}): {1}\n".format(e.errno, e.strerror))
        sys.exit(1)
    except TypeError as e:
        sys.stderr.write("Missing arguments  %s\n" % (e,))
        sys.exit(1)
    except Exception:
        sys.stderr.write("Unexpected error: %s\n" % (sys.exc_info(),))
        sys.exit(1)

    hdr = sys.stdin.readline().strip()
    print("%s,%s" % (hdr, "cs_code"))

    for line in sys.stdin:
        count += 1
        data = line.strip().split(',')
        # Placeholder for the code column; overwritten for each match.
        data.append("NA")
        if data[1] in code_lookup:
            for code in code_lookup[data[1]]:
                data[-1] = code
                match_count += 1
                print(",".join(data))
        else:
            miss_count += 1
            print(",".join(data))

    return count, match_count, miss_count