Пример #1
0
 def ns(stream, cnt):
   """Yield one marker-definition record per not-yet-known row of ``stream``.

   Every row is indexed by 'label', 'rs_label' and 'mask'.  A row whose
   label is already in ``known_markers`` is logged and skipped.  The mask
   is passed through ``convert_to_top``; if that raises ``ValueError`` the
   mask is kept as supplied and ``cnt["bad"]`` is incremented, otherwise
   ``cnt["good"]`` is incremented.

   A label is added to ``known_markers`` only when the generator is
   resumed after yielding the corresponding record.
   """
   for row in stream:
     label = row['label']
     if label in known_markers:
       self.logger.warn('%s: already loaded' % label)
       continue
     record = {
       'source': source,
       'context': context,
       'release': release,
       'label': label,
       'rs_label': row['rs_label'],
       'ref_rs_genome': ref_genome,
       'dbSNP_build': dbsnp_build,
       }
     raw_mask = row['mask']
     try:
       top_mask = convert_to_top(raw_mask)
     except ValueError:
       # Conversion failed: fall back to the mask exactly as supplied.
       top_mask = raw_mask
       self.logger.warn('%s: could not convert mask to top' % label)
       cnt["bad"] += 1
     else:
       cnt["good"] += 1
     record['mask'] = top_mask
     yield record
     # Registered after the yield, i.e. once the consumer asks for more.
     known_markers.add(label)
Пример #2
0
 def generator(mdefs):
     """Yield a marker-definition dict for each item of ``mdefs``.

     Item element 0 becomes 'label', element 1 becomes 'rs_label' and
     element 2 is passed through ``convert_to_top`` to produce 'mask'.
     The other fields (``source``, ``context``, ``release``,
     ``ref_rs_genome``, ``dbsnp_build``) are names defined outside this
     function.
     """
     for mdef in mdefs:
         record = {
             'source': source,
             'context': context,
             'release': release,
             'ref_rs_genome': ref_rs_genome,
             'dbSNP_build': dbsnp_build,
         }
         # Per-item fields, filled in the same order as they are indexed.
         record['label'] = mdef[0]
         record['rs_label'] = mdef[1]
         record['mask'] = convert_to_top(mdef[2])
         yield record
Пример #3
0
 def generator(mdefs):
   """Turn each item of ``mdefs`` into a marker-definition dict.

   For every item, index 0 is used as 'label', index 1 as 'rs_label' and
   index 2 is run through ``convert_to_top`` to give 'mask'.  ``source``,
   ``context``, ``release``, ``ref_rs_genome`` and ``dbsnp_build`` are
   taken from outside this function and copied into every dict.
   """
   for entry in mdefs:
     yield dict(
       source=source,
       context=context,
       release=release,
       ref_rs_genome=ref_rs_genome,
       dbSNP_build=dbsnp_build,
       label=entry[0],
       rs_label=entry[1],
       mask=convert_to_top(entry[2]),
       )
Пример #4
0
# Number of marker definitions to write: first command-line argument,
# or 100 when no argument is given.
N = int(sys.argv[1]) if len(sys.argv) > 1 else 100
FLANK_SIZE = 20  # number of random bases on each side of the alleles

# Write N random marker definitions, one per row, to a tab-separated file.
with open('marker_definitions.tsv', 'w') as out:
  writer = csv.DictWriter(
    out,
    fieldnames=['label', 'rs_label', 'mask'],
    delimiter='\t',
    lineterminator="\n",
    )
  writer.writeheader()
  written = 0
  while written < N:
    left = ''.join(random.choice('ACGT') for _ in xrange(FLANK_SIZE))
    right = ''.join(random.choice('ACGT') for _ in xrange(FLANK_SIZE))
    allele_pair = '/'.join(random.sample('ACGT', 2))
    mask = '%s[%s]%s' % (left, allele_pair, right)
    # Only masks that convert_to_top accepts are written; others are retried.
    try:
      convert_to_top(mask)
    except ValueError:
      print('Bad mask, skipping')
      continue
    written += 1
    # Time-derived value used together with the row counter in the labels.
    stamp = time.time() % 1000000
    writer.writerow({
      'label': 'foo-%d-%d' % (stamp, written),
      'rs_label': 'rs-foo-%d-%d' % (stamp, written),
      'mask': mask,
      })
Пример #5
0
    N = int(sys.argv[1])
except IndexError:
    N = 100
FLANK_SIZE = 20  # length of each randomly generated flank

# Emit N rows (label, rs_label, mask) of random marker definitions.
with open('marker_definitions.tsv', 'w') as tsv_file:
    columns = ['label', 'rs_label', 'mask']
    tsv = csv.DictWriter(tsv_file, fieldnames=columns,
                         delimiter='\t', lineterminator="\n")
    tsv.writeheader()
    for serial in xrange(1, N + 1):
        # Keep drawing random masks until convert_to_top accepts one.
        while True:
            flanks = []
            for _side in ('left', 'right'):
                bases = [random.choice('ACGT') for _ in xrange(FLANK_SIZE)]
                flanks.append(''.join(bases))
            alleles = '/'.join(random.sample('ACGT', 2))
            mask = '%s[%s]%s' % (flanks[0], alleles, flanks[1])
            try:
                convert_to_top(mask)
            except ValueError:
                print('Bad mask, skipping')
            else:
                break
        # Time-derived component shared by both labels of this row.
        t = time.time() % 1000000
        row = {}
        row['label'] = 'foo-%d-%d' % (t, serial)
        row['rs_label'] = 'rs-foo-%d-%d' % (t, serial)
        row['mask'] = mask
        tsv.writerow(row)
Пример #6
0
 def test_convert_to_top_split(self):
   """Check ``usnp.convert_to_top`` on split masks for every CONVERT_PAIRS pair.

   Both members of each pair are upper-cased and split with
   ``usnp.split_mask``; the conversion is called with ``toupper=False``
   and its result must equal the split second member.
   """
   for number, (raw_source, raw_target) in enumerate(CONVERT_PAIRS, 1):
     upper_source, upper_target = raw_source.upper(), raw_target.upper()
     source_parts = usnp.split_mask(upper_source)
     target_parts = usnp.split_mask(upper_target)
     converted = usnp.convert_to_top(source_parts, toupper=False)
     # Failure message carries the 1-based pair number and both split masks.
     message = '(%d): %r != %r' % (number, source_parts, target_parts)
     self.assertEqual(converted, target_parts, message)
Пример #7
0
 def test_convert_to_top(self):
   """Check ``usnp.convert_to_top`` against every pair in CONVERT_PAIRS.

   The first member of each pair is converted and must equal the second.
   """
   for number, pair in enumerate(CONVERT_PAIRS, 1):
     given, expected = pair
     converted = usnp.convert_to_top(given)
     # Failure message carries the 1-based pair number and the pair itself.
     message = '(%d): %r != %r' % (number, given, expected)
     self.assertEqual(converted, expected, message)