Пример #1
0
 def test_read_invalid_input(self, L):
     # Invalid input must make utils.read() exit with status 1 and emit two
     # ERROR log records. `L` is the log-capture object handed to the test
     # (presumably a LogCapture injected by a decorator outside this view --
     # confirm).
     #
     # assertRaises used as a context manager (cm) stores the caught
     # exception on cm.exception. It has to be inspected AFTER the with-block:
     # any statement placed after the raising call inside the block is
     # skipped, so the exit-code check would silently never run.
     # ref: http://stackoverflow.com/questions/15672151/is-it-possible-for-a-unit-test-to-assert-that-a-method-calls-sys-exit
     with self.assertRaises(SystemExit) as cm:
         utils.read(self.invalid_input)
     self.assertEqual(cm.exception.code, 1)
     L.check(
         ('rsempipeline.preprocess.utils', 'ERROR',
          'Please correct the invalid entries in {0}'.format(self.invalid_input)),
         ('rsempipeline.preprocess.utils', 'ERROR',
          'If unsure of the correct format, check {0}'.format(os.path.join(SHARE_DIR, 'GSE_GSM.example.csv'))),
     )
Пример #2
0
 def test_read_invalid_input(self, L):
     # Invalid input must make utils.read() exit with status 1 and emit two
     # ERROR log records. `L` is the log-capture object handed to the test
     # (presumably a LogCapture injected by a decorator outside this view --
     # confirm).
     #
     # assertRaises used as a context manager (cm) stores the caught
     # exception on cm.exception. It has to be inspected AFTER the with-block:
     # any statement placed after the raising call inside the block is
     # skipped, so the exit-code check would silently never run.
     # ref: http://stackoverflow.com/questions/15672151/is-it-possible-for-a-unit-test-to-assert-that-a-method-calls-sys-exit
     with self.assertRaises(SystemExit) as cm:
         utils.read(self.invalid_input)
     self.assertEqual(cm.exception.code, 1)
     L.check(
         ('rsempipeline.preprocess.utils', 'ERROR',
          'Please correct the invalid entries in {0}'.format(
              self.invalid_input)),
         ('rsempipeline.preprocess.utils', 'ERROR',
          'If unsure of the correct format, check {0}'.format(
              os.path.join(SHARE_DIR, 'GSE_GSM.example.csv'))),
     )
Пример #3
0
def download_soft(input_csv, soft_outdir): # pragma: no cover
    """Call ``gen_soft`` once per run of consecutive rows sharing a GSE.

    Iterates over the (gse, gsm) pairs produced by ``read(input_csv)`` and
    invokes ``SOFTDownloader.gen_soft(gse, soft_outdir)`` whenever the GSE
    differs from the one on the previous row. The GSM value is not used.
    """
    downloader = SOFTDownloader()
    previous_gse = None
    for gse, _gsm in read(input_csv):
        is_new_series = previous_gse is None or gse != previous_gse
        if not is_new_series:
            # Same series as the previous row: already handled.
            continue
        downloader.gen_soft(gse, soft_outdir)
        previous_gse = gse
Пример #4
0
def generate_csv(input_csv, outdir, num_threads):
    """Look up the species of every (GSE, GSM) pair and write the results.

    Pairs are taken from ``read(input_csv)`` and resolved concurrently with
    ``find_species(gse, gsm, outdir)`` on ``num_threads`` daemon threads.
    Rows are ``[GSE, species, GSM]``.

    Rows with a truthy species are written to ``SPECIES_CSV_BASENAME`` under
    ``outdir``. Rows without one are written to ``NO_SPECIES_CSV_BASENAME``,
    and that file is only produced when at least one such row exists.
    ``backup_file`` is called on each target before it is written.

    Sometimes GSM data could be private, so no species information will be
    extracted. e.g. GSE49366 GSM1198168

    Note: no worker is started when ``num_threads`` is below 1, so the call
    blocks in ``queue.join()`` if ``input_csv`` yields any pair.

    Raises:
        Exception: the first error raised by a worker, re-raised after all
            queued pairs have been processed. No output is written then.
    """
    res, res_no_species = [], []
    # Errors raised inside worker threads; surfaced to the caller after join().
    errors = []

    # execute in parallel
    queue = Queue.Queue()

    def worker():
        while True:
            gse, gsm = queue.get()
            try:
                species = find_species(gse, gsm, outdir)
                row = [gse, species, gsm]
                if species:
                    res.append(row)
                else:
                    res_no_species.append(row)
            except Exception as err:  # thread boundary: re-raised below
                errors.append(err)
            finally:
                # Always acknowledge the item. If this were skipped on error,
                # queue.join() below would block forever.
                queue.task_done()

    for _ in range(num_threads):
        thrd = threading.Thread(target=worker)
        thrd.daemon = True
        thrd.start()

    for gse, gsm in read(input_csv):
        queue.put([gse, gsm])
    queue.join()

    if errors:
        # Fail loudly instead of writing incomplete results.
        raise errors[0]

    # write output
    out_csv = os.path.join(outdir, SPECIES_CSV_BASENAME)
    no_species_csv = os.path.join(outdir, NO_SPECIES_CSV_BASENAME)
    backup_file(out_csv)
    write_csv(res, out_csv)

    if res_no_species:
        backup_file(no_species_csv)
        write_csv(res_no_species, no_species_csv)
Пример #5
0
def generate_csv(input_csv, outdir, num_threads):
    """Look up the species of every (GSE, GSM) pair and write the results.

    Pairs are taken from ``read(input_csv)`` and resolved concurrently with
    ``find_species(gse, gsm, outdir)`` on ``num_threads`` daemon threads.
    Rows are ``[GSE, species, GSM]``.

    Rows with a truthy species are written to ``SPECIES_CSV_BASENAME`` under
    ``outdir``. Rows without one are written to ``NO_SPECIES_CSV_BASENAME``,
    and that file is only produced when at least one such row exists.
    ``backup_file`` is called on each target before it is written.

    Sometimes GSM data could be private, so no species information will be
    extracted. e.g. GSE49366 GSM1198168

    Note: no worker is started when ``num_threads`` is below 1, so the call
    blocks in ``queue.join()`` if ``input_csv`` yields any pair.

    Raises:
        Exception: the first error raised by a worker, re-raised after all
            queued pairs have been processed. No output is written then.
    """
    res, res_no_species = [], []
    # Errors raised inside worker threads; surfaced to the caller after join().
    errors = []

    # execute in parallel
    queue = Queue.Queue()

    def worker():
        while True:
            gse, gsm = queue.get()
            try:
                species = find_species(gse, gsm, outdir)
                row = [gse, species, gsm]
                if species:
                    res.append(row)
                else:
                    res_no_species.append(row)
            except Exception as err:  # thread boundary: re-raised below
                errors.append(err)
            finally:
                # Always acknowledge the item. If this were skipped on error,
                # queue.join() below would block forever.
                queue.task_done()

    for _ in range(num_threads):
        thrd = threading.Thread(target=worker)
        thrd.daemon = True
        thrd.start()

    for gse, gsm in read(input_csv):
        queue.put([gse, gsm])
    queue.join()

    if errors:
        # Fail loudly instead of writing incomplete results.
        raise errors[0]

    # write output
    out_csv = os.path.join(outdir, SPECIES_CSV_BASENAME)
    no_species_csv = os.path.join(outdir, NO_SPECIES_CSV_BASENAME)
    backup_file(out_csv)
    write_csv(res, out_csv)

    if res_no_species:
        backup_file(no_species_csv)
        write_csv(res_no_species, no_species_csv)
Пример #6
0
 def test_read_valid_input_with_commented_line(self):
     # utils.read() on the valid_input3 fixture must hand back a generator.
     result = utils.read(self.valid_input3)
     self.assertIsInstance(result, types.GeneratorType)
Пример #7
0
 def test_read_valid_input_with_appending_semicolon(self):
     # utils.read() on the valid_input2 fixture must hand back a generator.
     result = utils.read(self.valid_input2)
     self.assertIsInstance(result, types.GeneratorType)
Пример #8
0
 def test_read_valid_input(self):
     # utils.read() on the valid_input1 fixture must hand back a generator.
     result = utils.read(self.valid_input1)
     self.assertIsInstance(result, types.GeneratorType)
Пример #9
0
 def test_yield_gse_gsm_from_valid_input_with_commented_line(self):
     # Every item produced from the valid_input3 fixture is validated with
     # the shared check_yield_result helper.
     yielded = utils.read(self.valid_input3)
     for item in yielded:
         self.check_yield_result(item)
Пример #10
0
 def test_yield_gse_gsm_from_valid_input_with_appending_semicolon(self):
     # Every item produced from the valid_input2 fixture is validated with
     # the shared check_yield_result helper.
     yielded = utils.read(self.valid_input2)
     for item in yielded:
         self.check_yield_result(item)
Пример #11
0
 def test_read_valid_input_with_commented_line(self):
     # utils.read() on the valid_input3 fixture must hand back a generator.
     result = utils.read(self.valid_input3)
     self.assertIsInstance(result, types.GeneratorType)
Пример #12
0
 def test_read_valid_input_with_appending_semicolon(self):
     # utils.read() on the valid_input2 fixture must hand back a generator.
     result = utils.read(self.valid_input2)
     self.assertIsInstance(result, types.GeneratorType)
Пример #13
0
 def test_read_valid_input(self):
     # utils.read() on the valid_input1 fixture must hand back a generator.
     result = utils.read(self.valid_input1)
     self.assertIsInstance(result, types.GeneratorType)
Пример #14
0
 def test_yield_gse_gsm_from_valid_input_with_commented_line(self):
     # Every item produced from the valid_input3 fixture is validated with
     # the shared check_yield_result helper.
     yielded = utils.read(self.valid_input3)
     for item in yielded:
         self.check_yield_result(item)
Пример #15
0
 def test_yield_gse_gsm_from_valid_input_with_appending_semicolon(self):
     # Every item produced from the valid_input2 fixture is validated with
     # the shared check_yield_result helper.
     yielded = utils.read(self.valid_input2)
     for item in yielded:
         self.check_yield_result(item)