def test_get_data_from_r(self):
    """Read the data from R and write it to a CSV file.

    Calls ``rdl.get_data_from_r`` with a path below ``self.tmp_path`` and
    checks that the file does not exist before the call and does exist
    afterwards.
    """
    csv_path = os.path.join(self.tmp_path, 'rm.csv')
    self.assertFalse(os.path.exists(csv_path))
    try:
        rdl.get_data_from_r(csv_path)
        self.assertTrue(os.path.exists(csv_path))
    finally:
        # Always clean up, even when the export or the assertion fails;
        # a leftover file would break the precondition check above on the
        # next run. The existence guard keeps the cleanup from masking the
        # original failure with an OSError.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test_get_cas_numbers(self):
    """Read the CAS numbers from the R package (row names).

    Exports the data with ``rdl.get_data_from_r`` to a CSV file below
    ``self.tmp_path``, loads it back with ``rdl.load_response_matrix`` and
    checks the first returned value: it must hold 249 entries, contain
    ``'89-78-1'`` and must not contain ``'solvent'``.
    """
    csv_path = os.path.join(self.tmp_path, 'rm.csv')
    try:
        rdl.get_data_from_r(csv_path)
        cas_numbers, _, _ = rdl.load_response_matrix(csv_path)
        self.assertEqual(len(cas_numbers), 249)
        self.assertIn('89-78-1', cas_numbers)
        self.assertNotIn('solvent', cas_numbers)
    finally:
        # Remove the temporary CSV even if an assertion above fails, so a
        # failing run does not leave a file behind for other tests. The
        # guard avoids hiding the real error when no file was written.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test_get_response_matrix(self):
    """Read the response matrix from the DoOR R package.

    Exports the data with ``rdl.get_data_from_r`` to a CSV file below
    ``self.tmp_path``, loads it back with ``rdl.load_response_matrix`` and
    checks that the matrix has shape 249 x 67 and that the returned row and
    column name sequences have matching lengths.
    """
    csv_path = os.path.join(self.tmp_path, 'rm.csv')
    try:
        rdl.get_data_from_r(csv_path)
        row_names, col_names, rm = rdl.load_response_matrix(csv_path)
        self.assertEqual(249, rm.shape[0])
        self.assertEqual(67, rm.shape[1])
        self.assertEqual(249, len(row_names))
        self.assertEqual(67, len(col_names))
    finally:
        # Remove the temporary CSV even if an assertion above fails, so a
        # failing run does not leave a file behind for other tests. The
        # guard avoids hiding the real error when no file was written.
        if os.path.exists(csv_path):
            os.remove(csv_path)
#!/usr/bin/env python
# encoding: utf-8
"""
This script reads data from all the csv files and saves them in a format
convenient for analysis.

It reads the feature CSVs below ``data_path/features``, removes invalid
features, normalizes the rest and dumps the result to
``data_path/features.json``. It then exports the data from R to
``data_path/response_matrix.csv``.

Created on 2012-01-27.
Copyright (c) 2012. All rights reserved.
"""

import json
import os
import sys

import master.libs.read_data_lib as rdl

# NOTE: machine-specific absolute path; adjust before running elsewhere.
data_path = '/Users/dedan/projects/master/data/'

# TODO: make it work with the new read_feature_csv function
features = rdl.read_feature_csvs(os.path.join(data_path, 'features'))
features = rdl.normalize_features(rdl.remove_invalid_features(features))

# Use a context manager so the JSON file is flushed and closed
# deterministically instead of relying on garbage collection.
with open(os.path.join(data_path, 'features.json'), 'w') as features_file:
    json.dump(features, features_file)

rdl.get_data_from_r(os.path.join(data_path, 'response_matrix.csv'))