def test_get_data_from_r(self):
     """Read the data from R and check that it is written to a CSV file.

     The target file must not exist before the export and must exist
     afterwards.
     """
     csv_path = os.path.join(self.tmp_path, 'rm.csv')
     self.assertFalse(os.path.exists(csv_path))
     try:
         rdl.get_data_from_r(csv_path)
         self.assertTrue(os.path.exists(csv_path))
     finally:
         # Clean up even when the export or the assertion fails; a leftover
         # 'rm.csv' would break the "does not exist yet" check on the next
         # run of this test.
         if os.path.exists(csv_path):
             os.remove(csv_path)
 def test_get_cas_numbers(self):
     """Read the CAS numbers from the R package (rownames).

     Expects 249 entries that include '89-78-1' and do not include the
     'solvent' row.
     """
     csv_path = os.path.join(self.tmp_path, 'rm.csv')
     try:
         rdl.get_data_from_r(csv_path)
         # Only the first element of the returned triple is needed here.
         cas_numbers, _, _ = rdl.load_response_matrix(csv_path)
         self.assertEqual(len(cas_numbers), 249)
         self.assertIn('89-78-1', cas_numbers)
         self.assertNotIn('solvent', cas_numbers)
     finally:
         # Remove the exported file even when an assertion fails, so it
         # cannot leak into other tests that share the same path.
         if os.path.exists(csv_path):
             os.remove(csv_path)
 def test_get_response_matrix(self):
     """Read the response matrix from the DoOR R package.

     Expects a matrix with 249 rows and 67 columns, with row and column
     name lists of matching lengths.
     """
     csv_path = os.path.join(self.tmp_path, 'rm.csv')
     try:
         rdl.get_data_from_r(csv_path)
         row_names, col_names, rm = rdl.load_response_matrix(csv_path)
         self.assertEqual(249, rm.shape[0])
         self.assertEqual(67, rm.shape[1])
         self.assertEqual(249, len(row_names))
         self.assertEqual(67, len(col_names))
     finally:
         # Remove the exported file even when an assertion fails, so it
         # cannot leak into other tests that share the same path.
         if os.path.exists(csv_path):
             os.remove(csv_path)
#!/usr/bin/env python
# encoding: utf-8
"""
This script reads data from all the CSV files and saves it in a format
convenient for analysis.

Created by  on 2012-01-27.
Copyright (c) 2012. All rights reserved.
"""
import sys, os, json
import master.libs.read_data_lib as rdl

# Root directory of the data: the 'features' folder is read from here and
# both output files are written here.
data_path = '/Users/dedan/projects/master/data/'

# TODO: make it work with the new read_feature_csv function
# Read the feature CSVs, then (going by the helper names) drop invalid
# features and normalize the remaining ones.
features = rdl.read_feature_csvs(os.path.join(data_path, 'features'))
features = rdl.normalize_features(rdl.remove_invalid_features(features))
# Write through a context manager so the JSON file is flushed and closed
# deterministically instead of whenever the file object gets collected.
with open(os.path.join(data_path, 'features.json'), 'w') as features_file:
    json.dump(features, features_file)

# Export the data from R into a CSV file in the same data directory.
rdl.get_data_from_r(os.path.join(data_path, 'response_matrix.csv'))