def setUpClass(cls):
    """Prepare class-level fixtures shared by every test in the class.

    Sets on the class:
      * ``filename``      - path of ``small_dataset.dat`` next to this module
      * ``resolution``    - the constant 1000
      * ``base_datetime`` - the current time minus 15 seconds
      * ``data``          - ``extract_hr_acc`` applied to the frame returned
                            by ``read_data(filename, base_datetime)``
    """
    super(AoudaTests, cls).setUpClass()

    here = os.path.dirname(__file__)
    cls.filename = os.path.join(here, 'small_dataset.dat')
    cls.resolution = 1000
    cls.base_datetime = datetime.now() - timedelta(seconds=15)

    raw = read_data(cls.filename, cls.base_datetime)
    cls.data = extract_hr_acc(raw)
def test_read_data_unlabeled(self):
    """
    Tests the function read_data for unlabeled data.

    The expected value is a list of clusters, each a list of Reference
    objects whose last constructor argument is None.
    """
    references = pre.read_data(self.testfilename)
    truth = [
        [
            Reference(0, 'm jones',
                      'symbol intersect detect method improv spatial '
                      'intersect join',
                      ['e rundensteiner', 'y huang'],
                      'geoinformatica', None),
            Reference(1, 'matthew c jones',
                      'improv spatial intersect join symbol intersect detect',
                      ['e rundensteiner', 'h kuno', 'p marron', 'v taube',
                       'y ra'],
                      'sigmodels.intern manag data', None),
            Reference(2, 'matthew c jones',
                      'view materi techniqu complex hirarch object',
                      ['e rundensteiner', 'y huang'],
                      'ssd symposium larg spatial databas', None),
        ],
        [
            Reference(3, 'mike w miller', 'domin draw bipartit graph',
                      ['l berg'],
                      'sigucc special interest group univers comput servic',
                      None),
            Reference(4, 'mike w miller', 'rel compromis statist databas',
                      [],
                      'sigucc special interest group univers comput servic',
                      None),
        ],
        [
            Reference(5, 'c chen', 'formal approach scenario analysi',
                      ['d kung', 'j samuel', 'j gao', 'p hsia',
                       'y toyoshima'],
                      'ieee softwar', None),
        ],
        [
            Reference(6, 'jane j robinson', 'discours code clue context',
                      [], 'acl meet the associ comput linguist', None),
            Reference(7, 'jane j robinson', 'diagram grammar dialogu',
                      [], 'cooper interfac inform system', None),
        ],
        [
            Reference(8, 'a gupta', 'iri h java distanc educ',
                      ['a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
                       'j wild', 'k maly', 's ghanem', 'x zhu'],
                      'acm journal educ resourc comput', None),
        ],
        [
            # NOTE(review): the two adjacent title literals below join
            # WITHOUT a space ("...registfile"); confirm this is the
            # intended expected title.
            Reference(9, 'mary d brown',
                      'intern redund represent limit bypass support pipelin '
                      'adder regist'
                      'file',
                      ['y patt'],
                      'proceed the th ieee intern symposium high '
                      'perform comput architectur hpca intern symposium '
                      'high perform '
                      'comput architectur talk slide', None),
        ],
    ]
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(references, truth)
def setUpClass(cls):
    """
    Load ``dataset.dat`` (located next to this module) once and keep the
    result of ``extract_hr_acc(read_data(...))`` on the class as ``_df``.
    """
    data_path = os.path.join(os.path.dirname(__file__), 'dataset.dat')
    raw = read_data(data_path)
    cls._df = extract_hr_acc(raw)
def test_read_data_labeled(self):
    """
    Tests the function read_data for labeled data.

    Same fixture as the unlabeled case, but read with ``labeled=True``;
    the last constructor argument of every expected Reference is a string
    instead of None.
    """
    references = pre.read_data(self.testfilename, labeled=True)
    truth = [
        [
            Reference(0, 'm jones',
                      'symbol intersect detect method improv spatial '
                      'intersect join',
                      ['e rundensteiner', 'y huang'],
                      'geoinformatica', '81'),
            Reference(1, 'matthew c jones',
                      'improv spatial intersect join symbol intersect detect',
                      ['e rundensteiner', 'h kuno', 'p marron', 'v taube',
                       'y ra'],
                      'sigmodels.intern manag data', '81'),
            Reference(2, 'matthew c jones',
                      'view materi techniqu complex hirarch object',
                      ['e rundensteiner', 'y huang'],
                      'ssd symposium larg spatial databas', '81'),
        ],
        [
            Reference(3, 'mike w miller', 'domin draw bipartit graph',
                      ['l berg'],
                      'sigucc special interest group univers comput servic',
                      '185'),
            Reference(4, 'mike w miller', 'rel compromis statist databas',
                      [],
                      'sigucc special interest group univers comput servic',
                      '185'),
        ],
        [
            Reference(5, 'c chen', 'formal approach scenario analysi',
                      ['d kung', 'j samuel', 'j gao', 'p hsia',
                       'y toyoshima'],
                      'ieee softwar', '94'),
        ],
        [
            Reference(6, 'jane j robinson', 'discours code clue context',
                      [], 'acl meet the associ comput linguist', '69'),
            Reference(7, 'jane j robinson', 'diagram grammar dialogu',
                      [], 'cooper interfac inform system', '69'),
        ],
        [
            Reference(8, 'a gupta', 'iri h java distanc educ',
                      ['a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
                       'j wild', 'k maly', 's ghanem', 'x zhu'],
                      'acm journal educ resourc comput', '0'),
        ],
        [
            Reference(9, 'mary d brown',
                      'intern redund represent limit bypass support pipelin '
                      'adder regist '
                      'file',
                      ['y patt'],
                      'proceed the th ieee intern symposium high '
                      'perform comput architectur hpca intern symposium '
                      'high perform comput'
                      ' architectur talk slide', '43'),
        ],
    ]
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(references, truth)
def test_extract_hr_acc(self):
    """Check the frame produced by extract_hr_acc against the raw data.

    Verifies that the expected columns exist, that the index carries the
    same items as the raw frame, and that each output column holds the same
    items as its source column after forward/backward filling the raw data.
    """
    data = read_data(path=PreprocessingTests.filename,
                     base_datetime=PreprocessingTests.base_datetime)
    frame = extract_hr_acc(data)

    columns = set(frame.columns.tolist())
    for expected_column in ('hr', 'acc', 'acc_x', 'acc_y', 'acc_z',
                            'ratio', 'ratio_log'):
        self.assertIn(expected_column, columns)

    self.assertItemsEqual(data.index.tolist(), frame.index.tolist())
    # NOTE(review): if notnull is pandas.notnull it returns a mask with the
    # same length as frame, so this comparison would always hold - confirm
    # the intended check.
    self.assertEqual(len(frame), len(notnull(frame)))

    # Recompute the derived ratio columns on the raw frame, then fill gaps.
    data['eratio'] = (data.hr / data.IMU_Chest_Magnitude)
    data['eratio_log'] = log(data['eratio'])
    data = data.fillna(method='ffill').fillna(method='bfill')

    # (raw column, extracted column) pairs, compared in this order.
    column_pairs = (
        ('hr', 'hr'),
        ('IMU_Chest_Magnitude', 'acc'),
        ('IMU_Chest_x', 'acc_x'),
        ('IMU_Chest_y', 'acc_y'),
        ('IMU_Chest_z', 'acc_z'),
        ('eratio', 'ratio'),
        ('eratio_log', 'ratio_log'),
    )
    for raw_name, out_name in column_pairs:
        self.assertItemsEqual(getattr(data, raw_name).tolist(),
                              getattr(frame, out_name).tolist())
def test_tfidf(self):
    """
    Tests the tfidf function.

    The corpus read from the test file is extended with the extra name
    'matt'; the tf-idf of 'matt' in a word list built from a string that
    contains 'matt' twice is expected to be twice its idf value.
    """
    references = pre.read_data(self.testfilename)
    corpus = pre.get_corpus(references) + ['matt']
    idf = mod.get_idf(corpus)
    words = mod.get_words('matt matt huang kuno jones c marron brown')
    tfidf = mod.tfidf('matt', words, idf)
    truth = idf['matt'] * 2
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(tfidf, truth)
def test_read_data_no_base_datetime(self):
    """Check read_data when no base_datetime is passed.

    The returned frame must expose the expected columns, and its first
    index entry must be later than the current time.
    """
    frame = read_data(path=PreprocessingTests.filename)
    columns = set(frame.columns.tolist())

    required = ('activityID', 'hr', 'IMU_Chest_Magnitude',
                'IMU_Chest_x', 'IMU_Chest_y', 'IMU_Chest_z')
    for column_name in required:
        self.assertIn(column_name, columns)

    first_stamp = frame.index[0]
    self.assertLess(datetime.now(), first_stamp)
def test_get_corpus(self):
    """
    Tests the function get_corpus.

    The expected corpus is a flat list of names in reference order
    (duplicates are kept).
    """
    references = pre.read_data(self.testfilename)
    corpus = pre.get_corpus(references)
    truth = [
        'm jones', 'e rundensteiner', 'y huang',
        'matthew c jones', 'e rundensteiner', 'h kuno', 'p marron',
        'v taube', 'y ra',
        'matthew c jones', 'e rundensteiner', 'y huang',
        'mike w miller', 'l berg',
        'mike w miller',
        'c chen', 'd kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima',
        'jane j robinson',
        'jane j robinson',
        'a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
        'j wild', 'k maly', 's ghanem', 'x zhu',
        'mary d brown', 'y patt',
    ]
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(corpus, truth)
def test_norm_tfidf(self):
    """
    Tests the normalized tfidf.

    The expected value is the plain tf-idf of 'matt' divided by the square
    root of the summed squared tf-idf values of every entry in ``words``.
    """
    references = pre.read_data(self.testfilename)
    corpus = pre.get_corpus(references) + ['matt']
    idf = mod.get_idf(corpus)
    words = mod.get_words('matt matt huang kuno jones c marron brown')
    n_tfidf = mod.norm_tfidf('matt', words, idf)

    truth = mod.tfidf('matt', words, idf)
    # Accumulate with a plain loop (not sum()) so the floating-point
    # summation order stays exactly as written.
    denom = 0
    for word in words:
        denom += mod.tfidf(word, words, idf) ** 2
    denom = math.sqrt(denom)
    truth /= denom

    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(n_tfidf, truth)
def test_get_corpus(self):
    """
    Tests the function get_corpus.

    The expected corpus is a flat list of names in reference order
    (duplicates are kept).
    """
    references = pre.read_data(self.testfilename)
    corpus = pre.get_corpus(references)
    truth = [
        'm jones', 'e rundensteiner', 'y huang',
        'matthew c jones', 'e rundensteiner', 'h kuno', 'p marron',
        'v taube', 'y ra',
        'matthew c jones', 'e rundensteiner', 'y huang',
        'mike w miller', 'l berg',
        'mike w miller',
        'c chen', 'd kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima',
        'jane j robinson',
        'jane j robinson',
        'a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
        'j wild', 'k maly', 's ghanem', 'x zhu',
        'mary d brown', 'y patt',
    ]
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(corpus, truth)
def train(n, path_to_csv):
    '''
    Train the model, print its polynomial coefficients and pickle it.

    Input
    -----
    n - polynomial degree
    path_to_csv - training set path

    Output
    ------
    poly_coeffs - the value returned by
                  model.polynomial_coefficients(means, stds)

    Side effects
    ------------
    Prints the coefficients flipped along axis 0 and writes the trained
    model to ../MODEL/inoutnn_<n>.pcl relative to this file's directory.
    '''

    # prepare data
    #-------------------------------------------------
    # load data
    raw_data = pre.read_data(path_to_csv)
    # rescale data
    feature_data, means, stds = pre.feature_scaling(raw_data, n)
    # split to train and test sets
    data = pre.split_data(feature_data)

    # the last column is passed as the target, all others as inputs
    train_x, train_y = data['train'][:, :-1], data['train'][:, -1]
    test_x, test_y = data['test'][:, :-1], data['test'][:, -1]

    # create instance of the neural network
    #-------------------------------------------------
    model = ionn.InOutNN(n, init_random=True)

    # train the model
    #-------------------------------------------------
    model.train(train_x, train_y, alpha=0.5, itmax=20000, verbose=False)

    # compute polynomial coefficients (from rescaled features)
    poly_coeffs = model.polynomial_coefficients(means, stds)
    print(np.flip(poly_coeffs, axis=0))

    # test the model
    #-------------------------------------------------
    # The returned (train error, test error) pair is not used here.
    model.test(train_x, train_y, test_x, test_y)

    # dump the model - Python object serialisation
    #-------------------------------------------------
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # os.path.join instead of a hard-coded "/" keeps the path portable.
    model_file = os.path.join(dir_path, "..", "MODEL",
                              "inoutnn_{}.pcl".format(n))
    with open(model_file, 'wb') as f:
        pickle.dump(model, f)

    return poly_coeffs
def test_get_coauthorship_transactions(self):
    """
    Tests the function get_coauthorship_transactions.

    ``truth[i]`` holds the expected transactions for ``references[i]``:
    one list of names per reference in that cluster.
    """
    references = pre.read_data(self.testfilename)
    truth = [
        [
            ['m jones', 'e rundensteiner', 'y huang'],
            ['matthew c jones', 'e rundensteiner', 'h kuno', 'p marron',
             'v taube', 'y ra'],
            ['matthew c jones', 'e rundensteiner', 'y huang'],
        ],
        [
            ['mike w miller', 'l berg'],
            ['mike w miller'],
        ],
        [
            ['c chen', 'd kung', 'j samuel', 'j gao', 'p hsia',
             'y toyoshima'],
        ],
        [
            ['jane j robinson'],
            ['jane j robinson'],
        ],
        [
            ['a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
             'j wild', 'k maly', 's ghanem', 'x zhu'],
        ],
        [
            ['mary d brown', 'y patt'],
        ],
    ]
    # enumerate (not zip) so a cluster without an expected entry still
    # raises instead of being silently skipped.
    for i, cluster in enumerate(references):
        transactions = mod.get_coauthorship_transactions(cluster)
        # assertEqual replaces the deprecated alias assertEquals.
        self.assertEqual(transactions, truth[i])
def test_soft_tfidf(self):
    """
    Tests the soft tfidf.

    The expected score is the sum, over the word pairs
    ('matt', 'matthew'), ('jones', 'jones') and ('brow', 'brown'), of the
    normalized tf-idf of each word in its own name multiplied by the
    Levenshtein ratio of the pair.
    """
    references = pre.read_data(self.testfilename)
    corpus = pre.get_corpus(references) + ['matt', 'brow']
    idf = mod.get_idf(corpus)

    name_a = 'matt huang kuno jones brow'
    words_a = mod.get_words(name_a)
    name_b = 'matthew jones c marron brown'
    words_b = mod.get_words(name_b)

    s_tfidf = mod.soft_tfidf(name_a, name_b, idf)

    # Operand order is kept exactly as before so the floating-point result
    # is bit-for-bit the same.
    truth = (
        mod.norm_tfidf('matt', words_a, idf) *
        mod.norm_tfidf('matthew', words_b, idf) *
        Levenshtein.ratio('matt', 'matthew') +
        mod.norm_tfidf('jones', words_a, idf) *
        mod.norm_tfidf('jones', words_b, idf) *
        Levenshtein.ratio('jones', 'jones') +
        mod.norm_tfidf('brow', words_a, idf) *
        mod.norm_tfidf('brown', words_b, idf) *
        Levenshtein.ratio('brow', 'brown')
    )
    # assertEqual replaces the deprecated alias assertEquals.
    self.assertEqual(s_tfidf, truth)
def setUp(self):
    """Load the references and define the partitionings used by the tests.

    ``base_partitioning`` and each entry of ``alt_partitionings`` hold one
    list of integer labels per cluster (six clusters in total).
    """
    super(RankingSampleTestCase, self).setUp()
    self.references = pre.read_data(self.testfilename)

    self.base_partitioning = [[0, 0, 1], [0, 1], [0], [0, 0], [0], [0]]

    first_alternative = [[0, 1, 1], [1, 0], [0], [0, 0], [0], [0]]
    second_alternative = [[1, 0, 1], [0, 1], [0], [0, 0], [0], [0]]
    self.alt_partitionings = [first_alternative, second_alternative]
def test_get_coauthorship_rules(self):
    """
    Tests the function get_coauthorship_rules.

    ``truth[i]`` is the expected nested mapping for ``references[i]``:
    ``truth[i][author_a][author_b]`` is the expected rule value for the
    pair. Values are compared after rounding to 4 decimals, in both
    directions (every computed rule must be expected, and every expected
    rule must be computed).
    """
    references = pre.read_data(self.testfilename)
    truth = [
        {
            'm jones': {
                'e rundensteiner': 1.0, 'y huang': 1.0,
            },
            'e rundensteiner': {
                'm jones': 1.0/3, 'y huang': 2.0/3,
                'matthew c jones': 2.0/3, 'h kuno': 1.0/3,
                'p marron': 1.0/3, 'v taube': 1.0/3, 'y ra': 1.0/3,
            },
            'y huang': {
                'm jones': 1.0/2, 'e rundensteiner': 1.0,
                'matthew c jones': 1.0/2,
            },
            'matthew c jones': {
                'e rundensteiner': 1.0, 'y huang': 1.0/2, 'h kuno': 1.0/2,
                'p marron': 1.0/2, 'v taube': 1.0/2, 'y ra': 1.0/2,
            },
            'h kuno': {
                'e rundensteiner': 1.0, 'matthew c jones': 1.0,
                'p marron': 1.0, 'v taube': 1.0, 'y ra': 1.0,
            },
            'p marron': {
                'e rundensteiner': 1.0, 'matthew c jones': 1.0,
                'h kuno': 1.0, 'v taube': 1.0, 'y ra': 1.0,
            },
            'v taube': {
                'e rundensteiner': 1.0, 'matthew c jones': 1.0,
                'h kuno': 1.0, 'p marron': 1.0, 'y ra': 1.0,
            },
            'y ra': {
                'e rundensteiner': 1.0, 'matthew c jones': 1.0,
                'h kuno': 1.0, 'p marron': 1.0, 'v taube': 1.0,
            },
        },
        {
            'mike w miller': {'l berg': 1.0/2},
            'l berg': {'mike w miller': 1.0},
        },
        {
            'c chen': {
                'd kung': 1.0, 'j samuel': 1.0, 'j gao': 1.0,
                'p hsia': 1.0, 'y toyoshima': 1.0,
            },
            'd kung': {
                'c chen': 1.0, 'j samuel': 1.0, 'j gao': 1.0,
                'p hsia': 1.0, 'y toyoshima': 1.0,
            },
            'j samuel': {
                'c chen': 1.0, 'd kung': 1.0, 'j gao': 1.0,
                'p hsia': 1.0, 'y toyoshima': 1.0,
            },
            'j gao': {
                'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0,
                'p hsia': 1.0, 'y toyoshima': 1.0,
            },
            'p hsia': {
                'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0,
                'j gao': 1.0, 'y toyoshima': 1.0,
            },
            'y toyoshima': {
                'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0,
                'j gao': 1.0, 'p hsia': 1.0,
            },
        },
        {},
        {
            'a gupta': {
                'a gonzalez': 1.0, 'a hamid': 1.0, 'c overstreet': 1.0,
                'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'a gonzalez': {
                'a gupta': 1.0, 'a hamid': 1.0, 'c overstreet': 1.0,
                'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'a hamid': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'c overstreet': 1.0,
                'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'c overstreet': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'h wahab': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'c overstreet': 1.0, 'j wild': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'j wild': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'c overstreet': 1.0, 'h wahab': 1.0, 'k maly': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            'k maly': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
                's ghanem': 1.0, 'x zhu': 1.0,
            },
            's ghanem': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
                'k maly': 1.0, 'x zhu': 1.0,
            },
            'x zhu': {
                'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
                'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
                'k maly': 1.0, 's ghanem': 1.0,
            },
        },
        {
            'mary d brown': {'y patt': 1.0},
            'y patt': {'mary d brown': 1.0},
        },
    ]

    for i, cluster in enumerate(references):
        rules = mod.get_coauthorship_rules(cluster)
        expected = truth[i]
        # First walk the computed rules, then the expected ones, so that
        # a pair missing on either side surfaces as a lookup failure.
        for pairs in (rules, expected):
            for author_a in pairs:
                for author_b in pairs[author_a]:
                    # assertEqual replaces the deprecated assertEquals.
                    self.assertEqual(
                        round(rules[author_a][author_b], 4),
                        round(expected[author_a][author_b], 4))
from PyTango import DeviceProxy

# Make the project root importable before pulling in the local package.
sys.path.append("../../")
from src.preprocessing import read_data, extract_hr_acc

# Script entry point: reads dataset.dat, resamples it and registers each
# row's values with the Tango device "C3/hr_monitor/1".
if __name__ == '__main__':
    proxy = DeviceProxy("C3/hr_monitor/1")
    # dataset.dat is expected next to this script.
    dirname = os.path.dirname(__file__)
    filename = os.path.join(dirname, 'dataset.dat')
    print(filename)
    data = extract_hr_acc(read_data(filename))
    # NOTE(review): '1000L' presumably means 1000 ms bins - confirm against
    # the pandas version in use.
    data = data.resample('1000L')
    # Lightweight record for the values handed to the device.
    DP = namedtuple("DP", ["timestamp", "hr", "acc_x", "acc_y", "acc_z"])
    i = 0
    # Previous row and its timestamp; the datapoint sent on each iteration
    # is built from these, not from the current row.
    prow, ptimestamp = None, None
    for index, row in data.iterrows():
        # Index timestamp as float seconds.
        # NOTE(review): '%s' is a platform-specific strftime directive.
        timestamp = float(index.to_datetime().strftime('%s.%f'))
        # NOTE(review): under Python 2 integer division this is 0 for every
        # i < len(data) - confirm the intended arithmetic.
        progress = (i / len(data)) * 100
        if prow is not None:
            datapoint = DP(timestamp=ptimestamp, hr=prow['hr'],
                           acc_x=prow['acc_x'], acc_y=prow['acc_y'],
                           acc_z=prow['acc_z'])
            print("%s - %s, %s - %s%%" % (datetime.now(),
                                          index.to_datetime(),
                                          datapoint.hr, progress))
            proxy.register_datapoint(datapoint)
        # NOTE(review): i, prow and ptimestamp are not updated in the
        # visible part of the loop; the body presumably continues beyond
        # this excerpt.