Пример #1
0
    def test_generic_drop_with_list(self):
        """Drop two frames through one list holding a frame proxy and a frame name."""
        first_name = str(uuid.uuid1()).replace('-', '_')
        first_frame = ta.Frame(name=first_name)
        second_name = str(uuid.uuid1()).replace('-', '_')
        ta.Frame(name=second_name)

        # One entry is the proxy object, the other is just the name.
        targets = [first_frame, second_name]

        # Both frames have to be listed before anything is dropped.
        for created in (first_name, second_name):
            self.assertTrue(
                created in ta.get_frame_names(),
                created + " should exist in the list of frame names")

        dropped_count = ta.drop(targets)
        self.assertEqual(
            2, dropped_count,
            "drop() should have deleted the 2 items from the list")

        # Neither name may be listed once the drop has happened.
        for removed in (first_name, second_name):
            self.assertFalse(
                removed in ta.get_frame_names(),
                removed + " should not be in the list of frame names")
Пример #2
0
    def test_generic_drop_duplicate_items(self):
        """Pass the same frame to drop() three times and expect one deletion."""
        unique_name = str(uuid.uuid1()).replace('-','_')
        proxy = ta.Frame(name=unique_name)

        # The freshly created frame has to be listed before it is dropped.
        listed_before = unique_name in ta.get_frame_names()
        self.assertTrue(listed_before, unique_name + " should exist in the list of frame names")

        # Same frame referenced twice by proxy and once by name.
        deleted = ta.drop(proxy, proxy, unique_name)
        self.assertEqual(1, deleted, "drop() should have deleted 1 item")

        # The name must be gone from the listing afterwards.
        listed_after = unique_name in ta.get_frame_names()
        self.assertFalse(listed_after, unique_name + " should not be in the list of frame names")
Пример #3
0
    def test_frame_rename(self):
        """Assign a new value to frame.name and check the frame listing picks it up."""
        print("define csv file")
        columns = [('a', str),
                   ('b', ta.int32),
                   ('labels', ta.int32),
                   ('predictions', ta.int32)]
        source = ta.CsvFile("/datasets/classification-compute.csv", schema=columns,
                            delimiter=',', skip_header_lines=1)

        print("create frame")
        renamed = ta.Frame(source, name="test_frame_rename")

        target_name = "test_frame_new_name"
        # The target name has to be free before the rename ...
        taken_before = target_name in ta.get_frame_names()
        self.assertFalse(taken_before, "test_frame_new_name should not exist in list of frames")
        print("renaming frame")
        renamed.name = target_name
        # ... and listed once the rename went through.
        taken_after = target_name in ta.get_frame_names()
        self.assertTrue(taken_after, "test_frame_new_name should exist in list of frames")
Пример #4
0
    def test_frame_drop(self):
        """Check drop_frames() with a frame object and then with a frame name.

        The frame listing is fetched again after each drop.  Previously the
        second check reused the list captured before the frame was re-created,
        so it could never notice a failed drop-by-name.
        """
        print("define csv file")
        csv = ta.CsvFile("/datasets/classification-compute.csv",
                         schema=[('a', str), ('b', ta.int32),
                                 ('labels', ta.int32),
                                 ('predictions', ta.int32)],
                         delimiter=',',
                         skip_header_lines=1)

        print("create frame")
        frame = ta.Frame(csv, name="test_frame_drop")

        print("dropping frame by entity")
        ta.drop_frames(frame)
        self.assertFalse("test_frame_drop" in ta.get_frame_names(),
                         "test_frame_drop should not exist in list of frames")

        # Re-create the frame so there is something to drop by name.
        ta.Frame(csv, name="test_frame_drop")

        print("dropping frame by name")
        self.assertEqual(1, ta.drop_frames("test_frame_drop"),
                         "drop_frames() should have deleted one frame")
        # Query the server again: the earlier listing predates this drop.
        self.assertFalse("test_frame_drop" in ta.get_frame_names(),
                         "test_frame_drop should not exist in list of frames")
Пример #5
0
    def test_generic_drop_duplicate_items(self):
        """Expect drop() to report 1 when one frame is named three times in the call."""
        name = str(uuid.uuid1()).replace('-', '_')
        handle = ta.Frame(name=name)

        def is_listed():
            # Ask the server each time; the listing changes during the test.
            return name in ta.get_frame_names()

        # Sanity check: the frame exists right after creation.
        self.assertTrue(
            is_listed(),
            name + " should exist in the list of frame names")

        # Proxy twice plus the name once still refers to a single frame.
        number_dropped = ta.drop(handle, handle, name)
        self.assertEqual(1, number_dropped,
                         "drop() should have deleted 1 item")

        # The frame must have disappeared from the listing.
        self.assertFalse(
            is_listed(),
            name + " should not be in the list of frame names")
Пример #6
0
    def test_access_refreshes_frames(self):
        """Tests that some actions do or do not update the last_read_date entity property

        Reading last_read_date itself or the schema is expected to leave the
        date untouched; inspect(), copy() and bin_column() are each expected
        to move it forward.
        """
        date_schema = [('start', ta.datetime),
                       ('id', int),
                       ('stop', ta.datetime),
                       ('color', str)]
        csv_source = ta.CsvFile("/datasets/dates.csv", schema=date_schema, delimiter=',')
        frame_name = "update_last_read"
        if frame_name in ta.get_frame_names():
            ta.drop_frames(frame_name)
        # Give the frame a name to save it from the other GC blasting test in here.
        frame = ta.Frame(csv_source, name=frame_name)

        # Two consecutive reads of the property must agree.
        baseline = frame.last_read_date
        reread = frame.last_read_date
        self.assertEqual(baseline, reread)

        # Schema (or other metadata property) reads should not update the date.
        frame.schema
        after_schema = frame.last_read_date
        self.assertEqual(baseline, after_schema)

        # inspect should update the last read date.
        frame.inspect()
        after_inspect = frame.last_read_date
        self.assertLess(after_schema, after_inspect)

        # copy should update the last read date.
        frame.copy()
        after_copy = frame.last_read_date
        self.assertLess(after_inspect, after_copy)

        # bin_column is checked the same way.
        frame.bin_column('id', [3, 5, 8])
        after_binning = frame.last_read_date
        self.assertLess(after_copy, after_binning)
Пример #7
0
    def test_generic_drop_with_list(self):
        """Hand drop() a list with a frame proxy and a frame name; both frames should go."""
        name_a = str(uuid.uuid1()).replace('-','_')
        proxy_a = ta.Frame(name=name_a)
        name_b = str(uuid.uuid1()).replace('-','_')
        ta.Frame(name=name_b)

        # Mixed list: proxy object for the first frame, plain name for the second.
        to_drop = [proxy_a, name_b]

        def listed(frame_name):
            # Always query the server so the check reflects the current state.
            return frame_name in ta.get_frame_names()

        # Both frames exist right after creation.
        self.assertTrue(listed(name_a), name_a + " should exist in the list of frame names")
        self.assertTrue(listed(name_b), name_b + " should exist in the list of frame names")

        how_many = ta.drop(to_drop)
        self.assertEqual(2, how_many, "drop() should have deleted the 2 items from the list")

        # Both frames are gone after the drop.
        self.assertFalse(listed(name_a), name_a + " should not be in the list of frame names")
        self.assertFalse(listed(name_b), name_b + " should not be in the list of frame names")
Пример #8
0
    def test_drop_frame_that_does_not_exist(self):
        """Expect drop_frames() to return 0 for a name that is not on the server."""
        missing_name = str(uuid.uuid1()).replace('-', '_')

        # Confirm the generated name really is unused before dropping it.
        already_listed = missing_name in ta.get_frame_names()
        self.assertFalse(
            already_listed,
            missing_name + " should not exist in the list of frames")

        dropped = ta.drop_frames(missing_name)
        self.assertEqual(0, dropped,
                         "drop_frames() should not have deleted any frames")
Пример #9
0
    def test_generic_drop_by_invalid_name(self):
        """Expect drop() to return 0, without failing, for a name that does not exist."""
        unknown = str(uuid.uuid1()).replace('-', '_')
        absent = unknown not in ta.get_frame_names()
        self.assertTrue(absent,
                        unknown + " should not exist in the list of frames")

        # Dropping a frame that does not exist must not fail; it reports 0.
        removed = ta.drop(unknown)
        self.assertEqual(
            0, removed,
            "drop() with a non-existent item name should not have deleted items"
        )
Пример #10
0
    def test_create_kmeans_model_with_duplicte_frame_name(self):
        """Expect an exception when a KMeansModel is created under a frame's name."""
        shared_name = str(uuid.uuid1()).replace('-','_')

        # Occupy the name with a frame first.
        ta.Frame(name=shared_name)
        frame_listed = shared_name in ta.get_frame_names()
        self.assertTrue(frame_listed, shared_name + " should be in the list of frames")

        # Reusing that name for a model is expected to be rejected.
        with self.assertRaises(Exception):
            ta.KMeansModel(name=shared_name)
Пример #11
0
    def test_create_kmeans_model_with_duplicte_frame_name(self):
        """A model may not take a name that a frame already uses."""
        taken = str(uuid.uuid1()).replace('-', '_')

        # Step 1: a frame claims the name and shows up in the listing.
        ta.Frame(name=taken)
        current_frames = ta.get_frame_names()
        self.assertTrue(taken in current_frames,
                        taken + " should be in the list of frames")

        # Step 2: creating a model with the same name should raise.
        with self.assertRaises(Exception):
            ta.KMeansModel(name=taken)
Пример #12
0
    def test_frame_rename(self):
        """Rename a frame via its name property and verify the server listing."""
        print("define csv file")
        csv_schema = [('a', str), ('b', ta.int32),
                      ('labels', ta.int32),
                      ('predictions', ta.int32)]
        csv_file = ta.CsvFile("/datasets/classification-compute.csv",
                              schema=csv_schema,
                              delimiter=',',
                              skip_header_lines=1)

        print("create frame")
        subject = ta.Frame(csv_file, name="test_frame_rename")

        wanted = "test_frame_new_name"

        def wanted_is_listed():
            # Fetch the listing anew on every call.
            return wanted in ta.get_frame_names()

        # Before the rename the new name must be unused.
        self.assertFalse(
            wanted_is_listed(),
            "test_frame_new_name should not exist in list of frames")
        print("renaming frame")
        subject.name = wanted
        # After the rename the new name must be listed.
        self.assertTrue(wanted_is_listed(),
                        "test_frame_new_name should exist in list of frames")
Пример #13
0
    def test_duplicate_graph_rename(self):
        """Renaming a graph to a name held by another graph, a frame or a model must raise.

        After every rejected rename the entities involved are checked to still
        be listed under their original names.
        """
        def fresh_name():
            # UUID-based name with dashes replaced by underscores.
            return str(uuid.uuid1()).replace('-', '_')

        first_graph_name = fresh_name()
        second_graph_name = fresh_name()
        model_name = fresh_name()
        frame_name = fresh_name()

        # Two graphs to rename, plus a model and a frame that occupy names.
        first_graph = ta.Graph(name=first_graph_name)
        second_graph = ta.Graph(name=second_graph_name)
        ta.KMeansModel(name=model_name)
        ta.Frame(name=frame_name)

        # Both graphs must be visible on the server after creation.
        for graph_name in (first_graph_name, second_graph_name):
            self.assertTrue(graph_name in ta.get_graph_names(),
                            graph_name + " should exist in list of graphs")

        # Graph -> name of another graph: expected to raise.
        with self.assertRaises(Exception):
            second_graph.name = first_graph_name

        # Neither graph name may have changed.
        for graph_name in (first_graph_name, second_graph_name):
            self.assertTrue(graph_name in ta.get_graph_names(),
                            graph_name + " should still exist in list of graphs")

        # Graph -> name of the frame: expected to raise.
        with self.assertRaises(Exception):
            first_graph.name = frame_name

        # The graph and the frame keep their names.
        graph_listed = first_graph_name in ta.get_graph_names()
        self.assertTrue(
            graph_listed,
            first_graph_name + " should still exist in the list of graphs")
        frame_listed = frame_name in ta.get_frame_names()
        self.assertTrue(
            frame_listed,
            frame_name + " should still exist in the list of frames")

        # Graph -> name of the model: expected to raise.
        with self.assertRaises(Exception):
            first_graph.name = model_name

        # The graph and the model keep their names.
        graph_listed = first_graph_name in ta.get_graph_names()
        self.assertTrue(
            graph_listed,
            first_graph_name + " should still exist in the list of graphs")
        model_listed = model_name in ta.get_model_names()
        self.assertTrue(
            model_listed,
            model_name + " should still exist in the list of models")
Пример #14
0
    def test_gc_drop_stale_and_finalize(self):
        """Walk an unnamed and a named frame through drop_stale() and finalize_dropped().

        The unnamed frame and its error frame are expected to become DROPPED
        and then FINALIZED once drop_stale() is called with a tiny age; the
        named frame and its error frame are expected to stay ACTIVE throughout.
        """
        csv = ta.CsvFile("/datasets/dates.csv",
                         schema=[('start', ta.datetime), ('id', int),
                                 ('stop', ta.datetime), ('color', str)],
                         delimiter=',')
        kept_name = "dates_two"
        if kept_name in ta.get_frame_names():
            ta.drop_frames(kept_name)

        # Unnamed frame and its (also unnamed) error frame.
        unnamed = ta.Frame(csv)
        unnamed_errors = unnamed.get_error_frame()
        self.assertIsNotNone(unnamed_errors)
        self.assertIsNone(unnamed_errors.name)
        # Named frame; its error frame carries no name either.
        named = ta.Frame(csv, name=kept_name)
        named_errors = named.get_error_frame()
        self.assertIsNotNone(named_errors)
        self.assertIsNone(named_errors.name)

        def check_statuses(unnamed_status, named_status):
            # Same order as always: unnamed pair first, then the named pair.
            self.assertEqual(unnamed_status, unnamed.status)
            self.assertEqual(unnamed_status, unnamed_errors.status)
            self.assertEqual(named_status, named.status)
            self.assertEqual(named_status, named_errors.status)

        # Normal drop_stale: nothing should change, the frames aren't old enough.
        admin.drop_stale()
        check_statuses("ACTIVE", "ACTIVE")

        # Nothing is dropped, so nothing should be finalized.
        admin.finalize_dropped()
        check_statuses("ACTIVE", "ACTIVE")

        # Drop with a very tiny age, so the unnamed frame should get dropped.
        admin.drop_stale("1ms")
        check_statuses("DROPPED", "ACTIVE")

        # Only the unnamed frame and its error frame are dropped,
        # so only they should be finalized.
        admin.finalize_dropped()
        check_statuses("FINALIZED", "ACTIVE")
Пример #15
0
    def test_gc_drop_stale_and_finalize(self):
        """Check entity statuses after each drop_stale() / finalize_dropped() call.

        Four entities are tracked: an unnamed frame, a named frame, and the
        error frame of each.
        """
        dates_schema = [('start', ta.datetime),
                        ('id', int),
                        ('stop', ta.datetime),
                        ('color', str)]
        csv = ta.CsvFile("/datasets/dates.csv", schema=dates_schema, delimiter=',')
        f2_name = "dates_two"
        if f2_name in ta.get_frame_names():
            ta.drop_frames(f2_name)

        f1 = ta.Frame(csv)
        f1e = f1.get_error_frame()
        self.assertIsNotNone(f1e)
        self.assertIsNone(f1e.name)
        f2 = ta.Frame(csv, name=f2_name)
        f2e = f2.get_error_frame()
        self.assertIsNotNone(f2e)
        self.assertIsNone(f2e.name)

        everything = (f1, f1e, f2, f2e)

        # First, a normal drop_stale: nothing should change because these
        # frames aren't old enough.
        admin.drop_stale()
        for entity in everything:
            self.assertEqual("ACTIVE", entity.status)

        # Nothing is dropped, so nothing should be finalized.
        admin.finalize_dropped()
        for entity in everything:
            self.assertEqual("ACTIVE", entity.status)

        # Now drop with a very tiny age, so the unnamed f1 should get dropped.
        admin.drop_stale("1ms")
        after_drop = ((f1, "DROPPED"), (f1e, "DROPPED"),
                      (f2, "ACTIVE"), (f2e, "ACTIVE"))
        for entity, expected in after_drop:
            self.assertEqual(expected, entity.status)

        # Only f1 and f1e are dropped, so only they should be finalized.
        admin.finalize_dropped()
        after_finalize = ((f1, "FINALIZED"), (f1e, "FINALIZED"),
                          (f2, "ACTIVE"), (f2e, "ACTIVE"))
        for entity, expected in after_finalize:
            self.assertEqual(expected, entity.status)
Пример #16
0
def get_frame(name):
    """Return the frame to work with, creating it on the server when needed.

    Lookup order:

    1. If the module-level ``mode`` is ``None`` or ``'local'``, print a
       warning and return ``None``.
    2. If the module-level ``frame`` is already set, return it as is; the
       ``name`` argument is not consulted in that case.
    3. If a frame called ``name`` exists on the server, fetch and return it.
       This result is not stored in the module-level ``frame``.
    4. Otherwise create a frame from an empty upload using the module-level
       ``schema``, store it in the module-level ``frame``, name it and
       return it.
    """
    global frame

    offline = mode is None or mode == 'local'
    if offline:
        print('Warning: Not connected to ATK')
        return None

    # A previously created frame wins over any lookup by name.
    if frame is not None:
        return frame

    if name in tap.get_frame_names():
        return tap.get_frame(name)

    # Nothing cached and nothing on the server: start with an empty frame.
    empty_rows = tap.UploadRows([], schema)
    frame = tap.Frame(empty_rows)
    frame.name = name

    return frame
Пример #17
0
    def test_frame_drop(self):
        """Drop a frame by object, re-create it, then drop it by name.

        Each "is it gone?" check asks the server for a fresh frame listing.
        The last check used to look at the listing taken before the frame was
        re-created, which made it pass no matter what the second drop did.
        """
        print("define csv file")
        csv = ta.CsvFile("/datasets/classification-compute.csv", schema= [('a', str),
                                                                          ('b', ta.int32),
                                                                          ('labels', ta.int32),
                                                                          ('predictions', ta.int32)], delimiter=',', skip_header_lines=1)

        print("create frame")
        frame = ta.Frame(csv, name="test_frame_drop")

        print("dropping frame by entity")
        ta.drop_frames(frame)
        self.assertFalse("test_frame_drop" in ta.get_frame_names(), "test_frame_drop should not exist in list of frames")

        # Bring the frame back so the drop-by-name path has a target.
        ta.Frame(csv, name="test_frame_drop")

        print("dropping frame by name")
        self.assertEqual(1, ta.drop_frames("test_frame_drop"), "drop_frames() should have deleted one frame")
        # Fresh listing: the one fetched above is out of date by now.
        self.assertFalse("test_frame_drop" in ta.get_frame_names(), "test_frame_drop should not exist in list of frames")
Пример #18
0
    def test_duplicate_model_rename(self):
        """Renaming a model to a name held by another model, a graph or a frame must raise.

        After each rejected rename, the entities involved are checked to still
        be listed under their original names.
        """
        first_model_name = str(uuid.uuid1()).replace('-','_')
        second_model_name = str(uuid.uuid1()).replace('-','_')
        graph_name = str(uuid.uuid1()).replace('-','_')
        frame_name = str(uuid.uuid1()).replace('-','_')

        def assert_listed(entity_name, listing, suffix):
            # The message is the entity name followed by the given suffix.
            self.assertTrue(entity_name in listing, entity_name + suffix)

        # Two models to rename, plus a graph and a frame that occupy names.
        first_model = ta.KMeansModel(name=first_model_name)
        second_model = ta.KMeansModel(name=second_model_name)
        ta.Graph(name=graph_name)
        ta.Frame(name=frame_name)

        # Both models must be visible on the server after creation.
        assert_listed(first_model_name, ta.get_model_names(), " should exist in list of models")
        assert_listed(second_model_name, ta.get_model_names(), " should exist in list of models")

        # Model -> name of another model: expected to raise.
        with self.assertRaises(Exception):
            second_model.name = first_model_name

        # Both model names are unchanged.
        assert_listed(first_model_name, ta.get_model_names(), " should still exist in list of models")
        assert_listed(second_model_name, ta.get_model_names(), " should still exist in list of models")

        # Model -> name of the graph: expected to raise.
        with self.assertRaises(Exception):
            first_model.name = graph_name

        # The model and the graph keep their names.
        assert_listed(first_model_name, ta.get_model_names(), " should still exist in the list of models")
        assert_listed(graph_name, ta.get_graph_names(), " should still exist in the list of graphs")

        # Model -> name of the frame: expected to raise.
        with self.assertRaises(Exception):
            first_model.name = frame_name

        # The model and the frame keep their names.
        assert_listed(first_model_name, ta.get_model_names(), " should still exist in the list of models")
        assert_listed(frame_name, ta.get_frame_names(), " should still exist in the list of frames")
Пример #19
0
    def test_access_refreshes_frames(self):
        """Tests that some actions do or do not update the last_read_date entity property

        Property reads (last_read_date, schema) are expected to keep the date
        as it is, while inspect(), copy() and bin_column() are each expected
        to advance it.
        """
        source = ta.CsvFile("/datasets/dates.csv",
                            schema=[('start', ta.datetime), ('id', int),
                                    ('stop', ta.datetime), ('color', str)],
                            delimiter=',')
        kept_name = "update_last_read"
        if kept_name in ta.get_frame_names():
            ta.drop_frames(kept_name)
        # The frame gets a name to save it from the other GC blasting test in here.
        f = ta.Frame(source, name=kept_name)

        def last_read():
            # Single place where the property under test is read.
            return f.last_read_date

        first = last_read()
        second = last_read()
        self.assertEqual(first, second)

        # Reading schema, or another metadata property, should not update the date.
        f.schema
        third = last_read()
        self.assertEqual(first, third)

        # inspect should update the last read date.
        f.inspect()
        fourth = last_read()
        self.assertLess(third, fourth)

        # copy should update the last read date.
        f.copy()
        fifth = last_read()
        self.assertLess(fourth, fifth)

        # bin_column is held to the same expectation.
        f.bin_column('id', [3, 5, 8])
        sixth = last_read()
        self.assertLess(fifth, sixth)
Пример #20
0
    def test_drop_frame_that_does_not_exist(self):
        """drop_frames() with an unused name is expected to delete nothing and return 0."""
        unused = str(uuid.uuid1()).replace('-','_')

        # The generated name must not be on the server yet.
        present = unused in ta.get_frame_names()
        self.assertFalse(present, unused + " should not exist in the list of frames")

        count = ta.drop_frames(unused)
        self.assertEqual(0, count, "drop_frames() should not have deleted any frames")
    def testLinearRegression(self):
        """Train a RandomForestClassifierModel on a CSV-backed frame and check the predicted columns.

        Despite the method name, the model used here is a random forest
        classifier.  'GXY' is the label column and every other schema column
        is an observation column.  The observation list and the expected
        output columns are derived from the schema, so the column names are
        written down only once.
        """
        print("define csv file")

        schema = [
            ("GXY", ta.int32),
            #("HPI",ta.ignore),
            ("Age", ta.int32),
            ("Sex", ta.int32),
            ("Height", ta.float64),
            ("Weight", ta.float64),
            ("BMI", ta.float64),
            ("DBP", ta.float64),
            ("SBP", ta.float64),
            ("HCT", ta.float64),
            ("MCV", ta.float64),
            ("RDW_SD", ta.float64),
            ("RDW_CV", ta.float64),
            ("HGB", ta.float64),
            ("MCH", ta.float64),
            ("MCHC", ta.float64),
            ("RBC", ta.float64),
            ("WBC", ta.float64),
            ("NEUT1", ta.float64),
            ("LYMPH", ta.float64),
            ("MONO1", ta.float64),
            ("EO1", ta.float64),
            ("BASO1", ta.float64),
            ("NEUT2", ta.float64),
            ("MONO2", ta.float64),
            ("EO2", ta.float64),
            ("BASO2", ta.float64),
            ("PLT", ta.float64),
            #("PDW",ta.ignore),
            ("MPV", ta.float64),
            ("P_LCR", ta.float64),
            ("PCT", ta.float64),
            ("Lymph_3", ta.float64),
            ("ESR", ta.float64),
            ("PH", ta.float64),
            ("PRO", ta.float64),
            ("GIu", ta.float64),
            ("KET", ta.float64),
            ("BLD", ta.float64),
            ("BIL", ta.float64),
            ("URO", ta.float64),
            ("NIT", ta.float64),
            ("SG", ta.float64),
            ("LEU", ta.float64),
            ("N_QT", ta.float64),
            ("VC", ta.float64),
            #("ECG",ta.ignore),
            #("BCJC1",ta.ignore),
            #("IRDS",ta.ignore),
            #("WK",ta.ignore),
            ("OB", ta.float64),
            ("FBG", ta.float64),
            ("HBsAg", ta.float64),
            ("HBsAb", ta.float64),
            ("HBeAg", ta.float64),
            ("HBeAb", ta.float64),
            ("HBcAb", ta.float64),
            ("TBiL", ta.float64),
            ("ALT", ta.float64),
            ("AST", ta.float64),
            ("AKP", ta.float64),
            ("GGT", ta.float64),
            ("ADA", ta.float64),
            ("TPO", ta.float64),
            ("Aib", ta.float64),
            ("Gib", ta.float64),
            ("A_G", ta.float64),
            ("PA", ta.float64),
            ("AST_ALT", ta.float64),
            ("BUN", ta.float64),
            ("Cr", ta.float64),
            ("UA", ta.float64),
            ("CK", ta.float64),
            ("LDH", ta.float64),
            ("CK_MB", ta.float64),
            ("LDH_MB", ta.float64),
            ("a_HBD", ta.float64),
            ("TNI", ta.float64),
            ("Fg", ta.float64),
            ("K1", ta.float64),
            ("AFP", ta.float64),
            ("CEA", ta.float64),
            ("Free_PSA", ta.float64),
            ("CA125", ta.float64),
            ("CA19_9", ta.float64),
            ("NSE", ta.float64),
            ("CA242", ta.float64),
            ("B_HCG", ta.float64),
            ("CA15_3", ta.float64),
            ("CA50", ta.float64),
            ("CA72_4", ta.float64),
            ("HGH", ta.float64),
            ("SF", ta.float64),
            ("QJD", ta.float64),
            ("DCJC", ta.float64),
            ("MJJC", ta.float64),
            ("RUT", ta.float64),
            ("PGI_PGII", ta.float64),
            ("Ca2", ta.float64),
            ("P3", ta.float64),
            ("K2", ta.float64),
            ("Na", ta.float64),
            ("CI", ta.float64),
        ]
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/9bb351fa-7b17-4a81-b3b0-521639c1d473/d342214b-c4c0-4963-aeaf-5adf054e22b6/000000_1",
                         schema=schema, skip_header_lines=1)

        # Column names in schema order; the first one ('GXY') is the label.
        column_names = [column for column, _ in schema]
        observation_columns = column_names[1:]

        print("create frame")
        frame_name = 'Random_forest_SampleFrame'
        if frame_name in ta.get_frame_names():
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        frame = ta.Frame(csv, frame_name)

        print("Initializing a RandomForestModel object")
        model_name = 'POCRandom_forest_SampleModel'
        if model_name in ta.get_model_names():
            print("Model exist, delete")
            ta.drop_models(model_name)
        classifier = ta.RandomForestClassifierModel(name=model_name)

        print("Training the model on the Frame")
        classifier.train(frame, 'GXY', observation_columns, num_classes=2)

        print("Predicting on the Frame")
        output = classifier.predict(frame)

        # The predicted frame is expected to carry every input column plus
        # one extra 'predicted_class' column at the end.
        expected_columns = column_names + ['predicted_class']
        self.assertEqual(output.column_names, expected_columns)
Пример #22
0
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import trustedanalytics as ta
ta.connect()

# Start from a clean slate: drop every frame currently on the server.
for name in ta.get_frame_names():
    print('deleting frame: %s' % name)
    ta.drop_frames(name)

# Load employees.csv (one header line skipped) into a frame named
# 'employees_frame'.
employees_schema = [('Employee', str), ('Manager', str), ('Title', str),
                    ('Years', ta.int64)]
employees_csv = ta.CsvFile("employees.csv",
                           schema=employees_schema,
                           skip_header_lines=1)
employees_frame = ta.Frame(employees_csv, 'employees_frame')

employees_frame.inspect()

#A bipartite graph
#Notice that this is a funny example since managers are also employees!
#Presumably Steve the manager and Steve the employee are the same person
Пример #23
0
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import trustedanalytics as ta
ta.connect()

# Remove every frame that is currently on the server before starting.
for name in ta.get_frame_names():
    print('deleting frame: %s' % name)
    ta.drop_frames(name)


# Build the employees frame from employees.csv, skipping its header line.
employees_schema = [
    ('Employee', str),
    ('Manager', str),
    ('Title', str),
    ('Years', ta.int64),
]
employees_csv = ta.CsvFile("employees.csv", schema=employees_schema, skip_header_lines=1)
employees_frame = ta.Frame(employees_csv, 'employees_frame')

employees_frame.inspect()

# A bipartite graph.
# Notice that this is a funny example since managers are also employees!
# Presumably Steve the manager and Steve the employee are the same person.

# Option 1

graph = ta.Graph()
Пример #24
0
    def test_generic_drop_by_invalid_name(self):
        """Check that ta.drop() on a name with no matching frame returns 0 instead of failing."""
        # Generate a fresh name from a UUID, with dashes replaced by underscores.
        frame_name = str(uuid.uuid1()).replace('-', '_')

        # Precondition: the generated name must not already be a frame name.
        current_frame_names = ta.get_frame_names()
        self.assertTrue(frame_name not in current_frame_names,
                        frame_name + " should not exist in the list of frames")

        # Dropping the unknown name should report zero deleted items.
        dropped_count = ta.drop(frame_name)
        self.assertEqual(0, dropped_count,
                         "drop() with a non-existent item name should not have deleted items")
Пример #25
0
    def test_naive_bayes(self):
        """Train a NaiveBayesModel and check the column names of its prediction frame.

        Steps:
          1. Describe the CSV file (label column 'GXY' plus the feature columns).
          2. Create the frame 'ModelNaiveBayesFrame', dropping any frame that
             already has that name.
          3. Create the model 'POCModelNaiveBayesModel', dropping any model that
             already has that name.
          4. Train on every feature column with 'GXY' as the label and
             num_classes=2.
          5. Predict on the training frame and assert that the output columns are
             the label, the features (in schema order) and 'predicted_class'.

        The column names are defined once and reused for the schema, the
        training call and the expected result, so the three cannot drift apart.
        """
        # Label column, read as ta.int32.
        # NOTE: an "HPI" entry (ta.ignore) was disabled in the schema right after it.
        label_column = 'GXY'

        # Feature columns read as ta.int32.
        int_feature_columns = ['Age', 'Sex']

        # Feature columns read as ta.float64, in file/schema order.
        float_feature_columns = [
            'Height', 'Weight', 'BMI', 'DBP', 'SBP', 'HCT', 'MCV', 'RDW_SD',
            'RDW_CV', 'HGB', 'MCH', 'MCHC', 'RBC', 'WBC', 'NEUT1', 'LYMPH',
            'MONO1', 'EO1', 'BASO1', 'NEUT2', 'MONO2', 'EO2', 'BASO2', 'PLT',
            # NOTE: a "PDW" entry (ta.ignore) was disabled at this position.
            'MPV', 'P_LCR', 'PCT', 'Lymph_3', 'ESR', 'PH', 'PRO', 'GIu',
            'KET', 'BLD', 'BIL', 'URO', 'NIT', 'SG', 'LEU', 'N_QT', 'VC',
            # NOTE: "ECG", "BCJC1", "IRDS" and "WK" entries (ta.ignore) were
            # disabled at this position.
            'OB', 'FBG', 'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'TBiL',
            'ALT', 'AST', 'AKP', 'GGT', 'ADA', 'TPO', 'Aib', 'Gib', 'A_G',
            'PA', 'AST_ALT', 'BUN', 'Cr', 'UA', 'CK', 'LDH', 'CK_MB',
            'LDH_MB', 'a_HBD', 'TNI', 'Fg', 'K1', 'AFP', 'CEA', 'Free_PSA',
            'CA125', 'CA19_9', 'NSE', 'CA242', 'B_HCG', 'CA15_3', 'CA50',
            'CA72_4', 'HGH', 'SF', 'QJD', 'DCJC', 'MJJC', 'RUT', 'PGI_PGII',
            'Ca2', 'P3', 'K2', 'Na', 'CI',
        ]
        feature_columns = int_feature_columns + float_feature_columns

        # Single-argument print(...) behaves the same as the print statement in Python 2.
        print("define csv file")
        schema = [(label_column, ta.int32)]
        schema += [(column, ta.int32) for column in int_feature_columns]
        schema += [(column, ta.float64) for column in float_feature_columns]
        csv = ta.CsvFile("hdfs://nameservice1/org/intel/hdfsbroker/userspace/ae6a38d3-191f-494f-86a6-3fe1b2255902/e3327582-f475-4dc9-8efa-96070abb606d/000000_1",
                         schema=schema, skip_header_lines=1)

        print("create frame")
        frame_name = 'ModelNaiveBayesFrame'
        # Drop a frame left over under the same name before creating a new one.
        if frame_name in ta.get_frame_names():
            print("Frame exists, delete it")
            ta.drop_frames(frame_name)
        train_frame = ta.Frame(csv, frame_name)

        # The message previously named RandomForestModel although a
        # NaiveBayesModel is what gets created below.
        print("Initializing a NaiveBayesModel object")
        model_name = 'POCModelNaiveBayesModel'
        # Same clean-up for a model left over under the same name.
        if model_name in ta.get_model_names():
            print("Model exist, delete")
            ta.drop_models(model_name)
        naive = ta.NaiveBayesModel(name=model_name)

        print("Training the model on the Frame")
        naive.train(train_frame, label_column, feature_columns, num_classes=2)

        print("Predicting on the Frame")
        output = naive.predict(train_frame)

        # The prediction frame must keep every input column and end with 'predicted_class'.
        expected_columns = [label_column] + feature_columns + ['predicted_class']
        self.assertEqual(output.column_names, expected_columns)
Пример #26
0
                          ("DBP",ta.float64),
                          ("Cr",ta.int32),
                          ("HCT",ta.float64)
                          ], skip_header_lines=1);


# In[27]:

#create frame
#frame_name = "myframe";
#if frame_name in ta.get_frame_names():
    #ta.drop_frames(frame_name)
    
    
# Recreate the frame from scratch: if a frame already uses this name, drop it first.
frame_name = 'myframe'
exist_frames = ta.get_frame_names()
frame_already_exists = frame_name in exist_frames
if frame_already_exists:
    print("Frame exists, delete it")
    ta.drop_frames(frame_name)

# Build the frame from the CSV description defined earlier in the script.
my_frame = ta.Frame(csv, frame_name)
# NOTE(review): 21 is presumably the number of rows to display; confirm against Frame.inspect.
my_frame.inspect(21)


# In[31]:

#feature classify

def transformation_DBP(row):
    #<60一组,60~90每10mmHg一组,≥90一组
    dbp = row.DBP