Exemplo n.º 1
0
 def setUp(self):
     """Create the fixture shared by the tests: a DataFrame and a Profiler.

     The DataFrame has three four-row columns ('Name', 'Age', 'Date'); the
     last 'Date' entry ("2023213") is not in the same format as the others.
     """
     names = ['Tom', 'Jack', 'Steve', 'Ricky']
     ages = [28, 34, 29, 42]
     dates = ["2018-10-05", "2014-02-23", "2020-09-23T00:10:00", "2023213"]

     # Keys are inserted in the same order as before: Name, Age, Date.
     self.df = pd.DataFrame({'Name': names, 'Age': ages, 'Date': dates})
     self.profiler = Profiler()
Exemplo n.º 2
0
    def __init__(self) -> None:
        """Init method of IndexBuilder.

        Reads ``index_info.json`` from the ``resources`` directory located
        next to this module, stores the parsed content in
        ``self.index_config`` and creates the ``Profiler`` and
        ``IndexManager`` helpers. The config must provide the ``es_host`` and
        ``es_port`` keys, which are handed to ``IndexManager``.

        Raises:
            OSError: if ``index_info.json`` cannot be opened.
            json.JSONDecodeError: if the file is not valid JSON.
            KeyError: if ``es_host`` or ``es_port`` is missing from the config.
        """

        self.resources_path = os.path.join(os.path.dirname(__file__),
                                           "resources")
        index_info_path = os.path.join(self.resources_path, 'index_info.json')
        # Decode explicitly as UTF-8 (the standard JSON encoding) instead of
        # relying on the platform's locale-dependent default encoding.
        with open(index_info_path, 'r', encoding='utf-8') as index_info_f:
            self.index_config = json.load(index_info_f)
        self.current_global_index = None
        # Instance-level copy of the module-level GLOBAL_INDEX_INTERVAL.
        self.GLOBAL_INDEX_INTERVAL = GLOBAL_INDEX_INTERVAL
        self.profiler = Profiler()
        self.im = IndexManager(es_host=self.index_config["es_host"],
                               es_port=self.index_config["es_port"])
Exemplo n.º 3
0
    def __init__(self, endpoint: str) -> None:
        """Init method of QuerySystem, set up the SPARQL query wrapper.

        The wrapper is configured to return JSON and to send requests with
        the POST method using the URLENCODED request method.

        Args:
            endpoint: query endpoint address

        Returns:

        """

        sparql = SPARQLWrapper(endpoint)
        sparql.setReturnFormat(JSON)
        sparql.setMethod(POST)
        sparql.setRequestMethod(URLENCODED)
        self.qm = sparql

        # Starts out empty; nothing is registered at construction time.
        self.joiners = {}
        self.profiler = Profiler()
Exemplo n.º 4
0
    def __init__(self,
                 es_index: str,
                 es_host: str = ES_HOST,
                 es_port: int = ES_PORT) -> None:
        """Init method of QuerySystem, creating its JSONQueryManager.

        Args:
            es_index: elastic search index.
            es_host: elastic search host, defaults to ES_HOST.
            es_port: elastic search port, defaults to ES_PORT.

        Returns:

        """

        # Keyword order matches the original call: host, port, index.
        connection = {
            "es_host": es_host,
            "es_port": es_port,
            "es_index": es_index,
        }
        self.qm = JSONQueryManager(**connection)
        # Starts out empty; nothing is registered at construction time.
        self.joiners = {}
        self.profiler = Profiler()
Exemplo n.º 5
0
    def __init__(self,
                 es_index: str,
                 es_host: str = "dsbox02.isi.edu",
                 es_port: int = 9200) -> None:
        """Init method of QuerySystem, creating its QueryManager.

        Args:
            es_index: elastic search index.
            es_host: elastic search host.
            es_port: elastic search port.

        Returns:

        """

        query_manager = QueryManager(
            es_host=es_host,
            es_port=es_port,
            es_index=es_index,
        )
        self.qm = query_manager
        # Starts out empty; nothing is registered at construction time.
        self.joiners = {}
        self.profiler = Profiler()
Exemplo n.º 6
0
class TestProfiler(unittest.TestCase):
    """Unit tests for Profiler, run against a small three-column DataFrame."""

    def setUp(self):
        """Build the shared fixture: a DataFrame and a fresh Profiler."""
        data = {
            'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
            'Age': [28, 34, 29, 42],
            'Date':
            ["2018-10-05", "2014-02-23", "2020-09-23T00:10:00", "2023213"]
        }
        # Pin the column order explicitly. The expectations below assume the
        # columns are ordered Age, Date, Name, which only happened implicitly
        # on older pandas/Python versions that sorted dict keys; newer ones
        # keep the dict insertion order (Name, Age, Date).
        self.df = pd.DataFrame(data, columns=sorted(data))
        self.profiler = Profiler()

    @Utils.test_print
    def test_construct_global_keywords(self):
        """Global keywords are the column names."""
        self.assertListEqual(self.profiler.construct_global_keywords(self.df),
                             ['Age', 'Date', 'Name'])

    @Utils.test_print
    def test_construct_global_description(self):
        """Global description lists every column with its dtype."""
        self.assertEqual(self.profiler.construct_global_description(self.df),
                         "Age : int64, Date : object, Name : object")

    @Utils.test_print
    def test_construct_global_title(self):
        """Global title is the space-separated column names."""
        self.assertEqual(self.profiler.construct_global_title(self.df),
                         "Age Date Name")

    @Utils.test_print
    def test_construct_variable_description(self):
        """Each column is described by its name and dtype."""
        expected = {
            "Age": "column name: Age, dtype: int64",
            "Date": "column name: Date, dtype: object",
            "Name": "column name: Name, dtype: object",
        }
        # Select columns by label so the check does not depend on their
        # position in the DataFrame.
        for column_name, description in expected.items():
            self.assertEqual(
                self.profiler.construct_variable_description(
                    self.df[column_name]),
                description)

    @Utils.test_print
    def test_profile_temporal_coverage(self):
        """Missing coverage bounds are filled from the column's dates,
        while bounds that are already given are kept."""
        date_col = self.df['Date']

        # Neither bound given: both come from the column.
        self.assertEqual(
            self.profiler.profile_temporal_coverage(
                coverage={"start": None, "end": None},
                column=date_col),
            {
                "start": "2014-02-23T00:00:00",
                "end": "2020-09-23T00:10:00"
            })

        # Start given: kept as is, only the end comes from the column.
        self.assertEqual(
            self.profiler.profile_temporal_coverage(
                coverage={"start": "2010-02-23T00:00:00", "end": None},
                column=date_col),
            {
                "start": "2010-02-23T00:00:00",
                "end": "2020-09-23T00:10:00"
            })

        # End given: kept as is, only the start comes from the column.
        self.assertEqual(
            self.profiler.profile_temporal_coverage(
                coverage={"start": None, "end": "2022-09-23T00:10:00"},
                column=date_col),
            {
                "start": "2014-02-23T00:00:00",
                "end": "2022-09-23T00:10:00"
            })

    @Utils.test_print
    def test_profile_named_entity(self):
        """Named entities of the 'Name' column are its values, in order."""
        named_entity_col = self.df['Name']
        self.assertListEqual(
            self.profiler.profile_named_entity(named_entity_col),
            ['Tom', 'Jack', 'Steve', 'Ricky'])