def testSmallRowsFormatter(self):
        data = [self._random_values() for _ in range(10)]
        pd = ResultFrame(data=data, schema=self.schema, pandas=True)
        result = ResultFrame(data=data, schema=self.schema, pandas=False)
        self.assertEqual(to_str(repr(pd)), to_str(repr(result)))
        self.assertEqual(to_str(pd._repr_html_()), to_str(result._repr_html_()))

        self.assertEqual(result._values, [r for r in result])
    def testLargeColumnsFormatter(self):
        names = list(itertools.chain(*[[name + str(i) for name in self.schema.names] for i in range(10)]))
        types = self.schema.types * 10

        schema = Schema.from_lists(names, types)
        gen_row = lambda: list(itertools.chain(*(self._random_values().values for _ in range(10))))
        data = [Record(schema=df_schema_to_odps_schema(schema), values=gen_row()) for _ in range(10)]

        pd = ResultFrame(data=data, schema=schema, pandas=True)
        result = ResultFrame(data=data, schema=schema, pandas=False)

        self.assertEqual(to_str(repr(pd)), to_str(repr(result)))
        self.assertEqual(to_str(pd._repr_html_()), to_str(result._repr_html_()))
 def testLargeRowsFormatter(self):
     data = [self._random_values() for _ in range(1000)]
     pd = ResultFrame(data=data, schema=self.schema, pandas=True)
     result = ResultFrame(data=data, schema=self.schema, pandas=False)
     self.assertEqual(to_str(repr(pd)), to_str(repr(result)))
     self.assertEqual(to_str(pd._repr_html_()),
                      to_str(result._repr_html_()))
    def testSmallRowsFormatter(self):
        data = [self._random_values() for _ in range(10)]
        data[-1][0] = None
        pd = ResultFrame(data=data, schema=self.schema, pandas=True)
        result = ResultFrame(data=data, schema=self.schema, pandas=False)
        self.assertEqual(to_str(repr(pd)), to_str(repr(result)))
        self.assertEqual(to_str(pd._repr_html_()), to_str(result._repr_html_()))

        self.assertEqual(result._values, [r for r in result])
    def testLargeColumnsFormatter(self):
        names = list(itertools.chain(*[[name + str(i) for name in self.schema.names] for i in range(10)]))
        types = self.schema.types * 10

        schema = Schema.from_lists(names, types)
        gen_row = lambda: list(itertools.chain(*(self._random_values().values for _ in range(10))))
        data = [Record(schema=df_schema_to_odps_schema(schema), values=gen_row()) for _ in range(10)]

        pd = ResultFrame(data=data, schema=schema, pandas=True)
        result = ResultFrame(data=data, schema=schema, pandas=False)

        self.assertEqual(to_str(repr(pd)), to_str(repr(result)))
        self.assertEqual(to_str(pd._repr_html_()), to_str(result._repr_html_()))
Пример #6
0
    def execute(self, line, cell=''):
        if self._odps is None:
            self._odps = enter().odps

        sql = line + '\n' + cell
        sql = sql.strip()

        if sql:
            bar = init_progress_bar()

            instance = self._odps.run_sql(sql)

            percent = 0
            while not instance.is_terminated():
                task_names = instance.get_task_names()
                last_percent = percent
                if len(task_names) > 0:
                    percent = sum(
                        self._get_task_percent(instance, name)
                        for name in task_names) / len(task_names)
                else:
                    percent = 0
                percent = min(1, max(percent, last_percent))
                bar.update(percent)

                time.sleep(1)

            instance.wait_for_success()
            bar.update(1)

            with instance.open_reader() as reader:
                try:
                    import pandas as pd

                    try:
                        return pd.read_csv(StringIO(reader.raw))
                    except ValueError:
                        return reader.raw
                except ImportError:
                    return ResultFrame(list(reader), columns=reader._columns)
Пример #7
0
    def execute(self, line, cell=''):
        self._set_odps()

        content = line + '\n' + cell
        content = content.strip()

        sql = None
        hints = dict()

        splits = content.split(';')
        for s in splits:
            stripped = s.strip()
            if stripped.lower().startswith('set '):
                hint = stripped.split(' ', 1)[1]
                k, v = hint.split('=', 1)
                k, v = k.strip(), v.strip()
                hints[k] = v
            elif len(stripped) == 0:
                continue
            else:
                if sql is None:
                    sql = s
                else:
                    sql = '%s;%s' % (sql, s)

        # replace user defined parameters
        sql = replace_sql_parameters(sql, self.shell.user_ns)

        if sql:
            bar = init_progress_bar()

            instance = self._odps.run_sql(sql, hints=hints)
            if options.verbose:
                stdout = options.verbose_log or self._to_stdout
                stdout('Instance ID: ' + instance.id)
                stdout('  Log view: ' + instance.get_logview_address())

            percent = 0
            while not instance.is_terminated():
                task_names = instance.get_task_names()
                last_percent = percent
                if len(task_names) > 0:
                    percent = sum(self._get_task_percent(instance, name)
                                  for name in task_names) / len(task_names)
                else:
                    percent = 0
                percent = min(1, max(percent, last_percent))
                bar.update(percent)

                time.sleep(1)

            instance.wait_for_success()
            bar.update(1)

            try:
                with instance.open_reader() as reader:
                    try:
                        import pandas as pd
                        from pandas.parser import CParserError

                        try:
                            return pd.read_csv(StringIO(reader.raw))
                        except (ValueError, CParserError):
                            return reader.raw
                    except ImportError:
                        try:
                            return ResultFrame(list(reader), columns=reader._columns)
                        except TypeError:
                            return reader.raw
            finally:
                bar.close()