def run_tests():
    """Set up a local PSparkContext, run the doc tests and report the result.

    ``doctest.testmod`` is called without a module argument, so it examines
    ``__main__``. The examples run against a copy of this module's globals
    extended with a ``psc`` entry holding the context created here.

    A one-line summary is printed in green when nothing failed and in red
    otherwise, using ``termcolor`` when it is importable and raw ANSI escape
    codes when it is not.

    Raises:
        SystemExit: with status -1 when at least one doctest failed.
    """
    import doctest
    import sys

    from sparklingpandas.pcontext import PSparkContext

    globs = globals().copy()
    # The small batch size here ensures that we see multiple batches,
    # even in these small test examples:
    globs['psc'] = PSparkContext.simple('local[4]', 'PythonTest', batchSize=2)
    try:
        (failure_count, test_count) = doctest.testmod(
            globs=globs, optionflags=doctest.ELLIPSIS)
    finally:
        # Always shut the context down, even if the doctest run itself
        # raises, so it is not left running.
        globs['psc'].sc.stop()

    msg = "{0} test ran {1} failures".format(test_count, failure_count)
    # Only the optional import is guarded; colouring and printing happen
    # outside the try so an unrelated error is never mistaken for a missing
    # termcolor package.
    try:
        from termcolor import colored
    except ImportError:
        # No termcolor: wrap the message in ANSI colour codes by hand
        # (91 = bright red, 92 = bright green, 0 = reset).
        prefix = '\033[91m' if failure_count else '\033[92m'
        msg = prefix + msg + '\033[0m'
    else:
        msg = colored(msg, 'red' if failure_count else 'green')

    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print(msg)

    if failure_count:
        # sys.exit does not depend on the ``site``-injected ``exit`` helper.
        sys.exit(-1)
def setUp(self):
    """Create the PSparkContext and the shared fixture frames.

    Each fixture is kept on the test case in three forms: the raw rows
    (``*input``), a frame built through ``self.psc.DataFrame``
    (``*pframe``) and an ordinary ``pandas.DataFrame`` (``*frame``).
    """
    # Copy of sys.path as it was on entry.
    self._old_sys_path = list(sys.path)
    # The test class name is handed to PSparkContext.simple as its second
    # argument (presumably the Spark application name -- confirm).
    self.psc = PSparkContext.simple(
        'local[4]', self.__class__.__name__, batchSize=2)

    # Two string columns; the column list is also exposed on the instance.
    string_rows = [
        ("tea", "happy"),
        ("water", "sad"),
        ("coffee", "happiest"),
        ("tea", "water"),
    ]
    string_cols = ['magic', 'thing']
    self.basicinput = string_rows
    self.basiccolumns = string_cols
    self.basicpframe = self.psc.DataFrame(string_rows, columns=string_cols)
    self.basicframe = pandas.DataFrame(string_rows, columns=string_cols)

    # Two integer columns. A fresh column list is built for every call.
    ab = ('a', 'b')
    pair_rows = [(1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20), (8, 9)]
    self.numericinput = pair_rows
    self.numericpframe = self.psc.DataFrame(pair_rows, columns=list(ab))
    self.numericframe = pandas.DataFrame(pair_rows, columns=list(ab))

    # Three numeric columns; the third mixes ints and floats.
    abc = ('a', 'b', 'c')
    triple_rows = [
        (1, 2, -100.5),
        (3, 4, 93),
        (1, 3, 100.2),
        (2, 6, 0.5),
        (3, 100, 1.5),
        (3, 20, 80),
        (8, 9, 20),
    ]
    self.numericthreeinput = triple_rows
    self.numericthreepframe = self.psc.DataFrame(triple_rows,
                                                 columns=list(abc))
    self.numericthreeframe = pandas.DataFrame(triple_rows,
                                              columns=list(abc))

    # Two integer columns followed by a string column.
    mixed_rows = [(1, 2, "coffee"), (4, 5, "cheese")]
    self.mixedinput = mixed_rows
    self.mixedpframe = self.psc.DataFrame(mixed_rows, columns=list(abc))
    self.mixedframe = pandas.DataFrame(mixed_rows, columns=list(abc))
def setUp(self):
    """Create the PSparkContext used by this test case.

    A copy of ``sys.path`` as it was on entry is stored in
    ``self._old_sys_path`` before the context is built.
    """
    self._old_sys_path = list(sys.path)
    # The test class name is handed to PSparkContext.simple as its second
    # argument (presumably the Spark application name -- confirm).
    self.psc = PSparkContext.simple(
        'local[4]', self.__class__.__name__, batchSize=2)
def setUp(self):
    """Set up the basic panda spark test case.

    Builds a ``SparkConf`` (4 cores max, ``local[4]`` master, application
    named after the test class, multiple contexts allowed), creates
    ``self.psc`` from it and then populates the shared fixtures. Each
    fixture is stored as raw rows (``*input``), a frame built through
    ``self.psc.DataFrame`` (``*pframe``) and a ``pandas.DataFrame``
    (``*frame``).
    """
    logging.info("Setting up spark context")
    # Copy of sys.path as it was on entry.
    self._old_sys_path = list(sys.path)
    class_name = self.__class__.__name__
    conf = SparkConf()
    conf.set("spark.cores.max", "4")
    conf.set("spark.master", "local[4]")
    # "spark.app.name" is the property Spark reads for the application
    # name; a differently spelled key would be stored but never used.
    conf.set("spark.app.name", class_name)
    conf.set("spark.driver.allowMultipleContexts", "true")
    self.psc = PSparkContext.simple(conf=conf)
    # Add a common basic input and basicpframe we can reuse in testing
    self.basicinput = [
        ("tea", "happy"),
        ("water", "sad"),
        ("coffee", "happiest"),
        ("tea", "water")]
    self.basiccolumns = ['magic', 'thing']
    self.basicpframe = self.psc.DataFrame(
        self.basicinput, columns=self.basiccolumns)
    self.basicframe = pandas.DataFrame(
        self.basicinput, columns=self.basiccolumns)
    # Add a numeric frame
    self.numericinput = [
        (1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20), (8, 9)]
    self.numericpframe = self.psc.DataFrame(
        self.numericinput, columns=['a', 'b'])
    self.numericframe = pandas.DataFrame(
        self.numericinput, columns=['a', 'b'])
    # A three column numeric frame
    self.numericthreeinput = [
        (1, 2, -100.5),
        (3, 4, 93),
        (1, 3, 100.2),
        (2, 6, 0.5),
        (3, 100, 1.5),
        (3, 20, 80),
        (8, 9, 20)]
    self.numericthreepframe = self.psc.DataFrame(
        self.numericthreeinput, columns=['a', 'b', 'c'])
    self.numericthreeframe = pandas.DataFrame(
        self.numericthreeinput, columns=['a', 'b', 'c'])
    # Two integer columns followed by a string column
    self.mixedinput = [(1, 2, "coffee"), (4, 5, "cheese")]
    self.mixedpframe = self.psc.DataFrame(
        self.mixedinput, columns=['a', 'b', 'c'])
    self.mixedframe = pandas.DataFrame(
        self.mixedinput, columns=['a', 'b', 'c'])
    # Mixed NA frame: same as the mixed frame plus an all-None column 'd'
    self.mixednainput = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
    self.mixednapframe = self.psc.DataFrame(
        self.mixednainput, columns=['a', 'b', 'c', 'd'])
    self.mixednaframe = pandas.DataFrame(
        self.mixednainput, columns=['a', 'b', 'c', 'd'])
def setUp(self):
    """Create the PSparkContext and the shared fixture frames.

    Each fixture is kept on the test case in three forms: the raw rows
    (``*input``), a frame built through ``self.psc.DataFrame``
    (``*pframe``) and an ordinary ``pandas.DataFrame`` (``*frame``).
    """
    # Copy of sys.path as it was on entry.
    self._old_sys_path = list(sys.path)
    # The test class name is handed to PSparkContext.simple as its second
    # argument (presumably the Spark application name -- confirm).
    self.psc = PSparkContext.simple(
        'local[4]', self.__class__.__name__, batchSize=2)

    # Two string columns; the column list is also exposed on the instance.
    string_rows = [
        ("tea", "happy"),
        ("water", "sad"),
        ("coffee", "happiest"),
        ("tea", "water"),
    ]
    string_cols = ['magic', 'thing']
    self.basicinput = string_rows
    self.basiccolumns = string_cols
    self.basicpframe = self.psc.DataFrame(string_rows, columns=string_cols)
    self.basicframe = pandas.DataFrame(string_rows, columns=string_cols)

    # Two integer columns. A fresh column list is built for every call.
    ab = ('a', 'b')
    pair_rows = [(1, 2), (3, 4), (1, 3), (2, 6), (3, 100), (3, 20), (8, 9)]
    self.numericinput = pair_rows
    self.numericpframe = self.psc.DataFrame(pair_rows, columns=list(ab))
    self.numericframe = pandas.DataFrame(pair_rows, columns=list(ab))

    # Three numeric columns; the third mixes ints and floats.
    abc = ('a', 'b', 'c')
    triple_rows = [
        (1, 2, -100.5),
        (3, 4, 93),
        (1, 3, 100.2),
        (2, 6, 0.5),
        (3, 100, 1.5),
        (3, 20, 80),
        (8, 9, 20),
    ]
    self.numericthreeinput = triple_rows
    self.numericthreepframe = self.psc.DataFrame(triple_rows,
                                                 columns=list(abc))
    self.numericthreeframe = pandas.DataFrame(triple_rows,
                                              columns=list(abc))

    # Two integer columns followed by a string column.
    mixed_rows = [(1, 2, "coffee"), (4, 5, "cheese")]
    self.mixedinput = mixed_rows
    self.mixedpframe = self.psc.DataFrame(mixed_rows, columns=list(abc))
    self.mixedframe = pandas.DataFrame(mixed_rows, columns=list(abc))

    # Same as the mixed fixture with an extra column 'd' that is all None.
    abcd = ('a', 'b', 'c', 'd')
    na_rows = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
    self.mixednainput = na_rows
    self.mixednapframe = self.psc.DataFrame(na_rows, columns=list(abcd))
    self.mixednaframe = pandas.DataFrame(na_rows, columns=list(abcd))
def setUp(self):
    """Set up the basic panda spark test case.

    Builds a ``SparkConf`` (4 cores max, ``local[4]`` master, application
    named after the test class, multiple contexts allowed), creates
    ``self.psc`` from it and then populates the shared fixtures. Each
    fixture is stored as raw rows (``*input``), a frame built through
    ``self.psc.DataFrame`` (``*pframe``) and a ``pandas.DataFrame``
    (``*frame``). The module-level ``merge`` is also exposed as
    ``self.merge``.
    """
    logging.info("Setting up spark context")
    # Copy of sys.path as it was on entry.
    self._old_sys_path = list(sys.path)
    class_name = self.__class__.__name__
    conf = SparkConf()
    conf.set("spark.cores.max", "4")
    conf.set("spark.master", "local[4]")
    # "spark.app.name" is the property Spark reads for the application
    # name; a differently spelled key would be stored but never used.
    conf.set("spark.app.name", class_name)
    conf.set("spark.driver.allowMultipleContexts", "true")
    self.psc = PSparkContext.simple(conf=conf)
    # Add a common basic input and basicpframe we can reuse in testing
    self.basicinput = [("tea", "happy"),
                       ("water", "sad"),
                       ("coffee", "happiest"),
                       ("tea", "water")]
    self.basiccolumns = ['magic', 'thing']
    self.basicpframe = self.psc.DataFrame(self.basicinput,
                                          columns=self.basiccolumns)
    self.basicframe = pandas.DataFrame(self.basicinput,
                                       columns=self.basiccolumns)
    # Add a numeric frame
    self.numericinput = [(1, 2), (3, 4), (1, 3), (2, 6), (3, 100),
                         (3, 20), (8, 9)]
    self.numericpframe = self.psc.DataFrame(self.numericinput,
                                            columns=['a', 'b'])
    self.numericframe = pandas.DataFrame(self.numericinput,
                                         columns=['a', 'b'])
    # A three column numeric frame
    self.numericthreeinput = [(1, 2, -100.5),
                              (3, 4, 93),
                              (1, 3, 100.2),
                              (2, 6, 0.5),
                              (3, 100, 1.5),
                              (3, 20, 80),
                              (8, 9, 20)]
    self.numericthreepframe = self.psc.DataFrame(self.numericthreeinput,
                                                 columns=['a', 'b', 'c'])
    self.numericthreeframe = pandas.DataFrame(self.numericthreeinput,
                                              columns=['a', 'b', 'c'])
    # Two integer columns followed by a string column
    self.mixedinput = [(1, 2, "coffee"), (4, 5, "cheese")]
    self.mixedpframe = self.psc.DataFrame(self.mixedinput,
                                          columns=['a', 'b', 'c'])
    self.mixedframe = pandas.DataFrame(self.mixedinput,
                                       columns=['a', 'b', 'c'])
    # Mixed NA frame: same as the mixed frame plus an all-None column 'd'
    self.mixednainput = [(1, 2, "coffee", None), (4, 5, "cheese", None)]
    self.mixednapframe = self.psc.DataFrame(self.mixednainput,
                                            columns=['a', 'b', 'c', 'd'])
    self.mixednaframe = pandas.DataFrame(self.mixednainput,
                                         columns=['a', 'b', 'c', 'd'])
    # Make the merge callable reachable from the test instance.
    self.merge = merge