print("Did you remember to build the project by running 'make'?", file=sys.stderr) print( "Alternatively perhaps you tried to copy this program out without it's adjacent libraries?", file=sys.stderr) sys.exit(4) # com.databricks:spark-avro_2.10:2.0.1 - 2.0.1 is for Spark 1.4+ # you can edit this bit if you need to run it on Spark 1.3: # https://github.com/databricks/spark-avro#linking # Must set spark-avro package to 3.0.0+ if using Spark 2.0 # for Spark < 2.0 it results in Exception: # Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.execution.datasources.FileFormat #os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-avro_2.10:3.0.0 %s' \ os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-avro_2.10:2.0.1 %s' \ % os.getenv('PYSPARK_SUBMIT_ARGS', '') pyspark_path() from pyspark import SparkContext # pylint: disable=wrong-import-position,import-error from pyspark import SparkConf # pylint: disable=wrong-import-position,import-error from pyspark.sql import SQLContext # pylint: disable=wrong-import-position,import-error __author__ = 'Hari Sekhon' __version__ = '0.8.0' class SparkAvroToParquet(CLI): def __init__(self): # Python 2.x super(SparkAvroToParquet, self).__init__() # Python 3.x # super().__init__() # logging.config.fileConfig(os.path.join(libdir, 'resources', 'logging.conf'))
# Make the project's bundled libraries importable; `libdir` is defined earlier
# in the file (outside this excerpt).
sys.path.append(libdir)
try:
    # pylint: disable=wrong-import-position
    from harisekhon.utils import log, isMinVersion, support_msg, isVersionLax, die, getenv, pyspark_path
    from harisekhon import CLI
except ImportError as _:
    # Deliberate hard exit: the tool cannot run without its adjacent libraries.
    print('module import failed: %s' % _, file=sys.stderr)
    print("Did you remember to build the project by running 'make'?", file=sys.stderr)
    print("Alternatively perhaps you tried to copy this program out without it's adjacent libraries?", file=sys.stderr)
    sys.exit(4)

# com.databricks:spark-avro_2.10:2.0.1 - 2.0.1 is for Spark 1.4+
# you can edit this bit if you need to run it on Spark 1.3:
# https://github.com/databricks/spark-avro#linking
# Prepend the spark-avro package to any PYSPARK_SUBMIT_ARGS already set in the
# environment so user-supplied submit args are preserved.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-avro_2.10:2.0.1 %s' \
                                    % os.getenv('PYSPARK_SUBMIT_ARGS', '')

# pyspark_path() must run before the pyspark imports below, hence the
# wrong-import-position pylint suppressions.
pyspark_path()
from pyspark import SparkContext    # pylint: disable=wrong-import-position,import-error
from pyspark import SparkConf       # pylint: disable=wrong-import-position,import-error
from pyspark.sql import SQLContext  # pylint: disable=wrong-import-position,import-error

__author__ = 'Hari Sekhon'
__version__ = '0.7.0'


class SparkJSONToAvro(CLI):
    """CLI tool converting JSON input to Avro via Spark (body continues past this excerpt)."""

    def __init__(self):
        # Python 2.x
        super(SparkJSONToAvro, self).__init__()
        # Python 3.x
        # super().__init__()
        # logging.config.fileConfig(os.path.join(libdir, 'resources', 'logging.conf'))
        # NOTE(review): the excerpt ends here — the original __init__ likely
        # continues beyond this chunk; verify against the full file.