def connect_database(self):
    """Open and return a fresh pymapd connection built from ``self.configuration``.

    Reads the ``user``, ``password``, ``host``, ``port`` and ``database``
    keys of the configuration mapping; raises ``KeyError`` if any is missing.
    """
    cfg = self.configuration
    return pymapd.connect(
        user=cfg['user'],
        password=cfg['password'],
        host=cfg['host'],
        port=cfg['port'],
        dbname=cfg['database'],
    )
def __init__(
    self,
    uri=None,
    user=None,
    password=None,
    host=None,
    port=9091,
    database=None,
    protocol='binary',
    execution_type=EXECUTION_TYPE_CURSOR,
):
    """Initialize a MapD client and eagerly open its pymapd connection.

    Parameters
    ----------
    uri : str
    user : str
    password : str
    host : str
    port : int
    database : str
    protocol : {'binary', 'http', 'https'}
    execution_type : {
        EXECUTION_TYPE_ICP, EXECUTION_TYPE_ICP_GPU, EXECUTION_TYPE_CURSOR
    }

    Raises
    ------
    ValueError
        If ``execution_type`` is not one of the supported constants.
    """
    self.uri = uri
    self.user = user
    self.password = password
    self.host = host
    self.port = port
    self.db_name = database
    self.protocol = protocol

    if execution_type not in (
        EXECUTION_TYPE_ICP,
        EXECUTION_TYPE_ICP_GPU,
        EXECUTION_TYPE_CURSOR,
    ):
        # ValueError is the idiomatic exception for an invalid argument
        # value; it subclasses Exception, so callers that caught the old
        # generic Exception keep working.
        raise ValueError('Execution type defined not available.')
    self.execution_type = execution_type

    # Connect at construction time so configuration errors surface early.
    self.con = pymapd.connect(
        uri=uri,
        user=user,
        password=password,
        host=host,
        port=port,
        dbname=database,
        protocol=protocol,
    )
def set_database(self, name):
    """Point the client at database *name*, reconnecting if it changed.

    A ``None`` name or the already-active database is a no-op; otherwise
    the current connection is closed and a new one is opened against the
    same host/credentials with ``dbname=name``.
    """
    if name is None or name == self.db_name:
        return
    self.con.close()
    self.con = pymapd.connect(
        uri=self.uri,
        user=self.user,
        password=self.password,
        host=self.host,
        port=self.port,
        dbname=name,
        protocol=self.protocol,
    )
    self.db_name = name
def mapd(schema, tables, data_directory, **params):
    """Create the test database/schema on a MapD server and load the data.

    Parameters
    ----------
    schema : file-like
        File object whose contents are ``;``-separated DDL statements.
    tables : iterable
        Table names understood by ``read_tables``.
    data_directory : str or Path
        Directory the table data files live in.
    **params
        Connection parameters: ``host``, ``user``, ``password``, ``port``
        and ``database`` (the target database name).
    """
    import pymapd

    data_directory = Path(data_directory)
    reserved_words = ['table', 'year', 'month']

    # connection
    logger.info('Initializing MapD...')

    # BUG FIX: `database` was previously assigned only inside the branch
    # below, so the reconnect after the branch raised NameError whenever
    # params['database'] == 'mapd'. Hoist it so both paths work.
    database = params['database']

    if database != 'mapd':
        # Connect to the default database first to (re)create the target one.
        conn = pymapd.connect(
            host=params['host'],
            user=params['user'],
            password=params['password'],
            port=params['port'],
            dbname='mapd',
        )

        stmt = 'DROP DATABASE {}'.format(database)
        try:
            conn.execute(stmt)
        except Exception:
            # Best-effort: the database may simply not exist yet.
            logger.warning('MapD DDL statement %r failed', stmt)

        stmt = 'CREATE DATABASE {}'.format(database)
        try:
            conn.execute(stmt)
        except Exception:
            logger.exception('MapD DDL statement %r failed', stmt)
        conn.close()

    conn = pymapd.connect(
        host=params['host'],
        user=params['user'],
        password=params['password'],
        port=params['port'],
        dbname=database,
    )

    # create tables
    for stmt in filter(None, map(str.strip, schema.read().split(';'))):
        try:
            conn.execute(stmt)
        except Exception:
            logger.exception('MapD DDL statement \n%r\n failed', stmt)

    # import data
    for table, df in read_tables(tables, data_directory):
        if table == 'batting':
            # float nan problem
            cols = df.select_dtypes([float]).columns
            df[cols] = df[cols].fillna(0).astype(int)
            # string None driver problem
            cols = df.select_dtypes([object]).columns
            df[cols] = df[cols].fillna('')
        elif table == 'awards_players':
            # string None driver problem
            cols = df.select_dtypes([object]).columns
            df[cols] = df[cols].fillna('')

        # rename fields that clash with MapD reserved words or contain
        # characters MapD cannot accept in identifiers
        for df_col in df.columns:
            if ' ' in df_col or ':' in df_col:
                column = df_col.replace(' ', '_').replace(':', '_')
            elif df_col in reserved_words:
                column = '{}_'.format(df_col)
            else:
                continue
            df.rename(columns={df_col: column}, inplace=True)

        # load geospatial data row-wise (columnar load does not handle
        # the geometry column); everything else goes through the faster
        # columnar path
        if table == 'geo':
            conn.load_table_rowwise(
                table, list(df.itertuples(index=False, name=None))
            )
        else:
            conn.load_table_columnar(table, df)
    conn.close()
def test_connect_uri_and_others_raises(self):
    """Supplying both a URI and discrete credentials must raise TypeError."""
    uri = 'omnisci://*****:*****@localhost:6274/omnisci?protocol=binary'
    with pytest.raises(TypeError):
        connect(username='******', uri=uri)
def __init__(
    self,
    uri: Optional[str] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = 6274,
    database: Optional[str] = None,
    protocol: str = 'binary',
    session_id: Optional[str] = None,
    ipc: Optional[bool] = None,
    gpu_device: Optional[int] = None,
):
    """Create an OmniSciDB client and open its pymapd connection.

    Parameters
    ----------
    uri : str, optional
        Full connection URI.
    user : str, optional
    password : str, optional
    host : str, optional
    port : int, default 6274
    database : str, optional
    protocol : {'binary', 'http', 'https'}, default 'binary'
    session_id : str, optional
        Attach to an existing server session instead of authenticating.
    ipc : bool, optional, default None
        Enable Inter Process Communication (IPC) execution type.
        Defaults to True when ``gpu_device`` is given, otherwise False.
    gpu_device : int, optional, default None
        GPU Device ID.

    Raises
    ------
    Exception
        If the given execution_type is not valid.
    PyMapDVersionError
        If ``session_id`` is given but pymapd is 0.12 or older.
    """
    self.uri = uri
    self.user = user
    self.password = password
    self.host = host
    self.port = port
    self.db_name = database
    self.protocol = protocol
    self.session_id = session_id

    # Validate the ipc/gpu_device combination before storing it.
    self._check_execution_type(ipc=ipc, gpu_device=gpu_device)
    self.ipc = ipc
    self.gpu_device = gpu_device

    if session_id:
        if self.version < pkg_resources.parse_version('0.12.0'):
            raise PyMapDVersionError(
                'Must have pymapd > 0.12 to use session ID'
            )
        # Session re-attachment: no credentials, just the session token.
        connect_args = dict(
            uri=uri,
            host=host,
            port=port,
            protocol=protocol,
            sessionid=session_id,
        )
    else:
        # Normal credential-based login.
        connect_args = dict(
            uri=uri,
            user=user,
            password=password,
            host=host,
            port=port,
            dbname=database,
            protocol=protocol,
        )
    self.con = pymapd.connect(**connect_args)
def test_connect_uri_and_others_raises(self):
    """Supplying both a URI and discrete credentials must raise TypeError."""
    uri = 'mapd://*****:*****@localhost:6274/mapd?protocol=binary'
    with pytest.raises(TypeError):
        connect(username='******', uri=uri)
# Script: stage National Water Model (NWM) 'analysis_assim' terrain NetCDF
# files as flat pandas DataFrames, one day at a time. A MapD connection is
# opened up-front; what is done with `df` afterwards is not visible in this
# chunk.
import xarray as xr
import gc
from pymapd import connect
import pyarrow as pa
import numpy as np
import pandas as pd
from pyproj import Proj, transform

# NOTE(review): credentials are redacted placeholders; targets a local server.
conn = connect(user="******", password="******", host="localhost", dbname="mapd")
import mzgeohash
import unicodedata
from datetime import datetime

model_name = 'analysis_assim'
# Ten consecutive days of January 2018 to process.
date = [
    '20180101', '20180102', '20180103', '20180104', '20180105',
    '20180106', '20180107', '20180108', '20180109', '20180110'
]
#date=['20180101']
time = 't00'
#time=['t00','t06','t12','t18']

for datels in date:
    # Path layout: /raidStorage/nwm_data/<model>/<date>/<time>/nwm.<time>z....conus.nc
    file_prefix = time + 'z'
    filename = 'nwm.' + file_prefix + '.analysis_assim.terrain_rt.tm00.conus.nc'
    path = '/raidStorage/nwm_data/' + model_name + '/' + datels + '/' + time
    file = path + '/' + filename
    print(file)
    ds = xr.open_dataset(file)
    # Flatten the NetCDF dataset to a table with the coordinates as columns.
    df = ds.to_dataframe()
    df = df.reset_index()
# Export the top-50 NOx totals per 'mapmeit' key from the remote DB_2015
# table to example.csv.
import pandas as pd
from pymapd import connect

# NOTE(review): port is the string "/api", not a number — presumably the
# server sits behind an HTTPS reverse proxy and pymapd treats this as a URL
# path component; confirm against the deployment.
con = connect(user="******", password="******",protocol="https", host="ec-meit.ca",port="/api",dbname="meit")

# mapmeit
sql = "SELECT mapmeit as key0,SUM(nox*1) AS nox FROM DB_2015 WHERE nox IS NOT NULL GROUP BY key0 ORDER BY nox DESC LIMIT 50"
pd.read_sql(sql,con).to_csv("example.csv",index=False)
def con(mapd_server):
    """Fixture: binary-protocol connection to the local test mapd server.

    Depends on the ``mapd_server`` fixture so the server is up before we
    try to connect.
    """
    return connect(
        user="******",
        password='******',
        host='localhost',
        port=6274,
        protocol='binary',
        dbname='mapd',
    )
def main(argv):
    """Download Google Analytics data for one view and load it into OmniSci.

    Parameters
    ----------
    argv : list of str
        Either empty (fully interactive) or
        ``[key_file, profile_name, omnisci_url, begin_date, end_date]``.
    """
    if len(argv) == 0:
        key_file_location = './client_secrets.json'
        selected_profile = None
        omnisci_url = None
        date_ranges = None
    else:
        key_file_location = argv[0]
        selected_profile = argv[1]
        omnisci_url = argv[2]
        date_ranges = [(argv[3], argv[4])]
        # Sanity-check the URL early by listing the existing omnisci tables.
        with pymapd.connect(omnisci_url) as con:
            print('existing tables: ',
                  [x for x in con.get_tables() if x.startswith('omnisci')])

    service = get_service(key_file_location)

    # Construct dictionary of GA website name and ids.
    profile_ids = traverse_hierarchy(service)

    # Select the GA profile view to extract data.  selection_list[0] is a
    # dummy so the printed item numbers start at 1.
    selection_list = [0]
    i = 1
    print('%5s %20s %5s %20s' % ("Item#", "View ID", " ", "View Name"))
    for profile in sorted(profile_ids):
        selection_list = selection_list + [profile_ids[profile]]
        print('%4s %20s %5s %20s' % (i, profile_ids[profile], " ", profile))
        i += 1

    if not selected_profile:
        print(
            'Enter the item# of the view you would like to ingest into MapD: ')
        # BUG FIX: int(input()) crashed with ValueError on empty or
        # non-numeric input and the old `item == ''` test was dead code
        # (item was already an int).  Validate before converting.
        raw = input()
        try:
            item = int(raw)
        except ValueError:
            print('Invalid selection - %s' % raw)
            sys.exit(0)
        if item <= 0 or item >= len(selection_list):
            print('Invalid selection - %s' % item)
            sys.exit(0)
        print('Item # %s selected' % item)
    else:
        item = selection_list.index(profile_ids[selected_profile])

    if not date_ranges:
        print(
            '\nEnter the begin date and end date in the following format: YYYY-MM-DD YYYY-MM-DD'
        )
        print(
            'Or hit enter to proceed with the default which is last 30 days data'
        )
        print('Date Range: ')
        begin_end_date = input()
        if begin_end_date == '':
            print('Extract data from today to 30 days ago')
            # date_ranges = [('2017-08-27', '2018-02-22')]
            # date_ranges = [('30daysAgo', 'today')]
            date_ranges = [('2daysAgo', 'today')]
        else:
            # First two whitespace-separated tokens are begin/end dates.
            begin_date, end_date = begin_end_date.split()[:2]
            print('Extract data from %s to %s' % (begin_date, end_date))
            date_ranges = [(begin_date, end_date)]

    if not omnisci_url:
        print(
            "\nEnter the OmniSci server URL if you want to upload data,\n otherwise simply hit enter to use the manual procedure to upload the data"
        )
        print(
            " URL example: - "
            "omnisci://admin:[email protected]:6274/omnisci?protocol=binary"
        )
        print('OmniSci URL: ')
        omnisci_url = input()
        if omnisci_url == '':
            print(
                'Use MapD Immerse import user interface to load the output CSV file'
            )
            omnisci_url = None
    print("")

    csv_list = []
    for profile in sorted(profile_ids):
        if selection_list[item] == profile_ids[profile]:
            print('\nGoing to download data for %s (%s) ...' %
                  (profile, profile_ids[profile]))
            # Table name: lowercased view name with spaces stripped.
            table_name = profile.lower()
            table_name = '%s' % (table_name.replace(' ', ''))
            final_csv_file = './data/%s.csv' % (table_name)
            final_csv_gzfile = './data/%s.csv.gz' % (table_name)
            csv_list = build_csv_list(service, profile_ids[profile], profile,
                                      date_ranges, csv_list)
            merge_tables(final_csv_file, csv_list)
            print("Download of analytics data done.")

    # TODO Lines below need to be inside the for loop above?
    # Gzip the CSV file
    if os.path.isfile(final_csv_gzfile):
        os.remove(final_csv_gzfile)
    with open(final_csv_file, 'rb') as f_in, \
            gzip.open(final_csv_gzfile, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

    # Connect to MapD and upload, or tell the user to do it manually.
    if not omnisci_url or omnisci_url == '':
        print(
            "======================================================================="
        )
        print('Goto OmniSci Immerse UI and import the CSV file %s' %
              (final_csv_gzfile))
        print(
            "======================================================================="
        )
    else:
        with pymapd.connect(omnisci_url) as con:
            load_table_mapd(con, table_name, final_csv_gzfile)
        print(
            "======================================================================="
        )
        print('Goto OmniSci Immerse UI')
        print(
            "======================================================================="
        )
# Chapter10_2.py
# Using pymapd and the mapd database to create a geospatial table
# 20180215
import pymapd
from pymapd import connect

# Connect to the public MapD community server (credentials redacted).
connection = connect(user="******", password="******",
                     host="community.mapd.com", dbname="mapd")
import time

cursor = connection.cursor()

DROP = "DROP TABLE COUNTY;"
create = """CREATE TABLE county (
id integer NOT NULL,
name VARCHAR(50),
statefips VARCHAR(3),
stpostal VARCHAR(3),
geom Polygon );
"""

print(time.time())
try:
    cursor.execute(DROP)
except Exception:
    # FIX: was a bare `except:`, which also swallows KeyboardInterrupt and
    # SystemExit. The drop is best-effort — the table may not exist yet.
    pass
cursor.execute(create)
connection.commit()
print(time.time())
# Fetch 100 movement_id values from a MapD Cloud table over HTTPS and plot
# them with matplotlib.
import pymapd as py
import matplotlib.pyplot as plt

# NOTE(review): API key/secret are hard-coded — move to env vars or a config
# file before sharing this script.
user_str = 'I97D12D9D7268474D874'
password_str = 'quzPy63R6kAG4BTeQ0Y5DxnsO3sZl7pehv6u673v'
host_str = 'use2-api.mapd.cloud'
dbname_str = 'mapd'

connection = py.connect(user=user_str, password=password_str, host=host_str,
                        dbname=dbname_str, port=443, protocol='https')
# connection = py.connect(host=host_str, dbname=dbname_str, port=443, protocol='https')

query = "SELECT movement_id FROM san_francisco_taz_26 LIMIT 100"
df = connection.execute(query)  # despite the name, this is a cursor, not a DataFrame
df2 = df.fetchall()             # list of 1-tuples, one per row
plt.plot(df2)
plt.show()
#Edited the sample from documentation at https://pymapd.readthedocs.io/en/latest/usage.html #Some parts by Tsubasa Kato (@_stingraze on Twitter) from pymapd import connect import pandas as pd con = connect(user="******", password="******", host="localhost", dbname="omnisci") df = pd.read_sql("SELECT * from flights_2008_7M limit 100", con) print(df.to_string())
"-commit", default="1234567890123456789012345678901234567890", help="Commit hash to use to record this benchmark results", ) args = parser.parse_args() if args.df <= 0: print("Bad number of data files specified", args.df) sys.exit(1) if args.iterations < 1: print("Bad number of iterations specified", args.t) con = connect( user="******", password="******", host="localhost", dbname="omnisci", port=args.port, ) db_reporter = None if args.db_user is not "": print("Connecting to database") db = mysql.connector.connect( host=args.db_server, port=args.db_port, user=args.db_user, passwd=args.db_pass, db=args.db_name, ) db_reporter = report.DbReport( db, args.db_table,
# Bulk-load gzipped geotweet TSV extracts into an OmniSci server; corrupt
# files are recorded in l_ni and skipped. (Loop body appears to continue
# past this chunk.)
import pandas as pd
from pymapd import connect
from os import listdir
import glob
from os.path import isfile, join
import pyarrow as pa;import numpy as np

path = '/n/holyscratch01/cga/dkakkar/data/geotweets/results/2020/output/'  # use your path
all_files = glob.glob(path + "/*.gz")

print("Connecting to Omnisci")
conn=connect(user="******", password="******", host="localhost", port=7159, dbname="omnisci")  #use your port number
print("Connected",conn)
#query="DROP TABLE IF EXISTS geotweets"
#conn.execute(query)

l_ni=[]  # filenames that failed to parse (corrupt/truncated downloads)
conn.execute("DROP TABLE IF EXISTS geotweets;")
for filename in all_files:
    #print(filename)
    try:
        df = pd.read_csv(filename, sep='\t',dtype='unicode',index_col=None,
                         low_memory='true',compression='gzip')
    except:
        # NOTE(review): bare `except:` also catches KeyboardInterrupt —
        # consider `except Exception:`. Intent is clearly best-effort skip.
        l_ni.append(filename)
        #print("Corrupt file",filename)
        continue
    # Drop the WKT geometry column before loading.
    df = df.drop(['geom'], axis = 'columns')
    #print(df.head())
    # (A long commented-out GDELT-style df.columns rename list was here; it
    # was truncated/garbled in the source and has been summarized.)
    #print(df.head(5))
    #li.append(df)