def connect(cls, settings):
    """Configure the global cqlengine connection from the 'global' configuration.

    Hosts, credentials and consistency are read from the ``cassandra.*``
    configuration keys; hosts fall back to a single local node and
    consistency falls back to ``'ONE'``. The ``settings`` argument is
    accepted but not used by this function.
    """
    conf = Configuration('global')
    hosts = conf.get('cassandra.hosts', ['127.0.0.1:9160'])
    options = {
        'username': conf.get('cassandra.user'),
        'password': conf.get('cassandra.password'),
        'consistency': conf.get('cassandra.consistency', 'ONE'),
    }
    connection.setup(hosts, **options)
def setup_connection():
    """Initialise the global cqlengine connection from the FEEDLY_* settings."""
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    consistency = settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    track_metrics = settings.FEEDLY_TRACK_METRICS
    connection.setup(
        hosts,
        consistency=consistency,
        default_keyspace=keyspace,
        metrics_enabled=track_metrics,
    )
def setup_connection():
    """Initialise the global cqlengine connection from the FEEDLY_* settings.

    Besides hosts, consistency and default keyspace, this variant also
    forwards the driver-metrics switch and the default query timeout.
    """
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    consistency = settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    track_metrics = settings.FEEDLY_TRACK_CASSANDRA_DRIVER_METRICS
    timeout = settings.FEEDLY_CASSANDRA_DEFAULT_TIMEOUT
    connection.setup(
        hosts,
        consistency=consistency,
        default_keyspace=keyspace,
        metrics_enabled=track_metrics,
        default_timeout=timeout,
    )
def run(self):
    """Connect to Cassandra and sync the products model with its column family."""
    # Single-argument print calls behave the same with or without the
    # Python 2 print statement.
    print('inside db')
    hosts, keyspace = self.database_ip, self.database_name
    connection.setup(hosts, keyspace)
    sync_table(products.ProductsDetails)
    print('synched')
def setup_connection():
    """Initialise the global cqlengine connection, including the pool size."""
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    pool_size = settings.FEEDLY_CASSANDRA_CONNECTION_POOL_SIZE
    # NOTE(review): the setting name is spelled "CONSITENCY"; it is kept as-is
    # because it must match whatever the settings module actually defines.
    consistency = settings.FEEDLY_CASSANDRA_CONSITENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    connection.setup(
        hosts,
        max_connections=pool_size,
        consistency=consistency,
        default_keyspace=keyspace,
    )
def setup_connection():
    """Initialise the global cqlengine connection from the STREAM_* settings.

    Any extra options in ``settings.CASSANDRA_DRIVER_KWARGS`` are forwarded
    verbatim as keyword arguments.
    """
    hosts = settings.STREAM_CASSANDRA_HOSTS
    consistency = settings.STREAM_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.STREAM_DEFAULT_KEYSPACE
    connection.setup(
        hosts=hosts,
        consistency=consistency,
        default_keyspace=keyspace,
        **settings.CASSANDRA_DRIVER_KWARGS
    )
def __init__(self, logger, config):
    """Build the web application, connect to Cassandra and sync all models.

    Stores ``logger`` and ``config`` on the instance, prints the route table,
    initialises the parent application, then connects cqlengine using
    ``config.clusterNodes`` / ``config.clusterKeyspace`` and syncs each model.
    """
    self.logger = logger
    self.config = config

    routes = get_routes(movie)
    route_listing = json.dumps([(url, repr(rh)) for url, rh in routes], indent=2)
    print("Routes\n======\n\n" + route_listing)

    settings = {
        'debug': options.debug,
        'xsrf_cookies': False,
        # TODO: update manually
        # NOTE(review): the cookie secret is hard-coded in source.
        'cookie_secret': 'lpyoGs9/TAuA8IINRTRRjlgBspMDy0lKtvQNGrTnA9g=',
    }
    super(Application, self).__init__(routes=routes, generate_docs=True,
                                      settings=settings)

    # Connect to the configured keyspace on the configured cluster nodes.
    connection.setup(config.clusterNodes, config.clusterKeyspace,
                     protocol_version=2)

    # Sync every model with its CQL table, in the original order.
    synced_models = (
        models.Users,
        models.ActorID,
        models.ActorFirst,
        models.ActorLast,
        models.ActorLastFirst,
        models.Movie,
    )
    for model in synced_models:
        sync_table(model)
def setup_package():
    """Package-level test fixture: connect cqlengine and create the test keyspace.

    Reads ``CASSANDRA_VERSION`` (required, an integer such as 12, 20 or 21)
    and ``CASSANDRA_TEST_HOST`` (optional, defaults to ``'localhost'``) from
    the environment. Versions below 20 use native protocol 1, all others
    protocol 2.

    Raises:
        KeyError: if ``CASSANDRA_VERSION`` is not set.
        ValueError: if ``CASSANDRA_VERSION`` is not an integer.
    """
    try:
        cassandra_version = int(os.environ["CASSANDRA_VERSION"])
    except (KeyError, ValueError):
        # Only the two failures this lookup can produce are intercepted; the
        # hint is printed and the original exception is re-raised.
        print("CASSANDRA_VERSION must be set as an environment variable. "
              "One of (12, 20, 21)")
        raise

    # An unset or empty host variable falls back to localhost.
    test_host = os.environ.get('CASSANDRA_TEST_HOST') or 'localhost'
    protocol_version = 1 if cassandra_version < 20 else 2

    connection.setup([test_host],
                     protocol_version=protocol_version,
                     default_keyspace='cqlengine_test')
    create_keyspace("cqlengine_test", replication_factor=1,
                    strategy_class="SimpleStrategy")
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``url_ranks_links_23`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``url_ranks_links_23.create``.
    """
    # filter_missing_values=True for RDDs
    # from cassandra.cluster import Cluster
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    # keyspace = "wikipedia_jan_2015"
    keyspace = "test"
    cluster_hosts = [
        "52.89.66.139",
        "52.89.34.7",
        "52.89.116.45",
        "52.89.78.4",
        "52.89.27.115",
        "52.89.133.147",
        "52.89.1.48",
    ]
    connection.setup(cluster_hosts, keyspace)

    class url_ranks_links_23(Model):
        # The url is the only primary-key column.
        url = columns.Text(primary_key=True)
        ranks = columns.Float()
        links = columns.List(columns.Text)

        def __repr__(self):
            return "%s %s" % (self.url, self.ranks)

    sync_table(url_ranks_links_23)
    for record in d_iter:
        url_ranks_links_23.create(**record)
def global_setup(config):
    """Perform process-wide configuration exactly once.

    The first call records ``config``, connects cqlengine to the configured
    cluster and configures structlog. Later calls with an equal
    configuration are no-ops; a call with a different configuration raises
    ``Exception``.
    """
    global _global_config

    if _global_config is not None:
        if _global_config != config:
            raise Exception('global_setup called twice with different '
                            'configurations')
        return

    _global_config = config

    # Hosts are coerced to str: per the original note, this breaks with unicode.
    hosts = [str(node) for node in config.CASSANDRA_CLUSTER]
    connection.setup(hosts, consistency=config.CASSANDRA_CONSISTENCY)

    processors = [_capture_stack_trace, _format_event]
    if config.PRETTY_LOGGING:
        processors.extend([
            structlog.processors.ExceptionPrettyPrinter(),
            structlog.processors.KeyValueRenderer(),
        ])
    else:
        processors.append(structlog.processors.JSONRenderer())

    structlog.configure(processors=processors)
def _init_db_connection(**kwargs):
    """Open the Cassandra and SQL connections for the current process.

    Connections are deliberately not opened in ``__init__``: a handle created
    in the parent process would be copied into child processes by fork. See
    http://www.dctrwatson.com/2010/09/python-thread-safe-does-not-mean-fork-safe/

    Any keyword arguments are accepted and ignored.
    """
    config = Config()

    # This callback must finish within 4 seconds, otherwise celery
    # interrupts it.
    connection.setup(hosts=config.CQLENGINE_HOSTS,
                     default_keyspace=const.CQL_KEYSPACE_NAME)

    # sqlalchemy
    runtime = Runtime()
    runtime.sql_session = sessionmaker(runtime.sql_engine)

    # Importing the module triggers registration of the SQLAlchemy models.
    from models import sql

    # create tables
    Preparation().Base.metadata.create_all(runtime.sql_engine)
def setUp(self):
    """Connect to the local node, create the keyspace and sync the test models."""
    connection.setup(['127.0.0.1'], KEYSPACE)
    create_keyspace(KEYSPACE,
                    replication_factor=1,
                    strategy_class='SimpleStrategy')
    sync_table(Avatar)
    for model in (Anchor, Message):
        sync_table(model)
def setup(self):
    """Connect cqlengine once and apply the configured session options.

    Does nothing when cqlengine already has a cluster. Otherwise it connects
    with ``self.hosts`` / ``self.keyspace`` / ``self.connection_options`` and
    copies every entry of ``self.session_options`` onto ``self.session``.
    """
    from cqlengine import connection

    if connection.cluster is None:
        connection.setup(self.hosts, self.keyspace, **self.connection_options)
        for name, setting in self.session_options.iteritems():
            setattr(self.session, name, setting)
def run():
    """Recreate the ``Stock`` table and insert two sample price series."""
    from cqlengine import connection
    connection.setup(['127.0.0.1'], "cqlengine")

    from cqlengine import management
    management.drop_table(Stock)
    management.sync_table(Stock)

    def by_day(year, month, values):
        # Map consecutive days of the month, starting at day 1, to the values.
        return dict(
            (datetime.date(year, month, day), value)
            for day, value in enumerate(values, 1)
        )

    wipro_prices = [
        200, 220.45, 250.67, 246.86, 201, 233, 245, 300, 307, 180,
        405, 400, 670, 260, 250, 251, 254, 267, 270,
    ]
    wipro_events = {
        datetime.date(2014, 12, 13): "Something happened over here",
        datetime.date(2014, 12, 19): "The bears are playing",
    }
    Stock.create(name="WPRO",
                 prices=by_day(2014, 12, wipro_prices),
                 events=wipro_events)

    infosys_prices = [
        3200, 3220.45, 3250.67, 3246.86, 3201, 3233, 3245, 3300, 3307, 3180,
        3405, 3400, 3670, 3260, 3250, 3251, 3254, 3267, 3270,
    ]
    Stock.create(name="INFY", prices=by_day(2014, 8, infosys_prices))
def setup(self):
    """Connect cqlengine unless it is already connected.

    Extra keyword arguments for ``connection.setup`` are taken from the
    ``'connection'`` entry of ``self.options`` (empty when absent).
    """
    if connection.cluster is None:
        hosts, keyspace = self.hosts, self.keyspace
        extra_options = self.options.get('connection', {})
        connection.setup(hosts, keyspace, **extra_options)
def setUp(self):
    """Create a uniquely named keyspace and sync every model class into it."""
    unique_suffix = str(uuid.uuid1()).replace('-', '')
    keyspace = 'testkeyspace{}'.format(unique_suffix)
    self.keyspace = keyspace

    clear()
    # Configure cqlengine's global connection pool.
    setup(['localhost'], default_keyspace=keyspace)
    create_keyspace(keyspace)

    for attr_name, model in self.model_classes.items():
        setattr(self, attr_name, model)
        # sync_table(getattr(self, attr_name))
        getattr(self, attr_name).sync_table()
def setup(self):
    """Connect cqlengine unless it is already connected.

    The consistency level comes from ``self.options['consistency_level']``
    and defaults to ``ConsistencyLevel.ONE``.
    """
    if connection.cluster is None:
        hosts, keyspace = self.hosts, self.keyspace
        level = self.options.get('consistency_level', ConsistencyLevel.ONE)
        connection.setup(hosts, keyspace, consistency=level)
def connect():
    """Connect to the local "meatbot" keyspace and sync all models, once.

    The module-level ``connected`` flag makes repeated calls no-ops.
    """
    # The global declaration must precede the first use of the name: reading
    # `connected` before declaring it global is a SyntaxError on Python 3 and
    # a SyntaxWarning on Python 2.
    global connected
    if connected:
        return

    # Single-argument print calls behave the same with or without the
    # Python 2 print statement.
    print("Connecting")
    setup(["localhost"], "meatbot")
    print("Connected")

    sync_table(User)
    sync_table(Project)
    sync_table(StatusUpdate)
    sync_table(StatusUpdateUserAggregated)
    print("Done Syncing")

    connected = True
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``userbase2`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``userbase2.create``.
    """
    # The imports and the keyspace constant must be bound before the model
    # class is declared: the class statement needs `Model` and `columns`,
    # and the calls below need `connection`, `sync_table` and the keyspace
    # as function-level names.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class userbase2(Model):
        uid = columns.Integer(primary_key=True)
        reviewerID = columns.Text(primary_key=True)
        reviewerName = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(userbase2)
    for d in d_iter:
        userbase2.create(**d)
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``predictions3`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``predictions3.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"

    class predictions3(Model):
        user = columns.Integer(primary_key=True)
        product = columns.Integer()
        # Second primary-key column, clustered in descending order.
        rating = columns.Float(primary_key=True, clustering_order="DESC")

    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)
    sync_table(predictions3)
    for record in d_iter:
        predictions3.create(**record)
def syncToCassandra(d_iter):
    """Write every record of one partition into the ``recommendations9`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``recommendations9.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"
    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)

    class recommendations9(Model):
        uid = columns.Integer(primary_key=True)
        mid = columns.Integer(primary_key=True)
        rating = columns.Float()

    sync_table(recommendations9)
    for record in d_iter:
        recommendations9.create(**record)
def AddToCassandra_stocktotalsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``stock_totals_batch`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``stock_totals_batch.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class stock_totals_batch(Model):
        user = columns.Text(primary_key=True)
        portfolio_total = columns.Integer()

    # cassandra seed node, TODO: do not hard code this
    seed_node = "ec2-54-215-237-86.us-west-1.compute.amazonaws.com"
    connection.setup([seed_node], "finance_news")

    sync_table(stock_totals_batch)
    for record in d_iter:
        stock_totals_batch.create(**record)
def syncToCassandra(d_iter):
    """Write every record of one partition into the ``movieprofile9`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``movieprofile9.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"

    class movieprofile9(Model):
        mid = columns.Integer(primary_key=True)
        asin = columns.Text()
        title = columns.Text()
        imurl = columns.Text()

    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)
    sync_table(movieprofile9)
    for record in d_iter:
        movieprofile9.create(**record)
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``reviewerProfile`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``reviewerProfile.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"

    class reviewerProfile(Model):
        # All three columns are part of the primary key; rating is
        # clustered in descending order.
        user = columns.Integer(primary_key=True)
        product = columns.Integer(primary_key=True)
        rating = columns.Float(primary_key=True, clustering_order="DESC")

    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)
    sync_table(reviewerProfile)
    for record in d_iter:
        reviewerProfile.create(**record)
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``reviewerProfile`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``reviewerProfile.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"

    class reviewerProfile(Model):
        reviewerID = columns.Text(primary_key=True)
        # Text-to-float map column.
        reviews = columns.Map(columns.Text, columns.Float)

    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)
    sync_table(reviewerProfile)
    for record in d_iter:
        reviewerProfile.create(**record)
def AddToCassandra_stocktotalsbatch_bypartition(d_iter):
    """Persist one partition of portfolio totals to ``stock_totals_batch``.

    Each element of ``d_iter`` is a mapping that is expanded into keyword
    arguments for ``stock_totals_batch.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class stock_totals_batch(Model):
        user = columns.Text(primary_key=True)
        portfolio_total = columns.Integer()

    # cassandra seed node, TODO: do not hard code this
    seed_hosts = ["ec2-54-215-237-86.us-west-1.compute.amazonaws.com"]
    connection.setup(seed_hosts, "finance_news")
    sync_table(stock_totals_batch)

    for totals in d_iter:
        stock_totals_batch.create(**totals)
def syncToCassandra(d_iter):
    """Write every record of one partition into the ``userprofile9`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``userprofile9.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    keyspace = "playground"

    class userprofile9(Model):
        uid = columns.Integer(primary_key=True)
        reviewerid = columns.Text()
        reviewername = columns.Text()
        numofreviews = columns.Float()
        # Text-to-float map column.
        ratings = columns.Map(columns.Text, columns.Float)

    hosts = ['172.31.39.226']
    connection.setup(hosts, keyspace)
    sync_table(userprofile9)
    for record in d_iter:
        userprofile9.create(**record)
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Persist one partition of reviewer profiles to ``reviewerProfile``.

    Each element of ``d_iter`` is a mapping that is expanded into keyword
    arguments for ``reviewerProfile.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    target_keyspace = "playground"

    class reviewerProfile(Model):
        reviewerID = columns.Text(primary_key=True)
        reviews = columns.Map(columns.Text, columns.Float)

    connection.setup(["172.31.39.226"], target_keyspace)
    sync_table(reviewerProfile)

    for profile in d_iter:
        reviewerProfile.create(**profile)
def setup(self, force=False, throw=False): if self._setup and not force: return True try: connection.setup(self.uri, self.keyspace) management.create_keyspace(self.keyspace, replication_factor=1, strategy_class='SimpleStrategy') for model in self._models: model.__keyspace__ = self.keyspace management.sync_table(model) except NoHostAvailable: logger.error('Could not connect to Cassandra, expect errors.') return False # Note: return values are for test skipping self._setup = True return True
def AddToCassandra_allcountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``movieCatalog2`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``movieCatalog2.create``.
    """
    # The imports and the keyspace constant must be bound before the model
    # class is declared: the class statement needs `Model` and `columns`,
    # and the calls below need `connection`, `sync_table` and the keyspace
    # as function-level names.
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    CASSANDRA_KEYSPACE = "playground"

    class movieCatalog2(Model):
        pid = columns.Integer(primary_key=True)
        asin = columns.Text(primary_key=True)
        brand = columns.Text()
        imUrl = columns.Text()
        price = columns.Float()
        title = columns.Text()

    connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
    sync_table(movieCatalog2)
    for d in d_iter:
        movieCatalog2.create(**d)
def main():
    """Connect to Cassandra and print the first 100 ``SavolModel`` rows."""
    setup(settings["CASSANDRA_CLUSTER"], settings["CASSANDRA_KEYSPACE"])

    # Single-argument print calls behave the same with or without the
    # Python 2 print statement.
    print("Gettings first 100 models from DB:")
    divider = "---------------------"
    detail_fields = ("title", "question", "answer", "year", "month", "date")

    for row in SavolModel.objects.all()[:100]:
        print(row.savol_id)
        print(divider)
        for field_name in detail_fields:
            print(getattr(row, field_name))
        # print "{year}/{month}/{date}".format(
        #     year=row.year, month=row.month, date=row.date)
        print(divider)
def __init__(self, table_name) -> None:
    """Open a driver session and a cqlengine connection for ``table_name``.

    Creates the keyspace, switches the session to it, configures cqlengine
    against the same host and finally creates the samples table.
    ``clean_table`` is set only for the ``'all_stored_samples'`` table.
    """
    super().__init__()
    self.KEYSPACE = 'keyspace_name'
    self.TABLE_NAME = table_name
    self.clean_table = self.TABLE_NAME == 'all_stored_samples'

    cassandra_host = 'cassandra_api'
    # cassandra_host = '127.0.0.1'
    cluster = Cluster([cassandra_host], port=9042)

    session = cluster.connect()
    self.session = session
    self.session.row_factory = dict_factory
    self.create_keyspace(self.session)
    self.session.set_keyspace(self.KEYSPACE)

    setup(hosts=[cassandra_host], default_keyspace=self.KEYSPACE)
    self.create_samples_table()
def tweets_to_cassandra(items):
    """Store tweet records in the ``TweetModel`` table and print the row count.

    ``items`` is an iterable of records, one row each.
    NOTE(review): each item is assumed to be a mapping of column names
    (``date``, ``ticker``) to values; confirm against the caller.
    """
    from cqlengine import columns
    from cqlengine import connection
    from cqlengine.models import Model
    from cqlengine.management import sync_table
    from cqlengine.management import create_keyspace

    class TweetModel(Model):
        date = columns.Text(primary_key=True)
        ticker = columns.Text()

    connection.setup(['127.0.0.1'], "cqlengine")
    # The second argument is the replication strategy class; the previous
    # value "default_keyspace" is not a strategy name.
    create_keyspace("cqlengine", "SimpleStrategy", 1)
    sync_table(TweetModel)

    for item in items:
        # Previously this called the undefined name `tweet_table` and passed
        # the whole `items` collection positionally; `create` takes the
        # column values of a single row as keyword arguments.
        TweetModel.create(**item)

    print("Number of elements in table: %s" % TweetModel.objects.count())
def extract_queried_urls_ranks_links(res, url_total, ranks_total, links_total, links_listedPerURL):
    """Look up rank and outgoing links in Cassandra for every Elasticsearch hit.

    For each hit in ``res['hits']['hits']`` the hit's url is looked up in
    ``url_ranks_4``; for every returned row with a non-null rank, the row's
    url is looked up in ``url_links_3``. Each links row found is appended to
    the four accumulator lists, which are mutated in place and also returned
    as a tuple.

    Args:
        res: Elasticsearch response; each hit's ``_source`` must have ``url``.
        url_total: list receiving the url of every links row.
        ranks_total: list receiving the rank matching each url.
        links_total: list receiving the links collection of each url.
        links_listedPerURL: list receiving the same links collection.

    Returns:
        ``(url_total, ranks_total, links_total, links_listedPerURL)``.
    """
    from cqlengine import connection
    from cassandra.cluster import Cluster

    CASSANDRA_KEYSPACE = "test"
    hosts = ['52.88.228.98', '52.11.49.170']
    connection.setup(hosts, CASSANDRA_KEYSPACE)
    cluster = Cluster(list(hosts))
    session = cluster.connect(CASSANDRA_KEYSPACE)

    # Urls come from external data, so they are bound as query parameters
    # instead of being spliced into the CQL text. This also handles urls
    # containing quotes.
    rank_query = "SELECT * FROM url_ranks_4 WHERE url=%s"
    links_query = "SELECT * FROM url_links_3 WHERE url=%s"

    try:
        # Loop through the Elasticsearch hits, then through the Cassandra rows.
        for hit in res['hits']['hits']:
            url = "%(url)s" % hit["_source"]
            for row in session.execute(rank_query, (url,)):
                # row[0]=url, row[1]=rank. Rows without a rank are skipped so
                # that only urls with a rank are exported.
                if row[1] is None:
                    continue
                # TODO (from the original): filter by page rank value here and
                # restrict to the top 1000 to avoid crashing the EC2 micro
                # instance.
                for row_links in session.execute(links_query, (row[0],)):
                    print("%s %s %s" % (row_links[0], row[1], len(row_links[1])))
                    url_total.append(row_links[0])
                    ranks_total.append(row[1])
                    links_total.append(row_links[1])
                    links_listedPerURL.append(row_links[1])
                    print("")
    finally:
        # Release the driver connections opened by this call.
        cluster.shutdown()

    # Uncertain if these need to be returned or not (kept for callers).
    return (url_total, ranks_total, links_total, links_listedPerURL)
def test(d_iter):
    """Write every record of one partition into the ``table1_20150928`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``table1_20150928.create``.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table
    from cqlengine.query import ModelQuerySet

    keyspace = "playground"

    class table1_20150928(Model):
        # Primary key: (link_id, comment_id).
        link_id = columns.Text(primary_key=True)
        comment_id = columns.Text(primary_key=True)
        # Every remaining column is stored as text.
        source = columns.Text()
        title = columns.Text()
        permalink = columns.Text()
        subreddit = columns.Text()
        subreddit_id = columns.Text()
        selftext = columns.Text()
        created = columns.Text()
        score = columns.Text()
        url = columns.Text()

    hosts = ['172.31.6.150']
    connection.setup(hosts, keyspace)
    sync_table(table1_20150928)
    for record in d_iter:
        table1_20150928.create(**record)
def AddToCassandra_allhuecountsbatch_bypartition(d_iter):
    """Write every record of one partition into the ``allhuecountsbatch`` table.

    ``d_iter`` is an iterable of mappings; each one is expanded into keyword
    arguments for ``allhuecountsbatch.create``. The keyspace name comes from
    ``CASSANDRA_KEYSPACE``, which is defined outside this function.
    """
    from cqlengine import columns
    from cqlengine.models import Model
    from cqlengine import connection
    from cqlengine.management import sync_table

    class allhuecountsbatch(Model):
        # Six-part primary key.
        granularity = columns.Text(primary_key=True)
        country = columns.Text(primary_key=True)
        region = columns.Text(primary_key=True)
        county = columns.Text(primary_key=True)
        locality = columns.Text(primary_key=True)
        datetaken = columns.Text(primary_key=True)
        # Payload columns.
        count = columns.Integer()
        maxhueidxs = columns.List(columns.Integer())
        maxhue = columns.Float()
        huevalues = columns.List(columns.Integer())

    hosts = ['127.0.0.1']
    connection.setup(hosts, CASSANDRA_KEYSPACE)
    sync_table(allhuecountsbatch)
    for record in d_iter:
        allhuecountsbatch.create(**record)
def run_migrations_online():
    """Run migrations in 'online' mode against the global cqlengine connection.

    Hosts and keyspace are read from the ``cqlengine.hosts`` and
    ``cqlengine.keyspace`` keys of the active config section. The
    ``ConnectionProxy`` is closed afterwards even if a migration fails.
    """
    section = config.get_section(config.config_ini_section)
    cassandra_hosts = section['cqlengine.hosts']
    default_keyspace = section['cqlengine.keyspace']

    # cqlengine keeps one module-wide connection shared by all of its users.
    setup(cassandra_hosts, default_keyspace=default_keyspace)

    # ConnectionProxy talks to that global cqlengine connection.
    context.configure(connection=ConnectionProxy,
                      target_metadata=target_metadata)

    try:
        with context.begin_transaction():
            context.run_migrations()
    finally:
        ConnectionProxy.close()
# Step 1: define a model.
from cqlengine import columns
from cqlengine.models import Model
import uuid


class ExampleModel(Model):
    read_repair_chance = 0.05  # optional - defaults to 0.1
    example_id = columns.UUID(primary_key=True, default=uuid.uuid4)
    example_type = columns.Integer(index=True)
    created_at = columns.DateTime()
    description = columns.Text(required=False)


# Step 2: set up the connection to the cassandra server(s).
from cqlengine import connection
connection.setup(['localhost:9160'], username='******', password='******')

# Step 3: create the CQL table.
from cqlengine.management import sync_table
sync_table(ExampleModel)


def _new_example(example_type, description):
    # Create one row stamped with the current time.
    return ExampleModel.create(example_type=example_type,
                               description=description,
                               created_at=datetime.now())


# Step 4: create some rows, four of type 0 and four of type 1.
em1 = _new_example(0, "example1")
em2 = _new_example(0, "example2")
em3 = _new_example(0, "example3")
em4 = _new_example(0, "example4")
em5 = _new_example(1, "example5")
em6 = _new_example(1, "example6")
em7 = _new_example(1, "example7")
em8 = _new_example(1, "example8")
ticket_id = columns.Text(primary_key=True) created_dt = columns.DateTime(default=datetime.now()) pg = columns.Text() duration = columns.Text() error_count = columns.Text() outage_caused = columns.Text() system_caused = columns.Text() ticket_type = columns.Text() row_create_ts = columns.DateTime(default=datetime.now()) row_end_ts = columns.DateTime(default='9999-12-31 00:00:00.00000-00') #connection.setup(['127.0.0.1'], "cqlengine", protocol_version=3) #sync_table(Tickets) from cqlengine import connection connection.setup(["localhost"], "sid") # sync_table(Tickets) # sync_table(Tickets_Division) # sync_table(Tickets_Duration) # sync_table(Tickets_Pg) # sync_table(Tickets_Error_Count) # sync_table(Tickets_Outage_Caused) # sync_table(Tickets_System_Caused) # sync_table(Tickets_Ticket_Type) # sync_table(Tickets_Addt_Notes) cluster = Cluster(['127.0.0.1']) session = cluster.connect("sid") # result = session.execute("select * from tickets ") # print dir(result)
class adsinfo(Model):
    # cqlengine model for one ad-report row; ad_id and date_start are the
    # primary-key columns.
    ad_id = columns.BigInt(primary_key=True)
    actions_per_impression = columns.Float()
    clicks = columns.Float()
    cost_per_unique_click = columns.Float()
    cost_per_result = columns.Float()
    result_rate = columns.Float()
    reach = columns.Integer()
    date_start = columns.Text(primary_key=True)
    date_stop = columns.Text()

# Connect cqlengine to the "fb_report" keyspace on the node at 127.0.0.1
connection.setup(['127.0.0.1'], "fb_report")

# Create a Spark context against the standalone master, and a SQL context on it
sc = SparkContext("spark://ip-172-31-9-43:7077", "fb_report")
sqlContext = SQLContext(sc)

# Read the JSON ad-report history from HDFS
path = "hdfs://ec2-52-8-165-110.us-west-1.compute.amazonaws.com:9000/user/AdReport/ads_info/history"
ad_camps = sqlContext.jsonFile(path)

# Sync the model with its CQL table
sync_table(adsinfo)

# Register this SchemaRDD as a table so it can be queried by name.
ad_camps.registerTempTable("ad_camps")
#I am assuming that code placed here will run on startup? from cassandra.cluster import Cluster from cqlengine.connection import setup from cqlengine.management import sync_table from feed_engine.models import Photo, BlogPost, StatusUpdate, Video, User, Relationship, Comment # print "Syncing tables ..." # from users.models import UserProfile setup(['192.168.10.200', '192.168.10.201', '192.168.10.202'], "yookore") sync_table(Photo) sync_table(BlogPost) sync_table(StatusUpdate) sync_table(Video) sync_table(User) sync_table(Relationship) sync_table(Comment) sync_table(UserProfile)
def setup_connection():
    """Initialise the global cqlengine connection from the FEEDLY_* settings.

    Any extra options in ``settings.CASSANDRA_DRIVER_KWARGS`` are forwarded
    verbatim as keyword arguments.
    """
    hosts = settings.FEEDLY_CASSANDRA_HOSTS
    consistency = settings.FEEDLY_CASSANDRA_CONSISTENCY_LEVEL
    keyspace = settings.FEEDLY_DEFAULT_KEYSPACE
    connection.setup(
        hosts=hosts,
        consistency=consistency,
        default_keyspace=keyspace,
        **settings.CASSANDRA_DRIVER_KWARGS
    )
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table


# define model for desired table
class userfollow(Model):
    # One row per user: the username and the list of accounts it follows.
    username = columns.Text(primary_key=True)
    following = columns.List(columns.Text)

    def __repr__(self):
        # `following` is a list column, so it is rendered with %s; the
        # previous %d raised TypeError whenever the instance was printed.
        return '%s %s' % (self.username, self.following)


# setup connection to Cassandra and sync table
connection.setup(['52.8.127.252', '52.8.41.216'], "watch_events")
sync_table(userfollow)

start = time.time()

# access token, read from the environment
github_pass_alvin = os.environ['my_pass']
following_url = "https://api.github.com/users/"
per_page = "&per_page=100"

# call github API and return following list
# return False if username doesn't exist
i = 0
# coding: utf-8

# In[2]:

from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table

CASSANDRA_KEYSPACE = "playground"
connection.setup(['172.31.39.226'], CASSANDRA_KEYSPACE)
import json

# In[3]:

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

sc = SparkContext()
sqlContext = SQLContext(sc)
# Smaller sample of the same data set:
#df = sqlContext.read.json("s3n://patricks3db/reviews_Movies_and_TV_small.json")
df = sqlContext.read.json("s3n://patricks3db/reviews_Movies_and_TV.json")
df.printSchema()

# In[4]:

# Drop the review columns that are not needed, one at a time and in the
# original order.
ratings = df
for _dropped_column in ("helpful", "reviewText", "reviewTime",
                        "reviewerName", "summary", "unixReviewTime"):
    ratings = ratings.drop(_dropped_column)
Cassandra manager """ from cqlengine import columns from cqlengine.models import Model from cqlengine import connection "******************************************************************************************" cassandra_cluster_ip = ['127.0.0.1'] cassandra_keyspace = 'tsunami_project' "******************************************************************************************" # setup the connection to our cassandra server(s) and the default keyspace connection.setup(cassandra_cluster_ip, cassandra_keyspace) # mapper object with Cassandra model class tsunami_table(Model): code_gsm = columns.Text(primary_key=True) timeslot = columns.Integer(primary_key=True) phone = columns.List(columns.Integer) """ Get the phone numbers to send them SMS alert for each code_gsm """ def get_phone_numbers(code_gsm, timeslot): phones_list = [] try:
from gsmtpd import LMTPServer
from cqlengine import connection

from caliop.config import Configuration

# Configuration is loaded and the Cassandra connection is opened before the
# remaining caliop modules are imported; that order is kept unchanged.
Configuration.load('caliop.yaml', 'global')
connection.setup(['127.0.0.1:9160'])

from caliop.core.config import includeme
includeme(None)

from caliop.helpers.log import log
from caliop.smtp.agent import DeliveryAgent


class LmtpServer(LMTPServer):
    """LMTP server that hands each incoming message to a ``DeliveryAgent``."""

    def process_message(self, peer, mailfrom, rcpttos, data):
        """Deliver one message and log how many messages resulted."""
        delivered = DeliveryAgent().process(mailfrom, rcpttos, data)
        log.info('Deliver of %d messages' % len(delivered))
        return None


if __name__ == '__main__':
    s = LmtpServer(("127.0.0.1", 4000))
    s.serve_forever()
from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table
import os


# Schema used to write to the cassandra table.
# Per the original note, it should exactly match the table created in cassandra.
class getuserid(Model):
    username = columns.Text(primary_key=True)
    userid = columns.Integer()

    def __repr__(self):
        return '%s %d' % (self.username, self.userid)


# Connect to the cassandra keyspace "watch_events".
# NOTE(review): sync_table is imported but not called in the visible part of
# this script; confirm the table is synced elsewhere.
connection.setup(['127.0.0.1'], "watch_events")

# Master node's IP and public DNS, read from the environment, used to run the
# Spark job and read from HDFS.
master_ip = os.environ['master_ip']
master_public_dns = os.environ['master_public_dns']

# Create the SparkContext (app name "userid") and a SQLContext on top of it.
# NOTE(review): SparkContext / SQLContext are not imported in the visible part
# of this script.
sc = SparkContext("spark://" + master_ip + ":7077", "userid")
sqlContext = SQLContext(sc)

# Read the collected username records (JSON) from HDFS.
df = sqlContext.jsonFile("hdfs://" + master_public_dns + ":9000/camus/topics/github-usernames-good-1/hourly/2015/06/21/00/*")

# Spark job: keep only (login, id) for each record and collect the results
# on the driver.
names = df.map(lambda x: (x.login, x.id)).collect()
import uuid from cqlengine import columns from cqlengine.models import Model class ExampleModel(Model): read_repair_chance = 0.05 # optional - defaults to 0.1 example_id = columns.UUID(primary_key=True, default=uuid.uuid4) example_type = columns.Integer(index=True) created_at = columns.DateTime() description = columns.Text(required=False) #next, setup the connection to your cassandra server(s) and the default keyspace... from cqlengine import connection connection.setup(['127.0.0.1'], "cqlengine") # or if you're still on cassandra 1.2 #connection.setup(['127.0.0.1'], "cqlengine", protocol_version=1) # create your keyspace. This is, in general, not what you want in production # see https://cassandra.apache.org/doc/cql3/CQL.html#createKeyspaceStmt for options from cqlengine.management import create_keyspace create_keyspace("cqlengine", "SimpleStrategy", 1) #...and create your CQL table from cqlengine.management import sync_table sync_table(ExampleModel) #now we can create some rows: em1 = ExampleModel.create(example_type=0,
import sys
sys.path.append("")

# setup cassandra
from cqlengine import connection

try:
    CASSANDRA_VERSION = int(os.environ["CASSANDRA_VERSION"])
except (KeyError, ValueError):
    # Only a missing or non-integer value is intercepted (previously a bare
    # `except:`); the hint is printed and the original exception re-raised.
    print(
        "CASSANDRA_VERSION must be set as an environment variable. One of (12, 20, 21)"
    )
    raise

# An unset or empty host variable falls back to localhost.
CASSANDRA_TEST_HOST = os.environ.get('CASSANDRA_TEST_HOST') or 'localhost'

# Versions below 20 use native protocol 1; all others use protocol 2.
protocol_version = 1 if CASSANDRA_VERSION < 20 else 2

connection.setup([CASSANDRA_TEST_HOST],
                 protocol_version=protocol_version,
                 default_keyspace='cqlengine_test')

nose.main()
#Author: Filmon
#!/usr/bin/python
# Pushes the outbound state table to cassandra, reading tab-separated
# (year, state, count) records from standard input.

#import libraries
import os, sys
from cqlengine import columns
from cqlengine.models import Model
from cqlengine import connection
from cqlengine.management import sync_table


# Define a model
class outbound_state(Model):
    c_state = columns.Text(primary_key=True)
    c_count = columns.Text()
    c_year = columns.Text(primary_key=True, clustering_order="DESC")

    def __repr__(self):
        return '%s %s %s' % (self.c_state, self.c_year, self.c_count)


connection.setup(['127.0.0.1'], "outbound_cassandra")
sync_table(outbound_state)

for line in sys.stdin:
    # Field order on each input line: year, state, count.
    f = [part.strip() for part in line.split('\t')]
    outbound_state.create(c_year=str(f[0]),
                          c_state=str(f[1]),
                          c_count=str(f[2]))
def setup_cassandra(self):
    """Point cqlengine at the local node, using ``self.KEYSPACE`` as default."""
    local_hosts = ['127.0.0.1']
    setup(hosts=local_hosts, default_keyspace=self.KEYSPACE)