Пример #1
0
def _connect():
    """
    Connect to the configured node and keyspace.

    Connection settings are read from the module-level ``args`` object
    (``ip``, ``port``, ``protocol_version``, ``username``, ``password``,
    ``keyspace`` and ``timeout``). Authentication is only enabled when both
    a username and a password were supplied.

    Failures are reported through ``_incorrect``: an ``InvalidRequest`` is
    reported as a missing keyspace, any other exception as a
    connection/credentials problem.

    :return: the object returned by ``Cluster.connect`` with its
        ``default_timeout`` set to ``int(args.timeout)``
    """
    auth_provider = None
    if args.username is not None and args.password is not None:
        auth_provider = PlainTextAuthProvider(
            username=args.username, password=args.password)

    cluster = Cluster([args.ip], protocol_version=args.protocol_version,
                      auth_provider=auth_provider, port=int(args.port))

    # NOTE(review): _incorrect() presumably aborts the program - confirm.
    # Should it return instead, the Cluster itself is handed back, which is
    # what this function has always done on that path.
    session = cluster
    try:
        session = cluster.connect(args.keyspace)
    except InvalidRequest:
        # The separating space before "does" was missing, which glued the
        # keyspace name onto the rest of the message.
        _incorrect("Keyspace " + args.keyspace + " does not exist.")
    except Exception:
        _incorrect("Unable to connect to host " + args.ip + " using port " + str(args.port) +
                   " or username/password is incorrect.")

    session.default_timeout = int(args.timeout)
    return session
Пример #2
0
 def initialize_connection(self):
     """Process the source table's timestamp groups and insert the results in batches.

     Opens a ``ConnectionPool``/``ColumnFamily`` on ``self.source`` and a
     driver session on ``self.keyspace``, loads the source table through
     ``self.sc.cassandraTable`` and groups its ``key`` values by
     ``timestamp``. When ``self.get_key`` returns 1 every row is used;
     otherwise only rows newer than the timestamp returned by
     ``self.get_timestamp`` are kept. Each group is run through
     ``retrieve_count`` and queued with ``batch_insert``; the batch is
     flushed every 100 groups and once more after the loop.

     Returns:
         1, always.
     """
     thrift_endpoint = self.host+':'+self.thriftport
     pool = ConnectionPool(self.keyspace, [thrift_endpoint], timeout=30)
     col_fam = ColumnFamily(pool, self.source)
     session = Cluster(contact_points=[self.host], port=self.port).connect(keyspace=self.keyspace)
     session.default_timeout = 30

     # configuring spark with cassandra keyspace and columnfamily
     rdd = self.sc.cassandraTable(self.keyspace, self.source).cache()
     grouped = rdd.select("timestamp", "key")
     val = self.get_key(self.db, session)
     if val != 1:
         # Only keep rows newer than the timestamp reported for the destination.
         last_timestamp = self.get_timestamp(session, self.dest, val)
         grouped = grouped.filter(lambda row: row.timestamp > last_timestamp)
     # collecting rows in rdd grouped by timestamp
     time_rdd = grouped.groupByKey().collect()

     self.create_table(self.dest, session)

     batch = BatchStatement()
     # Stays None when there is nothing to process, so the final flush can be
     # skipped instead of failing on an unbound name.
     insert = None
     for row_count, timekey in enumerate(time_rdd, 1):
         print(row_count)
         process = retrieve_count(timekey[1], timekey[0], col_fam)
         fields = process.retrieve_fields()
         insert = batch_insert(batch, fields, self.dest, session)
         insert.batch_prepare()
         if row_count % 100 == 0:
             # inserting in batches of 100, then starting a fresh batch
             insert.insert_fields()
             batch = BatchStatement()

     # NOTE(review): when the number of groups is an exact multiple of 100,
     # ``insert`` still wraps the batch flushed inside the loop, so it is
     # flushed a second time here - confirm whether that is harmless.
     if insert is not None:
         insert.insert_fields()  # inserting the final batch
     return 1
Пример #3
0
    def insert_cassandra(self, session, source, country, country_count):
        """Store the country lists on the row of ``source`` identified by ``main_count``.

        The id is read from ``main_count`` over a short-lived connection of
        its own; when several rows come back the last one wins. The update
        itself runs on the caller's ``session``.

        Args:
            session: session used to execute the UPDATE.
            source: name of the table to update (concatenated into the CQL).
            country: value written to the ``country`` column.
            country_count: value written to the ``country_count`` column.

        Returns:
            1, always.

        Raises:
            LookupError: if ``main_count`` yields no rows, so there is no id
                to update.
        """
        query = "SELECT id FROM main_count"
        print(query)

        lookup_cluster = Cluster(contact_points=[self.host], port=self.port)
        try:
            lookup_session = lookup_cluster.connect(keyspace=self.keyspace)
            lookup_session.default_timeout = 100
            uid = None
            found = False
            # Consume the result while the connection is still open.
            for row in lookup_session.execute(query):
                uid = row[0]
                found = True
        finally:
            # The lookup connection is private to this call; release it so
            # repeated calls do not pile up open connections.
            lookup_cluster.shutdown()

        if not found:
            raise LookupError("main_count returned no rows; no id to update in " + source)

        # retrieving the id from the source table and updating the country_count (list) and country (list) columns
        print("%s %s" % (len(country), len(country_count)))
        session.execute("UPDATE "+source+" SET country=%s, country_count=%s WHERE id=%s", parameters=[country, country_count, uid])
        return 1
Пример #4
0
    def initialize_connection(self):
        """Read every ``host``/``id`` pair from ``self.table`` and store the derived counts.

        For each row the host value is stringified and stripped; if it holds
        a comma-separated list only the first entry is kept. The collected
        hosts and ids are passed to ``retrieve_location.get_location`` and
        the keys and values of its result are handed to
        ``self.insert_cassandra``.

        Returns:
            1, always.
        """
        cluster = Cluster(contact_points=[self.host], port=self.port)
        session = cluster.connect(keyspace=self.keyspace)
        session.default_timeout = 100

        statement = SimpleStatement("SELECT host, id FROM "+self.table)

        hosts = []
        id_val = []
        for record in session.execute(statement):
            host_text = str(record[0]).strip()
            id_val.append(record[1])
            # split(",")[0] yields the whole string when there is no comma,
            # so a single expression covers both cases.
            hosts.append(host_text.split(",")[0])

        # One host is appended per row, so this is the number of rows read.
        print(len(hosts))

        locator = retrieve_location()
        count_list = locator.get_location(session, self.table, hosts, id_val)
        self.insert_cassandra(session, self.source, count_list.keys(), count_list.values())
        return 1
Пример #5
0
import cassandra
from cassandra.cluster import SimpleStatement, Cluster
import timeit,sys

# Contact points and native-protocol port of the cluster to query.
cluster = Cluster(['10.1.0.104', '10.1.0.105', '127.0.0.1'], port=9233)
session = cluster.connect('group3alt')  # keyspace should be our own
# default_timeout is a Session attribute; assigning it on the Cluster (as
# was done before) has no effect. None disables the per-request timeout.
session.default_timeout = None

indexsearch = False  # turn this to true when you want to do looping queries for group by

print(cluster.metadata.cluster_name)  # should make sure this is group3
print("%s \n" % cassandra.__version__)

# Optional first command-line argument; falls back to 1 when it is missing
# or not an integer.
try:
    Consist_Level = int(sys.argv[1])
except (IndexError, ValueError):
    Consist_Level = 1

# Start of the whole-program timing.
program_st = timeit.default_timer()
#======================================================================
# QUERY 1
#======================================================================
query = SimpleStatement("""
SELECT count (*) as ten_atomic
FROM cdr 
WHERE 
(MSC_CODE ,CITY_ID,SERVICE_NODE_ID
,RUM_DATA_NUM ,DUP_SEQ_NUM ,SEIZ_CELL_NUM ,FLOW_DATA_INC ,SUB_HOME_INT_PRI ,CON_OHM_NUM,
SESS_SFC) 
> (10000,10000,10000,3,10000,1,10000,10000,10000,10000)
AND (MSC_CODE ,CITY_ID,SERVICE_NODE_ID
,RUM_DATA_NUM ,DUP_SEQ_NUM ,SEIZ_CELL_NUM ,FLOW_DATA_INC ,SUB_HOME_INT_PRI ,CON_OHM_NUM,
SESS_SFC) 
Пример #6
0
def current_time():
    """Return the seconds between 1970-01-01 00:00:00 and ``datetime.now()`` as a float.

    NOTE(review): both datetimes are naive and ``now()`` is local time, so
    the result differs from a UTC Unix timestamp by the local UTC offset -
    confirm that this is what callers expect.
    """
    epoch = datetime(1970, 1, 1)
    elapsed = datetime.now() - epoch
    return elapsed.total_seconds()


for lines in args.lines:
    for columns in args.columns:
        initial_size = 0
        print("Deleting previous data")
        try:
            subprocess.check_output("rm " + args.queue_dir + " -R", shell=True)
        except subprocess.CalledProcessError:
            pass
        try:
            session = Cluster(['localhost']).connect('kairosdb')
            session.default_timeout = 1200
            session.execute('drop keyspace kairosdb;')
            print("Data deleted.")
        except NoHostAvailable:
            print("No previous data exists. Skipping delete.")
        print("Starting Kairosdb")
        kairos = subprocess.Popen([args.kairos_path, "run"],
                                  stderr=subprocess.DEVNULL,
                                  stdout=subprocess.DEVNULL)
        time.sleep(4)
        while True:
            try:
                subprocess.check_output("curl localhost:8080",
                                        shell=True,
                                        stderr=subprocess.DEVNULL)
            except subprocess.CalledProcessError:
Пример #7
0
import logging
import time

from uuid import UUID
from cassandra.cluster import Cluster
import cassandra

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
# Topology managers built from the tracing-test YAML files; the binding
# manager is constructed from both, so these three lines must stay in order.
PTM = PhysicalTopologyManager("../topologies/mmm_physical_test_tracing.yaml")
VTM = VirtualTopologyManager("../topologies/mmm_virtual_test_tracing.yaml")
BM = BindingManager(PTM, VTM)

# Address of the container looked up by the hostname "cassandra1".
cass_host = get_container_by_hostname("cassandra1").get_ip_address()

# NOTE: this rebinds the name ``cassandra``. After this line it refers to the
# object returned by Cluster.connect(), no longer to the ``cassandra`` module
# imported above. A connection is opened at import time.
cassandra = Cluster([cass_host]).connect()
cassandra.default_timeout = 60.0

# Binding description with one binding per bridge device (bridge-000-001 with
# host/interface 1, bridge-000-002 with host/interface 2), both on port_id 2.
binding_multihost = {
    "description": "spanning across multiple MMs",
    "bindings": [
        {"binding": {"device_name": "bridge-000-001", "port_id": 2, "host_id": 1, "interface_id": 1}},
        {"binding": {"device_name": "bridge-000-002", "port_id": 2, "host_id": 2, "interface_id": 2}},
    ],
}


def set_filters(router_name, inbound_filter_name, outbound_filter_name):
    """Sets in-/out-bound filters to a router."""
    router = VTM.get_router(router_name)

    inbound_filter = None
Пример #8
0
import logging
import time

from uuid import UUID
from cassandra.cluster import Cluster
import cassandra

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
# Topology managers built from the tracing-test YAML files; the binding
# manager is constructed from both, so these three lines must stay in order.
PTM = PhysicalTopologyManager('../topologies/mmm_physical_test_tracing.yaml')
VTM = VirtualTopologyManager('../topologies/mmm_virtual_test_tracing.yaml')
BM = BindingManager(PTM, VTM)

# Address of the container looked up by the hostname 'cassandra1'.
cass_host = get_container_by_hostname('cassandra1').get_ip_address()

# NOTE: this rebinds the name ``cassandra``. After this line it refers to the
# object returned by Cluster.connect(), no longer to the ``cassandra`` module
# imported above. A connection is opened at import time.
cassandra = Cluster([cass_host]).connect()
cassandra.default_timeout = 60.0

binding_multihost = {
    'description':
    'spanning across multiple MMs',
    'bindings': [
        {
            'binding': {
                'device_name': 'bridge-000-001',
                'port_id': 2,
                'host_id': 1,
                'interface_id': 1
            }
        },
        {
            'binding': {