Exemplo n.º 1
0
 def end_modify(self):
     try:
         mod_len = len(self.modify_items)
         if mod_len > 0:
             if ISDEBUG:
                 logger.log("end modify")
                 logger.log("modify concurrency level: {0}".format(self.concurency_level))
             self.cassandra_provider.execute_modify_items(self.modify_items, self.concurency_level)
     finally:
         self.modify_items = []
         pass
Exemplo n.º 2
0
 def prepare_delete_stmt(self):
     delete_stmt_str = u"DELETE FROM {0}.{1} WHERE {2};".format(
         self.keyspace, self.columnfamily,
         u" AND ".join(map(lambda str: str + u" = ?", self.rowIdColumns)))
     if ISDEBUG:
         logger.log("preparing delete statement")
         st = time.time()
     self.delete_stmt = self.session.prepare(delete_stmt_str)
     if ISDEBUG:
         logger.log("delete statement prepared in {0} ms".format(
             (time.time() - st) * 1000))
Exemplo n.º 3
0
 def insert(self, new_values):
     if self.insert_stmt is None:
         self.prepare_insert_stmt()
     args = self.get_insert_args(new_values)
     if ISDEBUG:
         logger.log("requested insert {0}".format(args))
         st = time.time()
     self.session.execute(self.insert_stmt, args)
     if ISDEBUG:
         et = time.time()
         logger.log("insert completed in {0} ms".format((et - st) * 1000))
     return new_values
Exemplo n.º 4
0
    def get_path_keys(self):
        output = []
        sorted_items = sorted(self.querableColumnsIdx.items(),
                              key=operator.itemgetter(1))
        clusteting_key_columns = []
        partition_key_columns = []
        idx_columns = []
        regular_columns = []
        ptt = []
        for tp in sorted_items:
            k = tp[0]
            v = tp[1]
            if v >= self.PARTITION_KEY_QUERY_COST and v < self.CLUSTERING_KEY_QUERY_COST:
                partition_key_columns.append(k)
                ptt.append(k)
            if v < self.IDX_QUERY_COST and v >= self.CLUSTERING_KEY_QUERY_COST:
                clusteting_key_columns.append(k)
            elif v >= self.IDX_QUERY_COST and v < self.REGULAR_QUERY_COST:
                idx_columns.append(k)
            else:
                regular_columns.append(k)

        ckc_len = len(clusteting_key_columns)
        if ckc_len == 0:
            output.append((tuple(partition_key_columns), 1))
        else:
            i = 1
            output.append(
                (tuple(partition_key_columns), self.CLUSTERING_KEY_QUERY_COST))
            for ckc in clusteting_key_columns:
                ptt.append(ckc)
                if i == ckc_len:
                    output.append((tuple(ptt), 1))
                else:
                    output.append(
                        (tuple(ptt), self.CLUSTERING_KEY_QUERY_COST - i))
                    if len(idx_columns) != 0:
                        output.append((tuple(ptt + idx_columns),
                                       self.IDX_QUERY_COST - i))
                i += 1

        for idx_col in idx_columns:
            output.append((tuple(idx_col), self.IDX_QUERY_COST))

        for t in sorted_items:
            output.append(((t[0]), self.REGULAR_QUERY_COST))

        output.append(((self.get_row_id_column()), 1))

        if ISDEBUG:
            logger.log('path keys: {0}'.format(output))
        return output
Exemplo n.º 5
0
 def delete(self, rowid):
     if self.delete_stmt is None:
         self.prepare_delete_stmt()
     if ISDEBUG:
         logger.log(u"requested delete for id: {0}".format(rowid))
     values = self.get_delete_args(rowid)
     if ISDEBUG:
         st = time.time()
     self.session.execute(self.delete_stmt, values)
     if ISDEBUG:
         et = time.time()
         logger.log("delete completed in {0} ms".format((et - st) * 1000))
     return {}
Exemplo n.º 6
0
 def execute_modify_items(self, modify_items, concurency):
     if len(modify_items) == 0:
         return {}
     statements_and_params = []
     if ISDEBUG:
         logger.log("prepare data for cassandra")
         st = time.time()
     for item in modify_items:
         if item[0] == 'insert':
             statements_and_params.append(
                 (self.get_insert_stmt(), self.get_insert_args(item[1])))
         elif item[0] == 'delete':
             statements_and_params.append(
                 (self.get_delete_stmt(), self.get_delete_args(item[1])))
         else:
             raise ValueError('unknown modify item type')
     if ISDEBUG:
         logger.log("prepare data finished in {0} ms".format(
             (time.time() - st) * 1000))
         logger.log("start modify operation. count: {0}".format(
             len(modify_items)))
         st = time.time()
     if len(statements_and_params) == 1:
         self.session.execute(statements_and_params[0][0],
                              statements_and_params[0][1])
     else:
         execute_concurrent(self.session,
                            statements_and_params,
                            raise_on_first_error=True,
                            concurrency=concurency)
     if ISDEBUG:
         logger.log("modify completed in {0} ms".format(
             (time.time() - st) * 1000))
Exemplo n.º 7
0
 def prepare_insert_stmt(self):
     insert_stmt_str = u"INSERT INTO {0}.{1} ({2}) VALUES ({3})".format(
         self.keyspace, self.columnfamily, u",".join(self.queryableColumns),
         u",".join([u"?"] * len(self.queryableColumns)))
     if self.ttl != 0:
         insert_stmt_str += " USING TTL {0}".format(self.ttl)
     if ISDEBUG:
         logger.log("insert statement: {0}".format(insert_stmt_str))
         logger.log("preparing insert statement")
         st = time.time()
     self.insert_stmt = self.session.prepare(insert_stmt_str)
     if ISDEBUG:
         logger.log("insert statement prepared in {0} ms".format(
             (time.time() - st) * 1000))
Exemplo n.º 8
0
    def init_connection(self, options, columns):
        start_time = time.time()
        if "hosts" not in options:
            logger.log("The hosts parameter is needed, setting to localhost.",
                       WARNING)
        hosts = options.get("hosts", "localhost").split(",")
        if "port" not in options:
            logger.log(
                "The port parameter is needed, setting to {0}.".format(
                    properties.DEFAULT_CASSANDRA_PORT), WARNING)
        self.port = options.get("port", properties.DEFAULT_CASSANDRA_PORT)
        self.limit = options.get("limit", None)
        self.allow_filtering = options.get(
            "allow_filtering", properties.ALLOW_FILTERING_DEFAULT) == 'True'
        self.enable_trace = options.get(
            "trace", properties.ENABLE_TRACE_STATEMENTS_DEFAULT) == 'True'
        self.ttl = int(options.get("ttl", properties.DEFAULT_TTL))
        timeout = options.get("timeout", None)
        username = options.get("username", None)
        password = options.get("password", None)

        self.cluster = Cluster(hosts)
        if (username is not None):
            self.cluster.auth_provider = PlainTextAuthProvider(
                username=username, password=password)
        # Cassandra connection init
        self.cluster.executor_threads = 4
        self.cluster.connect_timeout = int(
            options.get("connection_timeout",
                        properties.DEFAULT_CONNECTION_TIMEOUT))
        self.session = self.cluster.connect()
        end_time = time.time()
        if ISDEBUG:
            logger.log("connected in {0} ms".format(
                int((end_time - start_time) * 1000)))
        if timeout is not None:
            self.session.default_timeout = float(timeout)
Exemplo n.º 9
0
 def __init__(self, options, columns):
     start_time = time.time()
     if (("keyspace" not in options) or
         ("columnfamily" not in options)) and ("query" not in options):
         logger.log(
             "Either query or columnfamily and keyspace parameter is required.",
             ERROR)
     self.columnfamily = options.get("columnfamily", None)
     self.keyspace = options.get("keyspace", None)
     self.query = options.get("query", None)
     self.prepare_select_stmt = options.get(
         "prepare_selects", properties.PREPARE_SELECTS_DEFAULT) == 'True'
     self.init_connection(options, columns)
     start_time1 = time.time()
     self.describe_db()
     end_time = time.time()
     self.insert_stmt = None
     self.delete_stmt = None
     self.prepared_select_stmts = {}
     if ISDEBUG:
         logger.log("DB described in {0} ms".format(
             int((end_time - start_time1) * 1000)))
         logger.log("initialized in {0} ms".format(
             int((end_time - start_time) * 1000)))
Exemplo n.º 10
0
 def end_scan(self):
     if ISDEBUG:
         logger.log("end_scan. Total time: {0} ms".format((time.time() - self.scan_start_time) * 1000))
     pass
Exemplo n.º 11
0
 def commit(self):
     if ISDEBUG:
         logger.log("commit")
     if self.per_transaction_connection:
         self.close_cass_connection()
     pass
Exemplo n.º 12
0
 def begin(self, serializable):
     self.build_cassandra_provider()
     if ISDEBUG:
         logger.log("begin: {0}".format(serializable))
Exemplo n.º 13
0
 def update(self, rowid, new_values):
     if ISDEBUG:
         logger.log(u"requested update {0}".format(new_values))
     self.insert(new_values)
     return new_values
Exemplo n.º 14
0
 def sub_rollback(self, level):
     if ISDEBUG:
         logger.log("sub rollback {0}".format(level))
     pass
Exemplo n.º 15
0
 def sub_commit(self, level):
     if ISDEBUG:
         logger.log("sub commit {0}".format(level))
     pass
Exemplo n.º 16
0
 def sub_begin(self, level):
     if ISDEBUG:
         logger.log("sub begin {0}".format(level))
     pass
Exemplo n.º 17
0
    def build_select_stmt(self,
                          quals,
                          columns,
                          allow_filtering,
                          verbose=False):

        stmt_str = StringIO()
        usedQuals = {}
        filteredColumns = []
        rowid = None
        binding_values = []
        for col in columns:
            if col != self.ROWIDCOLUMN:
                filteredColumns.append(col)
        if (self.query):
            stmt_str.write(self.query)
        else:
            for col in self.rowIdColumns:
                if col not in filteredColumns:
                    filteredColumns.append(col)
            stmt_str.write(u"SELECT {0} FROM {1}.{2}".format(
                ",".join(map(lambda c: '"{0}"'.format(c), filteredColumns)),
                self.keyspace, self.columnfamily))
            isWhere = None
            eqRestricted = None
            rangeUsed = False

            if self.prepare_select_stmt:
                formatting_str = '?'
            else:
                formatting_str = '%s'

            for qual in quals:
                if qual.field_name == self.ROWIDCOLUMN:
                    rowid = qual.value
                if qual.field_name in self.queryableColumns:
                    qual.componentIdx = self.querableColumnsIdx[
                        qual.field_name]
                else:
                    qual.componentIdx = 10000

            if rowid is not None:
                ids = json.loads(rowid)
                for i in range(0, len(self.rowIdColumns)):
                    columnName = self.rowIdColumns[i]
                    binding_values.append(
                        types_mapper.map_object_to_type(
                            ids[i], self.columnsTypes[columnName]))
                stmt_str.write(u" WHERE {0}".format(u" AND ".join(
                    map(lambda str: str + u" = " + formatting_str,
                        self.rowIdColumns))))
            else:
                sortedQuals = sorted(quals, key=lambda qual: qual.componentIdx)
                last_clustering_key_idx = 0
                for qual in sortedQuals:
                    # Partition key and clustering column can't be null
                    if qual.componentIdx < self.IDX_QUERY_COST and qual.value is None:
                        return None
                    if ISDEBUG or verbose:
                        logger.log(
                            u"qual field {0}; qual index {1}; qual type {2}; qual operator: {4}; qual value {3}"
                            .format(qual.field_name, qual.componentIdx,
                                    type(qual.operator), qual.value,
                                    qual.operator))
                    if qual.operator == "=":
                        if (qual.field_name in self.queryableColumns
                                and self.queryableColumns[qual.field_name] !=
                                self.REGULAR_QUERY_COST):
                            if self.queryableColumns[
                                    qual.
                                    field_name] == self.CLUSTERING_KEY_QUERY_COST:
                                if last_clustering_key_idx == 0 and qual.componentIdx != self.CLUSTERING_KEY_QUERY_COST:
                                    eqRestricted = True
                                elif qual.componentIdx - 1 != last_clustering_key_idx and last_clustering_key_idx != 0:
                                    eqRestricted = True
                            if (qual.field_name not in usedQuals
                                    and not eqRestricted):
                                usedQuals[qual.field_name] = qual.value
                                if self.queryableColumns[
                                        qual.
                                        field_name] == self.CLUSTERING_KEY_QUERY_COST:
                                    last_clustering_key_idx = qual.componentIdx
                                formatted = u" {0} = {1} ".format(
                                    qual.field_name, formatting_str)
                                binding_values.append(
                                    types_mapper.map_object_to_type(
                                        qual.value,
                                        self.columnsTypes[qual.field_name]))
                                if isWhere:
                                    stmt_str.write(u" AND ")
                                    stmt_str.write(formatted)
                                else:
                                    stmt_str.write(u" WHERE ")
                                    stmt_str.write(formatted)
                                    isWhere = 1
                            elif allow_filtering:
                                formatted = u" {0} = {1} ".format(
                                    qual.field_name, formatting_str)
                                binding_values.append(
                                    types_mapper.map_object_to_type(
                                        qual.value,
                                        self.columnsTypes[qual.field_name]))
                                if isWhere:
                                    stmt_str.write(u" AND ")
                                    stmt_str.write(formatted)
                                else:
                                    stmt_str.write(u" WHERE ")
                                    stmt_str.write(formatted)
                                    isWhere = 1
                        elif allow_filtering:
                            formatted = u" {0} = {1} ".format(
                                qual.field_name, formatting_str)
                            binding_values.append(
                                types_mapper.map_object_to_type(
                                    qual.value,
                                    self.columnsTypes[qual.field_name]))
                            if isWhere:
                                stmt_str.write(u" AND ")
                                stmt_str.write(formatted)
                            else:
                                stmt_str.write(u" WHERE ")
                                stmt_str.write(formatted)
                                isWhere = 1
                    # IN operator
                    elif qual.operator == (u"=", True):
                        if (qual.field_name in self.queryableColumns):
                            if (self.queryableColumns[qual.field_name]
                                    == self.CLUSTERING_KEY_QUERY_COST
                                    or self.queryableColumns[qual.field_name]
                                    == self.PARTITION_KEY_QUERY_COST):
                                if (qual.field_name not in usedQuals
                                        and not eqRestricted
                                        and not rangeUsed):
                                    usedQuals[qual.field_name] = qual.value
                                    formatted = u"{0} IN {1}".format(
                                        qual.field_name, formatting_str)
                                    binding_value = []
                                    for el in qual.value:
                                        binding_value.append(
                                            types_mapper.map_object_to_type(
                                                el, self.columnsTypes[
                                                    qual.field_name]))
                                    if self.prepare_select_stmt:
                                        binding_values.append(binding_value)
                                    else:
                                        binding_values.append(
                                            ValueSequence(binding_value))

                                    if isWhere:
                                        stmt_str.write(u" AND ")
                                        stmt_str.write(formatted)
                                    else:
                                        stmt_str.write(u" WHERE ")
                                        stmt_str.write(formatted)
                                        isWhere = 1
                    elif (
                        qual.operator == "~" or qual.operator == "~~") and qual.field_name in self.indexes and \
                        self.indexes[qual.field_name] == "org.apache.cassandra.index.sasi.SASIIndex":
                        if qual.operator == "~":
                            val = "%{0}%".format(qual.value)
                        else:
                            val = qual.value
                        stmt_str.write(u" AND {0} LIKE {1}".format(
                            qual.field_name, formatting_str))
                        binding_values.append(
                            types_mapper.map_object_to_type(
                                val, self.columnsTypes[qual.field_name]))
                    else:
                        if (qual.operator == ">" or qual.operator == "<"
                                or qual.operator == ">="
                                or qual.operator == "<="):
                            if (qual.field_name in self.queryableColumns and
                                (self.queryableColumns[qual.field_name]
                                 == self.CLUSTERING_KEY_QUERY_COST
                                 # only SASI indexes support <,>,>=,<=
                                 or
                                 (qual.field_name in self.indexes
                                  and self.indexes[qual.field_name] ==
                                  "org.apache.cassandra.index.sasi.SASIIndex"))
                                    or
                                (allow_filtering
                                 and self.queryableColumns[qual.field_name] !=
                                 self.PARTITION_KEY_QUERY_COST)):
                                rangeUsed = True
                                if isWhere:
                                    stmt_str.write(u" AND {0} {1} {2}".format(
                                        qual.field_name, qual.operator,
                                        formatting_str))
                                    binding_values.append(
                                        types_mapper.map_object_to_type(
                                            qual.value, self.columnsTypes[
                                                qual.field_name]))
                                else:
                                    stmt_str.write(
                                        u" WHERE {0} {1} {2}".format(
                                            qual.field_name, qual.operator,
                                            formatting_str))
                                    isWhere = 1
                                    binding_values.append(
                                        types_mapper.map_object_to_type(
                                            qual.value, self.columnsTypes[
                                                qual.field_name]))

        if (self.limit):
            stmt_str.write(u" LIMIT {0}".format(self.limit))
        if allow_filtering:
            stmt_str.write(u" ALLOW FILTERING ")
        statement = stmt_str.getvalue()
        stmt_str.close()
        if ISDEBUG:
            logger.log(u"CQL query: {0}".format(statement), INFO)
        return (statement, binding_values, filteredColumns)
Exemplo n.º 18
0
 def pre_commit(self):
     if ISDEBUG:
         logger.log("pre commit")
     pass
Exemplo n.º 19
0
 def get_row_id_column(self):
     if ISDEBUG:
         logger.log(u"rowid requested")
     return self.ROWIDCOLUMN
Exemplo n.º 20
0
 def execute(self, quals, columns, sortkeys=None):
     if ISDEBUG:
         logger.log(
             "building select statement... Quals: {0}, columns: {1}, sortkeys: {2}, allow filtering: {3}"
             .format(quals, columns, sortkeys, self.allow_filtering))
     res = self.build_select_stmt(quals, columns, self.allow_filtering)
     if res is None:
         yield {}
         return
     stmt = res[0]
     binding_values = res[1]
     filtered_columns = res[2]
     if self.prepare_select_stmt:
         if stmt not in self.prepared_select_stmts:
             if ISDEBUG:
                 logger.log(u"preparing statement...")
             self.prepared_select_stmts[stmt] = self.session.prepare(stmt)
         elif ISDEBUG:
             logger.log(u"statement already prepared")
     if ISDEBUG:
         logger.log(u"executing statement...")
         st = time.time()
     elif self.enable_trace:
         logger.log(u"executing statement '{0}'".format(stmt))
     if self.prepare_select_stmt:
         result = self.session.execute(self.prepared_select_stmts[stmt],
                                       binding_values)
     else:
         result = self.session.execute(SimpleStatement(stmt),
                                       binding_values)
     if ISDEBUG:
         logger.log(u"cursor got in {0} ms".format(
             (time.time() - st) * 1000))
     for row in result:
         line = {}
         idx = 0
         for column_name in filtered_columns:
             value = row[idx]
             if self.columnsTypes[
                     column_name].main_type == cassandra_types.cql_timestamp and value is not None:
                 line[column_name] = u"{0}+00:00".format(value)
             elif self.columnsTypes[
                     column_name].main_type == cassandra_types.cql_time and value is not None:
                 line[column_name] = u"{0}+00:00".format(value)
             elif isinstance(value, tuple):
                 tuple_values = []
                 for t in value:
                     tuple_values.append(str(t))
                 line[column_name] = json.dumps(tuple_values)
             elif isinstance(value, OrderedMapSerializedKey):
                 dict_values = {}
                 for i in value:
                     dict_values[str(i)] = str(value[i])
                 line[column_name] = json.dumps(dict_values)
             else:
                 line[column_name] = value
             idx = idx + 1
         rowid_values = []
         for idcolumn in self.rowIdColumns:
             rowid_values.append(str(line[idcolumn]))
         line[self.ROWIDCOLUMN] = json.dumps(rowid_values)
         yield line
Exemplo n.º 21
0
def import_schema(schema, srv_options, options, restriction_type, restricts):
    if ISDEBUG:
        logger.log(
            u"import schema {0} requiested with options {1}; restriction type: {2}; restrictions: {3}".format(schema,
                                                                                                              options,
                                                                                                              restriction_type,
                                                                                                              restricts))
    if "hosts" not in srv_options:
        logger.log("The hosts parameter is needed, setting to localhost.", WARNING)
    hosts = srv_options.get("hosts", "localhost").split(",")
    if "port" not in srv_options:
        logger.log("The port parameter is needed, setting to 9042.", WARNING)
    port = srv_options.get("port", "9042")
    username = srv_options.get("username", None)
    password = srv_options.get("password", None)
    with_row_id = options.get('with_row_id', 'True') == 'True'
    names_mapping = options.get('mapping', '').split(';')
    mapping_dict = {}
    mapping_dict_backward = {}
    for s in names_mapping:
        kp = s.split('=')
        if len(kp) != 2:
            continue
        key = kp[0].strip()
        value = kp[1].strip()
        mapping_dict[key] = value
        mapping_dict_backward[value] = key

    cluster = Cluster(hosts)
    if (username is not None):
        cluster.auth_provider = PlainTextAuthProvider(username=username, password=password)
    # Cassandra connection init
    session = cluster.connect()
    keyspace = cluster.metadata.keyspaces[schema]
    cassandra_tables = []
    tables = keyspace.tables
    views = keyspace.views
    if restriction_type is None:
        for t in tables:
            if t in tables:
                cassandra_tables.append(tables[t])
            else:
                cassandra_tables.append(views[t])
    elif restriction_type == 'limit':
        for r in restricts:
            t_name = r
            if t_name in mapping_dict_backward:
                t_name = mapping_dict_backward[t_name]
            if t_name in tables:
                cassandra_tables.append(tables[t_name])
            else:
                cassandra_tables.append(views[t_name])
    elif restriction_type == 'except':
        for t in tables:
            if t not in restricts:
                if t in tables:
                    cassandra_tables.append(tables[t])
                else:
                    cassandra_tables.append(views[t])
    pg_tables = []
    for c_table in cassandra_tables:
        if ISDEBUG:
            logger.log("Importing table {0}...".format(c_table.name))
        pg_table_name = c_table.name
        if pg_table_name in mapping_dict:
            if ISDEBUG:
                logger.log("Cassandra table name '{0}' maps to PostgreSQL table name '{1}'".format(pg_table_name,
                                                                                                   mapping_dict[
                                                                                                       pg_table_name]))
            pg_table_name = mapping_dict[pg_table_name]
        pg_table = TableDefinition(pg_table_name)
        pg_table.options['keyspace'] = schema
        pg_table.options['columnfamily'] = c_table.name
        for c_column_name in c_table.columns:
            cql_type = c_table.columns[c_column_name].cql_type
            pg_type = types_mapper.get_pg_type(cql_type)
            if ISDEBUG:
                logger.log("Adding column {0} with PostgreSQL type {2} (CQL type {1})".format(c_column_name, cql_type,
                                                                                              pg_type))
            pg_table.columns.append(ColumnDefinition(c_column_name, type_name=pg_type))
        if with_row_id:
            pg_table.columns.append(ColumnDefinition('__rowid__', type_name='text'))
        pg_tables.append(pg_table)
        if ISDEBUG:
            logger.log("Table imported: {0}".format(c_table.name))
    session.shutdown()
    return pg_tables
Exemplo n.º 22
0
 def rollback(self):
     if ISDEBUG:
         logger.log("rollback")
     pass