# Shared imports used by the handlers below.  DQXDbTools, DQXUtils, config
# and arraybuffer are project-internal modules; helpers such as gzip(...),
# index_table_query, get_table_names/get_table_ids, extract2D, decode,
# name, desciptionToDType, NULL_VALUES, getCache, select_by_list and
# summarise_call are defined in the handlers' original modules and are not
# shown here.
import os
import json
import h5py
import numpy as np
from operator import itemgetter


def handler(start_response, request_data):
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    first_dimension = request_data['first_dimension']

    db = DQXDbTools.OpenDatabase(DQXDbTools.ParseCredentialInfo(request_data), dataset)
    col_table, row_table = get_table_names(db, datatable)
    col_properties.append(datatable + '_column_index')
    row_properties.append(datatable + '_row_index')
    col_result = index_table_query(db, col_table, col_properties, col_qry, col_order)
    row_result = index_table_query(db, row_table, row_properties, row_qry, row_order)
    col_idx = col_result[datatable + '_column_index']
    row_idx = row_result[datatable + '_row_index']
    del col_result[datatable + '_column_index']
    del row_result[datatable + '_row_index']
    db.close()

    # The 2D data lives in an HDF5 side file, one dataset per property.
    hdf5_file = h5py.File(
        os.path.join(config.BASEDIR, '2D_data', dataset + '_' + datatable + '.hdf5'), 'r')
    two_d_properties = dict((prop, None) for prop in two_d_properties)
    for prop in two_d_properties.keys():
        two_d_properties[prop] = hdf5_file[prop]

    if len(col_idx) == 0 or len(row_idx) == 0:
        # Nothing selected on one axis: return empty arrays of the right dtype.
        two_d_result = {}
        for prop in two_d_properties.keys():
            two_d_result[prop] = np.array([], dtype=two_d_properties[prop].id.dtype)
    else:
        two_d_result = select_by_list(two_d_properties, row_idx, col_idx, first_dimension)

    result_set = []
    for name, array in col_result.items():
        result_set.append((('col_' + name), array))
    for name, array in row_result.items():
        result_set.append((('row_' + name), array))
    for name, array in two_d_result.items():
        result_set.append((('2D_' + name), array))

    data = gzip(''.join(arraybuffer.encode_array_set(result_set)))
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
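# --- Added sketch (not part of the original source) ----------------------
# Minimal harness showing how these generator-style handlers are driven.
# The real server supplies start_response through its WSGI layer; here it
# is stubbed so a handler can be exercised in isolation.
def run_handler(handler_func, request_data):
    captured = {}

    def start_response(status, response_headers):
        # Record what the handler reports; a real server would transmit it.
        captured['status'] = status
        captured['headers'] = response_headers

    chunks = list(handler_func(start_response, request_data))
    return captured['status'], captured['headers'], chunks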
def handler(start_response, requestData):
    try:
        length = int(requestData['environ'].get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    content = requestData['environ']['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(content) if len(content) > 0 else None
    if not content:
        raise SyntaxError('No query parameters supplied')

    database = content['database']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))

    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)

    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        startRow = int(startRow)
        endRow = int(endRow)
        if startRow < 0:
            startRow = 0
        if endRow <= startRow:
            endRow = startRow + 1

    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])

    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))
    auth_query = credentials.get_auth_query(
        database, [join['foreignTable'] for join in joins] + [tableId])

    cache = getCache()
    cacheKey = json.dumps([tableId, query, orderBy, distinct, columns, groupBy,
                           database, startRow, endRow, joins, auth_query])
    data = None
    if cacheData and randomSample is None:  # Don't serve cache on random sample!!
        try:
            data = cache[cacheKey]
        except KeyError:
            pass

    if data is None:
        with DQXDbTools.DBCursor(requestData, database, read_timeout=config.TIMEOUT) as cur:
            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                # AND the auth restriction into the user's query.
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [whereClause.query, auth_query],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()

            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns), DBTBESC(tableId))
            for join in joins:
                if 'type' in join and join['type'] in ['', 'INNER', 'LEFT', 'RIGHT', 'FULL']:
                    sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(
                        join['type'].upper(), DBTBESC(join['foreignTable']),
                        DBCOLESC(join['foreignColumn']), DBCOLESC(join['column']))
                else:
                    raise SyntaxError('Join type not valid')
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy and len(groupBy) > 0:
                sqlQuery += " GROUP BY " + ','.join(map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                sqlQuery += " ORDER BY {0}".format(','.join(
                    [DBCOLESC(col) + ' ' + direction for direction, col in orderBy]))
            if startRow is not None and endRow is not None:
                sqlQuery += " LIMIT {0} OFFSET {1}".format(endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)

            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()

            result = {}
            for rawCol, (i, desc) in zip(rawColumns, enumerate(cur.description)):
                # Figure out the name we should return for the column - by default MonetDB doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ['i1', 'i2', 'i4', 'S']:
                    # Substitute a per-dtype sentinel for SQL NULLs.
                    null_value = NULL_VALUES[dtype]
                    result[col_name] = np.array(
                        [(str(row[i]).encode('utf-8') if dtype == 'S' else row[i])
                         if row[i] is not None else null_value for row in rows],
                        dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows], dtype=dtype)

            data = gzip(data=b''.join(arraybuffer.encode_array_set(list(result.items()))))
            if cacheData:
                cache[cacheKey] = data

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
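# --- Added sketch (not part of the original source) ----------------------
# The dtype branch above swaps SQL NULLs for per-dtype sentinels before
# building the numpy array.  The sentinel values below are assumptions for
# illustration only; the authoritative mapping is the module-level
# NULL_VALUES used by the handler.
ASSUMED_NULL_VALUES = {'i1': -128, 'i2': -32768, 'i4': -2147483648, 'S': b''}

def column_with_nulls(rows, i, dtype):
    # Mirror of the substitution in the handler, minus the string encoding.
    null_value = ASSUMED_NULL_VALUES[dtype]
    return np.array([row[i] if row[i] is not None else null_value
                     for row in rows], dtype=dtype)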
def handler(start_response, request_data):
    datatable = request_data['table']
    dataset = request_data['dataset']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(request_data)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(dataset))

    two_d_properties = request_data['2DProperties'].split('~')
    col_properties = request_data['colProperties'].split('~')
    row_properties = request_data['rowProperties'].split('~')
    col_qry = request_data['colQry']
    col_order = request_data['colOrder']
    row_qry = request_data['rowQry']
    row_order = request_data.get('rowOrder', None)
    row_order_columns = []
    if row_order == 'columns':
        try:
            row_order_columns = request_data['rowSortCols'].split('~')
        except KeyError:
            pass
        row_order = None
    try:
        col_limit = int(request_data['colLimit'])
    except KeyError:
        col_limit = None
    try:
        row_limit = int(request_data['rowLimit'])
    except KeyError:
        row_limit = None
    try:
        col_offset = int(request_data['colOffset'])
    except KeyError:
        col_offset = None
    try:
        row_offset = int(request_data['rowOffset'])
    except KeyError:
        row_offset = None
    # Set fail limit to one past so we know if we hit it
    try:
        col_fail_limit = int(request_data['colFailLimit']) + 1
    except KeyError:
        col_fail_limit = None
    try:
        row_sort_property = request_data['rowSortProperty']
    except KeyError:
        row_sort_property = None
    try:
        col_key = request_data['colKey']
    except KeyError:
        col_key = None
    try:
        sort_mode = request_data['sortMode']
    except KeyError:
        sort_mode = None
    try:
        row_random_sample = int(request_data['rowRandomSample'])
    except KeyError:
        row_random_sample = None

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tablename, row_tablename = get_table_ids(cur, dataset, datatable)
        col_auth_query = credentials.get_auth_query(dataset, [col_tablename])
        row_auth_query = credentials.get_auth_query(dataset, [row_tablename])

    cache = getCache()
    cache_key = json.dumps([datatable, dataset, two_d_properties, col_properties,
                            row_properties, col_qry, col_order, row_qry, row_order,
                            row_order_columns, row_random_sample, col_limit, row_limit,
                            col_offset, row_offset, col_fail_limit, row_sort_property,
                            col_key, sort_mode, col_auth_query, row_auth_query])
    data = None
    try:
        data = cache[cache_key]
    except KeyError:
        print('2D Cache miss')

    if data is None:
        with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
            col_result = index_table_query(dataset, cur, col_tablename, col_properties,
                                           col_qry, col_auth_query, col_order, col_limit,
                                           col_offset, col_fail_limit, col_index_field, None)
            if len(row_order_columns) > 0:
                # If we are sorting by 2d data then we need to grab all the rows as the limit applies post sort.
                row_result = index_table_query(dataset, cur, row_tablename, row_properties,
                                               row_qry, row_auth_query, row_order, None,
                                               None, None, row_index_field, row_random_sample)
            else:
                row_result = index_table_query(dataset, cur, row_tablename, row_properties,
                                               row_qry, row_auth_query, row_order, row_limit,
                                               row_offset, None, row_index_field, row_random_sample)
            col_idx = col_result[col_index_field]
            row_idx = row_result[row_index_field]
            del col_result[col_index_field]
            del row_result[row_index_field]
            if len(col_idx) == col_fail_limit:
                result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
            else:
                if len(row_order_columns) > 0 and len(row_idx) > 0:
                    # Translate primkeys to idx
                    sqlquery = 'SELECT "{col_field}", "{idx_field}" FROM "{table}" WHERE "{col_field}" IN ({params})'.format(
                        idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                        table=DQXDbTools.ToSafeIdentifier(col_tablename),
                        params="'" + "','".join(map(DQXDbTools.ToSafeIdentifier, row_order_columns)) + "'",
                        col_field=DQXDbTools.ToSafeIdentifier(col_key))
                    cur.execute(sqlquery)  # Was missing before the fetch below
                    idx_for_col = dict((k, v) for k, v in cur.fetchall())
                    # Sort by the order specified - reverse so last clicked is major sort
                    sort_col_idx = list(reversed([idx_for_col[key] for key in row_order_columns]))
                    # Grab the data needed to sort
                    sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx, [row_sort_property])
                    rows = list(zip(row_idx, sort_data[row_sort_property]))
                    if sort_mode == 'call':
                        polyploid_key_func = lambda row: ''.join(summarise_call(calls) for calls in row[1])
                        haploid_key_func = lambda row: ''.join([str(c).zfill(2) for c in row[1]])
                        if len(rows[0][1].shape) == 1:
                            rows.sort(key=haploid_key_func, reverse=True)
                        else:
                            rows.sort(key=polyploid_key_func, reverse=True)
                    elif sort_mode == 'fraction':
                        for i in range(len(sort_col_idx)):
                            # TODO Should be some fancy bayesian shizzle
                            def key_func(row):
                                if sum(row[1][i]) == 0:
                                    return '-1'
                                return str(1 - float(row[1][i][0]) / sum(row[1][i])) + str(sum(row[1][i])).zfill(4)
                            rows.sort(key=key_func, reverse=True)
                    else:
                        print("Unimplemented sort_mode")
                    row_pos_for_idx = dict(list(zip(row_idx, list(range(len(row_idx))))))
                    # Now just get the row_idx to pass to 2d extract for the slice we need
                    # (a list comprehension: map objects are not sliceable in Python 3)
                    row_idx = np.array([row[0] for row in rows][row_offset:row_offset + row_limit])
                    # Use this row idx to retrieve the row data from the initial query
                    for name, array in list(row_result.items()):
                        row_result[name] = array[[row_pos_for_idx[idx] for idx in row_idx]]
                two_d_result = extract2D(dataset, datatable, row_idx, col_idx, two_d_properties)
                result_set = []
                for name, array in list(col_result.items()):
                    result_set.append((('col_' + name), array))
                for name, array in list(row_result.items()):
                    result_set.append((('row_' + name), array))
                for name, array in list(two_d_result.items()):
                    result_set.append((('2D_' + name), array))
            data = gzip(data=b''.join(arraybuffer.encode_array_set(result_set)))
            cache[cache_key] = data

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
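# --- Added sketch (not part of the original source) ----------------------
# The cache contract the cached handlers assume: getCache() returns a
# dict-like object that raises KeyError on a miss and stores pre-gzipped
# response bytes on assignment, keyed by an order-sensitive JSON encoding
# of every parameter that affects the result.  A plain dict satisfies the
# subset of the interface used here.
def make_cache_key(*params):
    # Hypothetical helper mirroring the handlers' inline json.dumps([...]).
    return json.dumps(list(params))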
def handler(start_response, requestData):
    try:
        length = int(requestData['environ'].get('CONTENT_LENGTH', '0'))
    except ValueError:
        length = 0
    content = requestData['environ']['wsgi.input'].read(length).decode("utf-8")
    content = json.loads(content) if len(content) > 0 else None
    if not content:
        raise SyntaxError('No query parameters supplied')

    database = content['database']
    # Due to caching we check for auth here, as otherwise auth is only checked on DB read.
    credentials = DQXDbTools.CredentialInformation(requestData)
    credentials.VerifyCanDo(DQXDbTools.DbOperationRead(database))

    tableId = content['table']
    query = content['query']
    orderBy = json.loads(content.get('orderBy', '[]'))
    distinct = content.get('distinct', 'false') == 'true'
    rawColumns = json.loads(content['columns'])
    columns = list(map(decode, rawColumns))
    groupBy = content.get('groupBy', None)

    startRow, endRow = None, None
    if content.get('limit', False):
        startRow, endRow = content['limit'].split('~')
        startRow = int(startRow)
        endRow = int(endRow)
        if startRow < 0:
            startRow = 0
        if endRow <= startRow:
            endRow = startRow + 1

    randomSample = None
    if content.get('randomSample', False):
        randomSample = int(content['randomSample'])

    cacheData = content.get('cache', True)
    joins = json.loads(content.get('joins', '[]'))
    auth_query = credentials.get_auth_query(
        database, [join['foreignTable'] for join in joins] + [tableId])

    cache = getCache()
    cacheKey = json.dumps([tableId, query, orderBy, distinct, columns, groupBy,
                           database, startRow, endRow, joins, auth_query])
    data = None
    if cacheData and randomSample is None:  # Don't serve cache on random sample!!
        try:
            data = cache[cacheKey]
        except KeyError:
            pass

    if data is None:
        with DQXDbTools.DBCursor(requestData, database, read_timeout=config.TIMEOUT) as cur:
            whereClause = DQXDbTools.WhereClause()
            whereClause.ParameterPlaceHolder = '%s'
            whereClause.Decode(query, True)
            if auth_query:
                # AND the auth restriction into the user's query.
                whereClause.query = {
                    "whcClass": "compound",
                    "isCompound": True,
                    "isRoot": True,
                    "Components": [whereClause.query, auth_query],
                    "Tpe": "AND"
                }
            whereClause.CreateSelectStatement()

            sqlQuery = "SELECT "
            if distinct:
                sqlQuery += " DISTINCT "
            sqlQuery += "{0} FROM {1}".format(','.join(columns), DBTBESC(tableId))
            for join in joins:
                if 'type' in join and join['type'] in ['', 'INNER', 'LEFT', 'RIGHT', 'FULL']:
                    sqlQuery += " {0} JOIN {1} ON {2} = {3}".format(
                        join['type'].upper(), DBTBESC(join['foreignTable']),
                        DBCOLESC(join['foreignColumn']), DBCOLESC(join['column']))
                else:
                    raise SyntaxError('Join type not valid')
            if len(whereClause.querystring_params) > 0:
                sqlQuery += " WHERE {0}".format(whereClause.querystring_params)
            if groupBy and len(groupBy) > 0:
                sqlQuery += " GROUP BY " + ','.join(map(DBCOLESC, groupBy.split('~')))
            if len(orderBy) > 0:
                sqlQuery += " ORDER BY {0}".format(','.join(
                    [DBCOLESC(col) + ' ' + direction for direction, col in orderBy]))
            if startRow is not None and endRow is not None:
                sqlQuery += " LIMIT {0} OFFSET {1}".format(endRow - startRow + 1, startRow)
            if randomSample is not None:
                sqlQuery += " SAMPLE {0}".format(randomSample)

            if DQXDbTools.LogRequests:
                DQXUtils.LogServer('###QRY:' + sqlQuery)
                DQXUtils.LogServer('###PARAMS:' + str(whereClause.queryparams))
            cur.execute(sqlQuery, whereClause.queryparams)
            rows = cur.fetchall()

            result = {}
            for rawCol, (i, desc) in zip(rawColumns, enumerate(cur.description)):
                # Figure out the name we should return for the column - by default MonetDB doesn't qualify names
                col_name = name(rawCol, desc[0])
                dtype = desciptionToDType(desc[1])
                if dtype in ['i1', 'i2', 'i4', 'S']:
                    # Substitute a per-dtype sentinel for SQL NULLs.
                    null_value = NULL_VALUES[dtype]
                    result[col_name] = np.array(
                        [(row[i].encode('ascii', 'replace') if dtype == 'S' else row[i])
                         if row[i] is not None else null_value for row in rows],
                        dtype=dtype)
                elif desc[1] == 'timestamp':
                    result[col_name] = np.array(
                        [datetimeToJulianDay(row[i]) if row[i] is not None else None
                         for row in rows], dtype=dtype)
                else:
                    result[col_name] = np.array([row[i] for row in rows], dtype=dtype)

            data = gzip(data=b''.join(arraybuffer.encode_array_set(list(result.items()))))
            if cacheData:
                cache[cacheKey] = data

    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip'),
                        ('Access-Control-Allow-Origin', '*')]
    start_response(status, response_headers)
    yield data
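# --- Added sketch (not part of the original source) ----------------------
# The timestamp branch above calls a datetimeToJulianDay helper defined
# elsewhere in the original source.  A plausible stand-in (an assumption,
# not the original implementation): the Unix epoch is Julian Day 2440587.5,
# so a tz-aware UTC datetime converts by linear rescaling.
def datetime_to_julian_day(dt):
    return dt.timestamp() / 86400.0 + 2440587.5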
# Note: this revision predates the cached handler above and targets
# Python 2 (list-returning zip/map); print calls use the parenthesized
# form, which is valid in both Python 2 and 3.
def handler(start_response, request_data):
    datatable = request_data['datatable']
    dataset = request_data['dataset']
    workspace = request_data['workspace']
    two_d_properties = request_data['2D_properties'].split('~')
    col_properties = request_data['col_properties'].split('~')
    row_properties = request_data['row_properties'].split('~')
    col_qry = request_data['col_qry']
    col_order = request_data['col_order']
    row_qry = request_data['row_qry']
    row_order = request_data['row_order']
    row_order_columns = []
    if row_order == 'columns':
        try:
            row_order_columns = request_data['row_sort_cols'].split('~')
        except KeyError:
            pass
        row_order = 'NULL'
    first_dimension = request_data['first_dimension']
    try:
        col_limit = int(request_data['col_limit'])
    except KeyError:
        col_limit = None
    try:
        row_limit = int(request_data['row_limit'])
    except KeyError:
        row_limit = None
    try:
        col_offset = int(request_data['col_offset'])
    except KeyError:
        col_offset = None
    try:
        row_offset = int(request_data['row_offset'])
    except KeyError:
        row_offset = None
    # Set fail limit to one past so we know if we hit it
    try:
        col_fail_limit = int(request_data['col_fail_limit']) + 1
    except KeyError:
        col_fail_limit = None
    try:
        row_sort_property = request_data['row_sort_property']
    except KeyError:
        row_sort_property = None
    try:
        col_key = request_data['col_key']
    except KeyError:
        col_key = None
    try:
        sort_mode = request_data['sort_mode']
    except KeyError:
        sort_mode = None

    col_index_field = datatable + '_column_index'
    row_index_field = datatable + '_row_index'
    col_properties.append(col_index_field)
    row_properties.append(row_index_field)

    with DQXDbTools.DBCursor(request_data, dataset, read_timeout=config.TIMEOUT) as cur:
        col_tableid, row_tableid = get_table_ids(cur, datatable)
        col_tablename = get_workspace_table_name(col_tableid, workspace)
        row_tablename = get_workspace_table_name(row_tableid, workspace)
        col_result = index_table_query(cur, col_tablename, col_properties, col_qry,
                                       col_order, col_limit, col_offset,
                                       col_fail_limit, col_index_field)
        if len(row_order_columns) > 0:
            # If we are sorting by 2d data then we need to grab all the rows as the limit applies post sort.
            row_result = index_table_query(cur, row_tablename, row_properties, row_qry,
                                           row_order, None, None, None, row_index_field)
        else:
            row_result = index_table_query(cur, row_tablename, row_properties, row_qry,
                                           row_order, row_limit, row_offset, None,
                                           row_index_field)
        col_idx = col_result[col_index_field]
        row_idx = row_result[row_index_field]
        if len(col_idx) == col_fail_limit:
            result_set = [('_over_col_limit', np.array([0], dtype='i1'))]
        else:
            del col_result[col_index_field]
            del row_result[row_index_field]
            if len(row_order_columns) > 0 and len(row_idx) > 0:
                # Translate primkeys to idx
                sqlquery = "SELECT {col_field}, {idx_field} FROM {table} WHERE {col_field} IN ({params})".format(
                    idx_field=DQXDbTools.ToSafeIdentifier(col_index_field),
                    table=DQXDbTools.ToSafeIdentifier(col_tablename),
                    params="'" + "','".join(map(DQXDbTools.ToSafeIdentifier, row_order_columns)) + "'",
                    col_field=DQXDbTools.ToSafeIdentifier(col_key))
                print(sqlquery)
                cur.execute(sqlquery)
                idx_for_col = dict((k, v) for k, v in cur.fetchall())
                # Sort by the order specified - reverse so last clicked is major sort
                sort_col_idx = list(reversed(map(lambda key: idx_for_col[key], row_order_columns)))
                # Grab the data needed to sort
                sort_data = extract2D(dataset, datatable, row_idx, sort_col_idx,
                                      first_dimension, [row_sort_property])
                rows = zip(row_idx, sort_data[row_sort_property])
                if sort_mode == 'call':
                    polyploid_key_func = lambda row: ''.join(summarise_call(calls) for calls in row[1])
                    haploid_key_func = lambda row: ''.join(map(lambda c: str(c).zfill(2), row[1]))
                    if len(rows[0][1].shape) == 1:
                        rows.sort(key=haploid_key_func, reverse=True)
                    else:
                        rows.sort(key=polyploid_key_func, reverse=True)
                elif sort_mode == 'fraction':
                    for i in range(len(sort_col_idx)):
                        # TODO Should be some fancy bayesian shizzle
                        def key_func(row):
                            if sum(row[1][i]) == 0:
                                return '-1'
                            return str(1 - float(row[1][i][0]) / sum(row[1][i])) + str(sum(row[1][i])).zfill(4)
                        rows.sort(key=key_func, reverse=True)
                else:
                    print("Unimplemented sort_mode")
                row_pos_for_idx = dict(zip(row_idx, range(len(row_idx))))
                # Now just get the row_idx to pass to 2d extract for the slice we need
                row_idx = np.array(map(itemgetter(0), rows)[row_offset:row_offset + row_limit])
                # Use this row idx to retrieve the row data from the initial query
                for name, array in row_result.items():
                    row_result[name] = array[[row_pos_for_idx[idx] for idx in row_idx]]
            two_d_result = extract2D(dataset, datatable, row_idx, col_idx,
                                     first_dimension, two_d_properties)
            result_set = []
            for name, array in col_result.items():
                result_set.append((('col_' + name), array))
            for name, array in row_result.items():
                result_set.append((('row_' + name), array))
            for name, array in two_d_result.items():
                result_set.append((('2D_' + name), array))

    data = gzip(data=''.join(arraybuffer.encode_array_set(result_set)))
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain'),
                        ('Content-Length', str(len(data))),
                        ('Content-Encoding', 'gzip')]
    start_response(status, response_headers)
    yield data
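# --- Added sketch (not part of the original source) ----------------------
# Why the haploid sort key zero-fills each call to two digits: lexicographic
# order on the concatenated key then agrees with numeric order for call
# values up to 99.
assert ''.join(str(c).zfill(2) for c in [1, 10, 2]) == '011002'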