def parse_clause(self, clause, base_clause_parser, table):
    """Recursively turn a boolean clause string into an expression object.

    The clause is scanned left to right while tracking round-bracket
    depth. The first ``||``, ``&&`` or ``NOT`` found outside all round
    brackets splits the clause: the operands are parsed recursively and
    wrapped in ``OR_expression``, ``AND_expression`` or ``NOT_expression``.
    For ``NOT`` only the text after the keyword is parsed; anything
    before it is ignored. Operators are matched literally (``NOT`` is
    case-sensitive) and everything between ``[`` and ``]`` is skipped.

    If operators only occur inside round brackets, as many characters as
    the shallowest operator depth are cut from both ends of the clause
    and the remainder is parsed again. If there is no operator at all,
    leading/trailing round brackets are stripped and the remaining token
    is handed to ``base_clause_parser``.

    Args:
        clause: the clause text to parse.
        base_clause_parser: callable taking one operator-free token and
            returning the parsed result for it.
        table: table name, forwarded to recursive calls and used for
            ``self.get_partition_key(table)`` when building a
            ``NOT_expression``.

    Returns:
        Whatever ``base_clause_parser`` returns for an operator-free
        clause, otherwise an ``OR_expression`` / ``AND_expression`` /
        ``NOT_expression`` instance.

    Raises:
        SystemExit: via ``sys.exit`` when the round brackets do not
            match, i.e. a ``)`` appears before its ``(`` or the clause
            ends with brackets still open.
    """
    clause = clause.strip()
    length = len(clause)
    depth = 0
    # Shallowest bracket depth at which an operator was seen inside
    # brackets; None while no such operator has been encountered.
    min_depth = None
    in_wildcard_clause = False

    for i, char in enumerate(clause):
        if char == '[':
            in_wildcard_clause = True
        elif char == ']':
            in_wildcard_clause = False
        elif in_wildcard_clause:
            # Inside a [...] wildcard: characters carry no structure here.
            continue
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth < 0:
                # A closing bracket without a preceding opening one. Bail
                # out now: otherwise a clause such as "a) || (b" ends at
                # depth 0 and would be sliced with a negative offset.
                sys.exit("ERROR in %s. Brackets don't match" % clause)
        elif i < length - 2:
            pair = clause[i:i + 2]
            if pair == "||" or pair == "&&":
                if depth == 0:
                    left = self.parse_clause(
                        clause[:i].strip(), base_clause_parser, table)
                    right = self.parse_clause(
                        clause[i + 2:].strip(), base_clause_parser, table)
                    if pair == "||":
                        return OR_expression(left, right)
                    return AND_expression(left, right)
                min_depth = depth if min_depth is None \
                    else min(min_depth, depth)
            elif i < length - 3 and clause[i:i + 3] == "NOT":
                if depth == 0:
                    body = self.parse_clause(
                        clause[i + 3:].strip(), base_clause_parser, table)
                    return NOT_expression(
                        body, table, self.get_partition_key(table),
                        self.n_variants)
                min_depth = depth if min_depth is None \
                    else min(min_depth, depth)

    if depth != 0:
        sys.exit("ERROR in %s. Brackets don't match" % clause)

    if min_depth is not None:
        # Strip away the enclosing brackets to expose the uppermost
        # boolean operator, then parse what is left.
        return self.parse_clause(
            clause[min_depth:length - min_depth], base_clause_parser, table)

    # No boolean operators left: strip all remaining brackets.
    token = clause.strip('(').strip(')')
    return base_clause_parser(token)
def run(self, query, gt_filter=None, show_variant_samples=False,
        variant_samples_delim=',', predicates=None,
        needs_genotypes=False, needs_genes=False,
        show_families=False, test_mode=False,
        needs_sample_names=False, nr_cores=1, start_time=-42,
        use_header=False, exp_id="Oink", timeout=10.0, batch_size=100):
    """Prepare and execute a query, optionally with a genotype filter.

    The query is passed through ``self.formatter.format_query``, has
    ``==`` rewritten to ``=``, and is split into its parts with
    ``get_query_parts``. All keyword arguments are stored on the
    instance under the same names; ``predicates`` (if truthy) is
    appended to ``self.predicates``.

    ``self.query_type`` is set to:
      * ``"filter-genotypes"`` whenever ``gt_filter`` is given,
      * ``"select-genotypes"`` when there is no filter but some query
        token starts with ``gt`` / ``(gt`` or contains ``.gt``,
      * ``"no-genotypes"`` otherwise.

    ``self.where_exp`` ends up as the parsed WHERE clause, AND-ed with
    the corrected genotype filter when one is given; the filter alone
    when there is no WHERE clause; or ``None`` when there is neither.

    Exits through ``sys.exit`` if ``self._is_gt_filter_safe()`` returns
    ``False``. Finally calls ``self._apply_query()`` and sets
    ``self.query_executed`` to ``True``.
    """
    formatted = self.formatter.format_query(query)
    self.query = formatted.replace('==', '=')
    self.gt_filter = gt_filter

    self.nr_cores = nr_cores
    self.start_time = start_time
    self.use_header = use_header
    self.exp_id = exp_id
    self.timeout = timeout
    self.batch_size = batch_size

    if self._is_gt_filter_safe() is False:
        sys.exit("ERROR: unsafe --gt-filter command.")

    self.show_variant_samples = show_variant_samples
    self.variant_samples_delim = variant_samples_delim
    self.test_mode = test_mode
    self.needs_genotypes = needs_genotypes
    self.needs_vcf_columns = False
    if self.formatter.name == 'vcf':
        self.needs_vcf_columns = True
    self.needs_sample_names = needs_sample_names
    self.needs_genes = needs_genes
    self.show_families = show_families

    if predicates:
        self.predicates += predicates

    # Put a space after every comma so that the SELECT columns become
    # separate tokens when the query is split on whitespace.
    self.query = self.query.replace(',', ', ')
    self.query_pieces = self.query.split()

    has_filter = self.gt_filter is not None
    mentions_genotypes = any(
        piece.startswith(("gt", "(gt")) or ".gt" in piece
        for piece in self.query_pieces)

    if has_filter:
        # A genotype filter decides the query type on its own.
        self.gt_filter_exp = self._correct_genotype_filter()
        self.query_type = "filter-genotypes"
    elif mentions_genotypes:
        self.query_type = "select-genotypes"
    else:
        self.query_type = "no-genotypes"

    query_parts = get_query_parts(self.query)
    (self.requested_columns, self.from_table,
     where_clause, self.rest_of_query) = query_parts
    self.extra_columns = []

    if where_clause == '':
        self.where_exp = self.gt_filter_exp if has_filter else None
    else:
        self.where_exp = self.parse_where_clause(where_clause,
                                                 self.from_table)
        if has_filter:
            self.where_exp = AND_expression(self.where_exp,
                                            self.gt_filter_exp)

    self._apply_query()
    self.query_executed = True