def test_selection_from_bedfile_and_subselection(conn):
    """Check that a BED-based selection can be derived from a sub-selection.

    .. note:: The bedreader itself **is not** tested here!
    """
    # NOTE(review): columns are written space-separated, one interval per
    # line - confirm this matches the layout BedReader expects.
    larger_string = """
        chr1 1 10 feature1 0 +
        chr1 50 60 feature2 0 -
        chr1 51 59 another_feature 0 +
    """
    # The VARIANTS global holds 3 variants (pos 10, 50 and 45); against the
    # whole table the intervals would match:
    #   1: chr1, pos 1 to 10  => 1 variant (pos 10)
    #   2: chr1, pos 50 to 60 => 1 variant (pos 50)
    #   3: chr1, pos 51 to 59 => 0 variant
    bed_reader = BedReader(larger_string)

    # Parent selection restricted to ref='G' => 2 variants (pos 10, 45)
    parent_id = sql.create_selection_from_sql(
        conn,
        "SELECT variants.id,chr,pos,ref,alt FROM variants WHERE ref='G'",
        "setA",
        count=None,
    )
    # The default "variants" selection comes first, so this is the 2nd one
    assert parent_id == 2
    known_names = [selection["name"] for selection in sql.get_selections(conn)]
    assert "setA" in known_names

    # Intersecting the BED intervals with "setA":
    #   1: chr1, pos 1 to 10  => 1 remaining variant
    #   2: chr1, pos 50 to 60 => 0 variant
    #   3: chr1, pos 51 to 59 => 0 variant
    child_id = sql.create_selection_from_bed(conn, "setA", "sub_bedname", bed_reader)
    # Third selection overall
    assert child_id == 3

    rows = conn.execute(
        "SELECT * FROM selection_has_variant WHERE selection_id = ?", (child_id,)
    )
    links = tuple(tuple(row) for row in rows)
    assert links == ((1, child_id),)
def _fill_completer(self):
    """Rebuild the completion model of the text editor.

    Items are added in this order: the keywords exposed by the syntax
    highlighter, the selection names, the wordsets (as ``WORDSET['name']``)
    and finally the database fields. Fields of the "variants" and
    "annotations" categories are added as-is; fields of the "samples"
    category are added once per sample as ``sample['<sample>'].<field>``.
    """
    model = self.text_edit.completer.model

    # Preload samples, selections and wordsets
    samples = [i["name"] for i in sql.get_samples(self.conn)]
    selections = [i["name"] for i in sql.get_selections(self.conn)]
    wordsets = [i["name"] for i in sql.get_wordsets(self.conn)]

    model.clear()
    model.beginResetModel()
    try:
        # Register keywords
        for keyword in self.text_edit.syntax.sql_keywords:
            model.add_item(keyword, "VQL keywords", FIcon(0xF0169), "#f6ecf0")

        for selection in selections:
            model.add_item(selection, "Source table", FIcon(0xF04EB), "#f6ecf0")

        for wordset in wordsets:
            model.add_item(
                f"WORDSET['{wordset}']", "WORDSET", FIcon(0xF04EB), "#f6ecf0"
            )

        for field in sql.get_fields(self.conn):
            field_style = style.FIELD_TYPE.get(field["type"])
            if field_style is None:
                # Unknown type: use the "str" style entry. The previous code
                # fell back to the *string* "str", which cannot be indexed by
                # "color"/"icon" and raised TypeError.
                # NOTE(review): assumes FIELD_TYPE has a "str" entry - confirm.
                field_style = style.FIELD_TYPE["str"]
            color = field_style["color"]
            icon = FIcon(field_style["icon"], "white")

            category = field["category"]
            if category in ("variants", "annotations"):
                description = "<b>{}</b> ({}) from {} <br/><br/> {}".format(
                    field["name"],
                    field["type"],
                    field["category"],
                    field["description"],
                )
                model.add_item(field["name"], description, icon, color)
            elif category == "samples":
                # One completion entry per (sample, field) pair
                for sample in samples:
                    name = "sample['{}'].{}".format(sample, field["name"])
                    description = "<b>{}</b> ({}) from {} {} <br/><br/> {}".format(
                        field["name"],
                        field["type"],
                        field["category"],
                        sample,
                        field["description"],
                    )
                    model.add_item(name, description, icon, color)
    finally:
        # Always close the reset so the model is never left mid-reset
        model.endResetModel()
def load(self):
    """Reload every selection of the database into the model.

    Does nothing when no connection is set. Otherwise ``self.records``
    becomes the list of selection records, one mapping per selection,
    e.g. ``{"name": ..., "count": ..., "query": ...}``.
    """
    if self.conn is not None:
        self.beginResetModel()
        # Materialize the records so the model can index them
        self.records = [*sql.get_selections(self.conn)]
        self.endResetModel()
def test_selection_from_bedfile(conn):
    """Check the creation of a selection from BED data.

    .. note:: The bedreader itself **is not** tested here!
    """
    # NOTE(review): columns are written space-separated, one interval per
    # line - confirm this matches the layout BedReader expects.
    larger_string = """
        chr1 1 10 feature1 0 +
        chr1 50 60 feature2 0 -
        chr1 51 59 another_feature 0 +
    """
    # The VARIANTS global holds 3 variants (pos 10, 50 and 45):
    #   1: chr1, pos 1 to 10  => 1 variant (pos 10)
    #   2: chr1, pos 50 to 60 => 1 variant (pos 50)
    #   3: chr1, pos 51 to 59 => 0 variant
    bed_reader = BedReader(larger_string)

    # A default selection exists since DB creation, so this is the second one
    selection_name = "bedname"
    selection_id = sql.create_selection_from_bed(
        conn, "variants", selection_name, bed_reader
    )
    assert selection_id == 2

    # Association table rows have the format (variant id, selection id)
    rows = conn.execute(
        "SELECT * FROM selection_has_variant WHERE selection_id = ?",
        (selection_id,),
    )
    record = tuple(tuple(row) for row in rows)
    print("record:", record)
    # Both matching variants (see above) must be linked to the selection
    assert record == ((1, selection_id), (2, selection_id))

    matching = [
        selection
        for selection in sql.get_selections(conn)
        if selection["name"] == selection_name
    ]
    bed_selection = matching[0]
    print("selection content", bed_selection)
    assert bed_selection["name"] == selection_name
    # 2 variants retrieved
    assert bed_selection["count"] == 2
def _create_completer(self):
    """Build a QCompleter backed by a string-list model.

    The word list holds the database fields first (fields of the "samples"
    category expanded to ``sample['<sample>'].<field>`` for each sample),
    then the SQL keywords of the syntax highlighter, then the selection
    names.

    Returns:
        The completer with its model set.
    """
    model = QStringListModel()
    completer = QCompleter()

    sample_names = [row["name"] for row in sql.get_samples(self.conn)]
    selection_names = [row["name"] for row in sql.get_selections(self.conn)]

    words = []
    for field in sql.get_fields(self.conn):
        if field["category"] == "samples":
            words.extend(
                "sample['{}'].{}".format(sample, field["name"])
                for sample in sample_names
            )
        else:
            words.append(field["name"])

    words += VqlSyntaxHighlighter.sql_keywords
    words += selection_names

    model.setStringList(words)
    completer.setModel(model)
    return completer
def test_sql_selection_operation(conn):
    """Test set operations on selections using SQL API

    .. todo:: Only union is tested here; intersect and except still need
        coverage (intersect is tested in test_command).
    """
    cursor = conn.cursor()

    # Query the first default selection
    all_selection = cursor.execute("SELECT * FROM selections").fetchone()
    # e.g. {'id': 1, 'name': 'variants', 'count': 3, 'query': ''}
    print("all", dict(all_selection))

    # index 0 is the id in db
    assert all_selection[1] == "variants"
    assert all_selection[2] == len(VARIANTS)

    # Create a selection from sql
    query = "SELECT id, chr, pos FROM variants where alt = 'A' "
    sql.create_selection_from_sql(conn, query, "test")

    # Check that the selection has been created
    assert "test" in [record["name"] for record in sql.get_selections(conn)]

    # The variants attached to the selection must be the ones returned by
    # the query that created it ("test" is the 2nd selection).
    selection_id = 2
    insert_data = cursor.execute(query).fetchall()
    read_data = cursor.execute(
        """
        SELECT variants.id, variants.chr, variants.pos FROM variants
        INNER JOIN selection_has_variant sv ON variants.rowid = sv.variant_id
        AND sv.selection_id = ?
        """,
        (selection_id,),
    ).fetchall()

    # Compared as sets because the data can contain duplicate variants
    assert set(read_data) == set(insert_data)

    # TEST Unions
    query1 = "SELECT id, chr, pos FROM variants where alt = 'A' "  # 2 variants
    query2 = "SELECT id, chr, pos FROM variants where alt = 'C' "  # 1 variant

    union_query = sql.union_variants(query1, query2)
    print(union_query)
    selection_id = sql.create_selection_from_sql(conn, union_query, "union_GT")
    print("union_GT selection id: ", selection_id)
    assert selection_id is not None

    record = cursor.execute(
        "SELECT id, name FROM selections WHERE name = 'union_GT'"
    ).fetchone()
    print("Found record:", dict(record))
    selection_id = record[0]
    selection_name = record[1]
    # 3rd selection: "variants" is the first one, "test" the second
    assert selection_id == 3
    assert selection_name == "union_GT"

    # Variants of the union_GT selection must only have alt A or C
    records = cursor.execute(
        """
        SELECT variants.chr, variants.pos, variants.ref, variants.alt FROM variants
        INNER JOIN selection_has_variant sv ON variants.rowid = sv.variant_id
        AND sv.selection_id = ?
        """,
        (selection_id,),
    ).fetchall()

    # e.g.
    # {'chr': 'chr1', 'pos': 10, 'ref': 'G', 'alt': 'A'}
    # {'chr': 'chr1', 'pos': 50, 'ref': 'C', 'alt': 'C'}
    # {'chr': 'chr1', 'pos': 45, 'ref': 'G', 'alt': 'A'}
    for record in records:
        print(dict(record))
        assert record["alt"] in ("A", "C")

    # Counting with len() keeps this a clean assertion failure (instead of a
    # NameError on an unbound loop counter) when no row is returned.
    assert len(records) == 3
def main():
    """Entry point of the cutevariant command-line interface.

    Builds the argument parser (sub-commands ``createdb``, ``show``,
    ``remove`` and ``exec``), parses ``sys.argv`` and runs the requested
    action.

    The database is taken from ``--db``; when that option is not given,
    ``$CUTEVARIANT_DB`` is used. For ``createdb`` without ``--db`` the
    database path is the VCF path with a ``.db`` suffix appended.

    Returns:
        The parser itself when ``"html"`` is in ``sys.argv`` (workaround for
        sphinx-argparse, which needs the parser object before
        ``parse_args()`` is called); ``None`` otherwise.

    Raises:
        SystemExit: with status 0 after ``createdb`` or after displaying the
            result of a SELECT; with status 1 when called without argument,
            when no database is given, or when the VQL statement cannot be
            parsed or executed.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Cutevariant cli mode helps to run actions directly from command-line.\n
The env variable $CUTEVARIANT_DB can be used to define a database instead of
the arguments.""",
        epilog="""Examples:

    $ cutevariant-cli show --db my_database.db samples
    or
    $ export CUTEVARIANT_DB=my_database.db
    $ cutevariant-cli show samples""",
    )
    # Default log level: error
    parser.add_argument(
        "-vv",
        "--verbose",
        nargs="?",
        default="error",
        choices=["debug", "info", "critical", "error", "warning"],
    )

    sub_parser = parser.add_subparsers(dest="subparser")

    # Common parser: Database file requirement #################################
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument(
        "--db", help="SQLite database. By default, $CUTEVARIANT_DB is used."
    )

    # Create DB parser #########################################################
    createdb_parser = sub_parser.add_parser(
        "createdb",
        help="Build a SQLite database from a vcf file",
        parents=[parent_parser],
        epilog="""Examples:

        $ cutevariant-cli createdb -i "examples/test.snpeff.vcf"
        """,
    )
    createdb_parser.add_argument("-i", "--input", help="VCF file path", required=True)

    # Show parser ##############################################################
    show_parser = sub_parser.add_parser(
        "show", help="Display table content", parents=[parent_parser]
    )
    show_parser.add_argument(
        "table",
        choices=["fields", "selections", "samples", "wordsets"],
        help="Possible names of tables.",
    )

    # Remove parser ############################################################
    remove_parser = sub_parser.add_parser(
        "remove", help="remove selection", parents=[parent_parser]
    )
    remove_parser.add_argument("names", nargs="+", help="Name(s) of selection(s).")

    # VQL parser ###############################################################
    select_parser = sub_parser.add_parser(
        "exec",
        help="Execute a VQL statement.",
        parents=[parent_parser],
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:

        $ cutevariant-cli exec "SELECT favorite,chr,pos,ref,alt FROM variants"
        or
        $ cutevariant-cli exec "SELECT chr,ref,alt FROM variants" -s myselection
        or
        $ cutevariant-cli exec "IMPORT WORDSETs 'examples/gene.txt' AS mygenes"
        or
        $ cutevariant-cli exec "DROP WORDSETS mygenes"
        or
        $ cutevariant-cli exec "CREATE myselection1 FROM variants WHERE gene = 'CHID1'"
        $ cutevariant-cli exec "CREATE myselection2 FROM variants WHERE gene = 'CICP23'"
        $ cutevariant-cli exec "CREATE myselection3 = myselection2 | myselection2"
        or
        $ cutevariant-cli exec "CREATE boby FROM variants INTERSECT 'examples/test.bed'"
        """,
    )
    select_parser.add_argument("vql", help="A VQL statement.")
    # NOTE(review): --limit is parsed but not used anywhere in this function.
    select_parser.add_argument(
        "-l",
        "--limit",
        help="Limit the number of lines in output.",
        type=int,
        default=100,
    )
    select_parser.add_argument(
        "-s", "--to-selection", help="Save SELECT query into a selection name."
    )

    # Workaround for sphinx-argparse module that requires the parser object
    # before the call of parse_args()
    if "html" in sys.argv:
        return parser

    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Set log level
    log_level(args.verbose)

    # Create DB ################################################################
    if args.subparser == "createdb":
        if not args.db:
            # Database file is not set:
            # the output file is named after the VCF one
            args.db = args.input + ".db"

        if os.path.exists(args.db):
            # Remove existing file
            os.remove(args.db)

        conn = sql.get_sql_connection(args.db)
        if conn:
            # TODO: bug ... max is not 100...
            for _, message in progressbar.progressbar(
                async_import_file(conn, args.input), redirect_stdout=True
            ):
                print(message)

        print("The database is successfully created!")
        sys.exit(0)

    # Prepare SQL connection on DB file: an explicit --db wins, the
    # environment variable is only the default (as stated in the --db help).
    if not args.db:
        args.db = os.environ.get("CUTEVARIANT_DB")
    if not args.db:
        print("You must specify a database file via $CUTEVARIANT_DB or --db argument")
        sys.exit(1)

    # Init SQL connection
    conn = sql.get_sql_connection(args.db)

    # Show #####################################################################
    if args.subparser == "show":
        if args.table == "fields":
            display_sql_results(
                (i.values() for i in sql.get_fields(conn)),
                ["id", "name", "table", "type", "description"],
            )
        elif args.table == "samples":
            display_sql_results(
                (i.values() for i in sql.get_samples(conn)), ["id", "name"]
            )
        elif args.table == "selections":
            display_sql_results(
                (i.values() for i in sql.get_selections(conn)),
                ["id", "name", "variant_count"],
            )
        elif args.table == "wordsets":
            display_sql_results(
                (i.values() for i in sql.get_wordsets(conn)), ["id", "word_count"]
            )

    # Remove ###################################################################
    if args.subparser == "remove":
        for name in args.names:
            sql.delete_selection_by_name(conn, name)

    # VQL ######################################################################
    if args.subparser == "exec":
        query = args.vql
        vql_command = None

        # Test the VQL query
        try:
            cmd = vql.parse_one_vql(query)
        except (vql.textx.TextXSyntaxError, vql.VQLSyntaxError) as e:
            # Available attributes: e.message, e.line, e.col
            print("%s: %s, col: %d" % (e.__class__.__name__, e.message, e.col))
            print("For query:", query)
            # A rejected statement is a failure for the calling shell
            sys.exit(1)

        # Select command with redirection to selection
        if cmd["cmd"] == "select_cmd" and args.to_selection:
            vql_command = partial(
                command.create_cmd,
                conn,
                args.to_selection,
                source=cmd["source"],
                filters=cmd["filters"],
            )

        try:
            # Not redundant with the parsing above: the SQL statement is
            # actually executed here
            if vql_command:
                ret = vql_command()
            else:
                ret = command.create_command_from_obj(conn, cmd)()
            if not isinstance(ret, dict):
                # For drop_cmd, import_cmd: consume the result now so that
                # errors are raised inside this try block
                ret = list(ret)
        except (sqlite3.DatabaseError, vql.VQLSyntaxError) as e:
            LOGGER.exception(e)
            sys.exit(1)

        LOGGER.debug("SQL result: %s", ret)
        LOGGER.debug("VQL command: %s", cmd["cmd"])
        # Note: show_cmd is supported in a separated command option

        # Select command
        if cmd["cmd"] == "select_cmd" and not args.to_selection:
            display_sql_results((i.values() for i in ret), ["id"] + cmd["fields"])
            # Successful display: report success to the calling shell
            sys.exit(0)

        if (
            cmd["cmd"] in ("drop_cmd", "import_cmd", "create_cmd", "set_cmd", "bed_cmd")
            or args.to_selection
        ):
            # PS: to_selection is used to detect select_cmd with selection creation
            display_query_status(ret)