def __init__(
    self,
    database: Database,
    host='ftp.ncbi.nlm.nih.gov',
    data_dir='gene/DATA/',
    src_data_dir=APP_ROOT / 'data' / 'ncbi',
):
    """Set up the NCBI ETL object.

    :param Database database: gene database that new data is added to
    :param str host: name of the FTP host
    :param str data_dir: directory on the FTP host to use
    :param Path src_data_dir: local data directory for NCBI
    """
    super().__init__(database, host, data_dir, src_data_dir)

    # Location helpers are created in the same order as before.
    self._sequence_location = SequenceLocation()
    self._chromosome_location = ChromosomeLocation()

    # Plain values derived from the arguments / current date.
    today = datetime.today()
    self._date_today = today.strftime('%Y%m%d')
    self._assembly = None
    self._data_url = f"ftp://{host}"
def __init__(
    self,
    database: Database,
    host='ftp.ebi.ac.uk',
    data_dir='pub/databases/genenames/hgnc/json/',
    src_data_dir=APP_ROOT / 'data' / 'hgnc',
    fn='hgnc_complete_set.json',
):
    """Set up the HGNC ETL object.

    :param Database database: DynamoDB database
    :param str host: name of the FTP host
    :param str data_dir: directory on the FTP host to use
    :param Path src_data_dir: local data directory for HGNC
    :param str fn: name of the data file to download
    """
    super().__init__(database, host, data_dir, src_data_dir)
    self._chromosome_location = ChromosomeLocation()

    # Remember which file is fetched and the full URL it comes from.
    self._fn = fn
    self._version = None
    self._data_url = f"ftp://{host}/{data_dir}{fn}"
class NCBI(Base):
    """ETL class for NCBI source"""

    def __init__(self,
                 database: Database,
                 host='ftp.ncbi.nlm.nih.gov',
                 data_dir='gene/DATA/',
                 src_data_dir=APP_ROOT / 'data' / 'ncbi'):
        """Construct the NCBI ETL instance.

        :param Database database: gene database for adding new data
        :param str host: FTP host name
        :param str data_dir: FTP data directory to use
        :param Path src_data_dir: Data directory for NCBI
        """
        super().__init__(database, host, data_dir, src_data_dir)
        self._sequence_location = SequenceLocation()
        self._chromosome_location = ChromosomeLocation()
        self._data_url = f"ftp://{host}"
        # Only assigned by `_download_gff`, once a gff file name matches.
        self._assembly = None
        self._date_today = datetime.today().strftime('%Y%m%d')

    def perform_etl(self):
        """Perform ETL methods.

        :return: Concept IDs of concepts successfully loaded
        """
        self._extract_data()
        self._transform_data()
        self._database.flush_batch()
        return self._processed_ids

    def _download_data(self):
        """Download NCBI info, history, and gff files.

        The info and history files are saved under names that embed
        today's date (`self._date_today`).
        """
        # Download info
        data_dir = f'{self._data_dir}GENE_INFO/Mammalia/'
        fn = f'ncbi_info_{self._date_today}.tsv'
        data_fn = 'Homo_sapiens.gene_info.gz'
        logger.info('Downloading NCBI gene_info....')
        self._ftp_download(self._host, data_dir, fn, self.src_data_dir,
                           data_fn)
        logger.info('Successfully downloaded NCBI gene_info.')

        # Download history
        fn = f'ncbi_history_{self._date_today}.tsv'
        data_fn = 'gene_history.gz'
        logger.info('Downloading NCBI gene_history...')
        self._ftp_download(self._host, self._data_dir, fn,
                           self.src_data_dir, data_fn)
        logger.info('Successfully downloaded NCBI gene_history.')

        # Download gff
        self._download_gff()

    def _download_gff(self) -> None:
        """Download latest gff data.

        Sets `self._assembly` from the first remote file name that matches
        the gff pattern. The file is only downloaded when
        `ncbi_<assembly>.gff` does not already exist locally.
        """
        regex_patern = r"GCF_\d+\.\d+_(?P<assembly>GRCh\d+\.\S+)_genomic.gff.gz"  # noqa: E501
        regex = re.compile(regex_patern)
        with FTP(self._host) as ftp:
            ftp.login()
            ftp.cwd("genomes/refseq/vertebrate_mammalian/Homo_sapiens/"
                    "latest_assembly_versions")
            # Use the first listed entry as the assembly directory.
            assembly_dir = ftp.nlst()[0]
            ftp.cwd(assembly_dir)
            for f in ftp.nlst():
                match = regex.match(f)
                if match:
                    resp = match.groupdict()
                    self._assembly = resp["assembly"]
                    new_fn = f"ncbi_{self._assembly}.gff"
                    if not (self.src_data_dir / new_fn).exists():
                        self._ftp_download_file(ftp, f, self.src_data_dir,
                                                new_fn)
                        logger.info(f"Successfully downloaded NCBI {f} data.")
                    else:
                        logger.info(f"NCBI {f} already exists.")
                    # Only the first matching file is considered.
                    break

    def _files_downloaded(self, data_dir: Path) -> bool:
        """Check whether needed source files exist.

        :param Path data_dir: source data directory
        :return: true if all needed files exist, false otherwise
        """
        info_downloaded: bool = False
        history_downloaded: bool = False
        gff_downloaded: bool = False

        for f in data_dir.iterdir():
            if f.name.startswith(f'ncbi_info_{self._date_today}'):
                info_downloaded = True
            elif f.name.startswith(f'ncbi_history_{self._date_today}'):
                history_downloaded = True
            # NOTE(review): the assembly prefix is hard-coded, while
            # `_download_gff` names the file after whatever assembly it
            # finds -- confirm this is still the intended check.
            elif f.name.startswith('ncbi_GRCh38.p13'):
                gff_downloaded = True
        return info_downloaded and history_downloaded and gff_downloaded

    def _extract_data(self):
        """Gather data from local files or download from source.

        - Data is expected to be in <PROJECT ROOT>/data/ncbi.
        - For now, data files should all be from the same source data
          version.

        Sets `self._info_src`, `self._history_src`, `self._gff_src` and
        `self._version`.
        """
        self._create_data_directory()
        if not self._files_downloaded(self.src_data_dir):
            self._download_data()

        local_files = [f for f in self.src_data_dir.iterdir()
                       if f.name.startswith('ncbi')]
        # Order by the last '_'-separated part of the file name, descending,
        # so the first match per prefix is the one that sorts highest.
        local_files.sort(key=lambda f: f.name.split('_')[-1], reverse=True)
        self._info_src = [f for f in local_files
                          if f.name.startswith('ncbi_info')][0]
        self._history_src = [f for f in local_files
                             if f.name.startswith('ncbi_history')][0]
        self._gff_src = [f for f in local_files
                         if f.name.startswith('ncbi_GRCh')][0]
        self._version = self._info_src.stem.split('_')[-1]

    def _get_prev_symbols(self):
        """Store a gene's symbol history.

        Rows without a current gene ID are loaded into the database as
        discontinued genes.

        :return: A dictionary of a gene's previous symbols
        """
        prev_symbols = {}
        # `with` guarantees the history file is closed even if loading fails.
        with open(self._history_src, 'r') as history_file:
            history = csv.reader(history_file, delimiter='\t')
            next(history)  # skip header row
            with self._database.genes.batch_writer() as batch:
                for row in history:
                    # Only interested in rows that have Homo sapiens tax id
                    if row[0] != '9606':
                        continue
                    if row[1] != '-':
                        gene_id = row[1]
                        prev_symbols.setdefault(gene_id, []).append(row[3])
                    else:
                        # Load discontinued genes
                        params = {
                            'concept_id':
                                f'{NamespacePrefix.NCBI.value.lower()}:'
                                f'{row[2]}',
                            'symbol': row[3],
                            'symbol_status':
                                SymbolStatus.DISCONTINUED.value
                        }
                        self._load_gene(params, batch)
        return prev_symbols

    def _add_xrefs_associated_with(self, val, params):
        """Add xrefs and associated_with refs to a transformed gene.

        A `GeneID` entry sets the record's `concept_id` instead of adding a
        reference. Empty `xrefs` / `associated_with` lists are removed from
        the record before returning.

        :param list val: A list of source ids for a given gene
        :param dict params: A transformed gene record
        """
        params['xrefs'] = []
        params['associated_with'] = []
        for src in val:
            src_name = src.split(':')[0].upper()
            src_id = src.split(':')[-1]
            if src_name == "GENEID":
                params['concept_id'] = f"{NamespacePrefix.NCBI.value}:{src_id}"
            elif src_name in NamespacePrefix.__members__ and \
                    NamespacePrefix[src_name].value in PREFIX_LOOKUP:
                params['xrefs'].append(f"{NamespacePrefix[src_name].value}"
                                       f":{src_id}")
            else:
                if src_name.startswith("MIM"):
                    prefix = NamespacePrefix.OMIM.value
                elif src_name.startswith("IMGT/GENE-DB"):
                    prefix = NamespacePrefix.IMGT_GENE_DB.value
                elif src_name.startswith("MIRBASE"):
                    prefix = NamespacePrefix.MIRBASE.value
                else:
                    prefix = None
                if prefix:
                    params['associated_with'].append(f"{prefix}:{src_id}")
                else:
                    logger.info(f"{src_name} is not in NameSpacePrefix.")
        if not params['xrefs']:
            del params['xrefs']
        if not params['associated_with']:
            del params['associated_with']

    def _get_gene_info(self, prev_symbols):
        """Store genes from NCBI info file.

        :param dict prev_symbols: A dictionary of a gene's previous symbols
        :return: A dictionary of gene's from the NCBI info file, keyed by
            symbol.
        """
        info_genes = dict()
        # open info file, skip headers; `with` closes the file when done
        with open(self._info_src, 'r') as info_file:
            info = csv.reader(info_file, delimiter='\t')
            next(info)

            for row in info:
                params = dict()
                params['concept_id'] = \
                    f"{NamespacePrefix.NCBI.value}:{row[1]}"
                # get symbol
                params['symbol'] = row[2]
                # get aliases
                if row[4] != '-':
                    params['aliases'] = row[4].split('|')
                else:
                    params['aliases'] = []
                # get associated_with
                if row[5] != '-':
                    associated_with = row[5].split('|')
                    self._add_xrefs_associated_with(associated_with, params)
                # get chromosome location
                vrs_chr_location = self._get_vrs_chr_location(row, params)
                if 'exclude' in vrs_chr_location:
                    # Exclude genes with multiple distinct locations
                    # (e.g. OMS)
                    continue
                if not vrs_chr_location:
                    vrs_chr_location = []
                params['locations'] = vrs_chr_location
                # get label
                if row[8] != '-':
                    params['label'] = row[8]
                # add prev symbols
                if row[1] in prev_symbols:
                    params['previous_symbols'] = prev_symbols[row[1]]
                # get type
                params['gene_type'] = row[9]
                info_genes[params['symbol']] = params
        return info_genes

    def _get_gene_gff(self, db, info_genes, sr):
        """Store genes from NCBI gff file.

        Genes already present in `info_genes` get a SequenceLocation
        appended; other gene features are added as new records.

        :param FeatureDB db: GFF database
        :param dict info_genes: A dictionary of gene's from the NCBI info
            file.
        :param SeqRepo sr: Access to the seqrepo
        """
        for f in db.all_features():
            if not f.attributes.get('ID'):
                continue
            f_id = f.attributes.get('ID')[0]
            if not f_id.startswith('gene'):
                continue
            symbol = f.attributes['Name'][0]
            if symbol in info_genes:
                # Just need to add SequenceLocation
                params = info_genes.get(symbol)
                vrs_sq_location = \
                    self._get_vrs_sq_location(db, sr, params, f_id)
                if vrs_sq_location:
                    params['locations'].append(vrs_sq_location)
            else:
                # Need to add entire gene
                gene = self._add_gff_gene(db, f, sr, f_id)
                info_genes[gene['symbol']] = gene

    def _add_gff_gene(self, db, f, sr, f_id):
        """Create a transformed gene record from NCBI gff file.

        :param FeatureDB db: GFF database
        :param Feature f: A gene from the gff data file
        :param SeqRepo sr: Access to the seqrepo
        :param str f_id: The feature's ID
        :return: A transformed gene record (dict)
        """
        params = dict()
        params['src_name'] = SourceName.NCBI.value
        self._add_attributes(f, params)
        sq_loc = self._get_vrs_sq_location(db, sr, params, f_id)
        if sq_loc:
            params['locations'] = [sq_loc]
        else:
            params['locations'] = list()
        # `concept_id` is set by `_add_attributes` from a GeneID Dbxref.
        params['label_and_type'] = \
            f"{params['concept_id'].lower()}##identity"
        return params

    def _add_attributes(self, f, gene):
        """Add concept_id, symbol, and xrefs/associated_with to a gene record.

        :param gffutils.feature.Feature f: A gene from the data
        :param gene: A transformed gene record
        """
        attributes = ['ID', 'Name', 'description', 'Dbxref']

        for key, val in f.attributes.items():
            if key not in attributes:
                continue
            # Unwrap single-valued attributes; Dbxref is kept as a list.
            if len(val) == 1 and key != 'Dbxref':
                val = val[0]

            if key == 'Dbxref':
                self._add_xrefs_associated_with(val, gene)
            elif key == 'Name':
                gene['symbol'] = val

    def _get_vrs_sq_location(self, db, sr, params, f_id):
        """Store GA4GH VRS SequenceLocation in a gene record.
        https://vr-spec.readthedocs.io/en/1.1/terms_and_model.html#sequencelocation

        Also records the feature's strand in `params['strand']`.

        :param FeatureDB db: GFF database
        :param SeqRepo sr: Access to the seqrepo
        :param dict params: A transformed gene record
        :param str f_id: The feature's ID
        :return: A GA4GH VRS SequenceLocation
        """
        gene = db[f_id]
        params['strand'] = gene.strand
        return self._sequence_location.add_location(gene.seqid, gene,
                                                    params, sr)

    def _get_xref_associated_with(self, src_name, src_id):
        """Get xref or associated_with ref.

        :param str src_name: Source name
        :param src_id: The source's accession number
        :return: A dict containing an xref or associated_with ref; empty
            when `src_name` matches none of the handled prefixes
        """
        source = dict()
        if src_name.startswith('HGNC'):
            source['xrefs'] = \
                [f"{NamespacePrefix.HGNC.value}:{src_id}"]
        elif src_name.startswith('NCBI'):
            source['xrefs'] = \
                [f"{NamespacePrefix.NCBI.value}:{src_id}"]
        elif src_name.startswith('UniProt'):
            source['associated_with'] = \
                [f"{NamespacePrefix.UNIPROT.value}:{src_id}"]
        elif src_name.startswith('miRBase'):
            source['associated_with'] = \
                [f"{NamespacePrefix.MIRBASE.value}:{src_id}"]
        elif src_name.startswith('RFAM'):
            source['associated_with'] = \
                [f"{NamespacePrefix.RFAM.value}:{src_id}"]
        return source

    def _get_vrs_chr_location(self, row, params):
        """Store GA4GH VRS ChromosomeLocation in a gene record.
        https://vr-spec.readthedocs.io/en/1.1/terms_and_model.html#chromosomelocation

        :param list row: A row in NCBI data file
        :param dict params: A transformed gene record
        :return: A list of GA4GH VRS ChromosomeLocations, or the sentinel
            list `['exclude']` when the gene should be skipped
        """
        params['location_annotations'] = list()
        chromosomes_locations = self._set_chromsomes_locations(row, params)
        locations = chromosomes_locations['locations']
        chromosomes = chromosomes_locations['chromosomes']
        if chromosomes_locations['exclude']:
            return ['exclude']

        location_list = list()
        if chromosomes and not locations:
            # Only chromosomes are known: record them as annotations.
            for chromosome in chromosomes:
                if chromosome == 'MT':
                    params['location_annotations'].append(
                        Chromosome.MITOCHONDRIA.value)
                else:
                    params['location_annotations'].append(chromosome.strip())
        elif locations:
            self._add_chromosome_location(locations, location_list, params)
        if not params['location_annotations']:
            del params['location_annotations']
        return location_list

    def _set_chromsomes_locations(self, row, params):
        """Set chromosomes and locations for a given gene record.

        Invalid map locations are moved into
        `params['location_annotations']` (which must already exist) and
        dropped from the returned locations.

        :param list row: A gene row in the NCBI data file
        :param dict params: A transformed gene record
        :return: A dictionary containing a gene's chromosomes and locations
            plus an `exclude` flag
        """
        chromosomes = None
        if row[6] != '-':
            if '|' in row[6]:
                chromosomes = row[6].split('|')
            else:
                chromosomes = [row[6]]

            if len(chromosomes) >= 2:
                if chromosomes and 'X' not in chromosomes and \
                        'Y' not in chromosomes:
                    logger.info(f'{row[2]} contains multiple distinct '
                                f'chromosomes: {chromosomes}.')
                    chromosomes = None

        locations = None
        exclude = False
        if row[7] != '-':
            if '|' in row[7]:
                locations = row[7].split('|')
            elif ';' in row[7]:
                locations = row[7].split(';')
            elif 'and' in row[7]:
                locations = row[7].split('and')
            else:
                locations = [row[7]]

            # Sometimes locations will store the same location twice
            if len(locations) == 2:
                if locations[0] == locations[1]:
                    locations = [locations[0]]

            # Exclude genes where there are multiple distinct locations
            # i.e. OMS: '10q26.3', '19q13.42-q13.43', '3p25.3'
            if len(locations) > 2:
                logger.info(f'{row[2]} contains multiple distinct '
                            f'locations: {locations}.')
                locations = None
                exclude = True

            # NCBI sometimes contains invalid map locations
            if locations:
                # Build a new list rather than deleting by index while
                # iterating: deleting shifted later items, which raised
                # IndexError and skipped the entry after a deleted one.
                valid_locations = []
                for raw_loc in locations:
                    loc = raw_loc.strip()
                    if re.match("^([1-9][0-9]?|X[pq]?|Y[pq]?)", loc):
                        valid_locations.append(raw_loc)
                    else:
                        logger.info(f'{row[2]} contains invalid map location:'
                                    f'{loc}.')
                        params['location_annotations'].append(loc)
                locations = valid_locations
        return {
            'locations': locations,
            'chromosomes': chromosomes,
            'exclude': exclude
        }

    def _add_chromosome_location(self, locations, location_list, params):
        """Add a chromosome location to the location list.

        :param list locations: NCBI map locations for a gene record.
        :param list location_list: A list to store chromosome locations.
        :param dict params: A transformed gene record
        """
        for raw_loc in locations:
            loc = raw_loc.strip()
            location = dict()

            if Annotation.ALT_LOC.value in loc:
                loc = loc.split(f"{Annotation.ALT_LOC.value}")[0].strip()
                params['location_annotations'].append(Annotation.ALT_LOC.value)

            contains_centromere = 'cen' in loc

            arm_match = re.search("[pq]", loc)
            if arm_match and not contains_centromere:
                arm_ix = arm_match.start()
                chromosome = loc[:arm_ix].strip()

                # NCBI sometimes stores invalid map locations
                # i.e. 7637 stores 'map from Rosati ref via FISH [AFS]'
                if not re.match("^([1-9][0-9]?|X|Y|MT)$", chromosome):
                    continue
                location['chr'] = chromosome

                # Check to see if there is a band / sub band included
                if arm_ix != len(loc) - 1:
                    if '-' in loc:
                        self._chromosome_location.set_interval_range(
                            loc, arm_ix, location)
                    else:
                        # Location only gives start
                        start = loc[arm_ix:]
                        location['start'] = start
                        location['end'] = start
                else:
                    # Only arm is included
                    location['start'] = loc[arm_ix]
                    location['end'] = loc[arm_ix]
            elif contains_centromere:
                self._set_centromere_location(loc, location)
            else:
                # Location only gives chr
                params['location_annotations'].append(loc)

            chr_location = \
                self._chromosome_location.get_location(location, params)
            if chr_location:
                location_list.append(chr_location)

    def _set_centromere_location(self, loc, location):
        """Set centromere location for a gene.

        :param str loc: A gene location (must contain 'cen')
        :param dict location: GA4GH location
        """
        centromere_ix = re.search("cen", loc).start()
        if '-' in loc:
            # Location gives both start and end
            range_ix = re.search('-', loc).start()
            if 'q' in loc:
                location['chr'] = loc[:centromere_ix].strip()
                location['start'] = "cen"
                location['end'] = loc[range_ix + 1:]
            elif 'p' in loc:
                p_ix = re.search("p", loc).start()
                location['chr'] = loc[:p_ix].strip()
                location['end'] = "cen"
                # NOTE(review): this slice starts at index 0, so `start`
                # keeps the chromosome prefix -- confirm that is intended.
                location['start'] = loc[:range_ix]
        else:
            location['chr'] = loc[:centromere_ix].strip()
            location['start'] = "cen"
            location['end'] = "cen"

    def _transform_data(self):
        """Modify data and pass to loading functions."""
        logger.info('Transforming NCBI...')
        self._add_meta()
        prev_symbols = self._get_prev_symbols()
        info_genes = self._get_gene_info(prev_symbols)

        # create db for gff file
        db = gffutils.create_db(str(self._gff_src),
                                dbfn=":memory:",
                                force=True,
                                merge_strategy="create_unique",
                                keep_order=True)

        self._get_gene_gff(db, info_genes, self.seqrepo)

        with self._database.genes.batch_writer() as batch:
            for gene in info_genes.values():
                self._load_gene(gene, batch)
        logger.info('Successfully transformed NCBI.')

    def _add_meta(self):
        """Load metadata"""
        # NOTE(review): `self._assembly` is only set in `_download_gff`;
        # when no download happens it is still None here -- confirm.
        metadata = SourceMeta(
            data_license="custom",
            data_license_url="https://www.ncbi.nlm.nih.gov/home/"
                             "about/policies/",
            version=self._version,
            data_url=self._data_url,
            rdp_url="https://reusabledata.org/ncbi-gene.html",
            data_license_attributes={
                'non_commercial': False,
                'share_alike': False,
                'attribution': False
            },
            genome_assemblies=[self._assembly])

        self._load_meta(self._database, metadata, SourceName.NCBI.value)
class HGNC(Base):
    """ETL the HGNC source into the normalized database."""

    def __init__(self,
                 database: Database,
                 host='ftp.ebi.ac.uk',
                 data_dir='pub/databases/genenames/hgnc/json/',
                 src_data_dir=APP_ROOT / 'data' / 'hgnc',
                 fn='hgnc_complete_set.json'):
        """Initialize HGNC ETL class.

        :param Database database: DynamoDB database
        :param str host: FTP host name
        :param str data_dir: FTP data directory to use
        :param Path src_data_dir: Data directory for HGNC
        :param str fn: Data file to download
        """
        super().__init__(database, host, data_dir, src_data_dir)
        self._chromosome_location = ChromosomeLocation()
        self._data_url = f"ftp://{host}/{data_dir}{fn}"
        self._fn = fn
        # Assigned by `_download_data` from the FTP download's return value.
        self._version = None

    def _download_data(self, *args, **kwargs):
        """Download HGNC JSON data file.

        The file is first saved under a temporary name and then renamed to
        `hgnc_<version>.json`, using the value returned by the download.
        """
        logger.info('Downloading HGNC data file...')
        self._create_data_directory()
        tmp_fn = 'hgnc_version.json'
        self._version = \
            self._ftp_download(self._host, self._data_dir, tmp_fn,
                               self.src_data_dir, self._fn)
        shutil.move(f"{self.src_data_dir}/{tmp_fn}",
                    f"{self.src_data_dir}/hgnc_{self._version}.json")
        logger.info('Successfully downloaded HGNC data file.')

    def _extract_data(self, *args, **kwargs):
        """Extract data from the HGNC source.

        Uses the `data_path` keyword argument when given; otherwise picks
        the entry of the source data directory that sorts last.
        """
        if 'data_path' in kwargs:
            self._data_src = kwargs['data_path']
        else:
            self._data_src = sorted(self.src_data_dir.iterdir())[-1]

    def _transform_data(self, *args, **kwargs):
        """Transform the HGNC source."""
        logger.info('Transforming HGNC...')
        with open(self._data_src, 'r') as f:
            data = json.load(f)

        records = data['response']['docs']

        with self._database.genes.batch_writer() as batch:
            for r in records:
                concept_id = r['hgnc_id'].lower()
                gene = {
                    'concept_id': concept_id,
                    'label_and_type': f"{concept_id}##identity",
                    'item_type': 'identity',
                    'symbol': r['symbol'],
                    'label': r['name'],
                    'src_name': SourceName.HGNC.value,
                }

                # Records without a status simply get no symbol_status.
                status = r.get('status')
                if status == 'Approved':
                    gene['symbol_status'] = SymbolStatus.APPROVED.value
                elif status == 'Entry Withdrawn':
                    gene['symbol_status'] = SymbolStatus.WITHDRAWN.value

                # store alias, xref, associated_with, prev_symbols, location
                self._get_aliases(r, gene)
                self._get_xrefs_associated_with(r, gene)
                if 'prev_symbol' in r:
                    self._get_previous_symbols(r, gene)
                if 'location' in r:
                    self._get_location(r, gene)
                if "locus_type" in r:
                    gene["gene_type"] = r["locus_type"]
                self._load_gene(gene, batch)
        logger.info('Successfully transformed HGNC.')

    def _get_aliases(self, r, gene):
        """Store aliases in a gene record.

        Aliases are the de-duplicated union of the record's `alias_symbol`
        and `enzyme_id` values; nothing is stored when both are empty.

        :param dict r: A gene record in the HGNC data file
        :param dict gene: A transformed gene record
        """
        alias_symbol = r['alias_symbol'] if 'alias_symbol' in r else list()
        enzyme_id = r['enzyme_id'] if 'enzyme_id' in r else list()

        if alias_symbol or enzyme_id:
            gene['aliases'] = list(set(alias_symbol + enzyme_id))

    def _get_previous_symbols(self, r, gene):
        """Store previous symbols in a gene record.

        :param dict r: A gene record in the HGNC data file
        :param dict gene: A transformed gene record
        """
        prev_symbols = r['prev_symbol']
        if prev_symbols:
            gene['previous_symbols'] = list(set(prev_symbols))

    def _get_xrefs_associated_with(self, r, gene):
        """Store xrefs and/or associated_with refs in a gene record.

        :param dict r: A gene record in the HGNC data file
        :param dict gene: A transformed gene record
        """
        xrefs = list()
        associated_with = list()
        sources = [
            'entrez_id', 'ensembl_gene_id', 'vega_id', 'ucsc_id', 'ccds_id',
            'uniprot_ids', 'pubmed_id', 'cosmic', 'omim_id', 'mirbase',
            'homeodb', 'snornabase', 'orphanet', 'horde_id', 'merops', 'imgt',
            'iuphar', 'kznf_gene_catalog', 'mamit-trnadb', 'cd', 'lncrnadb',
            'ena', 'pseudogene.org', 'refseq_accession'
        ]

        for src in sources:
            if src not in r:
                continue

            # The namespace key is the field name up to its first separator.
            if '-' in src:
                key = src.split('-')[0]
            elif '.' in src:
                key = src.split('.')[0]
            elif '_' in src:
                key = src.split("_")[0]
            else:
                key = src

            if key.upper() in NamespacePrefix.__members__:
                if NamespacePrefix[key.upper()].value in PREFIX_LOOKUP:
                    self._get_xref_associated_with(key, src, r, xrefs)
                else:
                    self._get_xref_associated_with(key, src, r,
                                                   associated_with)
            else:
                logger.warning(f"{key} not in schemas.py")

        if xrefs:
            gene['xrefs'] = xrefs
        if associated_with:
            gene['associated_with'] = associated_with

    def _get_xref_associated_with(self, key, src, r, src_type):
        """Add an xref or associated_with ref to a gene record.

        The input record `r` is left unmodified.

        :param str key: The source's name
        :param str src: HGNC's source field
        :param dict r: A gene record in the HGNC data file
        :param list src_type: Either xrefs or associated_with list
        """
        prefix = NamespacePrefix[key.upper()].value
        value = r[src]
        if isinstance(value, list):
            src_type.extend(f"{prefix}:{xref}" for xref in value)
        else:
            # Keep only the part after the last ':' of a prefixed string id.
            if isinstance(value, str) and ':' in value:
                value = value.split(':')[-1].strip()
            src_type.append(f"{prefix}:{value}")

    def _get_location(self, r, gene):
        """Store GA4GH VRS ChromosomeLocation in a gene record.
        https://vr-spec.readthedocs.io/en/1.1/terms_and_model.html#chromosomelocation

        :param dict r: A gene record in the HGNC data file
        :param dict gene: A transformed gene record
        """
        # Get list of a gene's map locations
        if 'and' in r['location']:
            locations = r['location'].split('and')
        else:
            locations = [r['location']]

        location_list = list()
        gene['location_annotations'] = list()
        for loc in locations:
            loc = self._set_annotation(loc.strip(), gene)
            if not loc:
                continue

            if loc == 'mitochondria':
                gene['location_annotations'].append(
                    Chromosome.MITOCHONDRIA.value)
            else:
                location = dict()
                self._set_location(loc, location, gene)
                chr_location = \
                    self._chromosome_location.get_location(location, gene)
                if chr_location:
                    location_list.append(chr_location)

        if location_list:
            gene['locations'] = location_list
        if not gene['location_annotations']:
            del gene['location_annotations']

    def _set_annotation(self, loc, gene):
        """Set the annotations attribute if one is provided.

        Every annotation found in `loc` is appended to
        `gene['location_annotations']` and the text from the annotation
        onwards is cut off.

        :param str loc: A gene location
        :param dict gene: A transformed gene record
        :return: The location with annotations stripped, or `None` when
            nothing is left after removing an annotation
        """
        # De-duplicate values but keep declaration order, so the order of
        # recorded annotations does not vary from run to run.
        annotations = dict.fromkeys(
            v.value for v in Annotation.__members__.values())

        for annotation in annotations:
            if annotation in loc:
                gene['location_annotations'].append(annotation)
                # Check if location is also included
                loc = loc.split(annotation)[0].strip()
                if not loc:
                    return None
        return loc

    def _set_location(self, loc, location, gene):
        """Set a gene's location.

        :param str loc: A gene location
        :param dict location: GA4GH location
        :param dict gene: A transformed gene record
        """
        arm_match = re.search('[pq]', loc)

        if arm_match:
            # Location gives arm and sub / sub band
            arm_ix = arm_match.start()
            location['chr'] = loc[:arm_ix]

            if '-' in loc:
                # Location gives both start and end
                self._chromosome_location.set_interval_range(
                    loc, arm_ix, location)
            else:
                # Location only gives start
                start = loc[arm_ix:]
                location['start'] = start
                location['end'] = start
        else:
            # Only gives chromosome
            gene['location_annotations'].append(loc)

    def perform_etl(self, *args, **kwargs):
        """Extract, Transform, and Load data into DynamoDB database.

        :return: Concept IDs of concepts successfully loaded
        """
        self._download_data()
        self._extract_data()
        self._add_meta()
        self._transform_data()
        self._database.flush_batch()
        return self._processed_ids

    def _add_meta(self, *args, **kwargs):
        """Add HGNC metadata to the gene_metadata table."""
        metadata = SourceMeta(
            data_license='custom',
            data_license_url='https://www.genenames.org/about/',
            version=self._version,
            data_url=self._data_url,
            rdp_url=None,
            data_license_attributes={
                'non_commercial': False,
                'share_alike': False,
                'attribution': False
            },
            genome_assemblies=[])
        self._load_meta(self._database, metadata, SourceName.HGNC.value)