def parse_pharmgkb_line(self, this_line: str, header_items) -> dict:
    """Turn one tab-separated line of relationships.tsv into a dict.

    Surrounding whitespace is stripped from the line, the remainder is split
    on tab characters, and the resulting fields are handed to
    ``data_to_dict`` together with the header.

    :param this_line: a single line read from relationships.tsv
    :param header_items: column names from the relationships.tsv header
    :return: whatever ``data_to_dict`` builds from the header and the fields
    """
    fields = this_line.strip().split('\t')
    parsed = data_to_dict(header_items, fields)
    return parsed
def parse_drug_central_line(this_line: str, header_items: List) -> Dict:
    """Process one line of text from Drug Central.

    The line is stripped of surrounding whitespace, every double-quote
    character is removed, and the result is split on tabs. The fields are
    then combined with the header via ``data_to_dict``.

    Args:
        this_line: A string containing a line of text.
        header_items: A list of header items.

    Returns:
        The value produced by ``data_to_dict`` for the header items and the
        cleaned fields of the line.
    """
    # Quotes are removed after stripping and before splitting; since removing
    # quotes never touches tab characters, the fields match a split-then-clean.
    fields = this_line.strip().replace('"', '').split("\t")
    return data_to_dict(header_items, fields)
def make_id_mapping_file(self,
                         map_file: str,
                         sep: str = '\t',
                         pharmgkb_id_col: str = 'PharmGKB Accession Id',
                         id_key: str = 'Cross-references',
                         id_sep: str = ',',
                         id_key_val_sep: str = ':') -> dict:
    """Parse gene or drug ID mappings keyed by PharmGKB id.

    Intended for both genes.tsv and drugs.tsv. The first line of the file is
    treated as the header; every following line is split on ``sep`` and
    converted with ``data_to_dict``. When a row contains the ``id_key``
    column, its value is split on ``id_sep`` into ``key<sep>value`` pairs,
    which are stored in a dict under ``self.key_parsed_ids`` in that row.

    :param map_file: path of the mapping file (e.g. genes.tsv)
    :param sep: separator between columns [tab]
    :param pharmgkb_id_col: column holding the PharmGKB id, used as map key
    :param id_key: column name that contains ids [Cross-references]
    :param id_sep: separator between each id key:val pair [,]
    :param id_key_val_sep: separator between key and val in a pair [:]
    :return: map from the row's ``pharmgkb_id_col`` value to the row data
    :raises CantFindPharmGKBKey: if ``pharmgkb_id_col`` is not in the header
    :raises ValueError: if a non-empty id item lacks ``id_key_val_sep``
    """
    # A defaultdict without a factory behaves like a plain dict; it is kept
    # so the type of the returned object stays what callers already receive.
    id_map: dict = defaultdict()
    with open(map_file) as f:
        # Remove the line terminator before splitting; otherwise the last
        # column name keeps a trailing newline and can never match
        # pharmgkb_id_col / id_key (nor a clean key in the row dicts).
        header_items = f.readline().rstrip('\r\n').split(sep)
        if pharmgkb_id_col not in header_items:
            raise CantFindPharmGKBKey(
                "Can't find PharmGKB id in map file!")
        for line in f:
            items = line.strip().split(sep)
            dat = data_to_dict(header_items, items)
            if id_key in dat:
                for item in dat[id_key].split(id_sep):
                    if not item:
                        continue  # no xrefs, skip
                    item = item.strip('"')  # remove quotes around each item
                    # Split on the first separator only, so values that
                    # themselves contain the separator stay intact.
                    key, value = item.split(id_key_val_sep, 1)
                    if self.key_parsed_ids not in dat:
                        dat[self.key_parsed_ids] = dict()
                    dat[self.key_parsed_ids][key] = value
            id_map[dat[pharmgkb_id_col]] = dat
    return id_map