def validate_descrip(string):
    """Normalize a description cell for YAML output.

    Collapses newlines into single spaces, strips whitespace, and replaces
    ':' with '-' so the text cannot be mistaken for a YAML mapping.

    Args:
        string: Raw cell value; may be None or a float NaN (empty
            spreadsheet cell read through pandas).

    Returns:
        The cleaned string, or None when the input is None/NaN.
    """
    if string is None:
        return None
    # Empty spreadsheet cells arrive as float('nan'); treat them as missing.
    if not isinstance(string, str) and math.isnan(string):
        return None
    string = ' '.join(stripper(s) for s in string.split('\n'))
    string = stripper(string)
    # ':' would start a YAML mapping inside the description text.
    return string.replace(':', '-')
def validate_desc(string):
    """Validate the description text: collapse newlines and strip whitespace."""
    if string is None:
        return None
    collapsed = ' '.join(stripper(piece) for piece in string.split('\n'))
    return stripper(collapsed)
def get_enumdefs(enum):
    """Extract '$ref' definitions from the '{...}' portion of an enum string.

    Returns a list of {'$ref': ...} dicts for the comma-separated refs inside
    the braces, or None when the input is not a braced string or the braces
    are empty.
    """
    if not isinstance(enum, str) or '{' not in enum:
        return None
    open_idx = enum.find('{') + 1
    close_idx = enum.find('}')
    assert close_idx != -1, f"Closing bracket mismatch in enum {enum}"
    inner = stripper(enum[open_idx:close_idx])
    if inner == '':
        return None
    return [{'$ref': S(stripper(ref))} for ref in inner.split(',')]
def get_enum(enum):
    """Return the enum value with any trailing '{refs}' block removed."""
    if isinstance(enum, str) and '{' in enum:
        return stripper(enum[:enum.find('{')])
    return enum
def string2list(key, val):
    """Convert a complex comma & pipe separated link string into a list of lists.

    '|' separates link groups; ',' separates values within a group.  For the
    boolean-valued keys ('required', 'group_required', 'group_exclusive') the
    text is parsed into Python literals (True/False); other keys keep the
    stripped strings.

    Args:
        key: Column name the value came from (controls boolean parsing).
        val: Raw cell value; None yields the sentinel [['']].

    Returns:
        List of lists of parsed values.
    """
    import ast  # local: only needed for the boolean columns

    link_lst = []
    # in some cases val is False which will be evaluated & will return empty list
    if val is not None:
        val = str(val)
        for group in val.split('|'):
            tmp_lst = []
            for item in group.split(','):
                j_ = stripper(item)
                if key in ['required', 'group_required', 'group_exclusive']:
                    if j_ is not None and j_ != '':
                        # literal_eval replaces eval(): same result for
                        # 'True'/'False'/numbers, but cannot execute code
                        # smuggled in through the spreadsheet.
                        tmp_lst.append(ast.literal_eval(j_.title()))
                    else:
                        tmp_lst.append('')
                else:
                    tmp_lst.append(j_)
            link_lst.append(tmp_lst)
        return link_lst
    return [['']]
def get_terms(terms):
    """Convert a comma-separated terms string into a list of {'$ref': ...} dicts.

    Returns None for non-string input.
    """
    if not isinstance(terms, str):
        return None
    return [{'$ref': dbl_quote(stripper(term))} for term in terms.split(',')]
def reqs2list(string):
    """Convert a comma separated value string into a list.

    None maps to an empty list; non-string, non-None values pass through
    unchanged.

    NOTE(review): a later definition with the same name exists in this file
    (which returns the input unchanged for None) and shadows this one at
    import time — confirm which behavior callers rely on.
    """
    if string is None:
        return []
    if isinstance(string, str):
        return [stripper(r) for r in string.split(',')]
    return string
def enum2list(enums):
    """Transform a pipe-separated string of enums into a list.

    Values that YAML could interpret as non-strings (yes/no/true/false) are
    wrapped as double-quoted scalars.  Duplicates are removed while
    preserving first-seen order (the previous set-based version produced a
    nondeterministic order, causing spurious diffs in generated YAML).

    Non-string input is returned wrapped in a single-element list.
    """
    if not isinstance(enums, str):
        return [enums]
    # Enums already are in double quotes. Assumes no missing quotes.
    cleaned = [stripper(x) for x in enums.split('|')]
    cleaned = [x for x in cleaned if x != '|' and x != '']
    # dict preserves insertion order while deduplicating.
    final = {}
    for value in cleaned:
        # str.split always yields strings, so only the yes/no/true/false
        # check matters here (earlier int/float isinstance checks were dead).
        if value.lower() in ['yes', 'no', 'true', 'false']:
            final[S(value)] = None
        else:
            final[value] = None
    return list(final)
def links2list(links):
    """Parses the string read in from links input field and transforms it to a list.

    Bracketed segments '[...]' are treated as link subgroups: each is cut out
    of the string and recursively parsed, then appended (doubly nested) to the
    output.  The remaining comma-separated values become the non-group links,
    with 'TRUE'/'FALSE' text converted to Python booleans.

    Returns None for a NaN (empty spreadsheet) cell.
    """
    # Empty cells read through pandas arrive as float NaN.
    if type(links) != str and math.isnan(links):
        return None
    outlinks = []
    groups = []
    # Repeatedly cut the first '[...]' group out of the string.
    while '[' in links:
        start = links.find('[') + 1
        end = links.find(']')
        group = stripper(links[start:end])
        groups.append(group)
        # end + 2 skips the ']' and the separator that follows it.
        links = links[end + 2:]
    for l in groups:
        # NOTE(review): each recursive result is wrapped in an extra list,
        # producing [[...]] nesting — presumably consumed by addlinks's
        # ndict['<link_name>'][0][0] indexing; confirm before changing.
        outlinks.append([links2list(l)])
    if isinstance(links, str):
        links = links.split(',')
    links = [x.strip() for x in links]
    nongrouplinks = []
    for l in links:
        if l.upper() == 'TRUE':
            nongrouplinks.append(True)
        elif l.upper() == 'FALSE':
            nongrouplinks.append(False)
        else:
            nongrouplinks.append(l)
    outlinks += nongrouplinks
    return outlinks
def validate_enum(temp_enum):
    """Strips spaces & converts values that could be interpreted in yaml as
    nonstring, to double quotation string.

    Values equal to 'open' or containing '/' are returned unquoted; known
    boolean-ish words and purely numeric values are double-quoted so YAML
    keeps them as strings.
    """
    enum = stripper(temp_enum)
    if enum != 'open' and '/' not in enum:
        if isinstance(enum, str) and enum.lower() in ['yes', 'no', 'true', 'false', 'root', 'na']:
            enum = dbl_quote(enum)
        else:
            try:
                # HACK: eval on spreadsheet-sourced text — used only to probe
                # whether the value parses as a number; a malformed cell falls
                # through to the int()/float() fallbacks below.  Consider
                # ast.literal_eval if this ever sees untrusted input.
                enum_ = eval(enum)  # convert to int
                if enum_ == 0:
                    enum = dbl_quote(enum)
                elif (isinstance(enum_, int) or isinstance(enum_, float)) and not (re.search('''[^0-9]''', enum)):
                    # Quote only pure digit strings (regex rejects '.', '-', etc.).
                    enum = dbl_quote(enum)
            except:
                try:
                    enum_ = int(enum)
                    enum = dbl_quote(enum)
                except:
                    try:
                        enum_ = float(enum)
                        enum = dbl_quote(enum)
                    except:
                        pass
    return enum
def get_node_values(temp_node, node_dict):
    """Gets the values of the fields in the nodes schema, inputs their values
    into the row dictionary. Then appends dictionary to the nodes dataframe.

    Arguments:
        temp_node {[dictionary]} -- [The yaml file in dictionary form]
        node_dict {[dictionary]} -- [The dict of dict which will be converted
            to dataframe and exported as a csv]

    Returns:
        [dictionary] -- [Returns a dict of dict with the fields and their
            values from the node part of the yaml file]
    """
    node = dict(temp_node)
    row = {}
    for k, v in node.items():
        # links/properties are handled separately below; preferred/constraints
        # are intentionally not exported.
        if k not in ['links', 'properties', 'preferred', 'constraints']:
            if k in ('required', 'systemProperties'):
                row['<' + k + '>'] = ', '.join(v)
            elif k == 'deprecated':
                if v:
                    v = ', '.join(v)
                row['<' + k + '>'] = v
            elif k == 'uniqueKeys':
                # Each unique key is a list of fields; keys are '|'-separated.
                row['<' + k + '>'] = ' | '.join([', '.join(i) for i in v])
            else:
                row['<' + k + '>'] = v
    row = get_links(node['links'], row)
    propref = node['properties'].get('$ref')
    if propref is not None:
        if isinstance(propref, list):
            propref = [stripper(x) for x in propref]
        elif isinstance(propref, str):
            propref = [stripper(propref)]
        row['<property_ref>'] = ', '.join(propref)
    # .get replaces the previous bare try/except around a missing key.
    nterms = node.get('<nodeTerms>')
    if nterms is not None:
        row['<nodeTerms>'] = lrefs_to_srefs(nterms)
    node_dict[node['id']] = row
    return node_dict
def get_var_values(props, val_dict, enum_dict):
    """Gets the fields and values from the properties portion of the yaml file
    and adds it to the dataframe.

    Arguments:
        props {[dictionary]} -- [yaml properties: (node_name, properties dict)]
        val_dict {[dictionary]} -- [The dict of dict which will be converted
            to dataframe and exported as a csv]
        enum_dict {[dictionary]} -- [accumulates one row per enum value,
            keyed 'node:property:enum']

    Returns:
        [dictionary] -- [Returns a dict of dict with the fields and their
            values from the properties part of the yaml file]
    """
    for key, val in props[1].items():
        # Top-level '$ref' is a property include, not a property.
        if isinstance(key, str) and key == '$ref':
            continue
        row = {
            '<node>': props[0],
            '<property>': key,
            '<terms>': None,
            '<description>': None,
            '<type>': None,
            '<pattern>': None,
            '<maximum>': None,
            '<minimum>': None
        }
        if isinstance(val, dict):
            for k, v in val.items():
                # Any enum-flavored key marks the property as an enum type.
                if k in ['enum', 'deprecated_enum', 'enumDef', 'enumTerms']:
                    row['<type>'] = 'enum'
                elif k == 'type':
                    if isinstance(v, list):
                        row['<type>'] = ', '.join(v)
                    elif isinstance(v, str):
                        row['<type>'] = stripper(v)
                elif k == 'oneOf':
                    # Flatten oneOf alternatives into a comma-joined type list,
                    # lifting pattern/maximum/minimum up to the row.
                    temp_type = []
                    for t_dict in v:
                        for m, n in t_dict.items():
                            if m == 'type':
                                temp_type.append(stripper(n))
                            elif m in ['pattern', 'maximum', 'minimum']:
                                row['<' + m + '>'] = stripper(n)
                    row['<type>'] = ', '.join(temp_type)
                elif k in ['$ref', 'term', 'terms']:
                    # Normalize the various term-reference shapes to a list.
                    val_l = None
                    if isinstance(v, dict):
                        val_l = v.get('$ref')
                        if isinstance(val_l, str):
                            val_l = [stripper(val_l)]
                    elif isinstance(v, str):
                        val_l = [stripper(v)]
                    elif isinstance(v, list):
                        val_l = v
                    if val_l:
                        val_l_common = []
                        for val_ in val_l:
                            # _terms.yaml refs default to the '/common' variant.
                            if '_terms.yaml#/' in val_ and '/common' not in val_:
                                val_ += '/common'
                            val_l_common.append(val_)
                        row['<terms>'] = ', '.join(val_l_common)
                else:
                    row['<' + k + '>'] = v
            enums = val.get('enum')
            enum_def = val.get('enumDef')
            dep_enum = val.get('deprecated_enum')
            temp_enums = {}
            if enums:
                for e in enums:
                    enum_row = {
                        '<node>': props[0],
                        '<property>': key,
                        '<enum_value>': e,
                        '<enum_def>': None,
                        '<deprecated>': None
                    }
                    temp_enums[props[0] + ':' + key + ':' + e] = enum_row
            if enum_def:
                for k, v in enum_def.items():
                    # NOTE(review): raises KeyError if an enumDef name has no
                    # matching enum value above — presumably schemas guarantee
                    # the match; confirm.  Also assumes v['$ref'] is a list.
                    temp_enums[props[0] + ':' + key + ':' + k]['<enum_def>'] = ', '.join(v['$ref'])
            if dep_enum:
                for e in dep_enum:
                    enum_row = {
                        '<node>': props[0],
                        '<property>': key,
                        '<enum_value>': e,
                        '<enum_def>': None,
                        '<deprecated>': 'yes'
                    }
                    # 'dep_' prefix keeps deprecated values from colliding
                    # with live enum keys.
                    temp_enums[props[0] + ':' + key + ':' + 'dep_' + e] = enum_row
            for k, v in temp_enums.items():
                enum_dict[k] = v
        val_dict[props[0] + ':' + key] = row
    return val_dict, enum_dict
def getnodevalues(node, frame):
    """Gets the values of the fields in the nodes schema, inputs their values
    into the row dictionary. Then appends dictionary to the nodes dataframe.

    Arguments:
        node {[dictionary]} -- [The yaml file in dictionary form]
        frame {[dataframe]} -- [The dataframe that will be exported as a csv]

    Returns:
        [dataframe] -- [Returns a dataframe with the fields and their values
            from the node part of the yaml file]
    """
    global noderows
    row = {
        '<node>': node['id'],
        '<namespace>': node['namespace'],
        '<title>': node['title'],
        '<nodeTerms>': None,
        '<category>': node['category'],
        '<program>': node['program'],
        '<project>': node['project'],
        '<property_ref>': None,
        '<required>': stripper(reqlist2string(node['required'])),
        '<submittable>': node['submittable'],
        '<description>': node['description'],
        '<additionalProperties>': node['additionalProperties'],
        '<link_name>': None,
        '<backref>': None,
        '<label>': None,
        '<target>': None,
        '<multiplicity>': None,
        '<link_required>': None,
        '<link_group_required>': None,
        '<group_exclusive>': None
    }
    # Accumulated as comma-joined strings; subgroup links are embedded as a
    # stringified Python list (e.g. "['a', 'b']") inside the same cell.
    names = ''
    backrefs = ''
    labels = ''
    targets = ''
    multis = ''
    lreqs = ''
    if has_subgroup(node['links']):
        sname = []
        sbackref = []
        slabel = []
        starget = []
        smulti = []
        sreq = []
        try:
            # By convention the last element in links list is the subgroup, if it exists
            for l in node['links'][-1]['subgroup']:
                sname.append(l['name'])
                sbackref.append(l['backref'])
                slabel.append(l['label'])
                starget.append(l['target_type'])
                smulti.append(l['multiplicity'])
                sreq.append(l['required'])
            names = names + str(sname)
            backrefs = backrefs + str(sbackref)
            labels = labels + str(slabel)
            targets = targets + str(starget)
            multis = multis + str(smulti)
            lreqs = lreqs + str(sreq)
            row['<group_exclusive>'] = node['links'][-1]['exclusive']
            row['<link_group_required>'] = node['links'][-1]['required']
        except KeyError:
            # If for some reason the subgroup is not the last element it should be the first
            for l in node['links'][0]['subgroup']:
                sname.append(l['name'])
                sbackref.append(l['backref'])
                slabel.append(l['label'])
                starget.append(l['target_type'])
                smulti.append(l['multiplicity'])
                sreq.append(l['required'])
            names = names + str(sname)
            backrefs = backrefs + str(sbackref)
            labels = labels + str(slabel)
            targets = targets + str(starget)
            multis = multis + str(smulti)
            lreqs = lreqs + str(sreq)
            row['<group_exclusive>'] = node['links'][0]['exclusive']
            row['<link_group_required>'] = node['links'][0]['required']
    # Plain (non-subgroup) links; subgroup entries lack these keys, so the
    # KeyError silently skips them.
    for l in node['links']:
        try:
            names = names + "," + l['name']
            backrefs = backrefs + "," + l['backref']
            labels = labels + "," + l['label']
            targets = targets + "," + l['target_type']
            multis = multis + "," + l['multiplicity']
            lreqs = lreqs + "," + str(l['required'])
        except KeyError:
            pass
    row['<link_name>'] = stripper(names)
    row['<backref>'] = stripper(backrefs)
    row['<label>'] = stripper(labels)
    row['<target>'] = stripper(targets)
    row['<multiplicity>'] = stripper(multis)
    row['<link_required>'] = stripper(lreqs)
    propref = node['properties'].get('$ref')
    if propref is not None:
        row['<property_ref>'] = propref
    # NOTE(review): raises KeyError if '<nodeTerms>' is absent (unlike the
    # .get-guarded access in get_node_values) — confirm it is always present.
    nterms = node['<nodeTerms>']
    if nterms is not None:
        row['<nodeTerms>'] = lrefs_to_srefs(nterms)
    noderows += 1
    # NOTE(review): DataFrame.append was removed in pandas 2.0; this requires
    # pandas < 2 (or migration to pd.concat).
    return frame.append(row, ignore_index=True)
def getvarVals(props, frame):
    """Gets the fields and values from the properties portion of the yaml file
    and addes it to the dataframe.

    Arguments:
        props {[dictionary]} -- [yaml properties: (node_name, properties dict)]
        frame {[dataframe]} -- [properties dataframe]

    Returns:
        [dataframe] -- [The properties (variables) dataframe to be exported
            as a csv]
    """
    global propertyrows
    rows = []
    # tosave collects yaml paths that need re-saving; relies on module-level
    # `args` (argparse namespace) for the yamls directory.
    global tosave
    propd = defaultdict(lambda: None, props[1])
    for k in propd:
        if isinstance(k, dict):
            propd[k] = defaultdict(lambda: None, propd[k])
    for key in propd.keys():
        if isinstance(key, str) and key == '$ref':
            continue
        # Enum options are chunked across up to 8 spreadsheet columns.
        row = {
            '<node>': None,
            '<field>': None,
            '<description>': None,
            '<type>': None,
            '<terms>': None,
            '<pattern>': None,
            '<maximum>': None,
            '<minimum>': None,
            '<options1>': None,
            '<options2>': None,
            '<options3>': None,
            '<options4>': None,
            '<options5>': None,
            '<options6>': None,
            '<options7>': None,
            '<options8>': None,
        }
        row['<node>'] = props[0]
        row['<field>'] = key
        if isinstance(propd[key], dict):
            row['<description>'] = propd[key].get('description')
            # For all implementations of enums
            if propd[key].get('enum') or propd[key].get(
                    'enumDef') or propd[key].get('enumTerms'):
                row['<type>'] = 'enum'
            else:
                row['<type>'] = propd[key].get('type')
            enums = propd[key].get('enum')
            numDef = propd[key].get('enumDef')
            numterms = propd[key].get('enumTerms')
            if enums is not None and numDef is not None:
                # enum + enumDef present: pair each enum value with its
                # matching term refs, rendered as 'value {ref, ref, }'.
                tosave.add(f"{args.yamls}" + f"{props[0]}.yaml")
                enumrefs = []
                for e in enums:
                    pair = [e, []]
                    for dic in numDef:
                        idd = stripper(dic.get('term_id'))
                        # NOTE(review): '/s' looks like it was meant to be the
                        # regex '\s' (whitespace) — as written it only replaces
                        # literal "/s"; confirm intent.
                        ename = e.lower().replace('/s', '_')
                        enumer = dic['enumeration'].lower().replace('/s', '_')
                        if ename == enumer and idd is not None:
                            pair[1].append(f"_terms.yaml#/{idd}")
                    b = '{'
                    for p in pair[1]:
                        b += (p + ', ')
                    b += '}'
                    pair[1] = b
                    enumrefs.append(pair)
                chunks = enums_chunker(dlist2string(enumrefs))
                for chunk in range(len(chunks)):
                    row[f'<options{chunk+1}>'] = stripper(chunks[chunk])
            elif enums is not None:
                chunks = enums_chunker(list2string(enums))
                for chunk in range(len(chunks)):
                    row[f'<options{chunk+1}>'] = stripper(chunks[chunk])
            elif numterms is not None:
                # enumTerms only: render each term as 'name {ref, ref, }'.
                enums = []
                for k, v in numterms.items():  # NOTE: shadows outer loop var k
                    enum = k + ' ' + '{'
                    if isinstance(v, list):
                        for ref in v:
                            r = (ref['$ref'] + ', ')
                            enum += r
                    enum += '}'
                    enums.append(enum)
                chunks = enums_chunker(list2string(enums))
                for chunk in range(len(chunks)):
                    row[f'<options{chunk+1}>'] = stripper(chunks[chunk])
            # Term references come in several historical shapes; first match wins.
            t = propd[key].get('term')
            tdef = propd[key].get('termDef')
            trefs = propd[key].get('terms')
            rerefs = propd[key].get('$ref')
            if isinstance(t, dict):
                row['<terms>'] = t.get('$ref')
            elif isinstance(t, str):
                row['<terms>'] = stripper(t)
            elif tdef is not None:
                tosave.add(f"{args.yamls}" + f"{props[0]}.yaml")
                refs = []
                for n in tdef:
                    try:
                        na = n.get('term_id')
                        if na is not None:
                            refs.append({'$ref': f"_terms.yaml#/{na}"})
                    except AttributeError:
                        print(
                            f"termDef {tdef} in property {key} of {props[0]} node should be a list of dictionaries"
                        )
                row['<terms>'] = stripper(lrefs_to_srefs(refs))
            elif trefs is not None:
                row['<terms>'] = stripper(lrefs_to_srefs(trefs))
            elif rerefs is not None:
                row['<terms>'] = stripper(rerefs)
            row['<maximum>'] = propd[key].get('maximum')
            row['<minimum>'] = propd[key].get('minimum')
            row['<pattern>'] = propd[key].get('pattern')
        rows.append(row)
    for r in rows:
        # NOTE(review): DataFrame.append was removed in pandas 2.0; requires
        # pandas < 2 (or migration to pd.concat).
        frame = frame.append(r, ignore_index=True)
        propertyrows += 1
    return frame
def build_enums(enum_df):
    """Converts enum dataframe into proper dict of dict containing enums &
    enumDefs.

    Args:
        enum_df: DataFrame with <node>, <property>, <enum_value>, <enum_def>,
            <deprecated> columns (angle-bracketed headers).

    Returns:
        dict keyed by node -> property -> {'enum', 'deprecated_enum',
        'enumDef'}; empty deprecated_enum/enumDef entries are removed.

    Exits the process if a deprecated enum has no matching live enum entry.
    """
    enum_list = enum_df.to_dict('records')
    enum_dict = {}
    for enum in enum_list:
        node = ''
        field = ''
        enum_val = ''
        enum_def = ''
        dep_enum = ''
        for key, val in enum.items():
            if val:
                key = key[1:-1]  # strip the '<'/'>' from the column header
                if key == 'node':
                    node = validate_name(val, 'node')
                elif key == 'property':
                    field = validate_name(val, 'property')
                elif key == 'enum_value':
                    enum_val = validate_enum(val)
                elif key == 'enum_def':
                    enum_def = val
                elif key == 'deprecated':
                    dep_enum = val
        if node != '':
            if node not in enum_dict:
                enum_dict[node] = {}
            if field != '':
                if field not in enum_dict[node]:
                    enum_dict[node][field] = {}
                if 'enum' not in enum_dict[node][field]:
                    enum_dict[node][field]['enum'] = []
                    enum_dict[node][field]['deprecated_enum'] = []
                    enum_dict[node][field]['enumDef'] = {}
                if not dep_enum:
                    enum_dict[node][field]['enum'].append(enum_val)
                if dep_enum == 'yes':
                    enum_dict[node][field]['deprecated_enum'].append(enum_val)
                # 'common'/'specific' derive the ref path from the enum value;
                # anything else is taken as an explicit comma-separated ref list.
                if enum_def == 'common':
                    enum_dict[node][field]['enumDef'][enum_val] = {'$ref': dbl_quote('_terms.yaml#/' + re.sub(r'[\W]+', '', enum_val.lower().strip().replace(' ', '_')) + '/' + enum_def)}
                elif enum_def == 'specific':
                    enum_dict[node][field]['enumDef'][enum_val] = {'$ref': dbl_quote('_terms.yaml#/' + re.sub(r'[\W]+', '', enum_val.lower().strip().replace(' ', '_')) + '/' + node + '/' + field)}
                elif enum_def:
                    # FIX: the previous constant-key comprehension
                    # {'$ref': f(x) for x in ...} kept only the LAST ref;
                    # '$ref' must hold the full list (get_var_values joins
                    # v['$ref'] back with ', ').
                    enum_dict[node][field]['enumDef'][enum_val] = {'$ref': [dbl_quote(stripper(x)) for x in enum_def.split(',')]}
    # Validate deprecated enums present in enum section
    missing_deprecated_enums = []
    for key, val in enum_dict.items():
        for k, v in val.items():
            if 'deprecated_enum' in v:
                for d in v['deprecated_enum']:
                    if d not in v['enum']:
                        missing_deprecated_enums.append(d + ' - ' + key + ' : ' + k)
    if missing_deprecated_enums != []:
        sys.exit('ERROR : Missing enum defs in main section for below deprecated enums: \n{0}'.format(missing_deprecated_enums))
    # Remove empty deprecated_enum, enumDefs
    for key, val in enum_dict.items():
        for k, v in val.items():
            if 'deprecated_enum' in v and v['deprecated_enum'] == []:
                v.pop('deprecated_enum')
            if 'enumDef' in v and v['enumDef'] == {}:
                v.pop('enumDef')
    return enum_dict
def nodeTerms2list(string):
    """Convert a comma-separated nodeTerms string into a list of {'$ref': ...}.

    Returns None for a NaN (empty spreadsheet) cell.

    FIX: the NaN guard previously referenced an undefined name `links`
    (copy-paste from links2list), raising NameError on every call.
    """
    if not isinstance(string, str) and math.isnan(string):
        return None
    return [{'$ref': stripper(s)} for s in string.split(',')]
def reqs2list(rstring):
    """Split a comma-separated string into a list of stripped values.

    Non-string input (including None) is returned unchanged.
    """
    if not isinstance(rstring, str):
        return rstring
    return [stripper(item) for item in rstring.split(',')]
def get_terms(terms):
    """Turn a comma-separated terms string into [{'$ref': ...}, ...].

    Returns None when the input is not a string.
    """
    if not isinstance(terms, str):
        return None
    return [{'$ref': S(stripper(piece))} for piece in terms.split(',')]
def properties_builder(node_name, vdictlist, category, omitterms, ndicts):
    """Constructs the properties dictionary that will be added to the main
    node dictionary.

    Args:
        node_name: node whose properties are being assembled.
        vdictlist: list of per-property row dicts (from the variables sheet).
        category: node category (currently unused; see removed commented-out
            ref selection).
        omitterms: 'at' = no terms at all, 'et' = single 'term' ref style,
            anything else = full 'terms'/'enumTerms' style.
        ndicts: list of per-node row dicts; supplies '<property_ref>'.

    Returns:
        The sorted properties dict (via schema_utils.sortdictionary).
    """
    global properties_added
    propdict = {'$ref': None}
    for n in vdictlist:
        if n['<node>'] == node_name:
            # Find this node's row to obtain its '<property_ref>'.
            ndict = None
            for v in ndicts:
                if v['<node>'] == node_name:
                    ndict = v
                    break
            n['<description>'] = validate_descrip(n['<description>'])
            if omitterms == 'at':
                # 'at': build description/type/enum only — no term refs.
                propdict[str(validate_property_name(n['<field>']))] = {
                    'description': n['<description>'],
                    'type': stripper(n['<type>']),
                    'enum': enums_builder_noterms(
                        enum_merger(n['<options1>'] + n['<options2>'] +
                                    n['<options3>'] + n['<options4>'] +
                                    n['<options5>'] + n['<options6>'] +
                                    n['<options7>'] + n['<options8>']))
                }
                if propdict[str(validate_property_name(
                        n['<field>']))]['description'] is None:
                    del propdict[str(validate_property_name(
                        n['<field>']))]['description']
                # NOTE(review): below this point the dict is indexed with the
                # raw n['<field>'] rather than validate_property_name(...) —
                # presumably they are identical for valid input; confirm.
                if n['<type>'] == 'string':
                    propdict[n['<field>']]['pattern'] = stripper(
                        n['<pattern>'])
                    if propdict[n['<field>']]['pattern'] == None:
                        del propdict[n['<field>']]['pattern']
                if not math.isnan(n['<maximum>']):
                    propdict[n['<field>']]['maximum'] = stripper(
                        int(n['<maximum>']))
                if not math.isnan(n['<minimum>']):
                    propdict[n['<field>']]['minimum'] = stripper(
                        int(n['<minimum>']))
                # Enums keep 'enum' and drop 'type'; everything else drops 'enum'.
                if n['<type>'] == 'enum':
                    try:
                        del propdict[n['<field>']]['type']
                    except KeyError:
                        pass
                else:
                    del propdict[n['<field>']]['enum']
                properties_added += 1
            elif omitterms == 'et':
                # 'et': single term ref per property.
                propdict['$ref'] = S(ndict['<property_ref>'])
                term = get_termnoref(n['<terms>'])
                propdict[str(validate_property_name(n['<field>']))] = {
                    'description': n['<description>'],
                    'term': {
                        '$ref': S(term)
                    },
                    'type': stripper(n['<type>']),
                    'enum': enums_builder_noterms(
                        enum_merger(n['<options1>'] + n['<options2>'] +
                                    n['<options3>'] + n['<options4>'] +
                                    n['<options5>'] + n['<options6>'] +
                                    n['<options7>'] + n['<options8>']))
                }
                # _definitions.yaml refs replace the whole property body with
                # a bare '$ref' (no term/type).
                if isinstance(term, str) and "_definitions.yaml" in term:
                    del propdict[str(validate_property_name(
                        n['<field>']))]['term']
                    propdict[str(validate_property_name(
                        n['<field>']))]['$ref'] = S(term)
                    del propdict[str(validate_property_name(
                        n['<field>']))]['type']
                if term is None:
                    del propdict[str(validate_property_name(
                        n['<field>']))]['term']
                if propdict[str(validate_property_name(
                        n['<field>']))]['description'] is None:
                    del propdict[str(validate_property_name(
                        n['<field>']))]['description']
                if n['<type>'] == 'string':
                    propdict[n['<field>']]['pattern'] = stripper(
                        n['<pattern>'])
                    if propdict[n['<field>']]['pattern'] == None:
                        del propdict[n['<field>']]['pattern']
                if not math.isnan(n['<maximum>']):
                    propdict[n['<field>']]['maximum'] = stripper(
                        int(n['<maximum>']))
                if not math.isnan(n['<minimum>']):
                    propdict[n['<field>']]['minimum'] = stripper(
                        int(n['<minimum>']))
                if n['<type>'] == 'enum':
                    try:
                        del propdict[n['<field>']]['type']
                    except KeyError:
                        pass
                else:
                    del propdict[n['<field>']]['enum']
                properties_added += 1
            else:
                # Default: full 'terms' list + 'enumTerms' style.
                propdict['$ref'] = S(ndict['<property_ref>'])
                propdict[str(validate_property_name(n['<field>']))] = {
                    'description': n['<description>'],
                    'terms': get_terms(n['<terms>']),
                    'type': stripper(n['<type>']),
                    'enumTerms': enums_builder(
                        enum_merger(n['<options1>'] + n['<options2>'] +
                                    n['<options3>'] + n['<options4>'] +
                                    n['<options5>'] + n['<options6>'] +
                                    n['<options7>'] + n['<options8>']))
                }
                if propdict[str(validate_property_name(
                        n['<field>']))]['description'] is None:
                    del propdict[str(validate_property_name(
                        n['<field>']))]['description']
                if n['<type>'] == 'string':
                    propdict[n['<field>']]['pattern'] = stripper(
                        n['<pattern>'])
                    if propdict[n['<field>']]['pattern'] == None:
                        del propdict[n['<field>']]['pattern']
                if not math.isnan(n['<maximum>']):
                    propdict[n['<field>']]['maximum'] = stripper(
                        int(n['<maximum>']))
                if not math.isnan(n['<minimum>']):
                    propdict[n['<field>']]['minimum'] = stripper(
                        int(n['<minimum>']))
                if n['<type>'] == 'enum':
                    try:
                        del propdict[n['<field>']]['type']
                    except KeyError:
                        pass
                else:
                    del propdict[n['<field>']]['enumTerms']
                properties_added += 1
    return schema_utils.sortdictionary(propdict)
def addlinks(ndict, maindict):
    """Builds a links dictionary template and adds values from the input data.
    Then merges to the main node dictionary.

    Args:
        ndict: node row dict holding parallel '<link_*>' lists (already parsed
            by links2list, so subgroups sit at ndict['<link_name>'][0][0]).
        maindict: main node dictionary (currently unused here; kept for
            interface compatibility with callers).

    Returns:
        List of link dicts (plus one subgroup dict when a link group exists),
        or None when '<link_name>' is a NaN (empty) cell.
    """
    links = []
    if type(ndict['<link_name>']) != list and math.isnan(ndict['<link_name>']):
        return None
    # Hackish attempt to remove false (empty-named) links.
    todel = []
    for i in range(len(ndict['<link_name>'])):
        if ndict['<link_name>'][i] == '':
            todel.append(i)
    # FIX: delete from highest index to lowest — ascending deletion shifted
    # the remaining indices and removed the wrong elements when more than one
    # empty name was present.
    for i in reversed(todel):
        del ndict['<link_name>'][i]
    for lin in range(len(ndict['<link_name>'])):
        if isinstance(ndict['<link_name>'][lin], str):
            start = lin
            link = {
                'name': stripper(ndict['<link_name>'][start]),
                'backref': stripper(ndict['<backref>'][start]),
                'label': stripper(ndict['<label>'][start]),
                'target_type': stripper(ndict['<target>'][start]),
                'multiplicity': stripper(ndict['<multiplicity>'][lin]),
                'required': stripper(ndict['<link_required>'][lin])
            }
            links.append(link)
    # A non-NaN '<link_group_required>' marks the presence of a link group.
    if not math.isnan(ndict['<link_group_required>']):
        subgroups = []
        # Currently only supports 1 subgroup (nested at [0][0] by links2list).
        for l in range(len(ndict['<link_name>'][0][0])):
            subgroup = {
                'name': stripper(ndict['<link_name>'][0][0][l]),
                'backref': stripper(ndict['<backref>'][0][0][l]),
                'label': stripper(ndict['<label>'][0][0][l]),
                'target_type': stripper(ndict['<target>'][0][0][l]),
                'multiplicity': stripper(ndict['<multiplicity>'][0][0][l]),
                'required': stripper(ndict['<link_required>'][0][0][l])
            }
            subgroups.append(subgroup)
        sub = {
            'exclusive': ndict['<group_exclusive>'],
            'required': ndict['<link_group_required>'],
            'subgroup': subgroups
        }
        links.append(sub)
    return links