def insert_inf_elt(src, defn):
    """
    Inserts an information element definition into the source text.

    The source is lexed and its intervals are scanned from the end towards
    the start for an interval of type ``closetag``.  The scan stops at
    index 0 without checking that interval's type, so the first interval is
    used when no later close tag exists.  The definition, wrapped in
    ``os.linesep`` on both sides, is placed at the chosen interval's offset.

    :param src: source text to insert into
    :param defn: definition text to insert
    :return: the source text with the definition inserted
    """
    intervals = xmllexer.lex(src)

    # Walk backwards until a close tag is hit or index 0 is reached.
    idx = len(intervals) - 1
    while idx > 0:
        if intervals[idx].int_type == xmllexer.IntervalType.closetag:
            break
        idx -= 1

    insert_at = intervals[idx].offs
    head, tail = src[:insert_at], src[insert_at:]
    return head + os.linesep + defn + os.linesep + tail
def __init__(self, fileName):
    """
    Loads a source file, normalizes its text and computes markup zones.

    The file is read as UTF-8 line by line.  Every line has its ``\\r`` and
    ``\\n`` characters removed, gets a single ``\\n`` appended and has each
    TAB expanded to four spaces.  ``self.offsets`` receives the offset of
    the start of every normalized line plus one final entry equal to the
    total length of the normalized text.

    When the module-level flag ``write_reformatted_sources`` is truthy the
    normalized text is also written to ``fileName + ".reformatted"``.

    Depending on the module-level flag ``checkmarkup`` either the full zone
    discovery or only URL discovery of ``XMLZoneMarker`` is run, and the
    corresponding marker attributes are copied onto this object.  Finally
    the normalized text is lexed into ``self.lexintervals``.

    :param fileName: path of the file to load
    """
    self.elementZones = []
    self.fileName = fileName
    self.text = None
    self.offsets = []
    offset = 0

    with open(fileName, encoding='utf-8') as ifs:
        self.lines = []
        for line in ifs:
            self.offsets.append(offset)
            # leave leading blanks + add separator
            # (we do not need \r, so no os.linesep)
            lst = line.replace('\r', '').replace('\n', '') + '\n'
            # Clone Miner is very brave to consider TAB equal to 4 spaces,
            # so a TAB has to occupy four characters here as well, otherwise
            # the offsets recorded below would not agree with that rule.
            lst = lst.replace('\t', '    ')
            offset += len(lst)
            self.lines.append(lst)

    # Sentinel entry: offset just past the last line.
    self.offsets.append(offset)
    self.text = "".join(self.lines)

    if write_reformatted_sources:
        with open(fileName + ".reformatted", 'w+', encoding='utf-8', newline='\n') as ofs:
            ofs.write(self.text)

    # then calculate XML zones
    marker = XMLZoneMarker(self)
    if checkmarkup:  # -cmup no and -cmup shrink do not need this
        marker.discover()
        self.zones, self.rzones = marker.zones, marker.rzones
        self.textzoneoffsets = marker.textzoneoffsets
        self.textzoneends = marker.textzoneends
        self.textzones = marker.textzones
    else:
        marker.discoverURLs()
        self.urlzones = marker.urlzones

    # calculate tag coordinates using pygments lexer (hope correctly)
    self.lexintervals = xmllexer.lex(self.text)
def insert_dict_entry(src, defn):
    """
    Inserts a dictionary entry definition into the source text.

    The lexed intervals are searched from the end for an ``opentag``
    interval whose ``srepr`` equals the ``doc_clone_finder`` dictionary
    opening tag (including the line separators around it).  If one is found
    the definition is inserted right after it, preceded by ``os.linesep``.
    Otherwise a new dictionary element wrapping the definition is inserted
    at the offset of the last ``closetag`` interval (or of the interval at
    index 0 when the backward scan reaches the start).

    :param src: source text to insert into
    :param defn: dictionary entry definition text
    :return: the source text with the definition inserted
    """
    tokens = xmllexer.lex(src)
    dict_open = os.linesep + """<d:Dictionary id="doc_clone_finder">""" + os.linesep
    dict_close = os.linesep + "</d:Dictionary>" + os.linesep

    # NOTE(review): dict_open carries os.linesep on both sides; if the lexer
    # reports a tag's srepr without surrounding whitespace this comparison
    # can never succeed -- confirm against xmllexer.
    found_at = next(
        (
            tok.end
            for tok in reversed(tokens)
            if tok.int_type == xmllexer.IntervalType.opentag and tok.srepr == dict_open
        ),
        -1,
    )
    if found_at != -1:
        # An existing dictionary was located: add the entry inside it.
        return src[:found_at] + os.linesep + defn + src[found_at:]

    # No dictionary yet: create one in front of the last close tag.
    idx = len(tokens) - 1
    while idx > 0:
        if tokens[idx].int_type == xmllexer.IntervalType.closetag:
            break
        idx -= 1
    insert_at = tokens[idx].offs
    return src[:insert_at] + dict_open + defn + dict_close + src[insert_at:]
def create_reuse_entry(tinput: "str", clone_desc: "str", drl_elt_type: "str") -> "tuple[str, str]":
    """
    Converts clone instances to dictionary entry or information element references

    :param tinput: input DRL source
    :param clone_desc: clone group descriptor
    :param drl_elt_type: kind of DRL element to produce, "dict" or "infelt"
    :return: tuple of output text and dictionary element source
    :raises KeyError: if drl_elt_type is not one of the known kinds
    :raises AssertionError: if the clone group is variative and drl_elt_type is not "infelt"
    """
    cd = CandidateDescriptor(clone_desc)

    # (definition template, reference template) for every supported kind
    drl_elts = {
        "dict": (
            """<d:Entry id="%s">%s</d:Entry>""",
            """<d:DictRef entryid="%s" dictid="doc_clone_finder" />%s""",
        ),
        "infelt": (
            """<d:InfElement id="%s">%s</d:InfElement>""",
            """<d:InfElemRef infelemid="%s">%s</d:InfElemRef>""",
        ),
    }
    drl_defs, drl_refs = drl_elts[drl_elt_type]

    if drl_elt_type != "infelt" and cd.variative:
        message = "Requested to refactor variative unit, but not information element"
        logging.fatal(message)
        # Raised explicitly: a bare `assert False` is removed under -O and
        # the function would then carry on with an unsupported combination.
        raise AssertionError(message)

    def xs(xel):
        # concatenated source representation of a sequence of intervals
        return "".join([el.srepr for el in xel])

    eid = str(uuid.uuid4())

    whole_instances = cd.get_whole_instances(tinput)
    # outer balancing using outer instance #0
    outer_def_prepend, outer_def_append, outer_ref_prepend, outer_ref_append = xmlfixup.balance_unbalanced_text(
        xmllexer.lex(whole_instances[0])
    )

    # prepend and appends for extension points
    # NOTE(review): this loop takes vp[0] of every element of vps, while the
    # reference loop below zips every element of vps against vps_pa; confirm
    # the shape returned by get_variative_parts.
    vps = cd.get_variative_parts(tinput)
    # balancing using variative part #0
    vps_pa = [xmlfixup.balance_unbalanced_text(xmllexer.lex(vp[0])) for vp in vps]

    # definition
    vardefs = [
        xs(inner_ref_prepend) + ("""<d:Nest id="%s.%d"/>""" % (eid, cnt)) + xs(inner_ref_append)
        for cnt, (_, _, inner_ref_prepend, inner_ref_append) in enumerate(vps_pa, 1)
    ]
    condefs = cd.get_constant_parts(tinput)
    # constant parts interleaved with nests; the extra "" pairs with the
    # constant part that follows the last nest
    bodydef = "".join([const + var for const, var in zip(condefs, vardefs + [""])])
    complete_def = drl_defs % (eid, xs(outer_def_prepend) + bodydef + xs(outer_def_append))

    # reference
    vrefs = []
    for vpv in vps:
        replace_nests = []
        for cnt, (vp, pa) in enumerate(zip(vpv, vps_pa), 1):
            _, _, inner_ref_prepend, inner_ref_append = pa
            replace_nests.append(
                ("""<d:Replace-Nest nestid="%s.%d">""" % (eid, cnt))
                + xs(inner_ref_prepend)
                + vp
                + xs(inner_ref_append)
                + """</d:Replace-Nest>"""
            )
        vrefs.append(xs(outer_ref_prepend) + drl_refs % (eid, "".join(replace_nests)) + xs(outer_ref_append))

    if not vrefs:  # non-variative one
        vrefs = [xs(outer_ref_prepend) + (drl_refs % (eid, "")) + xs(outer_ref_append)]

    # instance intervals
    intervals = cd.get_whole_instance_coordinates(tinput)
    restext = replace_str_intervals_with(tinput, vrefs, intervals)

    return restext, complete_def
# going left stack = [] back_intervals = list(covered_intervals) back_intervals.reverse() for i in back_intervals: if i.int_type == xmllexer.IntervalType.closetag: stack.append(i) elif i.int_type == xmllexer.IntervalType.opentag and len(stack): stack.pop() for i in stack: elem_prepend.append(i.create_opposite_tag()) ref_prepend.insert(0, i) return elem_prepend, elem_append, ref_prepend, ref_append # just a test if __name__ == '__main__': src = """t0</a>t1</b>t2<c>t3<d>t4""" ints = xmllexer.lex(src) p, a, rp, ra = balance_unbalanced_text(ints) print("".join([pi.srepr for pi in p])) print(src) print("".join([ai.srepr for ai in a])) print("============") print("".join([pi.srepr for pi in rp]) + "<REF/>" + "".join([pi.srepr for pi in ra]))
for i in stack: elem_append.insert(0, i.create_opposite_tag()) ref_append.append(i) # going left stack = [] back_intervals = list(covered_intervals) back_intervals.reverse() for i in back_intervals: if i.int_type == xmllexer.IntervalType.closetag: stack.append(i) elif i.int_type == xmllexer.IntervalType.opentag and len(stack): stack.pop() for i in stack: elem_prepend.append(i.create_opposite_tag()) ref_prepend.insert(0, i) return elem_prepend, elem_append, ref_prepend, ref_append # just a test if __name__ == '__main__': src = """t0</a>t1</b>t2<c>t3<d>t4""" ints = xmllexer.lex(src) p, a, rp, ra = balance_unbalanced_text(ints) print("".join([pi.srepr for pi in p])) print(src) print("".join([ai.srepr for ai in a])) print("============") print("".join([pi.srepr for pi in rp]) + "<REF/>" + "".join([pi.srepr for pi in ra]))
def create_reuse_entry(tinput: 'str', clone_desc: 'str', drl_elt_type: 'str') -> 'tuple[str, str]':
    """
    Converts clone instances to dictionary entry or information element references

    :param tinput: input DRL source
    :param clone_desc: clone group descriptor
    :param drl_elt_type: kind of DRL element to produce, 'dict' or 'infelt'
    :return: tuple of output text and dictionary element source
    :raises KeyError: if drl_elt_type is not one of the known kinds
    :raises AssertionError: if the clone group is variative and drl_elt_type is not 'infelt'
    """
    cd = CandidateDescriptor(clone_desc)

    # kind -> (definition template, reference template)
    drl_elts = {
        "dict": ("""<d:Entry id="%s">%s</d:Entry>""",
                 """<d:DictRef entryid="%s" dictid="doc_clone_finder" />%s"""),
        "infelt": ("""<d:InfElement id="%s">%s</d:InfElement>""",
                   """<d:InfElemRef infelemid="%s">%s</d:InfElemRef>""")
    }
    drl_defs, drl_refs = drl_elts[drl_elt_type]

    if drl_elt_type != 'infelt' and cd.variative:
        logging.fatal(
            "Requested to refactor variative unit, but not information element"
        )
        # An explicit raise survives `python -O`, unlike `assert False`,
        # so the unsupported combination can never fall through.
        raise AssertionError(
            "Requested to refactor variative unit, but not information element"
        )

    def xs(xel):
        # joins the source representations of the given intervals
        return ''.join([el.srepr for el in xel])

    eid = str(uuid.uuid4())

    whole_instances = cd.get_whole_instances(tinput)
    # outer balancing using outer instance #0
    outer_def_prepend, outer_def_append, outer_ref_prepend, outer_ref_append = \
        xmlfixup.balance_unbalanced_text(xmllexer.lex(whole_instances[0]))

    # prepend and appends for extension points
    vps_pa = []
    vps = cd.get_variative_parts(tinput)
    for vp in vps:
        # balancing using variative part #0
        vps_pa.append(xmlfixup.balance_unbalanced_text(xmllexer.lex(vp[0])))

    # definition: one <d:Nest/> placeholder per balanced extension point
    vardefs = []
    for cnt, balanced in enumerate(vps_pa, 1):
        _, _, inner_ref_prepend, inner_ref_append = balanced
        nest = """<d:Nest id="%s.%d"/>""" % (eid, cnt)
        vardefs.append(xs(inner_ref_prepend) + nest + xs(inner_ref_append))

    condefs = cd.get_constant_parts(tinput)
    # the trailing '' pairs with the constant part after the last nest
    bodydef = ''.join([con + var for con, var in zip(condefs, vardefs + [''])])
    complete_def = drl_defs % (eid, xs(outer_def_prepend) + bodydef + xs(outer_def_append))

    # reference
    vrefs = []
    for vpv in vps:
        replace_nests = []
        for cnt, (vp, pa) in enumerate(zip(vpv, vps_pa), 1):
            _, _, inner_ref_prepend, inner_ref_append = pa
            opening = """<d:Replace-Nest nestid="%s.%d">""" % (eid, cnt)
            replace_nests.append(
                opening + xs(inner_ref_prepend) + vp + xs(inner_ref_append) + """</d:Replace-Nest>""")
        vrefs.append(
            xs(outer_ref_prepend) + drl_refs % (eid, ''.join(replace_nests)) + xs(outer_ref_append))

    if not vrefs:  # non-variative one
        vrefs = [
            xs(outer_ref_prepend) + (drl_refs % (eid, '')) + xs(outer_ref_append)
        ]

    # instance intervals
    intervals = cd.get_whole_instance_coordinates(tinput)
    restext = replace_str_intervals_with(tinput, vrefs, intervals)

    return restext, complete_def