import copy
import random
import subprocess


def run_one_test(id=0):
    src_file = 'source.txt'
    tar_file = 'target.txt'
    gen_file = 'generate.txt'
    diff_file = 'diff.txt'

    source = []
    for i in range(100):
        source.append(random_line())
    with open(src_file, 'w') as f:
        f.writelines(source)

    target = copy.copy(source)
    for i in range(20):
        d = dice()
        if d == 1:
            idx = random.randint(0, len(target) - 1)
            # drop the line at idx (list.remove() would delete the first equal line instead)
            del target[idx]
        elif d == 2:
            idx = random.randint(0, len(target) - 1)
            target.insert(idx, random_line())
        elif d == 3:
            idx = random.randint(0, len(target) - 1)
            target[idx] = random_line()
    with open(tar_file, 'w') as f:
        f.writelines(target)

    # generate correct diff result
    with open(diff_file, 'w') as f:
        p = subprocess.Popen(['diff', src_file, tar_file], stdout=f)
        p.wait()

    # generate result
    with open(src_file, 'r') as f:
        source = f.readlines()
    with open(diff_file, 'r') as f:
        diff = f.readlines()
    parser = DiffParser()
    lines = parser.Parse(source, diff)
    with open(gen_file, 'w') as f:
        f.writelines(lines)

    with open(tar_file, 'r') as f:
        target = f.readlines()

    # compare correct result and generated result
    if len(target) != len(lines):
        store_badcase(source, target, lines, diff_file, id)
        return False
    for i in range(len(target)):
        if target[i] != lines[i]:
            store_badcase(source, target, lines, diff_file, id)
            return False
    return True
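# A minimal driver sketch for the round-trip test above (hypothetical, not part
# of the original harness): run run_one_test() many times and count failures.
# random_line(), dice(), DiffParser and store_badcase() are assumed to be
# defined elsewhere in the same module.
if __name__ == '__main__':
    failures = 0
    runs = 1000
    for i in range(runs):
        if not run_one_test(i):
            failures += 1
    print("%d bad cases out of %d runs" % (failures, runs))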
def testDiffMultiple(self):
    diffText = """6c6
< abcd
---
> abc
10c10
< dcba
---
> cba"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(2, len(diffs))
    self.assertEqual(1, len(diffs[0].getAppends()))
    self.assertEqual(1, len(diffs[0].getDeletes()))
    self.assertEqual(1, len(diffs[1].getAppends()))
    self.assertEqual(1, len(diffs[1].getDeletes()))
    self.assertEqual('> abc', diffs[0].getAppends()[0])
    self.assertEqual('< abcd', diffs[0].getDeletes()[0])
    self.assertEqual('> cba', diffs[1].getAppends()[0])
    self.assertEqual('< dcba', diffs[1].getDeletes()[0])
    self.assertEqual(DiffParser.OP_TYPE_CHANGE, diffs[0].getOpType())
    self.assertEqual(DiffParser.OP_TYPE_CHANGE, diffs[1].getOpType())
    self.assertEqual(6, diffs[0].getStartLineNo())
    self.assertEqual(10, diffs[1].getStartLineNo())
    self.assertEqual('6c6', diffs[0].getHeader())
    self.assertEqual('10c10', diffs[1].getHeader())
def testDiffFailBrokenChange(self):
    diffText = """6c6
< abcd
< efgh
> abc
> def"""
    with self.assertRaises(ValueError):
        DiffParser().getDiffs(diffText)
def __init__(self, repo_location, repo_command, repo_type='hg', date_range=''):
    if not os.path.exists(repo_location):
        raise ChurnDriverError("Repo Location does not exist: %s" % repo_location)
    if not repo_command:
        raise ChurnDriverError("Repo Command cannot be empty")

    self._repo_location = os.path.abspath(repo_location)
    self._repo_type = repo_type
    self._cmd = repo_command
    self._dp = DiffParser(self._repo_type)
    self._ch = ChurnHash()
    self._backend = SQLiteBackend()
    self._daterange = date_range
def main():
    global config
    cmdargs = sys.argv[1:]
    if cmdargs and cmdargs[0] == "commit":
        # This is the only case we process. If not, return control to Git immediately.
        if "--dry-run" in cmdargs:
            # Ignore this, pass straight to git
            print subprocess.check_output(["git"] + cmdargs)
        else:
            # We need to update the API before committing the changes to the remote repo.
            config = loadConfigFile()
            if config is None:
                return 0  # No need to output anything to the user, it's already been done
            try:
                Parser = DiffParser(subprocess.check_output(
                    ["git", "diff", "--name-status", "--diff-filter=ADM"]))
                BadResponses = 0
                for f in Parser.getUpdates():
                    BadResponses += updateResource(f)
                for f in Parser.getDeletes():
                    BadResponses += deleteResource(f)
                for f in Parser.getCreates():
                    BadResponses += createResource(f)
                if BadResponses == 0:
                    # Update dependencies
                    print "--Content was updated on server successfully--"
                    for m in config["models"]:
                        deps = Dependencies(m)
                        deps.UpdateDependencyCache()
                    # Return control to git
                    print subprocess.check_output(["git"] + cmdargs)
                else:
                    go_on = raw_input("There were " + str(BadResponses) +
                                      " unsuccessful updates. Commit repository anyway (y/n)?")
                    if go_on == "y":
                        for m in config["models"]:
                            deps = Dependencies(m)
                            deps.UpdateDependencyCache()
                        # Return control to git
                        print subprocess.check_output(["git"] + cmdargs)
                    else:
                        return 0
            except subprocess.CalledProcessError:
                print "Error invoking git diff"
    else:
        # If this isn't a commit, there's nothing to update
        print subprocess.check_output(["git"] + cmdargs)
def testDiffAdd(self):
    diffText = """8a8
> abc"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(1, len(diffs))
    self.assertEqual(1, len(diffs[0].getAppends()))
    self.assertEqual('> abc', diffs[0].getAppends()[0])
    self.assertEqual(0, len(diffs[0].getDeletes()))
    self.assertEqual(DiffParser.OP_TYPE_APPEND, diffs[0].getOpType())
    self.assertEqual(8, diffs[0].getStartLineNo())
    self.assertEqual('8a8', diffs[0].getHeader())
def testDiffRemove(self):
    diffText = """6d5
< abc"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(1, len(diffs))
    self.assertEqual(0, len(diffs[0].getAppends()))
    self.assertEqual(1, len(diffs[0].getDeletes()))
    self.assertEqual('< abc', diffs[0].getDeletes()[0])
    self.assertEqual(DiffParser.OP_TYPE_DELETE, diffs[0].getOpType())
    self.assertEqual(6, diffs[0].getStartLineNo())
    self.assertEqual('6d5', diffs[0].getHeader())
def testDiffMultiline(self):
    diffText = """8a10
> abc
> def"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(1, len(diffs))
    self.assertEqual(2, len(diffs[0].getAppends()))
    self.assertEqual('> abc', diffs[0].getAppends()[0])
    self.assertEqual('> def', diffs[0].getAppends()[1])
    self.assertEqual(0, len(diffs[0].getDeletes()))
    self.assertEqual(DiffParser.OP_TYPE_APPEND, diffs[0].getOpType())
    self.assertEqual(8, diffs[0].getStartLineNo())
    self.assertEqual('8a10', diffs[0].getHeader())
def testDiffChange(self):
    diffText = """6c6
< abcd
---
> abc"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(1, len(diffs))
    self.assertEqual(1, len(diffs[0].getAppends()))
    self.assertEqual(1, len(diffs[0].getDeletes()))
    self.assertEqual('> abc', diffs[0].getAppends()[0])
    self.assertEqual('< abcd', diffs[0].getDeletes()[0])
    self.assertEqual(DiffParser.OP_TYPE_CHANGE, diffs[0].getOpType())
    self.assertEqual(6, diffs[0].getStartLineNo())
    self.assertEqual('6c6', diffs[0].getHeader())
def testDiffAll(self):
    diffText = """6c6
< abcd
< efgh
---
> abc
> def
10d8
< dcba
< cba
12a14
> dcba
> cba"""
    diffs = DiffParser().getDiffs(diffText)
    self.assertEqual(3, len(diffs))
    self.assertEqual(2, len(diffs[0].getAppends()))
    self.assertEqual(2, len(diffs[0].getDeletes()))
    self.assertEqual(0, len(diffs[1].getAppends()))
    self.assertEqual(2, len(diffs[1].getDeletes()))
    self.assertEqual(2, len(diffs[2].getAppends()))
    self.assertEqual(0, len(diffs[2].getDeletes()))
    self.assertEqual('> abc', diffs[0].getAppends()[0])
    self.assertEqual('> def', diffs[0].getAppends()[1])
    self.assertEqual('< abcd', diffs[0].getDeletes()[0])
    self.assertEqual('< efgh', diffs[0].getDeletes()[1])
    self.assertEqual('< dcba', diffs[1].getDeletes()[0])
    self.assertEqual('< cba', diffs[1].getDeletes()[1])
    self.assertEqual('> dcba', diffs[2].getAppends()[0])
    self.assertEqual('> cba', diffs[2].getAppends()[1])
    self.assertEqual(DiffParser.OP_TYPE_CHANGE, diffs[0].getOpType())
    self.assertEqual(DiffParser.OP_TYPE_DELETE, diffs[1].getOpType())
    self.assertEqual(DiffParser.OP_TYPE_APPEND, diffs[2].getOpType())
    self.assertEqual(6, diffs[0].getStartLineNo())
    self.assertEqual(10, diffs[1].getStartLineNo())
    self.assertEqual(12, diffs[2].getStartLineNo())
    self.assertEqual('6c6', diffs[0].getHeader())
    self.assertEqual('10d8', diffs[1].getHeader())
    self.assertEqual('12a14', diffs[2].getHeader())
def loadDiff(self, diffText):
    self.diffs = DiffParser().getDiffs(diffText)
def testDiffFailBrokenHeader(self):
    diffText = """6x4
> abc
> def"""
    with self.assertRaises(ValueError):
        DiffParser().getDiffs(diffText)
def testDiffFailBrokenAdd(self):
    diffText = """6a8
< abcd
< efgh"""
    with self.assertRaises(ValueError):
        DiffParser().getDiffs(diffText)
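# The unit tests above effectively specify the DiffParser interface: getDiffs()
# splits normal-format diff output into hunks that expose getHeader(),
# getStartLineNo(), getOpType(), getAppends() and getDeletes(), and it rejects
# malformed input with ValueError. The following is a minimal sketch that
# satisfies those tests; it is not the project's actual implementation, and the
# Hunk helper class is a name chosen here purely for illustration.
import re


class DiffParser(object):
    OP_TYPE_APPEND = 'a'
    OP_TYPE_DELETE = 'd'
    OP_TYPE_CHANGE = 'c'

    _HEADER = re.compile(r'^(\d+)(?:,\d+)?([adc])(\d+)(?:,\d+)?$')

    def getDiffs(self, diffText):
        diffs = []
        current = None
        for line in diffText.splitlines():
            m = self._HEADER.match(line)
            if m:
                current = Hunk(line, int(m.group(1)), m.group(2))
                diffs.append(current)
            elif current is not None and line.startswith('< '):
                current.addDelete(line)
            elif current is not None and line.startswith('> '):
                current.addAppend(line)
            elif current is not None and line == '---':
                current.sawSeparator()
            else:
                raise ValueError("Unrecognised diff line: %r" % line)
        for d in diffs:
            d.validate()
        return diffs


class Hunk(object):
    def __init__(self, header, start_line, op_type):
        self._header = header
        self._start = start_line
        self._op = op_type
        self._appends = []
        self._deletes = []
        self._separator = False

    def addAppend(self, line):
        self._appends.append(line)

    def addDelete(self, line):
        self._deletes.append(line)

    def sawSeparator(self):
        self._separator = True

    def validate(self):
        # Append hunks may not contain '<' lines, delete hunks may not contain
        # '>' lines, and a change hunk needs both halves separated by '---'.
        if self._op == DiffParser.OP_TYPE_APPEND and (self._deletes or not self._appends):
            raise ValueError("Malformed append hunk: %s" % self._header)
        if self._op == DiffParser.OP_TYPE_DELETE and (self._appends or not self._deletes):
            raise ValueError("Malformed delete hunk: %s" % self._header)
        if self._op == DiffParser.OP_TYPE_CHANGE and not (
                self._separator and self._appends and self._deletes):
            raise ValueError("Malformed change hunk: %s" % self._header)

    def getHeader(self):
        return self._header

    def getStartLineNo(self):
        return self._start

    def getOpType(self):
        return self._op

    def getAppends(self):
        return self._appends

    def getDeletes(self):
        return self._deletes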
class ChurnDriver(object):

    def __init__(self, repo_location, repo_command, repo_type='hg', date_range=''):
        if not os.path.exists(repo_location):
            raise ChurnDriverError("Repo Location does not exist: %s" % repo_location)
        if not repo_command:
            raise ChurnDriverError("Repo Command cannot be empty")

        self._repo_location = os.path.abspath(repo_location)
        self._repo_type = repo_type
        self._cmd = repo_command
        self._dp = DiffParser(self._repo_type)
        self._ch = ChurnHash()
        self._backend = SQLiteBackend()
        self._daterange = date_range

    def run(self):
        args = shlex.split(self._cmd)
        p = subprocess.Popen(args, cwd=self._repo_location, stdout=subprocess.PIPE)
        sr = StreamReader(p.stdout)

        now = time.time()
        count = 0
        # Give up after 20 seconds with no output
        while time.time() - now < 20:
            if sr.is_empty():
                print '.',
                time.sleep(1)
                continue

            lines = sr.readline(0.5)
            count += 1
            if count % 100 == 0:
                print "Read %d lines" % count

            # Got a line of output, reset timer
            now = time.time()

            if lines:
                diffs = self._dp.parse(lines.split('\n'))
                if diffs:
                    while len(diffs):
                        d = diffs.popitem()
                        # These are now key, value tuples; the second half is the embedded dict.
                        # I can't decide what to do next. Either this is an aggregated metric
                        # or it isn't. So, I'm going to store both and we can see which turns
                        # out to be useful. We will calculate an aggregate metric and a
                        # per-file metric and store both. It may be that it's better to have
                        # the database do the aggregation for us, in which case churnhash.py
                        # is totally useless.
                        chgset = d[0]
                        user = d[1]['user']
                        timestamp = d[1]['timestamp']
                        for k in d[1].keys():
                            if k not in ('user', 'timestamp'):
                                # Then it's a file name with a churn value
                                self._ch.add_file_path(k, d[1][k])
                                # Add non-aggregated values to our backend
                                if self._backend:
                                    self._backend.add_single_file_value(
                                        chgset, user, timestamp, k, d[1][k])
        p.wait()

        # TODO: Now we save to some backend - or perhaps just wire this into
        # churnhash directly. For now, we pull this back and return it.
        if self._backend:
            h = self._ch.get_hash()
            for i in h:
                self._backend.store_churn_hash(i, h[i]['file'], self._daterange,
                                               h[i]['lines_changed'])

        return self._ch
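# Hypothetical usage of the ChurnDriver class above; the repository path, the
# repo command and the date range are placeholders, not values taken from the
# original code.
if __name__ == '__main__':
    driver = ChurnDriver('/path/to/hg/repo', 'hg log -p', repo_type='hg',
                         date_range='2013-05 to 2013-06')
    churn = driver.run()
    print churn.get_hash()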