class TestCoder(unittest.TestCase):
    """Round-trip test for Coder: encoding then decoding fix data for a
    checker must reproduce the original bug/fix text exactly.

    Class-level state (db, checkers, checkerList, checkerIndex) is shared
    across test methods; each setUp picks the current checker and loads its
    fix data from the database.
    """

    @classmethod
    def setUpClass(cls):
        # Conventional name for the classmethod first argument is `cls`;
        # the original used `self`, which obscured that these are class
        # attributes shared by all test instances.
        print("Starting up...")
        cls.db = CFDatabase(config.getCfDbFile())
        cls.checkers = Checkers()
        cls.checkerList = ['deadcode.DeadStores']
        cls.checkerIndex = 0

    def setUp(self):
        # Init coder for the checker currently selected by checkerIndex.
        print("Initializing coder...")
        self.checker = self.checkerList[self.checkerIndex]
        self.dictionary = Dictionary(self.checker)
        self.coder = Coder(self.dictionary)

        # Load all fix records for this checker from the DB.
        print("Fetching data from database...")
        self.allData = self.db.getFixDataForChecker(self.checker)
        self.allDataLen = len(self.allData)
        print("Done, fetched {0} records".format(self.allDataLen))

    def tearDown(self):
        # BUGFIX: `self.checkerIndex += 1` created an instance attribute on
        # a per-test instance that is discarded after the test, so the
        # class-level index never advanced. Increment the class attribute
        # instead so the next test method sees the next checker.
        type(self).checkerIndex += 1

    def testDeadcodeDeadStores(self):
        """Encode and decode every record; decoded text must equal the input."""
        self.assertTrue(self.allDataLen > 0, msg="No data found")

        # Encode all data
        print("Testing encoding")
        i = 0
        while i < self.allDataLen:
            checkerInfo = self.checkers.extractTokensForChecker(
                self.checker, self.allData[i][4])
            encodedBugData, initialUnkList = self.coder.encode(
                self.allData[i][1], checkerData=checkerInfo)
            encodedFixData, finalUnkList = self.coder.encode(
                self.allData[i][2], unkList=initialUnkList, reverse=False)
            # A -1 token marks something the dictionary could not parse;
            # such records cannot round-trip, so they are reported and skipped.
            if -1 in encodedBugData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            elif -1 in encodedFixData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            else:
                print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                    i + 1, len(finalUnkList), len(encodedBugData),
                    len(encodedFixData)))
                # Decode both directions and verify the round trip is lossless.
                textBug = self.coder.decode(encodedBugData, finalUnkList, True)
                textFix = self.coder.decode(encodedFixData, finalUnkList)
                self.assertEqual(textBug, self.allData[i][1])
                self.assertEqual(textFix, self.allData[i][2])
            i += 1
        print("All done.")
def get_postions_and_labels(align, ref, region):
    """ Returns list of corresponding positions and labels.

    Positions are (reference_position, insert_count) tuples so that bases
    inserted relative to the reference get distinct positions; labels are
    the encoded query bases (or GAP/UNKNOWN codes).

    Parameters
    ----------
    align : align for which positions and labels are required
    ref : corresponding reference sequence
    region : corresponding region
    """
    # Clamp the requested region to the alignment's own span; None bounds
    # mean "unbounded" on that side.
    start, end = region.start, region.end
    if start is None:
        start = 0
    if end is None:
        end = float('inf')
    start, end = max(start, align.start), min(end, align.end)

    positions = []
    labels = []
    pairs = get_pairs(align.align, ref)

    current_position = None
    insert_count = 0
    # Skip pairs before `start`; pairs with ref_position None are insertions
    # and are also skipped until the first in-range reference position.
    for pair in itertools.dropwhile(lambda p: (p.ref_position is None) or (p.ref_position < start), pairs):
        # Stop at the alignment's reference end or once past `end`.
        if pair.ref_position == align.align.reference_end or (pair.ref_position is not None and pair.ref_position >= end):
            break
        if pair.ref_position is None:
            # Insertion: same reference position, bumped insert counter.
            insert_count += 1
        else:
            # Match/mismatch/deletion column: reset the insert counter.
            insert_count = 0
            current_position = pair.ref_position
        position = (current_position, insert_count)
        positions.append(position)
        # No query base means a deletion -> GAP label.
        label = pair.query_base.upper() if pair.query_base else Coder.GAP
        try:
            encoded_label = Coder.encode(label)
        except KeyError:
            # Base not in the coder's alphabet (e.g. ambiguity codes).
            encoded_label = Coder.encode(Coder.UNKNOWN)
        labels.append(encoded_label)
    return positions, labels
def test_digits(verbose=False):
    """Train a CHL network to autoassociate the tokens '0'..'99'."""
    epochs = 1000
    pad = 0.0001
    feedback = True
    split_learn = True
    biases = True

    layer_sizes = [256] * 3
    digit_tokens = [str(n) for n in range(100)]
    net = CHL_Net(layer_sizes, feedback, split_learn, biases)

    # Build (input, target) pattern pairs for every token 0-99.
    input_coder = Coder(tanh_activator(pad, layer_sizes[0]))
    output_coder = Coder(tanh_activator(pad, layer_sizes[-1]))
    patterns = []
    for token in digit_tokens:
        patterns.append((input_coder.encode(token), output_coder.encode(token)))

    net.train(epochs, patterns, verbose=verbose)
def _send_message(self):
    """Prompt for a chat id and a message, then transmit the encoded
    message bits framed by the start/end message markers.

    An empty chat id falls back to the LISTENER default.
    """
    self.chat_id = input('Enter chat id:\n')
    if not self.chat_id:
        self.chat_id = LISTENER
    text = input('Enter message:\n')
    # Frame: start marker, payload, end marker.
    for chunk in (START_MESSAGE, Coder.encode(text), END_MESSAGE):
        self._send_bits(chunk)
def main(variant):
    """Build a chunked inverted index over the document stream.

    Writes the chosen `variant` (encoding scheme) to a file, then makes
    `max_chunk_num` passes over the documents, each pass indexing only the
    words whose length hashes to the current chunk, and pickles each chunk
    to index<chunk>.pkl. Document URLs are collected on the first pass and
    pickled to paths.pkl.
    """
    with open('variant', 'w') as f:
        f.write(variant)
    encoder = Coder(variant)
    paths = []
    chunk_num = 0
    max_chunk_num = 2
    # Was `while True` with a top-of-loop break; the guard expresses the
    # same termination condition directly.
    while chunk_num < max_chunk_num:
        tokens = {}
        i = 1  # 1-based document id
        documents = docreader.DocumentStreamReader(
            docreader.parse_command_line().files)
        for doc in documents:
            if chunk_num == 0:
                paths.append(doc.url)
            words = doc2words.extract_words(doc.text)
            # set() dedupes so each doc id is recorded at most once per word.
            for word in set(words):
                if word in tokens:
                    tokens[word].append(i)
                elif len(word) % max_chunk_num == chunk_num:
                    tokens[word] = array('l', [i])
            # NOTE(review): i is incremented once per document (it is the
            # doc id) — confirm against the original's intent.
            i += 1
        # Compress each posting list with the selected variant encoding.
        for token in tokens:
            tokens[token] = encoder.encode(tokens[token])
        with open('index{}.pkl'.format(chunk_num), 'wb') as f:
            pickle.dump(tokens, f)
        chunk_num += 1
    # Removed dead assignment `first = False` — the name was never read.
    with open('paths.pkl', 'wb') as f:
        pickle.dump(paths, f)
def test_arith(verbose=False):
    """Train a CHL network on single-digit arithmetic: (x, y, op) -> op(x, y).

    Inputs are three concatenated coder segments (two digits and an
    operator); outputs are two concatenated result segments. Operations
    that fail (e.g. division by zero) are mapped to the "null" token.
    """
    epochs = 100
    feedback = False
    split_learn = False
    biases = True
    pad = 0.0001
    N = 256
    Ns = [N * 3, N * 2, N * 2]
    net = CHL_Net(Ns, feedback, split_learn, biases)
    in_size = int(Ns[0] / 3)
    out_size = int(Ns[-1] / 2)
    in1_coder = Coder(tanh_activator(pad, in_size))
    in2_coder = Coder(tanh_activator(pad, in_size))
    in3_coder = Coder(tanh_activator(pad, in_size))
    out1_coder = Coder(tanh_activator(pad, out_size))
    out2_coder = Coder(tanh_activator(pad, out_size))

    # (x,y,op) => op(x,y) pairs
    patterns = []
    for op in arith_ops:
        # Hoisted out of the try: iterating the dict's keys means this
        # lookup cannot fail, so only the op application is guarded.
        f0, f1 = arith_ops[op]
        for i in range(10):
            for j in range(10):
                in1 = in1_coder.encode(str(i))
                in2 = in2_coder.encode(str(j))
                in3 = in3_coder.encode(op)
                try:
                    out1 = out1_coder.encode(f0(i, j))
                    out2 = out2_coder.encode(f1(i, j))
                except Exception:
                    # Was a bare `except:`, which also swallowed
                    # SystemExit/KeyboardInterrupt. Undefined results
                    # (e.g. x/0) become the "null" token.
                    out1 = out1_coder.encode("null")
                    out2 = out2_coder.encode("null")
                patterns.append(
                    (np.append(np.append(in1, in2, axis=0), in3, axis=0),
                     np.append(out1, out2, axis=0)))

    net.train(epochs, patterns, verbose=verbose)
class Predictor():
    """Predicts fixes for CodeChecker-reported bugs using a trained
    per-checker model, optionally verifying each suggested fix by
    re-running the analysis.
    """

    def __init__(self):
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        # All commits on the configured branch; index 0 is the current one.
        self.commits = self.vcs.getAllVersions(config.getBranch())
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        # Report ids that CodeChecker considers resolved between the main
        # run and the tmp-dir analysis.
        resolved = self.codeChecker.diffResolved(config.getCcRunName(),
                                                 config.getTmpDir(),
                                                 self.ccdb)
        ids = []
        for bug in resolved:
            ids.append(bug['reportId'])
        return ids

    def predict(self, id, checker):
        """Predict a fix for bug `id` (or for every bug of `checker` when
        id == -1), print a summary, and interactively apply the fix.
        """
        # Load all bugs
        print("Loading bug data...")
        ids = []
        if id == -1:
            bugs = self.ccdb.getAllBugsForChecker(checker)
            ids = [x[0] for x in bugs]
        else:
            ids.append(id)

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        vLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for i in ids:
            allData = self.ccdb.getBugData(i)
            if allData.getChecker(
            ) not in globals.availableCheckers or allData.getChecker(
            ) != checker:
                print("Bug #{0} - checker not supported".format(i))
            else:
                # Load extra tokens from checker message
                checkerInfo = self.checkers.extractTokensForChecker(
                    allData.getChecker(), allData.getMessage())
                # Retrieve code fragment with bug
                fileRelativePath = self.convertFilePathToRepoRelativePath(
                    allData.getFile())
                fullCodeWithBug = self.vcs.getFileContents(
                    fileRelativePath, self.commits[self.currentCommitIndex])
                extractor = CodeExtractor(allData)
                extractor.loadCodeFromText(fullCodeWithBug)
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = self.coder.encode(
                    bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print(
                        "Bug #{0} - Code too big for model, ignored".format(i))
                    continue
                elif id == -1:
                    # NOTE(review): batch mode (id == -1) only checks that
                    # each bug fits the model and skips prediction — confirm
                    # this is intentional.
                    print("Bug #{0} - Good to go".format(i))
                    continue

                # Pad to the model's fixed input length.
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = self.coder.applyPadding(
                        encodedBugData, noZerosToPad)

                X = np.zeros((1, MODEL_X_MAX_LEN, self.totalDictionaryLength))
                X[0] = self.coder.convertToOneHot(
                    encodedBugData,
                    np.zeros((MODEL_X_MAX_LEN, self.totalDictionaryLength)))

                # Predict and convert from one-hot
                Y = self.coder.convertFromOneHot(model.predict(X)[0])
                print(Y)

                # Decode
                Y = self.coder.removePadding(Y)
                fixCodeFragment = self.coder.decode(Y, initialUnkList)

                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
                    # Run CodeChecker and analyze code
                    self.codeChecker.check(True)
                    resolvedIds = self.getDiffResolvedIds()
                    # Check if ID is resolved in tmp folder
                    isFixed = i in resolvedIds
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0

                #Print
                print("Bug #{0} - summary".format(i))
                print("== Code fragment with bug ==")
                print(bugCodeFragment)
                print("== Suggested fix ==")
                print(fixCodeFragment)
                print("Verification: {0}".format(vLabels[vStatus]))
                a = ' '
                while a != 'y' and a != 'n':
                    a = input("Apply fix? (y/n): ")
                if a == 'y':
                    if not config.cfVerifyPrediction:
                        # Apply fix in source code file
                        extractor.applyFix(fixCodeFragment)
                        extractor.saveToFile(allData.getFile())
                # NOTE(review): this elif is reconstructed as attaching to
                # the outer `if a == 'y'` — i.e. on 'n' with verification
                # enabled, the verified fix is reverted. Confirm against
                # the original layout.
                elif config.cfVerifyPrediction:
                    # Revert file contents
                    self.vcs.checkout(self.commits[self.currentCommitIndex])
                print('Done')
        print("All done, exiting...")
# NOTE(review): this fragment references names defined outside the visible
# chunk (`to_layer`, `s`, `c`, `v_old`, `act`) — presumably a sequencer `s`,
# a token coder `c`, and an activator `act` from an enclosing scope;
# `to_layer` looks like the variable of an enclosing loop. Confirm.
for from_layer in ["FEF", "SC"]:
    # Register a transition to state `to_layer + from_layer` gated by v_old.
    v_new = s.add_transit(new_state=to_layer + from_layer,
                          gates=v_old,
                          op1=to_layer,
                          op2=from_layer)

print(c.list_tokens())
# Flash the sequencer into weight/bias matrices (one pair per connection key).
weights, biases, _, residual = s.flash()
for k in weights:
    w, b = weights[k], biases[k]
    print(k)
    print(w)
    print(b.T)

# First step: drive the network with v_old gates and "SC" on both operands.
a = {"gates": v_old, "op1": c.encode("SC"), "op2": c.encode("SC")}
wvb = np.zeros(v_old.shape)
for k in weights:
    w, b = weights[k], biases[k]
    # k[1] selects which input ("gates"/"op1"/"op2") this weight block reads.
    wvb += w.dot(a[k[1]]) + b

# Second step: feed the activated result back through with zeroed operands.
z = np.zeros(v_old.shape)
a = {"gates": act.f(wvb), "op1": z, "op2": z}
wvb = np.zeros(v_old.shape)
for k in weights:
    w, b = weights[k], biases[k]
    wvb += w.dot(a[k[1]]) + b
v_test = act.f(wvb)

# Compare the two-step result against the registered transition target.
for v in [v_old, v_test, v_new]:
    print(c.decode(v), v.T)
print(act.e(v_test, v_new).T)
def main(self):
    """Pre-commit hook flow: analyze the tree, and for every newly
    introduced bug predict a fix with the per-checker model, verify it by
    re-analysis when configured, then interactively apply/ignore fixes or
    abort the commit.
    """
    # Do analysis
    shutil.rmtree(config.getTmpDir())
    self.codeChecker.check(True)

    # Diff new
    newBugs = self.getDiffNew()
    if len(newBugs) < 1:
        print('No new bugs introduced, commit is accepted!')
        return

    print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

    # Load models
    models = {}
    for checker in globals.availableCheckers:
        models[checker] = load_model(config.cfModelFilenameFormat.format(checker))

    # Load all content from files having new
    files = set([self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs])
    fileContents = {}
    for f in files:
        fn = config.getRepoDir() + f
        with open(fn, 'r') as fh:
            fileContents[f] = ''.join(fh.readlines())

    # For each file sort by bug line desc
    # (descending so earlier fixes don't shift later bug line numbers)
    suggestions = []
    validSuggestions = 0
    for f in files:
        bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
        bugs.sort(key=lambda x: x.getLine(), reverse=True)
        print("=== File: {0} ===".format(f))
        # For each bug get a suggestion and test it
        for b in bugs:
            print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
            # Prepare useful data
            dictionary = Dictionary(b.getChecker())
            coder = Coder(dictionary)
            totalDictionaryLength = dictionary.length()
            # Prepare and extract bug fragment
            checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
            extractor = CodeExtractor(b)
            extractor.loadCodeFromText(fileContents[f])
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()
            fixCodeFragment = ''
            # Encode it
            encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData = checkerInfo)
            # Convert to one-hot
            MODEL_X_MAX_LEN = models[b.getChecker()].get_layer(index = 0).input_shape[1]
            if len(encodedBugData) > MODEL_X_MAX_LEN:
                print("Ignored: Code too big for model")
                continue
            noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
            if noZerosToPad > 0:
                encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
            X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
            X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
            # Predict and convert from one-hot
            Y = coder.convertFromOneHot(models[b.getChecker()].predict(X)[0])
            Y = coder.removePadding(Y)
            # Decode (drops a trailing token; presumably an end marker — confirm)
            fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]

            #Verify?
            vStatus = 2
            if config.cfVerifyPrediction:
                # Apply fix in source code file
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(b.getFile())
                # Run CodeChecker and analyze code
                shutil.rmtree(config.getTmpDir())
                compilationLog = self.codeChecker.check(True)
                newBugsAfterFix = self.getDiffNew()
                # Check if ID is resolved in tmp folder
                isFixed = 'Build failed' not in compilationLog
                for nb in newBugsAfterFix:
                    if self.isBugDataEqual(b, nb):
                        isFixed = False
                # Set vStatus accordingly
                if isFixed:
                    vStatus = 1
                else:
                    vStatus = 0
                # Revert file
                extractor.loadCodeFromText(fileContents[f])
                extractor.saveToFile(b.getFile())

            if vStatus == 0:
                print("Verification: Negative, cannot be applied")
            elif vStatus == 1:
                print("Verification: Positive, can be applied")
                validSuggestions += 1
            elif vStatus == 2:
                print("Verification: Skipped")
                validSuggestions += 1
            sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
            suggestions.append(sugg)

    print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

    if validSuggestions > 0:
        print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
        apply = False
        choice = True
        # Single-character menu loop on stdin.
        while choice:
            c = sys.stdin.read(1)
            if c == 'a':
                apply = True
                choice = False
                print("Applying fixes...")
            elif c == 'i':
                choice = False
                print("Fixes ignored...")
            elif c == 'd':
                self.displaySuggestions(suggestions)
                print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
            elif c == 'q':
                print("Aborting commit...")
                sys.exit(1)
        if apply:
            self.applyValidFixes(suggestions, files)
            print("Fixes applied!")

    # NOTE(review): reconstructed at method level (not nested under the
    # `if apply:` branch) — confirm against the original layout.
    if validSuggestions != len(newBugs):
        print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
        choice = True
        while choice:
            c = sys.stdin.read(1)
            if c == 'c':
                choice = False
                print("Continuing...")
            elif c == 'q':
                print("Aborting commit...")
                sys.exit(1)
    else:
        print("Bugs corrected, commit is good to go!")
def generate_train_data(args):
    """ Generates train data for the region provided through arguments.

    Parameters
    ----------
    args : tuple of (reads_path, truth_genome_path, ref, region) where
        reads_path : path to the aligned reads file
        truth_genome_path : path to the truth genome
        ref : corresponding reference sequence
        region : region for which data is required

    Returns
    -------
    region_name : region name
    positions : positions corresponding provided region
    examples : examples corresponding provided region
    labels : labels corresponding provided region

    Returns None when no alignments survive filtering.
    """
    reads_path, truth_genome_path, ref, region = args

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)
    print(f'>> finished generating labels for {region.name}:{region.start}-{region.end}')

    if not filtered_aligns:
        # Was an f-string with no placeholders (lint F541); plain literal
        # produces identical output.
        print('>> no alignments')
        return None

    positions = []
    examples = []
    labels = []
    for align in filtered_aligns:
        position_label_dict = dict()
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        # Split labeled positions from those with unresolvable bases;
        # windows touching the latter are dropped entirely below.
        for position, label in zip(pos, lbls):
            if label == Coder.encode(Coder.UNKNOWN):
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        # Iterating a dict yields its keys; sorted() needs no list() copy.
        sorted_positions = sorted(position_label_dict)
        # Positions are (ref_pos, insert_count); +1 converts to 1-based.
        region_string = f'{region.name}:{sorted_positions[0][0] + 1}-{sorted_positions[-1][0]}'

        result = gen.generate_features(reads_path, str(ref), region_string)
        for P, X in zip(*result):
            Y = []
            to_yield = True
            for p in P:
                assert is_in_region(p[0], filtered_aligns)
                if p in positions_with_unknown_base:
                    to_yield = False
                    break
                try:
                    y_label = position_label_dict[p]
                except KeyError:
                    # Unlabeled insertion columns default to GAP; a missing
                    # label at a non-insert column is a real inconsistency.
                    if p[1] != 0:
                        y_label = Coder.encode(Coder.GAP)
                    else:
                        raise KeyError(f'error: No label mapping for position {p}!')
                Y.append(y_label)
            if to_yield:
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples, labels
"""
Python program to realize the simple stenography which implements both coding and decoding part.

:Author: Manthan C S
:GitHub: mnthnx64
"""
from coder import Coder
from decoder import Decoder

# Sample plain text that the coder hides and the decoder recovers.
SAMPLE_TEXT = (
    "In all the examples so far, the elements of a are provided by the "
    "iterator one at a time, because all the looping logic is internal to "
    "the iterator. While this is simple and convenient, it is not very "
    "efficient. A better approach is to move the one-dimensional innermost "
    "loop into your code, external to the iterator. This way, NumPy’s "
    "vectorized operations can be used on larger chunks of the elements "
    "being visited.")

if __name__ == '__main__':
    # Encode the sample text, then decode it back and show the result.
    encoder = Coder(SAMPLE_TEXT)
    encoder.encode()

    decoder = Decoder()
    recovered = decoder.decode()
    print(recovered)
class LearningDataBuilder():
    """Encodes bug/fix pairs for a checker and writes them as JSON lines
    to the checker's training file.
    """

    def __init__(self):
        self.db = CFDatabase(config.getCfDbFile())
        self.checkers = Checkers()

    def build(self, checker):
        """Fetch all fix records for `checker`, encode each bug/fix pair,
        and append the usable ones ({'x': bug, 'y': fix}) to the training
        file; records with unparsed tokens or oversize encodings are
        reported and skipped.
        """
        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)

        # Load all data from DB
        print("Fetching data from database...")
        allData = self.db.getFixDataForChecker(checker)
        allDataLen = len(allData)
        print("Done, fetched {0} records".format(allDataLen))
        if allDataLen < 1:
            print("No data found")
            return

        # Encode all data
        print("Encoding all data and writing to output file...")
        i = 0
        # Per-checker limits: max encoded bug length, fix length, unk count.
        (maxBug, maxFix, maxUnk) = self.checkers.getModelStatsForChecker(checker)
        with open(config.cfTrainFilenameFormat.format(checker), 'w') as f:
            while i < allDataLen:
                checkerInfo = self.checkers.extractTokensForChecker(
                    checker, allData[i][4])
                encodedBugData, initialUnkList = self.coder.encode(
                    allData[i][1], checkerData=checkerInfo)
                encodedFixData, finalUnkList = self.coder.encode(
                    allData[i][2], unkList=initialUnkList, reverse=False)
                # -1 marks a token the dictionary could not encode.
                if -1 in encodedBugData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif -1 in encodedFixData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif len(encodedBugData) > maxBug or len(
                        encodedFixData) > maxFix or len(finalUnkList) > maxUnk:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (lengths), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                else:
                    print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                        i + 1, len(finalUnkList), len(encodedBugData),
                        len(encodedFixData)))
                    # One JSON object per line: x = encoded bug, y = encoded fix.
                    f.write(
                        json.dumps({
                            'x': encodedBugData,
                            'y': encodedFixData
                        }) + '\n')
                i += 1
                # NOTE(review): reconstructed as a per-record progress line on
                # stderr (inside the loop) — confirm against original layout.
                print('Done {0}'.format(i), file=sys.stderr)
        print("All done, exiting...")
# NOTE(review): this snippet is Python 2 (raw_input, print statements) —
# it will not run under Python 3 without porting.
from coder import Coder, MorseCoder
import string

if __name__ == "__main__":
    # By default, the translator will encode files by switching them to uppercase
    translator = Coder(string.ascii_lowercase, string.ascii_uppercase)
    # Echo loop: read a line, print its encoded and then decoded form.
    # Terminates only when raw_input raises EOFError/KeyboardInterrupt.
    while (1):
        line = raw_input()
        coded = translator.encode(line)
        print coded
        print translator.decode(coded)