Example #1
0
import unittest

# Project-local modules (config, CFDatabase, Dictionary, Coder, Checkers) are
# assumed to be imported elsewhere in the original file.
class TestCoder(unittest.TestCase):
    def setUp(self):
        # Init coder
        print("Initializing coder...")
        self.checker = self.checkerList[self.checkerIndex]
        self.dictionary = Dictionary(self.checker)
        self.coder = Coder(self.dictionary)
        # Load all data from DB
        print("Fetching data from database...")
        self.allData = self.db.getFixDataForChecker(self.checker)
        self.allDataLen = len(self.allData)
        print("Done, fetched {0} records".format(self.allDataLen))

    def tearDown(self):
        # Bump the class-level index so the next test uses the next checker;
        # writing through the instance would only create a throwaway attribute.
        type(self).checkerIndex += 1

    @classmethod
    def setUpClass(cls):
        print("Starting up...")
        cls.db = CFDatabase(config.getCfDbFile())
        cls.checkers = Checkers()
        cls.checkerList = ['deadcode.DeadStores']
        cls.checkerIndex = 0

    def testDeadcodeDeadStores(self):
        self.assertTrue(self.allDataLen > 0, msg="No data found")

        # Encode all data
        print("Testing encoding")
        i = 0
        while i < self.allDataLen:
            checkerInfo = self.checkers.extractTokensForChecker(
                self.checker, self.allData[i][4])
            encodedBugData, initialUnkList = self.coder.encode(
                self.allData[i][1], checkerData=checkerInfo)
            encodedFixData, finalUnkList = self.coder.encode(
                self.allData[i][2], unkList=initialUnkList, reverse=False)
            if -1 in encodedBugData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            elif -1 in encodedFixData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            else:
                print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                    i + 1, len(finalUnkList), len(encodedBugData),
                    len(encodedFixData)))
                textBug = self.coder.decode(encodedBugData, finalUnkList, True)
                textFix = self.coder.decode(encodedFixData, finalUnkList)
                self.assertEqual(textBug, self.allData[i][1])
                self.assertEqual(textFix, self.allData[i][2])
            i += 1

        print("All done.")
Example #2
0
import itertools

# get_pairs and the Coder class are assumed to be available in the original
# module.
def get_postions_and_labels(align, ref, region):
    """
    Returns lists of corresponding positions and labels.

    Parameters
    ----------
    align : alignment for which positions and labels are required
    ref : corresponding reference sequence
    region : corresponding region
    """

    start, end = region.start, region.end
    if start is None: start = 0
    if end is None: end = float('inf')
    start, end = max(start, align.start), min(end, align.end)

    positions = []
    labels = []

    pairs = get_pairs(align.align, ref)
    current_position = None
    insert_count = 0

    for pair in itertools.dropwhile(lambda p: (p.ref_position is None) or (p.ref_position < start), pairs):
        if pair.ref_position == align.align.reference_end or (pair.ref_position is not None and pair.ref_position >= end):
            break

        if pair.ref_position is None:
            insert_count += 1
        else:
            insert_count = 0
            current_position = pair.ref_position

        position = (current_position, insert_count)
        positions.append(position)

        label = pair.query_base.upper() if pair.query_base else Coder.GAP

        try:
            encoded_label = Coder.encode(label)
        except KeyError:
            encoded_label = Coder.encode(Coder.UNKNOWN)

        labels.append(encoded_label)

    return positions, labels
Example #3
0
def test_digits(verbose=False):
    epochs = 1000
    pad = 0.0001
    feedback = True
    split_learn = True
    biases = True
    Ns = [256 for _ in range(3)]
    tokens = [str(x) for x in range(100)]

    net = CHL_Net(Ns, feedback, split_learn, biases)

    # Input/output pattern pairs (0-99)
    in_coder = Coder(tanh_activator(pad, Ns[0]))
    out_coder = Coder(tanh_activator(pad, Ns[-1]))
    patterns = [(in_coder.encode(tok), out_coder.encode(tok))
                for tok in tokens]

    net.train(epochs, patterns, verbose=verbose)
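
# Not part of the original snippet: a hedged usage sketch, assuming CHL_Net,
# Coder, and tanh_activator are defined in the surrounding module.
if __name__ == "__main__":
    test_digits(verbose=True)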
Example #4
0
    def _send_message(self):
        self.chat_id = input('Enter chat id:\n')
        if not self.chat_id:
            self.chat_id = LISTENER

        message = input('Enter message:\n')

        self._send_bits(START_MESSAGE)
        self._send_bits(Coder.encode(message))
        self._send_bits(END_MESSAGE)
Example #5
0
import pickle
from array import array

# docreader, doc2words, and Coder are project-local modules assumed to be
# imported in the original file.
def main(variant):
    with open('variant', 'w') as f:
        f.write(variant)

    encoder = Coder(variant)
    paths = []
    chunk_num = 0
    max_chunk_num = 2

    while True:
        tokens = {}
        i = 1
        if chunk_num == max_chunk_num:
            break

        documents = docreader.DocumentStreamReader(
            docreader.parse_command_line().files)
        for doc in documents:
            if chunk_num == 0:
                paths.append(doc.url)

            words = doc2words.extract_words(doc.text)

            for word in set(words):
                if word in tokens:
                    tokens[word].append(i)
                elif len(word) % max_chunk_num == chunk_num:
                    tokens[word] = array('l', [i])

            i += 1

        for token in tokens:
            tokens[token] = encoder.encode(tokens[token])

        with open('index{}.pkl'.format(chunk_num), 'wb') as f:
            pickle.dump(tokens, f)

        chunk_num += 1

    with open('paths.pkl', 'wb') as f:
        pickle.dump(paths, f)
Example #6
0
import numpy as np

# CHL_Net, Coder, tanh_activator, and arith_ops are assumed to be defined in
# the original module.
def test_arith(verbose=False):
    epochs = 100
    feedback = False
    split_learn = False
    biases = True
    pad = 0.0001

    N = 256
    Ns = [N * 3, N * 2, N * 2]
    net = CHL_Net(Ns, feedback, split_learn, biases)

    in_size = int(Ns[0] / 3)
    out_size = int(Ns[-1] / 2)
    in1_coder = Coder(tanh_activator(pad, in_size))
    in2_coder = Coder(tanh_activator(pad, in_size))
    in3_coder = Coder(tanh_activator(pad, in_size))
    out1_coder = Coder(tanh_activator(pad, out_size))
    out2_coder = Coder(tanh_activator(pad, out_size))

    # (x,y,op) => op(x,y) pairs
    patterns = []
    for op in arith_ops:
        for i in range(10):
            for j in range(10):
                in1 = in1_coder.encode(str(i))
                in2 = in2_coder.encode(str(j))
                in3 = in3_coder.encode(op)

                try:
                    f0, f1 = arith_ops[op]
                    out1 = out1_coder.encode(f0(i, j))
                    out2 = out2_coder.encode(f1(i, j))
                except Exception:
                    out1 = out1_coder.encode("null")
                    out2 = out2_coder.encode("null")

                patterns.append((np.concatenate((in1, in2, in3), axis=0),
                                 np.concatenate((out1, out2), axis=0)))

    net.train(epochs, patterns, verbose=verbose)
Example #7
0
class Predictor:
    def __init__(self):
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        self.commits = self.vcs.getAllVersions(config.getBranch())
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        resolved = self.codeChecker.diffResolved(config.getCcRunName(),
                                                 config.getTmpDir(), self.ccdb)
        ids = []
        for bug in resolved:
            ids.append(bug['reportId'])
        return ids

    def predict(self, id, checker):
        # Load all bugs
        print("Loading bug data...")
        ids = []
        if id == -1:
            bugs = self.ccdb.getAllBugsForChecker(checker)
            ids = [x[0] for x in bugs]
        else:
            ids.append(id)

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        vLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for i in ids:
            allData = self.ccdb.getBugData(i)
            bugChecker = allData.getChecker()
            if (bugChecker not in globals.availableCheckers
                    or bugChecker != checker):
                print("Bug #{0} - checker not supported".format(i))
            else:
                # Load extra tokens from checker message
                checkerInfo = self.checkers.extractTokensForChecker(
                    allData.getChecker(), allData.getMessage())
                # Retrieve code fragment with bug
                fileRelativePath = self.convertFilePathToRepoRelativePath(
                    allData.getFile())
                fullCodeWithBug = self.vcs.getFileContents(
                    fileRelativePath, self.commits[self.currentCommitIndex])
                extractor = CodeExtractor(allData)
                extractor.loadCodeFromText(fullCodeWithBug)
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = self.coder.encode(
                    bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]
                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print(
                        "Bug #{0} - Code too big for model, ignored".format(i))
                    continue
                elif id == -1:
                    print("Bug #{0} - Good to go".format(i))
                    continue
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = self.coder.applyPadding(
                        encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, self.totalDictionaryLength))
                X[0] = self.coder.convertToOneHot(
                    encodedBugData,
                    np.zeros((MODEL_X_MAX_LEN, self.totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = self.coder.convertFromOneHot(model.predict(X)[0])
                print(Y)
                # Decode
                Y = self.coder.removePadding(Y)
                fixCodeFragment = self.coder.decode(Y, initialUnkList)

                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
                    # Run CodeChecker and analyze code
                    self.codeChecker.check(True)
                    resolvedIds = self.getDiffResolvedIds()
                    # Check if ID is resolved in tmp folder
                    isFixed = i in resolvedIds
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                #Print
                print("Bug #{0} - summary".format(i))
                print("== Code fragment with bug ==")
                print(bugCodeFragment)
                print("== Suggested fix ==")
                print(fixCodeFragment)
                print("Verification: {0}".format(vLabels[vStatus]))
                a = ' '
                while a != 'y' and a != 'n':
                    a = input("Apply fix? (y/n): ")
                if a == 'y':
                    if not config.cfVerifyPrediction:
                        # Apply fix in source code file
                        extractor.applyFix(fixCodeFragment)
                        extractor.saveToFile(allData.getFile())
                elif config.cfVerifyPrediction:
                    # Revert file contents
                    self.vcs.checkout(self.commits[self.currentCommitIndex])
                print('Done')
        print("All done, exiting...")
Example #8
0
        for from_layer in ["FEF", "SC"]:
            v_new = s.add_transit(new_state=to_layer + from_layer,
                                  gates=v_old,
                                  op1=to_layer,
                                  op2=from_layer)

    print(c.list_tokens())

    weights, biases, _, residual = s.flash()
    for k in weights:
        w, b = weights[k], biases[k]
        print(k)
        print(w)
        print(b.T)

    a = {"gates": v_old, "op1": c.encode("SC"), "op2": c.encode("SC")}
    wvb = np.zeros(v_old.shape)
    for k in weights:
        w, b = weights[k], biases[k]
        wvb += w.dot(a[k[1]]) + b
    z = np.zeros(v_old.shape)
    a = {"gates": act.f(wvb), "op1": z, "op2": z}
    wvb = np.zeros(v_old.shape)
    for k in weights:
        w, b = weights[k], biases[k]
        wvb += w.dot(a[k[1]]) + b
    v_test = act.f(wvb)

    for v in [v_old, v_test, v_new]:
        print(c.decode(v), v.T)
    print(act.e(v_test, v_new).T)
Example #9
0
    def main(self):
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return
        
        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load models
        models = {}
        for checker in globals.availableCheckers:
            models[checker] = load_model(config.cfModelFilenameFormat.format(checker))

        # Load all content from files having new bugs
        files = {self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs}
        fileContents = {}
        for f in files:
            fn = config.getRepoDir() + f
            with open(fn, 'r') as fh:
                fileContents[f] = fh.read()

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
            bugs.sort(key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
                # Prepare useful data
                dictionary = Dictionary(b.getChecker())
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = models[b.getChecker()].get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(models[b.getChecker()].predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]
                
                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(b.getFile())
                    # Run CodeChecker and analyze code
                    shutil.rmtree(config.getTmpDir())
                    compilationLog = self.codeChecker.check(True)
                    newBugsAfterFix = self.getDiffNew()
                    # Check if ID is resolved in tmp folder
                    isFixed = 'Build failed' not in compilationLog
                    for nb in newBugsAfterFix:
                        if self.isBugDataEqual(b, nb):
                            isFixed = False
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                    # Revert file
                    extractor.loadCodeFromText(fileContents[f])
                    extractor.saveToFile(b.getFile())
                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
                suggestions.append(sugg)
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            apply = False
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'a':
                    apply = True
                    choice = False
                    print("Applying fixes...")
                elif c == 'i':
                    choice = False
                    print("Fixes ignored...")
                elif c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
            if apply:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'c':
                    choice = False
                    print("Continuing...")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
        else:
            print("Bugs corrected, commit is good to go!")
Example #10
0
def generate_train_data(args):
    """
    Generates train data for the region provided through arguments.

    Parameters
    ----------
    args : tuple of (reads_path, truth_genome_path, ref, region)
        reads_path : path to the aligned reads file
        truth_genome_path : path to the truth genome
        ref : reference sequence
        region : region for which data is required

    Returns
    -------
    region_name : region name
    positions : positions corresponding to the provided region
    examples : examples corresponding to the provided region
    labels : labels corresponding to the provided region
    """

    reads_path, truth_genome_path, ref, region = args

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)

    print(f'>> finished generating labels for {region.name}:{region.start}-{region.end}')

    if not filtered_aligns:
        print('>> no alignments')
        return None

    positions = []
    examples = []
    labels = []

    for align in filtered_aligns:
        position_label_dict = dict()
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        for position, label in zip(pos, lbls):
            if label == Coder.encode(Coder.UNKNOWN):
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        sorted_positions = sorted(list(position_label_dict.keys()))
        region_string = f'{region.name}:{sorted_positions[0][0] + 1}-{sorted_positions[-1][0]}'
        result = gen.generate_features(reads_path, str(ref), region_string)

        for P, X in zip(*result):
            Y = []
            to_yield = True

            for p in P:
                assert is_in_region(p[0], filtered_aligns)

                if p in positions_with_unknown_base:
                    to_yield = False
                    break

                try:
                    y_label = position_label_dict[p]
                except KeyError:
                    if p[1] != 0:
                        y_label = Coder.encode(Coder.GAP)
                    else:
                        raise KeyError(f'error: No label mapping for position {p}!')

                Y.append(y_label)

            if to_yield:
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples, labels
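
# Not part of the original snippet: a hedged sketch of how generate_train_data
# might be driven, given that it takes its inputs as a single 4-tuple (e.g. so
# regions can be mapped over a worker pool). The names below are placeholders.
#
#   from multiprocessing import Pool
#   tasks = [(reads_path, truth_genome_path, ref, region) for region in regions]
#   with Pool() as pool:
#       results = pool.map(generate_train_data, tasks)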
Example #11
0
"""
Python program to realize the
simple stenography which implements both 
coding and decoding part.

:Author: Manthan C S
:GitHub: mnthnx64
"""

from coder import Coder
from decoder import Decoder

if __name__ == '__main__':
    cdr = Coder("In all the examples so far, the elements of a are provided by the iterator one at a time, because all the looping logic is internal to the iterator. While this is simple and convenient, it is not very efficient. A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited.")
    cdr.encode()
    dcdr = Decoder()
    text = dcdr.decode()
    print(text)
Example #12
0
import json
import sys

# config, CFDatabase, Checkers, Dictionary, and Coder are project-local
# modules assumed to be imported in the original file.
class LearningDataBuilder:
    def __init__(self):
        self.db = CFDatabase(config.getCfDbFile())
        self.checkers = Checkers()

    def build(self, checker):
        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)

        # Load all data from DB
        print("Fetching data from database...")
        allData = self.db.getFixDataForChecker(checker)
        allDataLen = len(allData)
        print("Done, fetched {0} records".format(allDataLen))
        if allDataLen < 1:
            print("No data found")
            return

        # Encode all data
        print("Encoding all data and writing to output file...")
        i = 0
        (maxBug, maxFix,
         maxUnk) = self.checkers.getModelStatsForChecker(checker)
        with open(config.cfTrainFilenameFormat.format(checker), 'w') as f:
            while i < allDataLen:
                checkerInfo = self.checkers.extractTokensForChecker(
                    checker, allData[i][4])
                encodedBugData, initialUnkList = self.coder.encode(
                    allData[i][1], checkerData=checkerInfo)
                encodedFixData, finalUnkList = self.coder.encode(
                    allData[i][2], unkList=initialUnkList, reverse=False)
                if -1 in encodedBugData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif -1 in encodedFixData:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                elif len(encodedBugData) > maxBug or len(
                        encodedFixData) > maxFix or len(finalUnkList) > maxUnk:
                    print(
                        "{0}: [{2} - {3} ({1})] Some tokens were not parsed (lengths), ignoring (lenUnk = {1})"
                        .format(i + 1, len(finalUnkList), len(encodedBugData),
                                len(encodedFixData)))
                else:
                    print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                        i + 1, len(finalUnkList), len(encodedBugData),
                        len(encodedFixData)))
                    f.write(
                        json.dumps({
                            'x': encodedBugData,
                            'y': encodedFixData
                        }) + '\n')

                i += 1
                print('Done {0}'.format(i), file=sys.stderr)

        print("All done, exiting...")
Example #13
0
from coder import Coder, MorseCoder
import string

if __name__ == "__main__":
    # By default, the translator encodes input text by switching it to uppercase
    translator = Coder(string.ascii_lowercase, string.ascii_uppercase)

    while True:
        line = input()
        coded = translator.encode(line)
        print(coded)
        print(translator.decode(coded))