Example #1
 def loadloc(self):
     filereader = FileReader("../NE_INFO.csv")
     neididx = filereader.getattridx("NE_ID")
     noidx = filereader.getattridx("NE_NO")
     nameidx = filereader.getattridx("NE_NAME")
     ididx = filereader.getattridx("ID_IN_NM")
     self.m_locdata = []
     while True:
         tmptran = filereader.readtransection()
         if tmptran is None:
             break
         innminfo = tmptran[ididx]
         siteididx = innminfo.find("BtsSiteMgr=BCF-")
         if siteididx == -1:
             siteididx = innminfo.find("BtsSiteMgr=")
             if siteididx == -1:
                 innm = "-1"
             else:
                 innm = innminfo[siteididx + len("BtsSiteMgr="):]
         else:
             # innm = innminfo[siteididx+len("BtsSiteMgr=BCF-"):]
             innm = "-1"
         self.m_locdata.append(
             NE(tmptran[noidx], tmptran[nameidx], tmptran[ididx],
                tmptran[neididx]))
Example #2
def printinfo():
    fnamelist = ["../10"+str(v)+".csv" for v in xrange(22,23)]
    cnt = 0
    found = 0
    wholeresult = {}
    # writefile = open("../cleandata","w")

    missloc = {}
    for fname in fnamelist:
        filereader = FileReader(fname)
        alarmcode = filereader.getattridx("ALARMCODE")
        attridx = filereader.getattridx("SUMMARY")
        locidx = filereader.getattridx("LOCATION")
        timeidx = filereader.getattridx("ALARMHAPPENTIME")
        cntidx = 0
        while True:
            tmptran = filereader.readtransection()
            cntidx += 1
            # print cntidx
            if tmptran is None:
                filereader.close()
                break

            summary = tmptran[attridx]
            location = tmptran[locidx]
            if location.startswith("SU6095-SU2551"):
                print "SUMMARY:"
                print summary
                print "--------------"
                print "LOCATION"
                print location
Example #3
 def setUp(self):
     self.tmpfn = tempfile.mktemp("filereadertest")
     fp = open(self.tmpfn, 'w')
     for line in self.lines:
         fp.write(line)
     fp.close()
     self.f = FileReader(self.tmpfn)
Example #4
def tongjidistype(fname, attrname):
    filereader = FileReader(fname)
    attridx = filereader.getattridx(attrname)
    trandata = {}
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            break
        summary = tmptran[attridx]
        if summary not in trandata:
            trandata[summary] = 0
        trandata[summary] += 1

    valuelist = trandata.values()
    valuelist.sort()
    c = Counter(valuelist)
    keylist = c.keys()
    keylist.sort()
    for key in keylist:
        print key, "\t:\t", c[key]

    itemslist = trandata.items()
    itemslist.sort(key=lambda v: v[1], reverse=True)
    # for key,value in itemslist:
    #     print key
    #     print value
    #     raw_input()

    # print valuelist
    # raw_input()
    print "========================================================"
    import pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(trandata)
    print "length:", len(trandata)
Example #5
def execution():

    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbosity",
                        type=str,
                        help="increase output verbosity")

    parser.add_argument("-i",
                        "--input",
                        type=str,
                        default="data_kidney",
                        help="input")

    parser.add_argument("-o",
                        "--output",
                        type=str,
                        default='data_kidney.jpg',
                        help="output")

    args = parser.parse_args()

    file_reader = FileReader(args.input)
    scanconversion = ScanConverter(file_reader)
    scanconversion.convert(file_reader, args.output)
Example #6
def tongjilocation():
    filereader = FileReader("../1022.csv")
    attridx = filereader.getattridx("LOCATION")
    trandata = {}
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            break
        loc = tmptran[attridx]
        try:
            locinfo = loc.split(";")[1].split("/")
            targetloc = locinfo[0]
            if targetloc not in trandata:
                trandata[targetloc] = 0
            trandata[targetloc] += 1
        except:
            print loc
            # raw_input()

    valuelist = trandata.values()
    valuelist.sort()
    c = Counter(valuelist)
    keylist = c.keys()
    keylist.sort()
    for key in keylist:
        print key,"\t:\t",c[key]
        raw_input()
    raw_input()
    itemslist = trandata.items()
    itemslist.sort(key=lambda v:v[1],reverse=True)
    for key,value in itemslist:
        print key
        print value
        raw_input()
Example #7
    def __init__(self):

        # this is determined by whether we read from a file or not
        self.data_set = None
        self.data_targets = None

        self.file_reader = FileReader()
        # these hold our training and testing values after we split the data
        self.training_data = None
        self.training_targets = None
        self.test_data = None
        self.test_targets = None
        # these values hold the label encoded arrays for working with sklearn's implementation
        self.sklearn_training_data = None
        self.sklearn_training_targets = None
        self.sklearn_testing_data = None
        self.sklearn_testing_targets = None

        self.most_common = None

        self.classifier = None
        self.model = None
        self.predicted_targets = None

        pandas.options.mode.chained_assignment = None
Example #8
 def list_file_recursive(self, path):
     """Print list of given file's contents recursively."""
     with FileReader(path, 'rb') as file:
         decoder = codec.getDecoderForFile(file)
         decoder = decoder(file, None)
         items = []
         for obj in decoder.objects:
             self._list_recursive(obj)
Example #9
 def read_file_test(self):
     file_reader = FileReader("data/partitions.txt")
     cluster_points = file_reader.read_file_to_cluster_points()
     self.assertIsInstance(cluster_points, list)
     self.assertGreater(len(cluster_points), 1)
     self.assertEqual(cluster_points[0].point_id, 1)
     self.assertEqual(cluster_points[0].cluster_id, 2)
     for cluster_point in cluster_points:
         self.assertIsInstance(cluster_point, ClusterPoint)
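The test above relies on read_file_to_cluster_points returning ClusterPoint objects that carry point_id and cluster_id. The real implementation is not shown in this listing; the following is a minimal hypothetical sketch, assuming separator- or whitespace-delimited "point_id cluster_id" rows (the FileReader(path, separator) constructor mirrors the call in Example #29 below, which is itself an assumption):

class ClusterPoint:
    """Hypothetical sketch: a point id paired with the id of its cluster."""
    def __init__(self, point_id, cluster_id):
        self.point_id = point_id
        self.cluster_id = cluster_id


class FileReader:
    """Hypothetical sketch of the reader exercised by the test above."""
    def __init__(self, path, separator=None):
        self.path = path
        self.separator = separator  # None means "split on any whitespace"

    def read_file_to_cluster_points(self):
        cluster_points = []
        with open(self.path) as fp:
            for line in fp:
                parts = line.split(self.separator)
                if len(parts) < 2:
                    continue  # skip blank or malformed lines
                cluster_points.append(ClusterPoint(int(parts[0]), int(parts[1])))
        return cluster_points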
Example #10
 def makeFileReader(self, file, mode='rb') -> FileReader:
     """Make a FileReader for the given file,
     with default settings for this app.
     """
     log.debug("makeFileReader(%s: %s)", file, getattr(file, 'name', None))
     if isinstance(file, FileReader): return file
     return FileReader(file,
                       mode,
                       endian=self.endian,
                       defaultStringLengthFmt='H')
Example #11
def printidentifier():
    filereader = FileReader("../1022.csv")
    identifieridx = filereader.getattridx("NEIDENTIFIER")
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            filereader.close()
            break
        identifier = tmptran[identifieridx]
        print identifier
        raw_input()
Example #12
def process_dump(input_file, out_file, workers_count):
    """
        :param input_file: name of the wikipedia dump file; '-' to read from stdin
        :param out_file: directory where to store extracted data, or '-' for stdout
        :param workers_count: number of extraction processes to spawn.
        """

    logging.info("Starting map reduce processes...")

    workers_count = max(1, workers_count)
    maxsize = 10 * workers_count

    # output queue
    output_queue = Queue(maxsize=maxsize)
    # input queue
    jobs_queue = Queue(maxsize=maxsize)

    file_reader = FileReader(input_file)
    database_writer = DatabaseWriter(config, buffer_size=1000)
    # database_writer.check_connection()

    workers = []
    for i in range(workers_count):
        worker = json_processor_class(i)
        extractor = Instance(target=worker.execute,
                             args=(jobs_queue, output_queue))
        extractor.daemon = True  # only live while parent process lives
        extractor.start()

        worker.process = extractor
        workers.append(worker)

    output = Instance(target=database_writer.execute, args=(output_queue, ))
    output.start()

    output_queue_size = lambda: output_queue.qsize()
    # map job that sorts and prints output
    map = Instance(target=file_reader.execute,
                   args=(jobs_queue, output_queue_size))
    map.start()

    map.join()

    logging.info("Completing workers...")
    for _ in workers:
        jobs_queue.put(None)

    for w in workers:
        w.process.join()

    logging.info("Completing database writer...")
    output_queue.put(None)
    output.join()
Example #13
    def loadtopo(self):
        self.m_directtopo = {}
        self.m_fatherdata = {}
        self.m_topodict = {}
        filereader = FileReader("../NE_TOPO_INFO.csv")
        neidx = filereader.getattridx("NE_ID")
        parentidx = filereader.getattridx("PARENT_NE_ID")
        empty = 0
        nonempty = 0
        halfempty = 0
        while True:
            tmptran = filereader.readtransection()
            if tmptran is None:
                break
            neid = tmptran[neidx]
            parentneid = tmptran[parentidx]
            childne = self.getnebysiteid(neid)
            parentne = self.getnebysiteid(parentneid)
            if childne is None and parentne is None:
                empty += 1
                continue
            elif childne is None or parentne is None:
                halfempty += 1
                if parentne is None:
                    childnename = childne.m_name
                    parentnename = parentneid

                    if childnename not in self.m_fatherdata:
                        self.m_fatherdata[childnename] = []
                    self.m_fatherdata[childnename].append(parentnename)
                continue
            else:
                nonempty += 1
            childnename = childne.m_name
            parentnename = parentne.m_name

            if childnename not in self.m_fatherdata:
                self.m_fatherdata[childnename] = []
            self.m_fatherdata[childnename].append(parentnename)

            if childnename not in self.m_topodict:
                self.m_topodict[childnename] = []
            if parentnename not in self.m_topodict:
                self.m_topodict[parentnename] = []
            if parentnename not in self.m_directtopo:
                self.m_directtopo[parentnename] = []
            self.m_topodict[childnename].append(parentnename)
            self.m_topodict[parentnename].append(childnename)
            self.m_directtopo[parentnename].append(childnename)
        print "empty:", empty
        print "halfempty:", halfempty
        print "nonempty:", nonempty
Example #14
def tongjinetype():
    filereader = FileReader("../NE_INFO.csv")
    attridx = filereader.getattridx("NE_CAT_ID")
    trandata = {}
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            break
        summary = tmptran[attridx]
        if summary not in trandata:
            trandata[summary] = 0
        trandata[summary] += 1
    import pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(trandata)
Example #15
    def addmesh(self, meshfilelocation, pixelrange):
        meshfile = FileReader(meshfilelocation, "read")
        meshdata = []
        color = [255, 255, 255]
        complete = False
        width = 1
        for x in meshfile.fileOutput:
            if "color" in x:
                color = list(map(int, x.split("=")[1].split(" ")))
            elif "linewidth" in x:
                width = int(x.split("=")[1])
            elif "complete" in x:
                complete = bool(x.split("=")[1])
            else:
                meshdata.append(list(map(int, x.split(" "))))

        self.drawMeshList.append(
            Mesh(meshdata, pixelrange, color, complete, width))
Example #16
def test_file_reader():
    # try:
    #     reader = FileReader('file.txt')
    #     reader.read_next_line()
    #     reader.read_next_line()
    #     reader.read_next_line()
    # # except IOError:
    # #     print('IOError exception')
    # finally:
    #     # check if variable reader exists
    #     # this might be dangerous if another reader
    #     # has been declared before
    #     if ('reader' in locals()):
    #         reader.close()

    # ensure that resources will be freed properly
    with FileReader('file.txt') as reader:
        reader.read_next_line()
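The commented-out version above shows why manual cleanup is fragile; the with-statement delegates it to the context-manager protocol. For reference, a minimal hypothetical sketch of a FileReader that would support this usage (only read_next_line and close are taken from the example; everything else is an assumption, not the original implementation):

class FileReader:
    """Hypothetical sketch: wraps a text file and releases it via the with-statement."""

    def __init__(self, path):
        self._fp = open(path)  # may raise IOError/OSError if the file is missing

    def read_next_line(self):
        return self._fp.readline()  # returns '' at end of file

    def close(self):
        self._fp.close()

    def __enter__(self):
        return self  # the object bound by "as reader"

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()  # runs even if the with-body raised
        return False  # do not suppress exceptions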
Example #17
    def __init__(self):

        self.iris = datasets.load_iris()
        # this is determined by whether we read from a file or not
        self.data_set = None
        self.data_targets = None

        self.file_reader = FileReader()
        # these hold our training and testing values after we split the data
        self.training_data = None
        self.training_targets = None
        self.test_data = None
        self.test_targets = None

        self.classifier = None
        self.model = None

        self.predicted_targets = None
Example #18
def calneidlen():
    filereader = FileReader("../NE_INFO.csv")
    attridx = filereader.getattridx("NE_NO")
    lendict = {}
    idx = 0
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            break
        neid = tmptran[attridx]
        neidlen = len(neid)
        if neidlen not in lendict:
            lendict[neidlen] = 0
        lendict[neidlen] += 1
        idx += 1
        print neid
        if idx % 100 == 0:
            raw_input()
    import pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(lendict)
Example #19
def testwrongfile():
    filereader = FileReader("../wrongdocfile")
    alarmcodeidx = filereader.getattridx("ALARMCODE")
    attridx = filereader.getattridx("SUMMARY")
    locidx = filereader.getattridx("LOCATION")
    timeidx = filereader.getattridx("ALARMHAPPENTIME")
    print "idxdata:", timeidx, alarmcodeidx, locidx, attridx
    print filereader.m_header
    print filereader.m_headerlen
    while True:
        tmptran = filereader.readtransection()
        if tmptran is None:
            filereader.close()
            break

        summary = tmptran[attridx]
        location = tmptran[locidx]
        alarmcode = tmptran[alarmcodeidx]
        timestr = tmptran[timeidx]
        print tmptran
        raw_input()
Example #20
    def solve(self):
        results = {}
        reader = FileReader(self.fileName)
        procNum = reader.readline()
        taskNum = reader.readline()

        execTimes = [reader.readline() for _ in range(taskNum)]

        times = []

        genetics = [
            PCMaxGenetic(execTimes, procNum) for _ in range(self.instNum)
        ]
        for genetic in genetics:
            try:
                _, cmax = genetic.solve(self.iterNum, self.normIter,
                                        self.mutIter)
                times.append(cmax)
            except OptimumFoundException, e:
                times.append(e.cmax)
                break
Example #21
def process_file():
    file_name = request.json['file_name']
    chunk_size = request.json['chunk_size']

    logger.file_logger.init_logger()

    api_manager = MeliApiManager(config.AppConfig.api_base_url)
    formatter = file_format_classes[config.AppConfig.file_format](
        config.AppConfig.file_line_separator)
    file_reader = FileReader(file_name, formatter,
                             config.AppConfig.file_encoding)

    fp = FileProcessor(file_reader, chunk_size, api_manager)
    st = time.time()
    fp.process()
    print('Total api calls were ' + str(api_manager.api_calls_count))
    total_time = time.time() - st
    print("Time is " + str(total_time))
    logger.file_logger.stop_logger()
    return jsonify({
        "total_time_seconds": total_time,
        "http_requests_performed": api_manager.api_calls_count,
        "more_info": config.AppConfig.info
    })
Example #22
class ReproductionAndTraining:
    """
    Checks the commit data and reproduces the filtering
    It also provides a classifier that can be used on commits.
    """

    # Files to be read
    f_all = os.getcwd() + "/data/raw/all_commits.txt"
    f_f_band = os.getcwd() + "/data/raw/filterband.txt"
    f_f_frame = os.getcwd() + "/data/raw/filterframe.txt"
    f_f_memory = os.getcwd() + "/data/raw/filtermem.txt"
    f_f_perf = os.getcwd() + "/data/raw/filterperf.txt"
    f_p_band_cache = os.getcwd() + "/data/processed/Categories/Band/cache.txt"
    f_p_band_redundancy = os.getcwd(
    ) + "/data/processed/Categories/Band/reduncancy.txt"
    f_p_band_throttling = os.getcwd(
    ) + "/data/processed/Categories/Band/throttling.txt"
    f_p_band_unknown = os.getcwd(
    ) + "/data/processed/Categories/Band/unknown.txt"
    f_p_frame_redundant = os.getcwd(
    ) + "/data/processed/Categories/Frame/redundant.txt"
    f_p_frame_threading = os.getcwd(
    ) + "/data/processed/Categories/Frame/Threading.txt"
    f_p_frame_unknown = os.getcwd(
    ) + "/data/processed/Categories/Frame/Unknown.txt"
    f_p_frame_visual = os.getcwd(
    ) + "/data/processed/Categories/Frame/Visual.txt"
    f_p_memory_assets = os.getcwd(
    ) + "/data/processed/Categories/Memory/Assests.txt"
    f_p_memory_fixleak = os.getcwd(
    ) + "/data/processed/Categories/Memory/FixLeak.txt"
    f_p_memory_lowmem = os.getcwd(
    ) + "/data/processed/Categories/Memory/LowMem.txt"
    f_p_memory_reducesizedata = os.getcwd(
    ) + "/data/processed/Categories/Memory/reduceSizeData.txt"
    f_p_memory_unknown = os.getcwd(
    ) + "/data/processed/Categories/Memory/Unknown.txt"
    f_p_perf_algorithm = os.getcwd(
    ) + "/data/processed/Categories/Perf/Algorithm.txt"
    f_p_perf_assets = os.getcwd(
    ) + "/data/processed/Categories/Perf/assets.txt"
    f_p_perf_caching = os.getcwd(
    ) + "/data/processed/Categories/Perf/caching.txt"
    f_p_perf_concurrency = os.getcwd(
    ) + "/data/processed/Categories/Perf/Concurrency.txt"
    f_p_perf_datastructure = os.getcwd(
    ) + "/data/processed/Categories/Perf/DataStructure.txt"
    f_p_perf_earlyreturn = os.getcwd(
    ) + "/data/processed/Categories/Perf/EarlyReturn.txt"
    f_p_perf_orderofoperations = os.getcwd(
    ) + "/data/processed/Categories/Perf/OrderOFOperations.txt"
    f_p_perf_parsing = os.getcwd(
    ) + "/data/processed/Categories/Perf/Parsing.txt"
    f_p_perf_redundancy = os.getcwd(
    ) + "/data/processed/Categories/Perf/redundancy.txt"
    f_p_perf_sqlquery = os.getcwd(
    ) + "/data/processed/Categories/Perf/SQLQuery.txt"
    f_p_perf_timeout = os.getcwd(
    ) + "/data/processed/Categories/Perf/TimeOut.txt"
    f_p_perf_unknown = os.getcwd(
    ) + "/data/processed/Categories/Perf/Unknown.txt"

    # from https://github.com/amazuerar/perf-bugs-mobile/blob/master/bug-fixing-commits-performance.csv
    f_external_perf = os.getcwd() + "/data/external/performance_commits.txt"
    # from http://gustavopinto.org/energy-aware-mining/
    f_external_energy = os.getcwd() + "/data/external/energy_commits.txt"
    # full external sets, not just the ones they identified as relevant
    f_external_dataset_perf = os.getcwd(
    ) + "/data/external/performance_full.txt"
    f_external_dataset_energy = os.getcwd() + "/data/external/energy_full.txt"
    f_toBeClassified = os.getcwd() + "/data/commits.txt"
    #f_compare = os.getcwd() + "/relevant.txt"
    # keywords used for each type
    keywords_band = [
        "network", "bandwidth", "size", "download", "upload", "socket"
    ]
    keywords_frame = ["jank", "frame", "respons", "lag"]  # excluded "hang"
    keywords_memory = ["memory", "leak", "size", "cache", "buffer", "space"]
    keywords_perf = ["effic", "speed", "time", "perform", "slow", "fast"]
    keywords = []
    keywords.extend(keywords_band)
    keywords.extend(keywords_frame)
    keywords.extend(keywords_memory)
    keywords.extend(keywords_perf)
    band = []
    frame = []
    memory = []
    performance = []

    # processing units
    fr = FileReader()

    def compare_keywords(self, relevant, all, compare, originalDs=False):

        if not originalDs:
            new_all = list()
            count_keywords = 0
            for commit in all:
                if any(word in commit.text for word in self.keywords):
                    count_keywords += 1
                    new_all.append(commit)
            print("Keyword filter " + str(count_keywords))
            # I have to use the pre-filter here because my PC does not have enough RAM.
            all = new_all

        # TODO pre_fn and feature_fn of best
        print("Starting algorithm analysis on keywords" +
              datetime.now().strftime("%H:%M:%S"))
        labels = ["relevant" for i in range(len(relevant))]
        relevant_features = [
            " ".join(self.stem_text(x.text)) for x in relevant
        ]
        irrelevant_features = [
            " ".join(self.stem_text(x.text)) for x in self.irrelevant_commits
        ]
        unknown_features = [" ".join(self.stem_text(x.text)) for x in all]
        features = []
        features.extend(relevant_features)
        features.extend(irrelevant_features)
        features.extend(unknown_features)

        x = self.tf_idf(features)
        y = labels
        y.extend(["irrelevant" for i in range(len(irrelevant_features))])
        print("Featurized: " + datetime.now().strftime("%H:%M:%S"))

        # ORIGINAL VERSION + Version where relevant are balanced
        x_sub = x[:len(relevant_features) + len(irrelevant_features)]
        y_sub = y[:len(relevant_features) + len(irrelevant_features)]
        x_unknown = x[len(relevant_features) + len(irrelevant_features):]

        classifier = DecisionTreeClassifier()

        # train classifier
        classifier.fit(x_sub, y_sub)
        print("Trained: " + datetime.now().strftime("%H:%M:%S"))

        # evaluate model
        x_all_prediction = classifier.predict(x_unknown)
        for i in range(len(x_all_prediction)):
            if x_all_prediction[i] == "relevant":
                print(all[i])
        calc_relevant = 0
        calc_additional = 0
        for i in range(len(all)):
            if x_all_prediction[i] == "relevant":
                if all[i] in compare:
                    calc_relevant += 1
                else:
                    calc_additional += 1
                    print("additional commit " + all[i].cmt_hash)

        print("relevant: " + str(calc_relevant))
        print("additional: " + str(calc_additional))

        print("results for repo:")
        for i in range(len(all)):
            if x_all_prediction[i] == "relevant":
                print(all[i].fullString)
        if originalDs:
            calc_perf = 0
            calc_mem = 0
            calc_band = 0
            calc_fram = 0
            for i in range(len(all)):
                if x_all_prediction[i] == "relevant":
                    if all[i] in self.performance:
                        calc_perf += 1
                    if all[i] in self.memory:
                        calc_mem += 1
                    if all[i] in self.band:
                        calc_band += 1
                    if all[i] in self.frame:
                        calc_fram += 1
            print("execution time " + str(calc_perf))
            print("memory " + str(calc_mem))
            print("bandwidth " + str(calc_band))
            print("framerate " + str(calc_fram))

        return None

    def __init__(self) -> None:
        super().__init__()

        self.important_words = dict()
        self.important_words["new_word"] = {
            "r1": 0.1,
            "r2": 2,
            "word": "new_word"
        }

        # init nltk and sklearn
        nltk.download("punkt")
        nltk.download("stopwords")
        nltk.download("wordnet")
        nltk.download("averaged_perceptron_tagger")
        nltk.download("words")
        nltk.download("maxent_ne_chunker")
        nltk.download("vader_lexicon")
        self.stops = stopwords.words("english")
        self.count_vectorizer = CountVectorizer()
        self.tfdif_vectorizer = TfidfVectorizer()
        self.stemmer = nltk.PorterStemmer()
        self.lemmatizer = nltk.WordNetLemmatizer()

        print("Initializing")

        band_cache = self.fr.parse(self.f_p_band_cache)
        self.band.extend(band_cache)
        band_redundancy = self.fr.parse(self.f_p_band_redundancy)
        self.band.extend(band_redundancy)
        band_throttling = self.fr.parse(self.f_p_band_throttling)
        self.band.extend(band_throttling)
        band_unknown = self.fr.parse(self.f_p_band_unknown)
        self.band.extend(band_unknown)
        # self.check_duplicates("band", self.band)
        self.band = list(set(self.band))

        frame_redundant = self.fr.parse(self.f_p_frame_redundant)
        self.frame.extend(frame_redundant)
        frame_threading = self.fr.parse(self.f_p_frame_threading)
        self.frame.extend(frame_threading)
        frame_unknown = self.fr.parse(self.f_p_frame_unknown)
        self.frame.extend(frame_unknown)
        frame_visual = self.fr.parse(self.f_p_frame_visual)
        self.frame.extend(frame_visual)
        # self.check_duplicates("frame", self.frame)
        self.frame = list(set(self.frame))

        memory_assets = self.fr.parse(self.f_p_memory_assets)
        self.memory.extend(memory_assets)
        memory_fixleak = self.fr.parse(self.f_p_memory_fixleak)
        self.memory.extend(memory_fixleak)
        memory_lowmem = self.fr.parse(self.f_p_memory_lowmem)
        self.memory.extend(memory_lowmem)
        memory_unknown = self.fr.parse(self.f_p_memory_unknown)
        self.memory.extend(memory_unknown)
        memory_reducesizedata = self.fr.parse(self.f_p_memory_reducesizedata)
        self.memory.extend(memory_reducesizedata)
        # self.check_duplicates("memory", self.memory)
        self.memory = list(set(self.memory))

        performance_algorithm = self.fr.parse(self.f_p_perf_algorithm)
        self.performance.extend(performance_algorithm)
        performance_assets = self.fr.parse(self.f_p_perf_assets)
        self.performance.extend(performance_assets)
        performance_caching = self.fr.parse(self.f_p_perf_caching)
        self.performance.extend(performance_caching)
        performance_concurrency = self.fr.parse(self.f_p_perf_concurrency)
        self.performance.extend(performance_concurrency)
        performance_datastructure = self.fr.parse(self.f_p_perf_datastructure)
        self.performance.extend(performance_datastructure)
        performance_earlyreturn = self.fr.parse(self.f_p_perf_earlyreturn)
        self.performance.extend(performance_earlyreturn)
        performance_orderofoperations = self.fr.parse(
            self.f_p_perf_orderofoperations)
        self.performance.extend(performance_orderofoperations)
        performance_parsing = self.fr.parse(self.f_p_perf_parsing)
        self.performance.extend(performance_parsing)
        performance_redundancy = self.fr.parse(self.f_p_perf_redundancy)
        self.performance.extend(performance_redundancy)
        performance_sqlquery = self.fr.parse(self.f_p_perf_sqlquery)
        self.performance.extend(performance_sqlquery)
        performance_timeout = self.fr.parse(self.f_p_perf_timeout)
        self.performance.extend(performance_timeout)
        performance_unknown = self.fr.parse(self.f_p_perf_unknown)
        self.performance.extend(performance_unknown)
        # self.check_duplicates("performance", self.performance)
        self.performance = list(set(self.performance))

        self.ext_performance = self.fr.parse(self.f_external_perf)
        self.ext_energy = self.fr.parse(self.f_external_energy)
        # self.ext_performance_all = self.fr.parse(self.f_external_dataset_perf)
        self.ext_toBeClassified = self.fr.parse(self.f_toBeClassified)
        self.ext_compare = []  #self.fr.parse(self.f_compare)
        print("Preparing Commit Sets")

        relevant_commits = []
        relevant_commits.extend(self.band)
        relevant_commits.extend(self.frame)
        relevant_commits.extend(self.memory)
        relevant_commits.extend(self.performance)
        # reduce duplicates
        self.relevant_commits = list(set(relevant_commits))
        # add external set
        self.relevant_commits_plus = list(self.relevant_commits)
        self.relevant_commits_plus.extend(self.ext_performance)

        self.f_band = self.fr.parse(self.f_f_band)
        self.f_band = list(set(self.f_band))
        self.f_frame = self.fr.parse(self.f_f_frame)
        self.f_frame = list(set(self.f_frame))
        self.f_memory = self.fr.parse(self.f_f_memory)
        self.f_perf = self.fr.parse(self.f_f_perf)

        self.filtered_commits = []
        self.filtered_commits.extend(self.f_band)
        self.filtered_commits.extend(self.f_frame)
        self.filtered_commits.extend(self.f_memory)
        self.filtered_commits.extend(self.f_perf)

        self.important_words = dict()

        irrelevant_commits = [
            i for i in self.filtered_commits if i not in self.relevant_commits
        ]
        self.irrelevant_commits = list(set(irrelevant_commits))

        self.all_commits = self.fr.parse(self.f_all)
        unknown_commits = [
            i for i in self.all_commits if i not in self.relevant_commits
            and i not in self.irrelevant_commits
        ]
        self.unknown_commits = list(set(unknown_commits))
        self.vocab = set()

        print("Initialization finished")

    def featurize(self, pre_fn, feature_fn, labels, relevant_group):
        # Not repairing the balanced ones as they are pretty terrible
        relevant_features = [" ".join(pre_fn(x.text)) for x in relevant_group]
        irrelevant_features = [
            " ".join(pre_fn(x.text)) for x in self.irrelevant_commits
        ]
        unknown_features = [
            " ".join(pre_fn(x.text)) for x in self.unknown_commits
        ]
        features = []
        features.extend(relevant_features)
        features.extend(irrelevant_features)
        features.extend(unknown_features)
        # VERSION RELEVANT BALANCED -> ALSO add excluded to test set
        # excluded_group = [x for x in self.relevant_commits if x not in relevant_group]
        # features.extend([" ".join(pre_fn(x.text)) for x in excluded_group])

        x = feature_fn(features)
        y = labels
        y.extend(["irrelevant" for i in range(len(irrelevant_features))])
        print("Featurized: " + datetime.now().strftime("%H:%M:%S"))

        # ORIGINAL VERSION + Version where relevant are balanced
        x_sub = x[:len(relevant_features) + len(irrelevant_features)]
        y_sub = y[:len(relevant_features) + len(irrelevant_features)]
        x_unknown = x[len(relevant_features) + len(irrelevant_features):]
        # create sets
        # x_train, x_test, y_train, y_test = train_test_split(x_sub, y_sub, test_size=0.2, random_state=0)
        # # VERSION RELEVANT BALANCED -> ALSO add excluded to test set
        # x_test = np.concatenate((x_test, x[len(x)-len(excluded_group):]))
        # y_test = np.concatenate((y_test, ["relevant" for i in range(len(excluded_group))]))

        # VERSION with equal irrelevant / relevant
        # x_sub = x[:len(relevant_features) + len(relevant_features)]
        # y_sub = y[:len(relevant_features) + len(relevant_features)]
        # x_unknown = x[len(relevant_features) + len(irrelevant_features):]
        # # create sets
        # x_train, x_test, y_train, y_test = train_test_split(x_sub, y_sub, test_size=0.2, random_state=0)
        # # also add the excluded back
        # x_test = np.concatenate((x_test, x[len(relevant_features) + len(relevant_features):len(relevant_features) + len(irrelevant_features)]))
        # y_test = np.concatenate((y_test, y[len(relevant_features) + len(relevant_features):len(relevant_features) + len(irrelevant_features)]))
        # # VERSION RELEVANT BALANCED -> ALSO add excluded to test set
        # x_test = np.concatenate((x_test, x[len(x) - len(excluded_group):]))
        # y_test = np.concatenate((y_test, ["relevant" for i in range(len(excluded_group))]))

        return [x_sub, y_sub]

    def classifier(self, data, out_txt):
        """"
        Attempts to create a classifier based on the manually filtered texts
        For now let's just attempt to predict "relevant" or "not-relevant"
        """
        x_sub = data[0]
        y_sub = data[1]

        print("RUNNING CONFIG " + out_txt)

        # You can test different algorithms by switching the text_clf and parameters around
        # WARNING! Grid-Search is very expensive.
        classifiers = [
            MLPClassifier(),
            KNeighborsClassifier(),
            SVC(),
            # NuSVC(),
            LinearSVC(),
            GaussianProcessClassifier(),
            # RBF(),
            DecisionTreeClassifier(),
            ExtraTreeClassifier(),
            RandomForestClassifier(),
            AdaBoostClassifier(),
            # ExtraTreeClassifier(),
            BaggingClassifier(),
            GradientBoostingClassifier(),
            # VotingClassifier(('lr', LogisticRegression()), ('rf', RandomForestClassifier()), ('gnb', GaussianNB()), voting="soft"),
            GaussianNB(),
            MultinomialNB(),
            # CategoricalNB(),
            BernoulliNB(),
            ComplementNB(),
            QuadraticDiscriminantAnalysis(),
            LinearDiscriminantAnalysis(),
            SGDClassifier(),
            RidgeClassifier(),
            PassiveAggressiveClassifier()
        ]

        class_dict = dict()
        for classifier in classifiers:
            alg_name = type(classifier).__name__
            class_dict[alg_name] = dict()

        count = 10

        for i in range(count):
            x_train, x_test, y_train, y_test = train_test_split(x_sub,
                                                                y_sub,
                                                                test_size=0.2,
                                                                random_state=0)

            for classifier in classifiers:
                alg_name = type(classifier).__name__
                try:
                    # featurize
                    print("Start: " + alg_name + " " +
                          datetime.now().strftime("%H:%M:%S"))

                    # train classifier
                    classifier.fit(x_train, y_train)
                    print("Trained: " + datetime.now().strftime("%H:%M:%S"))

                    # evaluate model
                    y_pred = classifier.predict(x_test)
                    print("Predicted: " + datetime.now().strftime("%H:%M:%S"))
                    class_dict[alg_name]["confusion " +
                                         str(i)] = confusion_matrix(
                                             y_test, y_pred)
                    class_dict[alg_name]["report " +
                                         str(i)] = classification_report(
                                             y_test, y_pred)
                    print(confusion_matrix(y_test, y_pred))
                    print(classification_report(y_test, y_pred))
                    print(accuracy_score(y_test, y_pred))
                except Exception:
                    print(type(classifier).__name__ + " has failed")

        f = open(os.getcwd() + "/data/results/" + out_txt, "a")

        for key, val in class_dict.items():
            f.write("Algorithm " + key + "\n")
            f.write("Confusion Matrices" + "\n")
            f.write(
                "TrueIrrelevant;FalseIrrelevant;FalseRelevant;TrueRelevant" +
                "\n")
            confusionValues = list()
            for i in range(count):
                confString = str(val["confusion " + str(i)][0][0]) + ";" + str(
                    val["confusion " + str(i)][0][1]) + ";" + str(
                        val["confusion " + str(i)][1][0]) + ";" + str(
                            val["confusion " + str(i)][1][1])
                f.write(confString + "\n")
                confusionValues.append(confString)
            f.write(average(confusionValues) + "\n")
            f.write("\nReports\n")
            f.write(
                "I_Precision;I_Recall;I_F1;I_Support;R_Precision;R_Recall;R_F1;R_Support\n"
            )
            repValues = list()
            for i in range(count):
                repString = transformReport(val["report " + str(i)] + "\n")
                f.write(repString + "\n")
                repValues.append(repString)
            f.write(average(repValues) + "\n")
            f.write("\n\n\n")

        f.close()

        # print("Grid search result:")
        # print(classifier.best_params_)
        # allscores = classifier.cv_results_['mean_test_score']
        # print(allscores)

        # Train Classifier for actual use
        # classifier.fit(x_sub, y_sub)
        # print("Trained: " + datetime.now().strftime("%H:%M:%S"))
        #
        # self.predict(classifier, x_unknown, self.unknown_commits, out_txt)
        return None

    def predict(self, classifier, features_x, features: List[Commit], file):
        """
        Predicts a list of commits according to the given features and classifier
        :param classifier: to be used for prediction
        :param features_x: feature vector
        :param features: commits according to feature vector
        :param file: to print results to
        :return: nothing
        """

        f = open(file, "a")
        prediction = classifier.predict(features_x)
        i = 0
        print("Predicting " +
              str(len([x for x in prediction if x != "irrelevant"])) +
              " to be relevant " + file)
        while i < len(prediction):
            if prediction[i] != "irrelevant":
                commit = features[i]
                f.write("commit " + commit.cmt_hash + " " + prediction[i] +
                        "\n")
                f.write("Author: " + commit.author + "\n")
                f.write("Date: " + commit.date + "\n")
                f.write("\n" + commit.text + "\n\n")
            i += 1

    def check(self) -> List[Commit]:  # pylint: disable=R0201
        """"
        Checks that all commits map correctly from raw -> filtered -> manually evaluated
        """
        print("")
        print("----- Checking Commit subset validity -----")

        print("Loaded all " + str(len(self.all_commits)) + " Commits")

        # check Bandwith
        print("Loaded " + str(len(self.f_band)) + " Filtered Bandwith Commits")
        self.contains(self.all_commits, self.f_band)

        print("Loaded " + str(len(self.band)) + " Bandwith Commits")
        self.contains(self.f_band, self.band)

        # Check framerate
        print("Loaded " + str(len(self.f_frame)) +
              " Filtered Framerate Commits")
        self.contains(self.all_commits, self.f_frame)

        print("Loaded " + str(len(self.frame)) + " Framerate Commits")
        self.contains(self.f_frame, self.frame)

        # check memory
        print("Loaded " + str(len(self.f_memory)) + " Filtered Memory Commits")
        self.contains(self.all_commits, self.f_memory)

        print("Loaded " + str(len(self.memory)) + " Memory Commits")
        self.contains(self.f_memory, self.memory)

        # check performance
        print("Loaded " + str(len(self.f_perf)) +
              " Filtered Performance Commits")
        self.contains(self.all_commits, self.f_perf)

        print("Loaded " + str(len(self.performance)) + " Performance Commits")
        self.contains(self.f_perf, self.performance)

        print("----- Checking Overlaps validity -----")
        o_p_m = [i for i in self.performance if i in self.memory]
        o_p_b = [i for i in self.performance if i in self.band]
        o_p_j = [i for i in self.performance if i in self.frame]
        o_m_p = [i for i in self.memory if i in self.performance]
        o_m_b = [i for i in self.memory if i in self.band]
        o_m_j = [i for i in self.memory if i in self.frame]
        o_b_p = [i for i in self.band if i in self.performance]
        o_b_m = [i for i in self.band if i in self.memory]
        o_b_j = [i for i in self.band if i in self.frame]
        o_j_p = [i for i in self.frame if i in self.performance]
        o_j_m = [i for i in self.frame if i in self.memory]
        o_j_b = [i for i in self.frame if i in self.band]

        print("            Performance Memory Bandwith Jankiness")
        print("Performance " + str(len(self.performance)).ljust(12) +
              str(len(o_p_m)).ljust(7) + str(len(o_p_b)).ljust(9) +
              str(len(o_p_j)).ljust(8))
        print("Memory      " + str(len(o_m_p)).ljust(12) +
              str(len(self.memory)).ljust(7) + str(len(o_m_b)).ljust(9) +
              str(len(o_m_j)).ljust(8))
        print("Bandwidth   " + str(len(o_b_p)).ljust(12) +
              str(len(o_b_m)).ljust(7) + str(len(self.band)).ljust(9) +
              str(len(o_b_j)).ljust(8))
        print("Jankiness   " + str(len(o_j_p)).ljust(12) +
              str(len(o_j_m)).ljust(7) + str(len(o_j_b)).ljust(9) +
              str(len(self.frame)).ljust(8))

        print("----- Checking Keyword validity -----")

        print("Relevant to Irrelevant to Unknown: " +
              str(len(self.relevant_commits)) + " / " +
              str(len(self.irrelevant_commits)) + " / " +
              str(len(self.unknown_commits)))

        # check that all commits correspond to a keyword
        print("The following commits do not correspond to any keyword:")
        for commit in self.relevant_commits:
            if not any(word in commit.text for word in self.keywords):
                print(commit.cmt_hash)

        print("")

        # check keyword efficiency
        self.keywords.append("optimi")
        self.keywords.append("storage")
        keydict = dict()
        for word in self.keywords:
            k = Keyword(word)
            keydict[word] = k

        sum = 0
        sizedict = dict()
        for size in range(0, len(self.keywords)):
            sizedict[size] = 0
        for commit in self.relevant_commits:
            match = ""
            cnt = 0
            for keyword in self.keywords:
                if keyword in commit.text:
                    keydict[keyword].positive_true += 1
                    match += keyword + " "
                    cnt += 1
            # print(str(cnt) + " " + match)
            sizedict[cnt] += 1
            sum += cnt
        print("Average matched keywords: " +
              str(sum / len(self.relevant_commits)))
        print("Matches per keyword count: ")
        for k, v in sizedict.items():
            if (v > 0):
                print("  " + str(k) + " - " + str(v))
        for commit in self.irrelevant_commits:
            for keyword in self.keywords:
                if keyword in commit.text:
                    keydict[keyword].positive_false += 1

        for k, v in keydict.items():
            ratio = 100
            if (v.positive_false > 0):
                ratio = v.positive_true / v.positive_false
            print(
                v.keyword.ljust(10) + " p: " + str(v.positive_true).ljust(3) +
                " n: " + str(v.positive_false).ljust(3) + " u: " +
                str(v.unknown).ljust(4) + " r: " + str(ratio)[:3])

        # count word occurrences to see where we get
        print("----- Tokens in commit ratios -----")
        word_dict = dict()
        for commit in self.relevant_commits:
            # exclude tokens occurring more than once per commit
            for token in set(self.tokenize(commit.text)):
                if token in word_dict:
                    word_dict[token] += 1
                else:
                    word_dict[token] = 1
        irrelevant_commits_tokenized = [
            self.tokenize(commit.text) for commit in self.irrelevant_commits
        ]
        unknown_comits_tokenized = [
            self.tokenize(commit.text) for commit in self.unknown_commits
        ]
        for k, v in sorted(word_dict.items(),
                           key=lambda item: item[1],
                           reverse=True):
            negative = 0
            unknown = 0
            ratio = 1000000
            ratio2 = 1000000
            for commit in irrelevant_commits_tokenized:
                if k in commit:
                    negative += 1
            for commit in unknown_comits_tokenized:
                if k in commit:
                    unknown += 1
            if (negative > 0):
                ratio = v / negative
            if (unknown > 0):
                ratio2 = v / unknown
            self.important_words[str(k)] = {
                "r1": ratio,
                "r2": ratio2,
                "word": k
            }
            if ((ratio > 0.5 and unknown > 0) or
                (ratio2 > 0.5 and unknown > 0)) and k not in self.keywords:
                print(
                    str(k).ljust(24) + " p: " + str(v).ljust(2) + " n: " +
                    str(negative).ljust(3) + " u: " + str(unknown).ljust(3) +
                    " rn: " + str(ratio)[:3] + " ru: " + str(ratio2)[:3])

    def tokenize(self, text: str) -> List[str]:
        """"
        Tokenizes a text string
        :param text: string of words to be tokenized
        :return: list of tokens.
        """
        tokens = [
            word for word in word_tokenize(text.lower()) if word.isalpha()
        ]
        tokens = list(re.findall(r"[A-Za-z]+", " ".join(tokens)))
        tokens = [word for word in tokens if word not in self.stops]
        return tokens

    def lemmatize_text(self, text: str) -> List[str]:
        """"
        Conducts lemmatization of given text
        :param text: Text to be tokenized and lemmatized
        :return: array of lemmatized tokens.
        """
        out = [
            self.lemmatizer.lemmatize(token) for token in self.tokenize(text)
        ]
        self.vocab = self.vocab.union(out)
        return out

    def lemmatize_new_text(self, text):
        lemmatized = [
            self.lemmatizer.lemmatize(token) for token in self.tokenize(text)
        ]
        out = []
        for lem in lemmatized:
            if lem in self.vocab:
                out.append(lem)
        return out

    def bag_of_words(self, docs: List[str]) -> List[List[int]]:
        """"
        Featurization via bag of words.
        :param docs: Documents (texts) to be BOWed.
        :return: Feature vector
        """
        return self.count_vectorizer.fit_transform(docs).toarray()

    def bag_of_important_words_stem(self,
                                    docs: List[str],
                                    ratioP=0.75,
                                    ratioN=0.2) -> List[List[int]]:
        """"
        Featurization via a bag of important words (stemmed).
        :param docs: Documents (texts) to be BOWed.
        :return: Feature vector
        """
        important_words = dict()
        for item in self.important_words.values():
            new_word = self.stemmer.stem(item["word"])
            important_words[new_word] = {
                "r1": item["r1"],
                "r2": item["r2"],
                "word": new_word
            }

        imp_words = [
            word["word"] for word in important_words.values()
            if word["r1"] > ratioP or word["r2"] < ratioN
        ]
        return [[1 if word in doc else 0 for word in imp_words]
                for doc in docs]

    def bag_of_important_words_lem(self,
                                   docs: List[str],
                                   ratioP=0.75,
                                   ratioN=0.2) -> List[List[int]]:
        """"
        Featurization via a bag of important words (lemmatized).
        :param docs: Documents (texts) to be BOWed.
        :return: Feature vector
        """
        important_words = dict()
        for item in self.important_words.values():
            new_word = self.lemmatizer.lemmatize(item["word"])
            important_words[new_word] = {
                "r1": item["r1"],
                "r2": item["r2"],
                "word": new_word
            }

        imp_words = [
            word["word"] for word in important_words.values()
            if word["r1"] > ratioP or word["r2"] < ratioN
        ]
        return [[1 if word in doc else 0 for word in imp_words]
                for doc in docs]

    def tf_idf(self, docs: List[str]) -> csr_matrix:
        """"
        Featurization via TF/IDF.
        :param docs: Documents (texts) to be featurized.
        :return: Feature vector
        """
        return self.tfdif_vectorizer.fit_transform(docs).toarray()

    def stem_text(self, text: str) -> List[str]:
        """"
        Conducts stemming of given text
        :param text: Text to be tokenized and stemmed
        :return: array of stemmed tokens.
        """
        return [self.stemmer.stem(token) for token in self.tokenize(text)]

    def check_duplicates(self, name, group: List[Commit]):
        """
        Checks a group of commits for duplicates
        :param name: name of the group (for output)
        :param group: of commits
        :return: nothing
        """
        seen = set()
        not_uniq = set()
        for x in group:
            if x not in seen:
                seen.add(x)
            else:
                not_uniq.add(x)
        for val in not_uniq:
            print(name + " duplicate: " + val.cmt_hash)

    def contains(self, group: List[Commit], contained: List[Commit]):
        """
        Checks if the contained commits are really contained in group
        :param group: to check if contained is in
        :param contained: commits that should be in group
        :return: nothing. Console print if contained items NOT in group
        """
        if not all(e in group for e in contained):
            print("  does not check out")
            for i in contained:
                if not i in group:
                    print("  Failed: " + i.cmt_hash)
Example #23
def main():
    print('Welcome to IPOL interpreter!')

    # returns a list of strings containing the cleaned code
    file_reader = FileReader()
    # tabs removed, double spaces removed
    lines = file_reader.read_file()

    tokenizer = Tokenizer()
    # returns a 2d list containing the tokens per line of code
    tokens_list = tokenizer.tokenize(lines)
    tokens_list_copy = tokens_list.copy()

    # create instance of the parser with the syntax declared in Syntax class
    parser = Parser(syntax=Syntax().get_syntax())

    # iterate each line of the list containing the tokens
    for line in tokens_list:
        recursive_parse(parser, line, callback)


    # create a new instance of the parser, now with the syntax for reducing operations to expressions
    parser = Parser(syntax=Syntax().get_final_syntax())

    # Parse to an expression to see if it is valid
    for line in parsed_list:
        recursive_parse(parser, line, callback1)

    exception_checker = ExceptionCheker()

    for i in range(len(final_parsed_list)):
        # there must be a syntax error because it cannot be converted to a single statement
        # check which kind of exception it is
        if len(final_parsed_list[i]) > 1:
            exception = exception_checker.check_exception(
                final_parsed_list[i], i)

            if isinstance(exception, IpolException):
                exceptions.append(exception)

    # now check if the overall structure of the code is valid
    # check if there are unused values
    # for index, token in enumerate(reduce(final_parsed_list)):
    #     if token.type == Type.NUMBER or token.type == Type.STR:
    #         exceptions.append(IpolException(
    #             ExceptionType.UNUSED_VALUE_ERROR, None, index))

    # print exceptions if there are any and halt the build process
    if len(exceptions) > 0:
        for exception in exceptions:
            exception.print()
        return
    else:
        # create a new instance of the parser now with the syntax of the overall ipol code
        parser = Parser(syntax=Syntax().get_ipol_syntax())

        # finally, verify that the full code is valid
        reduced_final_parsed_list = reduce(final_parsed_list)

        # recursive_parse(parser, reduced_final_parsed_list, callback2)
        reduced_final_parsed_list[:] = (token for token in reduced_final_parsed_list \
        if token.type != Type.EMPTY_LINE)

        recursive_parse(parser, reduced_final_parsed_list, callback2)

        for line in ipol_code_verified:
            for token in line:
                print(token.type)

        # check syntax in class Syntax
        # Type.E means accepted
        build_failed_message = 'Build Failed.'
        try:
            if ipol_code_verified[0][0].type == Type.E:
                print('Build Successful\n')
            else:
                print(build_failed_message)
                return
        except:
            print(build_failed_message)
            return

        # there are no exceptions
        # continue with code generation
        tokens_list_copy.pop(0)
        tokens_list_copy.pop(len(tokens_list_copy) - 1)

        generated_code = CodeGenerator().generate(tokens_list_copy)

        # this may return a bool data type
        if isinstance(generated_code, list):
            runnable_code = '\n'.join(generated_code)
            runnable_code = runnable_code.replace('&n0', '')
            # run the generated python code
            with open('ic.py', '+w') as ic:
                ic.write(runnable_code)

            print('\nBuild Complete.\nView logs on ipol_logs.txt\nView generated code on ic.py\n')
            exec(runnable_code, globals())

            with open('ipol_logs.txt', '+w') as logs:
                text_to_write = 'PARSING LOGS\n\nGENERATED TOKENS\n'
                for line in tokens_list:
                    for token in line:
                        text_to_write = text_to_write + '{} -> {}'.format(token.type, token.val) + ", "
                    text_to_write = text_to_write + '\n'

                text_to_write = text_to_write + '\nPARSED AS...\n'
                for line in parsed_list:
                    for token in line:
                        text_to_write = text_to_write + str(token.type) + ', '
                    text_to_write = text_to_write + '\n'

                text_to_write = text_to_write + '\nGENERATED INTERMEDIATE CODE\n' + runnable_code
                logs.write(text_to_write)
        # if bool is returned, that means there was something wrong with the ipol code
        else:
            print('Build failed')
Example #24
 def __init__(self):
     self.utils = Utils()
     self.filereader = FileReader()
Example #25
 def read(self, fs):
     """Basic file reader for single column hex files."""
     reader = FileReader(fs, fields=(('values', 'x8'), ))
     values = reader.read()['values']
     self.deserialize(values)
Example #26
 def read(self, fs):
     """Basic file reader for multiple 32 bit colums hex files."""
     reader = FileReader(fs, fields=(('values', 'x8', self.columns), ))
     self.clear()
     for column, values in enumerate(reader.read()['values']):
         self.inject(values, column, 1)
Example #27
    def testfound(self):
        # fnamelist = ["../wrongdocfile",]
        # fnamelist = ["../10"+str(v)+".csv" for v in xrange(22,23)]
        # fnamelist = ["../1125.csv",]
        fnamelist = ["../10"+str(v)+".csv" for v in xrange(22,32)] + \
            ["../110"+str(v)+".csv" for v in xrange(01,10)] + \
            ["../11"+str(v)+".csv" for v in xrange(10,31)] + \
            ["../120"+str(v)+".csv" for v in xrange(01,10)] + \
            ["../12"+str(v)+".csv" for v in xrange(10,23)]
        cnt = 0
        found = 0
        wholeresult = {}
        writeflag = True
        if writeflag:
            writefile = open("../cleandata", "w")

        wrongdocflag = False

        if wrongdocflag:
            wrongdocfile = open("../wrongdocfile", "w")
            wrongdocfile.write(",".join([
                '"ALARMHAPPENTIME"', '"ALARMCODE"', '"LOCATION"', '"SUMMARY"'
            ]) + "\n")

        missloc = {}
        print "||||||||||||||||"
        fnamecnt = open("fnamecnt", "w")
        for fname in fnamelist:
            print fname
            filereader = FileReader(fname)
            alarmcodeidx = filereader.getattridx("ALARMCODE")
            attridx = filereader.getattridx("SUMMARY")
            locidx = filereader.getattridx("LOCATION")
            timeidx = filereader.getattridx("ALARMHAPPENTIME")
            identifieridx = filereader.getattridx("NEIDENTIFIER")
            cntidx = 0
            while True:
                tmptran = filereader.readtransection()
                cntidx += 1
                # print cntidx
                if tmptran is None:
                    filereader.close()
                    break

                summary = tmptran[attridx]
                location = tmptran[locidx]
                alarmcode = tmptran[alarmcodeidx]
                identifier = tmptran[identifieridx]
                warn = Warning(summary, location)
                if warn.m_type == NOTP4:
                    continue
                ftword = warn.getfirstword()
                if ftword not in wholeresult:
                    wholeresult[ftword] = {"cnt": 0, "good": 0}
                wholeresult[ftword]["cnt"] += 1
                cnt += 1
                loc = self.m_topo.getnebyidentifier(identifier)
                if loc is None:
                    loc = warn.fetchloc(self.m_topo)
                if loc is None:
                    locstr = warn.fetchlocstr()
                    if warn.m_type != NOTP5 and warn.m_type != TP9:
                        if locstr not in missloc:
                            missloc[locstr] = 0
                            print "==============================================="
                            print warn.m_summary
                            print "----------------------------------"
                            print warn.m_location
                            print "----------------------------------"
                            print identifier
                            print "locstr:", warn.m_type, locstr
                        missloc[locstr] += 1
                    if wrongdocflag:
                        wrongdocfile.write(",".join(['\"'+v+'\"' for v in \
                        [tmptran[timeidx],tmptran[alarmcodeidx],tmptran[locidx],tmptran[attridx],]])+"\r\n")
                    continue
                wholeresult[ftword]["good"] += 1
                found += 1
                summary = summary.replace("\n", "_")
                if writeflag:
                    writefile.write(alarmcode + "\t" + loc.m_name + "\t" +
                                    summary + "\t" + tmptran[timeidx] + "\n")
            print fname, "\t", cntidx
            fnamecnt.write(fname + "\t" + str(cntidx) + "\n")
        fnamecnt.close()
        if writeflag:
            writefile.close()
        if wrongdocflag:
            wrongdocfile.close()

        print "result:"
        print "cnt:", cnt
        print "found:", found
        print "pcg:", found * 1.0 / cnt

        for v in wholeresult.keys():
            if wholeresult[v]["good"] == wholeresult[v]["cnt"]:
                del wholeresult[v]
            else:
                wholeresult[v]["pcg"] = wholeresult[v][
                    "good"] * 1.0 / wholeresult[v]["cnt"]
        import pprint
        pprint.pprint(wholeresult)
        print "-----------------------"
        pprint.pprint(missloc)
        print "missloclen:", len(missloc)
        json.dump(wholeresult, open("tmpwholeresult", "w"))
        json.dump(missloc, open("missloc", "w"))
Example #28
from filereader import FileReader

File = FileReader('temperature.log')
temperatures = File.get_temperatures()

for temperature in sorted(temperatures):
    print("Day:{} Temps:{}".format(temperature, temperatures[temperature]))
Example #29
    def false_positive(self, partitions: ClusterPoints,
                       clusters: ClusterPoints):
        return self.sum_of_pairs(clusters) - self.true_positive(
            partitions, clusters)

    @staticmethod
    def sum_of_pairs(cluster_points: ClusterPoints):
        combinations = 0
        for cluster_id in cluster_points.cluster_ids():
            cluster_count = cluster_points.points_count(cluster_id)
            combinations += cluster_count * (cluster_count - 1)
        return combinations / 2


nmi = ClusterEvaluator(NormalizedMutualInformation(), FileReader("", " "))
jcs = ClusterEvaluator(JaccardSimilarity(), FileReader("", " "))
results = list()
for iii in range(1, 6):
    nmi_result = nmi.evaluate("data/partitions.txt",
                              "data/clustering_" + str(iii) + ".txt")
    jcs_result = jcs.evaluate("data/partitions.txt",
                              "data/clustering_" + str(iii) + ".txt")
    print("////// " + str(iii) + " ///////")
    print(nmi_result)
    print(jcs_result)
    print()
    results.append([nmi_result, jcs_result])

writer = FileWriter("data/scores.txt", " ")
writer.write_list_of_rows_to_file(results)
Example #30
 def __init__(self):
     self.utils = Utils()
     self.filereader = FileReader()
     self.parser = Parser()
     pass