Example No. 1
    def make_childmap(self):
        visited = set()

        q = Queue()
        q.put((self.git.head.commit, None))  # (commit, child_hash)

        progress = Progress(u'[%s] making git childmap' % self.__name__, 500,
                            10000, False)
        progress.set_point(0)
        progress.start()
        while not q.empty():
            progress.check()
            commit, child_hash = q.get()
            commit_hash = str(commit)[:7]

            # create child map
            if commit_hash not in self.childmap:
                self.childmap[commit_hash] = set()
            if child_hash is not None:
                self.childmap[commit_hash].add(child_hash)

            if commit_hash in visited: continue
            visited.add(commit_hash)

            # pass itself to parent
            for parent in commit.parents:
                q.put((parent, commit_hash))

            # add ancestors if this commit has no parents
            if len(commit.parents) == 0:
                self.ancestors.add(commit_hash)
        progress.done()

        pass
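Note: the method above is a breadth-first walk from HEAD towards the root commits, recording a parent-to-child edge for every commit it dequeues. A minimal, self-contained sketch of the same idea over a hypothetical parent graph (plain dicts instead of GitPython objects and Progress; all names below are invented for illustration):

from collections import deque

# Hypothetical history: each commit maps to its list of parents.
parents = {
    'c4': ['c3', 'c2'],   # merge commit
    'c3': ['c1'],
    'c2': ['c1'],
    'c1': [],             # root commit
}

def build_childmap(head):
    childmap, ancestors, visited = {}, set(), set()
    q = deque([(head, None)])            # (commit, child)
    while q:
        commit, child = q.popleft()
        childmap.setdefault(commit, set())
        if child is not None:
            childmap[commit].add(child)
        if commit in visited:
            continue
        visited.add(commit)
        for parent in parents[commit]:   # pass this commit down to its parents
            q.append((parent, commit))
        if not parents[commit]:          # no parents: a root/ancestor commit
            ancestors.add(commit)
    return childmap, ancestors

childmap, ancestors = build_childmap('c4')
# childmap == {'c4': set(), 'c3': {'c4'}, 'c2': {'c4'}, 'c1': {'c2', 'c3'}}
# ancestors == {'c1'}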
Example No. 2
    def _encode(self):

        logger.info('encoding lua tables')

        Progress.start('Encoding MIZ file', length=3)

        Progress.set_label('Encoding map resource')
        logger.debug('encoding map resource')
        with open(self.map_res_file, mode='w', encoding=ENCODING) as f:
            f.write(SLTP().encode(self._map_res, self._map_res_qual))
        Progress.set_value(1)

        Progress.set_label('Encoding l10n dictionary')
        logger.debug('encoding l10n dictionary')
        with open(self.dictionary_file, mode='w', encoding=ENCODING) as f:
            f.write(SLTP().encode(self.l10n, self._l10n_qual))
        Progress.set_value(2)

        Progress.set_label('Encoding mission dictionary')
        logger.debug('encoding mission dictionary')
        with open(self.mission_file, mode='w', encoding=ENCODING) as f:
            f.write(SLTP().encode(self.mission.d, self._mission_qual))
        Progress.set_value(3)

        logger.info('encoding done')
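Examples 2, 5 and 11 drive a class-level progress API: Progress.start(title, length=...), Progress.set_label(...) and Progress.set_value(...). To read or run those snippets in isolation, a minimal console stand-in with that shape could look like the sketch below (inferred from the call sites, not the project's actual implementation):

class Progress:
    '''Console stand-in for the class-method style progress API (assumed).'''
    _length = 1

    @classmethod
    def start(cls, title, length=1):
        cls._length = length
        print('%s (0/%s)' % (title, length))

    @classmethod
    def set_label(cls, label):
        print('  %s' % label)

    @classmethod
    def set_value(cls, value):
        print('  %s/%s done' % (value, cls._length))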
Example No. 3
    def loads(self):
        '''
        Load raw bug report files.
        :return: list of bug items
        '''
        fileCount = self.getFileCounts(self.SourceBugPath)

        bugitems = []

        # show progress
        progress = Progress(u'[%s] Loading bug reports' % self.__name__, 2, 10,
                            True)
        progress.set_upperbound(fileCount)
        progress.start()
        for root, dirs, files in os.walk(self.SourceBugPath):
            for f in files:
                if f[:f.find(u'-')].strip().lower() != self.ProjectName.lower():
                    continue
                #shutil.copy(os.path.join(root, f), os.path.join(_dest, f))
                bugitem = self.get_bugitem(os.path.join(root, f))
                if bugitem is not None:
                    bugitems.append(bugitem)
                progress.check()
        progress.done()
        return bugitems
Example No. 4
    def load(self, _force=False):
        '''
        Load commit info from GitLogPath
        :return: {bugID: [{'hash': u'', 'author': u'', 'commit_date': u'', 'message': u'', 'fixedFiles': {}}, ...], ...}
        '''
        if not os.path.exists(self.GitLogPath) or _force:
            self.make()

        logfile = codecs.open(self.GitLogPath, 'r', 'utf-8')
        progress = Progress(u'[%s] loading git log data' % self.__name__, 1000,
                            20000, False)
        progress.set_point(0)
        progress.start()
        for logitem in self.file_loader(logfile, _with_filter=False):
            # filter unuseful logs
            if len(logitem['fixedFiles']) == 0: continue

            # We only use bug report id in log message
            # mapping bug report ID
            logitem['linked_bug'] = re.findall(
                r'%s-[0-9]+' % self.ProjectName.upper(), logitem['message'])
            logitem['linked_bug'] = set(logitem['linked_bug'])
            for linked_id in logitem['linked_bug']:
                if linked_id not in self.logs:
                    self.logs[linked_id] = [logitem]
                else:
                    self.logs[linked_id].append(logitem)
            progress.check()
        progress.done()
        logfile.close()
        return self.logs
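The linking step relies only on a naming convention: issue keys of the form PROJECT-123 embedded in the commit message. A small standalone check of that regex (the project name and message below are made up):

import re

project_name = 'lang'   # hypothetical project name
message = u'LANG-1304 fix NumberUtils.isNumber; related to LANG-992'

linked_bugs = set(re.findall(r'%s-[0-9]+' % project_name.upper(), message))
# linked_bugs == {'LANG-1304', 'LANG-992'}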
Example No. 5
    def _decode(self):

        logger.info('decoding lua tables')

        if not self.zip_content:
            self.unzip(overwrite=False)

        Progress.start('Decoding MIZ file', length=3)

        Progress.set_label('Decoding map resource')
        logger.debug('reading map resource file')
        with open(self.map_res_file, encoding=ENCODING) as f:
            self._map_res, self._map_res_qual = SLTP().decode(f.read())
        Progress.set_value(1)

        Progress.set_label('Decoding dictionary file')
        logger.debug('reading l10n file')
        with open(self.dictionary_file, encoding=ENCODING) as f:
            self._l10n, self._l10n_qual = SLTP().decode(f.read())
        Progress.set_value(2)

        Progress.set_label('Decoding mission file')
        logger.debug('reading mission file')
        with open(self.mission_file, encoding=ENCODING) as f:
            mission_data, self._mission_qual = SLTP().decode(f.read())
            self._mission = Mission(mission_data, self._l10n)
        Progress.set_value(3)

        logger.info('decoding done')
Example No. 6
	def fill_SubjectSheet(self, _sheet, _group, _srcCounts, _bugCounts, _dupCounts):
		projects = sorted(_bugCounts.keys())

		size = sum([len(_bugCounts[project]) for project in projects])
		progress = Progress(u'[%s] fill subject' % self.__name__, 2, 10, True)
		progress.set_point(0).set_upperbound(size)
		progress.start()

		styles = [self.base_format, self.base_format, self.base_format, self.number_format, self.number_format]
		for project in projects:
			for version in _bugCounts[project].keys():
				if version == 'all': continue
				values = [_group, project, version.upper(), _bugCounts[project][version], _srcCounts[project][version]]
				self.input_row(_sheet, self.subj_data_row, 6, values, styles)
				self.subj_data_row += 1
				progress.check()
		progress.done()

		#summary
		styles = [self.subtitle_format, self.subtitle_format, self.number_format, self.number_format, self.number_format]
		for project in projects:
			values = [_group, project.upper(),  _bugCounts[project]['all'], _dupCounts[project], _srcCounts[project]['all']]
			self.input_row(_sheet, self.subj_summary_row, 0, values, styles)
			self.subj_summary_row += 1
		pass
Example No. 7
    def source_counting(self, _group, _project):
        statistics = {}

        progress = Progress('source counting', 2, 10, True)
        progress.set_upperbound(len(self.S.versions[_project].keys()))
        progress.start()
        for version in self.S.versions[_project].keys():
            vname = VersionUtil.get_versionName(version, _project)
            repo = self.S.getPath_source(_group, _project, vname)
            result = self.getCodeCount(repo)
            if result is None: continue
            statistics[vname] = result
            progress.check()
        progress.done()

        statistics['max'] = max(statistics.values()) if statistics else 0

        pretty = PrettyStringBuilder(_indent_depth=2)
        text = pretty.get_dicttext({_project: statistics})

        with open(os.path.join(self.S.getPath_base(_group, _project), u'sources.txt'), 'w') as f:
            f.write(text)
Example No. 8
def get_hump_error_per_type():
    query = """SELECT DISTINCT run.aggregate_id, aggregate_type.type_id FROM `run`
               LEFT OUTER JOIN aggregate_type
                    ON run.aggregate_id = aggregate_type.aggregate_id
               WHERE dissipation < 0.01"""

    session = get_session()

    aggregate_ids = session.execute(query).fetchall()
    session.close()

    errors = {
        1: [],
        2: [],
        3: [],
        4: [],
        5: []
    }
    p = Progress(len(aggregate_ids))
    p.start()
    cnt = 0
    for aggregate_id, type_id in aggregate_ids:
        err, alpha = get_hump_info(aggregate_id)
        errors[type_id].append(err)
        cnt += 1
        p.update(cnt)
    p.finish()

    x = errors.keys()
    y = [np.mean(yi) for yi in errors.values()]
    yerr = [np.std(yi) for yi in errors.values()]

    ymax = [np.max(yi) for yi in errors.values()]
    ymin = [np.min(yi) for yi in errors.values()]

    with file('./simulation_data/type_hump_error_ranges.bin','wb') as fp:
        pickle.dump(errors,fp)

    fig = pplot.figure()
    ax = fig.add_subplot(311)

    ax.bar(x,y,yerr=yerr,color='b')
    ax.set_ylabel("Avergage power law error")
    ax.set_xlabel("Type")

    ax = fig.add_subplot(312)
    ax.set_ylabel("Maximum power law error")
    ax.set_xlabel("Type")
    ax.bar(x,ymax, color='r')

    ax = fig.add_subplot(313)
    ax.set_ylabel("Minimum power law error")
    ax.set_xlabel("Type")
    ax.bar(x,ymin, color='g')

    pplot.show()
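Examples 8, 13, 14 and 15 use a different Progress shape from the earlier ones: constructed with a total count and driven through start(), update(done) and finish(). A rough console stand-in with that interface, inferred from the call sites only:

class Progress(object):
    '''Console stand-in for the count-based progress API (assumed).'''

    def __init__(self, total):
        self.total = total

    def start(self):
        self.update(0)

    def update(self, done):
        print('%s/%s steps' % (done, self.total))

    def finish(self):
        self.update(self.total)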
Example No. 9
    def unhash_folder(_src, _dest):
        '''
        hashed folder ==> unhashed folder
        example) path/aa/00/filename ==> path/filename
        :param _src:
        :param _dest:
        :return:
        '''
        if not os.path.exists(_dest):
            os.makedirs(_dest)
        progress = Progress(u'Merging bug reports', 20, 1000, False)
        progress.start()
        for root, dirs, files in os.walk(_src):
            for f in files:
                shutil.copy(os.path.join(root, f), os.path.join(_dest, f))
                progress.check()
        progress.done()
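The walk above simply flattens whatever nesting os.walk finds under _src. A tiny demonstration of that behaviour on a throwaway tree shaped like the docstring's path/aa/00/filename layout (all paths and file names are made up):

import os
import shutil
import tempfile

src = tempfile.mkdtemp()
dest = tempfile.mkdtemp()
os.makedirs(os.path.join(src, 'aa', '00'))
open(os.path.join(src, 'aa', '00', 'bug-1.xml'), 'w').close()

for root, dirs, files in os.walk(src):
    for f in files:
        shutil.copy(os.path.join(root, f), os.path.join(dest, f))

# dest now contains bug-1.xml directly; the aa/00 hash directories are dropped.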
Example No. 10
    def make_tagmap(self):
        q = Queue()
        visited = set([])

        # find root nodes (queue init)
        for item in list(self.ancestors):
            q.put((item, None))  # (commit_hash, tagname)

        # For each item in queue
        progress = Progress(u'[%s] making git tagmaps' % self.__name__, 500,
                            10000, False)
        progress.set_point(0)
        progress.start()
        while not q.empty():
            commit_hash, parent_tag = q.get()

            # If this commit is tagged, map the commit_hash to its own tag
            if commit_hash in self.tags:
                commit_tag = self.tags[commit_hash]
                self.tagmap[commit_hash] = commit_tag

            # otherwise, inherit the tag propagated from the parent commit
            else:
                if commit_hash not in self.tagmap:
                    self.tagmap[commit_hash] = parent_tag
                else:
                    # compare time previous_tag and parent_tag
                    previous_tag = self.tagmap[commit_hash]
                    pre_time = self.tagtimes[previous_tag]
                    par_time = self.tagtimes[parent_tag]
                    if par_time > pre_time:
                        self.tagmap[commit_hash] = parent_tag
                commit_tag = parent_tag

            if commit_hash not in visited:
                visited.add(commit_hash)
                for child_hash in self.childmap[commit_hash]:
                    q.put((child_hash, commit_tag))

            progress.check()
        progress.done()
        pass
Example No. 11
def download(url,
             local_file,
             progress_title: str,
             progress_text: str = '',
             file_size: int = None):
    logger.info('downloading {} -> {}'.format(url, local_file))

    Progress.start(progress_title)
    Progress.set_label(progress_text)

    def hook(data):
        Progress.set_value(float(data['percent_complete']))

    dl = Downloader(
        url=url,
        filename=local_file,
        progress_hooks=[hook],
        content_length=file_size,
    )

    return dl.download()
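The hook mechanism is how the Downloader reports completion: each hook is called with a dict carrying at least a percent_complete entry, which is forwarded to the progress bar. The hook can be exercised without a real download (the values below are purely illustrative):

def hook(data):
    Progress.set_value(float(data['percent_complete']))

Progress.start('downloading example.bin')
for pct in (0, 25, 50, 75, 100):
    hook({'percent_complete': pct})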
Example No. 12
    def load_raw(self, _force=False):
        '''
        Load commit info from GitLogPath
        :return: list of log items [{'hash': u'', 'author': u'', 'commit_date': u'', 'message': u'', 'fixedFiles': {}}, ...]
        '''
        if not os.path.exists(self.GitLogPath) or _force:
            self.make()

        self.logs = []
        logfile = codecs.open(self.GitLogPath, 'r', 'utf-8')
        progress = Progress(u'[%s] loading git log data' % self.__name__, 1000,
                            20000, False)
        progress.set_point(0)
        progress.start()
        for logitem in self.file_loader(logfile):
            # filter unuseful logs
            #if len(logitem['fixedFiles'])==0: continue
            if logitem['hash'] == '': continue
            self.logs.insert(0, logitem)
            progress.check()
        progress.done()
        logfile.close()
        return self.logs
Example No. 13
    def run(self):
        agstart = time.time()
        for i in xrange(self.no_sims):
            logging.info("Going for simulation %d"%(i+1))
            gc.collect()
            run_id = str(uuid4())

            with DataContainer(self.config,run_id,self.aggregate_id) as dc:
                p = Progress(self.config['model']['no_steps'])

                model_class = None
                if(self.market_type == 1):
                    logging.info("Using default Market")
                    model_class = Market
                elif(self.market_type == 2):
                    logging.info("Using ShuffleIRSMarket")
                    model_class = ShuffleIRSMarket
                elif(self.market_type == 3):
                    logging.info("Using SortedIRSMarket")
                    model_class = SortedIRSMarket
                elif(self.market_type == 4):
                    logging.info("Using RandomSortedIRSMarket")
                    model_class = SortedRandomIRSMarket
                elif(self.market_type == 5):
                    logging.info("Using RandomShuffleIRSMarket")
                    model_class = ShuffleRandomIRSMarket
                elif(self.market_type == 6):
                    logging.info("Using ConstantRandomShuffleIRSMarket")
                    model_class = ConstShuffleIRSMarket
                elif(self.market_type == 7):
                    logging.info("Using quick CRS-IRS-Mkt")
                    model_class = sim
                else:
                    raise "No such market type"

                p.start()
                start = time.time()
                with model_class(self.config['model'],dc,p.update) as m:
                    m.run()

                t = time.time()-start
                p.finish()

                print ""
                logging.info("Run took %f seconds"%t)

                if(self.config['analysis']['do_analysis']):
                    start = time.time()
                    self.do_analysis(dc,run_id)
                    t = time.time()-start
                    logging.info("Analysis took %f seconds"%t)

                if(self.save_data):
                    start = time.time()
                    dc.save_data()
                    t = time.time()-start
                    logging.info("Saving data took %f seconds"%t)

            gc.collect()
            print ""
            print ""

        gc.collect()
        dt = (time.time() - agstart) / 60
        logging.info("Experiment took %f minutes"%dt)

        if(self.config['aggregate']['do_aggregate'] and self.save_data):
            start = time.time()
            self.do_aggregate(dc,run_id)
            logging.info('Aggregation took %f seconds'%(time.time()-start))
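The if/elif ladder that picks the model class can also be expressed as a lookup table, which keeps the supported types and the unknown-type error in one place. A sketch using the class names from the example (their availability is assumed; the lookup would replace the ladder inside run()):

MARKET_CLASSES = {
    1: Market,                   # default Market
    2: ShuffleIRSMarket,
    3: SortedIRSMarket,
    4: SortedRandomIRSMarket,
    5: ShuffleRandomIRSMarket,
    6: ConstShuffleIRSMarket,
    7: sim,                      # quick CRS-IRS market
}

try:
    model_class = MARKET_CLASSES[self.market_type]
except KeyError:
    raise ValueError('No such market type: %r' % self.market_type)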
Example No. 14
    if (s.save_density_for_avalanche_size):
        s.density_per_avalanche_size = defaultdict(list)

    s.save_gross_risk_for_avalanche_size = save_gross_risk_for_avalanche_size
    if (s.save_gross_risk_for_avalanche_size):
        s.gross_risk_per_avalanche_size = defaultdict(list)

    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)

    if (save_giant_component): s.giant_components = np.zeros(s.no_steps)

    start = time.time()

    p.start()
    s.run()
    p.finish()

    print
    print "Run took %d seconds" % (time.time() - start)

    if (save):
        print "Saving data"
        dc.save_defaults()
        dc.save_run()

    if s.save_avalanche_progression:
        print "Saving avalanche progression"
        file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
Example No. 15
def do_run(steps,
           no_banks,
           threshold,
           max_tenure,
           max_irs_value,
           avalanche_fraction=0.9):
    #steps = 10000
    save = False
    save_risk = False
    save_risk_avalanche_time_series = False
    save_dist = False
    save_giant_component = False
    save_avalanche_progression = False
    save_critical_info = False
    save_avalanche_tree = False
    save_degree_distribution = False
    no_connection_scatter_moments = 0
    connection_scatter_moments = np.random.randint(
        0, steps, no_connection_scatter_moments)

    seed = np.random.randint(0, 1000)
    dcconfig = {
        'model': {
            'no_banks': no_banks,
            'no_steps': steps,
            'threshold': threshold,
            'sigma': 1,
            'max_irs_value': max_irs_value,
            'irs_threshold': -1,
            'dissipation': 0.0,
            'max_tenure': max_tenure
        },
        'analysis': {
            'data_to_save': ['defaults']
        },
        'file_root': './simulation_data/',
        'market_type': 7,
        'seed': seed
    }

    measure_no_steps = 2 * dcconfig['model']['max_tenure']

    ###########################################################################
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'],
            dc,
            p.update,
            save_risk,
            save_dist,
            connection_scatter_moments,
            seed,
            avalanche_fraction=avalanche_fraction)
    s.save_degree_distribution = save_degree_distribution
    if (s.save_degree_distribution):
        s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
        s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id

    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)

    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)

    if (save_giant_component): s.giant_components = np.zeros(s.no_steps)
    ###########################################################################

    start = time.time()
    p.start()
    tme, size = s.run()
    print
    p.finish()

    defaulting_bank = s.defaulting_bank_no
    start_at = tme - measure_no_steps + 1

    print "Large enough avalanche found at %d of size %d" % (tme, size)

    print
    print "Run took %d seconds" % (time.time() - start)
    print
    print "Going for the analysis"

    ###########################################################################
    ## Actual stuff that's needed
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)

    s = sim(dcconfig['model'],
            dc,
            p.update,
            save_risk,
            save_dist,
            connection_scatter_moments,
            seed,
            start_at,
            defaulting_bank,
            avalanche_fraction=avalanche_fraction)

    nb = dcconfig['model']['no_banks']
    s.measured_balances = np.zeros((measure_no_steps, nb))
    s.measured_gross_balances = np.zeros((measure_no_steps, nb))
    s.degrees = np.zeros((measure_no_steps, nb))
    s.no_irs = np.zeros((measure_no_steps, nb))
    #s.giant_component = []
    s.defaulted_nodes = []
    s.irs_pb = []
    s.network = np.zeros((nb, nb))
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)

    #################
    s.save_degree_distribution = save_degree_distribution
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component): s.giant_components = np.zeros(s.no_steps)
    ###########################################################################

    start = time.time()
    p.start()
    tme, size = s.run()
    p.finish()
    print
    print "Large enough avalanche found at %d of size %d" % (tme, size)

    if s.save_avalanche_progression:
        print "Saving avalanche progression"
        file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.avalanche_progressions, fp)
            pickle.dump(dcconfig, fp)

    if s.collect_critical_info:
        print "Critical info"
        file_path = './simulation_data/critical/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.critical_info, fp)
            pickle.dump(s.max_default_size_t.tolist(), fp)
            if (s.save_giant_component):
                pickle.dump(s.giant_components.tolist(), fp)
            pickle.dump(dcconfig, fp)

    if len(connection_scatter_moments) > 0:
        print "Connection Scatters"
        file_path = './simulation_data/connection_scatters/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.connection_scatters, fp)

    if save_dist:
        file_path = './simulation_data/dists/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.trials, fp)
            pickle.dump(dcconfig['model']['no_banks'], fp)

    if (True):
        os.makedirs("./simulation_data/large_avalanche_data/%s" %
                    dc.aggregate_id)
        print "Saving stuff"
        file_path = './simulation_data/large_avalanche_data/%s/degrees.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.degrees.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/no_irs.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.no_irs.tolist(), fp)
            pickle.dump(s.irs_pb, fp)

        file_path = './simulation_data/large_avalanche_data/%s/balances.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.measured_balances.tolist(), fp)
            pickle.dump(s.measured_gross_balances.tolist(), fp)

        #file_path = './simulation_data/large_avalanche_data/%s/gc.bin'%dc.aggregate_id
        #with file(file_path,'wb') as fp:
        #    pickle.dump(s.giant_component,fp)

        file_path = './simulation_data/large_avalanche_data/%s/network.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.network.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/defaulted.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.defaulted_nodes, fp)

        file_path = './simulation_data/large_avalanche_data/%s/irs_data.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.irs_creations.tolist(), fp)
            pickle.dump(s.irs_removals.tolist(), fp)

        dcconfig['failed_bank'] = s.defaulting_bank_no
        file_path = './simulation_data/large_avalanche_data/%s/config.json' % dc.aggregate_id
        with open(file_path, 'w') as fp:
            json.dump(dcconfig, fp, indent=4)

    print dc.aggregate_id