class TilRAnalyzer(object):
    '''
    Builds a histogram of the "wpattern" produced by TilR for each concurso
    in a range of the history, then prints a summary of it.

    All the work (history scan + printed summary) is triggered by the
    constructor.  The print calls are written as single-argument
    print(...) so the class runs unchanged under Python 2 and Python 3.
    '''

    # First concurso number included in the scan (was a literal in the loop).
    FIRST_NDOCONC = 1301

    def __init__(self):
        # Histogram keyed by (wpattern_index, wpattern) -> number of occurrences.
        self.wpattern_histogram = Dict2()
        self.concurso = ConcursoBase()
        self.n_slots = 5
        tildefiner = TilDefiner(self.n_slots, self.concurso.N_DE_DEZENAS)
        self.tilprodbyNBase = TilProducerByNumberBase(tildefiner)
        self.run_history()
        self.summarize()

    def run_history(self):
        '''
        Walks the concursos from FIRST_NDOCONC up to the last one available,
        printing each concurso with its wpattern and counting the pattern in
        self.wpattern_histogram.
        '''
        last_nDoConc = self.concurso.get_total_concursos()
        for nDoConc in range(self.FIRST_NDOCONC, last_nDoConc + 1):
            concurso = self.concurso.get_concurso_by_nDoConc(nDoConc)
            wpatt = TilR(self.n_slots, concurso).get_wpattern()
            wpatt_index = self.tilprodbyNBase.index(wpatt)
            print('%s %s' % (concurso, wpatt))
            self.wpattern_histogram.add1_or_set1_to_key((wpatt_index, wpatt))

    def summarize(self):
        '''
        Prints, in this order:
          1. one line per observed pattern, most frequent first, as
             "(index, pattern, n_occurrences) n_combinations";
          2. the number of distinct patterns observed;
          3. the header 'not happened' followed by a single line listing every
             pattern index in [0, get_total()-1] that never occurred.

        An empty histogram is handled (nothing is listed under item 1 and
        every index is reported as not happened).
        '''
        # Flatten ((index, pattern), count) into (index, pattern, count).
        # A comprehension avoids the zip(*...) unpacking, which raised
        # ValueError when the histogram had no entries.
        triple_list = [
            (index, pattern, n_occurrences)
            for (index, pattern), n_occurrences in self.wpattern_histogram.items()
        ]
        # Ascending sort followed by reverse() (rather than reverse=True) keeps
        # the original ordering among patterns with equal counts.
        triple_list.sort(key=lambda triple: triple[2])
        triple_list.reverse()

        # Read before the producer is repositioned by move_to_wpattern() below.
        n_last_index = self.tilprodbyNBase.get_total() - 1

        for triple in triple_list:
            self.tilprodbyNBase.move_to_wpattern(triple[1])
            n_combinations = self.tilprodbyNBase.get_n_combinations(self.concurso)
            print('%s %s' % (triple, n_combinations))

        print(len(self.wpattern_histogram))
        print('not happened')

        # Set membership instead of scanning a tuple for every index.
        seen_indices = set(triple[0] for triple in triple_list)
        missing_entries = []
        for index in range(n_last_index + 1):
            if index in seen_indices:
                continue
            missing_entries.append(
                '>>>%d %s %s ::'
                % (len(missing_entries) + 1, index, self.tilprodbyNBase.at(index))
            )
        # Entries stay on one line, space separated, as before; the line is
        # now terminated with a newline instead of being left open.
        if missing_entries:
            print(' '.join(missing_entries))
class DistanceAllOccurrred(object):
    '''
    The idea behind this metric is to measure how many concursos it takes,
    starting at a given past concurso and moving onwards, for all dozens
    (dezenas) to have appeared at least once.

    Eg. it took 41 concursos at the beginning of Megasena for all 60 dozens
    to appear at least once.

    When this note was originally written, the max "spike-distance", as it is
    called, was 87, the min was 27 and the average was around 41/42.

    Attributes filled in by the processing steps:
      distancesDict                   {nDoConc: distance}
      frequencies_within_spike_range  {nDoConc: frequencies in order}; after
                                      compaction, only spike entries remain
      spiked_tuple_list               [(nDoConc, distance), ...] of spikes
      spiked_nDoConcs, spiked_distances, max_distance, min_distance,
      avg_distance, std_distance      statistics over the spikes (the four
                                      scalar stats are None without spikes)
    '''

    def __init__(self):
        self.distancesDict = {}
        self.frequencies_within_spike_range = {}
        self.spiked_tuple_list = None  # eg. [(1,40),(3,60),(36,47),(42,49)...]
        self.concursoBase = ConcursoBase()
        self.process()
        self.summarize()

    def process(self):
        '''
        For every concurso ("parked" concurso), accumulates dozen frequencies
        over it and the following concursos until no dozen has a zero count,
        and records how many concursos that took in self.distancesDict.

        Processing stops at the first parked concurso for which the history
        ends before all dozens have appeared; its partial frequencies are
        still stored, but it gets no entry in self.distancesDict.
        '''
        total_concursos = self.concursoBase.get_total_concursos()
        for parked_nDoConc in range(1, total_concursos + 1):
            concurso = self.concursoBase.get_concurso_by_nDoConc(parked_nDoConc)
            frequency_dict = DezenasVolanteFrequencyDict(
                self.concursoBase.N_DE_DEZENAS_NO_VOLANTE
            )
            frequency_dict.add_1_to_values_given_key_list(concurso.get_dezenas())
            distance = 1
            while frequency_dict.is_there_still_a_zero_among_values():
                concurso = concurso.get_next()
                if concurso is None:
                    # History exhausted: later parked concursos cannot complete
                    # either, so keep the partial frequencies and stop.
                    self.frequencies_within_spike_range[parked_nDoConc] = (
                        frequency_dict.extract_frequencies_in_order()
                    )
                    return
                frequency_dict.add_1_to_values_given_key_list(concurso.get_dezenas())
                distance += 1
            self.distancesDict[parked_nDoConc] = distance
            frequencies = frequency_dict.extract_frequencies_in_order()
            self.frequencies_within_spike_range[parked_nDoConc] = frequencies
            print(' >>>>>>>>> freq within spike %s' % (frequencies,))

    def get_last_nDoConc_that_has_distance(self):
        '''
        Returns the (nDoConc, distance) pairs of self.distancesDict as a list,
        ordered by nDoConc.

        NOTE(review): the name suggests only the last concurso should be
        returned; the list return is kept so existing callers keep working.
        '''
        return sorted(self.distancesDict.items())

    def compact_distanceDict_into_spikes(self):
        '''
        This metric generally happens in a decreasing manner, one by one,
        until a "spike".
          # eg. [(1,39),(3,59),(36,46),(42,48)...]
        The example (eg) can be read as such:
          - conc 1 needs another 38 concs to have all dozens happen at least
            once, ie, have every one of them occurring
          - conc 2, though it's not there, needs another 37 (ie, 38-1)
          - conc 3 "spikes" needing another 58 concs
          - from conc 4 to conc 35, distance diminishes one by one,
            ie, (4,58),(5,57),...,(35,27)
          - conc 36 "spikes" again

        Rebuilds self.spiked_tuple_list, reduces
        self.frequencies_within_spike_range to the spike entries only (the
        first concurso included, since it is listed as a spike) and
        refreshes the statistics.  An empty self.distancesDict yields an
        empty spike list.
        '''
        self.spiked_tuple_list = []
        frequencies_to_retain = {}
        # The comparison with the previous distance is only meaningful when
        # the pairs are visited in concurso order; dict order is not reliable.
        ordered_pairs = sorted(self.distancesDict.items())
        previous_distance = None
        for position, pair in enumerate(ordered_pairs):
            nDoConc, distance = pair
            # The first pair always opens the list; afterwards any pair that
            # is not exactly "previous minus one" is a spike.
            if position == 0 or previous_distance != distance + 1:
                self.spiked_tuple_list.append(pair)
                # Must be a hard copy, for the source dict is replaced below.
                frequencies_to_retain[nDoConc] = (
                    self.frequencies_within_spike_range[nDoConc][:]
                )
            previous_distance = distance
        # From here on only the spikes are kept, not all history.
        self.frequencies_within_spike_range = frequencies_to_retain
        self.generate_stats()

    def generate_stats(self):
        '''
        Derives spiked_nDoConcs, spiked_distances (numpy array) and the
        max/min/avg/std of the spike distances.  Without spikes the array is
        empty and the four scalar statistics are set to None.
        '''
        spiked = self.get_spiked_tuple_list()
        if spiked:
            self.spiked_nDoConcs, distances = zip(*spiked)
        else:
            self.spiked_nDoConcs, distances = (), ()
        self.spiked_distances = numpy.array(distances)
        if len(self.spiked_distances) == 0:
            self.max_distance = None
            self.min_distance = None
            self.avg_distance = None
            self.std_distance = None
            return
        self.max_distance = self.spiked_distances.max()
        self.min_distance = self.spiked_distances.min()
        self.avg_distance = self.spiked_distances.mean()
        self.std_distance = self.spiked_distances.std()

    def get_spiked_tuple_list(self, reprocess=False):
        '''
        Returns the list of spikes, computing it on first use or whenever
        reprocess is True.
        '''
        if self.spiked_tuple_list is None or reprocess:
            self.compact_distanceDict_into_spikes()
        return self.spiked_tuple_list

    def summarize(self):
        '''
        Prints the distances dict, the spike list, the frequencies retained
        for each spike (in concurso order) and the spike statistics.
        '''
        print(self.distancesDict)
        print(self.get_spiked_tuple_list())
        for nDoConc in sorted(self.frequencies_within_spike_range):
            print('%s %s' % (nDoConc, self.frequencies_within_spike_range[nDoConc]))
        print('max %s' % (self.max_distance,))
        print('min %s' % (self.min_distance,))
        print('avg %s' % (self.avg_distance,))
        print('std %s' % (self.std_distance,))
        print('strides %s' % (self.spiked_distances.strides,))
        print('len spiked list %s' % (len(self.get_spiked_tuple_list()),))