def outliers(self, hsize=10, vsize=10, dump_invalid=False):
    """Print a report of the extreme entries found in the card indices.

    Sections are printed in this order: an overview table of every index
    in self.indices, shortest/longest/most duplicated card names, longest
    type, subtype and supertype keys, longest mana cost and largest cmc,
    largest power and toughness, most text lines and most text
    characters, and finally the invalid and unparsed card counts.

    hsize        -- accepted for signature compatibility; not used here.
    vsize        -- how many of the most populated names are examined
                    when listing duplicated card names.
    dump_invalid -- when true, print repr(card.fields) for every invalid
                    and unparsed card; otherwise only print a notice
                    that they are not being summarized.

    Nothing is returned; all output goes through print / printrows.
    Every print call takes exactly one argument so that the output is
    the same whether print is a statement or a function.
    """
    def show_longest_key(index, heading, missing):
        # Report the longest key of a string-keyed index, or the
        # `missing` message when the index is empty.
        if index:
            longest = max(index, key=len)
            print(heading + str(len(longest)) + ')')
            print(' ' + longest)
        else:
            print(missing)

    def show_rejected(cards):
        # Dump the raw fields of each card, or just note that there
        # were some when dumping was not requested.
        if dump_invalid:
            for card in cards:
                print('\n' + repr(card.fields))
        elif cards:
            print('Not summarizing.')

    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in self.indices:
        members = self.indices[index]
        rows.append([index, len(members), index_size(members)])
    printrows(padrows(rows))
    print('********************')

    if self.by_name:
        # min/max return the first extreme element they meet, which is
        # the same element a stable sort would have placed at [0].
        scardname = min(self.by_name, key=len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = max(self.by_name, key=len)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)

        # Names ordered by how many cards share them, most shared first.
        most_shared = sorted(self.by_name,
                             key=lambda name: len(self.by_name[name]),
                             reverse=True)
        rows = [[name, len(self.by_name[name])]
                for name in most_shared[0:vsize]
                if len(self.by_name[name]) > 1]
        if not rows:
            print('No duplicated cardnames')
        else:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
    else:
        print('No cards indexed by name?')

    print('--------------------')
    show_longest_key(self.by_type, 'Longest card type: (',
                     'No cards indexed by type?')
    show_longest_key(self.by_subtype, 'Longest subtype: (',
                     'No cards indexed by subtype?')
    show_longest_key(self.by_supertype, 'Longest supertype: (',
                     'No cards indexed by supertype?')

    print('--------------------')
    if self.by_cost:
        lcost = max(self.by_cost, key=len)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
    else:
        print('No cards indexed by cost?')
    if self.by_cmc:
        lcmc = max(self.by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(self.by_cmc[lcmc][0].cost))
        print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
    else:
        print('No cards indexed by cmc?')

    print('--------------------')
    # NOTE(review): power/toughness keys are ranked by key length, which
    # presumably works because they are unary-encoded strings (they are
    # displayed through utils.from_unary) -- confirm.
    if self.by_power:
        lpower = max(self.by_power, key=len)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
    else:
        print('No cards indexed by power?')
    if self.by_toughness:
        ltoughness = max(self.by_toughness, key=len)
        print('Largest creature toughness: '
              + utils.from_unary(ltoughness))
        print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
    else:
        print('No cards indexed by toughness?')

    print('--------------------')
    if self.by_textlines:
        llines = max(self.by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
    else:
        print('No cards indexed by line count?')
    if self.by_textlen:
        ltext = max(self.by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + plimit(self.by_textlen[ltext][0].encode()))
    else:
        print('No cards indexed by char count?')

    print('--------------------')
    print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
    show_rejected(self.invalid_cards)
    print('--------------------')
    print('There were ' + str(len(self.unparsed_cards))
          + ' unparsed cards.')
    show_rejected(self.unparsed_cards)
    print('====================')
def outliers(self, hsize=10, vsize=10, dump_invalid=False):
    """Print a report of the extreme entries found in the card indices.

    Sections are printed in this order: an overview table of every index
    in self.indices, shortest/longest/most duplicated card names, longest
    type, subtype and supertype keys, longest mana cost and largest cmc,
    largest power and toughness, most text lines and most text
    characters, and finally the invalid and unparsed card counts.

    hsize        -- accepted for signature compatibility; not used here.
    vsize        -- how many of the most populated names are examined
                    when listing duplicated card names.
    dump_invalid -- when true, print repr(card.fields) for every invalid
                    and unparsed card; otherwise only print a notice
                    that they are not being summarized.

    Nothing is returned; all output goes through print / printrows.
    Every print call takes exactly one argument so that the output is
    the same whether print is a statement or a function.
    """
    def show_longest_key(index, heading, missing):
        # Report the longest key of a string-keyed index, or the
        # `missing` message when the index is empty.
        if index:
            longest = max(index, key=len)
            print(heading + str(len(longest)) + ')')
            print(' ' + longest)
        else:
            print(missing)

    def show_rejected(cards):
        # Dump the raw fields of each card, or just note that there
        # were some when dumping was not requested.
        if dump_invalid:
            for card in cards:
                print('\n' + repr(card.fields))
        elif cards:
            print('Not summarizing.')

    print('********************')
    print('Overview of indices:')
    rows = [['Index Name', 'Keys', 'Total Members']]
    for index in self.indices:
        members = self.indices[index]
        rows.append([index, len(members), index_size(members)])
    printrows(padrows(rows))
    print('********************')

    if self.by_name:
        # min/max return the first extreme element they meet, which is
        # the same element a stable sort would have placed at [0].
        scardname = min(self.by_name, key=len)
        print('Shortest Cardname: (' + str(len(scardname)) + ')')
        print(' ' + scardname)
        lcardname = max(self.by_name, key=len)
        print('Longest Cardname: (' + str(len(lcardname)) + ')')
        print(' ' + lcardname)

        # Names ordered by how many cards share them, most shared first.
        most_shared = sorted(self.by_name,
                             key=lambda name: len(self.by_name[name]),
                             reverse=True)
        rows = [[name, len(self.by_name[name])]
                for name in most_shared[0:vsize]
                if len(self.by_name[name]) > 1]
        if not rows:
            print('No duplicated cardnames')
        else:
            print('-- Most duplicated names: --')
            printrows(padrows(rows))
    else:
        print('No cards indexed by name?')

    print('--------------------')
    show_longest_key(self.by_type, 'Longest card type: (',
                     'No cards indexed by type?')
    show_longest_key(self.by_subtype, 'Longest subtype: (',
                     'No cards indexed by subtype?')
    show_longest_key(self.by_supertype, 'Longest supertype: (',
                     'No cards indexed by supertype?')

    print('--------------------')
    if self.by_cost:
        lcost = max(self.by_cost, key=len)
        print('Longest mana cost: (' + str(len(lcost)) + ')')
        print(' ' + utils.from_mana(lcost))
        print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
    else:
        print('No cards indexed by cost?')
    if self.by_cmc:
        lcmc = max(self.by_cmc)
        print('Largest cmc: (' + str(lcmc) + ')')
        print(' ' + str(self.by_cmc[lcmc][0].cost))
        print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
    else:
        print('No cards indexed by cmc?')

    print('--------------------')
    # NOTE(review): power/toughness keys are ranked by key length, which
    # presumably works because they are unary-encoded strings (they are
    # displayed through utils.from_unary) -- confirm.
    if self.by_power:
        lpower = max(self.by_power, key=len)
        print('Largest creature power: ' + utils.from_unary(lpower))
        print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
    else:
        print('No cards indexed by power?')
    if self.by_toughness:
        ltoughness = max(self.by_toughness, key=len)
        print('Largest creature toughness: '
              + utils.from_unary(ltoughness))
        print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
    else:
        print('No cards indexed by toughness?')

    print('--------------------')
    if self.by_textlines:
        llines = max(self.by_textlines)
        print('Most lines of text in a card: ' + str(llines))
        print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
    else:
        print('No cards indexed by line count?')
    if self.by_textlen:
        ltext = max(self.by_textlen)
        print('Most chars in a card text: ' + str(ltext))
        print('\n' + plimit(self.by_textlen[ltext][0].encode()))
    else:
        print('No cards indexed by char count?')

    print('--------------------')
    print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
    show_rejected(self.invalid_cards)
    print('--------------------')
    print('There were ' + str(len(self.unparsed_cards))
          + ' unparsed cards.')
    show_rejected(self.unparsed_cards)
    print('====================')
def summarize(self, hsize=10, vsize=10, cmcsize=20):
    """Print an overview of the card set, one section per index family.

    Sections are printed in this order: card and parse totals, unique
    names, colors, types, subtypes, supertypes, CMCs and mana costs,
    power/toughness, loyalty, and text size. Tables are rendered with
    printrows(padrows(rows)).

    hsize   -- maximum number of columns in the horizontal type and
               supertype breakdown tables.
    vsize   -- maximum number of rows in each vertical popularity table.
    cmcsize -- maximum number of columns in the CMC breakdown table.

    Nothing is returned. Every print call takes exactly one argument so
    that the output is the same whether print is a statement or a
    function.
    """
    def by_popularity(index):
        # Keys of `index`, most populated bucket first. The sort is
        # stable, so equally sized buckets keep the index's own
        # iteration order.
        return sorted(index, key=lambda k: len(index[k]), reverse=True)

    def print_columns(index, keys):
        # Horizontal table: a row of keys above a row of bucket sizes.
        keys = list(keys)
        printrows(padrows([keys, [len(index[k]) for k in keys]]))

    def print_ranking(index, label=None):
        # Vertical table of the vsize most populated keys and their
        # bucket sizes; `label` optionally converts a key for display.
        rows = []
        for k in by_popularity(index)[0:vsize]:
            shown = label(k) if label is not None else k
            rows.append([shown, len(index[k])])
        printrows(padrows(rows))

    print('====================')
    print(str(len(self.cards)) + ' valid cards, '
          + str(len(self.invalid_cards)) + ' invalid cards.')
    print(str(len(self.allcards)) + ' cards parsed, '
          + str(len(self.unparsed_cards)) + ' failed to parse')
    print('--------------------')
    print(str(len(self.by_name)) + ' unique card names')

    print('--------------------')
    print(str(len(self.by_color_inclusive))
          + " represented colors (including colorless as 'A'), "
          + str(len(self.by_color)) + ' combinations')
    print('Breakdown by color:')
    print_columns(self.by_color_inclusive, self.by_color_inclusive)
    print('Breakdown by number of colors:')
    print_columns(self.by_color_count, self.by_color_count)

    print('--------------------')
    print(str(len(self.by_type_inclusive)) + ' unique card types, '
          + str(len(self.by_type)) + ' combinations')
    print('Breakdown by type:')
    print_columns(self.by_type_inclusive,
                  by_popularity(self.by_type_inclusive)[:hsize])

    print('--------------------')
    print(str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
          + str(len(self.by_subtype)) + ' combinations')
    print('-- Popular subtypes: --')
    print_ranking(self.by_subtype_inclusive)
    print('-- Top combinations: --')
    print_ranking(self.by_subtype)

    print('--------------------')
    print(str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
          + str(len(self.by_supertype)) + ' combinations')
    print('Breakdown by supertype:')
    print_columns(self.by_supertype_inclusive,
                  by_popularity(self.by_supertype_inclusive)[:hsize])

    print('--------------------')
    print(str(len(self.by_cmc)) + ' different CMCs, '
          + str(len(self.by_cost)) + ' unique mana costs')
    print('Breakdown by CMC:')
    # CMC columns are listed in ascending CMC order, not by popularity.
    print_columns(self.by_cmc, sorted(self.by_cmc)[:cmcsize])
    print('-- Popular mana costs: --')
    print_ranking(self.by_cost, utils.from_mana)

    print('--------------------')
    print(str(len(self.by_pt)) + ' unique p/t combinations')
    if self.by_power and self.by_toughness:
        # The value shown is the longest key's length minus one.
        # NOTE(review): presumably the keys are unary strings carrying
        # one non-counting marker character -- confirm.
        print('Largest power: ' + str(max(map(len, self.by_power)) - 1)
              + ', largest toughness: '
              + str(max(map(len, self.by_toughness)) - 1))
    print('-- Popular p/t values: --')
    print_ranking(self.by_pt, utils.from_unary)

    print('--------------------')
    print('Loyalty values:')
    print_ranking(self.by_loyalty, utils.from_unary)

    print('--------------------')
    # min()/max() raise on an empty index, hence the guard.
    if self.by_textlen and self.by_textlines:
        print('Card text ranges from ' + str(min(self.by_textlen))
              + ' to ' + str(max(self.by_textlen))
              + ' characters in length')
        print('Card text ranges from ' + str(min(self.by_textlines))
              + ' to ' + str(max(self.by_textlines)) + ' lines')
    print('-- Line counts by frequency: --')
    print_ranking(self.by_textlines)
    print('====================')
def summarize(self, hsize=10, vsize=10, cmcsize=20):
    """Print an overview of the card set, one section per index family.

    Sections are printed in this order: card and parse totals, unique
    names, colors, types, subtypes, supertypes, CMCs and mana costs,
    power/toughness, loyalty, and text size. Tables are rendered with
    printrows(padrows(rows)).

    hsize   -- maximum number of columns in the horizontal type and
               supertype breakdown tables.
    vsize   -- maximum number of rows in each vertical popularity table.
    cmcsize -- maximum number of columns in the CMC breakdown table.

    Nothing is returned. Every print call takes exactly one argument so
    that the output is the same whether print is a statement or a
    function.
    """
    def by_popularity(index):
        # Keys of `index`, most populated bucket first. The sort is
        # stable, so equally sized buckets keep the index's own
        # iteration order.
        return sorted(index, key=lambda k: len(index[k]), reverse=True)

    def print_columns(index, keys):
        # Horizontal table: a row of keys above a row of bucket sizes.
        keys = list(keys)
        printrows(padrows([keys, [len(index[k]) for k in keys]]))

    def print_ranking(index, label=None):
        # Vertical table of the vsize most populated keys and their
        # bucket sizes; `label` optionally converts a key for display.
        rows = []
        for k in by_popularity(index)[0:vsize]:
            shown = label(k) if label is not None else k
            rows.append([shown, len(index[k])])
        printrows(padrows(rows))

    print('====================')
    print(str(len(self.cards)) + ' valid cards, '
          + str(len(self.invalid_cards)) + ' invalid cards.')
    print(str(len(self.allcards)) + ' cards parsed, '
          + str(len(self.unparsed_cards)) + ' failed to parse')
    print('--------------------')
    print(str(len(self.by_name)) + ' unique card names')

    print('--------------------')
    print(str(len(self.by_color_inclusive))
          + " represented colors (including colorless as 'A'), "
          + str(len(self.by_color)) + ' combinations')
    print('Breakdown by color:')
    print_columns(self.by_color_inclusive, self.by_color_inclusive)
    print('Breakdown by number of colors:')
    print_columns(self.by_color_count, self.by_color_count)

    print('--------------------')
    print(str(len(self.by_type_inclusive)) + ' unique card types, '
          + str(len(self.by_type)) + ' combinations')
    print('Breakdown by type:')
    print_columns(self.by_type_inclusive,
                  by_popularity(self.by_type_inclusive)[:hsize])

    print('--------------------')
    print(str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
          + str(len(self.by_subtype)) + ' combinations')
    print('-- Popular subtypes: --')
    print_ranking(self.by_subtype_inclusive)
    print('-- Top combinations: --')
    print_ranking(self.by_subtype)

    print('--------------------')
    print(str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
          + str(len(self.by_supertype)) + ' combinations')
    print('Breakdown by supertype:')
    print_columns(self.by_supertype_inclusive,
                  by_popularity(self.by_supertype_inclusive)[:hsize])

    print('--------------------')
    print(str(len(self.by_cmc)) + ' different CMCs, '
          + str(len(self.by_cost)) + ' unique mana costs')
    print('Breakdown by CMC:')
    # CMC columns are listed in ascending CMC order, not by popularity.
    print_columns(self.by_cmc, sorted(self.by_cmc)[:cmcsize])
    print('-- Popular mana costs: --')
    print_ranking(self.by_cost, utils.from_mana)

    print('--------------------')
    print(str(len(self.by_pt)) + ' unique p/t combinations')
    if self.by_power and self.by_toughness:
        # The value shown is the longest key's length minus one.
        # NOTE(review): presumably the keys are unary strings carrying
        # one non-counting marker character -- confirm.
        print('Largest power: ' + str(max(map(len, self.by_power)) - 1)
              + ', largest toughness: '
              + str(max(map(len, self.by_toughness)) - 1))
    print('-- Popular p/t values: --')
    print_ranking(self.by_pt, utils.from_unary)

    print('--------------------')
    print('Loyalty values:')
    print_ranking(self.by_loyalty, utils.from_unary)

    print('--------------------')
    # min()/max() raise on an empty index, hence the guard.
    if self.by_textlen and self.by_textlines:
        print('Card text ranges from ' + str(min(self.by_textlen))
              + ' to ' + str(max(self.by_textlen))
              + ' characters in length')
        print('Card text ranges from ' + str(min(self.by_textlines))
              + ' to ' + str(max(self.by_textlines)) + ' lines')
    print('-- Line counts by frequency: --')
    print_ranking(self.by_textlines)
    print('====================')