Пример #1
0
 def outliers(self, hsize=10, vsize=10, dump_invalid=False):
     """Print a report of the extreme entries found in the card indices.

     The report contains, in order: an overview table of every index in
     ``self.indices``; the shortest and longest card names and the most
     duplicated names; the longest type, subtype and supertype keys; the
     longest mana-cost key and the largest cmc; the longest power and
     toughness keys; the largest text line count and character count; and
     the number of invalid and unparsed cards.

     Every ``print`` is written as a call with a single expression, which
     behaves the same as a print statement under Python 2 and as the print
     function under Python 3.

     Args:
         hsize: Not used by this method; kept so the signature stays
             compatible with existing callers.
         vsize: Number of most-duplicated names that are considered for
             the duplicate-name table.
         dump_invalid: When true, print ``repr(card.fields)`` for every
             invalid and every unparsed card; otherwise only print
             'Not summarizing.' when there are any.

     Returns:
         None. The report is written to standard output.
     """
     def print_longest_key(index, label, missing):
         # Show the longest key of `index` together with its length, or
         # the `missing` message when the index is empty.
         if index:
             # max() returns the first maximal key in iteration order,
             # the same key a stable descending sort would place first.
             longest = max(index, key=len)
             print(label + ' (' + str(len(longest)) + ')')
             print('  ' + longest)
         else:
             print(missing)

     print('********************')
     print('Overview of indices:')
     rows = [['Index Name', 'Keys', 'Total Members']]
     for name in self.indices:
         members = self.indices[name]
         rows.append([name, len(members), index_size(members)])
     printrows(padrows(rows))
     print('********************')

     if self.by_name:
         shortest_name = min(self.by_name, key=len)
         print('Shortest Cardname: (' + str(len(shortest_name)) + ')')
         print('  ' + shortest_name)
         longest_name = max(self.by_name, key=len)
         print('Longest Cardname: (' + str(len(longest_name)) + ')')
         print('  ' + longest_name)
         # Names ordered by how many cards share them, most shared first.
         by_copies = sorted(self.by_name,
                            key=lambda name: len(self.by_name[name]),
                            reverse=True)
         # Only names that occur more than once count as duplicates.
         rows = [[name, len(self.by_name[name])]
                 for name in by_copies[0:vsize]
                 if len(self.by_name[name]) > 1]
         if rows:
             print('-- Most duplicated names: --')
             printrows(padrows(rows))
         else:
             print('No duplicated cardnames')
     else:
         print('No cards indexed by name?')
     print('--------------------')

     print_longest_key(self.by_type, 'Longest card type:',
                       'No cards indexed by type?')
     print_longest_key(self.by_subtype, 'Longest subtype:',
                       'No cards indexed by subtype?')
     print_longest_key(self.by_supertype, 'Longest supertype:',
                       'No cards indexed by supertype?')
     print('--------------------')

     if self.by_cost:
         lcost = max(self.by_cost, key=len)
         print('Longest mana cost: (' + str(len(lcost)) + ')')
         print('  ' + utils.from_mana(lcost))
         print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
     else:
         print('No cards indexed by cost?')
     if self.by_cmc:
         lcmc = max(self.by_cmc)
         print('Largest cmc: (' + str(lcmc) + ')')
         print('  ' + str(self.by_cmc[lcmc][0].cost))
         print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
     else:
         print('No cards indexed by cmc?')
     print('--------------------')

     # Power and toughness keys are ranked by key length, not by numeric
     # value; the key is passed through utils.from_unary for display.
     if self.by_power:
         lpower = max(self.by_power, key=len)
         print('Largest creature power: ' + utils.from_unary(lpower))
         print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
     else:
         print('No cards indexed by power?')
     if self.by_toughness:
         ltoughness = max(self.by_toughness, key=len)
         print('Largest creature toughness: '
               + utils.from_unary(ltoughness))
         print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
     else:
         print('No cards indexed by toughness?')
     print('--------------------')

     if self.by_textlines:
         llines = max(self.by_textlines)
         print('Most lines of text in a card: ' + str(llines))
         print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
     else:
         print('No cards indexed by line count?')
     if self.by_textlen:
         ltext = max(self.by_textlen)
         print('Most chars in a card text: ' + str(ltext))
         print('\n' + plimit(self.by_textlen[ltext][0].encode()))
     else:
         print('No cards indexed by char count?')
     print('--------------------')

     print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
     if dump_invalid:
         for card in self.invalid_cards:
             print('\n' + repr(card.fields))
     elif self.invalid_cards:
         print('Not summarizing.')
     print('--------------------')
     print('There were ' + str(len(self.unparsed_cards))
           + ' unparsed cards.')
     if dump_invalid:
         for card in self.unparsed_cards:
             print('\n' + repr(card.fields))
     elif self.unparsed_cards:
         print('Not summarizing.')
     print('====================')
Пример #2
0
 def outliers(self, hsize=10, vsize=10, dump_invalid=False):
     """Print a report of the extreme entries found in the card indices.

     The report contains, in order: an overview table of every index in
     ``self.indices``; the shortest and longest card names and the most
     duplicated names; the longest type, subtype and supertype keys; the
     longest mana-cost key and the largest cmc; the longest power and
     toughness keys; the largest text line count and character count; and
     the number of invalid and unparsed cards.

     Every ``print`` is written as a call with a single expression, which
     behaves the same as a print statement under Python 2 and as the print
     function under Python 3.

     Args:
         hsize: Not used by this method; kept so the signature stays
             compatible with existing callers.
         vsize: Number of most-duplicated names that are considered for
             the duplicate-name table.
         dump_invalid: When true, print ``repr(card.fields)`` for every
             invalid and every unparsed card; otherwise only print
             'Not summarizing.' when there are any.

     Returns:
         None. The report is written to standard output.
     """
     def print_longest_key(index, label, missing):
         # Show the longest key of `index` together with its length, or
         # the `missing` message when the index is empty.
         if index:
             # max() returns the first maximal key in iteration order,
             # the same key a stable descending sort would place first.
             longest = max(index, key=len)
             print(label + ' (' + str(len(longest)) + ')')
             print('  ' + longest)
         else:
             print(missing)

     print('********************')
     print('Overview of indices:')
     rows = [['Index Name', 'Keys', 'Total Members']]
     for name in self.indices:
         members = self.indices[name]
         rows.append([name, len(members), index_size(members)])
     printrows(padrows(rows))
     print('********************')

     if self.by_name:
         shortest_name = min(self.by_name, key=len)
         print('Shortest Cardname: (' + str(len(shortest_name)) + ')')
         print('  ' + shortest_name)
         longest_name = max(self.by_name, key=len)
         print('Longest Cardname: (' + str(len(longest_name)) + ')')
         print('  ' + longest_name)
         # Names ordered by how many cards share them, most shared first.
         by_copies = sorted(self.by_name,
                            key=lambda name: len(self.by_name[name]),
                            reverse=True)
         # Only names that occur more than once count as duplicates.
         rows = [[name, len(self.by_name[name])]
                 for name in by_copies[0:vsize]
                 if len(self.by_name[name]) > 1]
         if rows:
             print('-- Most duplicated names: --')
             printrows(padrows(rows))
         else:
             print('No duplicated cardnames')
     else:
         print('No cards indexed by name?')
     print('--------------------')

     print_longest_key(self.by_type, 'Longest card type:',
                       'No cards indexed by type?')
     print_longest_key(self.by_subtype, 'Longest subtype:',
                       'No cards indexed by subtype?')
     print_longest_key(self.by_supertype, 'Longest supertype:',
                       'No cards indexed by supertype?')
     print('--------------------')

     if self.by_cost:
         lcost = max(self.by_cost, key=len)
         print('Longest mana cost: (' + str(len(lcost)) + ')')
         print('  ' + utils.from_mana(lcost))
         print('\n' + plimit(self.by_cost[lcost][0].encode()) + '\n')
     else:
         print('No cards indexed by cost?')
     if self.by_cmc:
         lcmc = max(self.by_cmc)
         print('Largest cmc: (' + str(lcmc) + ')')
         print('  ' + str(self.by_cmc[lcmc][0].cost))
         print('\n' + plimit(self.by_cmc[lcmc][0].encode()))
     else:
         print('No cards indexed by cmc?')
     print('--------------------')

     # Power and toughness keys are ranked by key length, not by numeric
     # value; the key is passed through utils.from_unary for display.
     if self.by_power:
         lpower = max(self.by_power, key=len)
         print('Largest creature power: ' + utils.from_unary(lpower))
         print('\n' + plimit(self.by_power[lpower][0].encode()) + '\n')
     else:
         print('No cards indexed by power?')
     if self.by_toughness:
         ltoughness = max(self.by_toughness, key=len)
         print('Largest creature toughness: '
               + utils.from_unary(ltoughness))
         print('\n' + plimit(self.by_toughness[ltoughness][0].encode()))
     else:
         print('No cards indexed by toughness?')
     print('--------------------')

     if self.by_textlines:
         llines = max(self.by_textlines)
         print('Most lines of text in a card: ' + str(llines))
         print('\n' + plimit(self.by_textlines[llines][0].encode()) + '\n')
     else:
         print('No cards indexed by line count?')
     if self.by_textlen:
         ltext = max(self.by_textlen)
         print('Most chars in a card text: ' + str(ltext))
         print('\n' + plimit(self.by_textlen[ltext][0].encode()))
     else:
         print('No cards indexed by char count?')
     print('--------------------')

     print('There were ' + str(len(self.invalid_cards)) + ' invalid cards.')
     if dump_invalid:
         for card in self.invalid_cards:
             print('\n' + repr(card.fields))
     elif self.invalid_cards:
         print('Not summarizing.')
     print('--------------------')
     print('There were ' + str(len(self.unparsed_cards))
           + ' unparsed cards.')
     if dump_invalid:
         for card in self.unparsed_cards:
             print('\n' + repr(card.fields))
     elif self.unparsed_cards:
         print('Not summarizing.')
     print('====================')
Пример #3
0
 def summarize(self, hsize=10, vsize=10, cmcsize=20):
     """Print a statistical summary of the indexed cards.

     The summary contains, in order: card counts (valid, invalid, parsed,
     unparsed); the number of unique names; breakdowns by color and by
     number of colors; breakdowns by type, subtype and supertype; a
     breakdown by cmc and the most common mana costs; the most common
     power/toughness and loyalty keys; and the range and most common
     values of the card text line count.

     Every ``print`` is written as a call with a single expression, which
     behaves the same as a print statement under Python 2 and as the print
     function under Python 3.

     Args:
         hsize: Maximum number of keys listed in the type and supertype
             breakdown tables.
         vsize: Maximum number of rows in each ranking table (subtypes,
             subtype combinations, mana costs, p/t, loyalty, line counts).
         cmcsize: Maximum number of cmc values listed in the cmc
             breakdown, taken in ascending order.

     Returns:
         None. The summary is written to standard output.
     """
     def most_populated(index):
         # Keys of `index`, ordered from the most members to the fewest.
         # The sort is stable, so ties keep the index's iteration order.
         return sorted(index, key=lambda k: len(index[k]), reverse=True)

     def print_breakdown(index, keys):
         # Two-row table: the keys, then the member count of each key.
         printrows(padrows([keys, [len(index[k]) for k in keys]]))

     def print_ranking(index, keys, show=None):
         # One [key, member count] row per key; `show`, when given,
         # converts the key into its display form.
         rows = []
         for k in keys:
             shown = k if show is None else show(k)
             rows.append([shown, len(index[k])])
         printrows(padrows(rows))

     print('====================')
     print(str(len(self.cards)) + ' valid cards, '
           + str(len(self.invalid_cards)) + ' invalid cards.')
     print(str(len(self.allcards)) + ' cards parsed, '
           + str(len(self.unparsed_cards)) + ' failed to parse')
     print('--------------------')
     print(str(len(self.by_name)) + ' unique card names')
     print('--------------------')

     print(str(len(self.by_color_inclusive))
           + ' represented colors (including colorless as \'A\'), '
           + str(len(self.by_color)) + ' combinations')
     print('Breakdown by color:')
     # list() keeps the header row a real list even where keys() is a view.
     print_breakdown(self.by_color_inclusive,
                     list(self.by_color_inclusive.keys()))
     print('Breakdown by number of colors:')
     print_breakdown(self.by_color_count, list(self.by_color_count.keys()))
     print('--------------------')

     print(str(len(self.by_type_inclusive)) + ' unique card types, '
           + str(len(self.by_type)) + ' combinations')
     print('Breakdown by type:')
     print_breakdown(self.by_type_inclusive,
                     most_populated(self.by_type_inclusive)[:hsize])
     print('--------------------')

     print(str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
           + str(len(self.by_subtype)) + ' combinations')
     print('-- Popular subtypes: --')
     print_ranking(self.by_subtype_inclusive,
                   most_populated(self.by_subtype_inclusive)[0:vsize])
     print('-- Top combinations: --')
     print_ranking(self.by_subtype,
                   most_populated(self.by_subtype)[0:vsize])
     print('--------------------')

     print(str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
           + str(len(self.by_supertype)) + ' combinations')
     print('Breakdown by supertype:')
     print_breakdown(self.by_supertype_inclusive,
                     most_populated(self.by_supertype_inclusive)[:hsize])
     print('--------------------')

     print(str(len(self.by_cmc)) + ' different CMCs, '
           + str(len(self.by_cost)) + ' unique mana costs')
     print('Breakdown by CMC:')
     # Unlike the other breakdowns this one is ordered by the cmc value
     # itself (ascending), not by how many cards share it.
     print_breakdown(self.by_cmc, sorted(self.by_cmc)[:cmcsize])
     print('-- Popular mana costs: --')
     print_ranking(self.by_cost, most_populated(self.by_cost)[0:vsize],
                   utils.from_mana)
     print('--------------------')

     print(str(len(self.by_pt)) + ' unique p/t combinations')
     if self.by_power and self.by_toughness:
         # The reported value is the longest key's length minus one.
         print('Largest power: ' + str(max(map(len, self.by_power)) - 1)
               + ', largest toughness: '
               + str(max(map(len, self.by_toughness)) - 1))
     print('-- Popular p/t values: --')
     print_ranking(self.by_pt, most_populated(self.by_pt)[0:vsize],
                   utils.from_unary)
     print('--------------------')

     print('Loyalty values:')
     print_ranking(self.by_loyalty,
                   most_populated(self.by_loyalty)[0:vsize],
                   utils.from_unary)
     print('--------------------')

     if self.by_textlen and self.by_textlines:
         print('Card text ranges from ' + str(min(self.by_textlen))
               + ' to ' + str(max(self.by_textlen))
               + ' characters in length')
         print('Card text ranges from ' + str(min(self.by_textlines))
               + ' to ' + str(max(self.by_textlines)) + ' lines')
     print('-- Line counts by frequency: --')
     print_ranking(self.by_textlines,
                   most_populated(self.by_textlines)[0:vsize])
     print('====================')
Пример #4
0
 def summarize(self, hsize=10, vsize=10, cmcsize=20):
     """Print a statistical summary of the indexed cards.

     The summary contains, in order: card counts (valid, invalid, parsed,
     unparsed); the number of unique names; breakdowns by color and by
     number of colors; breakdowns by type, subtype and supertype; a
     breakdown by cmc and the most common mana costs; the most common
     power/toughness and loyalty keys; and the range and most common
     values of the card text line count.

     Every ``print`` is written as a call with a single expression, which
     behaves the same as a print statement under Python 2 and as the print
     function under Python 3.

     Args:
         hsize: Maximum number of keys listed in the type and supertype
             breakdown tables.
         vsize: Maximum number of rows in each ranking table (subtypes,
             subtype combinations, mana costs, p/t, loyalty, line counts).
         cmcsize: Maximum number of cmc values listed in the cmc
             breakdown, taken in ascending order.

     Returns:
         None. The summary is written to standard output.
     """
     def most_populated(index):
         # Keys of `index`, ordered from the most members to the fewest.
         # The sort is stable, so ties keep the index's iteration order.
         return sorted(index, key=lambda k: len(index[k]), reverse=True)

     def print_breakdown(index, keys):
         # Two-row table: the keys, then the member count of each key.
         printrows(padrows([keys, [len(index[k]) for k in keys]]))

     def print_ranking(index, keys, show=None):
         # One [key, member count] row per key; `show`, when given,
         # converts the key into its display form.
         rows = []
         for k in keys:
             shown = k if show is None else show(k)
             rows.append([shown, len(index[k])])
         printrows(padrows(rows))

     print('====================')
     print(str(len(self.cards)) + ' valid cards, '
           + str(len(self.invalid_cards)) + ' invalid cards.')
     print(str(len(self.allcards)) + ' cards parsed, '
           + str(len(self.unparsed_cards)) + ' failed to parse')
     print('--------------------')
     print(str(len(self.by_name)) + ' unique card names')
     print('--------------------')

     print(str(len(self.by_color_inclusive))
           + ' represented colors (including colorless as \'A\'), '
           + str(len(self.by_color)) + ' combinations')
     print('Breakdown by color:')
     # list() keeps the header row a real list even where keys() is a view.
     print_breakdown(self.by_color_inclusive,
                     list(self.by_color_inclusive.keys()))
     print('Breakdown by number of colors:')
     print_breakdown(self.by_color_count, list(self.by_color_count.keys()))
     print('--------------------')

     print(str(len(self.by_type_inclusive)) + ' unique card types, '
           + str(len(self.by_type)) + ' combinations')
     print('Breakdown by type:')
     print_breakdown(self.by_type_inclusive,
                     most_populated(self.by_type_inclusive)[:hsize])
     print('--------------------')

     print(str(len(self.by_subtype_inclusive)) + ' unique subtypes, '
           + str(len(self.by_subtype)) + ' combinations')
     print('-- Popular subtypes: --')
     print_ranking(self.by_subtype_inclusive,
                   most_populated(self.by_subtype_inclusive)[0:vsize])
     print('-- Top combinations: --')
     print_ranking(self.by_subtype,
                   most_populated(self.by_subtype)[0:vsize])
     print('--------------------')

     print(str(len(self.by_supertype_inclusive)) + ' unique supertypes, '
           + str(len(self.by_supertype)) + ' combinations')
     print('Breakdown by supertype:')
     print_breakdown(self.by_supertype_inclusive,
                     most_populated(self.by_supertype_inclusive)[:hsize])
     print('--------------------')

     print(str(len(self.by_cmc)) + ' different CMCs, '
           + str(len(self.by_cost)) + ' unique mana costs')
     print('Breakdown by CMC:')
     # Unlike the other breakdowns this one is ordered by the cmc value
     # itself (ascending), not by how many cards share it.
     print_breakdown(self.by_cmc, sorted(self.by_cmc)[:cmcsize])
     print('-- Popular mana costs: --')
     print_ranking(self.by_cost, most_populated(self.by_cost)[0:vsize],
                   utils.from_mana)
     print('--------------------')

     print(str(len(self.by_pt)) + ' unique p/t combinations')
     if self.by_power and self.by_toughness:
         # The reported value is the longest key's length minus one.
         print('Largest power: ' + str(max(map(len, self.by_power)) - 1)
               + ', largest toughness: '
               + str(max(map(len, self.by_toughness)) - 1))
     print('-- Popular p/t values: --')
     print_ranking(self.by_pt, most_populated(self.by_pt)[0:vsize],
                   utils.from_unary)
     print('--------------------')

     print('Loyalty values:')
     print_ranking(self.by_loyalty,
                   most_populated(self.by_loyalty)[0:vsize],
                   utils.from_unary)
     print('--------------------')

     if self.by_textlen and self.by_textlines:
         print('Card text ranges from ' + str(min(self.by_textlen))
               + ' to ' + str(max(self.by_textlen))
               + ' characters in length')
         print('Card text ranges from ' + str(min(self.by_textlines))
               + ' to ' + str(max(self.by_textlines)) + ' lines')
     print('-- Line counts by frequency: --')
     print_ranking(self.by_textlines,
                   most_populated(self.by_textlines)[0:vsize])
     print('====================')