def explore(filename, attrs, cond=False, wait=False, quiet=False):
    """Print objects read from ``filename`` and report how many were found.

    Objects are produced by ``iterload(fp)`` (defined elsewhere; presumably
    it yields one decoded JSON object at a time -- confirm against its
    definition).

    How ``attrs`` is interpreted:

    * ``attrs[0] == "all"``: every object is printed in full; ``cond`` is
      ignored.
    * ``cond`` true and a single entry: that entry is a filter expression
      handed to ``Flt.filter_exp``; matching objects are printed in full.
    * ``cond`` true and several entries: the last entry is the filter
      expression and the preceding entries are the attribute names to print
      for each matching object.
    * ``cond`` false: every entry is an attribute name to print.

    Args:
        filename: Path of the file to read (opened in binary mode).
        attrs: Non-empty sequence of strings, interpreted as described above.
        cond: Whether ``attrs`` ends with a filter expression.
        wait: Pause on ``raw_input()`` after each counted object.
        quiet: Suppress per-object output (this also disables ``wait``); the
            final total is still printed.

    Raises:
        SystemExit: via ``exit()`` after printing an error message when a
            requested attribute is missing from an object.
    """
    # Pausing between objects is pointless when nothing is being shown.
    if quiet:
        wait = False

    show_all = attrs[0] == "all"
    filtering = bool(cond) and not show_all

    # ``wanted`` is None when whole objects are printed, otherwise the
    # attribute names to project out of every counted object.
    if show_all:
        wanted = None
    elif filtering:
        # The last entry is the filter expression; with nothing in front of
        # it the whole object is printed.
        wanted = attrs[:-1] or None
    else:
        wanted = attrs

    count = 0
    with open(filename, "rb") as fp:
        # iterate through objects in file
        for obj in iterload(fp):
            if filtering and not Flt.filter_exp(attrs[-1], obj):
                continue

            if wanted is None:
                selected = obj
            else:
                # Only the attribute lookup can raise KeyError, so keep the
                # try body down to that single statement.
                try:
                    selected = {attr: obj[attr] for attr in wanted}
                except KeyError:
                    # Report exactly the attribute names that were requested.
                    print("ERROR: " + str(wanted)
                          + " contains an invalid attribute name ")
                    exit()

            if not quiet:
                print(json.dumps(selected, indent=2))
            count += 1
            if wait:
                raw_input()

    print("Total objects found: " + str(count))
def create_lookup_dict(infile, args, id_to_indx=None, indx_to_id=None,
                       pick=False, cond=False):
    """Build id -> index and index -> id dictionaries from ``infile``.

    Objects are produced by ``iterload(fp)`` (defined elsewhere; presumably
    it yields one decoded JSON object at a time -- confirm against its
    definition).

    ``args[0]`` names the attribute used as the unique id.  A second
    attribute is taken from ``args[1]`` when ``cond`` is false and ``args``
    has two entries, or when ``cond`` is true and ``args`` has three
    entries.  When ``cond`` is true, ``args[-1]`` is a filter expression
    passed to ``Flt.filter_exp`` and objects that do not satisfy it are
    skipped.

    * With a second attribute (anything other than ``"none"``, compared
      case-insensitively), ids are sorted by that attribute's value in
      descending order and the index is the position in that ordering.
    * Without one, the index is the order in which each id first appears.

    Objects whose id has already been seen are skipped with a notice.

    Args:
        infile: Path of the file to read (opened in binary mode).
        args: Sequence of strings as described above.
        id_to_indx: File name (relative to ``pickle_directory``) for the
            pickled id -> index dictionary; defaults to ``"id_to_indx.p"``.
            Only used when ``pick`` is true.
        indx_to_id: File name (relative to ``pickle_directory``) for the
            pickled index -> id dictionary; defaults to ``"indx_to_id.p"``.
            Only used when ``pick`` is true.
        pick: Pickle both dictionaries to disk when true; otherwise print
            them as JSON.
        cond: Whether ``args`` ends with a filter expression.

    Raises:
        SystemExit: via ``exit()`` after printing an error message when an
            object lacks one of the requested attributes.
    """
    # Resolve the output paths only when the dictionaries will be pickled.
    if pick:
        if id_to_indx is None:
            id_to_indx = "id_to_indx.p"
        id_to_indx = pickle_directory + id_to_indx
        if indx_to_id is None:
            indx_to_id = "indx_to_id.p"
        indx_to_id = pickle_directory + indx_to_id

    # attr1 is the unique id; attr2 (optional) is the value to rank ids by.
    attr1 = args[0]
    attr2 = "none"
    if cond:
        if len(args) == 3:
            attr2 = args[1]
        if len(args) > 3:
            # NOTE(review): this only reports the problem and then carries
            # on without a second attribute -- confirm that is intended.
            print("ERROR: invalid number of arguments")
    elif len(args) == 2:
        attr2 = args[1]

    rank_by_attr2 = attr2.lower() != "none"

    # lookup dictionary to hold ids and index numbers
    lookup = {}
    count = 0
    with open(infile, "rb") as fp:
        # iterate through objects in file
        for obj in iterload(fp):
            # when filtering, skip objects that fail the expression
            if cond and not Flt.filter_exp(args[-1], obj):
                continue

            # make sure valid attribute names were given
            try:
                attr_value = obj[attr1]
            except KeyError:
                print("ERROR: %s, is not a valid attribute name " % attr1)
                exit()
            if rank_by_attr2:
                try:
                    attr2_value = obj[attr2]
                except KeyError:
                    print("ERROR: %s, is not a valid attribute name " % attr2)
                    exit()

            if attr_value in lookup:
                # %-formatting so that non-string ids (e.g. integers) do not
                # raise TypeError the way string concatenation would.
                print("%s already in lookup dictionary. Skipping duplicate"
                      % (attr_value,))
                continue

            if rank_by_attr2:
                # Temporarily store the ranking value; it is replaced by the
                # index once every object has been read.
                lookup[attr_value] = attr2_value
            else:
                lookup[attr_value] = count
                count += 1

    # only sort and assign indices when a second attribute was given
    if rank_by_attr2:
        # ids ordered by descending attr2 value
        ranked = sorted(lookup.items(), key=operator.itemgetter(1),
                        reverse=True)
        # replace each stored value with the id's position in that ordering
        for indx, (key, _value) in enumerate(ranked):
            lookup[key] = indx

    # dict for looking up an id from an index
    lookup2 = {indx: key for key, indx in lookup.items()}

    if pick:
        with open(id_to_indx, "wb") as outfp:
            pickle.dump(lookup, outfp)
        print("Pickled id --> index dictionary into: " + id_to_indx)
        with open(indx_to_id, "wb") as outfp:
            pickle.dump(lookup2, outfp)
        print("Pickled index --> id dictionary into: " + indx_to_id)
    else:
        # print both dictionaries to check that everything is working
        print(json.dumps(lookup, indent=3))
        # An explicit empty string gives a blank line; a bare ``print()``
        # under the Python 2 print statement would emit "()".
        print("")
        print(json.dumps(lookup2, indent=3))