Пример #1
0
def explore(filename, attrs, cond=False, wait=False, quiet=False):
    """Print objects (or selected attributes of them) read from a file.

    The objects are whatever ``iterload`` yields for the opened file; each
    printed item is serialised with ``json.dumps(..., indent=2)``.

    Args:
        filename: Path of the file to read (opened in binary mode).
        attrs: Non-empty sequence of strings, interpreted as follows:
            * ``attrs[0] == "all"`` -- print every object in full
              (``cond`` is ignored in this case).
            * ``cond`` is false -- every entry is an attribute name; only
              those attributes of each object are printed.
            * ``cond`` is true -- the last entry is a conditional
              expression handed to ``Flt.filter_exp``; any preceding
              entries are attribute names.  With no preceding entries the
              whole matching object is printed.
        cond: Whether the last entry of ``attrs`` is a filter expression.
        wait: Pause for the user to press Enter after each printed object.
        quiet: Suppress per-object output (only the total is printed).
            Implies ``wait=False``.

    Raises:
        SystemExit: with status 1 if ``attrs`` is empty or names an
            attribute that an object does not have.
    """
    import sys

    # With nothing printed there is nothing to pause on.
    if quiet:
        wait = False

    if not attrs:
        print("ERROR: no attributes given")
        sys.exit(1)

    # Decide once what the arguments mean instead of on every object.
    #   expression -- filter to apply, or None for "accept everything"
    #   wanted     -- attribute names to extract, or None for "whole object"
    if attrs[0] == "all":
        expression, wanted = None, None
    elif cond:
        expression = attrs[-1]
        # An empty slice means the expression was the only argument.
        wanted = attrs[:-1] or None
    else:
        expression, wanted = None, attrs

    count = 0
    with open(filename, "rb") as fp:
        # iterate through objects in file
        for obj in iterload(fp):
            if expression is not None and not Flt.filter_exp(expression, obj):
                continue

            if wanted is None:
                stuff = obj
            else:
                try:
                    stuff = {attr: obj[attr] for attr in wanted}
                except KeyError:
                    # Report exactly the attribute list that was requested.
                    print("ERROR: " + str(wanted) + " contains an invalid attribute name ")
                    sys.exit(1)

            if not quiet:
                print(json.dumps(stuff, indent=2))
            count += 1
            if wait:
                # Python 2 builtin; blocks until the user presses Enter.
                raw_input()

    print("Total objects found: " + str(count))
Пример #2
0
def create_lookup_dict(infile, args, id_to_indx=None, indx_to_id=None, pick=False, cond=False):
    """Build id -> index and index -> id dictionaries from a file of objects.

    The objects are whatever ``iterload`` yields for the opened file.  Each
    distinct value of the id attribute is assigned an integer index:

    * If only an id attribute is given, indices follow the order in which
      ids are first seen in the file.
    * If a second attribute is given, ids are ranked by that attribute's
      value in descending order and the rank becomes the index.

    Args:
        infile: Path of the file to read (opened in binary mode).
        args: ``[id_attr]`` or ``[id_attr, sort_attr]``; when ``cond`` is
            true the conditional expression is appended as the last entry
            (so at most three entries).  A ``sort_attr`` of ``"none"``
            (any case) is treated as absent.
        id_to_indx: File name, relative to ``pickle_directory``, for the
            pickled id -> index dict.  Defaults to ``"id_to_indx.p"``.
            Only used when ``pick`` is true.
        indx_to_id: File name, relative to ``pickle_directory``, for the
            pickled index -> id dict.  Defaults to ``"indx_to_id.p"``.
            Only used when ``pick`` is true.
        pick: Pickle the two dictionaries instead of printing them.
        cond: Whether the last entry of ``args`` is a filter expression
            for ``Flt.filter_exp``; objects failing it are skipped.

    Raises:
        SystemExit: with status 1 if too many arguments are given with
            ``cond`` or an object lacks one of the named attributes.
    """
    import sys

    # if pickling, resolve the output paths (falling back to default names)
    if pick:
        id_to_indx = pickle_directory + ("id_to_indx.p" if id_to_indx is None else id_to_indx)
        indx_to_id = pickle_directory + ("indx_to_id.p" if indx_to_id is None else indx_to_id)

    # attr1 is used as the unique id; attr2 (optional) is the sort key
    attr1 = args[0]
    attr2 = "none"
    if cond:
        if len(args) > 3:
            # Continuing here would mis-parse the arguments, so stop.
            print("ERROR: invalid number of arguments")
            sys.exit(1)
        if len(args) == 3:
            attr2 = args[1]
    elif len(args) == 2:
        attr2 = args[1]

    # Without a sort attribute the index is simply the insertion order.
    use_insertion_order = attr2.lower() == "none"

    # lookup dictionary to hold id's and index number (or sort value)
    lookup = {}
    with open(infile, "rb") as fp:
        # iterate through objects in file
        for obj in iterload(fp):
            # when filtering, skip objects that fail the expression
            if cond and not Flt.filter_exp(args[-1], obj):
                continue

            # make sure valid attribute names were given
            try:
                key = obj[attr1]
            except KeyError:
                print("ERROR: %s, is not a valid attribute name " % attr1)
                sys.exit(1)

            if use_insertion_order:
                # len(lookup) is the number of ids stored so far, i.e. the
                # next free index.
                value = len(lookup)
            else:
                try:
                    value = obj[attr2]
                except KeyError:
                    print("ERROR: %s, is not a valid attribute name " % attr2)
                    sys.exit(1)

            # update dictionary, keeping the first occurrence of each id
            if key in lookup:
                # %-formatting so that non-string ids (e.g. ints) do not
                # raise TypeError the way string concatenation would.
                print("%s already in lookup dictionary. Skipping duplicate" % (key,))
            else:
                lookup[key] = value

    # only sort and re-index when a sort attribute was used
    if not use_insertion_order:
        # rank ids by descending attr2 value
        ranked = sorted(lookup.items(), key=operator.itemgetter(1), reverse=True)
        # replace each stored sort value with the id's rank
        for indx, (key, _) in enumerate(ranked):
            lookup[key] = indx

    # dict for looking up id from an index
    lookup2 = {indx: key for key, indx in lookup.items()}

    # Pickle it
    if pick:
        with open(id_to_indx, "wb") as outfp:
            pickle.dump(lookup, outfp)
            print("Pickled id --> index dictionary into: " + id_to_indx)
        with open(indx_to_id, "wb") as outfp:
            pickle.dump(lookup2, outfp)
            print("Pickled index --> id dictionary into: " + indx_to_id)
    # print to test that everything is working
    else:
        print(json.dumps(lookup, indent=3))
        # print("") emits a blank line on both Python 2 and 3; a bare
        # print() would output "()" under Python 2.
        print("")
        print(json.dumps(lookup2, indent=3))