def gen_dataset_files(pdates, busjson, revjson, tipjson, outdir):
    """Generate and save one dataset file per prediction date.

    The business, review and tip objects are each loaded once and then
    reused for every prediction date.

    Args:
        pdates: iterable of prediction date strings.  Each string is parsed
            with ``str2date`` and is also used as the base name of the
            output file for that date.
        busjson: path of the business JSON file to load.
        revjson: path of the review JSON file to load.
        tipjson: path of the tip JSON file to load.
        outdir: directory that receives one ``<pdate>.json`` file per
            prediction date.
    """
    # imported locally so the function does not rely on a module-level import
    import os

    # load business objects (the second value returned by load_objects is
    # not used here)
    print('Loading business objects from %s...' % busjson)
    all_buses, _ = jsonutils.load_objects(busjson)

    # load review objects
    print('loading review objects from %s...' % revjson)
    all_reviews, _ = jsonutils.load_objects(revjson)

    # load tip objects
    print('loading tip objects from %s...' % tipjson)
    all_tips, _ = jsonutils.load_objects(tipjson)

    # generate the datasets
    for pdatestr in pdates:
        # convert prediction date to int (seconds since epoch)
        pdate = date2int(str2date(pdatestr))

        # generate the dataset for the specified prediction date
        print('generating dataset for prediction date %s (%d)...'
              % (pdatestr, pdate))
        buses = gen_dataset(pdate, all_buses, all_reviews, all_tips)

        # build the output path with os.path.join: plain concatenation with
        # '/' doubled the separator when outdir ended in one and pointed at
        # the filesystem root when outdir was empty
        outfile = os.path.join(outdir, pdatestr + '.json')

        # write dataset to file
        print('writing %d JSON objects to %s...' % (len(buses), outfile))
        jsonutils.save_objects(buses, outfile)
def run_script(pdate_str, busjson, revjson, tipjson, senticsv, outfile):
    """Build the data set for a single prediction date and save it.

    Args:
        pdate_str: prediction date as a string; parsed with
            ``du.str2date`` and converted with ``du.date2int``.
        busjson: path of the business JSON file to load.
        revjson: path of the review JSON file to load.
        tipjson: path of the tip JSON file to load.
        senticsv: path of the sentiment ranking file; it is read with
            ``cu.load_matrix(..., has_hdr=False)``.
        outfile: path the generated data set is written to.
    """
    # convert pdate to seconds since the epoch; this happens before any file
    # is loaded, so a date that fails to parse is reported immediately
    pdate = du.date2int(du.str2date(pdate_str))

    # NOTE: every message is printed with the single-argument print(...)
    # form, which behaves the same as a Python 2 print statement and as the
    # Python 3 print function

    # load business objects (the second value returned by load_objects is
    # not used here)
    print('Loading business objects from %s...' % busjson)
    all_buses, _ = ju.load_objects(busjson)

    # load review objects
    print('loading review objects from %s...' % revjson)
    all_reviews, _ = ju.load_objects(revjson)

    # load tip objects
    print('loading tip objects from %s...' % tipjson)
    all_tips, _ = ju.load_objects(tipjson)

    # load sentiment ranking data derived from tip and review data
    print('loading sentiment rankings from %s...' % senticsv)
    all_senti = cu.load_matrix(senticsv, has_hdr=False)

    # generate a data set for the specified prediction date
    print('generate data set for prediction date %s...' % pdate_str)
    buses = du.gen_dataset(pdate, all_buses, all_reviews, all_tips, all_senti)

    # write data set to file
    print('writing generated data set to %s...' % outfile)
    ju.save_objects(buses, outfile)
def filter_yelp_data(in_busjson, out_busjson, in_revjson, out_revjson,
                     in_tipjson, out_tipjson, in_demoeconcsv):
    """Load the restaurant, review and tip data and write filtered copies.

    Restaurants are read with ``load_restaurants``; reviews, tips and the
    demographic/economic CSV are handled by
    ``process_review_tip_census_data``, which also returns the (possibly
    updated) restaurant objects.  Each of the three collections is then
    saved as JSON with the matching attribute list from ``fi`` passed as
    ``attfilt``.

    Args:
        in_busjson: path of the input business JSON file.
        out_busjson: path the business objects are written to.
        in_revjson: path of the input review JSON file.
        out_revjson: path the review objects are written to.
        in_tipjson: path of the input tip JSON file.
        out_tipjson: path the tip objects are written to.
        in_demoeconcsv: path handed to ``process_review_tip_census_data``
            (presumably census demographic/economic data -- confirm
            against that helper).
    """
    # look up the attribute lists up front, before any file is read
    bus_filter, rev_filter, tip_filter = (fi.bus_feat_names,
                                          fi.rev_feat_names,
                                          fi.tip_feat_names)

    # load the restaurant objects (second return value is not used)
    print('loading business JSON objects from %s...' % in_busjson)
    restaurants, _ = load_restaurants(in_busjson)

    # combine the restaurants with the review, tip and census data
    restaurants, reviews, tips = process_review_tip_census_data(
        in_revjson, in_tipjson, in_demoeconcsv, restaurants)

    # write business, review and tip objects, in that order
    outputs = (
        ('writing business JSON object to %s...',
         restaurants, out_busjson, bus_filter),
        ('writing review JSON objects to %s...',
         reviews, out_revjson, rev_filter),
        ('writing tip JSON objects to %s...',
         tips, out_tipjson, tip_filter),
    )
    for message, records, path, keep in outputs:
        print(message % path)
        jsonutils.save_objects(records, path, attfilt=keep)