def applymodel(modelpath, metadatapath, outpath):
    '''
    Apply the pickled model at modelpath to the poetry feature files and
    write the resulting predictions to outpath as CSV.

    modelpath    -- path to the pickled model handed to train.apply_pickled_model
    metadatapath -- path to the metadata file handed to the same call
    outpath      -- destination passed to the result's to_csv method

    The folder of feature files and their extension are fixed inside
    this function.
    '''
    feature_dir = '/Users/tunder/Dropbox/GenreProject/python/reception/poetryEF/fromEF'
    suffix = '.tsv'

    # The returned object only needs to expose to_csv here; presumably it
    # is a pandas DataFrame -- confirm against train.apply_pickled_model.
    predictions = train.apply_pickled_model(
        modelpath, feature_dir, suffix, metadatapath)
    print('Got predictions for that model.')

    predictions.to_csv(outpath)
def applymodel():
    '''
    Interactively apply a pickled model to the fiction feature files.

    The model path is read from standard input. Metadata is read from
    'mergedmeta.csv', and the predictions are written back to that same
    filename, so the metadata file is overwritten by the output.
    '''
    modelpath = input('Path to model? ')

    feature_dir = '/Users/tunder/Dropbox/GenreProject/python/reception/fiction/fromEF'
    suffix = '.tsv'
    # Input metadata and output share one filename on purpose or by
    # accident -- either way the write below replaces the file read here.
    meta_file = 'mergedmeta.csv'
    destination = 'mergedmeta.csv'

    predictions = train.apply_pickled_model(
        modelpath, feature_dir, suffix, meta_file)
    print('Got predictions for that model.')

    predictions.to_csv(destination)
def applymodel(modelpath, metapath, outpath):
    '''
    This function applies a specified model (modelpath) to a specified
    metadata set (metapath), and sends the results to outpath.

    modelpath -- path to the pickled model handed to train.apply_pickled_model
    metapath  -- path to the metadata file describing the volumes to score
    outpath   -- destination passed to the result's to_csv method

    The folder of feature files and their extension are fixed inside
    this function.
    '''
    sourcefolder = '/Users/tunder/data/character_subset/'
    extension = '.tsv'

    # Previously a chained assignment overwrote metapath with the literal
    # '../metadata/balanced_character_subset.csv', so the caller's argument
    # was silently ignored. The argument is now used as given; pass that
    # literal path explicitly to get the old behaviour.
    metadatapath = metapath

    newmetadict = train.apply_pickled_model(
        modelpath, sourcefolder, extension, metadatapath)
    print('Got predictions for that model.')

    newmetadict.to_csv(outpath)
featurestart = 3000 featureend = 4400 featurestep = 50 genre_gridsearch('alldetective', c_range, featurestart, featureend, featurestep, positive_tags) elif args[1] == 'compare': # This routine assumes that you have already trained models for classes A and B. # It compares them. firstmodel = args[2] secondmodel = args[3] firstpath = '/Users/tunder/Dropbox/fiction/results/' + firstmodel + '.pkl' secondpath = '/Users/tunder/Dropbox/fiction/results/' + secondmodel + '.pkl' metadatapath = '/Users/tunder/Dropbox/fiction/meta/concatenatedmeta.csv' sourcefolder = '/Users/tunder/Dropbox/fiction/newtsvs' extension = '.tsv' firstonall = train.apply_pickled_model(firstpath, sourcefolder, extension, metadatapath) # firstonall.set_index('docid', inplace = True) secondonall = train.apply_pickled_model(secondpath, sourcefolder, extension, metadatapath) # secondonall.set_index('docid', inplace = True) firstonself = pd.read_csv('/Users/tunder/Dropbox/fiction/results/' + firstmodel + '.csv', index_col = 'volid') secondonself = pd.read_csv('/Users/tunder/Dropbox/fiction/results/' + secondmodel + '.csv', index_col = 'volid') firsttotal, firstright = comparison(firstonself, secondonall, secondmodel) secondtotal, secondright = comparison(secondonself, firstonall, firstmodel) print(firsttotal, firstright) print("Accuracy of " + secondmodel + " on volumes originally included in "+ firstmodel + ": " + str(firstright/firsttotal)) print(secondtotal, secondright) print("Accuracy of " + firstmodel + " on volumes originally included in "+ secondmodel + ": " + str(secondright/secondtotal)) print("Total accuracy: ", (firstright + secondright) / (firsttotal + secondtotal))
def futurepredict():
    '''
    Measure how differently an earlier-period model and a later-period
    model score the same volumes, and write the per-volume average
    difference to '../plotdata/sfsurprises.tsv'.

    For each of 5 iterations and each center year in 1930, 1940, 1950,
    1960, two model names are built: one covering (center - 30) to center
    and one covering center to (center + 30). The earlier model's pickle
    is applied to all volumes via train.apply_pickled_model; the later
    model's own results are read from its CSV. For every volume in the
    later model's results whose 'dateused' is within 1930-1976 and whose
    'realclass' is nonzero, the difference (earlier model's prediction
    minus the later model's 'logistic' value) is recorded.

    Takes no arguments and returns nothing; all paths are fixed inside
    the function.
    '''
    differences = dict()
    dates = dict()
    authors = dict()
    titles = dict()
    genders = dict()

    # These do not change between iterations, so they are set once.
    metadatapath = '../metadata/concatenatedmeta.csv'
    sourcefolder = '/Users/tunder/Dropbox/fiction/newtsvs'
    extension = '.tsv'

    for iteration in range(5):
        print('ITERATION: ' + str(iteration))

        for center in range(1930, 1970, 10):
            print('CENTER: ' + str(center))

            floor = center - 30
            firstmodel = ("SF" + str(iteration) + '-' + str(floor)
                          + "-" + str(center))
            ceiling = center + 30
            secondmodel = ("SF" + str(iteration) + '-' + str(center)
                           + "-" + str(ceiling))

            firstpath = '../modeloutput/' + firstmodel + '.pkl'
            firstonall = train.apply_pickled_model(
                firstpath, sourcefolder, extension, metadatapath)
            secondonself = pd.read_csv(
                '../modeloutput/' + secondmodel + '.csv', index_col='volid')

            for docid in secondonself.index:
                date = secondonself.loc[docid, 'dateused']
                realclass = int(secondonself.loc[docid, 'realclass'])
                selfprediction = secondonself.loc[docid, 'logistic']

                if date < 1930 or date > 1976:
                    continue
                if realclass == 0:
                    continue

                # Get the prediction about this book made by a model in
                # the past. The two frames may disagree on whether ids
                # are strings, so the lookup is retried with str(docid).
                # Only lookup failures are caught: a bare except here
                # would also swallow KeyboardInterrupt and SystemExit.
                try:
                    predictionfrompast = firstonall.loc[docid, firstmodel]
                except (KeyError, TypeError):
                    try:
                        predictionfrompast = firstonall.loc[
                            str(docid), firstmodel]
                    except (KeyError, TypeError):
                        # Report the volume that could not be matched
                        # and move on to the next one.
                        print(docid, firstmodel,
                              secondonself.loc[docid, 'title'])
                        continue

                diff = predictionfrompast - selfprediction
                differences.setdefault(docid, []).append(diff)

                # Later iterations simply overwrite these with the
                # values from the most recent results file.
                dates[docid] = date
                authors[docid] = secondonself.loc[docid, 'author']
                titles[docid] = secondonself.loc[docid, 'title']
                genders[docid] = secondonself.loc[docid, 'gender']

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires for files handed to a writer.
    with open('../plotdata/sfsurprises.tsv', mode='w', encoding='utf-8',
              newline='') as f:
        writer = csv.DictWriter(
            f, delimiter='\t',
            fieldnames=['docid', 'date', 'gender', 'diff', 'author', 'title'])
        writer.writeheader()

        for docid, date in dates.items():
            o = dict()
            o['docid'] = docid
            o['date'] = date
            # Average the differences collected across all iterations
            # and centers in which this volume appeared.
            o['diff'] = sum(differences[docid]) / len(differences[docid])
            o['author'] = authors[docid]
            o['title'] = titles[docid]
            o['gender'] = genders[docid]
            writer.writerow(o)