def if_cited_first_n_years(self, pid_in, n):
    """Tell whether ``pid_in`` received a citation fewer than ``n`` years after it.

    For every id returned by ``self.get_citing(pid_in)`` the lag is the
    difference between ``get_year_from_id`` of the citing id and
    ``get_year_from_id`` of ``pid_in``.

    Returns:
        True when the smallest lag is strictly below ``n``; False when it
        is not, or when there are no citing ids at all.
    """
    year_gaps = []
    for citing_pid in self.get_citing(pid_in):
        year_gaps.append(get_year_from_id(citing_pid) - get_year_from_id(pid_in))

    # Nothing cites this paper, so it cannot have been cited early.
    if not year_gaps:
        return False

    # Only the earliest citation matters for the "first n years" question.
    return min(year_gaps) < n
def get_prominence(self, pid, ref, fc):
    """Score the citation growth of ``pid`` between two cutoff years.

    Let ``cr`` be the number of citing ids whose ``get_year_from_id`` value
    is ``<= ref`` and ``fr`` the number whose value is ``<= fc``.

    Args:
        pid: id passed to ``self.get_citing`` to obtain the citing ids.
        ref: reference cutoff year (inclusive) used for ``cr``.
        fc: later cutoff year (inclusive) used for ``fr``.

    Returns:
        -1 if ``fr < cr`` (only possible when ``fc`` is earlier than ``ref``),
        0 if there are no citations up to ``fc``,
        otherwise the float ``(1 - cr/fr) * (1 - 1/fr)``.
    """
    # Resolve each citing id's year once; both counts are derived from it.
    citing_years = [get_year_from_id(cid) for cid in self.get_citing(pid)]
    cr = sum(1 for year in citing_years if year <= ref)
    fr = sum(1 for year in citing_years if year <= fc)

    if fr < cr:
        return -1
    if fr < 1:
        return 0

    # Both counts are ints: without an explicit float the divisions below
    # are floor divisions on Python 2 and the score degenerates to 0 or 1.
    fr = float(fr)
    return (1 - cr / fr) * (1 - 1 / fr)
def main(self):
    """Generate the experiment data, response and time-step files.

    Command-line arguments:
        sys.argv[1] -- ``n``: papers with ``year <= n`` form the pool that is
            split into training and model-test sets.
        sys.argv[2] -- ``diff`` (optional, default 3): the test set consists
            of the papers whose year equals ``n + diff``.

    Steps:
        1. Fetch the papers via ``aanmeta().get_restricted_papers(1980, 2006)``.
        2. Randomly sample 80% (rounded up) of the ``year <= n`` pool as the
           training set; the remainder is the model-test set.
        3. Load ``1980_2006.pruned_feats`` into ``self.feats`` (pid -> list of
           features, split on ``"<>"``).
        4. Write the three data files through ``self.write_data`` and the
           three response files through ``self.write_response`` (``n`` is
           passed to ``write_response`` for all three sets).
        5. Write a ``pid<TAB>year`` time-step file for the papers whose year
           lies in ``[1980, n]``.

    Side effects: sets ``self.feats`` and ``self.cnw``; writes files under
    ``experiment_files/``; prints the total number of papers selected.
    """
    n = int(sys.argv[1])
    # argv entries are strings; convert so ``n + diff`` is integer arithmetic
    # instead of raising TypeError when the argument is supplied.
    diff = int(sys.argv[2]) if len(sys.argv) > 2 else 3
    init = 1980
    last = 2006
    test_year = n + diff

    aan = aanmeta()
    all_papers = aan.get_restricted_papers(init, last)
    till_n = [p for p in all_papers if p.year <= n]
    training = random.sample(till_n, int(math.ceil(0.8 * len(till_n))))
    model_test = [p for p in till_n if p not in training]
    test = [p for p in all_papers if p.year == test_year]
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Total files: %d" % (len(training) + len(model_test) + len(test)))

    self.feats = {}
    with open("1980_2006.pruned_feats", "r") as featfile:
        for line in featfile:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line would fail the two-field unpack.
                continue
            pid, featstr = line.split("\t")
            self.feats[pid] = featstr.split("<>")

    training_fname = "experiment_files/1980_%s.train.txt" % n
    model_test_fname = "experiment_files/1980_%s.modeltest.txt" % n
    test_fname = "experiment_files/%s.test.txt" % test_year
    self.write_data(training, training_fname)
    self.write_data(model_test, model_test_fname)
    self.write_data(test, test_fname)

    # creating the response files
    self.cnw = CitationNetwork()
    response_targets = (
        (training, "experiment_files/1980_%s.train.resp.txt" % n),
        (model_test, "experiment_files/1980_%s.modeltest.resp.txt" % n),
        (test, "experiment_files/%s.test.resp.txt" % test_year),
    )
    for papers, resp_fname in response_targets:
        # ``with`` guarantees the handle is flushed and closed even if
        # write_response raises.
        with open(resp_fname, "w") as resp_file:
            self.write_response(papers, resp_file, n)

    # write the time step files
    ts_fname = "experiment_files/%s_%s_timesteps.txt" % (init, n)
    with open(ts_fname, "w") as ts_file:
        for paper in all_papers:
            if init <= paper.year <= n:
                ts_file.write("%s\t%d\n" % (paper.pid, get_year_from_id(paper.pid)))