示例#1
0
 def if_cited_first_n_years(self, pid_in, n):
     """Tell whether ``pid_in`` received a citation within its first ``n`` years.

     The lag of a citation is the year of the citing paper minus the year
     of ``pid_in`` (both obtained through ``get_year_from_id``).

     Args:
         pid_in: id of the cited paper.
         n: upper bound (exclusive) on the lag of the earliest citation.

     Returns:
         True if the smallest lag is strictly less than ``n``; False if it
         is not, or if ``self.get_citing(pid_in)`` yields no papers.
     """
     citing_ids = list(self.get_citing(pid_in))
     if not citing_ids:
         return False

     # The cited paper's year is the same for every citing paper, so look
     # it up once instead of once per citation.
     cited_year = get_year_from_id(pid_in)

     # Only the earliest citation matters; min() avoids sorting every lag.
     earliest_lag = min(get_year_from_id(pid) - cited_year for pid in citing_ids)
     return earliest_lag < n
示例#2
0
    def get_prominence(self, pid, ref, fc):
        """Score how much the citation count of ``pid`` grows from ``ref`` to ``fc``.

        ``cr`` is the number of citing papers whose year (as given by
        ``get_year_from_id``) is ``<= ref``; ``fr`` is the number whose year
        is ``<= fc``.

        Args:
            pid: id of the cited paper.
            ref: reference year.
            fc: later year that the count at ``ref`` is compared against.

        Returns:
            -1 if ``fr < cr`` (which can only happen when ``fc`` is earlier
            than ``ref``), 0 if the paper has no citations up to ``fc``,
            and otherwise the float ``(1 - cr/fr) * (1 - 1/fr)``.
        """
        # Resolve each citing paper's year once and reuse it for both counts.
        citing_years = [get_year_from_id(cid) for cid in self.get_citing(pid)]
        cr = sum(1 for year in citing_years if year <= ref)
        fr = sum(1 for year in citing_years if year <= fc)

        if fr < cr:
            return -1
        if fr < 1:
            return 0

        # Force true division: under Python 2 both ``cr/fr`` and ``1/fr`` are
        # integer divisions, which would collapse the score to 0 or 1.
        fr = float(fr)
        return (1 - cr / fr) * (1 - 1 / fr)
    def main(self):
        """Generate the experiment data, response and time-step files.

        Reads two command-line arguments from ``sys.argv``:

        * ``argv[1]`` -- ``n``, the cutoff year.  Papers with ``year <= n``
          are split at random, 80% into a training set and the rest into a
          model-test set.
        * ``argv[2]`` -- ``diff`` (optional, default 3).  Papers whose year
          equals ``n + diff`` form the test set.

        Side effects: prints the combined size of the three sets, populates
        ``self.feats`` from ``1980_2006.pruned_feats``, sets ``self.cnw``,
        and writes the data files (via ``self.write_data``), the response
        files (via ``self.write_response``) and a time-step file under
        ``experiment_files/``.
        """
        n = int(sys.argv[1])
        # Convert to int: a raw argv string would make every ``n + diff``
        # below raise TypeError.
        diff = int(sys.argv[2]) if len(sys.argv) > 2 else 3

        init = 1980
        last = 2006

        aan = aanmeta()
        all_papers = aan.get_restricted_papers(init, last)

        till_n = [p for p in all_papers if p.year <= n]

        # Random 80/20 split of the papers up to the cutoff year.
        training = random.sample(till_n, int(math.ceil(0.8 * len(till_n))))
        # NOTE(review): list membership makes this quadratic; kept because
        # it is not visible here whether paper objects are hashable.
        model_test = [p for p in till_n if p not in training]
        test = [p for p in all_papers if p.year == (n + diff)]

        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Total files: %d" % (len(training) + len(model_test) + len(test)))

        # Load the per-paper features: one "<pid>\t<feat><><feat>..." per line.
        self.feats = {}
        with open("1980_2006.pruned_feats", "r") as featfile:
            for line in featfile:
                pid, featstr = line.strip().split("\t")
                self.feats[pid] = featstr.split("<>")

        training_fname = "experiment_files/1980_%s.train.txt" % n
        model_test_fname = "experiment_files/1980_%s.modeltest.txt" % n
        test_fname = "experiment_files/%s.test.txt" % (n + diff)

        self.write_data(training, training_fname)
        self.write_data(model_test, model_test_fname)
        self.write_data(test, test_fname)

        # Create the response files; each file is closed as soon as its
        # responses have been written.
        self.cnw = CitationNetwork()
        response_targets = [
            (training, "experiment_files/1980_%s.train.resp.txt" % n),
            (model_test, "experiment_files/1980_%s.modeltest.resp.txt" % n),
            (test, "experiment_files/%s.test.resp.txt" % (n + diff)),
        ]
        for papers, resp_fname in response_targets:
            with open(resp_fname, "w") as resp_file:
                self.write_response(papers, resp_file, n)

        # Write the time step file: "<pid>\t<year>" for every paper whose
        # year lies in [init, n].
        ts_fname = "experiment_files/%s_%s_timesteps.txt" % (init, n)
        with open(ts_fname, "w") as ts_file:
            for paper in all_papers:
                if init <= paper.year <= n:
                    pid = paper.pid
                    ts_file.write("%s\t%d\n" % (pid, get_year_from_id(pid)))