Пример #1
0
    def test_pagerank_calculation(self):
        """Run the pagerank command and check the scores it writes out.

        The command is run with ``chown=False``; the ``external_pagerank``
        file is then parsed as ``pk=value`` lines and compared against
        reference scores for items 1-3 (calculated in Gephi), each within a
        tolerance of 0.0001.

        NOTE(review): the items and citations being ranked are presumably
        created elsewhere (e.g. fixtures or setUp) -- not visible here.
        """
        # Compute pagerank for the documents under test.
        command = Command()
        self.verbosity = 1
        command.do_pagerank(chown=False)

        # Load the generated file into a {pk: score} mapping.
        pagerank_path = get_data_dir('collection1') + "external_pagerank"
        with open(pagerank_path) as pagerank_file:
            split_rows = (row.split('=') for row in pagerank_file)
            actual_scores = {pk: float(raw.strip()) for pk, raw in split_rows}

        # Reference values, based on calculations in Gephi.
        expected_scores = {
            '1': 0.369323534954,
            '2': 0.204581549974,
            '3': 0.378475867453,
        }
        for pk, expected in expected_scores.items():
            actual = actual_scores[pk]
            self.assertTrue(
                abs(actual - expected) < 0.0001,
                msg="The answer for item %s was %s when it should have been "
                    "%s" % (pk, actual, expected),
            )
Пример #2
0
    def test_pagerank_calculation(self):
        """Check that the pagerank command produces the expected scores.

        Runs ``Command.do_pagerank(chown=False)``, reads back the
        ``external_pagerank`` file (one ``pk=value`` pair per line) and
        verifies that items 1-3 are each within 0.0001 of reference values
        calculated in Gephi.

        NOTE(review): the test data itself is presumably loaded outside this
        method -- confirm against the enclosing test case.
        """
        tolerance = 0.0001
        # Reference pagerank values, based on calculations in Gephi.
        reference = {
            '1': 0.369323534954,
            '2': 0.204581549974,
            '3': 0.378475867453,
        }

        # Run the pagerank calculation.
        runner = Command()
        self.verbosity = 1
        runner.do_pagerank(chown=False)

        # Parse the results file into a dict keyed by item pk.
        scores_on_disk = {}
        results_path = get_data_dir('collection1') + "external_pagerank"
        with open(results_path) as results:
            for entry in results:
                item_id, raw_score = entry.split('=')
                scores_on_disk[item_id] = float(raw_score.strip())

        # Compare every reference item against what was written to disk.
        for item_id in reference:
            found = scores_on_disk[item_id]
            wanted = reference[item_id]
            is_close_enough = abs(found - wanted) < tolerance
            failure_text = (
                "The answer for item %s was %s when it should have been "
                "%s" % (item_id, found, wanted)
            )
            self.assertTrue(is_close_enough, msg=failure_text)
Пример #3
0
 def handle(self, *args, **options):
     """Compute pagerank, write the sorted file, and report where it went.

     After delegating to the parent ``handle``, the pagerank results are
     written to ``settings.SOLR_PAGERANK_DEST_DIR`` and a message is printed
     naming both that location and the ``external_pagerank`` path under the
     ``collection1`` data directory.
     """
     super(Command, self).handle(*args, **options)

     scores = self.do_pagerank()
     written_to = settings.SOLR_PAGERANK_DEST_DIR
     make_sorted_pr_file(scores, written_to)

     # Suggested final location, shown to the operator only; nothing is
     # copied there by this method.
     suggested_dest = get_data_dir('collection1') + "external_pagerank"
     notice = (
         "Pagerank file created at %s. Because of distributed servers, "
         "you may need to copy it to its final destination. Somewhere "
         "like: %s."
     )
     print(notice % (written_to, suggested_dest))
Пример #4
0
class Command(BaseCommand):
    """Management command that calculates pagerank and writes it to disk."""

    args = '<args>'
    help = 'Calculate pagerank value for every case'
    # Location of the generated pagerank file inside the collection1 data dir.
    RESULT_FILE_PATH = get_data_dir('collection1') + "external_pagerank"

    def handle(self, *args, **options):
        """Entry point for the command: run the pagerank calculation."""
        self.do_pagerank()

    def do_pagerank(self, chown=True):
        """Build the graph, compute pagerank, and publish the results file.

        :param chown: forwarded as-is to ``cp_pr_file_to_bulk_dir``.
        """
        output_path = self.RESULT_FILE_PATH

        graph = make_and_populate_nx_graph()
        scores = graph.pagerank()

        # Write the file first, then reload the cache and copy the file on.
        make_sorted_pr_file(scores, output_path)
        reload_pagerank_external_file_cache()
        cp_pr_file_to_bulk_dir(output_path, chown)