Exemplo n.º 1
0
 def setUp(self):
     """
     :return:
     """
     self.wikipedia_abstract_xml = APP_ROOT + "/Data/jawiki-20160901-abstract_part.xml"
     self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(
         self.wikipedia_abstract_xml)
     self.answer_data = APP_ROOT + "/Data/jawiki-20160901-abstract_part0.json"
     self.correct_data = APP_ROOT + "/Data/answer.json"
Exemplo n.º 2
0
 def setUp(self):
     """
     :return:
     """
     self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part.xml"
     self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(
         self.wikipedia_abstract_xml)
     self.answer_data = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part0.json"
     self.correct_data = APP_ROOT + "/../../Data/wiki_image/answer.json"
 def setUp(self):
     """
     :return:
     """
     self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.xml"
     self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(self.wikipedia_abstract_xml)
     self.answer_data = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.json"
     self.correct_data = APP_ROOT + "/../../Data/answer.json"
 def setUp(self):
     """
     :return:
     """
     self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part.xml"
     self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(self.wikipedia_abstract_xml)
     self.answer_data = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part0.json"
     self.correct_data = APP_ROOT + "/../../Data/wiki_image/answer.json"
class Test_WikiPediaXmlToJson(unittest.TestCase):
    """
    Check contents similarity
    """

    def setUp(self):
        """
        :return:
        """
        self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.xml"
        self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(self.wikipedia_abstract_xml)
        self.answer_data = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.json"
        self.correct_data = APP_ROOT + "/../../Data/answer.json"

    def test_extract_data(self):
        self.wiki_pedia_xml_to_json.input()
        self.assertEqual(filecmp.cmp(self.answer_data, self.correct_data), True)
class Test_WikiPediaXmlToJson(unittest.TestCase):
    """
    Check contents similarity
    """
    def setUp(self):
        """
        :return:
        """
        self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.xml"
        self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(
            self.wikipedia_abstract_xml)
        self.answer_data = APP_ROOT + "/../../Data/jawiki-20160901-abstract_part.json"
        self.correct_data = APP_ROOT + "/../../Data/answer.json"

    def test_extract_data(self):
        self.wiki_pedia_xml_to_json.input()
        self.assertEqual(filecmp.cmp(self.answer_data, self.correct_data),
                         True)
class Test_WikiPediaXmlToJson(unittest.TestCase):
    """
    Check contents similarity
    """

    def setUp(self):
        """
        :return:
        """
        self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part.xml"
        self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(self.wikipedia_abstract_xml)
        self.answer_data = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part0.json"
        self.correct_data = APP_ROOT + "/../../Data/wiki_image/answer.json"

    def test_extract_data(self):
        self.wiki_pedia_xml_to_json.input(image_Flag=True)
        for doc in self.wiki_pedia_xml_to_json.xml_data:
            self.wiki_pedia_xml_to_json.extract_contents(doc)
        self.assertEqual(filecmp.cmp(self.answer_data, self.correct_data), True)
Exemplo n.º 8
0
class Test_WikiPediaXmlToJson(unittest.TestCase):
    """
    Check contents similarity
    """
    def setUp(self):
        """
        :return:
        """
        self.wikipedia_abstract_xml = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part.xml"
        self.wiki_pedia_xml_to_json = WikiPediaXmlToJson(
            self.wikipedia_abstract_xml)
        self.answer_data = APP_ROOT + "/../../Data/wiki_image/enwiki-20080103-abstract_part0.json"
        self.correct_data = APP_ROOT + "/../../Data/wiki_image/answer.json"

    def test_extract_data(self):
        self.wiki_pedia_xml_to_json.input(image_Flag=True)
        for doc in self.wiki_pedia_xml_to_json.xml_data:
            self.wiki_pedia_xml_to_json.extract_contents(doc)
        self.assertEqual(filecmp.cmp(self.answer_data, self.correct_data),
                         True)
                        default='jawiki-20160901-abstract_part.xml',
                        help='set xml file')
    parser.add_argument('--img_flag',
                        '-img',
                        default="False",
                        help='set image Flag')
    parser.add_argument('--set_worker',
                        '-work',
                        default=2,
                        help='set image Flag')
    args = parser.parse_args()
    Image_Flag = ast.literal_eval(args.img_flag)
    if Image_Flag is True:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/wiki_image/" + args.xml_file
    else:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/jawiki-20160901-abstract_dir/" + args.xml_file
    wiki_pedia_xml_to_json = WikiPediaXmlToJson(wikipedia_abstract_xml)
    wiki_pedia_xml_to_json.input(image_Flag=Image_Flag)

    # Multi Process
    with ProcessPoolExecutor() as executor:
        # executor.map(wiki_pedia_xml_to_json.extract_contents, wiki_pedia_xml_to_json.xml_data)
        all_process = []
        for xml_name in wiki_pedia_xml_to_json.xml_data:
            process = executor.submit(wiki_pedia_xml_to_json.extract_contents,
                                      xml_name)
            all_process.append(process)

        for process in as_completed(all_process):
            print(process.result())
#!/usr/bin/env python
#coding: utf8
import sys
import os
from os import path
sys.path.append(os.path.join(os.path.dirname("__file__"), "./../"))
sys.path.append(os.path.join(os.path.dirname("__file__"), "."))
APP_ROOT = path.dirname(path.abspath(__file__))
import ast
from wiki_pedia_xml_to_json import WikiPediaXmlToJson
import argparse

if __name__ == '__main__':
    """
    Reference (str to boolean)
        http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    args
    --xml_file: set the xml file
        Example: APP_ROOT + "/../Data/jawiki-20160901-abstract.xml"
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--xml_file', '-xml', default='jawiki-20160901-abstract.xml',
                        help='set xml file')
    args = parser.parse_args()
    wikipedia_abstract_xml = APP_ROOT + "/Data/split_data/" + args.xml_file
    wiki_pedia_xml_to_json = WikiPediaXmlToJson(wikipedia_abstract_xml)
    wiki_pedia_xml_to_json.input()
    wiki_pedia_xml_to_json.extract_contents()
    args
    --xml_file: set the xml file
        Example: APP_ROOT + "/../Data/jawiki-20160901-abstract.xml"
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--xml_file', '-xml', default='jawiki-20160901-abstract_part.xml',
                        help='set xml file')
    parser.add_argument('--img_flag', '-img', default="False",
                        help='set image Flag')
    parser.add_argument('--set_worker', '-work', default=2,
                        help='set image Flag')
    args = parser.parse_args()
    Image_Flag = ast.literal_eval(args.img_flag)
    if Image_Flag is True:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/wiki_image/" + args.xml_file
    else:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/jawiki-20160901-abstract_dir/" + args.xml_file
    wiki_pedia_xml_to_json = WikiPediaXmlToJson(wikipedia_abstract_xml)
    wiki_pedia_xml_to_json.input(image_Flag=Image_Flag)

    # Multi Process
    with ProcessPoolExecutor() as executor:
        # executor.map(wiki_pedia_xml_to_json.extract_contents, wiki_pedia_xml_to_json.xml_data)
        all_process = []
        for xml_name in wiki_pedia_xml_to_json.xml_data:
            process = executor.submit(wiki_pedia_xml_to_json.extract_contents, xml_name)
            all_process.append(process)

        for process in as_completed(all_process):
            print(process.result())
sys.path.append(os.path.join(os.path.dirname("__file__"), "."))
APP_ROOT = path.dirname(path.abspath(__file__))
import ast
import pyximport

pyximport.install()
from wiki_pedia_xml_to_json import WikiPediaXmlToJson
import argparse

if __name__ == "__main__":
    """
    Reference (str to boolean)
        http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    args
    --xml_file: set the xml file
        Example: APP_ROOT + "/../Data/jawiki-20160901-abstract.xml"
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--xml_file", "-xml", default="jawiki-20160901-abstract_part.xml", help="set xml file")
    parser.add_argument("--img_flag", "-img", default="False", help="set image Flag")
    args = parser.parse_args()
    Image_Flag = ast.literal_eval(args.img_flag)
    if Image_Flag is True:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/wiki_image/" + args.xml_file
    else:
        wikipedia_abstract_xml = APP_ROOT + "/../Data/jawiki-20160901-abstract_dir/" + args.xml_file
    wiki_pedia_xml_to_json = WikiPediaXmlToJson(wikipedia_abstract_xml)
    wiki_pedia_xml_to_json.input(image_Flag=Image_Flag)
    for doc in wiki_pedia_xml_to_json.xml_data:
        wiki_pedia_xml_to_json.extract_contents(doc)