Exemplo n.º 1
0
# Command-line driver for xuwc: parse the count/context options, validate
# them, then hand the XPath and the file list to XUWc.
p = optparse.OptionParser()
p.add_option("-a", "--count", dest="count_type")
p.add_option("-b", "--re_count", dest="count_regexp")
p.add_option("-c", "--context", dest="context_type")
p.add_option("-d", "--re_context", dest="context_regexp")
(options, args) = p.parse_args()

# At least an XPath and one input file are required.
# NOTE(review): this usage error exits with status 0; kept as-is so shell
# callers see the same status -- confirm whether non-zero is wanted.
if len(args) < 2:
    print("Usage xuwc [ --count <count_type> | --re_count <count_regexp> ] [ --context <context_type> | --re_context <context_regexp> ] <xpath> <files>+")
    sys.exit(0)

# First positional argument is the XPath; everything after it is a file path.
xpath = args[0]
file_paths = args[1:]

# Options that were not given on the command line stay None.
wc_params = {
    'count_type': options.count_type,
    'count_regexp': options.count_regexp,
    'context_type': options.context_type,
    'context_regexp': options.context_regexp,
}

# Each unit (count, context) may be given as a type or as a regexp, not both.
if wc_params['count_type'] is not None and wc_params['count_regexp'] is not None:
    print("Cannot specify both a count and re_count option.")
    sys.exit(-1)
if wc_params['context_type'] is not None and wc_params['context_regexp'] is not None:
    print("Cannot specify both a context and re_context option.")
    sys.exit(-1)

wc_reports = XUWc.xuwc_main( xpath, file_paths, wc_params )
XUWc.output_reports( wc_reports, wc_params )
Exemplo n.º 2
0
    def test_xuwc(self):
        """Run XUWc.xuwc_main over a series of option combinations.

        The cases (labelled M x.y) cover: no options, an overridden count
        unit, a count regexp, and an overridden context unit.  After each
        call the test checks the per-file reports and the values found in
        the parameter dict that was passed to xuwc_main.

        NOTE(review): several checks rely on the order in which the report
        mappings return their keys; this is only stable if xuwc_main
        returns insertion-ordered mappings -- confirm.
        """
        tei1 = self.tei_data_path1

        # M 1.1: no options given.  The test expects the parameter dict to
        # hold "file_path" / "tei:section" after the call.
        wc_reports, wc_params = self._run_xuwc("//tei:section", [tei1])
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "file_path", [2])
        self.assertEqual(wc_params['count_type'], "tei:section")

        # M 1.2: no options, two input files.
        tei2 = self.tei_data_path2
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:subsection/tei:subsubsection[re:testsubtree('Globus','gi')]",
            [tei1, tei2])
        self._assert_report_paths(wc_reports, [tei1, tei2])
        for file_path in (tei1, tei2):
            self._assert_counts_in_key_order(
                wc_reports[file_path], wc_params, "file_path", [1])
            self.assertEqual(wc_params['count_type'], "tei:subsubsection")

        # M 2.1, M 2.3, M 2.4: override the count unit with each builtin
        # unit while keeping the context at ios:interface.
        ios1 = self.ios_data_path1
        ios_prefix = ios1.replace('.', '_')
        builtin_cases = (
            ("builtin:byte", 186, 232),
            ("builtin:word", 23, 27),
            ("builtin:character", 186, 232),
        )
        for count_type, loopback_count, gigabit_count in builtin_cases:
            wc_reports, wc_params = self._run_xuwc(
                "//ios:interface", [ios1],
                count_type=count_type, context_type="ios:interface")
            self._assert_report_paths(wc_reports, [ios1])
            self._assert_counts_by_region_id(
                wc_reports[ios1], wc_params, "ios:interface",
                [(ios_prefix + ".Loopback0", loopback_count),
                 (ios_prefix + ".GigabitEthernet4/2", gigabit_count)])
            self.assertEqual(wc_params['count_type'], count_type)

        # M 2.5: xpath "//tei:section" with count_type "tei:subsection"
        # should generate an error.
        # TODO: not exercised -- the call was disabled in the original test.

        # M 2.6
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:subsection/tei:subsubsection", [tei1],
            count_type="tei:subsection")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "file_path", [1])
        self.assertEqual(wc_params['count_type'], "tei:subsection")

        # M 3.1: count by regular expression within each tei:section.
        tei_prefix = tei1.replace('.', '_')
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section", [tei1],
            count_regexp=r"\w+", context_type="tei:section")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_by_region_id(
            wc_reports[tei1], wc_params, "tei:section",
            [(tei_prefix + ".1", 357), (tei_prefix + ".9", 45)])
        self.assertEqual(wc_params['count_regexp'], r"\w+")

        # M 4.1: context override.
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:p", [tei1], context_type="tei:section")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "tei:section", [5, 1])
        self.assertEqual(wc_params['count_type'], "tei:p")

        # M 4.2: xpath "//tei:section/tei:p" with context_type
        # "tei:subsection" should generate an error.
        # M 4.3: xpath "//tei:section/tei:subsection" with context_type
        # "tei:subsubsection" should generate an error.
        # TODO: neither is exercised -- both calls were disabled in the
        # original test.

        # M 4.4: context equals the counted unit, so every region counts 1.
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:p", [tei1], context_type="tei:p")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "tei:p", [1] * 6)
        self.assertEqual(wc_params['count_type'], "tei:p")

        # M 4.5
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:subsection/tei:p", [tei1],
            context_type="tei:subsection")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "tei:subsection", [4])
        self.assertEqual(wc_params['count_type'], "tei:p")

        # M 4.6
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:subsection/tei:p", [tei1],
            context_type="tei:section")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "tei:section", [4])
        self.assertEqual(wc_params['count_type'], "tei:p")

        # M 4.7: same query and expectations as M 4.1.
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:p", [tei1], context_type="tei:section")
        self._assert_report_paths(wc_reports, [tei1])
        self._assert_counts_in_key_order(
            wc_reports[tei1], wc_params, "tei:section", [5, 1])
        self.assertEqual(wc_params['count_type'], "tei:p")

        # M 4.8 - should explode
        # NOTE(review): the call is not wrapped in assertRaises, so an
        # exception here is reported as a test error and a normal return
        # is not checked -- confirm the intended exception type.
        wc_reports, wc_params = self._run_xuwc(
            "//tei:section/tei:p", [tei1], context_regexp="builtin:line")

    def _run_xuwc(self, xpath, file_paths, count_type=None, count_regexp=None,
                  context_type=None, context_regexp=None):
        """Call XUWc.xuwc_main and return (wc_reports, wc_params).

        A fresh parameter dict is built for every call and returned
        alongside the reports so the caller can inspect its contents
        after xuwc_main has run.
        """
        wc_params = {
            'count_type': count_type,
            'count_regexp': count_regexp,
            'context_type': context_type,
            'context_regexp': context_regexp,
        }
        wc_reports = XUWc.xuwc_main( xpath, file_paths, wc_params )
        return wc_reports, wc_params

    def _assert_report_paths(self, wc_reports, expected_paths):
        """Assert that wc_reports is keyed by exactly expected_paths, in order."""
        # list() keeps this working where keys() returns a view object.
        self.assertEqual(list(wc_reports.keys()), expected_paths)

    def _assert_counts_in_key_order(self, wc_report, wc_params, context_type,
                                    expected_counts):
        """Check region count, context type, then each count in key order."""
        region_ids = list(wc_report.keys())
        self.assertEqual(len(region_ids), len(expected_counts))
        self.assertEqual(wc_params['context_type'], context_type)
        for region_id, expected_count in zip(region_ids, expected_counts):
            self.assertEqual(wc_report[region_id], expected_count)

    def _assert_counts_by_region_id(self, wc_report, wc_params, context_type,
                                    expected_pairs):
        """Check region count, context type, then the count of each named region.

        expected_pairs is a sequence of (region_id, expected_count) tuples.
        """
        self.assertEqual(len(wc_report.keys()), len(expected_pairs))
        self.assertEqual(wc_params['context_type'], context_type)
        for region_id, expected_count in expected_pairs:
            self.assertEqual(wc_report[region_id], expected_count)