Exemplo n.º 1
0
    def parse(self, filepath):
        """Read MAZ shapes from a shapefile and push them to the database.

        Only records whose ``County`` field equals ``'MC'`` are kept. Each
        kept record becomes a ``(MAZ_ID_10, TAZ_2015, Sq_miles, polygon)``
        row, where the polygon is ``self.encode_poly`` applied to the shape
        points. Rows are handed to ``self.database.push_maz`` every 10000
        kept records, and once more at the end for whatever remains (this
        final push happens even when nothing remains).

        Args:
            filepath: path handed to ``shapefile.Reader``.
        """
        pr.print('Beginning MAZ spatial data parsing.', time=True)

        batch = []
        for entry in shapefile.Reader(filepath):
            record = entry.record
            if record.County == 'MC':
                row = (
                    record.MAZ_ID_10,
                    record.TAZ_2015,
                    record.Sq_miles,
                    self.encode_poly(entry.shape.points),
                )
                batch.append(row)

            # Keep reading until a full batch has been collected.
            if len(batch) < 10000:
                continue

            pr.print(f'Pushing {len(batch)} MAZs to database.', time=True)
            self.database.push_maz(batch)
            batch = []
            pr.print('Resuming MAZ parsing.', time=True)

        # Flush the trailing, partially filled batch.
        pr.print(f'Pushing {len(batch)} MAZs to database.', time=True)
        self.database.push_maz(batch)
        pr.print('MAZ spatial data parsing complete.', time=True)
Exemplo n.º 2
0
    def generate(self, planpath, routepath, mazs):
        """Write a plans XML file and a routes CSV for agents on given MAZs.

        Plans are fetched with ``self.database.get_plans(mazs)`` and handled
        in chunks produced by ``self.chunk``. For every chunk the routes and
        activities of its agents are fetched; the routes are appended to the
        CSV at ``routepath`` and each plan is written to ``planpath`` as
        alternating ``<act>`` / ``<leg>`` elements followed by a closing
        ``<act>``.

        Every CSV row is terminated with a newline, so rows of consecutive
        chunks no longer run together on one line (the file now also ends
        with a newline).

        Args:
            planpath: destination path of the plans XML file.
            routepath: destination path of the routes CSV file.
            mazs: iterable of MAZ identifiers; converted to a tuple before
                being passed to the database.

        Raises:
            IndexError: if the database returns fewer routes or activities
                than the plan sizes require.
        """
        from collections import deque

        pr.print('Generating input plans on select MAZs.', time=True)
        pr.print('Finding agents on selected MAZs.', time=True)

        mazs = tuple(mazs)
        plans = self.database.get_plans(mazs)

        plan_frmt = '<person id="%s"><plan selected="yes">'
        route_frmt = '<leg dep_time="%s" mode="%s" trav_time="%s" />'
        act_frmt = '<act dur="%s" end_time="%s" type="%s" x="%s" y="%s" />'

        target = len(plans)
        pr.print(f'Iterating over {target} plans and building plans file.',
                 time=True)

        n = 100000
        # Context managers guarantee both files are closed even if a database
        # call or an encoding step raises part-way through.
        with open(planpath, 'w') as planfile, \
                open(routepath, 'w') as routefile:
            planfile.write(
                '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE plans'
                ' SYSTEM "http://www.matsim.org/files/dtd/plans_v4.dtd">'
                '<plans>')
            routefile.write('agent_id,route_index,src_maz,term_maz,dep_time,'
                            'mode,dur_time\n')

            for group in self.chunk(plans, n):
                agents = tuple(plan[0] for plan in group)
                # Deques give O(1) removal from the front; list.pop(0) would
                # shift the whole remainder on every call.
                routes = deque(self.database.get_routes(agents))
                activities = deque(self.database.get_activities(agents))

                routefile.writelines(
                    ','.join(str(attr) for attr in route) + '\n'
                    for route in routes)
                routefile.flush()

                for plan in group:
                    planfile.write(plan_frmt % plan[0])
                    # plan[1] // 2 (activity, leg) pairs, then a final
                    # activity that closes the plan.
                    for _ in range(plan[1] // 2):
                        planfile.write(
                            act_frmt %
                            self.encode_act(activities.popleft()))
                        planfile.write(
                            route_frmt %
                            self.encode_route(routes.popleft()))
                    planfile.write(
                        act_frmt % self.encode_act(activities.popleft()))
                    planfile.write('</plan></person>')
                planfile.flush()

            planfile.write('</plans>')

        pr.print('Plans generation for select MAZs complete.', time=True)
Exemplo n.º 3
0
    def parse(self, filepath, resume=False):
        """Read APN parcels from a shapefile and push them to the database.

        Each record becomes an ``(APN, ADDRESS, FLOOR, None, polygon)`` row,
        where the polygon is ``self.encode_poly`` applied to the shape
        points. Rows are pushed with ``self.database.push_apns`` every
        100000 records and once more at the end for the remainder.

        Args:
            filepath: path handed to ``shapefile.Reader``.
            resume: when true, the first ``self.database.count_apn()``
                records of the file are skipped instead of being pushed
                again. If that count is 0 nothing is skipped.
        """
        progress = pr.printer(persist=True, frmt='bold', replace=True)

        pr.print('Beginning APN parcel data parsing.', time=True)
        progress('APNs parsed: 0', progress=0)
        parser = shapefile.Reader(filepath)
        target = len(parser)
        total = 0
        apns = []

        offset = 0
        if resume:
            pr.print('Finding where parsing left off last.', time=True)
            offset = self.database.count_apn()
            pr.print(f'Skipping to APN {offset}.', time=True)

        # With nothing stored yet there is nothing to skip; entering skip
        # mode anyway would silently drop the first record of the file.
        skipping = resume and offset > 0

        n = 0
        for item in parser:
            if skipping:
                n += 1
                if n >= offset:
                    # The last already-stored record has been passed; count
                    # the skipped records towards the running total.
                    total += n
                    n = 0
                    skipping = False
                    pr.print('Resuming APN parcel parsing.', time=True)
                    progress(f'APNs parsed: {total}', progress=total / target)
                continue

            apns.append((item.record['APN'], item.record['ADDRESS'],
                         item.record['FLOOR'], None,
                         self.encode_poly(item.shape.points)))
            n += 1
            if n >= 100000:
                pr.print(f'Pushing {n} APNs to database.', time=True)
                self.database.push_apns(apns)
                apns = []
                total += n
                n = 0
                pr.print('Resuming APN parcel parsing.', time=True)
                progress(f'APNs parsed: {total}', progress=total / target)

        pr.print(f'Pushing {n} APNs to database.', time=True)
        self.database.push_apns(apns)
        pr.print('APN parcel data parsing complete.', time=True)
        # Records handled = everything counted so far plus the last partial
        # batch (not the file length plus the partial batch).
        progress(f'APNs parsed: {total + n}', progress=1)
Exemplo n.º 4
0
    def parse(self, filepath, bin_size=100000):
        """Stream an XML plans file into plan, activity and route tables.

        The file is read incrementally with ``iterparse``. Only plans whose
        ``selected`` attribute is ``'yes'`` are recorded. For each such plan
        one row per ``activity`` element, one row per ``leg`` element and one
        summary row for the plan are collected, and the collected rows are
        written through ``self.database`` every ``bin_size`` plans and once
        more at the end.

        Args:
            filepath: path of the XML file to parse.
            bin_size: number of selected plans to accumulate before the
                collected rows are written to the database.
        """
        pr.print('Plans Parsing Progress',
                 progress=0,
                 persist=True,
                 replace=True,
                 frmt='bold')

        # XML parser; the first event yields the root element, which is kept
        # so that processed children can be released later.
        parser = iterparse(filepath, events=('start', 'end'))
        parser = iter(parser)
        evt, root = next(parser)

        # bin counter (plans in the current batch) and running total
        bin_count = 0
        total = 0
        target = self.database.count_plans()

        # tabular data
        plans = []
        activities = []
        routes = []

        # indexes
        agent = 0
        route = 0
        activity = 0
        leg = 0

        # other important info
        selected = False
        distance = 0
        time = 0
        modes = set()

        # iterate over XML tags
        for evt, elem in parser:
            if evt == 'start':
                if elem.tag == 'person':
                    agent = int(elem.attrib['id'])
                if elem.tag == 'plan':
                    selected = elem.attrib['selected'] == 'yes'
            elif evt == 'end' and selected:
                if elem.tag == 'plan':
                    plans.append([  # PLANS
                        agent,  # agent_id
                        route + activity,  # size
                        len(modes)  # mode_count
                    ])

                    # reset per-plan state
                    modes = set()
                    route = 0
                    activity = 0
                    time = 0
                    bin_count += 1

                    if bin_count >= bin_size:
                        total += bin_count

                        pr.print(f'Pushing {bin_count} plans to SQL server.',
                                 time=True)

                        self.database.write_plans(plans)
                        self.database.write_activities(activities)
                        self.database.write_routes(routes)

                        pr.print('Resuming XML agent plan parsing.', time=True)
                        # A reported plan count of 0 would otherwise raise
                        # ZeroDivisionError in the middle of the import.
                        pr.print('Plans Parsing Progress',
                                 progress=total / target if target else 0,
                                 persist=True,
                                 replace=True,
                                 frmt='bold')

                        # reset and free memory
                        root.clear()
                        plans = []
                        activities = []
                        routes = []
                        bin_count = 0

                elif elem.tag == 'activity':
                    end_time = self.parse_time(elem.attrib['end_time'])
                    act_type = self.encoding['activity'][elem.attrib['type']]

                    activities.append([  # ACTIVITIES
                        agent,  # agent_id
                        activity,  # act_index
                        time,  # start_time
                        end_time,  # end_time
                        act_type,  # act_type
                        None  # apn_id
                    ])

                    time = end_time
                    activity += 1

                elif elem.tag == 'leg':
                    dep_time = self.parse_time(elem.attrib['dep_time'])
                    dur_time = self.parse_time(elem.attrib['trav_time'])
                    mode = self.encoding['mode'][elem.attrib['mode']]
                    modes.add(mode)

                    # `leg` and `distance` hold the values captured from the
                    # most recently closed <route> element.
                    routes.append([  # ROUTES
                        agent,  # agent_id
                        route,  # route_index
                        leg,  # size
                        dep_time,  # dep_time
                        dur_time,  # dur_time
                        distance,  # distance
                        mode,  # mode
                        None,  # src_apn
                        None  # term_apn
                    ])

                    time = dep_time + dur_time
                    route += 1

                elif elem.tag == 'route':
                    distance = float(elem.attrib['distance'])
                    # An element without text content has text None; treat it
                    # as a route with no entries instead of crashing.
                    if elem.text is None:
                        leg = 0
                    else:
                        leg = len(elem.text.split(" "))

        pr.print(f'Pushing {bin_count} plans to SQL server.')

        # flush the trailing, partially filled batch
        self.database.write_plans(plans)
        self.database.write_activities(activities)
        self.database.write_routes(routes)

        pr.print('Plans Parsing Progress',
                 progress=1,
                 persist=True,
                 replace=True,
                 frmt='bold')
        pr.print('Completed XML agent plan parsing.')
        pr.push()
Exemplo n.º 5
0
    def write_xml(self, bin_count, savepath, coords, time, silent=False):
        """Write a network XML sample with per-time-bin link attributes.

        Nodes inside ``coords`` are looked up, the links attached to them are
        fetched, and the nodes referenced by those links are fetched again so
        every link endpoint is present. The interval ``time`` is split into
        ``bin_count`` bins; for every bin ``fetch_link_times`` is queried and
        its value is stored on the matching link as attribute ``tbin<i>``.
        Links without a value for a bin get 0.

        Args:
            bin_count: number of time bins (must be non-zero).
            savepath: destination path of the network XML file.
            coords: four values, unpacked into ``self.database.find_nodes``.
            time: pair ``(start, end)`` delimiting the sampled interval.
            silent: when true, progress messages are suppressed.
        """
        # Number of leading link columns consumed by the fixed part of the
        # link format (id, from, to, length, freespeed, capacity, permlanes,
        # oneway, modes); the per-bin values follow them.
        base_cols = 9

        if not silent:
            pr.print('Beginning network link flow sampling.', time=True)
            pr.print(
                f'Finding nodes from ({coords[0]}, {coords[1]}) to '
                f'({coords[2]}, {coords[3]}).',
                time=True)

        nodes = self.database.find_nodes(*coords)

        if not silent:
            pr.print(
                f'Finding links from ({coords[0]}, {coords[1]}) to '
                f'({coords[2]}, {coords[3]}).',
                time=True)

        node_ids = tuple(node[0] for node in nodes)
        links = {}
        for link in self.database.find_links(node_ids):
            row = list(link)
            # Reserve one slot per time bin so the indexed assignment below
            # and the format string both have a value for every bin.
            row.extend([0] * (base_cols + bin_count - len(row)))
            links[link[0]] = row

        if not silent:
            pr.print('Fetching extraneous nodes from link sample.', time=True)

        # Iterate the rows, not the dict keys: columns 1 and 2 of each row
        # are the link's endpoint node ids.
        node_ids = tuple(
            node for link in links.values() for node in link[1:3])
        nodes = self.database.fetch_nodes(node_ids)

        bin_size = (time[1] - time[0]) / bin_count
        bins = [round(bin_size * i + time[0]) for i in range(bin_count)]
        bins.append(time[1])

        link_ids = tuple(links)
        for i in range(bin_count):
            if not silent:
                pr.print(
                    f'Fetching leg data from time {bins[i]} to {bins[i+1]}.',
                    time=True)
            legs = self.database.fetch_link_times(bins[i], bins[i + 1],
                                                  link_ids)
            for leg in legs:
                # NOTE(review): assumes each returned row is
                # (link_id, value) -- confirm against fetch_link_times. The
                # previous code read leg[i], i.e. a column chosen by the bin
                # index, which stores the link id itself for bin 0.
                links[leg[0]][i + base_cols] = leg[1]

        node_frmt = '<node id="%s" x="%s" y="%s"></node>'
        link_frmt = (
            '<link id="%s" from="%s" to="%s" length="%s" freespeed="%s" '
            'capacity="%s" permlanes="%s" oneway="%s" modes="%s"><attributes>'
            + ''.join([
                f'<attribute name="tbin{i}" class="java.lang.Integer">'
                '%s</attribute>' for i in range(bin_count)
            ]) + '</attributes></link>')

        if not silent:
            pr.print(f'Writing network sample at {savepath}.', time=True)

        # A dict cannot be sliced and "%" needs a tuple to fill several
        # placeholders, so materialise the rows as a list of tuples.
        link_rows = [tuple(link) for link in links.values()]

        n = 100000
        with open(savepath, 'w') as network:
            network.write(
                '<?xml version="1.0" encoding="UTF-8"?>'
                '<!DOCTYPE network SYSTEM "http://www.matsim.org/files/dtd/'
                'network_v2.dtd"><network><nodes>')
            for i in range(0, len(nodes), n):
                network.write(''.join(
                    [node_frmt % node for node in nodes[i:i + n]]))
                network.flush()
            network.write('</nodes><links>')
            for i in range(0, len(link_rows), n):
                network.write(''.join(
                    [link_frmt % link for link in link_rows[i:i + n]]))
                network.flush()
            network.write('</links></network>')

        if not silent:
            pr.print('Network link flow sampling complete.', time=True)
Exemplo n.º 6
0
    def parse(self, filepath, bin_size=100000, target=2947013):
        """Stream an XML input-plans file into plan, activity and route rows.

        The file is read incrementally with ``iterparse``. Only plans whose
        ``selected`` attribute is ``'yes'`` are recorded. One row is
        collected per ``act`` element, one per ``leg`` element and one
        summary row per plan; the collected rows are written through
        ``self.database`` every ``bin_size`` plans and once more at the end.

        Args:
            filepath: path of the XML file to parse.
            bin_size: number of selected plans to accumulate before the
                collected rows are written to the database.
            target: expected total number of plans, used only to scale the
                progress display. Defaults to the value that was previously
                hard-coded.
        """

        pr.print(f'Beginning XML input plan parsing from {filepath}.',
                 time=True)
        pr.print('Plan parsing progress:',
                 progress=0,
                 persist=True,
                 frmt='bold')

        # XML parser; the first event yields the root element, which is kept
        # so that processed children can be released later.
        parser = iterparse(filepath, events=('start', 'end'))
        parser = iter(parser)
        evt, root = next(parser)

        # bin counter (plans in the current batch) and running total
        bin_count = 0
        total_count = 0

        # tabular data
        plans = []
        activities = []
        routes = []

        # indexes
        agent = 0
        route = 0
        activity = 0

        # other important info
        modes = set()
        # Defined up front so an element that closes before any <plan> has
        # opened is ignored instead of raising UnboundLocalError.
        selected = False

        # iterate over XML tags
        for evt, elem in parser:
            if evt == 'start':
                if elem.tag == 'person':
                    agent = int(elem.attrib['id'])
                if elem.tag == 'plan':
                    selected = elem.attrib['selected'] == 'yes'
            elif evt == 'end' and selected:
                if elem.tag == 'plan':
                    plans.append([  # PLANS
                        agent,  # agent_id
                        route + activity,  # size
                        len(modes)  # mode_count
                    ])

                    # reset per-plan state
                    modes = set()
                    route = 0
                    activity = 0
                    bin_count += 1

                    if bin_count >= bin_size:
                        pr.print(f'Pushing {bin_count} plans to SQL server.',
                                 time=True)

                        self.database.write_plans(plans)
                        self.database.write_activities(activities)
                        self.database.write_routes(routes)

                        # reset and free memory
                        root.clear()
                        plans = []
                        activities = []
                        routes = []
                        total_count += bin_count
                        bin_count = 0

                        pr.print('Resuming XML input plan parsing.', time=True)
                        pr.print('Plan parsing progress:',
                                 progress=(total_count / target
                                           if target else 0),
                                 persist=True,
                                 frmt='bold')

                elif elem.tag == 'act':
                    end_time = self.parse_time(elem.attrib['end_time'])
                    # Without a 'dur' attribute the duration equals the end
                    # time, which makes the computed start time 0.
                    if 'dur' in elem.attrib:
                        dur_time = self.parse_time(elem.attrib['dur'])
                    else:
                        dur_time = end_time
                    act_type = self.encoding['activity'][elem.attrib['type']]

                    activities.append([  # ACTIVITIES
                        agent,  # agent_id
                        activity,  # act_index
                        end_time - dur_time,  # start_time
                        end_time,  # end_time
                        act_type,  # act_type
                        elem.attrib['x'],  # x
                        elem.attrib['y'],  # y
                        None  # maz
                    ])
                    activity += 1

                elif elem.tag == 'leg':
                    dep_time = self.parse_time(elem.attrib['dep_time'])
                    dur_time = self.parse_time(elem.attrib['trav_time'])
                    mode = self.encoding['mode'][elem.attrib['mode']]
                    modes.add(mode)

                    routes.append([  # ROUTES
                        agent,  # agent_id
                        route,  # route_index
                        dep_time,  # dep_time
                        dur_time,  # dur_time
                        mode,  # mode
                        None,  # src_maz
                        None  # term_maz
                    ])
                    route += 1

        pr.print(f'Pushing {bin_count} plans to SQL server.', time=True)
        pr.print('Plan parsing progress:',
                 progress=1,
                 persist=True,
                 frmt='bold')

        # flush the trailing, partially filled batch
        self.database.write_plans(plans)
        self.database.write_activities(activities)
        self.database.write_routes(routes)

        pr.print('Completed XML input plan parsing.', time=True)

        # release the remaining parsed elements
        root.clear()
Exemplo n.º 7
0
    def parse(self, filepath, bin_size=1000000, resume=False):
        """Stream an XML events file into leg-event and vehicle-event rows.

        Four event types are recorded: ``'entered link'`` and
        ``'left link'`` become leg events (link ids are translated through
        the mapping returned by ``self.database.fetch_network()``), while
        ``'PersonEntersVehicle'`` and ``'PersonLeavesVehicle'`` become
        vehicle events. Rows are written through ``self.database`` every
        ``bin_size`` recorded events and once more at the end.

        Args:
            filepath: path of the XML events file.
            bin_size: number of recorded events to accumulate before the
                collected rows are written to the database.
            resume: when true, as many recorded-type events as are already
                stored (leg count plus vehicle count) are skipped before
                recording starts, and new ids continue from those counts.
                If nothing is stored yet, nothing is skipped.

        Raises:
            KeyError: if an event references a link id that is missing from
                the fetched network mapping.
        """

        parser = iterparse(filepath, events=('end', 'start'))
        # The first event yields the root element, kept so that processed
        # children can be released after every batch.
        evt, root = next(parser)

        types: Tuple[str, ...] = ('entered link', 'left link',
                                  'PersonEntersVehicle',
                                  'PersonLeavesVehicle')

        # (leg_id, vehicle, None, link, time, entered flag)
        leg_evts: List[Tuple[int, int, None, int, int, int]] = []
        # (veh_id, vehicle, person, time, entered flag)
        veh_evts: List[Tuple[int, int, int, int, int]] = []
        leg_id: int = 0
        veh_id: int = 0
        time: int = 0
        bin_count: int = 0
        total_count: int = 0
        offset: int = 0

        pr.print('Fetching network link data.', time=True)
        links: Dict[str, int] = dict(self.database.fetch_network())
        pr.print('Network link data fetch completed.', time=True)

        if resume:
            pr.print('Finding where we left off parsing last.', time=True)
            leg_id = self.database.get_leg_count()
            veh_id = self.database.get_veh_count()
            offset = leg_id + veh_id
            pr.print(f'Skipping to event {offset} of XML file.', )
            if offset == 0:
                # Nothing is stored yet. The skip loop only stops when the
                # skipped count equals the offset, which can never happen
                # for 0, so every event in the file would be discarded.
                resume = False
        else:
            pr.print('Resuming XML leg/vehicle event parsing.', time=True)

        pr.print('Event Parsing Progress',
                 progress=0,
                 persist=True,
                 replace=True,
                 frmt='bold')

        for evt, elem in parser:
            if elem.tag == 'event' and evt == 'end':
                etype = elem.attrib['type']
                if resume and etype in types:
                    # Skip mode: count events that were stored by an earlier
                    # run without recording them again.
                    bin_count += 1
                    total_count += 1
                    if bin_count >= bin_size:
                        time = int(float(elem.attrib['time']))
                        root.clear()
                        bin_count = 0
                        pr.print(f'Skipped to event {total_count}.')
                        pr.print('Event Parsing Progress',
                                 progress=time / 86400,
                                 persist=True,
                                 replace=True,
                                 frmt='bold')
                    if total_count == offset:
                        time = int(float(elem.attrib['time']))
                        root.clear()
                        bin_count = 0
                        resume = False
                        pr.print(f'Skipped to event {total_count}.', time=True)
                        pr.print('Event skipping complete.', time=True)
                        pr.print('Resuming XML leg/vehicle event parsing.',
                                 time=True)
                        pr.print('Event Parsing Progress',
                                 progress=time / 86400,
                                 persist=True,
                                 replace=True,
                                 frmt='bold')
                    continue

                if etype == 'entered link':
                    time = int(float(elem.attrib['time']))
                    leg_evts.append((leg_id, int(elem.attrib['vehicle']), None,
                                     links[elem.attrib['link']], time, 1))
                    bin_count += 1
                    leg_id += 1
                elif etype == 'left link':
                    time = int(float(elem.attrib['time']))
                    leg_evts.append((leg_id, int(elem.attrib['vehicle']), None,
                                     links[elem.attrib['link']], time, 0))
                    bin_count += 1
                    leg_id += 1
                elif etype == 'PersonEntersVehicle':
                    time = int(float(elem.attrib['time']))
                    veh_evts.append((veh_id, int(elem.attrib['vehicle']),
                                     int(elem.attrib['person']), time, 1))
                    bin_count += 1
                    veh_id += 1
                elif etype == 'PersonLeavesVehicle':
                    time = int(float(elem.attrib['time']))
                    veh_evts.append((veh_id, int(elem.attrib['vehicle']),
                                     int(elem.attrib['person']), time, 0))
                    bin_count += 1
                    veh_id += 1

                if bin_count >= bin_size:
                    total_count += bin_count
                    pr.print(f'Pushing {bin_count} events to SQL database.',
                             time=True)
                    self.database.write_leg_evts(leg_evts)
                    self.database.write_veh_evts(veh_evts)
                    # reset and free memory
                    root.clear()
                    leg_evts = []
                    veh_evts = []
                    bin_count = 0
                    pr.print('Resuming XML leg/vehicle event parsing.',
                             time=True)
                    pr.print('Event Parsing Progress',
                             progress=time / 86400,
                             persist=True,
                             replace=True,
                             frmt='bold')

        # Only the events still waiting in the last, partially filled batch
        # are added here; adding a whole bin_size would overstate the total.
        total_count += bin_count
        pr.print(f'Pushing {bin_count} events to SQL database.', time=True)
        self.database.write_leg_evts(leg_evts)
        self.database.write_veh_evts(veh_evts)
        pr.print('Event Parsing Progress',
                 progress=1,
                 persist=True,
                 replace=True,
                 frmt='bold')
        pr.push()
        pr.print('XML leg/vehicle event parsing complete.', time=True)
        pr.print(f'A total of {total_count} events were parsed.', time=True)
import json
from getpass import getpass

from util.print_util import Printer as pr
from xmlparsing.events.events_parser import EventsParser

# Name of the profile to select from the JSON configuration file.
CONFIG = 'WORKSTATION'

# Load the configuration and keep only the selected profile.
with open('./xmlparsing/events/config.json', 'r') as handle:
    params = json.load(handle)[CONFIG]

# The password is asked for interactively and stored on the loaded
# database settings before they are handed to the parser.
database = params['database']
database['password'] = getpass(f'Password for {database["user"]}: ')

parser = EventsParser(database)

# A fresh run creates every configured table first; a resumed run leaves
# the existing tables untouched.
if not params['resume']:
    for table in database['tables']:
        parser.database.create_table(table, True)

source_path = params['source_path']
pr.print(f'Beginning XML leg/vehicle event parsing from {source_path}.',
         time=True)
parser.parse(source_path, resume=params['resume'])
Exemplo n.º 9
0
from xmlparsing.plans.plans_parser import PlansParser
from util.print_util import Printer as pr

# Name of the profile to select from the JSON configuration file.
CONFIG = 'WORKSTATION'

# Load the configuration and keep only the selected profile.
with open('./xmlparsing/plans/config.json', 'r') as handle:
    params = json.load(handle)[CONFIG]

# The password is asked for interactively and stored on the loaded
# database settings before they are handed to the parser.
database = params['database']
database['password'] = getpass(f'Password for {database["user"]}: ')

parser = PlansParser(database, params['encoding'])

# Every configured table is created again before parsing starts.
pr.print('Resetting tables for parsing.', time=True)
for table in database['tables']:
    parser.database.create_table(table, True)

source_path = params['source_path']
pr.print(f'Beginning XML agent plan parsing from {source_path}.', time=True)
parser.parse(source_path)

# Build keys and indexes only after the bulk load has finished.
pr.print('Starting index creation.', time=True)
for name, table in database['tables'].items():
    # Each table entry is subscripted like a mapping below (table['idx']),
    # so optional settings must be detected by key membership; hasattr()
    # looks at object attributes and never sees mapping keys, which made
    # both branches unreachable.
    if 'comp_PK' in table:
        pr.print(f'Creating primary key on table "{name}".', time=True)
        parser.database.alter_add_composite_PK(name)
    if 'idx' in table:
        for idx in table['idx']:
            pr.print(f'Creating index on table "{name}".', time=True)