def test_prepare_csv_read():
    """Check ``prepare_csv_read()`` with a file object and a list of lines."""
    # Read the fixture inside a context manager so the file handle is closed
    # even if the expectation below fails.
    with open('tests/data/real_file.csv') as csv_file:
        file_rows = list(prepare_csv_read(csv_file,
                                          ('type', 'bool', 'string')))
    expect(file_rows) == [{'bool': 'true', 'type': 'file', 'string': 'test'}]

    # Plain lists of lines are accepted as well as file objects
    test_list = ['James,Rowe', 'ell,caro']
    expect(list(prepare_csv_read(test_list, ('first', 'last')))) == [
        {'last': 'Rowe', 'first': 'James'},
        {'last': 'caro', 'first': 'ell'},
    ]
def import_locations(self, cells_file):
    """Parse OpenCellID.org data files.

    ``import_locations()`` fills this object with ``Cell`` objects, keyed by
    the OpenCellID.org_ database identifier of each entry.  Nothing is
    returned; the parsed cells are stored on ``self``.

    It expects cell files in the following format::

        22747,52.0438995361328,-0.2246370017529,234,33,2319,647,0,1,
        2008-04-05 21:32:40,2008-04-05 21:32:40
        22995,52.3305015563965,-0.2255620062351,234,10,20566,4068,0,1,
        2008-04-05 21:32:59,2008-04-05 21:32:59
        23008,52.3506011962891,-0.2234109938145,234,10,10566,4068,0,1,
        2008-04-05 21:32:59,2008-04-05 21:32:59

    The above file processed by ``import_locations()`` will store the
    following entries::

        {23008: Cell(23008, 52.3506011963, -0.223410993814, 234, 10, 10566,
                     4068, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 59),
                     datetime.datetime(2008, 4, 5, 21, 32, 59)),
         22747: Cell(22747, 52.0438995361, -0.224637001753, 234, 33, 2319,
                     647, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 40),
                     datetime.datetime(2008, 4, 5, 21, 32, 40)),
         22995: Cell(22995, 52.3305015564, -0.225562006235, 234, 10, 20566,
                     4068, 0, 1, datetime.datetime(2008, 4, 5, 21, 32, 59),
                     datetime.datetime(2008, 4, 5, 21, 32, 59))}

    Entries that contain the export's missing-data marker are logged at
    debug level and skipped; parsing carries on with the next entry.

    :type cells_file: ``file``, ``list`` or ``str``
    :param cells_file: Cell data to read
    :raise FileFormatError: An entry cannot be parsed and is not marked as
        incomplete

    .. _OpenCellID.org: http://opencellid.org/
    """
    self._cells_file = cells_file
    field_names = ('ident', 'latitude', 'longitude', 'mcc', 'mnc', 'lac',
                   'cellid', 'crange', 'samples', 'created', 'updated')
    parse_date = lambda s: datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    # One parser per field, in the same order as ``field_names``
    field_parsers = (int, float, float, int, int, int, int, int, int,
                     parse_date, parse_date)
    data = utils.prepare_csv_read(cells_file, field_names)

    for row in data:
        try:
            cell = dict((n, p(row[n]))
                        for n, p in zip(field_names, field_parsers))
        except ValueError:
            if r"\N" in row.values():
                # A few entries are incomplete, and when that occurs the
                # export includes a backslash-N marker to denote missing
                # data.  Skip just this entry; ``continue`` (rather than
                # ``break``) keeps the remaining entries from being dropped.
                logging.debug('Skipping incomplete entry %r', row)
                continue
            raise utils.FileFormatError('opencellid.org')
        # NOTE(review): the key is the ``ident`` field exactly as read from
        # the file, not the parsed integer held by the Cell -- confirm
        # whether callers expect integer keys as the example above suggests.
        self[row['ident']] = Cell(**cell)
def import_locations(self, marker_file):
    """Load trigpoint markers from a trigpoint database file.

    Every waypoint record found in ``marker_file`` is converted to a
    :class:`Trigpoint` and stored on this object under its integer
    trigpoint identifier.

    The expected input is the format provided at alltrigs-wgs84.txt_::

        H  SOFTWARE NAME & VERSION
        I  GPSU 4.04,
        S SymbolSet=0
        ...
        W,500936,N52.066035,W000.281449,    37.0,Broom Farm
        W,501097,N52.010585,W000.173443,    97.0,Bygrave
        W,505392,N51.910886,W000.186462,   136.0,Sish Lane

    Only records whose first field is ``W`` are imported; everything else is
    ignored.  For the sample above the stored entries are:

    ======  =========  =========  ========  ==========
    key     latitude   longitude  altitude  name
    ======  =========  =========  ========  ==========
    500936  52.066035  -0.281449  37.0      Broom Farm
    501097  52.010585  -0.173443  97.0      Bygrave
    505392  51.910886  -0.186462  136.0     Sish Lane
    ======  =========  =========  ========  ==========

    :type marker_file: ``file``, ``list`` or ``str``
    :param marker_file: Trigpoint marker data to read
    :raise ValueError: Invalid value for ``marker_file``

    .. _alltrigs-wgs84.txt: http://www.haroldstreet.org.uk/trigpoints/
    """
    self._marker_file = marker_file
    field_names = ('tag', 'identity', 'latitude', 'longitude', 'altitude',
                   'name')

    def parse_position(positive_prefix, text):
        # The leading hemisphere letter decides the sign of the angle
        if text[0] == positive_prefix:
            return float(text[1:])
        return 0 - float(text[1:])

    def parse_altitude(text):
        # A value of 8888.0 denotes unavailable data
        if text.strip() == '8888.0':
            return None
        return float(text)

    field_parsers = (str, int, partial(parse_position, 'N'),
                     partial(parse_position, 'E'), parse_altitude, str)

    for row in utils.prepare_csv_read(marker_file, field_names):
        if row['tag'] != 'W':
            continue
        for name, parser in zip(field_names, field_parsers):
            row[name] = parser(row[name])
        # The tag has served its purpose and is not a Trigpoint argument
        del row['tag']
        try:
            self[row['identity']] = Trigpoint(**row)
        except TypeError:
            # Workaround formatting error in 506514 entry that contains
            # spurious comma
            del row[None]
            self[row['identity']] = Trigpoint(**row)
def import_locations(self, zone_file):
    """Parse zoneinfo zone description data files.

    ``import_locations()`` appends one :class:`Zone` object to this list for
    every entry in ``zone_file``.  Lines whose first field starts with ``#``
    are treated as comments and skipped.

    It expects tab-separated data files in the following format::

        AN  +1211-06900     America/Curacao
        AO  -0848+01314     Africa/Luanda
        AQ  -7750+16636     Antarctica/McMurdo  McMurdo Station, Ross Island

    Files containing the data in this format can be found in the
    :file:`zone.tab` file that is normally found in
    :file:`/usr/share/zoneinfo` on UNIX-like systems, or from the `standard
    distribution site`_.

    When processed by ``import_locations()`` a ``list`` object of the
    following style will be built::

        [Zone(None, None, "AN", "America/Curacao", None),
         Zone(None, None, "AO", "Africa/Luanda", None),
         Zone(None, None, "AQ", "Antarctica/McMurdo",
              ["McMurdo Station", "Ross Island"])]

    >>> zones = Zones(open("timezones"))
    >>> for value in sorted(zones, key=attrgetter("zone")):
    ...     print(value)
    Africa/Luanda (AO: 08°48'00"S, 013°14'00"E)
    America/Curacao (AN: 12°11'00"N, 069°00'00"W)
    Antarctica/McMurdo (AQ: 77°50'00"S, 166°36'00"E also McMurdo Station,
    Ross Island)

    :type zone_file: ``file``, ``list`` or ``str``
    :param zone_file: ``zone.tab`` data to read
    :raise FileFormatError: Unknown file format

    .. _standard distribution site: ftp://elsie.nci.nih.gov/pub/
    """
    self._zone_file = zone_file
    field_names = ("country", "location", "zone", "comments")
    # zone.tab is tab-separated and its comments column contains spaces, so
    # the delimiter must be a tab.  It is written as an escape sequence so
    # that whitespace-normalising tools cannot silently turn it into a space.
    data = utils.prepare_csv_read(zone_file, field_names, delimiter="\t")

    for row in data:
        if row['country'].startswith("#"):
            # Comment line in zone.tab
            continue
        if row['comments']:
            # Comments are a ", " separated list of place descriptions
            row['comments'] = row['comments'].split(", ")
        self.append(Zone(**row))
def import_timezones_file(self, data):
    """Parse geonames.org_ timezone exports.

    ``import_timezones_file()`` stores a dictionary in ``self.timezones``
    whose keys are timezone identifiers and whose values are two-item lists
    holding the UTC offset and the UTC offset during daylight savings time,
    both in minutes.  Nothing is returned.

    It expects tab-separated data files in the following format::

        Europe/Andorra  1.0 2.0
        Asia/Dubai      4.0 4.0
        Asia/Kabul      4.5 4.5

    Files containing the data in this format can be downloaded from the
    geonames site in their `database export page`_

    Files downloaded from the geonames site when processed by
    ``import_timezones_file()`` will produce a ``dict`` object of the
    following style::

        {"Europe/Andorra": [60, 120],
         "Asia/Dubai": [240, 240],
         "Asia/Kabul": [270, 270]}

    :type data: ``file``, ``list`` or ``str``
    :param data: geonames.org timezones data to read
    :raise FileFormatError: Unknown file format

    .. _geonames.org: http://www.geonames.org/
    .. _database export page: http://download.geonames.org/export/dump/
    """
    self._tzfile = data
    field_names = ("ident", "gmt_offset", "dst_offset")
    # Offsets are given in (fractional) hours; store them as whole minutes
    time_parse = lambda n: int(float(n) * 60)
    # geonames.org exports are tab-delimited.  The delimiter is written as an
    # escape sequence so that whitespace-normalising tools cannot silently
    # turn it into a space.
    data = utils.prepare_csv_read(data, field_names, delimiter="\t")

    self.timezones = {}
    for row in data:
        if row["ident"] == "TimeZoneId":
            # Column header line of the export, not a timezone entry
            continue
        try:
            delta = [time_parse(row[key])
                     for key in ("gmt_offset", "dst_offset")]
        except ValueError:
            raise utils.FileFormatError("geonames.org")
        self.timezones[row["ident"]] = delta
def read_csv(filename):
    """Load named locations from a user's CSV file.

    The input is expected to be in gpsbabel_'s CSV output format, with
    latitude, longitude and name columns.

    .. _gpsbabel: http://www.gpsbabel.org/

    :param str filename: CSV file to parse (STDIN if '-')
    :rtype: ``tuple`` of ``dict`` and ``list``
    :return: Mapping of generated labels to ``(latitude, longitude)`` pairs,
        and the list of those labels in file order
    """
    source = sys.stdin if filename == "-" else filename
    rows = utils.prepare_csv_read(source, ("latitude", "longitude", "name"),
                                  skipinitialspace=True)

    locations = {}
    args = []
    for position, row in enumerate(rows, 1):
        # Prefix each name with its 1-based position in the file
        label = "%02i:%s" % (position, row["name"])
        locations[label] = (row["latitude"], row["longitude"])
        args.append(label)
    return locations, args
def import_timezones_file(self, data):
    """Parse geonames.org_ timezone exports

    ``import_timezones_file()`` stores a dictionary in ``self.timezones``
    whose keys are timezone identifiers and whose values are two-item lists
    holding the UTC offset and the UTC offset during daylight savings time,
    both in minutes.  Nothing is returned.

    It expects tab-separated data files in the following format::

        Europe/Andorra  1.0 2.0
        Asia/Dubai      4.0 4.0
        Asia/Kabul      4.5 4.5

    Files containing the data in this format can be downloaded from the
    geonames site in their `database export page`_

    Files downloaded from the geonames site when processed by
    ``import_timezones_file()`` will produce a ``dict`` object of the
    following style::

        {"Europe/Andorra": [60, 120],
         "Asia/Dubai": [240, 240],
         "Asia/Kabul": [270, 270]}

    >>> timezones = Locations(None, open("geonames_timezones")).timezones
    >>> for key, value in sorted(timezones.items()):
    ...     print("%s - %s" % (key, value))
    Asia/Dubai - [240, 240]
    Asia/Kabul - [270, 270]
    Europe/Andorra - [60, 120]
    >>> header_skip_check = Locations(None,
    ...                               open("geonames_timezones_header"))
    >>> print(header_skip_check) # doctest: +ELLIPSIS
    Locations(None, <open file ...>)
    >>> broken_file_check = Locations(None,
    ...                               open("geonames_timezones_broken"))
    Traceback (most recent call last):
        ...
    FileFormatError: Incorrect data format, if you're using a file
    downloaded from geonames.org please report this to James Rowe
    <*****@*****.**>

    :type data: ``file``, ``list`` or ``str``
    :param data: geonames.org timezones data to read
    :raise FileFormatError: Unknown file format

    .. _geonames.org: http://geonames.org/
    .. _database export page: http://download.geonames.org/export/dump/
    """
    self._tzfile = data
    field_names = ("ident", "gmt_offset", "dst_offset")
    # Offsets are given in (fractional) hours; store them as whole minutes
    time_parse = lambda n: int(float(n) * 60)
    # geonames.org exports are tab-delimited.  The delimiter is written as an
    # escape sequence so that whitespace-normalising tools cannot silently
    # turn it into a space.
    data = utils.prepare_csv_read(data, field_names, delimiter="\t")

    self.timezones = {}
    for row in data:
        if row['ident'] == "TimeZoneId":
            # Column header line of the export, not a timezone entry
            continue
        try:
            # Build the list eagerly: a bare ``map()`` is lazy on Python 3,
            # which would let a bad value escape this ``try`` and would store
            # a one-shot iterator instead of an indexable list.
            delta = [time_parse(row[key])
                     for key in ('gmt_offset', 'dst_offset')]
        except ValueError:
            raise utils.FileFormatError("geonames.org")
        self.timezones[row['ident']] = delta
def import_locations(self, data):
    """Parse geonames.org country database exports

    ``import_locations()`` appends one :class:`Location` object to this list
    for every entry in the data exported by geonames.org_.

    It expects data files in the following tab separated format::

        2633441 Afon Wyre   Afon Wyre   River Wayrai,River Wyrai,Wyre   52.3166667  -4.1666667  H   STM GB  GB  00              0       -9999   Europe/London   1994-01-13
        2633442 Wyre    Wyre    Viera   59.1166667  -2.9666667  T   ISL GB  GB  V9              0       1   Europe/London   2004-09-24
        2633443 Wraysbury   Wraysbury   Wyrardisbury    51.45   -0.55   P   PPL GB      P9              0       28  Europe/London   2006-08-21

    Files containing the data in this format can be downloaded from the
    geonames.org_ site in their `database export page`_.

    Files downloaded from the geonames site when processed by
    ``import_locations()`` will produce ``list`` objects of the following
    style::

        [Location(2633441, "Afon Wyre", "Afon Wyre",
                  ['River Wayrai', 'River Wyrai', 'Wyre'],
                  52.3166667, -4.1666667, "H", "STM", "GB", ['GB'], "00",
                  None, None, None, 0, None, -9999, "Europe/London",
                  datetime.date(1994, 1, 13)),
         Location(2633442, "Wyre", "Wyre", ['Viera'], 59.1166667,
                  -2.9666667, "T", "ISL", "GB", ['GB'], "V9", None, None,
                  None, 0, None, 1, "Europe/London",
                  datetime.date(2004, 9, 24)),
         Location(2633443, "Wraysbury", "Wraysbury", ['Wyrardisbury'],
                  51.45, -0.55, "P", "PPL", "GB", None, "P9", None, None,
                  None, 0, None, 28, "Europe/London",
                  datetime.date(2006, 8, 21))]

    >>> locations = Locations(open("geonames"))
    >>> for location in sorted(locations, key=attrgetter("geonameid")):
    ...     print("%i - %s" % (location.geonameid, location))
    2633441 - Afon Wyre (River Wayrai, River Wyrai, Wyre -
    N52.317°; W004.167°)
    2633442 - Wyre (Viera - N59.117°; W002.967°)
    2633443 - Wraysbury (Wyrardisbury - N51.450°; W000.550°)
    >>> broken_locations = Locations(open("broken_geonames"))
    Traceback (most recent call last):
        ...
    FileFormatError: Incorrect data format, if you're using a file
    downloaded from geonames.org please report this to James Rowe
    <*****@*****.**>

    :type data: ``file``, ``list`` or ``str``
    :param data: geonames.org locations data to read
    :raise FileFormatError: Unknown file format

    .. _geonames.org: http://geonames.org/
    .. _database export page: http://download.geonames.org/export/dump/
    """
    self._data = data
    field_names = ("geonameid", "name", "asciiname", "alt_names",
                   "latitude", "longitude", "feature_class", "feature_code",
                   "country", "alt_country", "admin1", "admin2", "admin3",
                   "admin4", "population", "altitude", "gtopo30", "tzname",
                   "modified_date")
    comma_split = lambda s: s.split(",")
    date_parse = lambda s: datetime.date(*map(int, s.split("-")))
    # Empty fields become ``None`` instead of being fed to the converter
    or_none = lambda x, s: x(s) if s else None
    str_or_none = lambda s: or_none(str, s)
    float_or_none = lambda s: or_none(float, s)
    int_or_none = lambda s: or_none(int, s)
    # Uses the first offset stored for the zone in ``self.timezones``; when
    # no timezone data is available the field is set to ``None``.
    tz_parse = lambda s: self.timezones[s][0] if self.timezones else None
    # One parser per field, in the same order as ``field_names``
    field_parsers = (int_or_none, str_or_none, str_or_none, comma_split,
                     float_or_none, float_or_none, str_or_none, str_or_none,
                     str_or_none, comma_split, str_or_none, str_or_none,
                     str_or_none, str_or_none, int_or_none, int_or_none,
                     int_or_none, tz_parse, date_parse)
    # The export is tab separated and names such as "Afon Wyre" contain
    # spaces, so the delimiter must be a tab.  It is written as an escape
    # sequence so that whitespace-normalising tools cannot silently turn it
    # into a space.
    data = utils.prepare_csv_read(data, field_names, delimiter="\t")

    for row in data:
        try:
            for name, parser in zip(field_names, field_parsers):
                row[name] = parser(row[name])
        except ValueError:
            raise utils.FileFormatError("geonames.org")
        self.append(Location(**row))
def test_prepare_csv_read(data, keys, result):
    """Rows read from ``data`` using ``keys`` must equal ``result``."""
    parsed_rows = list(prepare_csv_read(data, keys))
    assert parsed_rows == result
def import_locations(self, marker_file):
    """Read a trigpoint database file into this object

    Each waypoint record in ``marker_file`` becomes a :class:`Trigpoint`,
    stored on this object under its integer trigpoint identifier.

    Marker files are expected to be in the format provided at
    alltrigs-wgs84.txt_, which looks like this::

        H  SOFTWARE NAME & VERSION
        I  GPSU 4.04,
        S SymbolSet=0
        ...
        W,500936,N52.066035,W000.281449,    37.0,Broom Farm
        W,501097,N52.010585,W000.173443,    97.0,Bygrave
        W,505392,N51.910886,W000.186462,   136.0,Sish Lane

    Records whose first field is not ``W`` are ignored.

    >>> marker_file = open("trigpoints")
    >>> markers = Trigpoints(marker_file)
    >>> for key, value in sorted(markers.items()):
    ...     print("%s - %s" % (key, value))
    500936 - Broom Farm (52°03'57"N, 000°16'53"W alt 37m)
    501097 - Bygrave (52°00'38"N, 000°10'24"W alt 97m)
    505392 - Sish Lane (51°54'39"N, 000°11'11"W alt 136m)
    >>> marker_file.seek(0)
    >>> markers = Trigpoints(marker_file.readlines())
    >>> markers = Trigpoints(open("southern_trigpoints"))
    >>> print(markers[1])
    FakeLand (48°07'23"S, 000°07'23"W alt 12m)
    >>> markers = Trigpoints(open("broken_trigpoints"))
    >>> for key, value in sorted(markers.items()):
    ...     print("%s - %s" % (key, value))
    500968 - Brown Hill Nm  See The Heights (53°38'23"N, 001°39'34"W)
    501414 - Cheriton Hill Nm  See Paddlesworth (51°06'03"N, 001°08'33"E)

    :type marker_file: ``file``, ``list`` or ``str``
    :param marker_file: Trigpoint marker data to read
    :raise ValueError: Invalid value for ``marker_file``

    .. _alltrigs-wgs84.txt: http://www.haroldstreet.org.uk/trigpoints.php
    """
    self._marker_file = marker_file

    def pos_parse(positive, text):
        # Hemisphere letter first, then the unsigned angle in degrees
        if text[0] == positive:
            return float(text[1:])
        return 0 - float(text[1:])

    def altitude_parse(text):
        # A value of 8888.0 denotes unavailable data
        return None if text.strip() == "8888.0" else float(text)

    # Field name and its converter, in column order
    columns = (
        ("tag", str),
        ("identity", int),
        ("latitude", partial(pos_parse, "N")),
        ("longitude", partial(pos_parse, "E")),
        ("altitude", altitude_parse),
        ("name", str),
    )
    field_names = tuple(name for name, _ in columns)

    for row in utils.prepare_csv_read(marker_file, field_names):
        if row['tag'] != "W":
            continue
        for name, parser in columns:
            row[name] = parser(row[name])
        # The tag has served its purpose and is not a Trigpoint argument
        del row['tag']
        try:
            self[row['identity']] = Trigpoint(**row)
        except TypeError:
            # Workaround formatting error in 506514 entry that contains
            # spurious comma
            del row[None]
            self[row['identity']] = Trigpoint(**row)