def testIsList(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_list' function.

    Calls 'auxiliary.check_is_list' with an empty list, a list of integers
    and a list of mixed element types, asserting on each returned value.
    """

    # NOTE(review): this version asserts that the returned value is truthy.
    # Confirm what 'check_is_list' returns on success - if it returns None
    # these assertions can never pass.
    #
    sample_lists = (
        [],
        [1, 3, 5],
        ["a", "56", 1, {}],
    )

    for sample in sample_lists:
        assert auxiliary.check_is_list("TestArgument", sample)
def testIsList(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_list' function.

    For an empty list, a list of integers and a list of mixed element types
    the call to 'auxiliary.check_is_list' is expected to return None.
    """

    # Identity test ('is None') instead of '== None': equality can be
    # overridden by the returned object, identity cannot.
    #
    assert auxiliary.check_is_list('TestArgument', []) is None
    assert auxiliary.check_is_list('TestArgument', [1, 3, 5]) is None
    assert auxiliary.check_is_list('TestArgument', ['a', '56', 1, {}]) is None
def testIsList(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_list' function.

    Each of the list arguments below (empty, integers only, mixed element
    types) is passed to 'auxiliary.check_is_list', which is expected to
    return None for all of them.
    """

    list_arguments = [[], [1, 3, 5], ['a', '56', 1, {}]]

    for list_argument in list_arguments:
        # Compare with 'is' - None is a singleton and '==' may be overridden
        #
        result = auxiliary.check_is_list('TestArgument', list_argument)
        assert result is None
def testIsList(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_list' function.

    'auxiliary.check_is_list' is called with several lists (empty, integers,
    mixed element types) and must return None every time.
    """

    # 'is None' is the idiomatic (and stricter) test for the None singleton
    #
    assert auxiliary.check_is_list("TestArgument", []) is None
    assert auxiliary.check_is_list("TestArgument", [1, 3, 5]) is None
    assert auxiliary.check_is_list("TestArgument", ["a", "56", 1, {}]) is None
def load(self, file_names):
    """Load one or more files with entries and their localities into the table.

    See Febrl manual for details on the file format.

    Empty lines and lines starting with '#' are skipped. Every other line
    must consist of three comma separated fields: key, longitude, latitude.
    Keys are stripped and converted to lower case, and each key is stored
    with the value [longitude, latitude] (two floats). The table is cleared
    before the files are loaded.

    Arguments:
      file_names  A single file name (string) or a list of file names.

    Side effects:
      Sets self.file_names and self.length (number of entries in the table
      after loading) and writes a summary to the log.

    Raises:
      IOError    if a file cannot be opened.
      Exception  if a line does not have three fields, if a coordinate is
                 not a number or is outside its valid range, or if a key
                 occurs again with a different value.
    """

    # Check input argument type - - - - - - - - - - - - - - - - - - - - - - - -
    #
    if isinstance(file_names, str):
        file_names = [file_names]  # Make a list out of a single file name

    auxiliary.check_is_list('file_names', file_names)

    for i, file_name in enumerate(file_names):
        # Check the name itself (not its i-th character, which also failed
        # with an IndexError for names shorter than their list position)
        #
        auxiliary.check_is_string('file_name[%d]' % (i), file_name)

    self.file_names = file_names
    self.clear()  # Remove all items from the look-up table

    # Loop over file names - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    for fn in self.file_names:

        try:  # Open file and read all lines into a list
            f = open(fn, 'r')
        except Exception:
            # As before, any failure to open is reported as an IOError
            #
            message = 'Cannot read from file "%s"' % (fn)
            logging.exception(message)
            raise IOError(message)

        with f:  # Make sure the file is closed even if reading fails
            file_data = f.readlines()  # Read complete file

        # Now process all lines - - - - - - - - - - - - - - - - - - - - - - - -
        #
        for line in file_data:
            entry = line.strip()

            if (not entry) or (entry[0] == '#'):
                continue  # Skip empty lines and comments

            fields = entry.split(',')  # Get fields from a line

            # Check for three columns
            #
            if len(fields) != 3:
                message = ('Illegal file format (not 3 columns) in file: ' +
                           '"%s" in line: %s' % (fn, entry))
                logging.error(message)
                raise Exception(message)

            key = fields[0].strip().lower()  # Make sure it's lower case
            longitude = fields[1].strip()
            latitude = fields[2].strip()

            # Try to convert into numerical (float) values
            #
            try:
                longitude = float(longitude)
            except ValueError:
                message = ('Longitude: "%s" is not a number in line: "%s"'
                           % (str(longitude), entry))
                logging.exception(message)
                raise Exception(message)

            try:
                latitude = float(latitude)
            except ValueError:
                message = ('Lattitude: "%s" is not a number in line: "%s"'
                           % (str(latitude), entry))
                logging.exception(message)
                raise Exception(message)

            # And check their values (written as negated range tests so that
            # NaN, which compares false with everything, is rejected as well)
            #
            if not (-180.0 <= longitude <= 180.0):
                message = 'Illegal value for longitude: ' + str(longitude)
                logging.error(message)
                raise Exception(message)

            if not (-90.0 <= latitude <= 90.0):
                message = 'Illegal value for latitude: ' + str(latitude)
                logging.error(message)
                raise Exception(message)

            val = [longitude, latitude]  # Value for dictionary

            # The same key may occur again, but only with the same location
            #
            if (key in self) and (self[key] != val):
                message = ('Key "%s" already in look-up table with ' %
                           (str(key)) + 'different value')
                logging.error(message)
                raise Exception(message)

            self[key] = val

    self.length = len(self)  # Get number of elements in the look-up table

    # A log message - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    logging.info('Loaded geocode look-up table "%s"' % (self.description))
    logging.info(' From files: %s' % (str(self.file_names)))
    logging.info(' Number of entries: %i' % (self.length))
def load(self, file_names):
    """Load one or more files with words and their frequency counts into the
    look-up table.

    See Febrl manual for details on the file format.

    Empty lines and lines starting with '#' are skipped. Every other line
    must consist of two comma separated fields: key, count. Keys are
    stripped and converted to lower case, counts must be integers. If a key
    occurs more than once its counts are added up. The table is cleared
    before the files are loaded.

    Arguments:
      file_names  A single file name (string) or a list of file names.

    Side effects:
      Sets self.file_names, self.sum (sum of all counts loaded) and
      self.length (number of entries in the table after loading) and writes
      a summary to the log.

    Raises:
      IOError    if a file cannot be opened.
      Exception  if a line does not have two fields or if a count is not an
                 integer.
    """

    # Check input argument type - - - - - - - - - - - - - - - - - - - - - - - -
    #
    if isinstance(file_names, str):
        file_names = [file_names]  # Make a list out of a single file name

    auxiliary.check_is_list('file_names', file_names)

    for i, file_name in enumerate(file_names):
        # Check the name itself (not its i-th character, which also failed
        # with an IndexError for names shorter than their list position)
        #
        auxiliary.check_is_string('file_name[%d]' % (i), file_name)

    self.file_names = file_names
    self.clear()  # Remove all items from the look-up table
    self.sum = 0

    # Loop over file names - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    for fn in self.file_names:

        try:  # Open file and read all lines into a list
            f = open(fn, 'r')
        except Exception:
            # As before, any failure to open is reported as an IOError
            #
            message = 'Cannot read from file "%s"' % (fn)
            logging.exception(message)
            raise IOError(message)

        with f:  # Make sure the file is closed even if reading fails
            file_data = f.readlines()  # Read complete file

        # Now process all lines - - - - - - - - - - - - - - - - - - - - - - - -
        #
        for line in file_data:
            entry = line.strip()

            if (not entry) or (entry[0] == '#'):
                continue  # Skip empty lines and comments

            fields = entry.split(',')  # Get fields from a line

            # Check for two columns
            #
            if len(fields) != 2:
                message = ('Illegal file format (not 2 columns) in file' +
                           ': "%s" in line: %s"' % (fn, entry))
                logging.error(message)
                raise Exception(message)

            key = fields[0].strip().lower()  # Make sure it's lower case
            count_str = fields[1].strip().lower()

            try:
                count = int(count_str)  # Convert the value into an integer
            except ValueError:
                message = ('Illegal value for frequency count: "%s"' %
                           (str(count_str)) + ' in line: "%s"' % (entry))
                logging.exception(message)
                raise Exception(message)

            # Add only the count of this line to the overall sum. Adding the
            # merged total would count earlier occurrences of a key again.
            #
            self.sum += count

            if key in self:
                count += self[key]  # Sum up counts

            self[key] = count

    self.length = len(self)  # Get number of elements in the look-up table

    # A log message - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    logging.info('Loaded frequency look-up table "%s"' %
                 (self.description))
    logging.info(' From files: %s' % (str(self.file_names)))
    logging.info(' Number of entries: %i' % (self.length))
    logging.info(' Sum of all value: %i' % (self.sum))
def load(self, file_names):
    """Load one or more files with word corrections and tags into the look-up
    table.

    See Febrl manual for details on the file format.

    Empty lines and lines starting with '#' are skipped. A line starting
    with 'tag=<' sets the current tag to the two characters that follow.
    All other lines are entries of the form 'key : value, value, ...' or,
    without a ':', a list of further values for the most recent key.

    Keys and values are converted to lower case and split at single spaces
    into words. The tuple of words is the dictionary key, and the stored
    item is the pair (key words joined with '_', tag). Every value of a key
    is stored with the same pair as the key itself. If a dictionary key is
    already in the table, a tag not yet contained in its tag string is
    appended with a '/' separator, and a differing value is overwritten
    (with a warning). The table is cleared before the files are loaded.

    Arguments:
      file_names  A single file name (string) or a list of file names.

    Side effects:
      Sets self.file_names, self.max_key_length (largest number of words in
      any key or value) and self.length (number of entries in the table
      after loading) and writes a summary to the log.

    Raises:
      IOError    if a file cannot be opened.
      Exception  if an entry appears before any tag line of its file, if a
                 line contains more than one ':', or if values appear before
                 any key has been read.
    """

    def insert_entry(key_words, value, tag):
        """Store tuple(key_words) -> (value, tag), merging with an entry that
        is already in the table."""

        if len(key_words) > self.max_key_length:
            self.max_key_length = len(key_words)  # Update maximal key length

        dict_key = tuple(key_words)
        this_tag = tag

        if dict_key in self:  # Already in lookup table
            old_item = self[dict_key]
            old_val = old_item[0]  # Value without tag
            old_tag = old_item[1]

            if value != old_val:
                # Report the key concerned and the new value that replaces
                # the old one
                #
                logging.warning('Key "%s" already in dictionary with ' %
                                ('_'.join(key_words)) +
                                'different value (old value ' +
                                'will be over written with "%s")' %
                                (str(value)))

            if old_tag.find(this_tag) < 0:  # This tag is new
                this_tag = old_tag + '/' + this_tag  # Tag for this entry
            else:
                this_tag = old_tag

        self[dict_key] = (value, this_tag)

    # Check input argument type - - - - - - - - - - - - - - - - - - - - - - - -
    #
    if isinstance(file_names, str):
        file_names = [file_names]  # Make a list out of a single file name

    auxiliary.check_is_list('file_names', file_names)

    for i, file_name in enumerate(file_names):
        # Check the name itself (not its i-th character, which also failed
        # with an IndexError for names shorter than their list position)
        #
        auxiliary.check_is_string('file_name[%d]' % (i), file_name)

    self.file_names = file_names
    self.clear()  # Remove all items from the look-up table
    self.max_key_length = 0

    # Value of the most recent key line. As before it is not reset between
    # files, but it is now defined from the start, so that values without
    # any key are reported properly instead of hitting an unbound variable.
    #
    current_val = None

    # Loop over file names - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    for fn in self.file_names:

        try:  # Open file and read all lines into a list
            f = open(fn, 'r')
        except Exception:
            # As before, any failure to open is reported as an IOError
            #
            message = 'Cannot read from file "%s"' % (fn)
            logging.exception(message)
            raise IOError(message)

        with f:  # Make sure the file is closed even if reading fails
            file_data = f.readlines()  # Read complete file

        tag = ''  # Start with no tag

        # Now process all lines - - - - - - - - - - - - - - - - - - - - - - - -
        #
        for line in file_data:
            entry = line.strip()  # Remove line separators

            if (not entry) or (entry[0] == '#'):
                continue  # Skip empty lines and comments

            if entry[:5] == 'tag=<':  # It's a line with a new tag
                tag = entry[5:7]
                continue

            # A line with an entry - make sure a tag is set
            #
            if tag == '':
                message = 'Missing tag specification in file "%s"' % (fn)
                logging.error(message)
                raise Exception(message)

            line_list = entry.split(':')  # Separate key from values

            if len(line_list) > 2:
                message = ('Illegal format in file "%s" in line: %s' %
                           (fn, entry))
                logging.error(message)
                raise Exception(message)

            if len(line_list) == 2:  # Line contains a key - - - - - - - - -
                key = line_list[0].strip().lower()  # Get and clean key
                key_list = key.split(' ')  # Make a list of key words

                current_val = '_'.join(key_list)
                insert_entry(key_list, current_val, tag)  # Insert key itself

                vals = line_list[1].lower()  # Values in this line

            else:  # Line contains only values - - - - - - - - - - - - - - -
                vals = line_list[0].lower()  # Values in this line

            # Process all values right of ':' in this line
            #
            for val in vals.split(','):  # Loop over all values - - - - - - -
                val_strip = val.strip()

                if val_strip == '':
                    continue  # Only insert non-empty values

                if current_val is None:
                    message = ('Missing key specification in file "%s" ' %
                               (fn) + 'in line: %s' % (entry))
                    logging.error(message)
                    raise Exception(message)

                insert_entry(val_strip.split(' '), current_val, tag)

    self.length = len(self)  # Get number of elements in the look-up table

    # A log message - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    logging.info('Loaded tag look-up table "%s"' % (self.description))
    logging.info(' From files: %s' % (str(self.file_names)))
    logging.info(' Number of entries: %i' % (self.length))
    logging.info(' Maximal key length: %i' % (self.max_key_length))