class unittest_Ingestor(unittest.TestCase):

    def setUp(self, logFile='log/Template.log'):
        self.logger = Logger(logFile, 'DEBUG', 'Sub')
        self.logger = self.logger.getLogger()
        self.source = Source('source-test', self.logger)
        self.ingestor = Ingestor(self.source, self.logger)
        self.ingestor.setClients()

    def test_Ingestor(self):
        print self.ingestor.clients
        # Exploratory calls kept for reference:
        #filter.ingestor.ingestSingleFile(igniter)
        #filter.ingestor.ingestBulletinFile(igniter)
        #source.ingestor.ingestSingleFile(igniter)
        #source.ingestor.ingestBulletinFile(igniter)
        #source.ingestor.ingestCollection(igniter)
        #self.ingestor.setFeeds(self.feeds)
        #self.ingestor.setClients()
        #source.ingestor.createDir('/apps/px/turton', source.ingestor.dbDirsCache)
        #self.drp = DirectRoutingParser(pathFichierCircuit, self.source.ingestor.allNames, logger)
        #self.source.ingestor.ingest()
        #clist = self.source.ingestor.getMatchingClientNamesFromMasks(nomFichier, clist)
        #self.source.ingestor.ingest(tempNom, nomFichier, clist)
        #self.assertEqual(None, None)
        pass
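# Minimal sketch for running the suite above on its own; it assumes Logger,
# Source and Ingestor are already importable in this module, as the test does.
if __name__ == '__main__':
    unittest.main()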
def __init__(self):
    self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if self.es.ping():
        logging.info('Elasticsearch connected..')
    else:
        # A failed connection is an error, not routine information
        logging.error('Could not connect to Elasticsearch!')
    self.index_name = None
    self.ingest = Ingestor()
    self.db = Database()
    self.pp = pprint.PrettyPrinter(indent=4)
    self.default_zip = None
    self.default_house = None
    self.status = None
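# Hedged usage sketch for the connection check above: ping, index one
# document, and pretty-print a match_all search. Assumes a local node and the
# elasticsearch-py 7.x client signatures; 'test-index' and the document body
# are illustrative assumptions, not names from this project.
import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
if es.ping():
    es.index(index='test-index', id=1, body={'zip': '12345', 'house': '42'})
    result = es.search(index='test-index', body={'query': {'match_all': {}}})
    pprint.PrettyPrinter(indent=4).pprint(result)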
def setup():
    """ Load all resources """
    quote_files = [
        './_data/DogQuotes/DogQuotesTXT.txt',
        './_data/DogQuotes/DogQuotesDOCX.docx',
        './_data/DogQuotes/DogQuotesPDF.pdf',
        './_data/DogQuotes/DogQuotesCSV.csv'
    ]

    # Parse every quote file with the Ingestor class; a file the Ingestor
    # cannot handle is reported and skipped instead of aborting setup.
    quotes = []
    for quote_file in quote_files:
        try:
            quotes.extend(Ingestor.parse(quote_file))
        except ValueError as error:
            print(f"ValueError: {error}")

    images_path = "./_data/photos/dog/"

    # Walk images_path and collect the full path of every image found
    # (extend, not assign, so files from every subdirectory are kept).
    images = []
    for root, dirs, files in os.walk(images_path):
        images.extend(os.path.join(root, name) for name in files)

    return quotes, images
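# Usage sketch for setup() above: pick a random quote/image pair. It assumes
# the ./_data tree referenced by quote_files and images_path is present.
import random

quotes, images = setup()
print(random.choice(quotes))
print(random.choice(images))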
def setUp(self):
    sqlite_file = 'test.db'
    self.db = DatabaseManager(sqlite_file, '__ADMIN__')
    filename = "Test_Files/DatabaseManagerTest_15.csv"
    self.ingestor = Ingestor(filename)
    self.ingestor.readCSV()
    tempHeaders = self.ingestor.getCSVHeaders()
    self.searchCritera = [
        tempHeaders[0], tempHeaders[1], tempHeaders[2],
        tempHeaders[4], tempHeaders[5], tempHeaders[6]
    ]
    searchCriteraTwoD = self.ingestor.getHeaderIndex(
        self.searchCritera, tempHeaders)
    self.ingestor.searchRows(searchCriteraTwoD, self.ingestor.getRows())
    self.searchCritera = self.db.remove_spaces(self.searchCritera)
    self.new_table = 'Test_15'
def generate_meme(path=None, body=None, author=None):
    """ Generate a meme given a path and a quote """
    if path is None:
        images = "./_data/photos/dog/"
        imgs = []
        for root, dirs, files in os.walk(images):
            imgs = [os.path.join(root, name) for name in files]
        img = random.choice(imgs)
    else:
        img = path[0]

    if body is None:
        quote_files = [
            './_data/DogQuotes/DogQuotesTXT.txt',
            './_data/DogQuotes/DogQuotesDOCX.docx',
            './_data/DogQuotes/DogQuotesPDF.pdf',
            './_data/DogQuotes/DogQuotesCSV.csv'
        ]
        quotes = []
        for f in quote_files:
            quotes.extend(Ingestor.parse(f))
        quote = random.choice(quotes)
    else:
        if author is None:
            raise Exception('Author Required if Body is Used')
        quote = QuoteModel(body, author)

    meme = MemeEngine('./tmp')
    # format_and_make replaces the stock make_meme call in this project
    path = meme.format_and_make(img, quote.body, quote.author)
    return path
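# A hedged CLI wrapper for generate_meme above, in the spirit of the usual
# meme.py entry point; the flag names --path/--body/--author are assumptions.
# nargs=1 keeps args.path a list, matching the path[0] access above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate a meme')
    parser.add_argument('--path', type=str, nargs=1, default=None,
                        help='path to an image file')
    parser.add_argument('--body', type=str, default=None, help='quote body')
    parser.add_argument('--author', type=str, default=None, help='quote author')
    args = parser.parse_args()
    print(generate_meme(args.path, args.body, args.author))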
class Source(object):

    def __init__(self, name='toto', logger=None, ingestion=True, filter=False):

        # General Attributes
        self.name = name                # Source's name
        self.filter = filter            # does this source really define a filter?

        if logger is None:
            pathlog = PXPaths.LOG + 'rx_' + name + '.log'
            namelog = 'RX' + name
            if self.filter:
                pathlog = PXPaths.LOG + 'fx_' + name + '.log'
                namelog = 'FX' + name
            self.logger = Logger(pathlog, 'INFO', namelog)   # Enable logging
            self.logger = self.logger.getLogger()
        else:
            self.logger = logger

        if not self.filter:
            self.logger.info("Initialisation of source %s" % self.name)
        else:
            self.logger.info("Initialisation of filter %s" % self.name)

        # Attributes coming from the configuration file of the source
        #self.extension = 'nws-grib:-CCCC:-TT:-CIRCUIT:Direct'  # Extension to be added to the ingest name

        self.ingestion = ingestion      # do we want to start the ingestion...
        self.debug = False              # If we want sections with debug code to be executed
        self.batch = 100                # Number of files that will be read in each pass
        self.cache_size = 125000        # Maximum number of md5sums from files kept in the cache manager
        self.bulletin_type = None       # type of bulletin ingested (None, am, wmo)
        self.masks = []                 # All the masks (accept and reject)
        self.masks_deprecated = []      # All the masks (imask and emask)
        self.routemask = True           # use accept and parenthesis in mask to create a key and route with it
        self.routing_version = 1        # directRouting version setting
        self.nodups = False             # Check if the file was already received (md5sum present in the cache)
        self.tmasks = []                # All the transformation masks (timask, temask)
        self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
        self.arrival_extension = None   # Extension added instead when the bulletin is outside its arrival range
        self.type = None                # Must be in ['filter','file','single-file', 'bulletin-file', 'am', 'wmo']
        self.port = None                # Port number if type is in ['am', 'wmo']
        self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
        self.mapEnteteDelai = None
        self.addStationInFilename = True
        self.addSMHeader = False
        self.validation = False         # Validate the filename (ex: prio and timestamp)
        self.patternMatching = True     # Pattern matching of the source masks
        self.clientsPatternMatching = True  # Pattern matching of the client masks
        self.sorter = None              # No sorting on the filenames
        self.feeds = []                 # more sources to feed directly
        self.keepAlive = True           # TCP SO_KEEPALIVE on (True) or off (False)
        self.mtime = 0                  # Number of seconds a file must not have been
                                        # touched before being picked

        # AMQP
        self.exchange_key = ''
        self.exchange_name = None
        self.exchange_realm = '/data'
        self.exchange_type = 'fanout'

        #-----------------------------------------------------------------------------------------
        # Setting up pull configuration values
        #-----------------------------------------------------------------------------------------
        self.pulls = []                 # All the directories and file patterns to pull
        self.host = 'localhost'         # Remote host address (or ip) where to send files
        self.protocol = None            # First thing in the url: ftp, file, am, wmo, amis
        self.url = None
        self.user = None                # User name used to connect
        self.passwd = None              # Password
        self.ssh_keyfile = None         # ssh private key file for the sftp protocol
        self.ftp_mode = 'passive'       # Default is 'passive', can be set to 'active'
        self.timeout_get = 0            # Timeout in sec. to consider a get to hang (0 means inactive)
        self.pull_sleep = 600           # Time in sec. before retrying the pull
        self.pull_wait = 10             # Time in sec. to wait after ls before pulling
                                        # (make sure files have arrived)
        self.delete = False             # if True the file is deleted after the pull; if False
                                        # its ls is kept to check whether it changed...
        self.pull_prefix = ''           # the file may be prefixed by some string, giving
                                        # prefix_filename, or by 'HDATETIME' for the file's
                                        # date-time on the remote host

        # VIP option, None for standalone process
        self.vip = None

        #-----------------------------------------------------------------------------------------
        # Setting up default collection configuration values
        #-----------------------------------------------------------------------------------------
        self.headers = []               # Title for report in the form TT from (TTAAii)
        self.issue_hours = []           # list of emission hours to collect
        self.issue_primary = []         # minutes past the emission hour for the primary collection (reports on time)
        self.issue_cycle = []           # minutes for cycling after the primary collection for more reports
        self.history = 25               # hours during which a report is still valid even if "history" hours late
        self.future = 40                # minutes during which a report is valid even if "future" minutes too soon

        #-----------------------------------------------------------------------------------------
        # Setting file transformations/conversions... etc...
        #-----------------------------------------------------------------------------------------
        self.fx_script = None           # a script to convert/modify each received file
        self.fx_execfile = None
        self.lx_script = None           # a script to convert/modify a list of received files
        self.lx_execfile = None
        self.pull_script = None         # a script to pull files prior to reading rxq
        self.pull_execfile = None

        #-----------------------------------------------------------------------------------------
        # All defaults for a source were set earlier in this class,
        # but some of them may have been overwritten in the px.conf file.
        # Load the px.conf stuff related to the source.
        #-----------------------------------------------------------------------------------------
        pxconf_Path = PXPaths.ETC + 'px.conf'
        if os.path.isfile(pxconf_Path):
            self.readConfig(pxconf_Path)

        #-----------------------------------------------------------------------------------------
        # Parse the configuration file
        #-----------------------------------------------------------------------------------------
        filePath = PXPaths.RX_CONF + self.name + '.conf'
        if self.filter:
            filePath = PXPaths.FX_CONF + self.name + '.conf'
        self.readConfig(filePath)

        #-----------------------------------------------------------------------------------------
        # instantiate the fx/lx/pull scripts in the source class
        #-----------------------------------------------------------------------------------------
        if self.fx_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.fx_execfile)
            except:
                self.logger.error("Problem with fx_script %s" % self.fx_execfile)

        if self.lx_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.lx_execfile)
            except:
                self.logger.error("Problem with lx_script %s" % self.lx_execfile)

        if self.pull_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.pull_execfile)
            except:
                self.logger.error("Problem with pull_script %s" % self.pull_execfile)

        #-----------------------------------------------------------------------------------------
        # Make sure the collection params are valid
        #-----------------------------------------------------------------------------------------
        if self.type == 'collector':
            self.validateCollectionParams()

        #-----------------------------------------------------------------------------------------
        # If we do want to start the ingestor...
        #-----------------------------------------------------------------------------------------
        if self.ingestion:
            if hasattr(self, 'ingestor'):
                # Will happen only when a reload occurs
                self.ingestor.__init__(self)
            else:
                self.ingestor = Ingestor(self)

            if len(self.feeds) > 0:
                self.ingestor.setFeeds(self.feeds)

            self.ingestor.setClients()

        #self.printInfos(self)

    def readConfig(self, filePath):

        def isTrue(s):
            if s == 'True' or s == 'true' or s == 'yes' or s == 'on' or \
               s == 'Yes' or s == 'YES' or s == 'TRUE' or s == 'ON' or \
               s == '1' or s == 'On':
                return True
            else:
                return False

        try:
            config = open(filePath, 'r')
        except:
            (type, value, tb) = sys.exc_info()
            print("Type: %s, Value: %s" % (type, value))
            return

        # current dir and filename could eventually be used
        # for file renaming and perhaps file moves (like a special receiver/dispatcher)
        currentDir = '.'                 # just to preserve consistency with client: unused in source for now
        currentFileOption = 'WHATFN'     # just to preserve consistency with client: unused in source for now
        currentTransformation = 'GIFFY'  # Default transformation for tmasks
        currentLST = None                # a list consisting of one directory followed by one or more file patterns

        for line in config.readlines():
            words = line.split()
            if (len(words) >= 2 and not re.compile('^[ \t]*#').search(line)):
                try:
                    if words[0] == 'extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("Extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.extension = ':' + words[1]
                            self.extension = self.extension.replace('-NAME', self.name)
                    elif words[0] == 'arrival_extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("arrival_extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.arrival_extension = ':' + words[1]
                            self.arrival_extension = self.arrival_extension.replace('-NAME', self.name)
                    elif words[0] == 'accept':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, True))
                    elif words[0] == 'reject':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, False))
                    elif words[0] == 'routemask':
                        self.routemask = isTrue(words[1])
                    elif words[0] == 'routing_version':
                        self.routing_version = int(words[1])
                    elif words[0] == 'noduplicates':
                        self.nodups = isTrue(words[1])
                    elif words[0] == 'imask':
                        self.masks_deprecated.append((words[1], currentDir, currentFileOption))
                    elif words[0] == 'emask':
                        self.masks_deprecated.append((words[1],))
                    elif words[0] == 'timask':
                        self.tmasks.append((words[1], currentTransformation))
                    elif words[0] == 'temask':
                        self.tmasks.append((words[1],))
                    elif words[0] == 'transformation':
                        currentTransformation = words[1]
                    elif words[0] == 'batch':
                        self.batch = int(words[1])
                    elif words[0] == 'cache_size':
                        self.cache_size = int(words[1])
                    elif words[0] == 'bulletin_type':
                        self.bulletin_type = words[1]
                    elif words[0] == 'type':
                        self.type = words[1]
                    elif words[0] == 'port':
                        self.port = int(words[1])
                    elif words[0] == 'AddSMHeader' and isTrue(words[1]):
                        self.addSMHeader = True
                    elif words[0] == 'addStationInFilename':
                        self.addStationInFilename = isTrue(words[1])
                    elif words[0] == 'patternMatching':
                        self.patternMatching = isTrue(words[1])
                    elif words[0] == 'clientsPatternMatching':
                        self.clientsPatternMatching = isTrue(words[1])
                    elif words[0] == 'validation' and isTrue(words[1]):
                        self.validation = True
                    elif words[0] == 'keepAlive':
                        self.keepAlive = isTrue(words[1])
                    elif words[0] == 'debug' and isTrue(words[1]):
                        self.debug = True
                    elif words[0] == 'mtime':
                        self.mtime = int(words[1])
                    elif words[0] == 'sorter':
                        self.sorter = words[1]
                    elif words[0] == 'header':
                        self.headers.append(words[1])
                    elif words[0] == 'hours':
                        self.issue_hours.append(words[1])
                    elif words[0] == 'primary':
                        self.issue_primary.append(words[1])
                    elif words[0] == 'cycle':
                        self.issue_cycle.append(words[1])
                    elif words[0] == 'feed':
                        self.feeds.append(words[1])
                    elif words[0] == 'routingTable':
                        self.routingTable = words[1]
                    elif words[0] == 'fx_script':
                        self.fx_execfile = words[1]
                    elif words[0] == 'lx_script':
                        self.lx_execfile = words[1]
                    elif words[0] == 'pull_script':
                        self.pull_execfile = words[1]
                    elif words[0] == 'vip':
                        self.vip = words[1]
                    elif words[0] == 'arrival':
                        if self.mapEnteteDelai == None:
                            self.mapEnteteDelai = {}
                        self.mapEnteteDelai[words[1]] = (int(words[2]), int(words[3]))
                    elif words[0] == 'logrotate':
                        if words[1].isdigit():
                            self.logger.setBackupCount(int(words[1]))

                    # options for pull
                    elif words[0] == 'directory':
                        currentDir = words[1]
                        currentLST = []
                        # permit directory duplications but warn
                        for lst in self.pulls:
                            if lst[0] == currentDir:
                                currentLST = lst
                                break
                        if len(currentLST) != 0:
                            self.logger.warning("This directory appears twice %s" % currentDir)
                            self.logger.warning("Please correct your config")
                            continue
                        # normal directory addition
                        currentLST.append(currentDir)
                        self.pulls.append(currentLST)
                    elif words[0] == 'get':
                        currentFilePattern = words[1]
                        currentLST.append(currentFilePattern)
                    elif words[0] == 'destination':
                        self.url = words[1]
                        urlParser = URLParser(words[1])
                        (self.protocol, currentDir, self.user, self.passwd,
                         self.host, self.port) = urlParser.parse()
                        if len(words) > 2:
                            currentFileOption = words[2]
                        currentLST = []
                        currentLST.append(currentDir)
                        self.pulls.append(currentLST)
                    elif words[0] == 'protocol':
                        self.protocol = words[1]
                    elif words[0] == 'host':
                        self.host = words[1]
                    elif words[0] == 'user':
                        self.user = words[1]
                    elif words[0] == 'password':
                        self.passwd = words[1]
                    elif words[0] == 'ssh_keyfile':
                        self.ssh_keyfile = words[1]
                    elif words[0] == 'timeout_get':
                        self.timeout_get = int(words[1])
                    elif words[0] == 'ftp_mode':
                        self.ftp_mode = words[1]
                    elif words[0] == 'pull_sleep':
                        self.pull_sleep = int(words[1])
                    elif words[0] == 'pull_wait':
                        self.pull_wait = int(words[1])
                    elif words[0] == 'delete':
                        self.delete = isTrue(words[1])
                    elif words[0] == 'pull_prefix':
                        self.pull_prefix = words[1]

                    # AMQP
                    elif words[0] == 'exchange_key':
                        self.exchange_key = words[1]
                    elif words[0] == 'exchange_name':
                        self.exchange_name = words[1]
                    elif words[0] == 'exchange_realm':
                        self.exchange_realm = words[1]
                    elif words[0] == 'exchange_type':
                        if words[1] in ['fanout', 'direct', 'topic', 'headers']:
                            self.exchange_type = words[1]
                        else:
                            self.logger.error("Problem with exchange_type %s" % words[1])

                    # options for collector
                    if self.type == 'collector':
                        if words[0] == 'aaxx':
                            self.aaxx = words[1].split(',')
                        if words[0] == 'metar':
                            self.metar = words[1].split(',')
                        elif words[0] == 'taf':
                            self.taf = words[1].split(',')
                        elif words[0] == 'history':
                            self.history = int(words[1])
                        elif words[0] == 'future':
                            self.future = int(words[1])
                        elif words[0] == 'issue':
                            if words[1] == 'all':
                                lst = []
                                lst.append(words[1])
                                self.issue_hours.append(lst)
                            else:
                                lst = words[1].split(",")
                                self.issue_hours.append(lst)
                            self.issue_primary.append(int(words[2]))
                            self.issue_cycle.append(int(words[3]))
                except:
                    self.logger.error("Problem with this line (%s) in configuration file of source %s"
                                      % (words, self.name))

        config.close()

        if len(self.masks) > 0:
            self.patternMatching = True
        if len(self.masks_deprecated) > 0:
            self.patternMatching = True

        self.logger.debug("Configuration file of source %s has been read" % (self.name))

    def run_fx_script(self, filename, logger):
        if self.fx_script == None:
            return filename
        return self.fx_script(filename, logger)

    def run_lx_script(self, filelist, logger):
        if self.lx_script == None:
            return filelist
        return self.lx_script(filelist, logger)

    def run_pull_script(self, flow, logger, sleeping):
        filelist = []
        if self.pull_script == None:
            return filelist
        return self.pull_script(flow, logger, sleeping)

    def getTransformation(self, filename):
        for mask in self.tmasks:
            if fnmatch.fnmatch(filename, mask[0]):
                try:
                    return mask[1]
                except:
                    return None
        return None

    def fileMatchMask(self, filename):
        # IMPORTANT NOTE: THE FALLBACK BEHAVIOR HERE IS TO ACCEPT THE FILE.
        # THIS IS THE OPPOSITE OF THE CLIENT, WHERE THE FALLBACK IS REJECT.

        # check against the deprecated masks
        if len(self.masks_deprecated) > 0:
            for mask in self.masks_deprecated:
                if fnmatch.fnmatch(filename, mask[0]):
                    try:
                        if mask[2]:
                            return True
                    except:
                        return False

        # check against the masks
        for mask in self.masks:
            if mask[3].match(filename):
                return mask[4]

        # fallback behavior
        return True

    def printInfos(self, source):
        print("==========================================================================")
        print("Name: %s " % source.name)
        print("Type: %s" % source.type)
        print("Batch: %s" % source.batch)
        print("Cache_size: %s" % source.cache_size)
        print("Bulletin_type: %s" % source.bulletin_type)
        print("Port: %s" % source.port)
        print("TCP SO_KEEPALIVE: %s" % source.keepAlive)
        print("Extension: %s" % source.extension)
        print("Arrival_Extension: %s" % source.arrival_extension)
        print("Arrival: %s" % source.mapEnteteDelai)
        print("addSMHeader: %s" % source.addSMHeader)
        print("addStationInFilename: %s" % source.addStationInFilename)
        print("Validation: %s" % source.validation)
        print("Source Pattern Matching: %s" % source.patternMatching)
        print("Clients Pattern Matching: %s" % source.clientsPatternMatching)
        print("mtime: %s" % source.mtime)
        print("Sorter: %s" % source.sorter)
        print("Routing table: %s" % source.routingTable)
        print("Route with Mask: %s" % source.routemask)
        print("No duplicates: %s" % source.nodups)
        print("FX script: %s" % source.fx_execfile)
        print("LX script: %s" % source.lx_execfile)
        print("Pull script: %s" % source.pull_execfile)
        print("VIP : %s" % source.vip)

        print("******************************************")
        print("* AMQP stuff *")
        print("******************************************")
        print("exchange_key: %s" % source.exchange_key)
        print("exchange_name: %s" % source.exchange_name)
        print("exchange_realm: %s" % source.exchange_realm)
        print("exchange_type: %s" % source.exchange_type)

        print("******************************************")
        print("* Source Masks *")
        print("******************************************")
        for mask in self.masks:
            if mask[4]:
                print(" accept %s" % mask[0])
            else:
                print(" reject %s" % mask[0])

        print("* Source Masks deprecated *")
        for mask in self.masks_deprecated:
            print mask
        print("==========================================================================")

        print("******************************************")
        print("* Source T-Masks *")
        print("******************************************")
        for mask in self.tmasks:
            print mask
        print("==========================================================================")

        print("******************************************")
        print("* sources to feed (collections...) *")
        print("******************************************")
        for feed in self.feeds:
            print feed
        print("==========================================================================")

        if self.type == 'pull-file':
            print("******************************************")
            print("* Pull Params *")
            print("******************************************")
            print "protocol %s" % self.protocol
            print "host %s" % self.host
            print "user %s" % self.user
            print "passwd %s" % self.passwd
            print "ssh_keyfile %s" % self.ssh_keyfile
            print "ftp_mode %s" % self.ftp_mode
            print ""
            print "delete %s" % self.delete
            print "pull_sleep %s" % self.pull_sleep
            print "pull_wait %s" % self.pull_wait
            print "pull_prefix %s" % self.pull_prefix
            print "timeout_get %s" % self.timeout_get
            print ""
            for lst in self.pulls:
                for pos, elem in enumerate(lst):
                    if pos == 0:
                        print "directory %s" % elem
                    else:
                        print "get %s" % elem
            print("==========================================================================")

        if self.type == 'collector':
            print("******************************************")
            print("* Collection Params *")
            print("******************************************")
            print "bulletin aaxx %s" % self.aaxx
            print "bulletin metar %s" % self.metar
            print "bulletin taf %s" % self.taf
            for position, header in enumerate(self.headers):
                print "\nHeader %s" % header
                lst = self.issue_hours[position]
                print "issue hours %s" % lst
                print "issue primary %s" % self.issue_primary[position]
                print "issue cycle %s" % self.issue_cycle[position]
            print "history %s" % self.history
            print "future %s" % self.future
            print("==========================================================================")
def test_parse_no_extension(self):
    with self.assertRaises(ValueError):
        _ = Ingestor.parse('fakefile')
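# Sketch of the extension guard this test exercises, assuming Ingestor.parse
# dispatches on the file suffix; the allowed-extension set and helper name
# are assumptions about that layout, not code from this project.
import os

ALLOWED_EXTENSIONS = {'.txt', '.docx', '.pdf', '.csv'}

def check_extension(path):
    """Raise ValueError for paths parse() cannot ingest, as the test expects."""
    ext = os.path.splitext(path)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise ValueError(f'cannot ingest {path!r}: unsupported extension {ext!r}')
    return ext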
def __init__(self, name='toto', logger=None, ingestion=True):

    # General Attributes
    self.name = name                # Sourlient's name

    if logger is None:
        self.logger = Logger(PXPaths.LOG + 'trx_' + name + '.log',
                             'INFO', 'TRX' + name)   # Enable logging
        self.logger = self.logger.getLogger()
    else:
        self.logger = logger

    self.logger.info("Initialisation of sourlient %s" % self.name)

    self.ingestion = ingestion      # Determine if the Sourlient will have an Ingestor
    self.debug = False              # If we want sections with debug code to be executed
    self.subscriber = True          # False if it is a provider

    self.type = 'aftn'              # Must be in ['aftn']
    self.host = 'localhost'         # Remote host name (or ip) where to send files
    self.portR = 56550              # Receiving port
    self.portS = 5160               # Sending port

    self.stationID = 'SUB'          # Three-letter ID of this process
    self.otherStationID = 'MHS'     # Three-letter ID of the other party
    self.address = 'CYHQUSER'       # AFTN address of this process
    self.otherAddress = 'CYHQMHSN'  # AFTN address of the other party
    self.digits = 4                 # Number of digits used in the CSN

    self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
    self.ackUsed = True             # Should always be True. False is only for testing purposes
    self.maxAckTime = 60            # Number of seconds we wait for an ack before trying to reconnect
    self.batch = 100                # Number of files that will be read in each pass
    self.cache_size = 125000        # Maximum number of md5sums from files kept in the cache manager
    self.timeout = 10               # Time we wait between each attempt to connect
    self.maxLength = 0              # Max. length of a message... limit used for segmentation, 0 means unused
    self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
    self.validation = False         # Validation of the filename (prio + date)
    self.patternMatching = False    # NEVER SET TO TRUE (represents source masks, not used in sourlient)
    self.diskReaderPatternMatching = True  # Check the emask/imask of the sourlient before sending a file
    self.clientsPatternMatching = True     # Check the emask/imask of the clients before linking a file
    self.nodups = True              # Check if the file has already been sent (md5sum present in the cache)
    self.mtime = 0                  # Number of seconds a file must not have been
                                    # touched before being picked

    self.sorter = 'MultiKeysStringSorter'  # Class (or object) used to sort
    self.masks = []                 # All the masks (accept and reject)
    self.masks_deprecated = []      # All the masks (imask and emask)
    self.collection = None          # Sourlients do not participate in the collection effort
    self.slow = False               # Sleeps are added when we want to be able to decrypt log entries

    # Socket Attributes
    self.port = None

    # All defaults for a source were set earlier in this class,
    # but some of them may have been overwritten in the px.conf file.
    # Load the px.conf stuff related to the source.
    pxconf_Path = PXPaths.ETC + 'px.conf'
    if os.path.isfile(pxconf_Path):
        self.readConfig(pxconf_Path)

    # read in sourlient config
    filePath = PXPaths.TRX_CONF + self.name + '.conf'
    self.readConfig(filePath)

    if self.ingestion:
        if hasattr(self, 'ingestor'):
            # Will happen only when a reload occurs
            self.ingestor.__init__(self)
        else:
            self.ingestor = Ingestor(self)

        #self.printInfos(self)
        self.ingestor.setClients()
class IngestorTest(unittest.TestCase):

    def setUp(self):
        filename = 'Test_Files/DatabaseManagerTest_15.csv'
        self.ingestor = Ingestor(filename)
        self.ingestor.readCSV()

    def test_headers(self):
        headerList = ["Street Address", "owner's first name", "last_name", "Long",
                      "email", "phone Number", "Loan Amount", "Lat"]
        #self.assertEqual(self.ingestor.getCSVHeaders(), headerList)

    def test_search_headers(self):
        searchCriteria = ["last_name", "Long", "phone Number"]
        expectedReturn = [["last_name", 2], ["Long", 3], ["phone_Number", 5]]
        self.assertEqual(
            self.ingestor.getHeaderIndex(searchCriteria, self.ingestor.getCSVHeaders()),
            expectedReturn)

    def test_get_row(self):
        expectedReturn = ["8 Hoard Court", "Samuele", "Gulliver", "-64.1305924",
                          "*****@*****.**", "+54 (656) 804-6029", "$14,895.21 ",
                          "-31.4325479"]
        self.assertEqual(self.ingestor.getRowAt(0), expectedReturn)

    def test_get_row_filtered(self):
        searchCriteria = [["last_name", 2], ["Long", 3], ["phone_Number", 5]]
        expectedReturn = ["Gulliver", "-64.1305924", "+54 (656) 804-6029"]
        self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
        self.assertEqual(self.ingestor.getRowAt(0), expectedReturn)
        expectedReturn = ["Scoullar", "121.5570313", "+63 (634) 506-0432"]
        self.assertEqual(self.ingestor.getRowAt(4), expectedReturn)

    def test_number_of_headers(self):
        self.assertEqual(self.ingestor.getNumberOfHeaders(), 8)

    def test_number_of_rows(self):
        self.assertEqual(self.ingestor.getNumberOfRows(), 15)

    def test_switch_files(self):
        self.assertFalse(self.ingestor.updateFileLoc(""))
        self.assertTrue(self.ingestor.updateFileLoc("Test_Files/DatabaseManagerTest_1000.csv"))
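# --- Editor's sketch (hypothetical) ---
# One plausible shape for Ingestor.getHeaderIndex, inferred only from the
# expected values in the tests above: each requested header is paired with
# its column index, and spaces are normalised to underscores (e.g.
# "phone Number" comes back as "phone_Number"). The real implementation
# lives in Ingestor and may differ.
def getHeaderIndex(searchCriteria, headers):
    result = []
    for name in searchCriteria:
        if name in headers:
            # Pair the normalised header name with its column index.
            result.append([name.replace(' ', '_'), headers.index(name)])
    return result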
class csv_importer_popup(QtWidgets.QDialog):

    # Signal emitted when csv_importer_popup closes
    importDoneSignal = QtCore.pyqtSignal('QString')

    def __init__(self, window_title, db_file_loc, tables, protected_table_prefix):
        super().__init__()
        # Creating the window
        self.title = window_title
        self.setWindowTitle(self.title)
        self.protected_table_prefix = protected_table_prefix
        self.tablesInDB = tables
        # Database manager stuff
        self.db = DatabaseManager(db_file_loc, protected_table_prefix)
        # Create an array with the tables already in the database to be
        # put in the common files radio button box
        self.default_lists = []
        for table in tables:
            tempList = []
            for columnName in self.db.get_headers(table):
                tempList.append(columnName)
            self.default_lists.append(tempList)
        self.layout = QGridLayout()

    def run_popup(self, file_loc):
        # CSV file stuff
        self.ingestor = Ingestor(file_loc)
        self.ingestor.readCSV()
        self.rows = self.ingestor.getCSVHeaders()
        # Create buttons from the headers of the CSV file that was just selected
        self.generate_checkboxes(self.rows)
        # Create an area that has a scroll bar
        scrollArea = QScrollArea()
        scrollArea.setWidget(self.csvHeaderGroup_box)
        scrollArea.horizontalScrollBar().setEnabled(False)
        # Create the buttons for tables that already exist in the database
        self.generate_radiobuttons(self.tablesInDB)
        # List of button groups
        self.buttonGroups = [self.commonFileTypesGroup, self.csvHeaderGroup]
        # Create label
        tableNameLabel = QtWidgets.QLabel("Table Name")
        # Create text field
        self.tableNameField = QtWidgets.QLineEdit()
        self.tableNameField.setPlaceholderText("Enter Custom Table Name")
        # Create buttons
        self.cancelButton = QPushButton('Cancel')
        self.importButton = QPushButton('Import')
        self.cancelButton.clicked.connect(self.closeWindow)
        self.importButton.clicked.connect(self.importCSV)
        # Create progress bar
        self.progressBar = QtWidgets.QProgressBar()
        # Create the master layout, which is a grid
        layout = QGridLayout()
        # Add widgets
        # Format: addWidget(widget, row, col, row span, col span)
        layout.addWidget(scrollArea, 1, 1, 1, 2)
        layout.addWidget(tableNameLabel, 2, 1, 1, 2)
        layout.addWidget(self.tableNameField, 3, 1, 1, 2)
        layout.addWidget(self.commonFileTypesGroupBox, 4, 1, 1, 2)
        layout.addWidget(self.progressBar, 5, 1, 1, 2)
        layout.addWidget(self.cancelButton, 6, 1)
        layout.addWidget(self.importButton, 6, 2)
        self.setLayout(layout)
        self.resize(self.sizeHint())

    def generate_checkboxes(self, button_name_list):
        # Generate check boxes
        self.csvHeaderGroup = QButtonGroup()
        self.csvHeaderGroup_layout = QVBoxLayout()
        self.csvHeaderGroup.setExclusive(False)
        self.csvHeaderGroup_box = QGroupBox('Select which headers')
        self.csvHeaderGroup_layout.addStretch(1)
        for button_name in button_name_list:
            # Add each header from the CSV file to the layout as a checkbox
            checkbox = QCheckBox(button_name)
            self.csvHeaderGroup.addButton(checkbox)
            self.csvHeaderGroup_layout.addWidget(self.csvHeaderGroup.buttons()[-1])
        self.csvHeaderGroup_box.setLayout(self.csvHeaderGroup_layout)

    def generate_radiobuttons(self, button_name_list):
        # Generate radio buttons
        self.commonFileTypesGroup = QButtonGroup()
        self.commonFileTypesGroupLayout = QVBoxLayout()
        self.commonFileTypesGroupBox = QGroupBox('Select a pre-existing table')
        self.commonFileTypesGroupLayout.addStretch(1)
        count = 0
        for button_name in button_name_list:
            # Add a button for each list available in the database
            radioButton = QRadioButton(button_name)
            self.commonFileTypesGroup.addButton(radioButton, count)
            self.commonFileTypesGroupLayout.addWidget(self.commonFileTypesGroup.buttons()[-1])
            count += 1
        self.commonFileTypesGroupBox.setLayout(self.commonFileTypesGroupLayout)

    def import_done(self, tableName):
        # Emits which table was created
        self.importDoneSignal.emit(tableName)
        self.accept()

    def closeWindow(self):
        # Closes the window
        self.reject()

    def importCSV(self):
        self.importButton.setEnabled(False)
        self.cancelButton.setEnabled(False)
        # Check whether any radio buttons were pressed by checking whether
        # they are checked, and save the number within the button group
        radio_button_number = -1
        special_button_number = -1
        count = 0
        for radioButton in self.buttonGroups[0].buttons():
            if radioButton.isChecked():
                radio_button_number = count
                break
            count += 1
        count = 0  # Reset the counter before scanning the second group
        for specialButton in self.buttonGroups[1].buttons():
            # Do the same for the special buttons
            if specialButton.isChecked():
                special_button_number = count
                break
            count += 1
        if radio_button_number > -1:
            searchCriteria = self.ingestor.getHeaderIndex(
                self.default_lists[radio_button_number],
                self.ingestor.getCSVHeaders())
            buttonText = self.buttonGroups[0].buttons()[radio_button_number].text()
            # Check which table corresponds to the button pressed
            for tableName in self.tablesInDB:
                if buttonText.replace(' ', '_') == tableName:
                    # Use the ingestor to filter the unfiltered rows
                    # with this search criteria list
                    self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                    # Check whether the table already exists
                    if not self.db.doesTableExist(tableName):
                        # If not, create it with the table name
                        self.db.create_table_list(
                            tableName,
                            self.db.remove_spaces(self.default_lists[radio_button_number]),
                            'string')
                    self.import_with_progress_bar(
                        tableName, self.ingestor.getRows(),
                        self.default_lists[radio_button_number])
                    self.import_done(tableName)
        else:
            try:
                if self.tableNameField.text() == '' or \
                        self.protected_table_prefix in self.tableNameField.text():
                    raise Exception()
                else:
                    customTableName = self.db.is_valid_string(
                        self.tableNameField.text().replace(' ', '_'))
                    if special_button_number > -1:
                        # Default header option not chosen, so custom lists
                        try:
                            requestedHeaders = []
                            for item in self.buttonGroups[1].buttons():
                                if item.isChecked():
                                    requestedHeaders.append(item.text())
                            does_exist = self.db.doesTableExist(customTableName)
                            has_same_cols = True
                            if not does_exist:
                                # If not, create it with the table name
                                self.db.create_table_list(
                                    customTableName,
                                    self.db.remove_spaces(requestedHeaders),
                                    'string')
                            else:
                                # Table exists. Does it have the same columns?
                                if not (requestedHeaders == self.db.get_headers(customTableName)):
                                    has_same_cols = False
                                    # Find the differing column names.
                                    # This works by turning the lists into sets:
                                    # a set is an unordered collection with no
                                    # duplicate elements, and subtracting one set
                                    # from another returns the elements that are
                                    # not shared.
                                    different_cols = list(
                                        set(self.db.remove_spaces(requestedHeaders)) -
                                        set(self.db.get_headers(customTableName)))
                                    # Add the extra columns
                                    for col in different_cols:
                                        self.db.add_column(customTableName, col, 'string')
                            if has_same_cols:
                                # New table is identical to the existing one
                                print("same columns")
                                searchCriteria = self.ingestor.getHeaderIndex(
                                    requestedHeaders, self.ingestor.getCSVHeaders())
                                self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                                rows = self.ingestor.getRows()
                                self.import_with_progress_bar(
                                    customTableName, self.ingestor.getRows(), requestedHeaders)
                            else:
                                # New table has different columns:
                                # combine the headers of the two lists
                                print("diff columns")
                                combinedHeaders = self.db.get_headers(customTableName) + requestedHeaders
                                # They have to be reordered to match the CSV file
                                newRequestedHeaders = []
                                for header in self.db.remove_spaces(self.ingestor.getCSVHeaders()):
                                    # Find the header in the CSV file.
                                    # The order matters because the primary key
                                    # is needed to update the row.
                                    if header in combinedHeaders:
                                        newRequestedHeaders.append(header)
                                # Get the index for each header
                                searchCriteria = self.ingestor.getHeaderIndex(
                                    newRequestedHeaders, self.ingestor.getCSVHeaders())
                                # Filter the rows so only the requested info remains
                                self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                                rows = self.ingestor.getRows()
                                # Import them normally
                                self.import_with_progress_bar(
                                    customTableName, self.ingestor.getRows(), newRequestedHeaders)
                            self.import_done(customTableName)
                        except Exception as er:
                            # General error message
                            print('Error message:', er.args[0])
                            return False
                    else:
                        raise Exception()
            except Exception:
                ErrorBox = QtWidgets.QMessageBox()
                choice = ErrorBox.critical(
                    self, 'Table Name Error',
                    "Table name can only have letters, numbers, and underscores",
                    ErrorBox.Ok)
                if choice == ErrorBox.Ok:
                    # User wants to try a new name
                    print("Closing")
                    ErrorBox.accept()
        self.importButton.setEnabled(True)
        self.cancelButton.setEnabled(True)

    def import_with_progress_bar(self, tableName, rows_to_be_added, column_headers):
        """
        Adds the ingestor rows to the db one row at a time so the progress
        bar will show the progress.
        """
        # Set the maximum of the progress bar to the number of rows to be added
        self.progressBar.setMaximum(len(rows_to_be_added))
        #self.db.add_list_of_rows(tableName, self.db.remove_spaces(self.default_lists[button_number]), rows)
        count = 0
        for row in rows_to_be_added:
            # For every row to be added, add it to the db and increment
            # the progress bar value by 1
            count += 1
            self.db.add_row_list(tableName, self.db.remove_spaces(column_headers), row)
            self.progressBar.setValue(count)
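# Minimal launch sketch for the dialog above (hypothetical usage): only the
# constructor signature, run_popup() and importDoneSignal come from the class
# itself; the database path, table list, prefix, and CSV path are placeholder
# values borrowed from the test files used elsewhere in this collection.
import sys
from PyQt5 import QtWidgets

app = QtWidgets.QApplication(sys.argv)
popup = csv_importer_popup('CSV Importer', 'test.db', ['Test_15'], '__ADMIN__')
popup.importDoneSignal.connect(lambda name: print('Imported into table:', name))
popup.run_popup('Test_Files/DatabaseManagerTest_15.csv')
popup.exec_()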
def test_can_ingest(self):
    self.assertTrue(Ingestor.can_ingest('myfile.PDF'))
    self.assertFalse(Ingestor.can_ingest('myfile.HTML'))
    self.assertFalse(Ingestor.can_ingest(None))
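# A plausible can_ingest, inferred solely from the three assertions above:
# the check is extension-based, case-insensitive, and None-safe (it returns
# False rather than raising). The allowed-extension list is an assumption,
# not part of the source.
import os

class Ingestor:

    allowed_extensions = ['pdf', 'csv', 'txt', 'docx']  # assumed list

    @classmethod
    def can_ingest(cls, path):
        if not path:
            return False
        ext = os.path.splitext(path)[1].lstrip('.').lower()
        return ext in cls.allowed_extensions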
def __init__(self, name='toto', logger=None, ingestion=True, filter=False):

    # General Attributes
    self.name = name                  # Source's name
    self.filter = filter              # Does this source really define a filter?

    if logger is None:
        pathlog = PXPaths.LOG + 'rx_' + name + '.log'
        namelog = 'RX' + name
        if self.filter:
            pathlog = PXPaths.LOG + 'fx_' + name + '.log'
            namelog = 'FX' + name
        self.logger = Logger(pathlog, 'INFO', namelog)  # Enable logging
        self.logger = self.logger.getLogger()
    else:
        self.logger = logger

    if not self.filter:
        self.logger.info("Initialisation of source %s" % self.name)
    else:
        self.logger.info("Initialisation of filter %s" % self.name)

    # Attributes coming from the configuration file of the source
    #self.extension = 'nws-grib:-CCCC:-TT:-CIRCUIT:Direct'  # Extension to be added to the ingest name
    self.ingestion = ingestion        # Do we want to start the ingestion...
    self.debug = False                # If we want sections with debug code to be executed
    self.batch = 100                  # Number of files that will be read in each pass
    self.cache_size = 125000          # Maximum number of file md5sums kept in the cache manager
    self.bulletin_type = None         # Type of bulletin ingested (None, am, wmo)
    self.masks = []                   # All the masks (accept and reject)
    self.masks_deprecated = []        # All the masks (imask and emask)
    self.routemask = True             # Use accept and parentheses in the mask to create a key and route with it
    self.routing_version = 1          # directRouting version setting
    self.nodups = False               # Check if the file was already received (md5sum present in the cache)
    self.tmasks = []                  # All the transformation masks (timask, temask)
    self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
    # Extension to be added to the ingest name when the bulletin is outside its arrival range
    self.arrival_extension = None
    self.type = None                  # Must be in ['filter', 'file', 'single-file', 'bulletin-file', 'am', 'wmo']
    self.port = None                  # Port number if type is in ['am', 'wmo']
    self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
    self.mapEnteteDelai = None
    self.addStationInFilename = True
    self.addSMHeader = False
    self.validation = False           # Validate the filename (ex: prio and timestamp)
    self.patternMatching = True       # Pattern matching on the source masks
    self.clientsPatternMatching = True  # Pattern matching on the clients masks
    self.sorter = None                # No sorting on the filenames
    self.feeds = []                   # More sources to feed directly
    self.keepAlive = True             # TCP SO_KEEPALIVE on (True) or off (False)
    self.mtime = 0                    # Integer indicating the number of seconds a file must not have
                                      # been touched before being picked

    # AMQP
    self.exchange_key = ''
    self.exchange_name = None
    self.exchange_realm = '/data'
    self.exchange_type = 'fanout'

    #-----------------------------------------------------------------------------------------
    # Setting up pull configuration values
    #-----------------------------------------------------------------------------------------
    self.pulls = []                   # All the directories and file patterns to pull
    self.host = 'localhost'           # Remote host address (or ip) where to send files
    self.protocol = None              # First thing in the url: ftp, file, am, wmo, amis
    self.url = None
    self.user = None                  # User name used to connect
    self.passwd = None                # Password
    self.ssh_keyfile = None           # ssh private key file for the sftp protocol
    self.ftp_mode = 'passive'         # Default is 'passive', can be set to 'active'
    self.timeout_get = 0              # Timeout in sec. to consider a get to hang (0 means inactive)
    self.pull_sleep = 600             # Time in sec. before retrying the pull
    self.pull_wait = 10               # Time in sec. to wait after ls before pulling (to make sure files have arrived)
    self.delete = False               # If False, the file is not deleted after the pull and its ls is kept
                                      # to check if it changed...
    self.pull_prefix = ''             # The file may be prefixed by some string; the filename will then be
                                      # prefix_filename, or the value 'HDATETIME' for the file's data time
                                      # on the remote host

    #-----------------------------------------------------------------------------------------
    # Setting up default collection configuration values
    #-----------------------------------------------------------------------------------------
    self.headers = []                 # Title for report in the form TT from (TTAAii)
    self.issue_hours = []             # List of emission hours to collect
    self.issue_primary = []           # Minutes past the emission hours for the primary collection (report on time)
    self.issue_cycle = []             # Minutes for cycling after the primary collection for more reports
    self.history = 25                 # Time in hours to consider a report valid even if "history" hours late
    self.future = 40                  # Time in minutes to consider a report valid even if "future" minutes too soon

    #-----------------------------------------------------------------------------------------
    # Setting up file transformations/conversions... etc...
    #-----------------------------------------------------------------------------------------
    self.fx_script = None             # A script to convert/modify each received file
    self.fx_execfile = None
    self.lx_script = None             # A script to convert/modify a list of received files
    self.lx_execfile = None
    self.pull_script = None           # A script to pull files prior to reading rxq
    self.pull_execfile = None

    #-----------------------------------------------------------------------------------------
    # All defaults for a source were set earlier in this class,
    # but some of them may have been overwritten in the px.conf file.
    # Load the px.conf stuff related to the source.
    #-----------------------------------------------------------------------------------------
    pxconf_Path = PXPaths.ETC + 'px.conf'
    if os.path.isfile(pxconf_Path):
        self.readConfig(pxconf_Path)

    #-----------------------------------------------------------------------------------------
    # Parse the configuration file
    #-----------------------------------------------------------------------------------------
    filePath = PXPaths.RX_CONF + self.name + '.conf'
    if self.filter:
        filePath = PXPaths.FX_CONF + self.name + '.conf'
    self.readConfig(filePath)

    #-----------------------------------------------------------------------------------------
    # Instantiate the fx, lx and pull scripts in the source class
    #-----------------------------------------------------------------------------------------
    if self.fx_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.fx_execfile)
        except:
            self.logger.error("Problem with fx_script %s" % self.fx_execfile)

    if self.lx_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.lx_execfile)
        except:
            self.logger.error("Problem with lx_script %s" % self.lx_execfile)

    if self.pull_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.pull_execfile)
        except:
            self.logger.error("Problem with pull_script %s" % self.pull_execfile)

    #-----------------------------------------------------------------------------------------
    # Make sure the collection params are valid
    #-----------------------------------------------------------------------------------------
    if self.type == 'collector':
        self.validateCollectionParams()

    #-----------------------------------------------------------------------------------------
    # If we do want to start the ingestor...
    #-----------------------------------------------------------------------------------------
    if self.ingestion:
        if hasattr(self, 'ingestor'):
            # Will happen only when a reload occurs
            self.ingestor.__init__(self)
        else:
            self.ingestor = Ingestor(self)

        if len(self.feeds) > 0:
            self.ingestor.setFeeds(self.feeds)

        self.ingestor.setClients()
class Sourlient(object):

    def __init__(self, name='toto', logger=None, ingestion=True):

        # General Attributes
        self.name = name                  # Sourlient's name
        if logger is None:
            self.logger = Logger(PXPaths.LOG + 'trx_' + name + '.log',
                                 'INFO', 'TRX' + name)  # Enable logging
            self.logger = self.logger.getLogger()
        else:
            self.logger = logger

        self.logger.info("Initialisation of sourlient %s" % self.name)

        self.ingestion = ingestion        # Determine if the Sourlient will have an Ingestor
        self.debug = False                # If we want sections with debug code to be executed
        self.subscriber = True            # False if it is a provider
        self.type = 'aftn'                # Must be in ['aftn']
        self.host = 'localhost'           # Remote host name (or ip) where to send files
        self.portR = 56550                # Receiving port
        self.portS = 5160                 # Sending port
        self.stationID = 'SUB'            # Three-letter ID of this process
        self.otherStationID = 'MHS'       # Three-letter ID of the other party
        self.address = 'CYHQUSER'         # AFTN address of this process
        self.otherAddress = 'CYHQMHSN'    # AFTN address of the other party
        self.digits = 4                   # Number of digits used in the CSN
        self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
        self.ackUsed = True               # Should always be True. False is only for testing purposes
        self.maxAckTime = 60              # Number of seconds we wait for an ack before trying to reconnect
        self.batch = 100                  # Number of files that will be read in each pass
        self.cache_size = 125000          # Maximum number of file md5sums kept in the cache manager
        self.timeout = 10                 # Time we wait between each attempt to connect
        self.maxLength = 0                # Max. length of a message... limit used for segmentation, 0 means unused
        self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
        self.validation = False           # Validation of the filename (prio + date)
        self.patternMatching = False      # NEVER SET TO TRUE (represents source masks, not used in a sourlient)
        self.diskReaderPatternMatching = True  # Verification of the emask and imask of the sourlient before sending a file
        self.clientsPatternMatching = True     # Verification of the emask and imask of the clients before linking a file
        self.nodups = True                # Check if the file has already been sent (md5sum present in the cache)
        self.mtime = 0                    # Integer indicating the number of seconds a file must not have
                                          # been touched before being picked
        self.sorter = 'MultiKeysStringSorter'  # Class (or object) used to sort
        self.masks = []                   # All the masks (accept and reject)
        self.masks_deprecated = []        # All the masks (imask and emask)
        self.collection = None            # Sourlients do not participate in the collection effort
        self.slow = False                 # Sleeps are added when we want to be able to decrypt log entries

        # Socket Attributes
        self.port = None

        # All defaults for a source were set earlier in this class,
        # but some of them may have been overwritten in the px.conf file.
        # Load the px.conf stuff related to the source.
        pxconf_Path = PXPaths.ETC + 'px.conf'
        if os.path.isfile(pxconf_Path):
            self.readConfig(pxconf_Path)

        # Read in the sourlient config
        filePath = PXPaths.TRX_CONF + self.name + '.conf'
        self.readConfig(filePath)

        if self.ingestion:
            if hasattr(self, 'ingestor'):
                # Will happen only when a reload occurs
                self.ingestor.__init__(self)
            else:
                self.ingestor = Ingestor(self)

            #self.printInfos(self)
            self.ingestor.setClients()

    def readConfig(self, filePath):

        def isTrue(s):
            if s == 'True' or s == 'true' or s == 'yes' or s == 'on' or \
               s == 'Yes' or s == 'YES' or s == 'TRUE' or s == 'ON' or \
               s == '1' or s == 'On':
                return True
            else:
                return False

        currentDir = '.'              # Current directory
        currentFileOption = 'WHATFN'  # Under what filename the file will be sent (WHATFN, NONE, etc. See PDS)

        try:
            config = open(filePath, 'r')
        except:
            (type, value, tb) = sys.exc_info()
            print("Type: %s, Value: %s" % (type, value))
            return

        for line in config.readlines():
            words = line.split()
            if (len(words) >= 2 and not re.compile('^[ \t]*#').search(line)):
                try:
                    if words[0] == 'extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("Extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.extension = ':' + words[1]
                    elif words[0] == 'accept':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, True))
                    elif words[0] == 'reject':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, False))
                    elif words[0] == 'imask':
                        self.masks_deprecated.append((words[1], currentDir, currentFileOption))
                    elif words[0] == 'emask':
                        self.masks_deprecated.append((words[1],))
                    elif words[0] == 'subscriber':
                        self.subscriber = isTrue(words[1])
                    elif words[0] == 'validation':
                        self.validation = isTrue(words[1])
                    elif words[0] == 'noduplicates':
                        self.nodups = isTrue(words[1])
                    elif words[0] == 'slow':
                        self.slow = isTrue(words[1])
                    elif words[0] == 'patternMatching':
                        self.diskReaderPatternMatching = isTrue(words[1])
                    elif words[0] == 'clientsPatternMatching':
                        self.clientsPatternMatching = isTrue(words[1])
                    elif words[0] == 'mtime':
                        self.mtime = int(words[1])
                    elif words[0] == 'sorter':
                        self.sorter = words[1]
                    elif words[0] == 'type':
                        self.type = words[1]
                    elif words[0] == 'maxLength':
                        self.maxLength = int(words[1])
                    elif words[0] == 'host':
                        self.host = words[1]
                    elif words[0] == 'portR':
                        self.portR = int(words[1])
                    elif words[0] == 'portS':
                        self.portS = int(words[1])
                    elif words[0] == 'stationID':
                        self.stationID = words[1]
                    elif words[0] == 'otherStationID':
                        self.otherStationID = words[1]
                    elif words[0] == 'address':
                        self.address = words[1]
                    elif words[0] == 'otherAddress':
                        self.otherAddress = words[1]
                    elif words[0] == 'digits':
                        self.digits = int(words[1])
                    elif words[0] == 'routingTable':
                        self.routingTable = words[1]
                    elif words[0] == 'ackUsed':
                        self.ackUsed = isTrue(words[1])
                    elif words[0] == 'maxAckTime':
                        self.maxAckTime = int(words[1])
                    elif words[0] == 'batch':
                        self.batch = int(words[1])
                    elif words[0] == 'cache_size':
                        self.cache_size = int(words[1])
                    elif words[0] == 'debug' and isTrue(words[1]):
                        self.debug = True
                    elif words[0] == 'timeout':
                        self.timeout = int(words[1])
                    elif words[0] == 'timeout_send':
                        self.timeout_send = int(words[1])
                    elif words[0] == 'logrotate':
                        if words[1].isdigit():
                            self.logger.setBackupCount(int(words[1]))
                except:
                    self.logger.error("Problem with this line (%s) in configuration file of client %s"
                                      % (words, self.name))

        if not self.validation:
            self.sorter = 'None'  # Must be a string because eval will be subsequently applied to this

        config.close()

    def _getMatchingMask(self, filename):
        if len(self.masks_deprecated) > 0:
            for mask in self.masks_deprecated:
                if fnmatch.fnmatch(filename, mask[0]):
                    try:
                        if mask[2]:
                            return mask
                    except:
                        return None
        for mask in self.masks:
            if mask[3].match(filename):
                if mask[4]:
                    return mask
                return None
        return None

    def printInfos(self, client):
        print("==========================================================================")
        print("Name: %s " % client.name)
        print("Type: %s" % client.type)
        print("Subscriber: %s" % client.subscriber)
        print("Host: %s" % client.host)
        print("PortR: %s" % client.portR)
        print("PortS: %s" % client.portS)
        print("Station ID: %s" % client.stationID)
        print("Other Station ID: %s" % client.otherStationID)
        print("Address: %s" % client.address)
        print("Other Address: %s" % client.otherAddress)
        print("Digits: %i" % client.digits)
        print("Extension: %s" % client.extension)
        print("Slow: %s" % client.slow)
        print("Batch: %s" % client.batch)
        print("Cache_size: %s" % client.cache_size)
        print("Max length: %i" % client.maxLength)
        print("Mtime: %i" % client.mtime)
        print("Timeout: %s" % client.timeout)
        print("Sorter: %s" % client.sorter)
        print("Validation: %s" % client.validation)
        print("Routing table: %s" % client.routingTable)
        print("Maximum time (seconds) we wait for an ack: %s" % client.maxAckTime)
        print("Ack used: %s" % client.ackUsed)
        print("Sourlient Pattern Matching: %s" % client.diskReaderPatternMatching)
        print("Clients Pattern Matching: %s" % client.clientsPatternMatching)
        print("******************************************")
        print("* Sourlient Masks *")
        print("******************************************")
        for mask in self.masks:
            if mask[4]:
                print(" accept %s" % mask[0])
            else:
                print(" reject %s" % mask[0])
        print("==========================================================================")
class Elastic:

    def __init__(self):
        self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        if self.es.ping():
            logging.info('Elasticsearch connected..')
        else:
            logging.info('Could not connect to Elasticsearch!')
        self.index_name = None
        self.ingest = Ingestor()
        self.db = Database()
        self.pp = pprint.PrettyPrinter(indent=4)
        self.default_zip = None
        self.default_house = None
        self.status = None

    def store_es_record(self, house, zipcode):
        try:
            self.index_name = 'listings_' + str(zipcode)
            self.es.index(index=self.index_name, body=house)
        except Exception as ex:
            logging.info('Error in indexing data: %s', ex)

    def load_es(self, zipcode):
        self.index_name = 'listings_' + str(zipcode)
        objects = self.ingest.callWebservice(zipcode)
        payload = objects.json()  # renamed from 'dict' to avoid shadowing the builtin
        total = payload['result']['total']
        listings = payload['result']['listings']
        for house in listings:
            self.store_es_record(house, zipcode)
        return total

    def process(self, search_object, original_keys, modified_keys):
        try:
            res = self.es.search(index=self.index_name, body=search_object)
            data_dict = ast.literal_eval(str(res))
            num = len(data_dict['hits']['hits'])
            listing = []
            for i in range(num):
                v = data_dict['hits']['hits'][i]['_source']
                val = json.dumps(v)
                listing.append(val)
            return listing
        except Exception as ex:
            logging.info('Error in process: %s', ex)

    def process0(self, results):
        try:
            settings = {
                "settings": {
                    "number_of_shards": 1,
                    "number_of_replicas": 0
                }
            }
            zipcode = str(results[1][0])
            if not self.db.exists_zipcode(zipcode):
                # ignore=400 means to ignore the "Index Already Exists" error.
                index_name = 'listings_' + str(zipcode)
                self.es.indices.create(index_name, ignore=400, body=settings)
                print('Elasticsearch created index', str(index_name))
                num = self.load_es(zipcode)
                print("Loaded " + str(num) + " records into Elasticsearch")
                self.db.insert_db(zipcode, time.time())
                self.status = "Downloaded listings for " + str(results[1][0])
            else:
                self.status = str(results[1][0]) + " already downloaded"
            # The original referenced the builtin 'zip' here; 'zipcode' is what was meant.
            self.default_zip = zipcode
            self.index_name = 'listings_' + str(zipcode)
            status = {'status': self.status}
            data = ast.literal_eval(str(status))
            result = json.dumps(data)
            return result
        except Exception as ex:
            logging.info('Error in process0: %s', ex)
            status = {'status': 'Downloading ' + str(results[1][0]) + ' failed'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result

    def process6(self, results):
        # "what properties are on the market in the price range 100000 to 300000"
        try:
            keywords = results[1]
            key = keywords[0]
            if key == 'price':
                if len(keywords) == 3:
                    unit_low = keywords[1]
                    unit_high = keywords[2]
                    low_factor = 0
                    high_factor = 0
                    if unit_low and unit_low.startswith('mil'):
                        low_factor = 1000000
                    if unit_high and unit_high.startswith('mil'):
                        high_factor = 1000000
                else:
                    low_factor = 1
                    high_factor = 1
                pricerange = results[2]
                low_range = pricerange[0]
                high_range = pricerange[1]
                original_keys = ["address.street", "address.city", "address.state",
                                 "address.zip", "listPrice", "sqft", "district",
                                 "size", "yearBuilt", "beds", "full"]
                modified_keys = ["street", "city", "state", "zip", "listPrice",
                                 "lotSize", "district", "size", "yearBuilt",
                                 "beds", "baths"]
                search_object = {
                    "_source": {"includes": original_keys},
                    "query": {
                        "bool": {
                            "must": [{"match": {"address.zip": self.default_zip}}],
                            "filter": [{
                                "range": {
                                    "listPrice": {
                                        "gte": int(low_range) * low_factor,
                                        "lte": int(high_range) * high_factor
                                    }
                                }
                            }]
                        }
                    }
                }
                val = self.process(search_object, original_keys, modified_keys)
                return val
        except Exception as ex:
            logging.info('Error in process6: %s', ex)

    def process1(self, results):
        # "make 57402 Ernestine Radial the default house"
        try:
            address = results[1][0][0]
            default = results[1][1][0]
            house = results[1][1][1]
            #original_keys = ["address.street", "listPrice"]
            if default == 'default' and house == 'house':
                if address:
                    self.default_house = address
                    status = {'status': 'setting default house to ' + self.default_house}
                else:
                    status = {'status': 'error setting default house'}
                data = ast.literal_eval(str(status))
                result = [json.dumps(data)]
                return result
        except Exception as ex:
            logging.info('Error in process1: %s', ex)

    def process2(self, results):
        # "what is the price of the house"
        try:
            keyword = results[1][1][0]
            original_keys = ['listPrice']
            if keyword == 'price':
                search_object = {
                    "_source": {"includes": ["listPrice"]},
                    "query": {
                        "match_phrase": {"address.street": self.default_house}
                    }
                }
                val = self.process(search_object, original_keys, original_keys)
                return val
        except Exception as ex:
            logging.info('Error in process2: %s', ex)

    def process3(self, results):
        # "how many bedrooms and bathrooms does it have"
        try:
            bedrooms = results[1][0]
            bathrooms = results[1][1]
            original_keys = ["beds", "baths.full", "baths.half"]
            if (bedrooms == 'bedrooms' or bedrooms == 'bedroom') and \
               (bathrooms == 'baths' or bathrooms == 'bathrooms' or
                    bathrooms == 'bathroom' or bathrooms == 'bath'):
                search_object = {
                    "_source": {"includes": ["beds", "baths.full", "baths.half"]},
                    "query": {
                        "match_phrase": {"address.street": self.default_house}
                    }
                }
                val = self.process(search_object, original_keys, original_keys)
                return val
        except Exception as ex:
            logging.info('Error in process3: %s', ex)

    def process4(self, results):
        # "how long has the property been on the market"
        try:
            original_keys = ["daysOnHJI"]
            search_object = {
                "_source": {"includes": ["daysOnHJI"]},
                "query": {
                    "match_phrase": {"address.street": self.default_house}
                }
            }
            val = self.process(search_object, original_keys, original_keys)
            return val
        except Exception as ex:
            logging.info('Error in process4: %s', ex)

    def process5(self, results):
        # "how much section 1 work is required"
        try:
            status = {'section_1': 5000}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process5: %s', ex)

    def process7(self, results):
        # "what is the required setback from lot boundary"
        try:
            status = {'setback': '25 ft'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process7: %s', ex)

    def process8(self, results):
        # set default zip = zip
        try:
            zip = str(results[1][0])
            self.default_zip = zip
            self.index_name = 'listings_' + str(zip)
            status = {'status': 'setting default zip to ' + str(zip)}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process8: %s', ex)

    def process9(self, results):
        # get default zip
        try:
            if self.default_zip:
                status = {'status': 'the default zip is ' + str(self.default_zip)}
            else:
                status = {'status': 'the default zip is not set'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process9: %s', ex)

    def process11(self, results):
        try:
            zip = str(results[1][0])
            original_keys = ["address.street", "address.city", "address.state",
                             "address.zip", "listPrice", "sqft", "district",
                             "size", "yearBuilt", "beds", "full"]
            modified_keys = ["street", "city", "state", "zip", "listPrice",
                             "lotSize", "district", "size", "yearBuilt",
                             "beds", "baths"]
            search_object = {
                "_source": {"includes": original_keys},
                "query": {
                    "bool": {
                        "must": [{"match": {"address.zip": zip}}]
                    }
                }
            }
            val = self.process(search_object, original_keys, modified_keys)
            return val
        except Exception as ex:
            logging.info('Error in process11: %s', ex)

    def search_all(self):
        search_object = {"query": {"match_all": {}}}
        res = self.es.search(index=self.index_name, body=search_object)
        res_json = json.dumps(res)
        return res_json

    def search_id(self, id):
        search_object = {'query': {'match': {'id': str(id)}}}
        res = self.es.search(index=self.index_name, body=search_object)
        res_json = json.dumps(res)
        return res_json

    def filterKeys(self, res, original_keys, modified_keys):
        numberOfHouses = self.wrap_extract_values(res, ['value'])[0][0]
        vals = self.wrap_extract_values(res, original_keys)
        #self.pp.pprint(vals)
        numberOfAttrs = len(original_keys)
        houses = []
        for j in range(numberOfHouses):
            oneHouse = []
            for i in range(numberOfAttrs):
                if not vals[i][j] == None:
                    elem = vals[i][j]
                    oneHouse.append(elem)
            houses.append(oneHouse)
        houses_json = []
        for house in houses:
            res = dict(zip(modified_keys, house))
            houses_json.append(res)
        return houses_json

    def wrap_extract_values(self, obj, listkey):
        allvals = []
        for k in listkey:
            val = self.extract_values(obj, k)
            allvals.append(val)
        return allvals

    def extract_values(self, obj, key):
        """Pull all values of the specified key from nested JSON."""
        arr = []

        def extract(obj, arr, key):
            """Recursively search for values of key in the JSON tree."""
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if isinstance(v, (dict, list)):
                        extract(v, arr, key)
                    elif k == key:
                        arr.append(v)
            elif isinstance(obj, list):
                for item in obj:
                    extract(item, arr, key)
            return arr

        results = extract(obj, arr, key)
        return results
class DatabaseManagerTester(unittest.TestCase):

    def setUp(self):
        sqlite_file = 'test.db'
        self.db = DatabaseManager(sqlite_file, '__ADMIN__')
        filename = "Test_Files/DatabaseManagerTest_15.csv"
        self.ingestor = Ingestor(filename)
        self.ingestor.readCSV()
        tempHeaders = self.ingestor.getCSVHeaders()
        self.searchCriteria = [tempHeaders[0], tempHeaders[1], tempHeaders[2],
                               tempHeaders[4], tempHeaders[5], tempHeaders[6]]
        searchCriteriaTwoD = self.ingestor.getHeaderIndex(self.searchCriteria, tempHeaders)
        self.ingestor.searchRows(searchCriteriaTwoD, self.ingestor.getRows())
        self.searchCriteria = self.db.remove_spaces(self.searchCriteria)
        self.new_table = 'Test_15'

    def test_create_new_table(self):
        self.assertTrue(self.db.create_table_list(self.new_table, self.searchCriteria, 'string'))

    def test_add_row_list(self):
        self.assertTrue(self.db.add_list_of_rows(self.new_table, self.searchCriteria,
                                                 self.ingestor.getRows()))

    def test_get_headers(self):
        expectedReturn = ['Street_Address', "owner's_first_name", 'last_name',
                          'email', 'phone_Number', 'Loan_Amount']
        self.assertEqual(self.db.get_headers(self.new_table), expectedReturn)

    def test_get_table(self):
        pass

    def test_get_table_names(self):
        tables_in_db = self.db.get_table_names()
        self.assertTrue(self.new_table in tables_in_db)

    def test_get_row_at_with_column(self):
        column_to_use = "72 Pearson Drive"
        row_from_db = self.db.get_row_at(self.new_table,
                                         column_name=self.searchCriteria[0],
                                         column_value=column_to_use)
        expectedReturn = [u'72 Pearson Drive', u'Bartholemy', u'Parnaby',
                          u'*****@*****.**', u'+55 (385) 326-3642', u'$44,795.68 ']
        # The lists are the same but it doesn't think they are equal
        #self.assertEqual(row_from_db, expectedReturn)

    def test_get_row_at_with_rowid(self):
        rowid = 3
        row_from_db = self.db.get_row_at(self.new_table, row_id=rowid)
        expectedReturn = [u'72 Pearson Drive', u'Bartholemy', u'Parnaby',
                          u'*****@*****.**', u'+55 (385) 326-3642', u'$44,795.68 ']
        #self.assertEqual(row_from_db, expectedReturn)

    def test_delete_row(self):
        rowid = 9
        rowToDel = self.db.get_row_at(self.new_table, row_id=rowid)
        rowAfterToDel = self.db.get_row_at(self.new_table, row_id=rowid + 1)
        self.db.delete_row_at(self.new_table, rowid)
        self.assertEqual(self.db.get_row_at(self.new_table, row_id=rowid), rowAfterToDel)

    def test_update_row(self):
        rowid = 9
        old_row = self.db.get_row_at(self.new_table, row_id=rowid)
        updated_row1 = ["a house", "josh", "green", "*****@*****.**",
                        "228-192-2819", "$2.17"]
        self.db.update_row_at(self.new_table, primary_key=rowid, new_row=updated_row1)
        self.assertTrue(old_row is not updated_row1)
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
# https://www.dataquest.io/blog/python-pandas-databases/
# https://www.youtube.com/watch?v=pd-0G0MigUA
# https://stackoverflow.com/questions/17044259/python-how-to-check-if-table-exists

from DatabaseManager import DatabaseManager
from Ingestor import Ingestor

# SETTING IT EQUAL TO ':memory:' WILL HAVE IT RUN ON RAM AND NO SQLITE FILE WILL BE MADE.
sqlite_file = 'test.db'
db = DatabaseManager(sqlite_file, '__ADMIN__')
CLEAR_ON_COMPLETION = True

filename = "Test_Files/DatabaseManagerTest_15.csv"
ingestor = Ingestor(filename)
ingestor.readCSV()
tempHeaders = ingestor.getCSVHeaders()
searchCriteria = [tempHeaders[0], tempHeaders[1], tempHeaders[2],
                  tempHeaders[4], tempHeaders[5], tempHeaders[6]]
searchCriteriaTwoD = ingestor.getHeaderIndex(searchCriteria, tempHeaders)
print("\nDictionary of search criteria and their indexes in the csv")
print(searchCriteriaTwoD)
ingestor.searchRows(searchCriteriaTwoD, ingestor.getRows())
print("\nPrint filtered list from unfiltered row")
print(ingestor.getRowAt(0))
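# Continuation sketch: the script above stops short of writing to SQLite.
# These calls reuse only DatabaseManager methods already exercised by
# DatabaseManagerTester earlier in this collection; 'Test_15' is an
# illustrative table name.
new_table = 'Test_15'
headers = db.remove_spaces(searchCriteria)
if not db.doesTableExist(new_table):
    db.create_table_list(new_table, headers, 'string')  # all columns typed as strings
db.add_list_of_rows(new_table, headers, ingestor.getRows())
print(db.get_row_at(new_table, row_id=1))               # first imported row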
from Ingestor import Ingestor  # same import as in the demo script above


def main():
    filename = 'Test_Files/DatabaseManagerTest_15.csv'
    ingestor = Ingestor(filename)
    ingestor.readCSV()
    print("Header of csv file")
    print(ingestor.getCSVHeaders())
    tempHeaders = ingestor.getCSVHeaders()
    searchCriteria = [tempHeaders[2], tempHeaders[3], tempHeaders[5]]
    searchCriteria = ingestor.getHeaderIndex(searchCriteria, tempHeaders)
    print("\nDictionary of search criteria and their indexes in the csv")
    print(searchCriteria)
    print("\nPrint raw list from csv")
    print(ingestor.getRowAt(1))
    ingestor.searchRows(searchCriteria, ingestor.getRows())
    print("\nPrint filtered list from unfiltered row")
    print(ingestor.getRowAt(1))
    print(ingestor.getRowAt(2))
    print(ingestor.getRowAt(3))
    print(ingestor.getRowAt(4))
    print("\nNumber of columns")
    print(ingestor.getNumberOfHeaders())
    print("\nNumber of rows")
    print(ingestor.getNumberOfRows())
    print("\nUpdating file to a csv in the project folder named newList.csv. Expected: False")
    print(ingestor.updateFileLoc(
        "/home/anthonym/Documents/SchoolWork/SoftwareEngineering/newList.csv"))
    print("\nUpdating file to a csv with no name. Expected: False")
    print(ingestor.updateFileLoc(""))
    print("\nUpdating file to the csv at Test_Files/DatabaseManagerTest_1000.csv. Expected: True")
    print(ingestor.updateFileLoc("Test_Files/DatabaseManagerTest_1000.csv"))


if __name__ == '__main__':
    main()
def test_parse_fakefile(self):
    with self.assertRaises(FileNotFoundError):
        _ = Ingestor.parse('fakefile.pdf')
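# Hypothetical sketch: the test above only pins down one behaviour of
# Ingestor.parse, namely that a missing file raises FileNotFoundError before
# any parsing starts. The dispatch to concrete parsers is assumed, not part
# of the source.
import os

class Ingestor:

    @classmethod
    def parse(cls, path):
        if not os.path.isfile(path):
            # Raise before touching any parser, as the test expects.
            raise FileNotFoundError(path)
        # ... hand off to a format-specific parser here ...
        raise NotImplementedError('parser dispatch not shown in this sketch')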