class unittest_Ingestor(unittest.TestCase):

    def setUp(self, logFile='log/Template.log'):
        self.logger = Logger(logFile, 'DEBUG', 'Sub')
        self.logger = self.logger.getLogger()
        self.source = Source('source-test', self.logger)
        self.ingestor = Ingestor(self.source, self.logger)
        self.ingestor.setClients()

    def test_Ingestor(self):
        print self.ingestor.clients
        # Exploratory calls kept for reference:
        #filter.ingestor.ingestSingleFile(igniter)
        #filter.ingestor.ingestBulletinFile(igniter)
        #source.ingestor.ingestSingleFile(igniter)
        #source.ingestor.ingestBulletinFile(igniter)
        #source.ingestor.ingestCollection(igniter)
        #self.ingestor.setFeeds(self.feeds)
        #self.ingestor.setClients()
        #source.ingestor.createDir('/apps/px/turton', source.ingestor.dbDirsCache)
        #self.drp = DirectRoutingParser(pathFichierCircuit, self.source.ingestor.allNames, logger)
        #self.source.ingestor.ingest()
        #clist = self.source.ingestor.getMatchingClientNamesFromMasks(nomFichier, clist)
        #self.source.ingestor.ingest(tempNom, nomFichier, clist)
        #self.assertEqual(None, None)
        pass
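# Minimal sketch for running the suite above on its own; it assumes Logger,
# Source and Ingestor are already importable in this module, as the test does.
if __name__ == '__main__':
    unittest.main()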
def __init__(self):
    self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if self.es.ping():
        logging.info('Elasticsearch connected..')
    else:
        # A failed connection is an error, not routine information
        logging.error('Could not connect to Elasticsearch!')
    self.index_name = None
    self.ingest = Ingestor()
    self.db = Database()
    self.pp = pprint.PrettyPrinter(indent=4)
    self.default_zip = None
    self.default_house = None
    self.status = None
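# Hedged usage sketch for the connection check above: ping, index one
# document, and pretty-print a match_all search. Assumes a local node and the
# elasticsearch-py 7.x client signatures; 'test-index' and the document body
# are illustrative assumptions, not names from this project.
import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
if es.ping():
    es.index(index='test-index', id=1, body={'zip': '12345', 'house': '42'})
    result = es.search(index='test-index', body={'query': {'match_all': {}}})
    pprint.PrettyPrinter(indent=4).pprint(result)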
def setup():
    """ Load all resources """
    quote_files = [
        './_data/DogQuotes/DogQuotesTXT.txt',
        './_data/DogQuotes/DogQuotesDOCX.docx',
        './_data/DogQuotes/DogQuotesPDF.pdf',
        './_data/DogQuotes/DogQuotesCSV.csv'
    ]

    # Parse every quote file with the Ingestor class; a file the Ingestor
    # cannot handle is reported and skipped instead of aborting setup.
    quotes = []
    for quote_file in quote_files:
        try:
            quotes.extend(Ingestor.parse(quote_file))
        except ValueError as error:
            print(f"ValueError: {error}")

    images_path = "./_data/photos/dog/"

    # Walk images_path and collect the full path of every image found
    # (extend, not assign, so files from every subdirectory are kept).
    images = []
    for root, dirs, files in os.walk(images_path):
        images.extend(os.path.join(root, name) for name in files)

    return quotes, images
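# Usage sketch for setup() above: pick a random quote/image pair. It assumes
# the ./_data tree referenced by quote_files and images_path is present.
import random

quotes, images = setup()
print(random.choice(quotes))
print(random.choice(images))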
def setUp(self):
    sqlite_file = 'test.db'
    self.db = DatabaseManager(sqlite_file, '__ADMIN__')
    filename = "Test_Files/DatabaseManagerTest_15.csv"
    self.ingestor = Ingestor(filename)
    self.ingestor.readCSV()
    tempHeaders = self.ingestor.getCSVHeaders()
    self.searchCritera = [
        tempHeaders[0], tempHeaders[1], tempHeaders[2],
        tempHeaders[4], tempHeaders[5], tempHeaders[6]
    ]
    searchCriteraTwoD = self.ingestor.getHeaderIndex(
        self.searchCritera, tempHeaders)
    self.ingestor.searchRows(searchCriteraTwoD, self.ingestor.getRows())
    self.searchCritera = self.db.remove_spaces(self.searchCritera)
    self.new_table = 'Test_15'
def generate_meme(path=None, body=None, author=None):
    """ Generate a meme given a path and a quote """
    if path is None:
        images = "./_data/photos/dog/"
        imgs = []
        for root, dirs, files in os.walk(images):
            imgs = [os.path.join(root, name) for name in files]
        img = random.choice(imgs)
    else:
        img = path[0]

    if body is None:
        quote_files = [
            './_data/DogQuotes/DogQuotesTXT.txt',
            './_data/DogQuotes/DogQuotesDOCX.docx',
            './_data/DogQuotes/DogQuotesPDF.pdf',
            './_data/DogQuotes/DogQuotesCSV.csv'
        ]
        quotes = []
        for f in quote_files:
            quotes.extend(Ingestor.parse(f))
        quote = random.choice(quotes)
    else:
        if author is None:
            raise Exception('Author Required if Body is Used')
        quote = QuoteModel(body, author)

    meme = MemeEngine('./tmp')
    # format_and_make replaces the stock make_meme call in this project
    path = meme.format_and_make(img, quote.body, quote.author)
    return path
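# A hedged CLI wrapper for generate_meme above, in the spirit of the usual
# meme.py entry point; the flag names --path/--body/--author are assumptions.
# nargs=1 keeps args.path a list, matching the path[0] access above.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate a meme')
    parser.add_argument('--path', type=str, nargs=1, default=None,
                        help='path to an image file')
    parser.add_argument('--body', type=str, default=None, help='quote body')
    parser.add_argument('--author', type=str, default=None, help='quote author')
    args = parser.parse_args()
    print(generate_meme(args.path, args.body, args.author))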
class Source(object):

    def __init__(self, name='toto', logger=None, ingestion=True, filter=False):

        # General Attributes
        self.name = name                # Source's name
        self.filter = filter            # does this source really define a filter?

        if logger is None:
            pathlog = PXPaths.LOG + 'rx_' + name + '.log'
            namelog = 'RX' + name
            if self.filter:
                pathlog = PXPaths.LOG + 'fx_' + name + '.log'
                namelog = 'FX' + name
            self.logger = Logger(pathlog, 'INFO', namelog)   # Enable logging
            self.logger = self.logger.getLogger()
        else:
            self.logger = logger

        if not self.filter:
            self.logger.info("Initialisation of source %s" % self.name)
        else:
            self.logger.info("Initialisation of filter %s" % self.name)

        # Attributes coming from the configuration file of the source
        #self.extension = 'nws-grib:-CCCC:-TT:-CIRCUIT:Direct'  # Extension to be added to the ingest name

        self.ingestion = ingestion      # do we want to start the ingestion...
        self.debug = False              # If we want sections with debug code to be executed
        self.batch = 100                # Number of files that will be read in each pass
        self.cache_size = 125000        # Maximum number of md5sums from files kept in the cache manager
        self.bulletin_type = None       # type of bulletin ingested (None, am, wmo)
        self.masks = []                 # All the masks (accept and reject)
        self.masks_deprecated = []      # All the masks (imask and emask)
        self.routemask = True           # use accept and parenthesis in mask to create a key and route with it
        self.routing_version = 1        # directRouting version setting
        self.nodups = False             # Check if the file was already received (md5sum present in the cache)
        self.tmasks = []                # All the transformation masks (timask, temask)
        self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
        self.arrival_extension = None   # Extension added instead when the bulletin is outside its arrival range
        self.type = None                # Must be in ['filter','file','single-file', 'bulletin-file', 'am', 'wmo']
        self.port = None                # Port number if type is in ['am', 'wmo']
        self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
        self.mapEnteteDelai = None
        self.addStationInFilename = True
        self.addSMHeader = False
        self.validation = False         # Validate the filename (ex: prio and timestamp)
        self.patternMatching = True     # Pattern matching of the source masks
        self.clientsPatternMatching = True  # Pattern matching of the client masks
        self.sorter = None              # No sorting on the filenames
        self.feeds = []                 # more sources to feed directly
        self.keepAlive = True           # TCP SO_KEEPALIVE on (True) or off (False)
        self.mtime = 0                  # Number of seconds a file must not have been
                                        # touched before being picked

        # AMQP
        self.exchange_key = ''
        self.exchange_name = None
        self.exchange_realm = '/data'
        self.exchange_type = 'fanout'

        #-----------------------------------------------------------------------------------------
        # Setting up pull configuration values
        #-----------------------------------------------------------------------------------------
        self.pulls = []                 # All the directories and file patterns to pull
        self.host = 'localhost'         # Remote host address (or ip) where to send files
        self.protocol = None            # First thing in the url: ftp, file, am, wmo, amis
        self.url = None
        self.user = None                # User name used to connect
        self.passwd = None              # Password
        self.ssh_keyfile = None         # ssh private key file for the sftp protocol
        self.ftp_mode = 'passive'       # Default is 'passive', can be set to 'active'
        self.timeout_get = 0            # Timeout in sec. to consider a get to hang (0 means inactive)
        self.pull_sleep = 600           # Time in sec. before retrying the pull
        self.pull_wait = 10             # Time in sec. to wait after ls before pulling
                                        # (make sure files have arrived)
        self.delete = False             # if True the file is deleted after the pull; if False
                                        # its ls is kept to check whether it changed...
        self.pull_prefix = ''           # the file may be prefixed by some string, giving
                                        # prefix_filename, or by 'HDATETIME' for the file's
                                        # date-time on the remote host

        # VIP option, None for standalone process
        self.vip = None

        #-----------------------------------------------------------------------------------------
        # Setting up default collection configuration values
        #-----------------------------------------------------------------------------------------
        self.headers = []               # Title for report in the form TT from (TTAAii)
        self.issue_hours = []           # list of emission hours to collect
        self.issue_primary = []         # minutes past the emission hour for the primary collection (reports on time)
        self.issue_cycle = []           # minutes for cycling after the primary collection for more reports
        self.history = 25               # hours during which a report is still valid even if "history" hours late
        self.future = 40                # minutes during which a report is valid even if "future" minutes too soon

        #-----------------------------------------------------------------------------------------
        # Setting file transformations/conversions... etc...
        #-----------------------------------------------------------------------------------------
        self.fx_script = None           # a script to convert/modify each received file
        self.fx_execfile = None
        self.lx_script = None           # a script to convert/modify a list of received files
        self.lx_execfile = None
        self.pull_script = None         # a script to pull files prior to reading rxq
        self.pull_execfile = None

        #-----------------------------------------------------------------------------------------
        # All defaults for a source were set earlier in this class,
        # but some of them may have been overwritten in the px.conf file.
        # Load the px.conf stuff related to the source.
        #-----------------------------------------------------------------------------------------
        pxconf_Path = PXPaths.ETC + 'px.conf'
        if os.path.isfile(pxconf_Path):
            self.readConfig(pxconf_Path)

        #-----------------------------------------------------------------------------------------
        # Parse the configuration file
        #-----------------------------------------------------------------------------------------
        filePath = PXPaths.RX_CONF + self.name + '.conf'
        if self.filter:
            filePath = PXPaths.FX_CONF + self.name + '.conf'
        self.readConfig(filePath)

        #-----------------------------------------------------------------------------------------
        # instantiate the fx/lx/pull scripts in the source class
        #-----------------------------------------------------------------------------------------
        if self.fx_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.fx_execfile)
            except:
                self.logger.error("Problem with fx_script %s" % self.fx_execfile)

        if self.lx_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.lx_execfile)
            except:
                self.logger.error("Problem with lx_script %s" % self.lx_execfile)

        if self.pull_execfile != None:
            try:
                execfile(PXPaths.SCRIPTS + self.pull_execfile)
            except:
                self.logger.error("Problem with pull_script %s" % self.pull_execfile)

        #-----------------------------------------------------------------------------------------
        # Make sure the collection params are valid
        #-----------------------------------------------------------------------------------------
        if self.type == 'collector':
            self.validateCollectionParams()

        #-----------------------------------------------------------------------------------------
        # If we do want to start the ingestor...
        #-----------------------------------------------------------------------------------------
        if self.ingestion:
            if hasattr(self, 'ingestor'):
                # Will happen only when a reload occurs
                self.ingestor.__init__(self)
            else:
                self.ingestor = Ingestor(self)

            if len(self.feeds) > 0:
                self.ingestor.setFeeds(self.feeds)

            self.ingestor.setClients()

        #self.printInfos(self)

    def readConfig(self, filePath):

        def isTrue(s):
            if s == 'True' or s == 'true' or s == 'yes' or s == 'on' or \
               s == 'Yes' or s == 'YES' or s == 'TRUE' or s == 'ON' or \
               s == '1' or s == 'On':
                return True
            else:
                return False

        try:
            config = open(filePath, 'r')
        except:
            (type, value, tb) = sys.exc_info()
            print("Type: %s, Value: %s" % (type, value))
            return

        # current dir and filename could eventually be used
        # for file renaming and perhaps file moves (like a special receiver/dispatcher)
        currentDir = '.'                 # just to preserve consistency with client: unused in source for now
        currentFileOption = 'WHATFN'     # just to preserve consistency with client: unused in source for now
        currentTransformation = 'GIFFY'  # Default transformation for tmasks
        currentLST = None                # a list consisting of one directory followed by one or more file patterns

        for line in config.readlines():
            words = line.split()
            if (len(words) >= 2 and not re.compile('^[ \t]*#').search(line)):
                try:
                    if words[0] == 'extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("Extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.extension = ':' + words[1]
                            self.extension = self.extension.replace('-NAME', self.name)
                    elif words[0] == 'arrival_extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("arrival_extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.arrival_extension = ':' + words[1]
                            self.arrival_extension = self.arrival_extension.replace('-NAME', self.name)
                    elif words[0] == 'accept':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, True))
                    elif words[0] == 'reject':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, False))
                    elif words[0] == 'routemask':
                        self.routemask = isTrue(words[1])
                    elif words[0] == 'routing_version':
                        self.routing_version = int(words[1])
                    elif words[0] == 'noduplicates':
                        self.nodups = isTrue(words[1])
                    elif words[0] == 'imask':
                        self.masks_deprecated.append((words[1], currentDir, currentFileOption))
                    elif words[0] == 'emask':
                        self.masks_deprecated.append((words[1],))
                    elif words[0] == 'timask':
                        self.tmasks.append((words[1], currentTransformation))
                    elif words[0] == 'temask':
                        self.tmasks.append((words[1],))
                    elif words[0] == 'transformation':
                        currentTransformation = words[1]
                    elif words[0] == 'batch':
                        self.batch = int(words[1])
                    elif words[0] == 'cache_size':
                        self.cache_size = int(words[1])
                    elif words[0] == 'bulletin_type':
                        self.bulletin_type = words[1]
                    elif words[0] == 'type':
                        self.type = words[1]
                    elif words[0] == 'port':
                        self.port = int(words[1])
                    elif words[0] == 'AddSMHeader' and isTrue(words[1]):
                        self.addSMHeader = True
                    elif words[0] == 'addStationInFilename':
                        self.addStationInFilename = isTrue(words[1])
                    elif words[0] == 'patternMatching':
                        self.patternMatching = isTrue(words[1])
                    elif words[0] == 'clientsPatternMatching':
                        self.clientsPatternMatching = isTrue(words[1])
                    elif words[0] == 'validation' and isTrue(words[1]):
                        self.validation = True
                    elif words[0] == 'keepAlive':
                        self.keepAlive = isTrue(words[1])
                    elif words[0] == 'debug' and isTrue(words[1]):
                        self.debug = True
                    elif words[0] == 'mtime':
                        self.mtime = int(words[1])
                    elif words[0] == 'sorter':
                        self.sorter = words[1]
                    elif words[0] == 'header':
                        self.headers.append(words[1])
                    elif words[0] == 'hours':
                        self.issue_hours.append(words[1])
                    elif words[0] == 'primary':
                        self.issue_primary.append(words[1])
                    elif words[0] == 'cycle':
                        self.issue_cycle.append(words[1])
                    elif words[0] == 'feed':
                        self.feeds.append(words[1])
                    elif words[0] == 'routingTable':
                        self.routingTable = words[1]
                    elif words[0] == 'fx_script':
                        self.fx_execfile = words[1]
                    elif words[0] == 'lx_script':
                        self.lx_execfile = words[1]
                    elif words[0] == 'pull_script':
                        self.pull_execfile = words[1]
                    elif words[0] == 'vip':
                        self.vip = words[1]
                    elif words[0] == 'arrival':
                        if self.mapEnteteDelai == None:
                            self.mapEnteteDelai = {}
                        self.mapEnteteDelai[words[1]] = (int(words[2]), int(words[3]))
                    elif words[0] == 'logrotate':
                        if words[1].isdigit():
                            self.logger.setBackupCount(int(words[1]))

                    # options for pull
                    elif words[0] == 'directory':
                        currentDir = words[1]
                        currentLST = []
                        # permit directory duplications but warn
                        for lst in self.pulls:
                            if lst[0] == currentDir:
                                currentLST = lst
                                break
                        if len(currentLST) != 0:
                            self.logger.warning("This directory appears twice %s" % currentDir)
                            self.logger.warning("Please correct your config")
                            continue
                        # normal directory addition
                        currentLST.append(currentDir)
                        self.pulls.append(currentLST)
                    elif words[0] == 'get':
                        currentFilePattern = words[1]
                        currentLST.append(currentFilePattern)
                    elif words[0] == 'destination':
                        self.url = words[1]
                        urlParser = URLParser(words[1])
                        (self.protocol, currentDir, self.user, self.passwd,
                         self.host, self.port) = urlParser.parse()
                        if len(words) > 2:
                            currentFileOption = words[2]
                        currentLST = []
                        currentLST.append(currentDir)
                        self.pulls.append(currentLST)
                    elif words[0] == 'protocol':
                        self.protocol = words[1]
                    elif words[0] == 'host':
                        self.host = words[1]
                    elif words[0] == 'user':
                        self.user = words[1]
                    elif words[0] == 'password':
                        self.passwd = words[1]
                    elif words[0] == 'ssh_keyfile':
                        self.ssh_keyfile = words[1]
                    elif words[0] == 'timeout_get':
                        self.timeout_get = int(words[1])
                    elif words[0] == 'ftp_mode':
                        self.ftp_mode = words[1]
                    elif words[0] == 'pull_sleep':
                        self.pull_sleep = int(words[1])
                    elif words[0] == 'pull_wait':
                        self.pull_wait = int(words[1])
                    elif words[0] == 'delete':
                        self.delete = isTrue(words[1])
                    elif words[0] == 'pull_prefix':
                        self.pull_prefix = words[1]

                    # AMQP
                    elif words[0] == 'exchange_key':
                        self.exchange_key = words[1]
                    elif words[0] == 'exchange_name':
                        self.exchange_name = words[1]
                    elif words[0] == 'exchange_realm':
                        self.exchange_realm = words[1]
                    elif words[0] == 'exchange_type':
                        if words[1] in ['fanout', 'direct', 'topic', 'headers']:
                            self.exchange_type = words[1]
                        else:
                            self.logger.error("Problem with exchange_type %s" % words[1])

                    # options for collector
                    if self.type == 'collector':
                        if words[0] == 'aaxx':
                            self.aaxx = words[1].split(',')
                        if words[0] == 'metar':
                            self.metar = words[1].split(',')
                        elif words[0] == 'taf':
                            self.taf = words[1].split(',')
                        elif words[0] == 'history':
                            self.history = int(words[1])
                        elif words[0] == 'future':
                            self.future = int(words[1])
                        elif words[0] == 'issue':
                            if words[1] == 'all':
                                lst = []
                                lst.append(words[1])
                                self.issue_hours.append(lst)
                            else:
                                lst = words[1].split(",")
                                self.issue_hours.append(lst)
                            self.issue_primary.append(int(words[2]))
                            self.issue_cycle.append(int(words[3]))
                except:
                    self.logger.error("Problem with this line (%s) in configuration file of source %s"
                                      % (words, self.name))

        config.close()

        if len(self.masks) > 0:
            self.patternMatching = True
        if len(self.masks_deprecated) > 0:
            self.patternMatching = True

        self.logger.debug("Configuration file of source %s has been read" % (self.name))

    def run_fx_script(self, filename, logger):
        if self.fx_script == None:
            return filename
        return self.fx_script(filename, logger)

    def run_lx_script(self, filelist, logger):
        if self.lx_script == None:
            return filelist
        return self.lx_script(filelist, logger)

    def run_pull_script(self, flow, logger, sleeping):
        filelist = []
        if self.pull_script == None:
            return filelist
        return self.pull_script(flow, logger, sleeping)

    def getTransformation(self, filename):
        for mask in self.tmasks:
            if fnmatch.fnmatch(filename, mask[0]):
                try:
                    return mask[1]
                except:
                    return None
        return None

    def fileMatchMask(self, filename):
        # IMPORTANT NOTE: THE FALLBACK BEHAVIOR HERE IS TO ACCEPT THE FILE.
        # THIS IS THE OPPOSITE OF THE CLIENT, WHERE THE FALLBACK IS REJECT.

        # check against the deprecated masks
        if len(self.masks_deprecated) > 0:
            for mask in self.masks_deprecated:
                if fnmatch.fnmatch(filename, mask[0]):
                    try:
                        if mask[2]:
                            return True
                    except:
                        return False

        # check against the masks
        for mask in self.masks:
            if mask[3].match(filename):
                return mask[4]

        # fallback behavior
        return True

    def printInfos(self, source):
        print("==========================================================================")
        print("Name: %s " % source.name)
        print("Type: %s" % source.type)
        print("Batch: %s" % source.batch)
        print("Cache_size: %s" % source.cache_size)
        print("Bulletin_type: %s" % source.bulletin_type)
        print("Port: %s" % source.port)
        print("TCP SO_KEEPALIVE: %s" % source.keepAlive)
        print("Extension: %s" % source.extension)
        print("Arrival_Extension: %s" % source.arrival_extension)
        print("Arrival: %s" % source.mapEnteteDelai)
        print("addSMHeader: %s" % source.addSMHeader)
        print("addStationInFilename: %s" % source.addStationInFilename)
        print("Validation: %s" % source.validation)
        print("Source Pattern Matching: %s" % source.patternMatching)
        print("Clients Pattern Matching: %s" % source.clientsPatternMatching)
        print("mtime: %s" % source.mtime)
        print("Sorter: %s" % source.sorter)
        print("Routing table: %s" % source.routingTable)
        print("Route with Mask: %s" % source.routemask)
        print("No duplicates: %s" % source.nodups)
        print("FX script: %s" % source.fx_execfile)
        print("LX script: %s" % source.lx_execfile)
        print("Pull script: %s" % source.pull_execfile)
        print("VIP : %s" % source.vip)

        print("******************************************")
        print("* AMQP stuff *")
        print("******************************************")
        print("exchange_key: %s" % source.exchange_key)
        print("exchange_name: %s" % source.exchange_name)
        print("exchange_realm: %s" % source.exchange_realm)
        print("exchange_type: %s" % source.exchange_type)

        print("******************************************")
        print("* Source Masks *")
        print("******************************************")
        for mask in self.masks:
            if mask[4]:
                print(" accept %s" % mask[0])
            else:
                print(" reject %s" % mask[0])

        print("* Source Masks deprecated *")
        for mask in self.masks_deprecated:
            print mask
        print("==========================================================================")

        print("******************************************")
        print("* Source T-Masks *")
        print("******************************************")
        for mask in self.tmasks:
            print mask
        print("==========================================================================")

        print("******************************************")
        print("* sources to feed (collections...) *")
        print("******************************************")
        for feed in self.feeds:
            print feed
        print("==========================================================================")

        if self.type == 'pull-file':
            print("******************************************")
            print("* Pull Params *")
            print("******************************************")
            print "protocol %s" % self.protocol
            print "host %s" % self.host
            print "user %s" % self.user
            print "passwd %s" % self.passwd
            print "ssh_keyfile %s" % self.ssh_keyfile
            print "ftp_mode %s" % self.ftp_mode
            print ""
            print "delete %s" % self.delete
            print "pull_sleep %s" % self.pull_sleep
            print "pull_wait %s" % self.pull_wait
            print "pull_prefix %s" % self.pull_prefix
            print "timeout_get %s" % self.timeout_get
            print ""
            for lst in self.pulls:
                for pos, elem in enumerate(lst):
                    if pos == 0:
                        print "directory %s" % elem
                    else:
                        print "get %s" % elem
            print("==========================================================================")

        if self.type == 'collector':
            print("******************************************")
            print("* Collection Params *")
            print("******************************************")
            print "bulletin aaxx %s" % self.aaxx
            print "bulletin metar %s" % self.metar
            print "bulletin taf %s" % self.taf
            for position, header in enumerate(self.headers):
                print "\nHeader %s" % header
                lst = self.issue_hours[position]
                print "issue hours %s" % lst
                print "issue primary %s" % self.issue_primary[position]
                print "issue cycle %s" % self.issue_cycle[position]
            print "history %s" % self.history
            print "future %s" % self.future
            print("==========================================================================")
def test_parse_no_extension(self):
    with self.assertRaises(ValueError):
        _ = Ingestor.parse('fakefile')
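# Sketch of the extension guard this test exercises, assuming Ingestor.parse
# dispatches on the file suffix; the allowed-extension set and helper name
# are assumptions about that layout, not code from this project.
import os

ALLOWED_EXTENSIONS = {'.txt', '.docx', '.pdf', '.csv'}

def check_extension(path):
    """Raise ValueError for paths parse() cannot ingest, as the test expects."""
    ext = os.path.splitext(path)[1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise ValueError(f'cannot ingest {path!r}: unsupported extension {ext!r}')
    return ext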
def __init__(self, name='toto', logger=None, ingestion=True):

    # General Attributes
    self.name = name                # Sourlient's name

    if logger is None:
        self.logger = Logger(PXPaths.LOG + 'trx_' + name + '.log',
                             'INFO', 'TRX' + name)   # Enable logging
        self.logger = self.logger.getLogger()
    else:
        self.logger = logger

    self.logger.info("Initialisation of sourlient %s" % self.name)

    self.ingestion = ingestion      # Determine if the Sourlient will have an Ingestor
    self.debug = False              # If we want sections with debug code to be executed
    self.subscriber = True          # False if it is a provider

    self.type = 'aftn'              # Must be in ['aftn']
    self.host = 'localhost'         # Remote host name (or ip) where to send files
    self.portR = 56550              # Receiving port
    self.portS = 5160               # Sending port

    self.stationID = 'SUB'          # Three-letter ID of this process
    self.otherStationID = 'MHS'     # Three-letter ID of the other party
    self.address = 'CYHQUSER'       # AFTN address of this process
    self.otherAddress = 'CYHQMHSN'  # AFTN address of the other party
    self.digits = 4                 # Number of digits used in the CSN

    self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
    self.ackUsed = True             # Should always be True. False is only for testing purposes
    self.maxAckTime = 60            # Number of seconds we wait for an ack before trying to reconnect
    self.batch = 100                # Number of files that will be read in each pass
    self.cache_size = 125000        # Maximum number of md5sums from files kept in the cache manager
    self.timeout = 10               # Time we wait between each attempt to connect
    self.maxLength = 0              # Max. length of a message... limit used for segmentation, 0 means unused
    self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
    self.validation = False         # Validation of the filename (prio + date)
    self.patternMatching = False    # NEVER SET TO TRUE (represents source masks, not used in sourlient)
    self.diskReaderPatternMatching = True  # Check the emask/imask of the sourlient before sending a file
    self.clientsPatternMatching = True     # Check the emask/imask of the clients before linking a file
    self.nodups = True              # Check if the file has already been sent (md5sum present in the cache)
    self.mtime = 0                  # Number of seconds a file must not have been
                                    # touched before being picked

    self.sorter = 'MultiKeysStringSorter'  # Class (or object) used to sort
    self.masks = []                 # All the masks (accept and reject)
    self.masks_deprecated = []      # All the masks (imask and emask)
    self.collection = None          # Sourlients do not participate in the collection effort
    self.slow = False               # Sleeps are added when we want to be able to decrypt log entries

    # Socket Attributes
    self.port = None

    # All defaults for a source were set earlier in this class,
    # but some of them may have been overwritten in the px.conf file.
    # Load the px.conf stuff related to the source.
    pxconf_Path = PXPaths.ETC + 'px.conf'
    if os.path.isfile(pxconf_Path):
        self.readConfig(pxconf_Path)

    # read in sourlient config
    filePath = PXPaths.TRX_CONF + self.name + '.conf'
    self.readConfig(filePath)

    if self.ingestion:
        if hasattr(self, 'ingestor'):
            # Will happen only when a reload occurs
            self.ingestor.__init__(self)
        else:
            self.ingestor = Ingestor(self)

        #self.printInfos(self)
        self.ingestor.setClients()
class IngestorTest(unittest.TestCase):

    def setUp(self):
        filename = 'Test_Files/DatabaseManagerTest_15.csv'
        self.ingestor = Ingestor(filename)
        self.ingestor.readCSV()

    def test_headers(self):
        headerList = ["Street Address", "owner's first name", "last_name", "Long",
                      "email", "phone Number", "Loan Amount", "Lat"]
        #self.assertEqual(self.ingestor.getCSVHeaders(), headerList)

    def test_search_headers(self):
        searchCriteria = ["last_name", "Long", "phone Number"]
        expectedReturn = [["last_name", 2], ["Long", 3], ["phone_Number", 5]]
        self.assertEqual(
            self.ingestor.getHeaderIndex(searchCriteria, self.ingestor.getCSVHeaders()),
            expectedReturn)

    def test_get_row(self):
        expectedReturn = ["8 Hoard Court", "Samuele", "Gulliver", "-64.1305924",
                          "*****@*****.**", "+54 (656) 804-6029", "$14,895.21 ",
                          "-31.4325479"]
        self.assertEqual(self.ingestor.getRowAt(0), expectedReturn)

    def test_get_row_filtered(self):
        searchCriteria = [["last_name", 2], ["Long", 3], ["phone_Number", 5]]
        expectedReturn = ["Gulliver", "-64.1305924", "+54 (656) 804-6029"]
        self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
        self.assertEqual(self.ingestor.getRowAt(0), expectedReturn)
        expectedReturn = ["Scoullar", "121.5570313", "+63 (634) 506-0432"]
        self.assertEqual(self.ingestor.getRowAt(4), expectedReturn)

    def test_number_of_headers(self):
        self.assertEqual(self.ingestor.getNumberOfHeaders(), 8)

    def test_number_of_rows(self):
        self.assertEqual(self.ingestor.getNumberOfRows(), 15)

    def test_switch_files(self):
        self.assertFalse(self.ingestor.updateFileLoc(""))
        self.assertTrue(self.ingestor.updateFileLoc("Test_Files/DatabaseManagerTest_1000.csv"))
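# --- Editor's sketch (hypothetical) ---
# One plausible shape for Ingestor.getHeaderIndex, inferred only from the
# expected values in the tests above: each requested header is paired with
# its column index, and spaces are normalised to underscores (e.g.
# "phone Number" comes back as "phone_Number"). The real implementation
# lives in Ingestor and may differ.
def getHeaderIndex(searchCriteria, headers):
    result = []
    for name in searchCriteria:
        if name in headers:
            # Pair the normalised header name with its column index.
            result.append([name.replace(' ', '_'), headers.index(name)])
    return result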
class csv_importer_popup(QtWidgets.QDialog):

    # Signal emitted when csv_importer_popup closes
    importDoneSignal = QtCore.pyqtSignal('QString')

    def __init__(self, window_title, db_file_loc, tables, protected_table_prefix):
        super().__init__()
        # Creating the window
        self.title = window_title
        self.setWindowTitle(self.title)
        self.protected_table_prefix = protected_table_prefix
        self.tablesInDB = tables
        # Database manager stuff
        self.db = DatabaseManager(db_file_loc, protected_table_prefix)
        # Create an array with the tables already in the database to be
        # put in the common files radio button box
        self.default_lists = []
        for table in tables:
            tempList = []
            for columnName in self.db.get_headers(table):
                tempList.append(columnName)
            self.default_lists.append(tempList)
        self.layout = QGridLayout()

    def run_popup(self, file_loc):
        # CSV file stuff
        self.ingestor = Ingestor(file_loc)
        self.ingestor.readCSV()
        self.rows = self.ingestor.getCSVHeaders()
        # Create buttons from the headers of the CSV file that was just selected
        self.generate_checkboxes(self.rows)
        # Create an area that has a scroll bar
        scrollArea = QScrollArea()
        scrollArea.setWidget(self.csvHeaderGroup_box)
        scrollArea.horizontalScrollBar().setEnabled(False)
        # Create the buttons for tables that already exist in the database
        self.generate_radiobuttons(self.tablesInDB)
        # List of button groups
        self.buttonGroups = [self.commonFileTypesGroup, self.csvHeaderGroup]
        # Create label
        tableNameLabel = QtWidgets.QLabel("Table Name")
        # Create text field
        self.tableNameField = QtWidgets.QLineEdit()
        self.tableNameField.setPlaceholderText("Enter Custom Table Name")
        # Create buttons
        self.cancelButton = QPushButton('Cancel')
        self.importButton = QPushButton('Import')
        self.cancelButton.clicked.connect(self.closeWindow)
        self.importButton.clicked.connect(self.importCSV)
        # Create progress bar
        self.progressBar = QtWidgets.QProgressBar()
        # Create the master layout, which is a grid
        layout = QGridLayout()
        # Add widgets
        # Format: addWidget(widget, row, col, row span, col span)
        layout.addWidget(scrollArea, 1, 1, 1, 2)
        layout.addWidget(tableNameLabel, 2, 1, 1, 2)
        layout.addWidget(self.tableNameField, 3, 1, 1, 2)
        layout.addWidget(self.commonFileTypesGroupBox, 4, 1, 1, 2)
        layout.addWidget(self.progressBar, 5, 1, 1, 2)
        layout.addWidget(self.cancelButton, 6, 1)
        layout.addWidget(self.importButton, 6, 2)
        self.setLayout(layout)
        self.resize(self.sizeHint())

    def generate_checkboxes(self, button_name_list):
        # Generate check boxes
        self.csvHeaderGroup = QButtonGroup()
        self.csvHeaderGroup_layout = QVBoxLayout()
        self.csvHeaderGroup.setExclusive(False)
        self.csvHeaderGroup_box = QGroupBox('Select which headers')
        self.csvHeaderGroup_layout.addStretch(1)
        for button_name in button_name_list:
            # Add each header from the CSV file to the layout as a checkbox
            checkbox = QCheckBox(button_name)
            self.csvHeaderGroup.addButton(checkbox)
            self.csvHeaderGroup_layout.addWidget(self.csvHeaderGroup.buttons()[-1])
        self.csvHeaderGroup_box.setLayout(self.csvHeaderGroup_layout)

    def generate_radiobuttons(self, button_name_list):
        # Generate radio buttons
        self.commonFileTypesGroup = QButtonGroup()
        self.commonFileTypesGroupLayout = QVBoxLayout()
        self.commonFileTypesGroupBox = QGroupBox('Select a pre-existing table')
        self.commonFileTypesGroupLayout.addStretch(1)
        count = 0
        for button_name in button_name_list:
            # Add a button for each list available in the database
            radioButton = QRadioButton(button_name)
            self.commonFileTypesGroup.addButton(radioButton, count)
            self.commonFileTypesGroupLayout.addWidget(self.commonFileTypesGroup.buttons()[-1])
            count += 1
        self.commonFileTypesGroupBox.setLayout(self.commonFileTypesGroupLayout)

    def import_done(self, tableName):
        # Emits which table was created
        self.importDoneSignal.emit(tableName)
        self.accept()

    def closeWindow(self):
        # Closes the window
        self.reject()

    def importCSV(self):
        self.importButton.setEnabled(False)
        self.cancelButton.setEnabled(False)
        # Check whether any radio buttons were pressed by checking whether
        # they are checked, and save the number within the button group
        radio_button_number = -1
        special_button_number = -1
        count = 0
        for radioButton in self.buttonGroups[0].buttons():
            if radioButton.isChecked():
                radio_button_number = count
                break
            count += 1
        count = 0  # Reset the counter before scanning the second group
        for specialButton in self.buttonGroups[1].buttons():
            # Do the same for the special buttons
            if specialButton.isChecked():
                special_button_number = count
                break
            count += 1
        if radio_button_number > -1:
            searchCriteria = self.ingestor.getHeaderIndex(
                self.default_lists[radio_button_number],
                self.ingestor.getCSVHeaders())
            buttonText = self.buttonGroups[0].buttons()[radio_button_number].text()
            # Check which table corresponds to the button pressed
            for tableName in self.tablesInDB:
                if buttonText.replace(' ', '_') == tableName:
                    # Use the ingestor to filter the unfiltered rows
                    # with this search criteria list
                    self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                    # Check whether the table already exists
                    if not self.db.doesTableExist(tableName):
                        # If not, create it with the table name
                        self.db.create_table_list(
                            tableName,
                            self.db.remove_spaces(self.default_lists[radio_button_number]),
                            'string')
                    self.import_with_progress_bar(
                        tableName, self.ingestor.getRows(),
                        self.default_lists[radio_button_number])
                    self.import_done(tableName)
        else:
            try:
                if self.tableNameField.text() == '' or \
                        self.protected_table_prefix in self.tableNameField.text():
                    raise Exception()
                else:
                    customTableName = self.db.is_valid_string(
                        self.tableNameField.text().replace(' ', '_'))
                    if special_button_number > -1:
                        # Default header option not chosen, so custom lists
                        try:
                            requestedHeaders = []
                            for item in self.buttonGroups[1].buttons():
                                if item.isChecked():
                                    requestedHeaders.append(item.text())
                            does_exist = self.db.doesTableExist(customTableName)
                            has_same_cols = True
                            if not does_exist:
                                # If not, create it with the table name
                                self.db.create_table_list(
                                    customTableName,
                                    self.db.remove_spaces(requestedHeaders),
                                    'string')
                            else:
                                # Table exists. Does it have the same columns?
                                if not (requestedHeaders == self.db.get_headers(customTableName)):
                                    has_same_cols = False
                                    # Find the differing column names.
                                    # This works by turning the lists into sets:
                                    # a set is an unordered collection with no
                                    # duplicate elements, and subtracting one set
                                    # from another returns the elements that are
                                    # not shared.
                                    different_cols = list(
                                        set(self.db.remove_spaces(requestedHeaders)) -
                                        set(self.db.get_headers(customTableName)))
                                    # Add the extra columns
                                    for col in different_cols:
                                        self.db.add_column(customTableName, col, 'string')
                            if has_same_cols:
                                # New table is identical to the existing one
                                print("same columns")
                                searchCriteria = self.ingestor.getHeaderIndex(
                                    requestedHeaders, self.ingestor.getCSVHeaders())
                                self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                                rows = self.ingestor.getRows()
                                self.import_with_progress_bar(
                                    customTableName, self.ingestor.getRows(), requestedHeaders)
                            else:
                                # New table has different columns:
                                # combine the headers of the two lists
                                print("diff columns")
                                combinedHeaders = self.db.get_headers(customTableName) + requestedHeaders
                                # They have to be reordered to match the CSV file
                                newRequestedHeaders = []
                                for header in self.db.remove_spaces(self.ingestor.getCSVHeaders()):
                                    # Find the header in the CSV file.
                                    # The order matters because the primary key
                                    # is needed to update the row.
                                    if header in combinedHeaders:
                                        newRequestedHeaders.append(header)
                                # Get the index for each header
                                searchCriteria = self.ingestor.getHeaderIndex(
                                    newRequestedHeaders, self.ingestor.getCSVHeaders())
                                # Filter the rows so only the requested info remains
                                self.ingestor.searchRows(searchCriteria, self.ingestor.getRows())
                                rows = self.ingestor.getRows()
                                # Import them normally
                                self.import_with_progress_bar(
                                    customTableName, self.ingestor.getRows(), newRequestedHeaders)
                            self.import_done(customTableName)
                        except Exception as er:
                            # General error message
                            print('Error message:', er.args[0])
                            return False
                    else:
                        raise Exception()
            except Exception:
                ErrorBox = QtWidgets.QMessageBox()
                choice = ErrorBox.critical(
                    self, 'Table Name Error',
                    "Table name can only have letters, numbers, and underscores",
                    ErrorBox.Ok)
                if choice == ErrorBox.Ok:
                    # User wants to try a new name
                    print("Closing")
                    ErrorBox.accept()
        self.importButton.setEnabled(True)
        self.cancelButton.setEnabled(True)

    def import_with_progress_bar(self, tableName, rows_to_be_added, column_headers):
        """
        Adds the ingestor rows to the db one row at a time so the progress
        bar will show the progress.
        """
        # Set the maximum of the progress bar to the number of rows to be added
        self.progressBar.setMaximum(len(rows_to_be_added))
        #self.db.add_list_of_rows(tableName, self.db.remove_spaces(self.default_lists[button_number]), rows)
        count = 0
        for row in rows_to_be_added:
            # For every row to be added, add it to the db and increment
            # the progress bar value by 1
            count += 1
            self.db.add_row_list(tableName, self.db.remove_spaces(column_headers), row)
            self.progressBar.setValue(count)
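# Minimal launch sketch for the dialog above (hypothetical usage): only the
# constructor signature, run_popup() and importDoneSignal come from the class
# itself; the database path, table list, prefix, and CSV path are placeholder
# values borrowed from the test files used elsewhere in this collection.
import sys
from PyQt5 import QtWidgets

app = QtWidgets.QApplication(sys.argv)
popup = csv_importer_popup('CSV Importer', 'test.db', ['Test_15'], '__ADMIN__')
popup.importDoneSignal.connect(lambda name: print('Imported into table:', name))
popup.run_popup('Test_Files/DatabaseManagerTest_15.csv')
popup.exec_()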
def test_can_ingest(self):
    self.assertTrue(Ingestor.can_ingest('myfile.PDF'))
    self.assertFalse(Ingestor.can_ingest('myfile.HTML'))
    self.assertFalse(Ingestor.can_ingest(None))
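# A plausible can_ingest, inferred solely from the three assertions above:
# the check is extension-based, case-insensitive, and None-safe (it returns
# False rather than raising). The allowed-extension list is an assumption,
# not part of the source.
import os

class Ingestor:

    allowed_extensions = ['pdf', 'csv', 'txt', 'docx']  # assumed list

    @classmethod
    def can_ingest(cls, path):
        if not path:
            return False
        ext = os.path.splitext(path)[1].lstrip('.').lower()
        return ext in cls.allowed_extensions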
def __init__(self, name='toto', logger=None, ingestion=True, filter=False):

    # General Attributes
    self.name = name                  # Source's name
    self.filter = filter              # Does this source really define a filter?

    if logger is None:
        pathlog = PXPaths.LOG + 'rx_' + name + '.log'
        namelog = 'RX' + name
        if self.filter:
            pathlog = PXPaths.LOG + 'fx_' + name + '.log'
            namelog = 'FX' + name
        self.logger = Logger(pathlog, 'INFO', namelog)  # Enable logging
        self.logger = self.logger.getLogger()
    else:
        self.logger = logger

    if not self.filter:
        self.logger.info("Initialisation of source %s" % self.name)
    else:
        self.logger.info("Initialisation of filter %s" % self.name)

    # Attributes coming from the configuration file of the source
    #self.extension = 'nws-grib:-CCCC:-TT:-CIRCUIT:Direct'  # Extension to be added to the ingest name
    self.ingestion = ingestion        # Do we want to start the ingestion...
    self.debug = False                # If we want sections with debug code to be executed
    self.batch = 100                  # Number of files that will be read in each pass
    self.cache_size = 125000          # Maximum number of file md5sums kept in the cache manager
    self.bulletin_type = None         # Type of bulletin ingested (None, am, wmo)
    self.masks = []                   # All the masks (accept and reject)
    self.masks_deprecated = []        # All the masks (imask and emask)
    self.routemask = True             # Use accept and parentheses in the mask to create a key and route with it
    self.routing_version = 1          # directRouting version setting
    self.nodups = False               # Check if the file was already received (md5sum present in the cache)
    self.tmasks = []                  # All the transformation masks (timask, temask)
    self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
    # Extension to be added to the ingest name when the bulletin is outside its arrival range
    self.arrival_extension = None
    self.type = None                  # Must be in ['filter', 'file', 'single-file', 'bulletin-file', 'am', 'wmo']
    self.port = None                  # Port number if type is in ['am', 'wmo']
    self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
    self.mapEnteteDelai = None
    self.addStationInFilename = True
    self.addSMHeader = False
    self.validation = False           # Validate the filename (ex: prio and timestamp)
    self.patternMatching = True       # Pattern matching on the source masks
    self.clientsPatternMatching = True  # Pattern matching on the clients masks
    self.sorter = None                # No sorting on the filenames
    self.feeds = []                   # More sources to feed directly
    self.keepAlive = True             # TCP SO_KEEPALIVE on (True) or off (False)
    self.mtime = 0                    # Integer indicating the number of seconds a file must not have
                                      # been touched before being picked

    # AMQP
    self.exchange_key = ''
    self.exchange_name = None
    self.exchange_realm = '/data'
    self.exchange_type = 'fanout'

    #-----------------------------------------------------------------------------------------
    # Setting up pull configuration values
    #-----------------------------------------------------------------------------------------
    self.pulls = []                   # All the directories and file patterns to pull
    self.host = 'localhost'           # Remote host address (or ip) where to send files
    self.protocol = None              # First thing in the url: ftp, file, am, wmo, amis
    self.url = None
    self.user = None                  # User name used to connect
    self.passwd = None                # Password
    self.ssh_keyfile = None           # ssh private key file for the sftp protocol
    self.ftp_mode = 'passive'         # Default is 'passive', can be set to 'active'
    self.timeout_get = 0              # Timeout in sec. to consider a get to hang (0 means inactive)
    self.pull_sleep = 600             # Time in sec. before retrying the pull
    self.pull_wait = 10               # Time in sec. to wait after ls before pulling (to make sure files have arrived)
    self.delete = False               # If False, the file is not deleted after the pull and its ls is kept
                                      # to check if it changed...
    self.pull_prefix = ''             # The file may be prefixed by some string; the filename will then be
                                      # prefix_filename, or the value 'HDATETIME' for the file's data time
                                      # on the remote host

    #-----------------------------------------------------------------------------------------
    # Setting up default collection configuration values
    #-----------------------------------------------------------------------------------------
    self.headers = []                 # Title for report in the form TT from (TTAAii)
    self.issue_hours = []             # List of emission hours to collect
    self.issue_primary = []           # Minutes past the emission hours for the primary collection (report on time)
    self.issue_cycle = []             # Minutes for cycling after the primary collection for more reports
    self.history = 25                 # Time in hours to consider a report valid even if "history" hours late
    self.future = 40                  # Time in minutes to consider a report valid even if "future" minutes too soon

    #-----------------------------------------------------------------------------------------
    # Setting up file transformations/conversions... etc...
    #-----------------------------------------------------------------------------------------
    self.fx_script = None             # A script to convert/modify each received file
    self.fx_execfile = None
    self.lx_script = None             # A script to convert/modify a list of received files
    self.lx_execfile = None
    self.pull_script = None           # A script to pull files prior to reading rxq
    self.pull_execfile = None

    #-----------------------------------------------------------------------------------------
    # All defaults for a source were set earlier in this class,
    # but some of them may have been overwritten in the px.conf file.
    # Load the px.conf stuff related to the source.
    #-----------------------------------------------------------------------------------------
    pxconf_Path = PXPaths.ETC + 'px.conf'
    if os.path.isfile(pxconf_Path):
        self.readConfig(pxconf_Path)

    #-----------------------------------------------------------------------------------------
    # Parse the configuration file
    #-----------------------------------------------------------------------------------------
    filePath = PXPaths.RX_CONF + self.name + '.conf'
    if self.filter:
        filePath = PXPaths.FX_CONF + self.name + '.conf'
    self.readConfig(filePath)

    #-----------------------------------------------------------------------------------------
    # Instantiate the fx, lx and pull scripts in the source class
    #-----------------------------------------------------------------------------------------
    if self.fx_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.fx_execfile)
        except:
            self.logger.error("Problem with fx_script %s" % self.fx_execfile)

    if self.lx_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.lx_execfile)
        except:
            self.logger.error("Problem with lx_script %s" % self.lx_execfile)

    if self.pull_execfile is not None:
        try:
            execfile(PXPaths.SCRIPTS + self.pull_execfile)
        except:
            self.logger.error("Problem with pull_script %s" % self.pull_execfile)

    #-----------------------------------------------------------------------------------------
    # Make sure the collection params are valid
    #-----------------------------------------------------------------------------------------
    if self.type == 'collector':
        self.validateCollectionParams()

    #-----------------------------------------------------------------------------------------
    # If we do want to start the ingestor...
    #-----------------------------------------------------------------------------------------
    if self.ingestion:
        if hasattr(self, 'ingestor'):
            # Will happen only when a reload occurs
            self.ingestor.__init__(self)
        else:
            self.ingestor = Ingestor(self)

        if len(self.feeds) > 0:
            self.ingestor.setFeeds(self.feeds)

        self.ingestor.setClients()
class Sourlient(object):

    def __init__(self, name='toto', logger=None, ingestion=True):

        # General Attributes
        self.name = name                  # Sourlient's name
        if logger is None:
            self.logger = Logger(PXPaths.LOG + 'trx_' + name + '.log',
                                 'INFO', 'TRX' + name)  # Enable logging
            self.logger = self.logger.getLogger()
        else:
            self.logger = logger

        self.logger.info("Initialisation of sourlient %s" % self.name)

        self.ingestion = ingestion        # Determine if the Sourlient will have an Ingestor
        self.debug = False                # If we want sections with debug code to be executed
        self.subscriber = True            # False if it is a provider
        self.type = 'aftn'                # Must be in ['aftn']
        self.host = 'localhost'           # Remote host name (or ip) where to send files
        self.portR = 56550                # Receiving port
        self.portS = 5160                 # Sending port
        self.stationID = 'SUB'            # Three-letter ID of this process
        self.otherStationID = 'MHS'       # Three-letter ID of the other party
        self.address = 'CYHQUSER'         # AFTN address of this process
        self.otherAddress = 'CYHQMHSN'    # AFTN address of the other party
        self.digits = 4                   # Number of digits used in the CSN
        self.routingTable = PXPaths.ROUTING_TABLE  # Default routing table name
        self.ackUsed = True               # Should always be True. False is only for testing purposes
        self.maxAckTime = 60              # Number of seconds we wait for an ack before trying to reconnect
        self.batch = 100                  # Number of files that will be read in each pass
        self.cache_size = 125000          # Maximum number of file md5sums kept in the cache manager
        self.timeout = 10                 # Time we wait between each attempt to connect
        self.maxLength = 0                # Max. length of a message... limit used for segmentation, 0 means unused
        self.extension = ':MISSING:MISSING:MISSING:MISSING:'  # Extension to be added to the ingest name
        self.validation = False           # Validation of the filename (prio + date)
        self.patternMatching = False      # NEVER SET TO TRUE (represents source masks, not used in a sourlient)
        self.diskReaderPatternMatching = True  # Verification of the emask and imask of the sourlient before sending a file
        self.clientsPatternMatching = True     # Verification of the emask and imask of the clients before linking a file
        self.nodups = True                # Check if the file has already been sent (md5sum present in the cache)
        self.mtime = 0                    # Integer indicating the number of seconds a file must not have
                                          # been touched before being picked
        self.sorter = 'MultiKeysStringSorter'  # Class (or object) used to sort
        self.masks = []                   # All the masks (accept and reject)
        self.masks_deprecated = []        # All the masks (imask and emask)
        self.collection = None            # Sourlients do not participate in the collection effort
        self.slow = False                 # Sleeps are added when we want to be able to decrypt log entries

        # Socket Attributes
        self.port = None

        # All defaults for a source were set earlier in this class,
        # but some of them may have been overwritten in the px.conf file.
        # Load the px.conf stuff related to the source.
        pxconf_Path = PXPaths.ETC + 'px.conf'
        if os.path.isfile(pxconf_Path):
            self.readConfig(pxconf_Path)

        # Read in the sourlient config
        filePath = PXPaths.TRX_CONF + self.name + '.conf'
        self.readConfig(filePath)

        if self.ingestion:
            if hasattr(self, 'ingestor'):
                # Will happen only when a reload occurs
                self.ingestor.__init__(self)
            else:
                self.ingestor = Ingestor(self)

            #self.printInfos(self)
            self.ingestor.setClients()

    def readConfig(self, filePath):

        def isTrue(s):
            if s == 'True' or s == 'true' or s == 'yes' or s == 'on' or \
               s == 'Yes' or s == 'YES' or s == 'TRUE' or s == 'ON' or \
               s == '1' or s == 'On':
                return True
            else:
                return False

        currentDir = '.'              # Current directory
        currentFileOption = 'WHATFN'  # Under what filename the file will be sent (WHATFN, NONE, etc. See PDS)

        try:
            config = open(filePath, 'r')
        except:
            (type, value, tb) = sys.exc_info()
            print("Type: %s, Value: %s" % (type, value))
            return

        for line in config.readlines():
            words = line.split()
            if (len(words) >= 2 and not re.compile('^[ \t]*#').search(line)):
                try:
                    if words[0] == 'extension':
                        if len(words[1].split(':')) != 5:
                            self.logger.error("Extension (%s) for source %s has wrong number of fields"
                                              % (words[1], self.name))
                        else:
                            self.extension = ':' + words[1]
                    elif words[0] == 'accept':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, True))
                    elif words[0] == 'reject':
                        cmask = re.compile(words[1])
                        self.masks.append((words[1], currentDir, currentFileOption, cmask, False))
                    elif words[0] == 'imask':
                        self.masks_deprecated.append((words[1], currentDir, currentFileOption))
                    elif words[0] == 'emask':
                        self.masks_deprecated.append((words[1],))
                    elif words[0] == 'subscriber':
                        self.subscriber = isTrue(words[1])
                    elif words[0] == 'validation':
                        self.validation = isTrue(words[1])
                    elif words[0] == 'noduplicates':
                        self.nodups = isTrue(words[1])
                    elif words[0] == 'slow':
                        self.slow = isTrue(words[1])
                    elif words[0] == 'patternMatching':
                        self.diskReaderPatternMatching = isTrue(words[1])
                    elif words[0] == 'clientsPatternMatching':
                        self.clientsPatternMatching = isTrue(words[1])
                    elif words[0] == 'mtime':
                        self.mtime = int(words[1])
                    elif words[0] == 'sorter':
                        self.sorter = words[1]
                    elif words[0] == 'type':
                        self.type = words[1]
                    elif words[0] == 'maxLength':
                        self.maxLength = int(words[1])
                    elif words[0] == 'host':
                        self.host = words[1]
                    elif words[0] == 'portR':
                        self.portR = int(words[1])
                    elif words[0] == 'portS':
                        self.portS = int(words[1])
                    elif words[0] == 'stationID':
                        self.stationID = words[1]
                    elif words[0] == 'otherStationID':
                        self.otherStationID = words[1]
                    elif words[0] == 'address':
                        self.address = words[1]
                    elif words[0] == 'otherAddress':
                        self.otherAddress = words[1]
                    elif words[0] == 'digits':
                        self.digits = int(words[1])
                    elif words[0] == 'routingTable':
                        self.routingTable = words[1]
                    elif words[0] == 'ackUsed':
                        self.ackUsed = isTrue(words[1])
                    elif words[0] == 'maxAckTime':
                        self.maxAckTime = int(words[1])
                    elif words[0] == 'batch':
                        self.batch = int(words[1])
                    elif words[0] == 'cache_size':
                        self.cache_size = int(words[1])
                    elif words[0] == 'debug' and isTrue(words[1]):
                        self.debug = True
                    elif words[0] == 'timeout':
                        self.timeout = int(words[1])
                    elif words[0] == 'timeout_send':
                        self.timeout_send = int(words[1])
                    elif words[0] == 'logrotate':
                        if words[1].isdigit():
                            self.logger.setBackupCount(int(words[1]))
                except:
                    self.logger.error("Problem with this line (%s) in configuration file of client %s"
                                      % (words, self.name))

        if not self.validation:
            self.sorter = 'None'  # Must be a string because eval will be subsequently applied to this

        config.close()

    def _getMatchingMask(self, filename):
        if len(self.masks_deprecated) > 0:
            for mask in self.masks_deprecated:
                if fnmatch.fnmatch(filename, mask[0]):
                    try:
                        if mask[2]:
                            return mask
                    except:
                        return None
        for mask in self.masks:
            if mask[3].match(filename):
                if mask[4]:
                    return mask
                return None
        return None

    def printInfos(self, client):
        print("==========================================================================")
        print("Name: %s " % client.name)
        print("Type: %s" % client.type)
        print("Subscriber: %s" % client.subscriber)
        print("Host: %s" % client.host)
        print("PortR: %s" % client.portR)
        print("PortS: %s" % client.portS)
        print("Station ID: %s" % client.stationID)
        print("Other Station ID: %s" % client.otherStationID)
        print("Address: %s" % client.address)
        print("Other Address: %s" % client.otherAddress)
        print("Digits: %i" % client.digits)
        print("Extension: %s" % client.extension)
        print("Slow: %s" % client.slow)
        print("Batch: %s" % client.batch)
        print("Cache_size: %s" % client.cache_size)
        print("Max length: %i" % client.maxLength)
        print("Mtime: %i" % client.mtime)
        print("Timeout: %s" % client.timeout)
        print("Sorter: %s" % client.sorter)
        print("Validation: %s" % client.validation)
        print("Routing table: %s" % client.routingTable)
        print("Maximum time (seconds) we wait for an ack: %s" % client.maxAckTime)
        print("Ack used: %s" % client.ackUsed)
        print("Sourlient Pattern Matching: %s" % client.diskReaderPatternMatching)
        print("Clients Pattern Matching: %s" % client.clientsPatternMatching)
        print("******************************************")
        print("* Sourlient Masks *")
        print("******************************************")
        for mask in self.masks:
            if mask[4]:
                print(" accept %s" % mask[0])
            else:
                print(" reject %s" % mask[0])
        print("==========================================================================")
class Elastic:

    def __init__(self):
        self.es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        if self.es.ping():
            logging.info('Elasticsearch connected..')
        else:
            logging.info('Could not connect to Elasticsearch!')
        self.index_name = None
        self.ingest = Ingestor()
        self.db = Database()
        self.pp = pprint.PrettyPrinter(indent=4)
        self.default_zip = None
        self.default_house = None
        self.status = None

    def store_es_record(self, house, zipcode):
        try:
            self.index_name = 'listings_' + str(zipcode)
            self.es.index(index=self.index_name, body=house)
        except Exception as ex:
            logging.info('Error in indexing data: %s', ex)

    def load_es(self, zipcode):
        self.index_name = 'listings_' + str(zipcode)
        objects = self.ingest.callWebservice(zipcode)
        payload = objects.json()  # renamed from 'dict' to avoid shadowing the builtin
        total = payload['result']['total']
        listings = payload['result']['listings']
        for house in listings:
            self.store_es_record(house, zipcode)
        return total

    def process(self, search_object, original_keys, modified_keys):
        try:
            res = self.es.search(index=self.index_name, body=search_object)
            data_dict = ast.literal_eval(str(res))
            num = len(data_dict['hits']['hits'])
            listing = []
            for i in range(num):
                v = data_dict['hits']['hits'][i]['_source']
                val = json.dumps(v)
                listing.append(val)
            return listing
        except Exception as ex:
            logging.info('Error in process: %s', ex)

    def process0(self, results):
        try:
            settings = {
                "settings": {
                    "number_of_shards": 1,
                    "number_of_replicas": 0
                }
            }
            zipcode = str(results[1][0])
            if not self.db.exists_zipcode(zipcode):
                # ignore=400 means to ignore the "Index Already Exists" error.
                index_name = 'listings_' + str(zipcode)
                self.es.indices.create(index_name, ignore=400, body=settings)
                print('Elasticsearch created index', str(index_name))
                num = self.load_es(zipcode)
                print("Loaded " + str(num) + " records into Elasticsearch")
                self.db.insert_db(zipcode, time.time())
                self.status = "Downloaded listings for " + str(results[1][0])
            else:
                self.status = str(results[1][0]) + " already downloaded"
            # The original referenced the builtin 'zip' here; 'zipcode' is what was meant.
            self.default_zip = zipcode
            self.index_name = 'listings_' + str(zipcode)
            status = {'status': self.status}
            data = ast.literal_eval(str(status))
            result = json.dumps(data)
            return result
        except Exception as ex:
            logging.info('Error in process0: %s', ex)
            status = {'status': 'Downloading ' + str(results[1][0]) + ' failed'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result

    def process6(self, results):
        # "what properties are on the market in the price range 100000 to 300000"
        try:
            keywords = results[1]
            key = keywords[0]
            if key == 'price':
                if len(keywords) == 3:
                    unit_low = keywords[1]
                    unit_high = keywords[2]
                    low_factor = 0
                    high_factor = 0
                    if unit_low and unit_low.startswith('mil'):
                        low_factor = 1000000
                    if unit_high and unit_high.startswith('mil'):
                        high_factor = 1000000
                else:
                    low_factor = 1
                    high_factor = 1
                pricerange = results[2]
                low_range = pricerange[0]
                high_range = pricerange[1]
                original_keys = ["address.street", "address.city", "address.state",
                                 "address.zip", "listPrice", "sqft", "district",
                                 "size", "yearBuilt", "beds", "full"]
                modified_keys = ["street", "city", "state", "zip", "listPrice",
                                 "lotSize", "district", "size", "yearBuilt",
                                 "beds", "baths"]
                search_object = {
                    "_source": {"includes": original_keys},
                    "query": {
                        "bool": {
                            "must": [{"match": {"address.zip": self.default_zip}}],
                            "filter": [{
                                "range": {
                                    "listPrice": {
                                        "gte": int(low_range) * low_factor,
                                        "lte": int(high_range) * high_factor
                                    }
                                }
                            }]
                        }
                    }
                }
                val = self.process(search_object, original_keys, modified_keys)
                return val
        except Exception as ex:
            logging.info('Error in process6: %s', ex)

    def process1(self, results):
        # "make 57402 Ernestine Radial the default house"
        try:
            address = results[1][0][0]
            default = results[1][1][0]
            house = results[1][1][1]
            #original_keys = ["address.street", "listPrice"]
            if default == 'default' and house == 'house':
                if address:
                    self.default_house = address
                    status = {'status': 'setting default house to ' + self.default_house}
                else:
                    status = {'status': 'error setting default house'}
                data = ast.literal_eval(str(status))
                result = [json.dumps(data)]
                return result
        except Exception as ex:
            logging.info('Error in process1: %s', ex)

    def process2(self, results):
        # "what is the price of the house"
        try:
            keyword = results[1][1][0]
            original_keys = ['listPrice']
            if keyword == 'price':
                search_object = {
                    "_source": {"includes": ["listPrice"]},
                    "query": {
                        "match_phrase": {"address.street": self.default_house}
                    }
                }
                val = self.process(search_object, original_keys, original_keys)
                return val
        except Exception as ex:
            logging.info('Error in process2: %s', ex)

    def process3(self, results):
        # "how many bedrooms and bathrooms does it have"
        try:
            bedrooms = results[1][0]
            bathrooms = results[1][1]
            original_keys = ["beds", "baths.full", "baths.half"]
            if (bedrooms == 'bedrooms' or bedrooms == 'bedroom') and \
               (bathrooms == 'baths' or bathrooms == 'bathrooms' or
                    bathrooms == 'bathroom' or bathrooms == 'bath'):
                search_object = {
                    "_source": {"includes": ["beds", "baths.full", "baths.half"]},
                    "query": {
                        "match_phrase": {"address.street": self.default_house}
                    }
                }
                val = self.process(search_object, original_keys, original_keys)
                return val
        except Exception as ex:
            logging.info('Error in process3: %s', ex)

    def process4(self, results):
        # "how long has the property been on the market"
        try:
            original_keys = ["daysOnHJI"]
            search_object = {
                "_source": {"includes": ["daysOnHJI"]},
                "query": {
                    "match_phrase": {"address.street": self.default_house}
                }
            }
            val = self.process(search_object, original_keys, original_keys)
            return val
        except Exception as ex:
            logging.info('Error in process4: %s', ex)

    def process5(self, results):
        # "how much section 1 work is required"
        try:
            status = {'section_1': 5000}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process5: %s', ex)

    def process7(self, results):
        # "what is the required setback from lot boundary"
        try:
            status = {'setback': '25 ft'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process7: %s', ex)

    def process8(self, results):
        # set default zip = zip
        try:
            zip = str(results[1][0])
            self.default_zip = zip
            self.index_name = 'listings_' + str(zip)
            status = {'status': 'setting default zip to ' + str(zip)}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process8: %s', ex)

    def process9(self, results):
        # get default zip
        try:
            if self.default_zip:
                status = {'status': 'the default zip is ' + str(self.default_zip)}
            else:
                status = {'status': 'the default zip is not set'}
            data = ast.literal_eval(str(status))
            result = [json.dumps(data)]
            return result
        except Exception as ex:
            logging.info('Error in process9: %s', ex)

    def process11(self, results):
        try:
            zip = str(results[1][0])
            original_keys = ["address.street", "address.city", "address.state",
                             "address.zip", "listPrice", "sqft", "district",
                             "size", "yearBuilt", "beds", "full"]
            modified_keys = ["street", "city", "state", "zip", "listPrice",
                             "lotSize", "district", "size", "yearBuilt",
                             "beds", "baths"]
            search_object = {
                "_source": {"includes": original_keys},
                "query": {
                    "bool": {
                        "must": [{"match": {"address.zip": zip}}]
                    }
                }
            }
            val = self.process(search_object, original_keys, modified_keys)
            return val
        except Exception as ex:
            logging.info('Error in process11: %s', ex)

    def search_all(self):
        search_object = {"query": {"match_all": {}}}
        res = self.es.search(index=self.index_name, body=search_object)
        res_json = json.dumps(res)
        return res_json

    def search_id(self, id):
        search_object = {'query': {'match': {'id': str(id)}}}
        res = self.es.search(index=self.index_name, body=search_object)
        res_json = json.dumps(res)
        return res_json

    def filterKeys(self, res, original_keys, modified_keys):
        numberOfHouses = self.wrap_extract_values(res, ['value'])[0][0]
        vals = self.wrap_extract_values(res, original_keys)
        #self.pp.pprint(vals)
        numberOfAttrs = len(original_keys)
        houses = []
        for j in range(numberOfHouses):
            oneHouse = []
            for i in range(numberOfAttrs):
                if not vals[i][j] == None:
                    elem = vals[i][j]
                    oneHouse.append(elem)
            houses.append(oneHouse)
        houses_json = []
        for house in houses:
            res = dict(zip(modified_keys, house))
            houses_json.append(res)
        return houses_json

    def wrap_extract_values(self, obj, listkey):
        allvals = []
        for k in listkey:
            val = self.extract_values(obj, k)
            allvals.append(val)
        return allvals

    def extract_values(self, obj, key):
        """Pull all values of the specified key from nested JSON."""
        arr = []

        def extract(obj, arr, key):
            """Recursively search for values of key in the JSON tree."""
            if isinstance(obj, dict):
                for k, v in obj.items():
                    if isinstance(v, (dict, list)):
                        extract(v, arr, key)
                    elif k == key:
                        arr.append(v)
            elif isinstance(obj, list):
                for item in obj:
                    extract(item, arr, key)
            return arr

        results = extract(obj, arr, key)
        return results
class DatabaseManagerTester(unittest.TestCase):

    def setUp(self):
        sqlite_file = 'test.db'
        self.db = DatabaseManager(sqlite_file, '__ADMIN__')
        filename = "Test_Files/DatabaseManagerTest_15.csv"
        self.ingestor = Ingestor(filename)
        self.ingestor.readCSV()
        tempHeaders = self.ingestor.getCSVHeaders()
        self.searchCriteria = [tempHeaders[0], tempHeaders[1], tempHeaders[2],
                               tempHeaders[4], tempHeaders[5], tempHeaders[6]]
        searchCriteriaTwoD = self.ingestor.getHeaderIndex(self.searchCriteria, tempHeaders)
        self.ingestor.searchRows(searchCriteriaTwoD, self.ingestor.getRows())
        self.searchCriteria = self.db.remove_spaces(self.searchCriteria)
        self.new_table = 'Test_15'

    def test_create_new_table(self):
        self.assertTrue(self.db.create_table_list(self.new_table, self.searchCriteria, 'string'))

    def test_add_row_list(self):
        self.assertTrue(self.db.add_list_of_rows(self.new_table, self.searchCriteria,
                                                 self.ingestor.getRows()))

    def test_get_headers(self):
        expectedReturn = ['Street_Address', "owner's_first_name", 'last_name',
                          'email', 'phone_Number', 'Loan_Amount']
        self.assertEqual(self.db.get_headers(self.new_table), expectedReturn)

    def test_get_table(self):
        pass

    def test_get_table_names(self):
        tables_in_db = self.db.get_table_names()
        self.assertTrue(self.new_table in tables_in_db)

    def test_get_row_at_with_column(self):
        column_to_use = "72 Pearson Drive"
        row_from_db = self.db.get_row_at(self.new_table,
                                         column_name=self.searchCriteria[0],
                                         column_value=column_to_use)
        expectedReturn = [u'72 Pearson Drive', u'Bartholemy', u'Parnaby',
                          u'*****@*****.**', u'+55 (385) 326-3642', u'$44,795.68 ']
        # The lists are the same but it doesn't think they are equal
        #self.assertEqual(row_from_db, expectedReturn)

    def test_get_row_at_with_rowid(self):
        rowid = 3
        row_from_db = self.db.get_row_at(self.new_table, row_id=rowid)
        expectedReturn = [u'72 Pearson Drive', u'Bartholemy', u'Parnaby',
                          u'*****@*****.**', u'+55 (385) 326-3642', u'$44,795.68 ']
        #self.assertEqual(row_from_db, expectedReturn)

    def test_delete_row(self):
        rowid = 9
        rowToDel = self.db.get_row_at(self.new_table, row_id=rowid)
        rowAfterToDel = self.db.get_row_at(self.new_table, row_id=rowid + 1)
        self.db.delete_row_at(self.new_table, rowid)
        self.assertEqual(self.db.get_row_at(self.new_table, row_id=rowid), rowAfterToDel)

    def test_update_row(self):
        rowid = 9
        old_row = self.db.get_row_at(self.new_table, row_id=rowid)
        updated_row1 = ["a house", "josh", "green", "*****@*****.**",
                        "228-192-2819", "$2.17"]
        self.db.update_row_at(self.new_table, primary_key=rowid, new_row=updated_row1)
        self.assertTrue(old_row is not updated_row1)
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
# https://www.dataquest.io/blog/python-pandas-databases/
# https://www.youtube.com/watch?v=pd-0G0MigUA
# https://stackoverflow.com/questions/17044259/python-how-to-check-if-table-exists

from DatabaseManager import DatabaseManager
from Ingestor import Ingestor

# SETTING IT EQUAL TO ':memory:' WILL HAVE IT RUN ON RAM AND NO SQLITE FILE WILL BE MADE.
sqlite_file = 'test.db'
db = DatabaseManager(sqlite_file, '__ADMIN__')
CLEAR_ON_COMPLETION = True

filename = "Test_Files/DatabaseManagerTest_15.csv"
ingestor = Ingestor(filename)
ingestor.readCSV()
tempHeaders = ingestor.getCSVHeaders()
searchCriteria = [tempHeaders[0], tempHeaders[1], tempHeaders[2],
                  tempHeaders[4], tempHeaders[5], tempHeaders[6]]
searchCriteriaTwoD = ingestor.getHeaderIndex(searchCriteria, tempHeaders)
print("\nDictionary of search criteria and their indexes in the csv")
print(searchCriteriaTwoD)
ingestor.searchRows(searchCriteriaTwoD, ingestor.getRows())
print("\nPrint filtered list from unfiltered row")
print(ingestor.getRowAt(0))
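# Continuation sketch: the script above stops short of writing to SQLite.
# These calls reuse only DatabaseManager methods already exercised by
# DatabaseManagerTester earlier in this collection; 'Test_15' is an
# illustrative table name.
new_table = 'Test_15'
headers = db.remove_spaces(searchCriteria)
if not db.doesTableExist(new_table):
    db.create_table_list(new_table, headers, 'string')  # all columns typed as strings
db.add_list_of_rows(new_table, headers, ingestor.getRows())
print(db.get_row_at(new_table, row_id=1))               # first imported row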
from Ingestor import Ingestor  # same import as in the demo script above


def main():
    filename = 'Test_Files/DatabaseManagerTest_15.csv'
    ingestor = Ingestor(filename)
    ingestor.readCSV()
    print("Header of csv file")
    print(ingestor.getCSVHeaders())
    tempHeaders = ingestor.getCSVHeaders()
    searchCriteria = [tempHeaders[2], tempHeaders[3], tempHeaders[5]]
    searchCriteria = ingestor.getHeaderIndex(searchCriteria, tempHeaders)
    print("\nDictionary of search criteria and their indexes in the csv")
    print(searchCriteria)
    print("\nPrint raw list from csv")
    print(ingestor.getRowAt(1))
    ingestor.searchRows(searchCriteria, ingestor.getRows())
    print("\nPrint filtered list from unfiltered row")
    print(ingestor.getRowAt(1))
    print(ingestor.getRowAt(2))
    print(ingestor.getRowAt(3))
    print(ingestor.getRowAt(4))
    print("\nNumber of columns")
    print(ingestor.getNumberOfHeaders())
    print("\nNumber of rows")
    print(ingestor.getNumberOfRows())
    print("\nUpdating file to a csv in the project folder named newList.csv. Expected: False")
    print(ingestor.updateFileLoc(
        "/home/anthonym/Documents/SchoolWork/SoftwareEngineering/newList.csv"))
    print("\nUpdating file to a csv with no name. Expected: False")
    print(ingestor.updateFileLoc(""))
    print("\nUpdating file to the csv at Test_Files/DatabaseManagerTest_1000.csv. Expected: True")
    print(ingestor.updateFileLoc("Test_Files/DatabaseManagerTest_1000.csv"))


if __name__ == '__main__':
    main()
def test_parse_fakefile(self):
    with self.assertRaises(FileNotFoundError):
        _ = Ingestor.parse('fakefile.pdf')
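# Hypothetical sketch: the test above only pins down one behaviour of
# Ingestor.parse, namely that a missing file raises FileNotFoundError before
# any parsing starts. The dispatch to concrete parsers is assumed, not part
# of the source.
import os

class Ingestor:

    @classmethod
    def parse(cls, path):
        if not os.path.isfile(path):
            # Raise before touching any parser, as the test expects.
            raise FileNotFoundError(path)
        # ... hand off to a format-specific parser here ...
        raise NotImplementedError('parser dispatch not shown in this sketch')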