Пример #1
0
    def delete(self, paths, delete_marker=False):
        """Delete the provided paths from the metastore.

        Completely removing files from the metastore can cause problems
        because the s3 listing may show the files even though the data may
        not be available.  This will cause MR jobs to fail.  The delete
        marker can be used to hide files from the listing instead.

        paths         -- the path(s) to delete; passed through
                         ``self.__as_paths`` before use.
        delete_marker -- when true, each entry is kept and flagged with
                         ``deleted = "true"``; when false (the default) the
                         entries are removed in a single batch write.

        Returns None.  Nothing is done when the metastore is disabled.

        Example:
        s.delete([path1, path2])
        """
        if self.disabled:
            return

        paths = self.__as_paths(paths)

        if delete_marker:
            for path in paths:
                item = self.db.get_item(path=path.parent().normalize(),
                                        file=path.filename())
                item['deleted'] = "true"
                # Assigning to the fetched item only changes the local copy;
                # it has to be saved for the marker to reach the store.
                # NOTE(review): assumes self.db is a boto.dynamodb2 Table,
                # whose items expose partial_save() -- confirm.
                item.partial_save()
        else:
            with self.db.batch_write() as batch:
                for path in paths:
                    batch.delete_item(path=path.parent().normalize(),
                                      file=path.filename())
Пример #2
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The title is the text of the first ``<h1>`` element found in
     ``self.html``.  When there is none, it falls back to the value the
     ``filename`` helper returns for ``self.destfile``, with the final
     extension removed.  The result is stored in ``self._title``.
     """
     if not self._title:
         headings = re.findall(r"<h1>(.*?)</h1>", self.html)
         if headings:
             self._title = headings[0]
         else:
             # Split on the last dot only, so a dotted name such as
             # "v1.2-notes.html" keeps its whole stem.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title
Пример #3
0
 def abstract(self):
     """Return the page abstract, computing and caching it on first use.

     The abstract is the content of the first ``<p>`` element found in
     ``self.html`` (the match may span several lines).  When there is no
     paragraph, it falls back to the value the ``filename`` helper returns
     for ``self.destfile``, with the final extension removed.  The result
     is stored in ``self._abstract``.
     """
     if not self._abstract:
         paragraphs = re.findall(r"<p>(.*?)</p>", self.html, re.S)
         if paragraphs:
             self._abstract = paragraphs[0]
         else:
             # Split on the last dot only, so a dotted name such as
             # "v1.2-notes.html" keeps its whole stem.
             self._abstract = filename(self.destfile).rsplit(".", 1)[0]
     return self._abstract
Пример #4
0
 def __init__(self, from_file):
     """Set up the page for the source file *from_file*.

     Raises RuntimeError("not a file") when *from_file* is not an existing
     regular file.  Otherwise records the source path, derives the ".html"
     destination path and its URL, and clears the cached html and title.

     NOTE(review): ``root_dir``, ``website_dir``, ``join``, ``dirname``,
     ``splitext``, ``filename`` and ``pathname2url`` are defined outside
     this block -- presumably os.path / urllib helpers; confirm.
     """
     if not os.path.isfile(from_file):
         raise RuntimeError("not a file")
     self.fromfile = from_file

     # Swap the "post" directory prefix for the website directory, keep the
     # resulting directory part, and give the file an ".html" extension.
     posts_root = join(root_dir, "post")
     target_dir = dirname(self.fromfile.replace(posts_root, website_dir))
     target_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(target_dir, target_name)

     # The URL is built from whatever follows website_dir in the destination.
     site_relative = self.destfile.split(website_dir)[1]
     self.url = pathname2url(site_relative)

     # Caches, filled in elsewhere.
     self._html = None
     self._title = None
Пример #5
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The title is the text of the first ``<h1>`` element in ``self.html``,
     or of the first ``<h2>`` element when there is no ``<h1>``.  When
     neither is present, it falls back to the value the ``filename``
     helper returns for ``self.destfile``, with the final extension
     removed.  The result is stored in ``self._title``.
     """
     if not self._title:
         headings = (re.findall(r"<h1>(.*?)</h1>", self.html)
                     or re.findall(r"<h2>(.*?)</h2>", self.html))
         if headings:
             self._title = headings[0]
         else:
             # Split on the last dot only, so a dotted name such as
             # "v1.2-notes.html" keeps its whole stem.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title
Пример #6
0
 def __init__(self, from_file):
     """Set up the page for the source file *from_file*.

     Raises RuntimeError("not a file") when *from_file* is not an existing
     regular file.  Otherwise stores the source path, works out where the
     generated ".html" file goes and which URL it gets, and resets the
     cached html and title.

     NOTE(review): ``root_dir``, ``website_dir`` and the path helpers used
     here come from outside this block -- confirm their definitions.
     """
     source_exists = os.path.isfile(from_file)
     if not source_exists:
         raise RuntimeError("not a file")

     self.fromfile = from_file

     # Destination: same location with the "post" directory prefix replaced
     # by the website directory, and the extension changed to ".html".
     post_dir = join(root_dir, "post")
     relocated = self.fromfile.replace(post_dir, website_dir)
     html_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(dirname(relocated), html_name)

     # URL: the part of the destination that follows website_dir.
     self.url = pathname2url(self.destfile.split(website_dir)[1])

     # Caches, filled in elsewhere.
     self._html = None
     self._title = None
Пример #7
0
 def __init__(self, from_file):
     """Set up the page for the source file *from_file*.

     Raises RuntimeError("not a file") when *from_file* is not an existing
     regular file.  Otherwise stores the source path, derives the ".html"
     destination path and its URL, resets the cached html, title, image
     and abstract, and creates the "java" lexer and HTML formatter.

     NOTE(review): ``root_dir``, ``website_dir`` and the path helpers come
     from outside this block; ``get_lexer_by_name`` / ``HtmlFormatter``
     look like the Pygments API -- confirm.
     """
     if not os.path.isfile(from_file):
         raise RuntimeError("not a file")
     self.fromfile = from_file

     # Destination: "post" directory prefix replaced by the website
     # directory, extension replaced by ".html".
     posts_root = join(root_dir, "post")
     dest_dir = dirname(self.fromfile.replace(posts_root, website_dir))
     dest_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(dest_dir, dest_name)

     # URL: the part of the destination that follows website_dir.
     below_site_root = self.destfile.split(website_dir)[1]
     self.url = pathname2url(below_site_root)

     # Caches, filled in elsewhere.
     self._html = None
     self._title = None
     self._image = None
     self._abstract = None

     # Highlighting objects created once per instance.
     self.lexer = get_lexer_by_name("java", stripall=True)
     self.formatter = HtmlFormatter()
Пример #8
0
    def delete(self, paths, delete_marker=False):
        """Delete the provided paths from the metastore.

        Completely removing files from the metastore can cause problems
        because the s3 listing may show the files even though the data may
        not be available.  This will cause MR jobs to fail.  The delete
        marker can be used to hide files from the listing instead.

        paths         -- the path(s) to delete; passed through
                         ``self.__as_paths`` before use.
        delete_marker -- when true, each entry is kept and flagged with
                         ``deleted = "true"``; when false (the default) the
                         entries are removed in a single batch write.

        Returns None.  Nothing is done when the metastore is disabled.

        Example:
        s.delete([path1, path2])
        """
        if self.disabled:
            return

        paths = self.__as_paths(paths)

        if not delete_marker:
            with self.db.batch_write() as batch:
                for path in paths:
                    batch.delete_item(path=path.parent().normalize(), file=path.filename())
            return

        for path in paths:
            item = self.db.get_item(path=path.parent().normalize(), file=path.filename())
            item["deleted"] = "true"
            # Assigning to the fetched item only changes the local copy; it
            # has to be saved for the marker to reach the store.
            # NOTE(review): assumes self.db is a boto.dynamodb2 Table, whose
            # items expose partial_save() -- confirm.
            item.partial_save()
Пример #9
0
    def add(self, paths):
        """Record a list of Paths in the file metastore.

        Every path is written in one batch as an entry holding its
        normalized parent ("path"), its file name ("file") and a timestamp
        ("epoch") taken once for the whole call.

        Returns None.  Nothing is written when the metastore is disabled.

        Example:
        s.add([path1, path2])
        """
        if self.disabled:
            return

        # One timestamp shared by every entry of this call.
        stamp = self.__time_now()
        entries = self.__as_paths(paths)

        with self.db.batch_write() as writer:
            for entry in entries:
                record = {
                    "path": entry.parent().normalize(),
                    "file": entry.filename(),
                    "epoch": stamp,
                }
                writer.put_item(data=record)
Пример #10
0
    def add(self, paths):
        """Record a list of Paths in the file metastore.

        Every path is written in one batch as an entry holding its
        normalized parent ('path'), its file name ('file') and a timestamp
        ('epoch') taken once for the whole call.

        Returns None.  Nothing is written when the metastore is disabled.

        Example:
        s.add([path1, path2])
        """
        if self.disabled:
            return

        # One timestamp shared by every entry of this call.
        now = self.__time_now()
        targets = self.__as_paths(paths)

        with self.db.batch_write() as batch:
            for target in targets:
                parent_dir = target.parent().normalize()
                file_name = target.filename()
                batch.put_item(data={'path': parent_dir,
                                     'file': file_name,
                                     'epoch': now})
Пример #11
0
def log_to_file(original_function, outfile=None, configfile=None):
    """Wrap *original_function* so that each call is appended to a log file.

    original_function -- the callable to wrap.
    outfile           -- file the call records are appended to.  When None,
                         "<function name>_log.txt" inside the hard-coded
                         log directory is used.
    configfile        -- logging configuration file.  When None,
                         'generic_logger_config.ini' is tried.  If the file
                         does not exist, logging falls back to
                         ``logging.basicConfig`` writing to *outfile* at
                         DEBUG level.

    Returns a wrapper that calls *original_function*, appends one line with
    the function name, arguments and result to *outfile*, and returns the
    result.  A call that raises is not recorded.
    """
    import functools
    import logging
    import logging.config
    from os import path

    # Switch off caller, thread and process lookups that the logging
    # module would otherwise perform for every record.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    if configfile is None:
        configfile = 'generic_logger_config.ini'
    if outfile is None:
        default_name = original_function.__name__ + "_log.txt"
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            path.basename(default_name))

    # Only hand the file to fileConfig when it is really there; otherwise
    # use the basic file-based configuration.
    if path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
    else:
        logging.basicConfig(filename=outfile, level=logging.DEBUG)

    print("Logging to … {0}".format(path.abspath(outfile)))

    @functools.wraps(original_function)
    def new_function(*args, **kwargs):
        result = original_function(*args, **kwargs)
        # Text append mode: the record is a str, not bytes.
        with open(outfile, "a") as logfile:
            logfile.write("Function '%s' called with positional arguments %s and keyword arguments %s. The result was %s.\n" % (original_function.__name__, args, kwargs, result))
        return result

    return new_function
Пример #12
0
def basic_log_file_obj(log_configuration='Admin_Client', **kwargs):
    """Configure logging and return the logger named *log_configuration*.

    Keyword arguments:
    configfile -- logging configuration file (default
                  'generic_logger_config.ini').
    outfile    -- log file to write to.  When omitted,
                  "<this module's file name>_log.txt" inside the hard-coded
                  log directory is used.

    When *configfile* exists it is loaded with ``logging.config.fileConfig``
    and a file handler for *outfile* is added to the returned logger.
    Otherwise ``logging.basicConfig`` is used (truncating *outfile*) and one
    message per level is emitted.  In both cases the returned logger is set
    to WARNING.
    """
    import logging
    import logging.config
    from os import path

    # Switch off caller, thread and process lookups that the logging
    # module would otherwise perform for every record.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    configfile = kwargs.get('configfile', 'generic_logger_config.ini')
    outfile = kwargs.get('outfile')
    if outfile is None:
        module_name = path.splitext(path.basename(__file__))[0]
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            module_name + "_log.txt")

    if configfile is not None and path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
        hdlr = logging.FileHandler(outfile)
        formatter = logging.Formatter('%(asctime)s | %(name)s | %(module)s-%(lineno)04d | %(levelname)s | %(message)s')
        hdlr.setFormatter(formatter)
        # Fetch the logger only after fileConfig has run, as the original
        # did, so the configuration is in place first.
        my_logger = logging.getLogger(log_configuration)
        my_logger.addHandler(hdlr)
        my_logger.setLevel(logging.WARNING)
        return my_logger

    logging.basicConfig(filename=outfile, filemode='w', level=logging.DEBUG)
    my_logger = logging.getLogger(log_configuration)
    imsg = '\nLOGGING Level 1 - Active....\nINFO MODE SET'
    wmsg = '\nLOGGING Level 2 - Active....\nWARNING MODE SET'
    emsg = '\nLOGGING Level 3 - Active....\nERROR MODE SET'
    cmsg = '\nLOGGING Level 4 - Active....\nCRITICAL MODE SET'
    exmsg = '\nEXCEPTION!!!!!....\n--------Exception Raised---------\n'
    dmsg = '\nLOGGING DEBUGER - Active....\nDEBUG MODE SET'
    # Emit one message per level before the level is raised to WARNING.
    my_logger.info(imsg)
    my_logger.warning(wmsg)
    my_logger.error(emsg)
    my_logger.critical(cmsg)
    my_logger.exception(exmsg)
    my_logger.debug(dmsg)
    my_logger.setLevel(logging.WARNING)
    return my_logger
Пример #13
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The title is the text of the first ``<h2>`` element found in
     ``self.html``.  When there is none, it falls back to the value the
     ``filename`` helper returns for ``self.destfile``, with the final
     extension removed.  The result is stored in ``self._title``.
     """
     if not self._title:
         headings = re.findall(r"<h2>(.*?)</h2>", self.html)
         if headings:
             self._title = headings[0]
         else:
             # Split on the last dot only, so a dotted name such as
             # "v1.2-notes.html" keeps its whole stem.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title