Пример #1
0
 def __songfiles(self):
     """Build a MedusaSong for every line of this card's body.

     Each line of self.body is treated as a path relative to the configured
     "songs" directory, which lives under <data_root>/private. One
     MedusaSong per line is appended to self.songs, in body order.
     """
     # The songs directory does not depend on the line being processed, so
     # the two config lookups happen once here instead of once per song.
     card_root = GlobalConfig.get("paths", "data_root") + "/private"
     songs_dir = card_root + "/" + self.config.get("paths", "songs")
     for songpath in self.body.splitlines():
         self.songs.append(MedusaSong(songs_dir + "/" + songpath))
Пример #2
0
    def __add_file_to_index(self, fnmtime, filename, ctype="news"):
        """
        Read one card file and add (or replace) its entry in the search index.

        The first two unrolled lines of the file are taken verbatim, and the
        text inside every line that starts with a <p tag is extracted with
        ElementTree and appended. The combined text is passed through
        __process_input(returning="contents"), and self.content is what gets
        written to the index afterwards.

        Parameters:
            fnmtime: modification time of the file; stored as a string in the
                "mtime" field.
            filename: name of the file inside the ctype directory; also used
                as the document's "file" field.
            ctype: card type. Used as the key into the "paths" config section
                and stored in the "ctype" field.

        Any exception from reading, parsing or indexing the file propagates
        to the caller; the index writer is cancelled first if it was opened.
        """
        card_root = GlobalConfig.get("paths", "data_root") + "/private"
        card_path = card_root + "/" + self.config.get("paths", ctype)

        # Read and parse the file before acquiring the index writer, so a
        # missing or malformed file can never leave a writer open.
        with open(card_path + "/" + filename, 'r', encoding='utf-8') as indexfh:
            lines = indexfh.read().splitlines()
        unrolled = unroll_newlines(lines)

        # Slicing instead of indexing tolerates files with fewer than two
        # lines; for longer files the resulting text is unchanged.
        pieces = list(unrolled[:2])
        for line in unrolled:
            if line.startswith('<p'):
                element = fromstring(escape_amp(line))
                pieces.extend(text + " " for text in element.itertext())
        self.__process_input("".join(pieces), returning="contents")

        # Enable writing to our chosen index. To limit the index locking,
        # this is the only function that writes to the index.
        writer = self.index.writer()
        try:
            # Update wraps add if the document hasn't been inserted, and
            # replaces current indexed data if it has been inserted. This
            # requires the file parameter to be set "unique" in the Schema
            writer.update_document(file=filename, ctype=ctype, mtime=str(fnmtime), content=self.content)
        except Exception:
            # Abandon the pending changes rather than leaving the writer
            # open, then let the caller see the original error.
            writer.cancel()
            raise

        # Finish by committing the updates
        writer.commit()
Пример #3
0
 def __random_choice(self):
     """
     Pick one of the numbered themes at random and make it the current one.

     Sets self.index to a value between 0 and self.count - 1 inclusive,
     self.theme to the "themes" config entry stored under that index, and
     marks the selection as random through self.random.
     """
     seed()  # Called without an argument, so the pick isn't tied to a fixed seed
     picked = randint(0, self.count - 1)
     self.index = picked
     self.theme = GlobalConfig.get("themes", str(picked))
     self.random = True
Пример #4
0
 def __choose_theme(self, desired_theme=None):
     """
     Settle the final theme selection.

     When no theme was requested and the current theme is the literal
     "random", a random theme is chosen. In every other case self.index is
     set to the numeric key of the first "themes" config entry whose value
     equals self.theme. An IndexError is raised if no entry matches.
     """
     if desired_theme is None and self.theme == "random":
         self.__random_choice()
         return

     # The first item of the section is skipped; presumably it is the
     # non-numeric "default" entry -- verify against the config file.
     numbered = GlobalConfig.items("themes")[1:]
     matches = [int(key) for key, value in numbered if value == self.theme]
     self.index = matches[0]
Пример #5
0
    def __init__(self):
        """
        Load the theme settings from the global config and pick a theme.

        self.theme is the directory path to the theme.
        self.index is which Nth value in the config has our theme.
           self.index and preferences.thm should be the same
        self.random tells whether the theme was picked at random.
        self.count is the number of items in the "themes" section, less one.
        self.default is the value of the "default" entry in that section.
        """
        self.random = False
        self.theme = None
        self.index = None
        self.count = len(GlobalConfig.items("themes")) - 1
        self.default = GlobalConfig.get("themes", "default")

        # Both index and theme have to end up holding something. If they
        # did not, a malformed preferences cookie would be taken for a valid
        # session and an incorrect theme index would be written back to it.
        if self.default == "random":
            self.__random_choice()
            return

        self.theme = self.default
        self.__choose_theme()
Пример #6
0
    def set(self, desired_theme=None):
        """
        Apply a requested theme on top of what was chosen at load time.

        The global theme is picked while state is loaded; this method handles
        the values that arrive later, so state-cookie and preferences theme
        settings can be reconciled.

        desired_theme:
            None -- fall back to the configured default.
            -1   -- pick a theme at random.
            anything else -- converted with int() and wrapped into range
                with a modulo over the number of themes.
        """
        if desired_theme is None:
            # Plain default; may still be "random", which is resolved below
            self.theme = self.default
        elif desired_theme == -1:
            # "Random" was picked from the menu
            self.__random_choice()
        else:
            # User-supplied number: out-of-range values wrap around
            wrapped = int(desired_theme) % self.count
            self.index = wrapped
            self.theme = GlobalConfig.get("themes", str(wrapped))
            self.random = False

        # At this point we hold either a concrete theme or "random";
        # let __choose_theme make the final decision
        self.__choose_theme(desired_theme)
Пример #7
0
    def __add_ctype_to_index(self, ctype):
        """Index every file of one card type that is new or has changed.

        Lists the files known for ctype in BaseFiles and compares each
        file's modification time on disk with the "mtime" stored for it in
        the index. Files whose stored mtime is older (or missing) are passed
        to __add_file_to_index.

        ctype: card type; used as the key into the "paths" config section
            and into BaseFiles.
        """
        # Make sure BaseFiles is populated
        opendir(self.config, ctype)
        card_root = GlobalConfig.get("paths", "data_root") + "/private"
        card_path = card_root + "/" + self.config.get("paths", ctype)

        for filename in BaseFiles[ctype]:
            try:
                fnmtime = int(os.path.getmtime(card_path + "/" + filename))
            except OSError:
                # This file has been removed, so there is nothing to index
                # for it. Move on to the next file instead of abandoning the
                # remaining files of this card type.
                continue

            try:
                lastmtime = int(float(self.searcher.document(file=filename)['mtime']))
            except Exception:
                # No usable stored mtime (for instance the lookup returned
                # nothing to subscript): treat the file as never indexed.
                lastmtime = 0

            # If small revisions were made after the fact, the indexes won't
            # be accurate unless we reindex this file now
            if lastmtime < fnmtime:
                self.__add_file_to_index(fnmtime, filename, ctype)
Пример #8
0
    def __init__(self, state):
        """
        Open (or create) the search index and run the search held in state.

        Reads from state: medusa.config, medusa.search (query terms),
        medusa.card_filter (filter terms), page, max_items and filtered.

        Steps:
          1. Open the index in the configured directory, creating it with
             the schema below when it does not exist yet.
          2. Process the filter terms, if there are any.
          3. If there are no query terms, or nothing is left of them after
             processing, apply the card-type filter when a filter string
             exists and stop.
          4. Otherwise reindex modified files for every searchable card
             type and run the search.

        The searcher opened in step 1 is closed on every exit path,
        including when one of the later steps raises.
        """
        self.config = state.medusa.config

        # Upper limit on the permitted number of searchable items.
        # Since we use this as an array slice, add one to support N-1 elements
        self.max_query_count = GlobalConfig.getint("miscellaneous", "max_state_parameters") + 1

        # List of symbols to filter out in the unsafe input
        self.ignore_symbols = []
        # Regex of words that won't be indexed
        self.ignore_words = ''
        # After processing unsafe_query or unsafe_filter, save it in the object
        # Assume we're searching for all terms, unless separated by pluses
        self.query_string = ''
        self.filter_string = ''
        # The contents of the last file we read
        self.content = ''
        # Notes on what was searched for. This will either be an error
        # message, or provide context on the search results shown.
        # Array of ctypes, each with an array of filename hits
        self.hits = {}

        # Whoosh object defaults
        self.schema = ''
        self.index = ''
        self.query = ''
        self.parser = ''
        self.searcher = ''
        self.results = ''

        # Max search results per page is equal to the number of cards that would
        # be shown on a normal news page. And while whoosh expects pages starting
        # at one, the page state counting will be from zero
        self.page = state.page + 1
        self.resultcount = state.max_items
        self.filtered = state.filtered

        # File paths for loading things
        card_root = GlobalConfig.get("paths", "data_root") + "/private"
        self.index_dir = card_root + "/" + self.config.get('search', 'index_dir')
        self.words_file = card_root + "/" + self.config.get('search', 'ignore_words')
        # NOTE: the attribute name is misspelled ("symobls"); it is kept as
        # is because code outside this method may read it under that name.
        self.symobls_file = card_root + "/" + self.config.get('search', 'ignore_symbols')
        self.search_types = self.config.get("card_properties", "search").replace(" ", "").split(",")

        unsafe_query_terms = state.medusa.search
        unsafe_filter_terms = state.medusa.card_filter

        # Support for Japanese text indexing
        self.tk = TinySegmenterTokenizer(tinysegmenter.TinySegmenter())

        # Define the indexing schema. Include the mtime to track updated
        # content in the backend, ctype so that we can manage the distribution
        # of returned search results similar to the normal pages, and the
        # filename itself as a unique identifier (most filenames are utimes).
        self.schema = Schema(file=ID(stored=True, unique=True, sortable=True), ctype=ID(stored=True), mtime=ID(stored=True), content=TEXT(analyzer=self.tk))

        # If index doesn't exist, create it
        if index.exists_in(self.index_dir):
            self.index = index.open_dir(self.index_dir)
        else:
            self.index = index.create_in(self.index_dir, schema=self.schema)

        # Prepare for query searching (mtime update, search strings)
        self.searcher = self.index.searcher()

        # From here on the searcher is open; the finally clause guarantees
        # it is closed whichever way this method is left.
        try:
            for ctype in self.search_types:
                # Prior to processing input, prepare the results arrays.
                # Other functions will expect this to exist regardless.
                self.hits[ctype] = []

            # Process the filter strings first, in case that's all we have
            if unsafe_filter_terms is not None:
                self.__process_input(' '.join(unsafe_filter_terms[0:self.max_query_count]),
                                     returning="filter")

            # Without query terms, or with a query that is null after
            # processing, only the card-type filter (if any) is applied.
            # The query is fed as a space-delimited set of terms. NOTE that
            # we limit this in the __import_state function in MedusaState.
            if (unsafe_query_terms is None or
                    not self.__process_input(' '.join(unsafe_query_terms[0:self.max_query_count]))):
                if self.filter_string != '':
                    self.__filter_cardtypes()
                return

            for ctype in self.search_types:
                # Now we have good safe input, but we don't know if our index
                # is up-to-date or not. If files have changed since their
                # last-modified date, reindex all the modified files
                self.__add_ctype_to_index(ctype)

            # The searcher was opened before the reindexing above, so it may
            # not see documents committed since. refresh() hands back this
            # same searcher when nothing changed. (Assumes __search_index
            # queries through self.searcher -- confirm.)
            self.searcher = self.searcher.refresh()

            # Return only up to CARD_COUNT items per page for each type of
            # returned search result query. We calculate the max sum of all
            # returned items, and then we'll later determine which of these
            # results we'll display on the returned search results page.
            self.__search_index()
        finally:
            self.searcher.close()
Пример #9
0
    def __interpretfile(self, thisfile):
        """Load a card's title, topics, body and date from its file.

        Heuristics used:

        First, files in each folder are assumed to be indicative of their
        relative type. Images are in the image folder, for instance.

        Secondly, text files in the news, heading, quotes, topics and
        features folders are read in the "news entity" format: a title
        line, a comma-separated keywords line, and then the body.

        The folder-based guess is cross-checked against the type reported
        by python-magic for the file.

        Returns the configured path for this card type joined with the file
        name (under "/hidden/" for hidden cards). If the file cannot be
        read, self.topics is emptied and the "file" value from the
        "card_defaults" config section is returned instead.
        """
        mime_probe = magic.Magic(mime=True)

        card_root = GlobalConfig.get("paths", "data_root") + "/private"
        base_path = card_root + "/" + self.config.get("paths", self.ctype)
        subdir = "/hidden/" if self.hidden is True else "/"
        fpath = base_path + subdir + thisfile

        # Folders whose text files use the title / keywords / body layout
        text_sections = ("news", "heading", "quotes", "topics", "features")

        try:
            with open(fpath, 'r', encoding='utf-8') as cfile:
                ftype = mime_probe.from_file(fpath)
                is_text = "text" in ftype

                # News entries or features are processed the same way
                if is_text and any(
                        self.config.get("paths", section) in cfile.name
                        for section in text_sections):
                    self.title = cfile.readline().replace("\n", "")
                    rawtopics = cfile.readline().replace("\n", "")
                    self.topics.extend(rawtopics.split(', '))
                    self.body = cfile.read()

                # Multiple-song playlists
                if is_text and (self.config.get("paths", "songs") in cfile.name):
                    self.title = fpath
                    self.topics.append("Song Playlist")
                    self.body = cfile.read()
                    self.__songfiles()  # Read song metadata

                # Single-image cards
                is_image = ("jpeg" in ftype) or ("png" in ftype)
                if is_image and (self.config.get("paths", "images") in cfile.name):
                    # TODO: alt/img metadata
                    self.title = fpath
                    self.topics.append("Images")
                    self.body = fpath

                # Single-song orphan cards
                # NOTE(review): the probe is created with mime=True, and
                # "layer iii" looks like wording from a textual file
                # description rather than a MIME type -- confirm that this
                # branch can actually be reached.
                is_mp3 = ("mpeg" in ftype) and ("layer iii" in ftype)
                if is_mp3 and (self.config.get("paths", "songs") in cfile.name):
                    self.title = fpath  # TODO: filename from title
                    self.topics.append("Songs")  # TODO: include the album
                    self.body = fpath
                    self.__songfiles()  # Read song metadata

            # Track the creation date. An all-digit filename is read as a
            # unix time, but only when it is larger than 1141161200;
            # any other filename falls back to the file's mtime.
            stamp = None
            if not thisfile.isdigit():
                stamp = int(os.path.getmtime(fpath))
            elif int(thisfile) > 1141161200:
                stamp = int(thisfile)
            if stamp is not None:
                self.cdate = datetime.fromtimestamp(stamp).strftime("%B %-d, %Y")

        except IOError:
            # File got moved in between dirlist caching and us reading it.
            # Emptying the topics makes the card go away.
            self.topics = []
            return self.config.get("card_defaults", "file")

        shown_subdir = "/hidden/" if self.hidden is True else "/"
        return self.config.get("paths", self.ctype) + shown_subdir + thisfile