def _get_vocabulary_group_information(self, vocabulary_group, category, language):
        """
        parses the header of a vocabulary group into a word pool name and a group name

        The first text node found below the first cell of the group header is
        split at the configured split character. The number of resulting parts
        selects an entry of conf["group_information"]["regex"], which supplies
        default index lists for both names; the first entry under "cases" whose
        regex matches the header text overrides those defaults.

        :param vocabulary_group: selector handed to _get_vocabulary_group_header
        :param category: currently unused by this method
        :param language: currently unused by this method
        :return: tuple (word_pool_name, group_name, text), text being the raw header text
        :raises KeyError: if no configuration exists for the number of parts
        """
        group_conf = conf["group_information"]

        vocabulary_group_header = self._get_vocabulary_group_header(vocabulary_group)

        # NOTE(review): extract_first() gives None when the cell holds no text
        # node; that case is not handled here and fails on the split below.
        text = vocabulary_group_header.xpath("td[1]//text()").extract_first()

        words = text.split(group_conf["vocabulary_group_split_character"])

        # the configuration is keyed by the part count as a string
        word_count = str(len(words))

        try:
            regex_conf = group_conf["regex"][word_count]
        except KeyError:
            log.error("There was no configuration for lists with " + word_count + " words")
            # TODO: return default names
            # Until defaults exist, fail with the real cause instead of
            # continuing with regex_conf = None and hitting a TypeError below.
            raise

        word_pool_name_indices = regex_conf["default"]["wordpool_name_indices"]
        group_name_indices = regex_conf["default"]["group_name_indices"]

        # `in` and .values() behave the same on Python 2 and Python 3
        if "cases" in regex_conf:
            for case in regex_conf["cases"].values():
                if re.search(case["regex"], text):
                    # the first matching case wins
                    word_pool_name_indices = case["wordpool_name_indices"]
                    group_name_indices = case["group_name_indices"]
                    break

        word_pool_name = self._get_word_pool_name_from_indices(word_pool_name_indices, words)
        group_name = self._get_group_name_from_indices(group_name_indices, words)
        return word_pool_name, group_name, text
    def parse(self, response):
        """
        parses all category rows of the response and requests the page linked by each

        Every table row without a class attribute is inspected for a link in
        its first cell. For each link a request with parse_category_contents as
        callback is yielded; the stripped link text is passed along as
        request.meta["category"]. Rows without a usable link are skipped.

        :param response: response whose category table is scanned
        :return: generator of scrapy.Request objects, one per category link
        """
        log.info("Start parsing ...")

        # NOTE(review): the explicit `tbody` step only matches when the served
        # markup really contains a <tbody> element - confirm against the page.
        rows = response.xpath('//div[@id="col3_content"]/table[1]/tbody/tr[not(@class)]')

        for row in rows:
            anchor = row.xpath("td[1]//a")

            href = anchor.xpath("@href").extract_first()
            name = anchor.xpath("text()").extract_first()

            # extract_first() yields None when nothing matched; without this
            # guard one link-less row would abort the remaining categories.
            if href is None or name is None:
                log.info("Skipping a category row without a link or link text")
                continue

            # request the category page
            request = scrapy.Request(response.urljoin(href), callback=self.parse_category_contents)
            request.meta["category"] = name.strip()
            yield request