Example #1
    def test_prev(self, simple):
        """ Test prev property """
        # self.TEI.parse()
        # Normal passage checking
        p = self.TEI.getTextualNode(["2", "40", "8"], simple=simple)
        self.assertEqual(str(p.prev.reference), "2.40.7")
        p = self.TEI.getTextualNode(["2", "40"], simple=simple)
        self.assertEqual(str(p.prev.reference), "2.39")
        p = self.TEI.getTextualNode(["2"], simple=simple)
        self.assertEqual(str(p.prev.reference), "1")

        # test failing passage
        p = self.TEI.getTextualNode(["1", "pr", "1"], simple=simple)
        self.assertEqual(p.prev, None)
        p = self.TEI.getTextualNode(["1", "pr"], simple=simple)
        self.assertEqual(p.prev, None)
        p = self.TEI.getTextualNode(["1"], simple=simple)
        self.assertEqual(p.prev, None)

        # First child should get to parent's prev last child
        p = self.TEI.getTextualNode(["1", "1", "1"], simple=simple)
        self.assertEqual(str(p.prev.reference), "1.pr.22")

        # Beginning of lowest level passage and beginning of parent level
        p = self.TEI.getTextualNode(["2", "pr", "sa"], simple=simple)
        self.assertEqual(str(p.prev.reference), "1.39.8")
Example #2
def test_scan_video_invalid_extension(movies, tmpdir, monkeypatch):
    monkeypatch.chdir(str(tmpdir))
    movie_name = os.path.splitext(movies['man_of_steel'].name)[0] + '.mp3'
    tmpdir.ensure(movie_name)
    with pytest.raises(ValueError) as excinfo:
        scan_video(movie_name)
    assert str(excinfo.value) == '.mp3 is not a valid video extension'
Example #3
    def build_uri(self, base, matches):
        if not base:
            return None
        if self.uriTemplate:
            expanded = str(self.uriTemplate)
        elif self.fragmentTemplate:
            if "#" in base:
                base += self.space.fragmentSeparator
            else:
                base += "#"
            expanded = base + str(self.fragmentTemplate)
        else:
            return None

        expanded = expanded.replace("{+base}", base)
        for var, value in matches.items():
            slug = self.transform_value(value)
            expanded = expanded.replace("{%s}" % var, slug)
        # if base is e.g. "http://localhost/res/" and expanded is a
        # /-prefixed relative uri like "/sfs/9999:998", urljoin
        # results in "http://localhost/sfs/9999:998", not
        # "http://localhost/res/sfs/9999:998" like you'd expect. So we
        # work around that here.
        if expanded[0] == "/":
            expanded = expanded[1:]
            
        if expanded.startswith("http://") or expanded.startswith("https://"):
            return urljoin(base, expanded)
        else:
            # see the test integrationLegalURI.CustomCoinstruct.test_1845_50_s.1
            return "%s/%s" % (base, expanded)
Example #4
 def config_string(self):
     """Generate a '|' delimited string of instance attributes, for saving to config.ini."""
     return '|'.join([
         self.name, self.url, self.api_key, self.cat_ids, str(int(self.enabled)),
         self.search_mode, str(int(self.search_fallback)),
         str(int(self.enable_daily)), str(int(self.enable_backlog)), str(int(self.enable_manualsearch))
     ])
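
A hypothetical counterpart for reading such a line back, assuming the same field order as config_string above; the helper name and return shape are made up for illustration:

def parse_config_string(line):
    # Split a '|'-delimited provider line back into typed fields.
    (name, url, api_key, cat_ids, enabled, search_mode, search_fallback,
     enable_daily, enable_backlog, enable_manualsearch) = line.split('|')
    return {
        'name': name, 'url': url, 'api_key': api_key, 'cat_ids': cat_ids,
        'enabled': bool(int(enabled)),
        'search_mode': search_mode,
        'search_fallback': bool(int(search_fallback)),
        'enable_daily': bool(int(enable_daily)),
        'enable_backlog': bool(int(enable_backlog)),
        'enable_manualsearch': bool(int(enable_manualsearch)),
    }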
Example #5
 def select(self, query, format="sparql"):
     # FIXME: workaround for the fact that rdflib select uses FROM
     # <%s> differently than Sesame/Fuseki. We remove the 'FROM
     # <%s>' part from the query and instead get a context graph
     # for the same URI.
     re_fromgraph = re.compile(r" FROM <(?P<graphuri>[^>]+)> ")
     graphuri = None
     m = re_fromgraph.search(query)
     if m:
         graphuri = m.group("graphuri")
         query = re_fromgraph.sub(" ", query)
     try:
         res = self._getcontextgraph(graphuri).query(query)
     except pyparsing.ParseException as e:
         raise errors.SparqlError(e)
     if format == "sparql":
         return res.serialize(format="xml")
     elif format == "json":
         return res.serialize(format="json")
     else:
         # or just
         # return self._sparql_results_to_list(res.serialize(format="xml"))
         l = []
         for r in res.bindings:
             d = {}
             for (key, val) in r.items():
                 d[str(key)] = str(val)
             l.append(d)
         return l
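
A self-contained sketch of the FROM-clause rewrite performed above; the query text is illustrative:

import re

re_fromgraph = re.compile(r" FROM <(?P<graphuri>[^>]+)> ")
query = "SELECT ?s FROM <http://example.org/ctx> WHERE { ?s ?p ?o }"
m = re_fromgraph.search(query)
if m:
    print(m.group("graphuri"))           # http://example.org/ctx
    print(re_fromgraph.sub(" ", query))  # SELECT ?s WHERE { ?s ?p ?o }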
Example #6
def getprevnext(passage, request_urn, output=XML):
    _prev = ""
    _next = ""

    if passage.prev:
        _prev = URN("{}:{}".format(passage.urn.upTo(URN.VERSION), str(passage.prev)))
    if passage.next:
        _next = URN("{}:{}".format(passage.urn.upTo(URN.VERSION), str(passage.next)))

    if output == XML:
        return """
            <GetPrevNext xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns="http://chs.harvard.edu/xmlns/cts">
                <request>
                    <requestName>GetPrevNext</requestName>
                    <requestUrn>{request_urn}</requestUrn>
                </request>
                <reply>
                    <urn>{full_urn}</urn>
                    <prevnext>
                        <prev><urn>{prev}</urn></prev>
                        <next><urn>{next}</urn></next>
                    </prevnext>
                </reply>
            </GetPrevNext>""".format(
            request_urn=request_urn,
            full_urn=str(passage.urn),
            prev=str(_prev),
            next=str(_next)
        )
Example #7
 def test_prevnext_on_close_to_last_passage(self):
     passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39.2-2.40.5"))
     self.assertEqual(
         str(passage.nextId),
         "2.40.6-2.40.8",
         "Next reff should finish at the end of the text, no matter the length of the reference",
     )
     self.assertEqual(str(passage.prevId), "2.37.9-2.39.1", "Prev reff should be the same length as sibling")
Example #8
    def test_first_list(self):
        passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39"))
        self.assertEqual(str(passage.firstId), "2.39.1", "First reff should be the first")
        self.assertEqual(str(passage.lastId), "2.39.2", "Last reff should be the last")

        passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39-2.40"))
        self.assertEqual(str(passage.firstId), "2.39.1", "First reff should be the first")
        self.assertEqual(str(passage.lastId), "2.40.8", "Last reff should be the last")
Example #9
 def test_prevnext_on_close_to_first_passage(self):
     passage = self.text.getPassage(MyCapytain.common.reference.Reference("1.pr.10-1.2.1"))
     self.assertEqual(str(passage.nextId), "1.2.2-1.4.1", "Next reff should be the same length as sibling")
     self.assertEqual(
         str(passage.prevId),
         "1.pr.1-1.pr.9",
         "Prev reff should start at the beginning of the text, no matter the length of the reference",
     )
Example #10
 def _load_resources(self, resource_path):
     # returns a mapping [resource label] => [resource uri]
     # resource_path is given relative to cwd
     graph = Graph()
     graph.load(resource_path, format='n3')
     d = {}
     for uri, label in graph.subject_objects(RDFS.label):
         d[str(label)] = str(uri)
     return d
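
Illustrative input and output for the method above, assuming a small N3 file; the file name and triples are made up:

# resources.n3:
#   @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
#   <http://example.org/a> rdfs:label "Thing A" .
#
# self._load_resources('resources.n3')  ->  {'Thing A': 'http://example.org/a'}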
Example #11
 def test_highest(self):
     self.assertEqual(
         str((Reference("1.1-1.2.8")).highest), "1.1",
         "1.1 is higher"
     )
     self.assertEqual(
         str((Reference("1.1-2")).highest), "2",
         "2 is higher"
     )
Example #12
 def test_properties(self):
     a = Reference("1.1@Achilles-1.10@Atreus[3]")
     self.assertEqual(str(a.start), "1.1@Achilles")
     self.assertEqual(a.start.list, ["1", "1"])
     self.assertEqual(a.start.subreference[0], "Achilles")
     self.assertEqual(str(a.end), "1.10@Atreus[3]")
     self.assertEqual(a.end.list, ["1", "10"])
     self.assertEqual(a.end.subreference[1], 3)
     self.assertEqual(a.end.subreference, ("Atreus", 3))
Example #13
 def test_Unicode_Support(self):
     a = Reference("1.1@καὶ[0]-1.10@Ἀλκιβιάδου[3]")
     self.assertEqual(str(a.start), "1.1@καὶ[0]")
     self.assertEqual(a.start.list, ["1", "1"])
     self.assertEqual(a.start.subreference[0], "καὶ")
     self.assertEqual(str(a.end), "1.10@Ἀλκιβιάδου[3]")
     self.assertEqual(a.end.list, ["1", "10"])
     self.assertEqual(a.end.subreference[1], 3)
     self.assertEqual(a.end.subreference, ("Ἀλκιβιάδου", 3))
Example #14
 def test_xml_Work_GetItem(self):
     """ Test access through getItem obj[urn] """
     TI = TextInventory(resource=self.getCapabilities, id="TestInv")
     tg = TI["urn:cts:latinLit:phi1294"]
     self.assertIsInstance(tg["urn:cts:latinLit:phi1294.phi002"], Work)
     self.assertEqual(str(tg["urn:cts:latinLit:phi1294.phi002"].urn), "urn:cts:latinLit:phi1294.phi002")
     self.assertIsInstance(tg["urn:cts:latinLit:phi1294.phi002.perseus-lat2"], Text)
     self.assertEqual(
         str(tg["urn:cts:latinLit:phi1294.phi002.perseus-lat2"].urn), "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
     )
Example #15
 def test_str(self):
     a = Citation(name="book", xpath="/tei:div[@n=\"?\"]", scope="/tei:TEI/tei:body/tei:text/tei:div")
     self.assertEqual(
         str(a),"<tei:cRefPattern n=\"book\" matchPattern=\"(\\w+)\" replacementPattern=\"#xpath(/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"$1\"])\"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"
     )
     b = Citation(name="chapter", xpath="/tei:div[@n=\"?\"]", scope="/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"?\"]")
     self.assertEqual(
         str(b),"<tei:cRefPattern n=\"chapter\" matchPattern=\"(\\w+)\.(\\w+)\" replacementPattern=\"#xpath(/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"$1\"]/tei:div[@n=\"$2\"])\"><tei:p>This pointer pattern extracts chapter</tei:p></tei:cRefPattern>"
     )
     a = Citation()
     self.assertEqual(str(a), "")
Example #16
def mock_volume(override=None):
    volume_model = {
        "id": str(uuid.uuid4()),
        "sizeInKb": (8 * constants.GIGABYTE) // constants.KILOBYTE,
        "storagePoolId": str(uuid.uuid4()),
        "useRmcache": False,
        "volumeType": constants.VOLUME_TYPE_THICK,
        "mappedSdcInfo": []
    }
    volume_model.update(override or {})
    return volume_model
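
Usage sketch for mock_volume above: the default model with one field overridden.

volume = mock_volume({'useRmcache': True})
assert volume['useRmcache'] is True       # overridden field
assert volume['mappedSdcInfo'] == []      # other defaults remain intact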
Example #17
 def __init__(self, resource):
     self.base = str(resource.value(COIN.base))
     self.fragmentSeparator = str(resource.value(COIN.fragmentSeparator))
     self.slugTransform = SlugTransformer(resource.value(COIN.slugTransform))
     self.templates = [Template(self, template_resource)
                       for template_resource in resource.objects(
                               COIN.template)]
     # primary sort order by :priority
     # secondary sort by type specificity (whether a self.forType is specified)
     # tertiary sort order by specificity (number of vars per template)
     self.templates.sort(key=lambda x: (x.priority, x.forType, len(x.bindings)),
                         reverse=True) 
Example #18
 def test_run_revsort(self):
     outDir = self._createTempDir()
     self._tester('src/toil/test/cwl/revsort.cwl',
                  'src/toil/test/cwl/revsort-job.json',
                  outDir, {
         # Having unicode string literals isn't necessary for the assertion but makes for a
         # less noisy diff in case the assertion fails.
         u'output': {
             u'location': "file://" + str(os.path.join(outDir, 'output.txt')),
             u'basename': str("output.txt"),
             u'size': 1111,
             u'class': u'File',
             u'checksum': u'sha1$b9214658cc453331b62c2282b772a5c063dbd284'}})
Example #19
def test_particle():
    assert str(Eun) == u'은(는)'
    assert str(Eul) == u'을(를)'
    assert str(Ida) == u'(이)'
    if PY2:
        try:
            __import__('unidecode')
        except ImportError:
            assert repr(Ida) == u"<Particle: u'(\\uc774)'>"
        else:
            assert repr(Ida) == u'<Particle: (i)>'
    else:
        assert repr(Ida) == u'<Particle: (이)>'
Example #20
    def test_get_parent(self):
        a = Reference("1.1")
        b = Reference("1")
        c = Reference("1.1-2.3")
        d = Reference("1.1-1.2")
        e = Reference("1.1@Something[0]-1.2@SomethingElse[2]")
        f = Reference("1-2")

        self.assertEqual(str(a.parent), "1")
        self.assertEqual(b.parent, None)
        self.assertEqual(str(c.parent), "1-2")
        self.assertEqual(str(d.parent), "1")
        self.assertEqual(str(e.parent), "1@Something[0]-1@SomethingElse[2]")
        self.assertEqual(f.parent, None)
Example #21
    def test_prev_prev_next_property(self):
        """ Test reference property
        As of 0.1.0, .next and prev are URNs
        """
        passage = Passage(
            urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1",
            resource=GET_PASSAGE,
            retriever=self.endpoint
        )

        # When next does not exist from the original resource
        self.assertEqual(str((URN(passage.prevId)).reference), "1.pr")
        self.assertEqual(str((URN(passage.nextId)).reference), "1.2")
        self.endpoint.getPrevNextUrn.assert_called_with(urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1")
Example #22
    def test_get_passage_hyper_context_double_slash_xpath(self):
        simple = self.seneca.getTextualNode(Reference("1-10"))
        str_simple = simple.export(
            output=Mimetypes.XML.Std
        )
        text = Text(
            resource=str_simple,
            citation=self.seneca.citation
        )
        self.assertEqual(
            text.getTextualNode(Reference("1"), simple=True).export(
                output=Mimetypes.PLAINTEXT,
                exclude=["tei:note"]
            ).strip(),
            "Di coniugales tuque genialis tori,",
            "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
        )
        self.assertEqual(
            text.getTextualNode(Reference("10"), simple=True).export(
                output=Mimetypes.PLAINTEXT
            ).strip(),
            "aversa superis regna manesque impios",
            "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
        )
        self.assertEqual(
            list(map(lambda x: str(x), text.getValidReff(level=1))),
            ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
            "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
        )

        simple = self.seneca.getTextualNode(Reference("1"))
        str_simple = simple.tostring(encoding=str)
        text = Text(
            resource=str_simple,
            citation=self.seneca.citation
        )
        self.assertEqual(
            text.getTextualNode(Reference("1"), simple=True).export(
                output=Mimetypes.PLAINTEXT,
                exclude=["tei:note"]
            ).strip(),
            "Di coniugales tuque genialis tori,",
            "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
        )
        self.assertEqual(
            list(map(lambda x: str(x), text.getValidReff(level=1))),
            ["1"],
            "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
        )
Example #23
    def getValidReff(self, level=1, reference=None):
        """ Cached method of the original object

        :param level:
        :param reference: Reference object
        :return: References
        """
        __cachekey__ = _cache_key("Text_GetValidReff", level, str(self.urn), str(reference))
        __cached__ = self.cache.get(__cachekey__)
        if __cached__:
            return __cached__
        else:
            __cached__ = super(Text, self).getValidReff(level, reference)
            self.cache.set(__cachekey__, __cached__, timeout=Text.TIMEOUT["getValidReff"])
            return __cached__
Example #24
    def testFindCitation(self):
        self.assertEqual(
            str(self.TEI.citation),
            '<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>'
        )
        self.assertEqual(
            str(self.TEI.citation.child),
            '<tei:cRefPattern n="poem" matchPattern="(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\']/tei:div[@n=\\\'$2\\\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>'
        )
        self.assertEqual(
            str(self.TEI.citation.child.child),
            '<tei:cRefPattern n="line" matchPattern="(\\w+)\.(\\w+)\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\']/tei:div[@n=\\\'$2\\\']/tei:l[@n=\\\'$3\\\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>'
        )

        self.assertEqual(len(self.TEI.citation), 3)
Example #25
 def assert_cmd_exit_equals(self, cmd, main_func, expected):
     sys.argv = re.sub(' +', ' ', cmd).split(' ')
     try:
         main_func()
         # reaching this line means main_func() never called sys.exit()
         self.fail("exit() not called.")
     except SystemExit as e:
         self.assertEqual(str(e), expected)
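
A hedged usage sketch; main is a hypothetical entry point that always exits with a usage message:

import sys

def main():
    # hypothetical CLI entry point that exits immediately
    sys.exit('usage: prog [options]')

# inside a TestCase that mixes in the helper above:
# self.assert_cmd_exit_equals('prog --help', main, 'usage: prog [options]')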
Example #26
 def test_get_passage_hypercontext_complex_xpath(self):
     simple = self.text_complex.getTextualNode(Reference("pr.1-1.2"))
     str_simple = simple.tostring(encoding=str)
     text = Text(
         resource=str_simple,
         citation=self.text_complex.citation
     )
     self.assertIn(
         "Pervincis tandem",
         text.getTextualNode(Reference("pr.1"), simple=True).export(
             output=Mimetypes.PLAINTEXT,
             exclude=["tei:note"]).strip(),
         "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
     )
     self.assertEqual(
         text.getTextualNode(Reference("1.2"), simple=True).export(
             output=Mimetypes.PLAINTEXT).strip(),
         "lusimus quos in Suebae gratiam virgunculae,",
         "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
     )
     self.assertEqual(
         list(map(lambda x: str(x), text.getValidReff(level=2))),
         [
             "pr.1", "1.1", "1.2"
         ],
         "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
     )
Example #27
    def test_urn(self):
        """ Test setters and getters for urn """

        # Should work with string
        self.TEI.urn = "urn:cts:latinLit:tg.wk.v" 
        self.assertEqual(isinstance(self.TEI.urn, MyCapytain.common.reference.URN), True)
        self.assertEqual(str(self.TEI.urn), "urn:cts:latinLit:tg.wk.v")

        # Test for URN
        self.TEI.urn = MyCapytain.common.reference.URN("urn:cts:latinLit:tg.wk.v2") 
        self.assertEqual(isinstance(self.TEI.urn, MyCapytain.common.reference.URN), True)
        self.assertEqual(str(self.TEI.urn), "urn:cts:latinLit:tg.wk.v2")

        # Test it fails if not basestring or URN
        with self.assertRaises(TypeError): 
            self.TEI.urn = 2
Example #28
def parse_links(html, encoding=None):
    """Process all links in given html and replace them if markup is added."""
    if encoding is None:
        encoding = settings.DEFAULT_CHARSET

    # The passed HTML may be a string or bytes, depending on what is calling
    # this method. For example, Django response.content is always bytes. We
    # always want this content to be a string for our purposes.
    html_as_text = force_text(html, encoding=encoding)

    # This call invokes Wagtail-specific logic that converts references to
    # Wagtail pages, documents, and images to their proper link URLs.
    expanded_html = expand_db_html(html_as_text)

    soup = BeautifulSoup(expanded_html, 'html.parser')
    link_tags = get_link_tags(soup)
    for tag in link_tags:
        original_link = str(tag)
        link_with_markup = add_link_markup(tag)
        if link_with_markup:
            expanded_html = expanded_html.replace(
                original_link,
                link_with_markup
            )

    return expanded_html
Example #29
    def test_children(self):
        """ Test next property, given that next information already exists or not)
        """

        self.endpoint.getPassage = mock.MagicMock(return_value=GET_PASSAGE)
        self.endpoint.getPrevNextUrn = mock.MagicMock(return_value=NEXT_PREV)
        self.endpoint.getFirstUrn = mock.MagicMock(return_value=Get_FIRST)
        self.endpoint.getValidReff = mock.MagicMock(return_value=GET_VALID_REFF_1_1)
        passage = Text(
            urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1",
            retriever=self.endpoint
        )

        self.assertEqual(len(list(passage.children)), 6)
        self.assertEqual(
            [str(x.urn) for x in passage.children],
            [
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.1",
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.2",
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.3",
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.4",
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.5",
                "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.6"
            ],
            "Passage should be retrieved and have the correct URN"
        )
Example #30
    def test_getpassageplus(self, requests):
        text = Text("urn:cts:latinLit:phi1294.phi002.perseus-lat2", self.endpoint)
        requests.return_value.text = GET_PASSAGE_PLUS

        # Test with -1
        passage = text.getPassagePlus(reference="1.1")
        requests.assert_called_with(
            "http://services.perseids.org/api/cts",
            params={
                "request": "GetPassagePlus",
                "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"
            }
        )

        self.assertIsInstance(passage, Passage)
        self.assertEqual(str(passage.urn), "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1")
        self.assertEqual(
            passage.xml.findall(".//{http://www.tei-c.org/ns/1.0}l[@n='1']")[0].text,
            "Hic est quem legis ille, quem requiris, "
        )
        self.assertEqual(text.citation.name, "book")
        self.assertEqual(len(text.citation), 3)

        # Test without reference
        text.getPassagePlus()
        requests.assert_called_with(
            "http://services.perseids.org/api/cts",
            params={
                "request": "GetPassagePlus",
                "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
            }
        )
Example #31
    def _ep_data(self, ep_obj):
        """
        Creates an elementTree XML structure for a MediaBrowser style episode.xml
        and returns the resulting data object.

        ep_obj: an episode object to create the NFO for
        """

        eps_to_write = [ep_obj] + ep_obj.related_episodes

        persons_dict = {u'Director': [], u'GuestStar': [], u'Writer': []}

        my_show = self._get_show_data(ep_obj.series)
        if not my_show:
            return None

        root_node = etree.Element(u'Item')

        # write a MediaBrowser XML containing info for all matching episodes
        for ep_to_write in eps_to_write:

            try:
                my_ep = my_show[ep_to_write.season][ep_to_write.episode]
            except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
                log.info(
                    u'Unable to find episode {number} on {indexer}... has it been removed? Should I delete from db?',
                    {
                        u'number':
                        episode_num(ep_to_write.season, ep_to_write.episode),
                        u'indexer':
                        indexerApi(ep_obj.series.indexer).name
                    })
                return None

            if ep_to_write == ep_obj:
                # root (or single) episode

                # default to today's date for specials if firstaired is not set
                if ep_to_write.season == 0 and not getattr(
                        my_ep, u'firstaired', None):
                    my_ep[u'firstaired'] = str(datetime.date.fromordinal(1))

                if not (getattr(my_ep, u'episodename', None)
                        and getattr(my_ep, u'firstaired', None)):
                    return None

                episode = root_node

                if ep_to_write.name:
                    episode_name = etree.SubElement(episode, u'EpisodeName')
                    episode_name.text = ep_to_write.name

                episode_number = etree.SubElement(episode, u'EpisodeNumber')
                episode_number.text = str(ep_obj.episode)

                if ep_obj.related_episodes:
                    episode_number_end = etree.SubElement(
                        episode, u'EpisodeNumberEnd')
                    episode_number_end.text = str(ep_to_write.episode)

                season_number = etree.SubElement(episode, u'SeasonNumber')
                season_number.text = str(ep_to_write.season)

                if not ep_obj.related_episodes and getattr(
                        my_ep, u'absolute_number', None):
                    absolute_number = etree.SubElement(episode,
                                                       u'absolute_number')
                    absolute_number.text = str(my_ep[u'absolute_number'])

                if ep_to_write.airdate != datetime.date.fromordinal(1):
                    first_aired = etree.SubElement(episode, u'FirstAired')
                    first_aired.text = str(ep_to_write.airdate)

                metadata_type = etree.SubElement(episode, u'Type')
                metadata_type.text = u'Episode'

                if ep_to_write.description:
                    overview = etree.SubElement(episode, u'Overview')
                    overview.text = ep_to_write.description

                if not ep_obj.related_episodes:
                    if getattr(my_ep, u'rating', None):
                        rating = etree.SubElement(episode, u'Rating')
                        rating.text = str(my_ep[u'rating'])

                    if getattr(my_show, u'imdb_id', None):
                        IMDB_ID = etree.SubElement(episode, u'IMDB_ID')
                        IMDB_ID.text = my_show[u'imdb_id']

                        IMDB = etree.SubElement(episode, u'IMDB')
                        IMDB.text = my_show[u'imdb_id']

                        IMDbId = etree.SubElement(episode, u'IMDbId')
                        IMDbId.text = my_show[u'imdb_id']

                indexer_id = etree.SubElement(episode, u'id')
                indexer_id.text = str(ep_to_write.indexerid)

                persons = etree.SubElement(episode, u'Persons')

                if getattr(my_show, u'_actors', None):
                    for actor in my_show[u'_actors']:
                        if not (u'name' in actor and actor[u'name'].strip()):
                            continue

                        cur_actor = etree.SubElement(persons, u'Person')

                        cur_actor_name = etree.SubElement(cur_actor, u'Name')
                        cur_actor_name.text = actor[u'name'].strip()

                        cur_actor_type = etree.SubElement(cur_actor, u'Type')
                        cur_actor_type.text = u'Actor'

                        if u'role' in actor and actor[u'role'].strip():
                            cur_actor_role = etree.SubElement(
                                cur_actor, u'Role')
                            cur_actor_role.text = actor[u'role'].strip()

                language = etree.SubElement(episode, u'Language')
                try:
                    language.text = my_ep[u'language']
                except Exception:
                    language.text = app.INDEXER_DEFAULT_LANGUAGE  # tvrage api doesn't provide language so we must assume a value here

                thumb = etree.SubElement(episode, u'filename')
                # TODO: See what this is needed for... if it's still needed
                # just write this to the NFO regardless of whether it actually exists or not
                # note: renaming files after nfo generation will break this, tough luck
                thumb_text = self.get_episode_thumb_path(ep_obj)
                if thumb_text:
                    thumb.text = thumb_text

            else:
                # append data from (if any) related episodes
                episode_number_end.text = str(ep_to_write.episode)

                if ep_to_write.name:
                    if not episode_name.text:
                        episode_name.text = ep_to_write.name
                    else:
                        episode_name.text = u', '.join(
                            [episode_name.text, ep_to_write.name])

                if ep_to_write.description:
                    if not overview.text:
                        overview.text = ep_to_write.description
                    else:
                        overview.text = u'\r'.join(
                            [overview.text, ep_to_write.description])

            # collect all directors, guest stars and writers
            if getattr(my_ep, u'director', None):
                persons_dict[u'Director'] += [
                    x.strip() for x in my_ep[u'director'].split(u'|')
                    if x.strip()
                ]
            if getattr(my_ep, u'gueststars', None):
                persons_dict[u'GuestStar'] += [
                    x.strip() for x in my_ep[u'gueststars'].split(u'|')
                    if x.strip()
                ]
            if getattr(my_ep, u'writer', None):
                persons_dict[u'Writer'] += [
                    x.strip() for x in my_ep[u'writer'].split(u'|')
                    if x.strip()
                ]

        # fill in Persons section with collected directors, guest stars and writers
        for person_type, names in iteritems(persons_dict):
            # remove doubles
            names = list(set(names))
            for cur_name in names:
                person = etree.SubElement(persons, u'Person')
                cur_person_name = etree.SubElement(person, u'Name')
                cur_person_name.text = cur_name
                cur_person_type = etree.SubElement(person, u'Type')
                cur_person_type.text = person_type

        # Make it purdy
        helpers.indent_xml(root_node)
        data = etree.ElementTree(root_node)

        return data
Example #32
def _values_to_encode(data):
    return [(k, str(data[k]).strip().encode('utf8')) for k in data
            if k != 'signature']
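
An illustrative call, showing that 'signature' is dropped and every other value is stripped and UTF-8 encoded:

data = {'amount': ' 10 ', 'currency': 'EUR', 'signature': 'abc'}
print(sorted(_values_to_encode(data)))
# [('amount', b'10'), ('currency', b'EUR')]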
Example #33
def test_movie_fromguess_insufficient_data(movies):
    guess = {'type': 'movie'}
    with pytest.raises(ValueError) as excinfo:
        Movie.fromguess(movies['man_of_steel'].name, guess)
    assert str(excinfo.value) == 'Insufficient data to process the guess'
Example #34
def test_scan_videos_path_is_not_a_directory(movies, tmpdir, monkeypatch):
    monkeypatch.chdir(str(tmpdir))
    tmpdir.ensure(movies['man_of_steel'].name)
    with pytest.raises(ValueError) as excinfo:
        scan_videos(movies['man_of_steel'].name)
    assert str(excinfo.value) == 'Path is not a directory'
Example #35
def test_hash_thesubdb_too_small(tmpdir):
    path = tmpdir.ensure('test_too_small.mkv')
    assert hash_thesubdb(str(path)) is None
Example #36
    def _ep_data(self, ep_obj):
        """
        Creates an elementTree XML structure for a MediaBrowser style episode.xml
        and returns the resulting data object.

        ep_obj: an episode object to create the NFO for
        """

        eps_to_write = [ep_obj] + ep_obj.related_episodes

        my_show = self._get_show_data(ep_obj.series)
        if not my_show:
            return None

        root_node = etree.Element('details')
        movie = etree.SubElement(root_node, 'movie')

        movie.attrib['isExtra'] = 'false'
        movie.attrib['isSet'] = 'false'
        movie.attrib['isTV'] = 'true'

        # write a MediaBrowser XML containing info for all matching episodes
        for ep_to_write in eps_to_write:

            try:
                my_ep = my_show[ep_to_write.season][ep_to_write.episode]
            except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
                log.info(
                    'Unable to find episode {ep_num} on {indexer}...'
                    ' has it been removed? Should I delete from db?', {
                        'ep_num':
                        episode_num(ep_to_write.season, ep_to_write.episode),
                        'indexer':
                        indexerApi(ep_obj.series.indexer).name,
                    })
                return None

            if ep_to_write == ep_obj:
                # root (or single) episode

                # default to today's date for specials if firstaired is not set
                if ep_to_write.season == 0 and not getattr(
                        my_ep, 'firstaired', None):
                    my_ep['firstaired'] = str(datetime.date.fromordinal(1))

                if not (getattr(my_ep, 'episodename', None)
                        and getattr(my_ep, 'firstaired', None)):
                    return None

                episode = movie

                if ep_to_write.name:
                    episode_name = etree.SubElement(episode, 'title')
                    episode_name.text = ep_to_write.name

                season_number = etree.SubElement(episode, 'season')
                season_number.text = str(ep_to_write.season)

                episode_number = etree.SubElement(episode, 'episode')
                episode_number.text = str(ep_to_write.episode)

                if getattr(my_show, 'firstaired', None):
                    try:
                        year_text = str(
                            datetime.datetime.strptime(my_show['firstaired'],
                                                       dateFormat).year)
                        if year_text:
                            year = etree.SubElement(episode, 'year')
                            year.text = year_text
                    except Exception:
                        pass

                if getattr(my_show, 'overview', None):
                    plot = etree.SubElement(episode, 'plot')
                    plot.text = my_show['overview']

                if ep_to_write.description:
                    overview = etree.SubElement(episode, 'episodeplot')
                    overview.text = ep_to_write.description

                if getattr(my_show, 'contentrating', None):
                    mpaa = etree.SubElement(episode, 'mpaa')
                    mpaa.text = my_show['contentrating']

                if not ep_obj.related_episodes and getattr(
                        my_ep, 'rating', None):
                    try:
                        rating_value = int(float(my_ep['rating']) * 10)
                    except ValueError:
                        rating_value = 0

                    if rating_value:
                        # distinct names: the Element must not shadow the numeric value
                        rating = etree.SubElement(episode, 'rating')
                        rating.text = str(rating_value)

                if getattr(my_ep, 'director', None):
                    director = etree.SubElement(episode, 'director')
                    director.text = my_ep['director']

                if getattr(my_ep, 'writer', None):
                    writer = etree.SubElement(episode, 'credits')
                    writer.text = my_ep['writer']

                if getattr(my_show, '_actors', None) or getattr(
                        my_ep, 'gueststars', None):
                    cast = etree.SubElement(episode, 'cast')
                    if getattr(my_ep, 'gueststars', None) and isinstance(
                            my_ep['gueststars'], string_types):
                        for actor in (x.strip()
                                      for x in my_ep['gueststars'].split('|')
                                      if x.strip()):
                            cur_actor = etree.SubElement(cast, 'actor')
                            cur_actor.text = actor

                    if getattr(my_show, '_actors', None):
                        for actor in my_show['_actors']:
                            if 'name' in actor and actor['name'].strip():
                                cur_actor = etree.SubElement(cast, 'actor')
                                cur_actor.text = actor['name'].strip()

            else:
                # append data from (if any) related episodes

                if ep_to_write.name:
                    if not episode_name.text:
                        episode_name.text = ep_to_write.name
                    else:
                        episode_name.text = ', '.join(
                            [episode_name.text, ep_to_write.name])

                if ep_to_write.description:
                    if not overview.text:
                        overview.text = ep_to_write.description
                    else:
                        overview.text = '\r'.join(
                            [overview.text, ep_to_write.description])

        # Make it purdy
        helpers.indent_xml(root_node)

        data = etree.ElementTree(root_node)

        return data
Example #37
    def _output(self, fileformat, **keywords):
        """
        Internal function that eases its modification by daughter classes.
        """
        # check for stamp attribute
        keywords["stamp"] = getattr(self, '_stamp', '')

        # add the default parameters, they will be checked against the keywords
        util.setdefaults(
            keywords,
            cols=False,
            distances=False,
            entries=("concept", "counterpart"),
            entry='concept',
            fileformat=fileformat,
            filename=rcParams['filename'],
            formatter='concept',
            modify_ref=False,
            meta=self._meta,
            missing=0,
            prettify='false',
            ignore='all',
            ref='cogid',
            rows=False,
            subset=False,  # setup a subset of the data,
            taxa='taxa',
            threshold=0.6,  # threshold for flat clustering
            tree_calc='neighbor')

        if fileformat in ['triple', 'triples', 'triples.tsv']:
            return tsv2triple(self, keywords['filename'] + '.' + fileformat)

        if fileformat in ['paps.nex', 'paps.csv']:
            paps = self.get_paps(ref=keywords['ref'],
                                 entry=keywords['entry'],
                                 missing=keywords['missing'])
            kw = dict(filename=keywords['filename'] + '.paps')
            if fileformat == 'paps.nex':
                kw['missing'] = keywords['missing']
                return pap2nex(self.cols, paps, **kw)
            return pap2csv(self.cols, paps, **kw)

        # simple printing of taxa
        if fileformat == 'taxa':
            assert hasattr(self, 'taxa')
            return util.write_text_file(keywords['filename'] + '.taxa',
                                        self.cols)

        # csv-output
        if fileformat in ['csv', 'qlc', 'tsv']:

            # get the header line
            header = sorted(
                [s for s in set(self._alias.values()) if s in self._header],
                key=lambda x: self._header[x])
            header = [h.upper() for h in header]

            self._meta.setdefault('taxa', self.cols)

            # get the data, in case a subset is chosen
            if not keywords['subset']:
                # write stuff to file
                return wl2qlc(header, self._data, **keywords)

            cols, rows = keywords['cols'], keywords['rows']

            if not isinstance(cols, (list, tuple, bool)):
                raise ValueError(
                    "[i] Argument 'cols' should be list or tuple.")
            if not isinstance(rows, (dict, bool)):
                raise ValueError("[i] Argument 'rows' should be a dictionary.")

            # check for chosen header
            if cols:
                # get indices for header
                indices = [self._header[x] for x in cols]
                header = [c.upper() for c in cols]
            else:
                indices = [r for r in range(len(self.header))]

            if rows:
                stmts = []
                for key, value in rows.items():
                    if key == 'ID':
                        stmts += ["key " + value]
                    else:
                        idx = self._header[key]
                        stmts += ["line[{0}] ".format(idx) + value]

            log.debug("calculated what should be excluded")

            # get the data
            out = {}
            for key, line in self._data.items():
                log.debug(key)

                if rows:
                    if eval(" and ".join(stmts)):
                        out[key] = [line[i] for i in indices]
                else:
                    out[key] = [line[i] for i in indices]

            log.debug("passing data to wl2qlc")
            return wl2qlc(header, out, **keywords)

        # output dst-format (phylip)
        if fileformat == 'dst':
            # check for distances as keyword
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self, **keywords)

            out = matrix2dst(self._meta['distances'],
                             self.taxa,
                             stamp=keywords['stamp'],
                             taxlen=keywords.get('taxlen', 0))
            return _write_file(keywords['filename'], out, fileformat)

        # output tre-format (newick)
        if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
            if 'tree' not in self._meta:
                # check for distances
                if 'distances' not in self._meta:
                    self._meta['distances'] = wl2dst(self)
                # we look up a function to calculate a tree in the cluster module:
                tree = getattr(cluster, keywords['tree_calc'])(
                    self._meta['distances'],
                    self.cols,
                    distances=keywords['distances'])
            else:
                tree = self._meta['tree']

            return _write_file(keywords['filename'], '{0}'.format(tree),
                               fileformat)

        if fileformat in ['cluster', 'groups']:
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self)  # check for keywords

            if 'groups' not in self._meta:
                self._meta['groups'] = cluster.matrix2groups(
                    keywords['threshold'], self._meta['distances'], self.taxa)
            lines = []
            for taxon, group in sorted(self._meta['groups'].items(),
                                       key=lambda x: x[0]):
                lines.append('{0}\t{1}'.format(taxon, group))
            return _write_file(keywords['filename'], lines, fileformat)

        if fileformat in ['starling', 'star.csv']:
            # make lambda inline for data-check
            l = lambda x: '-' if x == 0 else x

            lines = []
            if 'cognates' not in keywords:
                lines.append('ID\tConcept\t' + '\t'.join(self.taxa))
                for i, concept in enumerate(self.concepts):
                    for line in self.get_list(row=concept,
                                              entry=keywords['entry']):
                        lines.append(
                            str(i + 1) + '\t' + concept + '\t' +
                            '\t'.join([l(t) for t in line]))
            else:
                lines.append(
                    'ID\tConcept\t' +
                    '\t'.join(['{0}\t COG'.format(t) for t in self.taxa]))
                for i, concept in enumerate(self.concepts):
                    cogs = self.get_list(row=concept,
                                         entry=keywords['cognates'])
                    for j, line in enumerate(
                            self.get_list(row=concept,
                                          entry=keywords['entry'])):
                        part = '\t'.join('{0}\t{1}'.format(l(a), b)
                                         for a, b in zip(line, cogs[j]))
                        lines.append(util.tabjoin(i + 1, concept, part))

            return _write_file(keywords['filename'], lines,
                               'starling_' + keywords['entry'] + '.csv')

        if fileformat == 'multistate.nex':
            if not keywords['filename'].endswith('.multistate.nex'):
                keywords['filename'] += '.multistate.nex'

            matrix = wl2multistate(self, keywords['ref'], keywords['missing'])
            return multistate2nex(self.taxa, matrix, keywords['filename'])

        if fileformat == 'separated':
            if not os.path.isdir(keywords['filename']):
                os.mkdir(keywords['filename'])

            for l in self.cols:
                lines = [''] if 'ignore_keys' in keywords else ['ID\t']
                lines[0] += '\t'.join(x.upper() for x in keywords['entries'])
                for key in self.get_list(col=l, flat=True):
                    line = [] if 'ignore_keys' in keywords else [key]
                    for entry in keywords['entries']:
                        tmp = self[key, entry]
                        if isinstance(tmp, list):
                            tmp = ' '.join([str(x) for x in tmp])
                        line += [tmp]
                    lines.append('\t'.join('{0}'.format(x) for x in line))
                _write_file('{0}/{1}'.format(keywords['filename'], l), lines,
                            'tsv')
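
A hedged usage sketch of the dispatch above; wl stands for an instance of the defining class, and the file names are arbitrary:

# wl._output('tsv', filename='mydata')    # tabular output via wl2qlc
# wl._output('dst', filename='mydata')    # phylip-style distance matrix
# wl._output('groups', threshold=0.5)     # one taxon/group pair per line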
Example #38
 def __str__(self):
     return ' '.join([str(x) for x in self])
Example #39
    def _show_data(self, show_obj):
        """
        Creates an elementTree XML structure for a MediaBrowser-style series.xml
        returns the resulting data object.

        show_obj: a Series instance to create the NFO for
        """
        my_show = self._get_show_data(show_obj)

        # If for any reason it couldn't get the show's indexer data, don't go
        # through the rest of this method, as that would be pretty useless.
        if not my_show:
            return False

        tv_node = etree.Element(u'Series')

        if getattr(my_show, u'id', None):
            indexerid = etree.SubElement(tv_node, u'id')
            indexerid.text = str(my_show[u'id'])

        if getattr(my_show, u'seriesname', None):
            series_name = etree.SubElement(tv_node, u'SeriesName')
            series_name.text = my_show[u'seriesname']

        if getattr(my_show, u'status', None):
            status = etree.SubElement(tv_node, u'Status')
            status.text = my_show[u'status']

        if getattr(my_show, u'network', None):
            network = etree.SubElement(tv_node, u'Network')
            network.text = my_show[u'network']

        if getattr(my_show, u'airs_time', None):
            airs_time = etree.SubElement(tv_node, u'Airs_Time')
            airs_time.text = my_show[u'airs_time']

        if getattr(my_show, u'airs_dayofweek', None):
            airs_day_of_week = etree.SubElement(tv_node, u'Airs_DayOfWeek')
            airs_day_of_week.text = my_show[u'airs_dayofweek']

        first_aired = etree.SubElement(tv_node, u'FirstAired')
        if getattr(my_show, u'firstaired', None):
            first_aired.text = my_show[u'firstaired']

        if getattr(my_show, u'contentrating', None):
            content_rating = etree.SubElement(tv_node, u'ContentRating')
            content_rating.text = my_show[u'contentrating']

            mpaa = etree.SubElement(tv_node, u'MPAARating')
            mpaa.text = my_show[u'contentrating']

            certification = etree.SubElement(tv_node, u'certification')
            certification.text = my_show[u'contentrating']

        metadata_type = etree.SubElement(tv_node, u'Type')
        metadata_type.text = u'Series'

        if getattr(my_show, u'overview', None):
            overview = etree.SubElement(tv_node, u'Overview')
            overview.text = my_show[u'overview']

        if getattr(my_show, u'firstaired', None):
            premiere_date = etree.SubElement(tv_node, u'PremiereDate')
            premiere_date.text = my_show[u'firstaired']

        if getattr(my_show, u'rating', None):
            rating = etree.SubElement(tv_node, u'Rating')
            rating.text = str(my_show[u'rating'])

        if getattr(my_show, u'firstaired', None):
            try:
                year_text = str(
                    datetime.datetime.strptime(my_show[u'firstaired'],
                                               dateFormat).year)
                if year_text:
                    production_year = etree.SubElement(tv_node,
                                                       u'ProductionYear')
                    production_year.text = year_text
            except Exception:
                pass

        if getattr(my_show, u'runtime', None):
            running_time = etree.SubElement(tv_node, u'RunningTime')
            running_time.text = str(my_show[u'runtime'])

            runtime = etree.SubElement(tv_node, u'Runtime')
            runtime.text = str(my_show[u'runtime'])

        if getattr(my_show, u'imdb_id', None):
            imdb_id = etree.SubElement(tv_node, u'IMDB_ID')
            imdb_id.text = my_show[u'imdb_id']

            imdb_id = etree.SubElement(tv_node, u'IMDB')
            imdb_id.text = my_show[u'imdb_id']

            imdb_id = etree.SubElement(tv_node, u'IMDbId')
            imdb_id.text = my_show[u'imdb_id']

        if getattr(my_show, u'zap2it_id', None):
            zap2it_id = etree.SubElement(tv_node, u'Zap2ItId')
            zap2it_id.text = my_show[u'zap2it_id']

        if getattr(my_show, u'genre', None) and isinstance(
                my_show[u'genre'], string_types):
            genres = etree.SubElement(tv_node, u'Genres')
            for genre in my_show[u'genre'].split(u'|'):
                if genre.strip():
                    cur_genre = etree.SubElement(genres, u'Genre')
                    cur_genre.text = genre.strip()

            genre = etree.SubElement(tv_node, u'Genre')
            genre.text = u'|'.join([
                x.strip() for x in my_show[u'genre'].split(u'|') if x.strip()
            ])

        if getattr(my_show, u'network', None):
            studios = etree.SubElement(tv_node, u'Studios')
            studio = etree.SubElement(studios, u'Studio')
            studio.text = my_show[u'network']

        if getattr(my_show, u'_actors', None):
            persons = etree.SubElement(tv_node, u'Persons')
            for actor in my_show[u'_actors']:
                if not (u'name' in actor and actor[u'name'].strip()):
                    continue

                cur_actor = etree.SubElement(persons, u'Person')

                cur_actor_name = etree.SubElement(cur_actor, u'Name')
                cur_actor_name.text = actor[u'name'].strip()

                cur_actor_type = etree.SubElement(cur_actor, u'Type')
                cur_actor_type.text = u'Actor'

                if u'role' in actor and actor[u'role'].strip():
                    cur_actor_role = etree.SubElement(cur_actor, u'Role')
                    cur_actor_role.text = actor[u'role'].strip()

        helpers.indent_xml(tv_node)

        data = etree.ElementTree(tv_node)

        return data
Example #40
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:

            rows = html('item')
            if not rows:
                log.debug(
                    'No results returned from provider. Check chosen Newznab search categories'
                    ' in provider settings and/or usenet retention')
                return items

            for item in rows:
                try:
                    title = item.title.get_text(strip=True)
                    download_url = None

                    if item.enclosure:
                        url = item.enclosure.get('url', '').strip()
                        if url:
                            download_url = url

                    if not download_url and item.link:
                        url = item.link.get_text(strip=True)
                        if url:
                            download_url = url

                        if not download_url:
                            url = item.link.next.strip()
                            if url:
                                download_url = url

                    if not (title and download_url):
                        continue

                    if 'gingadaddy' in self.url:
                        size_regex = re.search(r'\d*.?\d* [KMGT]B',
                                               str(item.description))
                        item_size = size_regex.group() if size_regex else -1
                    else:
                        item_size = item.size.get_text(
                            strip=True) if item.size else -1
                        # Use regex to find name-spaced tags
                        # see BeautifulSoup4 bug 1720605
                        # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                        newznab_attrs = item(re.compile('newznab:attr'))
                        for attr in newznab_attrs:
                            item_size = attr['value'] if attr[
                                'name'] == 'size' else item_size

                    size = convert_size(item_size) or -1

                    pubdate_raw = item.pubdate.get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        log.debug('Found result: {0}', title)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Example #41
    def search(self,
               search_strings,
               age=0,
               ep_obj=None,
               force_query=False,
               manual_search=False,
               **kwargs):
        """
        Search a provider and parse the results.

        :param search_strings: A dict with mode (key) and the search value (value)
        :param age: Not used
        :param ep_obj: Not used
        :param force_query: Newznab will by default search using the tvdb/tmdb/imdb id for a show. As a backup it
        can also search using a query string, like the showtitle with the season/episode number. The force_query
        parameter can be passed to force a search using the query string.
        :param manual_search: If the search is started through a manual search, we're utilizing the force_query param.
        :returns: A list of search results (structure)
        """
        results = []
        if not self._check_auth():
            return results

        # For providers that don't have caps, or for which the t=caps is not working.
        if not self.params and all(
                provider not in self.url
                for provider in self.providers_without_caps):
            self.get_capabilities(just_params=True)

        # Search Params
        search_params = {
            't': 'search',
            'limit': 100,
            'offset': 0,
            'cat': ','.join(self.cat_ids),
            'maxage': app.USENET_RETENTION
        }

        for mode in search_strings:
            log.debug('Search mode: {0}', mode)

            if self.needs_auth and self.api_key:
                search_params['apikey'] = self.api_key

            if mode != 'RSS':

                match_indexer = self._match_indexer()
                if match_indexer and not force_query:
                    search_params['t'] = 'tvsearch'
                    search_params.update(match_indexer)

                    if ep_obj.series.air_by_date or ep_obj.series.sports:
                        date_str = str(ep_obj.airdate)
                        search_params['season'] = date_str.partition('-')[0]
                        search_params['ep'] = date_str.partition(
                            '-')[2].replace('-', '/')
                    else:
                        search_params['season'] = ep_obj.scene_season
                        search_params['ep'] = ep_obj.scene_episode
                else:
                    search_params['t'] = 'search'

            if mode == 'Season':
                search_params.pop('ep', '')

            for search_string in search_strings[mode]:

                if mode != 'RSS':
                    # If it's a PROPER search, need to change param to 'search'
                    # so it searches using 'q' param
                    if any(proper_string in search_string
                           for proper_string in self.proper_strings):
                        search_params['t'] = 'search'

                    if search_params['t'] == 'tvsearch':
                        search_value = 'indexer_id: {indexer_id}'.format(
                            indexer_id=match_indexer)
                    else:
                        search_value = 'search string: {search_string}'.format(
                            search_string=search_string)
                    log.debug('Search show using {search}',
                              {'search': search_value})

                    if search_params['t'] != 'tvsearch':
                        search_params['q'] = search_string

                response = self.session.get(urljoin(self.url, 'api'),
                                            params=search_params)
                if not response or not response.text:
                    log.debug('No data returned from provider')
                    continue

                results += self.parse(response.text, mode)

                # Since we aren't using the search string,
                # break out of the search string loop
                if any(param in search_params
                       for param in itervalues(INDEXERS_PARAM)):
                    break

        # Reprocess but now use force_query = True if there are no results
        if not results and not force_query:
            return self.search(search_strings, ep_obj=ep_obj, force_query=True)

        return results
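
The tail of search() retries itself with force_query=True when the id-based search returns nothing. A runnable miniature of that fallback control flow, with the provider internals faked:

def search(search_strings, force_query=False):
    # Pretend the id-based search finds nothing and the free-text
    # (force_query) search succeeds; a real provider would hit the API here.
    results = ['match'] if force_query else []
    if not results and not force_query:
        # Reprocess using the query string instead of the indexer id.
        return search(search_strings, force_query=True)
    return results

assert search({'Episode': ['Show S01E02']}) == ['match']
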
Exemplo n.º 42
0
    def tell_sentry(exception, state, allow_reraise=True):

        if isinstance(exception, pando.Response) and exception.code < 500:
            # Only log server errors
            return

        if isinstance(exception, NeedDatabase):
            # Don't flood Sentry when DB is down
            return

        if isinstance(exception, psycopg2.Error):
            from liberapay.website import website
            if getattr(website, 'db', None):
                try:
                    website.db.one('SELECT 1 AS x')
                except psycopg2.Error:
                    # If it can't answer this simple query, it's down.
                    website.db = NoDB()
                    # Show the proper 503 error page
                    state['exception'] = NeedDatabase()
                    # Tell gunicorn to gracefully restart this worker
                    os.kill(os.getpid(), signal.SIGTERM)

                if 'read-only' in str(exception):
                    # DB is in read only mode
                    state['db_is_readonly'] = True
                    # Show the proper 503 error page
                    state['exception'] = NeedDatabase()
                    # Don't reraise this in tests
                    allow_reraise = False

        if not sentry:
            # No Sentry, log to stderr instead
            traceback.print_exc()
            # Reraise if allowed
            if env.sentry_reraise and allow_reraise:
                raise
            return

        user = state.get('user')
        extra = {}
        if user is None:
            user_id = 'no user'
        elif user is ANON:
            user_id = 'ANON'
        elif not hasattr(user, 'id'):
            user_id = 'no id'
        else:
            user_id = user.id
            extra['user_url'] = 'https://liberapay.com/~{}/'.format(user_id)

        # Tell Sentry
        tags = {
            'user_id': user_id,
            'username': getattr(user, 'username', None),
        }
        extra['request_line'] = getattr(state.get('request'), 'line', None)
        result = sentry.captureException(tags=tags, extra=extra)

        # Put the Sentry id in the state for logging, etc
        state['sentry_ident'] = sentry.get_ident(result)
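
A self-contained miniature of the "degrade to a 503 when the database is down" pattern above; the classes here are simplified stand-ins for Liberapay's, not its real ones.

class NeedDatabase(Exception):
    """Stored in state so the error machinery renders a 503 page."""

class FlakyDB:
    def __init__(self, up=True):
        self.up = up

    def one(self, query):
        if not self.up:
            raise RuntimeError('connection refused')
        return 1

def check_db(db, state):
    try:
        db.one('SELECT 1 AS x')
    except RuntimeError:
        # If it can't answer this simple query, it's down.
        state['exception'] = NeedDatabase()

state = {}
check_db(FlakyDB(up=False), state)
assert isinstance(state['exception'], NeedDatabase)
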
Exemplo n.º 43
0
 def render_content(self, context):
     d = dict((k, v) for k, v in self.__dict__.items() if k[0] != '_')
     return str(json_.dumps(d))
Exemplo n.º 44
0
    def _export(self,
                fileformat,
                sections=None,
                entries=None,
                entry_sep='',
                item_sep='',
                template='',
                exclude=None,
                entry_start='',
                entry_close='',
                **keywords):
        """
        Export a wordlist to various file formats.
        """
        if not sections:
            if fileformat == 'txt':
                sections = dict(h1=('concept', '\n# Concept: {0}\n'),
                                h2=('cogid', '## Cognate-ID: {0}\n'))
            elif fileformat == 'tex':
                sections = dict(
                    h1=('concept', r"\section{{Concept: ``{0}''}}" + '\n'),
                    h2=('cogid', r"\subsection{{Cognate Set: ``{0}''}}" + '\n'))
            elif fileformat == 'html':
                sections = dict(h1=('concept', '<h1>Concept: {0}</h1>'),
                                h2=('cogid', '<h2>Cognate Set: {0}</h2>'))

        if not entries:
            if fileformat == 'txt':
                entries = [('language', '{0} '), ('ipa', '{0}\n')]
            elif fileformat == 'tex':
                entries = [('language', '{0} '), ('ipa', '[{0}]' + '\n')]
            elif fileformat == 'html':
                entries = [('language', '{0}&nbsp;'), ('ipa', '[{0}]\n')]

        util.setdefaults(keywords, filename=rcParams['filename'])

        # get the temporary dictionary
        out = wl2dict(self, sections, entries, exclude)

        # assign the output string
        out_string = ''

        # iterate over the dictionary and start to fill the string
        for key in sorted(out, key=lambda x: str(x).lower()):
            # write key to file
            out_string += key[1]

            # reassign tmp
            tmp = out[key]

            # set the pointer and the index
            pointer = {0: [tmp, sorted(tmp.keys())]}

            while True:
                idx = max(pointer.keys())

                # check for type of current point
                if isinstance(tmp, dict):
                    if pointer[idx][1]:
                        next_key = pointer[idx][1].pop()
                        out_string += next_key[1]
                        tmp = pointer[idx][0][next_key]
                        if isinstance(tmp, dict):
                            pointer[idx + 1] = [tmp, sorted(tmp.keys())]
                        else:
                            pointer[idx + 1] = [tmp, tmp]
                    else:
                        del pointer[idx]
                        if idx == 0:
                            break
                else:
                    tmp_strings = []
                    for line in sorted(tmp):
                        tmp_strings += [item_sep.join(line)]
                    out_string += entry_start + entry_sep.join(
                        tmp_strings) + entry_close
                    tmp = pointer[idx - 1][0]
                    del pointer[idx]

        if fileformat == 'tex':
            out_string = out_string.replace('_', r'\_')
        tmpl = util.read_text_file(template) if template else '{0}'
        _write_file(keywords['filename'], tmpl.format(out_string), fileformat)
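
A runnable miniature of the section/entry templating that _export() performs, using the 'txt' templates from above and invented data:

sections = {'h1': ('concept', '\n# Concept: {0}\n')}
entries = [('language', '{0} '), ('ipa', '{0}\n')]

rows = [
    {'concept': 'hand', 'language': 'English', 'ipa': 'haend'},
    {'concept': 'hand', 'language': 'German', 'ipa': 'hant'},
]

out_string = ''
current = None
for row in sorted(rows, key=lambda r: r['concept']):
    # Open a new section whenever the section key (the concept) changes.
    if row['concept'] != current:
        current = row['concept']
        out_string += sections['h1'][1].format(current)
    # Then render each entry with its template.
    for key, template in entries:
        out_string += template.format(row[key])
print(out_string)
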
Exemplo n.º 45
0
    def __init__(self, infile=None, col='list', row='key', conf=None):
        QLCParser.__init__(self, infile
                           or util.data_path('swadesh', 'swadesh.qlc'), conf
                           or util.data_path('conf', 'swadesh.rc'))

        # get row and key index
        if not hasattr(self, '_rowidx'):
            try:
                rowIdx = self.header[self._alias[row]]
                colIdx = self.header[self._alias[col]]
            except KeyError:
                raise ValueError(
                    "[!] Could not find row and col in configuration or input file!"
                )

            basic_rows = sorted(set([
                self._data[k][rowIdx] for k in self._data
                if k != 0 and type(k) == int
            ]),
                                key=lambda x: ('%s' % x).lower())
            basic_cols = sorted(set([
                self._data[k][colIdx] for k in self._data
                if k != 0 and type(k) == int
            ]),
                                key=lambda x: x.lower())

            # define rows and cols as attributes of the word list
            self.rows = basic_rows
            self.cols = basic_cols

            # define height and width of the word list
            self.height = len(self.rows)
            self.width = len(self.cols)

            # row and column index point to the place where the data of the main
            # items is stored in the original dictionary
            self._rowIdx = rowIdx
            self._colIdx = colIdx
            self._row_name = self._alias[row]
            self._col_name = self._alias[col]

            # create a basic array which assigns ids for the entries in a starling
            # manner.

            # first, find out how many items (== synonyms) there are at most
            # for each row
            tmp_dict = {}
            for key, value in [(k, v) for k, v in self._data.items()
                               if k != 0 and str(k).isnumeric()]:
                try:
                    tmp_dict[value[rowIdx]][value[colIdx]] += [key]
                except KeyError:
                    try:
                        tmp_dict[value[rowIdx]][value[colIdx]] = [key]
                    except KeyError:
                        tmp_dict[value[rowIdx]] = {}
                        tmp_dict[value[rowIdx]][value[colIdx]] = [key]

            # assign the values as _dict-attribute to the dictionary
            self._dict = tmp_dict

            # create the array by counting the maximal number of occurrences, store
            # the row names separately in a dictionary
            tmp_list = []
            row_dict = {}

            count = 0
            for k, d in self._dict.items():

                row_dict[k] = []

                # get maximal amount of "synonyms"
                m = max([len(x) for x in d.values()])

                for i in range(m):
                    tmp = []
                    for j in range(self.width):
                        try:
                            tmp.append(d[self.cols[j]][i])
                        except (KeyError, IndexError):
                            tmp.append(0)
                    row_dict[k] += [count]
                    count += 1
                    tmp_list += [tmp]

            # create the array
            self._array = np.array(tmp_list)
            self._idx = row_dict

            # add indices to alias dictionary for swadesh lists
            for i, col in enumerate(self.cols):
                self._meta[col] = self._array[np.nonzero(self._array[:, i]),
                                              i][0]

        # define a cache dictionary for stored data for quick access
        self._cache = {}
Exemplo n.º 46
0
    def _show_data(self, show_obj):
        """
        Creates an elementTree XML structure for a MediaBrowser-style series.xml
        and returns the resulting data object.

        show_obj: a Series instance to create the NFO for
        """
        my_show = self._get_show_data(show_obj)

        # If for any reason it couldn't get the show's indexer data, let's not go through
        # the rest of this method, as that would be pretty useless.
        if not my_show:
            return False

        root_node = etree.Element('details')
        tv_node = etree.SubElement(root_node, 'movie')
        tv_node.attrib['isExtra'] = 'false'
        tv_node.attrib['isSet'] = 'false'
        tv_node.attrib['isTV'] = 'true'

        title = etree.SubElement(tv_node, 'title')
        title.text = my_show['seriesname']

        if getattr(my_show, 'genre', None):
            genres = etree.SubElement(tv_node, 'genres')
            for genre in my_show['genre'].split('|'):
                if genre and genre.strip():
                    cur_genre = etree.SubElement(genres, 'Genre')
                    cur_genre.text = genre.strip()

        if getattr(my_show, 'firstaired', None):
            first_aired = etree.SubElement(tv_node, 'premiered')
            first_aired.text = my_show['firstaired']
            try:
                year_text = str(
                    datetime.datetime.strptime(my_show['firstaired'],
                                               dateFormat).year)
                if year_text:
                    year = etree.SubElement(tv_node, 'year')
                    year.text = year_text
            except Exception:
                pass

        if getattr(my_show, 'overview', None):
            plot = etree.SubElement(tv_node, 'plot')
            plot.text = my_show['overview']

        if getattr(my_show, 'rating', None):
            try:
                rating = int(float(my_show['rating']) * 10)
            except ValueError:
                rating = 0

            if rating:
                rating_node = etree.SubElement(tv_node, 'rating')
                rating_node.text = str(rating)

        if getattr(my_show, 'status', None):
            status = etree.SubElement(tv_node, 'status')
            status.text = my_show['status']

        if getattr(my_show, 'contentrating', None):
            mpaa = etree.SubElement(tv_node, 'mpaa')
            mpaa.text = my_show['contentrating']

        if getattr(my_show, 'imdb_id', None):
            imdb_id = etree.SubElement(tv_node, 'id')
            imdb_id.attrib['moviedb'] = 'imdb'
            imdb_id.text = my_show['imdb_id']

        if getattr(my_show, 'id', None):
            indexer_id = etree.SubElement(tv_node, 'indexerid')
            indexer_id.text = str(my_show['id'])

        if getattr(my_show, 'runtime', None):
            runtime = etree.SubElement(tv_node, 'runtime')
            runtime.text = str(my_show['runtime'])

        if getattr(my_show, '_actors', None):
            cast = etree.SubElement(tv_node, 'cast')
            for actor in my_show['_actors']:
                if 'name' in actor and actor['name'].strip():
                    cur_actor = etree.SubElement(cast, 'actor')
                    cur_actor.text = actor['name'].strip()

        helpers.indent_xml(root_node)

        data = etree.ElementTree(root_node)

        return data
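
For reference, a runnable miniature of the element tree _show_data() assembles; the element names follow the code above, the values are invented:

import xml.etree.ElementTree as etree

root_node = etree.Element('details')
tv_node = etree.SubElement(root_node, 'movie')
tv_node.attrib['isTV'] = 'true'

title = etree.SubElement(tv_node, 'title')
title.text = 'Example Show'

print(etree.tostring(root_node).decode())
# <details><movie isTV="true"><title>Example Show</title></movie></details>
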
Exemplo n.º 47
0
    def __init__(self, filename, conf=''):
        """
        Parse data regularly if the data has not been loaded from a pickled version.
        """
        self.log = log.get_logger()

        # try to load the data
        internal_import = False

        # check whether it's a dictionary from which we load
        if isinstance(filename, dict):
            input_data = filename
            if 'filename' not in input_data:
                self.filename = rcParams['filename']
            internal_import = True

            # sanity-check the input; there was a bug with a wrong
            # evaluation which is hopefully fixed by now
            tmp_keys = [k for k in input_data if isinstance(k, int)]
            if len(input_data[0]) != len(input_data[tmp_keys[0]]):
                print(input_data[0], input_data[tmp_keys[0]])
                raise ValueError("[!] Wrong input format!")  # pragma: no cover
        # check whether it's another wordlist-object
        elif hasattr(filename, '_data') and hasattr(filename, '_meta'):
            input_data = dict([(key, [v for v in value])
                               for key, value in filename._data.items()])
            input_data.update(filename._meta.items())
            input_data[0] = [
                a for a, b in sorted(
                    filename.header.items(), key=lambda x: x[1], reverse=False)
            ]
            internal_import = True
            self.filename = rcParams['filename']
        # or whether the data is an actual file
        elif isinstance(filename, string_types) and os.path.isfile(filename):
            input_data = read_qlc(filename)
            self.filename = filename
        # raise an error otherwise
        elif isinstance(filename, string_types):
            raise IOError("Input file '{0}' does not exist.".format(filename))
        else:
            raise TypeError(
                "Unrecognized type for 'filename' argument: {0}".format(
                    type(filename).__name__))

        # load the configuration file
        if not conf:
            conf = util.data_path('conf', 'qlc.rc')

        # read the file defined by its path in conf
        tmp = [line.split('\t') for line in util.read_config_file(conf)]

        # define two attributes, _alias, and _class which store the aliases and
        # the datatypes (classes) of the given entries
        self._alias, self._class, self._class_string, self._alias2 = {}, {}, {}, {}
        for name, cls, alias in tmp:
            # make sure the name itself is there
            self._alias[name.lower()] = self._alias[name.upper()] = name
            self._class[name.lower()] = self._class[name.upper()] = eval(cls)
            self._class_string[name.lower()] = self._class_string[
                name.upper()] = cls

            # add the aliases
            for a in alias.split(','):
                self._alias[a.lower()] = self._alias[a.upper()] = name
                self._class[a.lower()] = self._class[a.upper()] = eval(cls)
                self._class_string[a.lower()] = self._class_string[
                    a.upper()] = cls

            self._alias2[name] = sorted(set(alias.split(','))) + [name]

        # append the names in data[0] to self.conf to make sure that all data
        # is covered, even the types which are not specifically defined in the
        # conf file. the datatype defaults here to "str"
        for name in input_data[0]:
            if name.lower() not in self._alias:
                self._alias[name.lower()] = name.lower()
                self._class[name.lower()] = str
            if name.upper() not in self._alias:
                self._alias[name.upper()] = name.lower()
                self._class[name.upper()] = str

        # add empty alias for empty strings XXX why was that? I can't remember
        # why this was important XXX
        self._alias[''] = ''

        # the header stores the indices of the data in the original data dictionary
        self.header = dict(
            zip([self._alias[x] for x in input_data[0]],
                range(len(input_data[0]))))

        # now create a specific header which has all aliases
        self._header = {k: v for k, v in self.header.items()}

        # add a sorted header for reference
        self.columns = sorted(self.header, key=lambda x: self.header[x])

        # assign all aliases to the header
        for alias in self._alias:
            try:
                self._header[alias] = self._header[self._alias[alias]]
            except KeyError:
                pass

        # assign the data as attribute to the word list class. Note that we
        # need to check for the type here, but since numpy also offers integer
        # types, we don't check for type(x) == int; instead we use
        # str.isnumeric, which is True only for strings that represent an
        # integer
        self._data = {
            int(k): v
            for k, v in input_data.items() if k != 0 and str(k).isnumeric()
        }
        # check for same length of all columns
        check_errors = ''
        for k, v in self._data.items():
            if len(v) != len(self.header):
                check_errors += 'Row {0} in your data contains {1} fields (expected {2})\n'.format(
                    k, len(v), len(self.header))
        if check_errors:
            raise ValueError(check_errors + '\n' +
                             ', '.join(sorted(self.header)))

        # iterate over self._data and change the values according to the
        # functions (only needed when reading from file)
        if not internal_import:
            heads = sorted(self._header.items(), key=lambda x: x[1])
            for key in self._data:
                check = []
                for head, i in heads:
                    if i not in check:
                        logstring = 'Problem with row {0} in col {1}, expected' + \
                                    ' «{4}» as datatype but received «{3}» ' + \
                                    ' (ROW: {2}, entry {5}).'
                        try:
                            self._data[key][i] = self._class[head](
                                self._data[key][i])
                            check.append(i)
                        except (KeyError, ValueError):
                            log.warn(
                                logstring.format(
                                    key, i,
                                    '|'.join([str(x) for x in self._data[key]]),
                                    self._data[key][i],
                                    self._class[head], head))

        # create entry attribute of the wordlist
        self.entries = sorted(
            set([b.lower() for a, b in self._alias.items() if b]))

        # assign meta-data
        self._meta = {}
        for key in [k for k in input_data if type(k) != int]:
            self._meta[key] = input_data[key]
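
The conf file read by the parser above is tab-separated, one line per known column: name, class, then comma-separated aliases. A runnable sketch of that parsing with invented rows (lingpy's real qlc.rc will differ):

conf_lines = [
    'ipa\tstr\tphonetic,transcription',
    'cogid\tint\tcognateid,cognate_id',
]

_alias, _class = {}, {}
for name, cls, alias in (line.split('\t') for line in conf_lines):
    # eval() turns the class name into the type, as in the parser above.
    _alias[name.lower()] = _alias[name.upper()] = name
    _class[name.lower()] = _class[name.upper()] = eval(cls)
    for a in alias.split(','):
        _alias[a.lower()] = _alias[a.upper()] = name
        _class[a.lower()] = _class[a.upper()] = eval(cls)

assert _alias['COGNATEID'] == 'cogid' and _class['cogid'] is int
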
Exemplo n.º 48
0
    def tell_sentry(exception, state, allow_reraise=True):

        if isinstance(exception, pando.Response) and exception.code < 500:
            # Only log server errors
            return

        if isinstance(exception, NeedDatabase):
            # Don't flood Sentry when DB is down
            return

        if isinstance(exception, psycopg2.Error):
            from liberapay.website import website
            if getattr(website, 'db', None):
                try:
                    website.db.one('SELECT 1 AS x')
                except psycopg2.Error:
                    # If it can't answer this simple query, it's down.
                    website.db = NoDB()
                    # Show the proper 503 error page
                    state['exception'] = NeedDatabase()
                    # Tell gunicorn to gracefully restart this worker
                    os.kill(os.getpid(), signal.SIGTERM)

                if 'read-only' in str(exception):
                    # DB is in read only mode
                    state['db_is_readonly'] = True
                    # Show the proper 503 error page
                    state['exception'] = NeedDatabase()
                    # Don't reraise this in tests
                    allow_reraise = False

        if isinstance(exception, ValueError):
            if 'cannot contain NUL (0x00) characters' in str(exception):
                # https://github.com/liberapay/liberapay.com/issues/675
                response = state.get('response') or pando.Response()
                response.code = 400
                response.body = str(exception)
                return {'exception': None}

        if not sentry:
            # No Sentry, log to stderr instead
            traceback.print_exc()
            # Reraise if allowed
            if env.sentry_reraise and allow_reraise:
                raise
            return {'sentry_ident': None}

        # Prepare context data
        sentry_data = {}
        if state:
            try:
                sentry_data['tags'] = {
                    'lang': getattr(state.get('locale'), 'language', None),
                }
                request = state.get('request')
                user_data = sentry_data['user'] = {}
                if request is not None:
                    user_data['ip_address'] = str(request.source)
                    sentry_data['request'] = {
                        'method': request.method,
                        'url': request.line.uri,
                        'headers': {
                            k: b', '.join(v) for k, v in request.headers.items()
                            if k != b'Cookie'
                        },
                    }
                user = state.get('user')
                if isinstance(user, Participant):
                    user_data['id'] = getattr(user, 'id', None)
                    user_data['username'] = getattr(user, 'username', None)
            except Exception as e:
                tell_sentry(e, {})

        # Tell Sentry
        result = sentry.captureException(data=sentry_data)

        # Put the Sentry id in the state for logging, etc
        return {'sentry_ident': sentry.get_ident(result)}
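
Unlike the earlier version, this tell_sentry() hands back a dict of state updates (e.g. sentry_ident) for the caller to merge, rather than only mutating state. A tiny runnable illustration of that contract; the framework plumbing is assumed, not shown:

def tell_sentry_stub(exception, state):
    # Stand-in: report the exception somewhere, then return state updates.
    return {'sentry_ident': 'abc123'}

state = {'user': None}
state.update(tell_sentry_stub(ValueError('boom'), state) or {})
assert state['sentry_ident'] == 'abc123'
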
Exemplo n.º 49
0
 def get(self, key):
     return str(self.source.get(self.sectionkey, key))
Exemplo n.º 50
0
    def __init__(self, filename, row, col, conf):
        QLCParser.__init__(self, filename, conf=conf)

        try:
            self._row_name = self._alias[row]
            self._col_name = self._alias[col]
            rowIdx = self.header[self._alias[row]]
            colIdx = self.header[self._alias[col]]
        except KeyError:
            raise ValueError(
                "Could not find row or col in configuration or input file!")

        def unique_sorted(idx, key):
            return sorted(set([
                self._data[k][idx] for k in self._data
                if k != 0 and isinstance(k, int)
            ]),
                          key=key)

        # define rows and cols as attributes of the word list
        self.rows = unique_sorted(rowIdx, lambda x: ('%s' % x).lower())
        self.cols = unique_sorted(colIdx, lambda x: x.lower())

        # define height and width of the word list
        self.height = len(self.rows)
        self.width = len(self.cols)

        # row and column index point to the place where the data of the main
        # items is stored in the original dictionary
        self._rowIdx = rowIdx
        self._colIdx = colIdx

        # create a basic array which assigns ids for the entries in a starling manner.
        # first, find out how many items (== synonyms) there are at most for each row
        self._dict = defaultdict(lambda: defaultdict(list))
        for key, value in [(k, v) for k, v in self._data.items()
                           if k != 0 and str(k).isnumeric()]:
            self._dict[value[rowIdx]][value[colIdx]].append(key)

        # We must cast to a regular dict to make the attribute picklable.
        self._dict = dict(self._dict)

        # create the array by counting the maximal number of occurrences, store
        # the row names separately in a dictionary
        tmp_list = []
        self._idx = {}

        count = 0
        for k, d in self._dict.items():
            self._idx[k] = []

            # get maximal amount of "synonyms"
            for i in range(max([len(x) for x in d.values()])):
                tmp = []
                for j in range(self.width):
                    try:
                        tmp.append(d[self.cols[j]][i])
                    except (KeyError, IndexError):
                        tmp.append(0)
                self._idx[k] += [count]
                count += 1
                tmp_list += [tmp]

        self._array = np.array(tmp_list)
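
A runnable miniature of the "starling-style" id array built above: one column per language, and a concept with several synonyms in one language spills into extra rows. The data is invented:

from collections import defaultdict

import numpy as np

cols = ['English', 'German']
data = {1: ('hand', 'English'), 2: ('hand', 'German'), 3: ('hand', 'German')}

_dict = defaultdict(lambda: defaultdict(list))
for key, (concept, language) in data.items():
    _dict[concept][language].append(key)

tmp_list = []
for concept, by_lang in _dict.items():
    # The row count per concept is the maximal number of synonyms.
    for i in range(max(len(v) for v in by_lang.values())):
        row = []
        for col in cols:
            ids = by_lang.get(col, [])
            row.append(ids[i] if i < len(ids) else 0)
        tmp_list.append(row)

print(np.array(tmp_list))
# [[1 2]
#  [0 3]]
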
Exemplo n.º 51
0
def _get_test_name(string):
    """Returns test name for resource."""

    return TEST_NAME_PREFIX + str(string)
Exemplo n.º 52
0
    def parse(self, data, mode):
        """
        Parse search results for items.

        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """
        items = []

        with BS4Parser(data, 'html5lib') as html:

            rows = html('item')
            if not rows:
                log.debug(
                    'No results returned from provider. Check chosen Newznab search categories'
                    ' in provider settings and/or usenet retention')
                return items

            try:
                self.torznab = 'xmlns:torznab' in html.rss.attrs
            except AttributeError:
                self.torznab = False

            for item in rows:
                try:
                    title = item.title.get_text(strip=True)
                    download_url = None

                    if item.enclosure:
                        url = item.enclosure.get('url', '').strip()
                        if url.startswith('magnet:'):
                            download_url = url
                        elif validators.url(url):
                            download_url = url
                            # Jackett needs extension added (since v0.8.396)
                            if not url.endswith('.torrent'):
                                content_type = item.enclosure.get('type', '')
                                if content_type == 'application/x-bittorrent':
                                    download_url = '{0}{1}'.format(
                                        url, '.torrent')

                    if not download_url and item.link:
                        url = item.link.get_text(strip=True)
                        if validators.url(url) or url.startswith('magnet:'):
                            download_url = url

                        if not download_url:
                            url = item.link.next.strip()
                            if validators.url(url) or url.startswith(
                                    'magnet:'):
                                download_url = url

                    if not (title and download_url):
                        continue

                    seeders = leechers = -1
                    if 'gingadaddy' in self.url:
                        size_regex = re.search(r'\d*\.?\d* [KMGT]B',
                                               str(item.description))
                        item_size = size_regex.group() if size_regex else -1
                    else:
                        item_size = item.size.get_text(
                            strip=True) if item.size else -1
                        # Use regex to find name-spaced tags
                        # see BeautifulSoup4 bug 1720605
                        # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                        newznab_attrs = item(re.compile('newznab:attr'))
                        torznab_attrs = item(re.compile('torznab:attr'))
                        for attr in newznab_attrs + torznab_attrs:
                            name = attr['name']
                            if name == 'size':
                                item_size = attr['value']
                            elif name == 'seeders':
                                seeders = try_int(attr['value'])
                            elif name == 'peers':
                                peers = try_int(attr['value'])
                                if peers:
                                    leechers = peers - seeders

                    if not item_size or (self.torznab and
                                         (seeders == -1 or leechers == -1)):
                        continue

                    size = convert_size(item_size) or -1

                    pubdate_raw = item.pubdate.get_text(strip=True)
                    pubdate = self.parse_pubdate(pubdate_raw)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                        'pubdate': pubdate,
                    }
                    if mode != 'RSS':
                        if seeders == -1:
                            log.debug('Found result: {0}', title)
                        else:
                            log.debug(
                                'Found result: {0} with {1} seeders and {2} leechers',
                                title, seeders, leechers)

                    items.append(item)
                except (AttributeError, TypeError, KeyError, ValueError,
                        IndexError):
                    log.exception('Failed parsing provider.')

        return items
Exemplo n.º 53
0
def test_video_fromguess_wrong_type(episodes):
    guess = {'type': 'subtitle'}
    with pytest.raises(ValueError) as excinfo:
        Video.fromguess(episodes['bbt_s07e05'].name, guess)
    assert str(
        excinfo.value) == 'The guess must be an episode or a movie guess'
Exemplo n.º 54
0
    def _ep_data(self, ep_obj):
        """
        Creates an elementTree XML structure for a WDTV style episode.xml
        and returns the resulting data object.

        ep_obj: an Episode instance to create the episode XML for
        """

        eps_to_write = [ep_obj] + ep_obj.related_episodes

        my_show = self._get_show_data(ep_obj.series)
        if not my_show:
            return None

        root_node = etree.Element('details')

        # write an WDTV XML containing info for all matching episodes
        for ep_to_write in eps_to_write:

            try:
                my_ep = my_show[ep_to_write.season][ep_to_write.episode]
            except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
                log.info(
                    'Unable to find episode {number} on {indexer}... has it been removed? Should I delete from db?', {
                        'number': ep_num(ep_to_write.season, ep_to_write.episode),
                        'indexer': indexerApi(ep_obj.series.indexer).name,
                    }
                )
                return None

            if ep_obj.season == 0 and not getattr(my_ep, 'firstaired', None):
                my_ep['firstaired'] = str(datetime.date.fromordinal(1))

            if not (getattr(my_ep, 'episodename', None) and getattr(my_ep, 'firstaired', None)):
                return None

            if len(eps_to_write) > 1:
                episode = etree.SubElement(root_node, 'details')
            else:
                episode = root_node

            # TODO: get right EpisodeID
            episode_id = etree.SubElement(episode, 'id')
            episode_id.text = str(ep_to_write.indexerid)

            title = etree.SubElement(episode, 'title')
            title.text = ep_obj.pretty_name()

            if getattr(my_show, 'seriesname', None):
                series_name = etree.SubElement(episode, 'series_name')
                series_name.text = my_show['seriesname']

            if ep_to_write.name:
                episode_name = etree.SubElement(episode, 'episode_name')
                episode_name.text = ep_to_write.name

            season_number = etree.SubElement(episode, 'season_number')
            season_number.text = str(ep_to_write.season)

            episode_num = etree.SubElement(episode, 'episode_number')
            episode_num.text = str(ep_to_write.episode)

            first_aired = etree.SubElement(episode, 'firstaired')

            if ep_to_write.airdate != datetime.date.fromordinal(1):
                first_aired.text = str(ep_to_write.airdate)

            if getattr(my_show, 'firstaired', None):
                try:
                    year_text = str(datetime.datetime.strptime(my_show['firstaired'], dateFormat).year)
                    if year_text:
                        year = etree.SubElement(episode, 'year')
                        year.text = year_text
                except Exception:
                    pass

            if ep_to_write.season != 0 and getattr(my_show, 'runtime', None):
                runtime = etree.SubElement(episode, 'runtime')
                runtime.text = str(my_show['runtime'])

            if getattr(my_show, 'genre', None):
                genre = etree.SubElement(episode, 'genre')
                genre.text = ' / '.join([x.strip() for x in my_show['genre'].split('|') if x.strip()])

            if getattr(my_ep, 'director', None):
                director = etree.SubElement(episode, 'director')
                director.text = my_ep['director']

            if getattr(my_show, '_actors', None):
                for actor in my_show['_actors']:
                    if not ('name' in actor and actor['name'].strip()):
                        continue

                    cur_actor = etree.SubElement(episode, 'actor')

                    cur_actor_name = etree.SubElement(cur_actor, 'name')
                    cur_actor_name.text = actor['name']

                    if 'role' in actor and actor['role'].strip():
                        cur_actor_role = etree.SubElement(cur_actor, 'role')
                        cur_actor_role.text = actor['role'].strip()

            if ep_to_write.description:
                overview = etree.SubElement(episode, 'overview')
                overview.text = ep_to_write.description

            # Make it purdy
            helpers.indent_xml(root_node)
            data = etree.ElementTree(root_node)

        return data
Exemplo n.º 55
0
def test_hash_opensubtitles_too_small(tmpdir):
    path = tmpdir.ensure('test_too_small.mkv')
    assert hash_opensubtitles(str(path)) is None
Exemplo n.º 56
0
 def __add__(self, other):
     return lists(str(self) + self.sep + str(other))
Exemplo n.º 57
0
def test_scan_videos_path_does_not_exist(movies):
    with pytest.raises(ValueError) as excinfo:
        scan_videos(movies['man_of_steel'].name)
    assert str(excinfo.value) == 'Path does not exist'
Exemplo n.º 58
0
 def __add__(self, other):
     return _strings(self._type,
                     str(self) + ' ' + str(_strings(self._type, other)))
Exemplo n.º 59
0
 def __repr__(self):
     return '%s(%r, params=%r)' % (self.__class__.__name__, str(self), self.params)
Exemplo n.º 60
0
    def __init__(self, *args, **kwargs):
        get_first_name = getattr(settings, 'PAYFAST_GET_USER_FIRST_NAME',
                                 attrgetter('first_name'))
        get_last_name = getattr(settings, 'PAYFAST_GET_USER_LAST_NAME',
                                attrgetter('last_name'))

        user = kwargs.pop('user', None)
        if user:

            if get_first_name is not None:
                kwargs['initial'].setdefault('name_first',
                                             get_first_name(user))
            if get_last_name is not None:
                kwargs['initial'].setdefault('name_last', get_last_name(user))

            # Django 1.11 adds AbstractBaseUser.get_email_field_name()
            email_address = (
                user.email if django.VERSION < (1, 11)
                else getattr(user, get_user_model().get_email_field_name()))
            kwargs['initial'].setdefault('email_address', email_address)

        kwargs['initial'].setdefault('notify_url', notify_url())
        kwargs['initial'].setdefault('merchant_id', conf.MERCHANT_ID)
        kwargs['initial'].setdefault('merchant_key', conf.MERCHANT_KEY)

        super(PayFastForm, self).__init__(*args, **kwargs)

        if 'm_payment_id' in self.initial:
            # If the caller supplies m_payment_id, find the existing order, or create it.
            (self.order, created) = PayFastOrder.objects.get_or_create(
                m_payment_id=self.initial['m_payment_id'],
                defaults=dict(
                    user=user,
                    amount_gross=self.initial['amount'],
                ),
            )
            if not created:
                # If the order is existing, check the user and amount fields,
                # and update if necessary.
                #
                # XXX: Also consistency-check that the order is not paid yet?
                #
                if not (self.order.user == user
                        and self.order.amount_gross == self.initial['amount']):
                    self.order.user = user
                    self.order.amount_gross = self.initial['amount']
                    self.order.save()
        else:
            # Old path: Create a new PayFastOrder each time form is instantiated.
            self.order = PayFastOrder.objects.create(
                user=user,
                amount_gross=self.initial['amount'],
            )

            # Initialise m_payment_id from the pk.
            self.order.m_payment_id = str(self.order.pk)
            self.order.save()

            self.initial['m_payment_id'] = self.order.m_payment_id

        # Coerce values to strings, for signing.
        data = {k: str(v) for (k, v) in self.initial.items()}
        self._signature = api.checkout_signature(data)
        self.fields['signature'].initial = self._signature
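
The last two lines coerce every initial value to a string before signing. A runnable miniature of that step; the md5-over-urlencoded-fields scheme mirrors PayFast's documented signature but is an assumption here, not a drop-in for api.checkout_signature():

import hashlib
from urllib.parse import urlencode

initial = {'merchant_id': 10000100, 'amount': 99.0, 'm_payment_id': 42}

# Coerce values to strings, for signing (as in the form above).
data = {k: str(v) for (k, v) in initial.items()}
signature = hashlib.md5(
    urlencode(sorted(data.items())).encode('utf-8')).hexdigest()
print(signature)
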