Пример #1
0
    def test_intervals(self):
        """ Basic intervals tests.

        Calls each interval helper with a fixed input and compares the
        result with the value that helper is expected to return for it.
        """

        #
        # More often than daily (timezone-aware datetimes)
        #

        # Hour and minute are kept; the expected second is 1.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 10, 1, 0, pytz.utc),
            every_minute(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        # Only the hour is kept; expected minute and second are both 1.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 1, 1, 0, pytz.utc),
            hourly(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        #
        # Daily or less often
        #
        self.assertEqual(
            datetime.date(2000, 12, 3),
            daily(datetime.date(2000, 12, 3)))

        self.assertEqual(
            # 2000-11-27 was a Monday (the input, 2000-12-03, was a Sunday)
            datetime.date(2000, 11, 27),
            weekly(datetime.date(2000, 12, 3)))

        # The two biweekly cases expect the 1st and the 15th of the month.
        self.assertEqual(
            datetime.date(2000, 12, 1),
            biweekly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 12, 15),
            biweekly(datetime.date(2000, 12, 16)))

        self.assertEqual(
            datetime.date(2000, 12, 1),
            monthly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 10, 1),
            quarterly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 7, 1),
            semiyearly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 1, 1),
            yearly(datetime.date(2000, 12, 12)))
Пример #2
0
class GutenbergTopIndexTerms(GutenbergTask):
    """ Sort and count top index terms.

    Takes the output of GutenbergIndexTerms for the same date and writes one
    line per distinct value, prefixed with its number of occurrences and
    ordered by that number, highest first.
    """
    # NOTE: weekly() is called once, when this class body is executed.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Depend on the extracted index terms for the same date. """
        return GutenbergIndexTerms(date=self.date)

    def run(self):
        """ Count the terms in a shell pipeline, then move the result into place. """
        # cut drops the first column; sort | uniq -c counts duplicates and
        # the final sort -nr orders the lines by count, descending.
        output = shellout("cut -f 2- {input}| sort | uniq -c | sort -nr > {output}",
                          input=self.input().path)
        # luigi.File is a deprecated alias; use LocalTarget as output() does.
        luigi.LocalTarget(output).move(self.output().path)

    def output(self):
        """ Local TSV target at the path computed by self.path(). """
        return luigi.LocalTarget(path=self.path(), format=TSV)
Пример #3
0
    def test_intervals(self):
        """ Basic intervals tests.

        Every interval helper gets one fixed input; the returned value is
        compared with the value expected for that input.
        """

        #
        # More often than daily (timezone-aware datetimes)
        #

        # Hour and minute are kept; the expected second is 1.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 10, 1, 0, pytz.utc),
            every_minute(
                datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        # Only the hour is kept; expected minute and second are both 1.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 1, 1, 0, pytz.utc),
            hourly(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        #
        # Daily or less often
        #
        self.assertEqual(datetime.date(2000, 12, 3),
                         daily(datetime.date(2000, 12, 3)))

        self.assertEqual(
            # 2000-11-27 was a Monday (the input, 2000-12-03, was a Sunday)
            datetime.date(2000, 11, 27),
            weekly(datetime.date(2000, 12, 3)))

        # The two biweekly cases expect the 1st and the 15th of the month.
        self.assertEqual(datetime.date(2000, 12, 1),
                         biweekly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 12, 15),
                         biweekly(datetime.date(2000, 12, 16)))

        self.assertEqual(datetime.date(2000, 12, 1),
                         monthly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 10, 1),
                         quarterly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 7, 1),
                         semiyearly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 1, 1),
                         yearly(datetime.date(2000, 12, 12)))
Пример #4
0
class GutenbergDump(GutenbergTask):
    """
    Download dump.

    Updated usually every four days. These lists include the basic information
    about each eBook.

    The catalog is fetched as a bzip2-compressed file, decompressed and
    stored under a path ending in the 'mrc' extension.
    """
    # NOTE: weekly() is called once, when this class body is executed.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Require the two command line tools used by run(). """
        return [Executable(name='wget'), Executable(name='bunzip2')]

    def run(self):
        """ Download the catalog, decompress it and move it into place. """
        url = "http://gutenberg.readingroo.ms/cache/generated/feeds/catalog.marc.bz2"
        # Separate names for the two intermediate files shellout() produces.
        compressed = shellout('wget -q "{url}" -O {output}', url=url)
        decompressed = shellout('bunzip2 {input} -c > {output}', input=compressed)
        # luigi.File is a deprecated alias; use LocalTarget as output() does.
        luigi.LocalTarget(decompressed).move(self.output().path)

    def output(self):
        """ Local target at the path computed by self.path(ext='mrc'). """
        return luigi.LocalTarget(path=self.path(ext='mrc'))
Пример #5
0
class GutenbergIndexTerms(GutenbergTask):
    """ Extract all 653 a index terms.

    Writes a two-column TSV (record id, term) with one row per term found in
    the dump produced by GutenbergDump.
    """
    # NOTE: weekly() is called once, when this class body is executed.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Require the dump for the same date and the marctotsv executable. """
        return {'dump': GutenbergDump(date=self.date),
                'apps': Executable(name='marctotsv',
                                   message='https://github.com/miku/gomarckit')}

    def run(self):
        """ Dump fields 001 and 653.a to TSV, then emit one row per term. """
        # The '|' passed via -s is the same character the terms column is
        # split on below -- presumably it joins repeated 653.a values.
        extracted = shellout('marctotsv -k -s "|" {input} 001 653.a > {output}',
                             input=self.input().get('dump').path)
        # luigi.File is a deprecated alias; use LocalTarget as output() does.
        with luigi.LocalTarget(extracted, format=TSV).open() as handle:
            with self.output().open('w') as sink:
                for row in handle.iter_tsv(cols=('id', 'terms')):
                    for subfield in row.terms.split('|'):
                        # A value may bundle several terms separated by '--'.
                        for term in subfield.split('--'):
                            sink.write_tsv(row.id, term.strip())

    def output(self):
        """ Local TSV target at the path computed by self.path(). """
        return luigi.LocalTarget(path=self.path(), format=TSV)
Пример #6
0
Файл: kxp.py Проект: zazi/siskin
 def closest(self):
     """ Return weekly() evaluated for this task's date. """
     anchor = self.date
     return weekly(date=anchor)
Пример #7
0
 def closest(self):
     """ Pass self.date to weekly() and return what it yields. """
     task_date = self.date
     return weekly(date=task_date)
Пример #8
0
 def closest(self):
     """ Update weekly: resolve this task's date through weekly(). """
     current = self.date
     return weekly(current)