示例#1
0
    def test_intervals(self):
        """ Check the value each interval helper returns for a fixed input. """
        # assertEqual is used throughout: the assertEquals alias is deprecated
        # and was removed from unittest in Python 3.12.

        # Sub-daily helpers are exercised with timezone-aware (UTC) datetimes.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 10, 1, 0, pytz.utc),
            every_minute(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 1, 1, 0, pytz.utc),
            hourly(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        #
        # Daily or less often: these helpers are exercised with plain dates.
        #
        self.assertEqual(
            datetime.date(2000, 12, 3),
            daily(datetime.date(2000, 12, 3)))

        self.assertEqual(
            # 2000-11-27 was a Monday
            datetime.date(2000, 11, 27),
            weekly(datetime.date(2000, 12, 3)))

        # Two inputs in the same month, expected to map to the 1st and the 15th.
        self.assertEqual(
            datetime.date(2000, 12, 1),
            biweekly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 12, 15),
            biweekly(datetime.date(2000, 12, 16)))

        # The original repeated this exact assertion twice; once is enough.
        self.assertEqual(
            datetime.date(2000, 12, 1),
            monthly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 10, 1),
            quarterly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 7, 1),
            semiyearly(datetime.date(2000, 12, 12)))

        self.assertEqual(
            datetime.date(2000, 1, 1),
            yearly(datetime.date(2000, 12, 12)))
示例#2
0
class GutenbergTopIndexTerms(GutenbergTask):
    """
    Sort and count top index terms.

    Takes the output of GutenbergIndexTerms, drops the first column and writes
    the remaining values prefixed with their number of occurrences, sorted by
    that count in descending order.
    """
    # NOTE(review): weekly() is called once, when the class body is executed,
    # not each time a task is instantiated.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Depend on the index terms extracted for the same date. """
        return GutenbergIndexTerms(date=self.date)

    def run(self):
        """ Count identical lines with a shell pipeline and move the result into place. """
        output = shellout("cut -f 2- {input}| sort | uniq -c | sort -nr > {output}",
                          input=self.input().path)
        # luigi.LocalTarget replaces the deprecated luigi.File alias and matches
        # the target type already returned by output().
        luigi.LocalTarget(output).move(self.output().path)

    def output(self):
        """ Return the local TSV target at the path computed by self.path(). """
        return luigi.LocalTarget(path=self.path(), format=TSV)
示例#3
0
    def test_intervals(self):
        """ Check the value each interval helper returns for a fixed input. """
        # assertEqual is used throughout: the assertEquals alias is deprecated
        # and was removed from unittest in Python 3.12.

        # Sub-daily helpers are exercised with timezone-aware (UTC) datetimes.
        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 10, 1, 0, pytz.utc),
            every_minute(
                datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        self.assertEqual(
            datetime.datetime(2000, 12, 3, 10, 1, 1, 0, pytz.utc),
            hourly(datetime.datetime(2000, 12, 3, 10, 10, 10, 0, pytz.utc)))

        #
        # Daily or less often: these helpers are exercised with plain dates.
        #
        self.assertEqual(datetime.date(2000, 12, 3),
                         daily(datetime.date(2000, 12, 3)))

        self.assertEqual(
            # 2000-11-27 was a Monday
            datetime.date(2000, 11, 27),
            weekly(datetime.date(2000, 12, 3)))

        # Two inputs in the same month, expected to map to the 1st and the 15th.
        self.assertEqual(datetime.date(2000, 12, 1),
                         biweekly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 12, 15),
                         biweekly(datetime.date(2000, 12, 16)))

        # The original repeated this exact assertion twice; once is enough.
        self.assertEqual(datetime.date(2000, 12, 1),
                         monthly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 10, 1),
                         quarterly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 7, 1),
                         semiyearly(datetime.date(2000, 12, 12)))

        self.assertEqual(datetime.date(2000, 1, 1),
                         yearly(datetime.date(2000, 12, 12)))
示例#4
0
class GutenbergDump(GutenbergTask):
    """
    Download dump.

    Updated usually every four days. These lists include the basic information
    about each eBook.

    The catalog is fetched as a bzip2-compressed MARC file, decompressed and
    stored at the task's output path with an ``mrc`` extension.
    """
    # NOTE(review): weekly() is called once, when the class body is executed,
    # not each time a task is instantiated.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Require the external programs invoked by run(). """
        return [Executable(name='wget'), Executable(name='bunzip2')]

    def run(self):
        """ Download the compressed catalog, decompress it and move it into place. """
        url = "http://gutenberg.readingroo.ms/cache/generated/feeds/catalog.marc.bz2"
        # Each step gets its own name instead of rebinding one variable.
        compressed = shellout('wget -q "{url}" -O {output}', url=url)
        decompressed = shellout('bunzip2 {input} -c > {output}', input=compressed)
        # luigi.LocalTarget replaces the deprecated luigi.File alias and matches
        # the target type already returned by output().
        luigi.LocalTarget(decompressed).move(self.output().path)

    def output(self):
        """ Return the local target at the path computed by self.path(ext='mrc'). """
        return luigi.LocalTarget(path=self.path(ext='mrc'))
示例#5
0
class GutenbergIndexTerms(GutenbergTask):
    """
    Extract all 653 a index terms.

    Writes one (id, term) row per term found in field 653.a of the dump.
    """
    # NOTE(review): weekly() is called once, when the class body is executed,
    # not each time a task is instantiated.
    date = luigi.DateParameter(default=weekly())

    def requires(self):
        """ Require the MARC dump for the same date and the marctotsv executable. """
        return {'dump': GutenbergDump(date=self.date),
                'apps': Executable(name='marctotsv',
                                   message='https://github.com/miku/gomarckit')}

    def run(self):
        """ Convert the dump to TSV and split the terms column into single terms. """
        extracted = shellout('marctotsv -k -s "|" {input} 001 653.a > {output}',
                             input=self.input().get('dump').path)
        # luigi.LocalTarget replaces the deprecated luigi.File alias and matches
        # the target type already returned by output().
        with luigi.LocalTarget(extracted, format=TSV).open() as handle:
            # A separate name for the writer avoids rebinding the path of the
            # intermediate file, as the original did with `output`.
            with self.output().open('w') as writer:
                for row in handle.iter_tsv(cols=('id', 'terms')):
                    # Values are joined with "|" by marctotsv (-s "|"); each
                    # value is further split on "--" into individual terms.
                    for subfield in row.terms.split('|'):
                        for term in subfield.split('--'):
                            term = term.strip()
                            writer.write_tsv(row.id, term)

    def output(self):
        """ Return the local TSV target at the path computed by self.path(). """
        return luigi.LocalTarget(path=self.path(), format=TSV)
示例#6
0
文件: kxp.py 项目: zazi/siskin
 def closest(self):
     """ Return the result of weekly() for this task's date. """
     nearest = weekly(date=self.date)
     return nearest
示例#7
0
文件: cambridge.py 项目: miku/siskin
 def closest(self):
     """ Map this task's date to the value weekly() returns for it. """
     current = self.date
     return weekly(date=current)
示例#8
0
文件: gbi.py 项目: ubleipzig/siskin
 def closest(self):
     """ Update weekly: return the result of weekly() for this task's date. """
     current = self.date
     return weekly(current)