def hbase_setupjob(job, args):
    """
    Set up a job to run on telemetry date ranges using data from HBase

    Telemetry jobs expect two arguments, startdate and enddate, both in yyyymmdd format.
    """

    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-YYYYMMDD> <enddate-YYYYMMDD>")

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(args[0]))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(args[1]))

    columns = [Pair('data', 'json')]
    scans = MSTMRU.generateBytePrefixScans(startdate, enddate, dateformat,
                                           columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob('telemetry', scans, None, None, None,
                                       job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
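# --- Usage sketch (not from the original script) -------------------------------
# A minimal, hypothetical illustration of how a setup function like the one above
# might be exercised. It assumes a module-level constant dateformat = "yyyyMMdd"
# (referenced but not defined in this snippet) and uses stock Hadoop classes; the
# real jydoop driver wiring is not shown in this listing.
import org.apache.hadoop.conf.Configuration as Configuration
import org.apache.hadoop.mapreduce.Job as Job

conf = Configuration()
job = Job(conf, "telemetry-daterange-example")
hbase_setupjob(job, ["20130401", "20130407"])   # startdate, enddate in yyyymmdd
# The job configuration now advertises the HBase columns the mapper will read:
# job.getConfiguration().get("org.mozilla.jydoop.hbasecolumns") == "data:json"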
Example no. 2
def setupjob(job, args):
    """
    Set up a job to run on telemetry date ranges using data from HBase

    Telemetry jobs expect two arguments, startdate and enddate, both in yyyymmdd format.
    """

    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-YYYYMMDD> <enddate-YYYYMMDD>")

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(args[0]))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(args[1]))

    columns = [Pair('data', 'json')]
    scans = MSTMRU.generateBytePrefixScans(startdate, enddate, dateformat,
                                           columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'telemetry',
        scans,
        None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")

Example no. 3
def setupjob(job, args):
    """
    Set up a job to run full table scans for FHR data.

    We don't expect any arguments.
    """

    import org.apache.hadoop.hbase.client.Scan as Scan
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU

    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))

    # FIXME: do it without this multi-scan util
    scans = [scan]
    MSTMRU.initMultiScanTableMapperJob(
        'metrics', scans,
        None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
    # job.getConfiguration().set("mapred.reduce.tasks",4352)
    job.setNumReduceTasks(4352)
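# --- Sketch: addressing the FIXME above with a single-scan setup ----------------
# A hedged alternative to the multi-scan util when only one Scan is needed, using
# the stock HBase helper TableMapReduceUtil. The mapper and output classes shown
# (IdentityTableMapper, ImmutableBytesWritable, Result) are generic placeholders
# for illustration; the real jydoop driver installs its own mapper, so this is not
# a drop-in replacement for the code above.
import org.apache.hadoop.hbase.client.Result as Result
import org.apache.hadoop.hbase.client.Scan as Scan
import org.apache.hadoop.hbase.io.ImmutableBytesWritable as ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.IdentityTableMapper as IdentityTableMapper
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil as TableMapReduceUtil

def single_scan_setupjob(job, args):
    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))
    # One table, one Scan, no MultiScanTableMapReduceUtil involved.
    TableMapReduceUtil.initTableMapperJob('metrics', scan, IdentityTableMapper,
                                          ImmutableBytesWritable, Result, job)
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")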
Example no. 4
def setupjob(job, args):
    """
    Set up a job to run full table scans for FHR data.

    We don't expect any arguments.
    """

    import org.apache.hadoop.hbase.client.Scan as Scan
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU

    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))

    # FIXME: do it without this multi-scan util
    scans = [scan]
    MSTMRU.initMultiScanTableMapperJob('metrics', scans, None, None, None, job)

    # inform HBaseDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
Example no. 5
def setupjob(job, args):
    """
    Set up a job to run on crash-stats date ranges.

    Expects two arguments:
      startdate (yymmdd)
      enddate (yymmdd)
    """

    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-yymmdd> <enddate-yymmdd>")

    startarg, endarg = args

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(startarg))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(endarg))

    columns = [Pair(family, qualifier) for family, qualifier in columnlist]

    scans = MSTMRU.generateHexPrefixScans(startdate, enddate, dateformat,
                                          columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'crash_reports',
        scans,
        None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns",
                               ','.join(':'.join(column) for column in columnlist))
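# --- Sketch: walking the parsed date range day by day ---------------------------
# The scan generators above expand a start/end Calendar pair into per-day row-key
# prefixes. As a rough, hedged illustration of that expansion (the helper's actual
# implementation may differ), the range can be walked like this, assuming the
# crash-stats date format of "yyMMdd" from the docstring:
import java.text.SimpleDateFormat as SimpleDateFormat
import java.util.Calendar as Calendar

sdf = SimpleDateFormat("yyMMdd")
start = Calendar.getInstance()
start.setTime(sdf.parse("130401"))
end = Calendar.getInstance()
end.setTime(sdf.parse("130403"))

day = start.clone()
while not day.after(end):
    print(sdf.format(day.getTime()))   # 130401, 130402, 130403
    day.add(Calendar.DATE, 1)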
Example no. 6
def setupjob(job, args):
    """
    Set up a job to run on crash-stats date ranges.

    Expects two arguments:
      startdate (yymmdd)
      enddate (yymmdd)
    """

    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-yymmdd> <enddate-yymmdd>")

    startarg, endarg = args

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(startarg))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(endarg))

    columns = [Pair(family, qualifier) for family, qualifier in columnlist]

    scans = MSTMRU.generateHexPrefixScans(startdate, enddate, dateformat,
                                          columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'crash_reports',
        scans,
        None, None, None, job)

    # inform HBaseDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns",
                               ','.join(':'.join(column) for column in columnlist))
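# --- Illustration: the hbasecolumns configuration string ------------------------
# 'columnlist' is defined elsewhere in the crash-stats script and is not shown in
# this listing. With made-up placeholder columns, the join above produces a
# comma-separated "family:qualifier" list:
columnlist = [('processed_data', 'json'), ('raw_data', 'dump')]   # hypothetical
print(','.join(':'.join(column) for column in columnlist))
# -> processed_data:json,raw_data:dump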