def run(self):
    """Submit the flink-streaming WordCount example jar to the cluster via master()."""
    def submit_job():
        run_jar("%s/flink-staging/flink-streaming/flink-streaming-examples/target/" % get_flink_path(),
                "flink-streaming-*-WordCount.jar",
                clazz="org.apache.flink.streaming.examples.wordcount.WordCount",
                args=[self.wordcount_in, self.wordcount_out])
    master(submit_job)
def run(self):
    """Run the batch (flink-java) WordCount example on the configured in/out paths."""
    def submit_job():
        jar_dir = "%s/examples/" % get_flink_dist_path()
        run_jar(jar_dir,
                "flink-java-*WordCount.jar",
                args=[self.wordcount_in, self.wordcount_out],
                clazz="org.apache.flink.examples.java.wordcount.WordCount")
    master(submit_job)
def run(self):
    """Run the streaming WordCount example, passing paths as --input/--output flags."""
    def submit():
        job_args = ["--input", self.wordcount_in, "--output", self.wordcount_out]
        run_jar("%s/examples/streaming/" % get_flink_dist_path(),
                "WordCount.jar",
                args=job_args,
                clazz="org.apache.flink.streaming.examples.wordcount.WordCount")
    master(submit)
def run(self):
    """Run the streaming WindowWordCount example (trailing 10000 is the window argument)."""
    def submit():
        run_jar("%s/examples/streaming/" % get_flink_dist_path(),
                "WindowWordCount.jar",
                clazz="org.apache.flink.streaming.examples.windowing.WindowWordCount",
                args=[self.wordcount_in, self.wordcount_out, 10000])
    master(submit)
def run(self):
    """Run the flink-perf GrepJob over the HDFS text corpus.

    The output path is timestamped so repeated runs do not collide.
    """
    grep_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
    grep_in = "%s/text" % get_hdfs_address()
    def submit():
        search_terms = ["these", "are", "test", "words"]
        run_jar("~/flink-perf/flink-jobs/target",
                "flink-jobs-*.jar",
                args=[grep_in, grep_out] + search_terms,
                clazz="com.github.projectflink.grep.GrepJob")
    master(submit)
def run(self):
    """Run the batch WordCount example over the HDFS text corpus.

    Fix: the original bound the timestamped tmp path to ``wordcount_in`` and
    the source text to ``wordcount_out``, then passed them to run_jar in
    reversed order (``[wordcount_out, wordcount_in]``). The arguments the job
    receives are unchanged; the variable names now match their actual roles.
    """
    wordcount_in = "%s/text" % get_hdfs_address()
    wordcount_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
    def code():
        run_jar("%s/examples/" % get_flink_dist_path(),
                "flink-java-*WordCount.jar",
                args=[wordcount_in, wordcount_out],
                clazz="org.apache.flink.examples.java.wordcount.WordCount")
    master(code)
def run(self):
    """Run the Avro CompareJob; note it takes (out_path, in_path) in that order."""
    def submit():
        jar_dir = "%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path()
        run_jar(jar_dir, "flink-jobs-*.jar",
                args=[self.out_path, self.in_path],
                clazz="com.github.projectflink.avro.CompareJob")
    master(submit)
def run(self):
    """Generate text data with the projectflink Text generator job."""
    def submit():
        run_jar("%s/flink-jobs/target" % self.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                clazz="com.github.projectflink.generators.Text",
                args=[self.dop, self.out_path, self.size_gb])
    master(submit)
def run(self):
    """Run the flink-dataflow GoogleStreamingPipeline example over the HDFS text corpus.

    Fix: the original clazz was
    "com.dataartisans.flink.dataflow.GoogleStreamingPipeline.examples", with the
    `examples` package segment appended AFTER the class name — a malformed fully
    qualified name that cannot resolve. Corrected to follow the package layout
    used by the sibling dataflow jobs in this file
    ("com.dataartisans.flink.dataflow.examples.<Class>").
    """
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def code():
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                args=[wordcount_in, self.wordcount_out],
                clazz="com.dataartisans.flink.dataflow.examples.GoogleStreamingPipeline")
    master(code)
def run(self):
    """Run the flink-dataflow WordCount jar; which job class is used depends
    on whether implicit combining is enabled on this experiment."""
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def submit():
        if self.implicit_combine:
            job_class = self.implicit_clazz
        else:
            job_class = self.explicit_clazz
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                args=[wordcount_in, self.wordcount_out],
                clazz=job_class)
    master(submit)
def run(self):
    """Launch the WindowWordCount streaming example with a 10000 window argument."""
    def job():
        jar_dir = "%s/examples/streaming/" % get_flink_dist_path()
        run_jar(jar_dir, "WindowWordCount.jar",
                args=[self.wordcount_in, self.wordcount_out, 10000],
                clazz="org.apache.flink.streaming.examples.windowing.WindowWordCount")
    master(job)
def run(self):
    """Grep the HDFS text corpus for a fixed set of test words.

    Output goes to a timestamped tmp directory in HDFS.
    """
    grep_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
    grep_in = "%s/text" % get_hdfs_address()
    def job():
        run_jar("~/flink-perf/flink-jobs/target", "flink-jobs-*.jar",
                clazz="com.github.projectflink.grep.GrepJob",
                args=[grep_in, grep_out, "these", "are", "test", "words"])
    master(job)
def run(self):
    """Upload the bundled WordCount example jar and run it against HDFS paths
    derived from the Fabric master host."""
    def job():
        run_jar(path="experiments/wordcount_files/",
                jar_name="flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar",
                upload=True,
                args=["hdfs://%s:50040/generated-wc.txt" % env.master,
                      "hdfs://%s:50040/tmp/wc-out/" % env.master])
    master(job)
def run(self):
    """Run the batch WordCount example using --input/--output style arguments."""
    def job():
        job_args = ["--input", self.wordcount_in,
                    "--output", self.wordcount_out]
        run_jar("%s/examples/batch/" % get_flink_dist_path(), "WordCount.jar",
                args=job_args,
                clazz="org.apache.flink.examples.java.wordcount.WordCount")
    master(job)
def run(self):
    """Run the flink-dataflow WordCount jar; the job class depends on
    whether implicit combining is enabled."""
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def job():
        chosen = self.implicit_clazz if self.implicit_combine else self.explicit_clazz
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                args=[wordcount_in, self.wordcount_out],
                clazz=chosen)
    master(job)
def setup(self):
    """Prepare the Avro benchmark: convert the generator's output to Avro
    via the Prepare job, recording both paths on self."""
    self.in_path = self.generator.experiment.out
    self.out_path = get_hdfs_address() + "/avro-benchmark/tpch1-avro"
    def job():
        run_jar("%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                clazz="com.github.projectflink.avro.Prepare",
                args=[self.in_path, self.out_path])
    master(job)
def run(self):
    """Run the DataflowWordCount example; user options come after a literal
    "--" separator, as Flink 0.8 requires."""
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def job():
        job_args = ["--",  # Flink 0.8 way of specifying options to user programs
                    "--input=%s" % wordcount_in,
                    "--output=%s" % self.wordcount_out]
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                args=job_args,
                clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount")
    master(job)
def run(self):
    """Run the flink-dataflow StreamingPipeline example over the HDFS text corpus."""
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def job():
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                clazz="com.dataartisans.flink.dataflow.examples.StreamingPipeline",
                args=[wordcount_in, self.wordcount_out])
    master(job)
def run(self):
    """Upload the 0.8 WordCount example jar and run it on generated HDFS data."""
    def submit():
        hdfs_in = "hdfs://%s:50040/generated-wc.txt" % env.master
        hdfs_out = "hdfs://%s:50040/tmp/wc-out/" % env.master
        run_jar(path="experiments/wordcount_files/",
                jar_name="flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar",
                args=[hdfs_in, hdfs_out],
                upload=True)
    master(submit)
def run(self):
    """Generate ALS benchmark data with the ALSDataGeneration job, using the
    matrix-shape and distribution parameters configured on self."""
    def submit():
        gen_args = [self.num_rows, self.num_cols,
                    self.mean_entry, self.variance_entry,
                    self.mean_num_row_entries, self.variance_num_row_entries,
                    self.out_path]
        run_jar("%s/flink-jobs/target" % self.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=gen_args,
                clazz="com.github.projectflink.als.ALSDataGeneration")
    master(submit)
def run(self):
    """Run the ALSJoinBlocking benchmark against the hard-coded dataset.

    TODO (kept from original): obtain als_in from the generator directly
    instead of hard-coding the dataset path.
    """
    als_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
    als_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))
    def submit():
        run_jar("%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=["master", 15, 1, 10, 100, "rand",
                      "%s/als-temp/" % get_hdfs_address(),
                      als_in, als_out],
                clazz="com.github.projectflink.als.ALSJoinBlocking")
    master(submit)
def run(self):
    """Run DataflowWordCount; program options follow a "--" separator as
    required by Flink 0.8."""
    wordcount_in = "%s/text" % get_hdfs_address()
    self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
    def submit():
        run_jar("%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount",
                args=["--",  # Flink 0.8 way of specifying options to user programs
                      "--input=%s" % wordcount_in,
                      "--output=%s" % self.wordcount_out])
    master(submit)
def run(self):
    """Generate TPC-H lineitems into HDFS; afterwards self.out is advanced to
    point at the produced lineitems.csv for downstream benchmarks."""
    self.out = get_hdfs_address() + "/avro-benchmark/tpch1/"  # + lineitems.csv
    def submit():
        run_jar("%s/flink-jobs/target" % self.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=["-s", self.scale_factor,
                      "-p", self.parallelism,
                      "-o", self.out],
                clazz="com.github.projectflink.avro.GenerateLineitems")
    master(submit)
    # update path for benchmark
    self.out += "lineitems.csv"
def run(self):
    """Run the ALSJoinBlocking benchmark job.

    TODO (kept from original): the dataset path is hard-coded; it should be
    taken from the generator directly.
    """
    als_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
    als_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))
    def submit():
        job_args = ["master", 15, 1, 10, 100, "rand",
                    "%s/als-temp/" % get_hdfs_address(), als_in, als_out]
        run_jar("%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                clazz="com.github.projectflink.als.ALSJoinBlocking",
                args=job_args)
    master(submit)
def maven(self, target):
    """Build the repo with the given maven target, skipping the build when the
    last build already used this target."""
    if target == self.built_using:
        return  # avoid building multiple times
    master("cd %s && mvn %s > /dev/null" % (self.path, target))
    self.built_using = target
def shutdown(self):
    """Remove the generated wordcount data from local disk and from HDFS."""
    master("rm -rf /tmp/wc-data/generated-wc.txt")
    for hdfs_path in ("generated-wc.txt", "/tmp/wc-out"):
        # default-arg binding so each lambda captures its own path
        master(lambda p=hdfs_path: delete_from_hdfs(p))
def clone(self):
    """Clone the repository once, removing any stale checkout first."""
    if self.cloned:
        return
    master("rm -rf %s && git clone %s %s" % (self.path, self.url, self.path))
    self.cloned = True
def checkout(self, commit):
    """Check out the given commit unless already on it; clears the build
    marker so the next maven() call rebuilds."""
    if commit == self.commit:
        return
    master("cd %s && git checkout %s" % (self.path, commit))
    self.commit = commit
    self.built_using = None
def setup(self):
    """Install prerequisites, generate the wordcount data set, and push it to HDFS."""
    # generate wc data
    for pkg in ("wget", "ruby", "bzip2", "aspell"):
        # default-arg binding so each lambda captures its own package name
        master(lambda p=pkg: install(p))
    generate_wc_data = render_template(
        "experiments/wordcount_files/gen_wc_data.sh.mustache", self.params)
    master(lambda: exec_bash(generate_wc_data))
    master(lambda: copy_to_hdfs("/tmp/wc-data/generated-wc.txt", "generated-wc.txt"))
def setup(self):
    """Prepare the wordcount benchmark: install tools, render and run the
    data-generation script, then copy the result into HDFS."""
    # generate wc data
    master(lambda: install("wget"))
    master(lambda: install("ruby"))
    master(lambda: install("bzip2"))
    master(lambda: install("aspell"))
    script = render_template(
        "experiments/wordcount_files/gen_wc_data.sh.mustache",
        self.params)
    master(lambda: exec_bash(script))
    master(lambda: copy_to_hdfs("/tmp/wc-data/generated-wc.txt",
                                "generated-wc.txt"))
def shutdown(self):
    """Remove the job output from HDFS so the benchmark can be restarted."""
    # delete out_path to be able to restart benchmark
    cleanup = lambda: delete_from_hdfs(self.out_path)
    master(cleanup)
def shutdown(self):
    """Clean up the wordcount output directory in HDFS."""
    def cleanup():
        delete_from_hdfs(self.wordcount_out)
    master(cleanup)