def Part3(self):
    """Runs the SongPlayCounter job and checks the head of its HDFS output.

    Launches org.kiji.express.music.SongPlayCounter through `express job` or
    `express.py job`, depending on `self._python`, asserts the job exits with
    code 0, then reads the first three lines of the `part-00000` output file
    and expects each one to be a `song-<digits>`, a tab, and a number.
    """
    # Each command below is written as one `|`-prefixed line per segment,
    # every segment ending in a shell line continuation. Presumably
    # base.StripMargin drops the leading whitespace and `|` of each line
    # (TODO confirm against base); the markers must therefore start a line,
    # otherwise the shell would see them as pipes.
    #
    # NOTE(review): `== False` is kept instead of `not self._python` because
    # the two differ for None; confirm `_python` is always a bool before
    # simplifying.
    if self._python == False:
        cmd = base.StripMargin("""
            |express job \\
            | ${EXPRESS_MUSIC_JAR} \\
            | org.kiji.express.music.SongPlayCounter \\
            | --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
            | --table-uri ${KIJI}/users \\
            | --output ${HDFS_BASE}/express-tutorial/songcount-output \\
            | --hdfs
            """)
    else:
        cmd = base.StripMargin("""
            |express.py \\
            | job \\
            | -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
            | -user_jar=${EXPRESS_MUSIC_JAR} \\
            | -job_name=org.kiji.express.music.SongPlayCounter \\
            | -mode=hdfs \\
            | --table-uri ${KIJI}/users \\
            | --output ${HDFS_BASE}/express-tutorial/songcount-output \\
            """)
    play_count = self.Command(cmd)
    assert play_count.exit_code == 0

    # This literal is not passed through StripMargin: its `|` is a real shell
    # pipe that limits the output to three lines.
    fs_text = self.Command(""" hadoop fs -text ${HDFS_BASE}/express-tutorial/songcount-output/part-00000 | head -3 """)
    tutorial_test.Expect(expect=0, actual=fs_text.exit_code)

    # Drop empty lines before counting.
    lines = list(filter(None, self.StripJavaHomeLine(fs_text.output_lines)))
    tutorial_test.Expect(expect=3, actual=len(lines))
    for line in lines:
        tutorial_test.ExpectRegexMatch(expect=r'^song-\d+\t\d+$', actual=line)
def Part1(self):
    """Runs the setup part of the KijiExpress Music tutorial.

    http://docs.kiji.org/tutorials/express-recommendation/DEVEL/express-setup/

    Installs the Kiji instance, runs kiji-schema-shell on music-schema.ddl,
    checks that the `songs` and `users` tables are listed, creates the
    express-tutorial directory in HDFS and copies the example JSON data
    into it. Every command is asserted to exit with code 0.
    """
    # --------------------------------------------------------------------------
    install = self.Command('kiji install --kiji=${KIJI}')
    assert install.exit_code == 0
    assert 'Successfully created kiji instance: ' in install.output_text

    # --------------------------------------------------------------------------
    # Multi-line commands are written as one `|`-prefixed line per segment,
    # each ending in a shell line continuation. Presumably base.StripMargin
    # drops the leading whitespace and `|` of each line (TODO confirm against
    # base); the markers must start a line or the shell would treat them as
    # pipes.
    create_table = self.Command(
        base.StripMargin("""
            |kiji-schema-shell \\
            | --kiji=${KIJI} \\
            | --file=${MUSIC_EXPRESS_HOME}/music-schema.ddl \\
            """))
    # Printed unconditionally, before the exit-code check, so the error text
    # is visible when the assertion below fails.
    print(create_table.error_text)
    assert create_table.exit_code == 0

    # --------------------------------------------------------------------------
    list_tables = self.Command('kiji ls ${KIJI}')
    assert list_tables.exit_code == 0
    assert 'songs' in list_tables.output_text, (
        'Missing table "songs": %s' % list_tables.output_lines)
    assert 'users' in list_tables.output_text, (
        'Missing table "users": %s' % list_tables.output_lines)

    # --------------------------------------------------------------------------
    mkdir = self.Command('hadoop fs -mkdir ${HDFS_BASE}/express-tutorial/')
    assert mkdir.exit_code == 0

    copy = self.Command(
        base.StripMargin("""
            |hadoop fs -copyFromLocal \\
            | ${MUSIC_EXPRESS_HOME}/example_data/*.json \\
            | ${HDFS_BASE}/express-tutorial/
            """))
    assert copy.exit_code == 0
def Part4(self):
    """Runs the TopNextSongs job and checks the resulting `songs` rows.

    Launches org.kiji.express.music.TopNextSongs through `express job` or
    `express.py job`, depending on `self._python`, asserts the job exits with
    code 0, then scans two rows of the `songs` table and checks that each row
    shows an `info:metadata` cell followed by an `info:top_next_songs` cell.
    """
    # Each command below is written as one `|`-prefixed line per segment,
    # every segment ending in a shell line continuation. Presumably
    # base.StripMargin drops the leading whitespace and `|` of each line
    # (TODO confirm against base); the markers must therefore start a line,
    # otherwise the shell would see them as pipes.
    #
    # NOTE(review): `== False` is kept instead of `not self._python` because
    # the two differ for None; confirm `_python` is always a bool before
    # simplifying.
    if self._python == False:
        cmd = base.StripMargin("""
            |express job \\
            | ${EXPRESS_MUSIC_JAR} \\
            | org.kiji.express.music.TopNextSongs \\
            | --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
            | --users-table ${KIJI}/users \\
            | --songs-table ${KIJI}/songs --hdfs
            """)
    else:
        # NOTE(review): this variant passes both `-mode=hdfs` and `--hdfs`;
        # left exactly as found.
        cmd = base.StripMargin("""
            |express.py \\
            | job \\
            | -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
            | -user_jar=${EXPRESS_MUSIC_JAR} \\
            | -job_name=org.kiji.express.music.TopNextSongs \\
            | -mode=hdfs \\
            | --users-table ${KIJI}/users \\
            | --songs-table ${KIJI}/songs --hdfs
            """)
    top_songs = self.Command(cmd)
    assert top_songs.exit_code == 0

    list_rows = self.Command('kiji scan ${KIJI}/songs --max-rows=2')
    assert list_rows.exit_code == 0

    stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
    assert stripped_output[0].startswith('Scanning kiji table: kiji://')

    # After the one-line scan header, every row is checked as five lines:
    # metadata cell header, metadata value, top_next_songs cell header,
    # top_next_songs value, blank separator.
    row_count = 2
    lines_per_row = 5
    assert len(stripped_output) >= lines_per_row * row_count + 1, len(stripped_output)
    for row in range(row_count):
        first = 1 + row * lines_per_row
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['song-\d+'\] \[\d+\] info:metadata$",
            actual=stripped_output[first])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*{\s*\"song_name\".*\"album_name\".*\"artist_name\".*\"genre\".*\"tempo\".*\"duration\".*\s*}\s*$",
            actual=stripped_output[first + 1])
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['song-\d+'\] \[\d+\] info:top_next_songs$",
            actual=stripped_output[first + 2])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*{\s*\"top_songs\".*}$",
            actual=stripped_output[first + 3])
        tutorial_test.ExpectRegexMatch(
            expect=r"^$",
            actual=stripped_output[first + 4])
def Part5(self):
    """Runs the SongRecommender job and checks the resulting `users` rows.

    Launches org.kiji.express.music.SongRecommender through `express job` or
    `express.py job`, depending on `self._python`, asserts the job exits with
    code 0, then scans two rows of the `users` table and checks that each row
    shows an `info:track_plays` cell followed by an `info:next_song_rec` cell.
    """
    # Each command below is written as one `|`-prefixed line per segment,
    # every segment ending in a shell line continuation. Presumably
    # base.StripMargin drops the leading whitespace and `|` of each line
    # (TODO confirm against base); the markers must therefore start a line,
    # otherwise the shell would see them as pipes.
    #
    # NOTE(review): `== False` is kept instead of `not self._python` because
    # the two differ for None; confirm `_python` is always a bool before
    # simplifying.
    if self._python == False:
        cmd = base.StripMargin("""
            |express job ${EXPRESS_MUSIC_JAR} \\
            | org.kiji.express.music.SongRecommender \\
            | --songs-table ${KIJI}/songs \\
            | --users-table ${KIJI}/users
            """)
    else:
        cmd = base.StripMargin("""
            |express.py \\
            | job \\
            | -user_jar=${EXPRESS_MUSIC_JAR} \\
            | -job_name=org.kiji.express.music.SongRecommender \\
            | -mode=hdfs \\
            | --songs-table ${KIJI}/songs \\
            | --users-table ${KIJI}/users
            """)
    song_recommend = self.Command(cmd)
    assert song_recommend.exit_code == 0

    list_rows = self.Command("kiji scan ${KIJI}/users --max-rows=2")
    assert list_rows.exit_code == 0

    stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
    assert stripped_output[0].startswith('Scanning kiji table: kiji://')

    # After the one-line scan header, every row is checked as five lines:
    # track_plays cell header, song id, next_song_rec cell header, song id,
    # blank separator.
    row_count = 2
    lines_per_row = 5
    assert len(stripped_output) >= lines_per_row * row_count + 1, len(stripped_output)
    for row in range(row_count):
        first = 1 + row * lines_per_row
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['user-\d+'\] \[\d+\] info:track_plays$",
            actual=stripped_output[first])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*song-\d+$",
            actual=stripped_output[first + 1])
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['user-\d+'\] \[\d+\] info:next_song_rec$",
            actual=stripped_output[first + 2])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*song-\d+$",
            actual=stripped_output[first + 3])
        tutorial_test.ExpectRegexMatch(
            expect=r"^$",
            actual=stripped_output[first + 4])
def Part2(self):
    """Runs the importing part of the KijiExpress Music tutorial.

    http://docs.kiji.org/tutorials/express-recommendation/DEVEL/express-importing-data/

    Runs the SongMetadataImporter job and checks five scanned rows of the
    `songs` table, then runs the SongPlaysImporter job and checks five
    scanned rows of the `users` table. Each job is launched through
    `express job` or `express.py job`, depending on `self._python`.
    """
    # Each command below is written as one `|`-prefixed line per segment,
    # every segment ending in a shell line continuation. Presumably
    # base.StripMargin drops the leading whitespace and `|` of each line
    # (TODO confirm against base); the markers must therefore start a line,
    # otherwise the shell would see them as pipes.
    #
    # NOTE(review): `== False` is kept instead of `not self._python` because
    # the two differ for None; confirm `_python` is always a bool before
    # simplifying.

    # After the one-line scan header, every row is checked as three lines:
    # cell header, cell value, blank separator.
    row_count = 5
    lines_per_row = 3

    # --------------------------------------------------------------------------
    if self._python == False:
        cmd = base.StripMargin("""
            |express job \\
            | ${EXPRESS_MUSIC_JAR} \\
            | org.kiji.express.music.SongMetadataImporter \\
            | --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
            | --input ${HDFS_BASE}/express-tutorial/song-metadata.json \\
            | --table-uri ${KIJI}/songs --hdfs
            """)
    else:
        # NOTE(review): this command spells the launcher flags with `--`
        # while the SongPlaysImporter command further down uses a single
        # `-`; left exactly as found.
        cmd = base.StripMargin("""
            |express.py \\
            | job \\
            | --libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
            | --user_jar=${EXPRESS_MUSIC_JAR} \\
            | --job_name=org.kiji.express.music.SongMetadataImporter \\
            | --mode=hdfs \\
            | --input ${HDFS_BASE}/express-tutorial/song-metadata.json \\
            | --table-uri ${KIJI}/songs
            """)
    song_metadata_import = self.Command(cmd)
    assert song_metadata_import.exit_code == 0

    # --------------------------------------------------------------------------
    list_rows = self.Command('kiji scan ${KIJI}/songs --max-rows=5')
    assert list_rows.exit_code == 0

    # Strip the first line from the output, if it is about $JAVA_HOME not set.
    stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
    assert stripped_output[0].startswith('Scanning kiji table: kiji://')
    assert len(stripped_output) >= lines_per_row * row_count + 1, len(stripped_output)
    for row in range(row_count):
        first = 1 + row * lines_per_row
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['song-\d+'\] \[\d+\] info:metadata$",
            actual=stripped_output[first])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*{\s*\"song_name\".*\"album_name\".*\"artist_name\".*\"genre\".*\"tempo\".*\"duration\".*\s*}\s*$",
            actual=stripped_output[first + 1])
        tutorial_test.ExpectRegexMatch(
            expect=r"^$",
            actual=stripped_output[first + 2])

    # --------------------------------------------------------------------------
    if self._python == False:
        cmd = base.StripMargin("""
            |express job \\
            | ${EXPRESS_MUSIC_JAR} \\
            | org.kiji.express.music.SongPlaysImporter \\
            | --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
            | --input ${HDFS_BASE}/express-tutorial/song-plays.json \\
            | --table-uri ${KIJI}/users --hdfs
            """)
    else:
        cmd = base.StripMargin("""
            |express.py \\
            | job \\
            | -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
            | -user_jar=${EXPRESS_MUSIC_JAR} \\
            | -job_name=org.kiji.express.music.SongPlaysImporter \\
            | -mode=hdfs \\
            | --input ${HDFS_BASE}/express-tutorial/song-plays.json \\
            | --table-uri ${KIJI}/users
            """)
    user_data_import = self.Command(cmd)
    assert user_data_import.exit_code == 0

    # --------------------------------------------------------------------------
    list_rows = self.Command('kiji scan ${KIJI}/users --max-rows=5')
    assert list_rows.exit_code == 0

    stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
    assert stripped_output[0].startswith('Scanning kiji table: kiji://')
    assert len(stripped_output) >= lines_per_row * row_count + 1, len(stripped_output)
    for row in range(row_count):
        first = 1 + row * lines_per_row
        tutorial_test.ExpectRegexMatch(
            expect=r"^entity-id=\['user-\d+'\] \[\d+\] info:track_plays$",
            actual=stripped_output[first])
        tutorial_test.ExpectRegexMatch(
            expect=r"^\s*song-\d+$",
            actual=stripped_output[first + 1])
        tutorial_test.ExpectRegexMatch(
            expect=r"^$",
            actual=stripped_output[first + 2])