def get_wiki_urls_top_n_phrases(self, n):
    """Append a Wikipedia mapping entry for each of the first ``n`` phrases.

    For every index ``i`` in ``range(n)`` the phrase text is read from
    ``self.string_phrases_nouns[i][1]`` and looked up through
    ``get_wiki_url_and_content_by_keyphrase``. On success a dict with the
    keys ``article_url``, ``content``, ``title`` and ``topic`` is appended
    to ``self.mapping``. If the lookup raises, it is retried once with a
    shortened phrase; if that fails too, the phrase is skipped silently
    (best-effort behaviour kept from the original).

    :param n: number of leading entries of ``self.string_phrases_nouns``
        to process; an ``IndexError`` propagates if fewer entries exist.
    """
    # TODO get proper categories

    def build_entry(keyphrase):
        # Build one mapping entry; any failure propagates to the caller.
        url, content, cats = get_wiki_url_and_content_by_keyphrase(keyphrase)
        category = get_shortest_in_list(cats, keyphrase)
        return {
            "article_url": url,
            "content": quote(str(content)),
            # The title is derived from the last URL path segment.
            "title": quote(str(list_to_string(url.split('/')[-1].split('_')))),
            "topic": quote(str(category)),
        }

    for i in range(n):
        phrase = self.string_phrases_nouns[i][1]
        try:
            self.mapping.append(build_entry(phrase))
        except Exception:
            try:
                # Integer division: a float slice index raises TypeError on
                # Python 3, which made this fallback a silent no-op.
                # NOTE(review): this slices *characters* by half the *word*
                # count, which looks unintended - confirm the desired cut.
                shortened = phrase[:len(phrase.split(' ')) // 2]
                self.mapping.append(build_entry(shortened))
            except Exception:
                # Best effort: skip phrases that cannot be resolved.
                pass
def collect_mms_ids(project, table):
    """Add package and portfolio MMS ID columns to every row of ``table``.

    Each row yielded by ``table.iterrows()`` is looked up through
    ``alma_primo_service.collect_portfolios_and_packages`` (field ``lds03``,
    search term from the ``Kennzeichen`` column, hit count from
    ``total_in_Alma``). The results are stored as strings in the
    ``Paket_MMS`` and ``Portfolio_MMS`` columns.

    :param project: project identifier passed through to the services.
    :param table: object exposing ``iterrows()`` that yields (index, row).
    :return: whatever ``write_table`` returns for the extended rows.
    """
    logging.info('project {}: starting collection of mms ids'.format(project))
    extended_rows = []
    for _, entry in table.iterrows():
        found_portfolios, found_packages = alma_primo_service.collect_portfolios_and_packages(
            project=project,
            field='lds03',
            search_term=entry['Kennzeichen'],
            number_of_hits=entry['total_in_Alma'])
        entry['Paket_MMS'] = list_to_string(found_packages)
        entry['Portfolio_MMS'] = list_to_string(found_portfolios)
        extended_rows.append(entry)
    # Save the extended table and return it.
    return write_table(project=project, rows=extended_rows)
def update_collections(project, table):
    """Turn each listed collection into a selective package with a full-text service.

    For every row, the semicolon-separated ``Collection_IDs`` are processed
    one by one: the collection type is switched via
    ``alma_electronic_service.set_type_to_selective_package`` and, if that
    succeeds, the ID of its ``getFullText`` service is recorded. When the
    collection has no such service, one is created with
    ``add_active_full_text_service``. The collected IDs are written to the
    ``Service_IDs`` column.

    :param project: project identifier passed through to the services.
    :param table: object exposing ``iterrows()`` that yields (index, row).
    :return: whatever ``write_table`` returns for the extended rows.
    """
    new_table_rows = []
    # Walk through all packages.
    for index, row in table.iterrows():
        service_ids = []
        # Walk through all collection IDs of this package.
        for collection_id in row['Collection_IDs'].split(';'):
            logging.info('updating collection {}'.format(collection_id))
            # Change the type to "selective package".
            success_type_change = alma_electronic_service.set_type_to_selective_package(
                project=project, collection_id=collection_id)
            # Only add a full-text service if the type change succeeded.
            if not success_type_change:
                continue
            service = None
            service_all = alma_electronic_service.retrieve_services(collection_id=collection_id)
            if service_all is not None:
                for service_individual in service_all:
                    if service_individual['type']['value'] == 'getFullText':
                        # As before, the last matching service wins.
                        service = service_individual
            # Previously tested ``service_all is None``: a collection that had
            # services but no getFullText one left ``service`` as None and
            # crashed on ``service['id']`` below.
            if service is None:
                service = alma_electronic_service.add_active_full_text_service(
                    project=project, collection_id=collection_id)
            service_ids.append(service['id'])
        row['Service_IDs'] = list_to_string(service_ids)
        new_table_rows.append(row)
    # Save the extended table and return it.
    return write_table(project=project, rows=new_table_rows)
def _get_queued_video_info( feature = 0 ):
    """Collect and log details about the queued movie at playlist index ``feature``.

    The playlist comes from ``_store_playlist()`` and the movie record from
    ``_movie_details()``. If anything in that lookup fails, the traceback is
    printed and every reported field falls back to an empty string.

    :param feature: index into the stored playlist (default 0).
    :return: tuple ``(short_mpaa, audio, genre, path, equivalent_mpaa)``.
    """
    log( "_get_queued_video_info() Started" )
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_detail = _movie_details( plist[feature]['id'] )
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        genre = list_to_string( movie_detail['genre'] )
        try:
            audio = movie_detail['streamdetails']['audio'][0]['codec']
        except Exception:
            # no audio stream details available
            audio = "other"
        equivalent_mpaa, short_mpaa = get_equivalent_rating( mpaa )
    except Exception:
        traceback.print_exc()
        # The old chained assignment ended in ``equivalent_mpaa, short_mpaa = ""``,
        # which tries to unpack an empty string and raises ValueError, so the
        # error handler itself crashed. Assign each name individually.
        movie_title = path = mpaa = audio = genre = equivalent_mpaa = short_mpaa = ""
    # spew queued video info to log
    log( "Queued Movie Information" )
    log( "%s" % log_sep )
    log( "Title: %s" % movie_title )
    log( "Path: %s" % path )
    log( "Genre: %s" % genre )
    log( "MPAA: %s" % short_mpaa )
    log( "Audio: %s" % audio )
    if video_settings[ "audio_videos_folder" ]:
        # folder + audio format name + the folder's own last character
        log( "Folder: %s" % ( video_settings[ "audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + video_settings[ "audio_videos_folder" ][ -1 ], ) )
    log( "%s" % log_sep )
    # return results
    return short_mpaa, audio, genre, path, equivalent_mpaa
def _get_queued_video_info(feature=0):
    """Gather, log and return details about the queued movie.

    Reads the playlist entry at index ``feature``, fetches its movie record
    and derives rating, audio codec, stereo mode and a 3D flag. If the
    lookup raises, the traceback is printed, the text fields fall back to
    ``""`` and the database id to ``0``.

    :param feature: index into the stored playlist (default 0).
    :return: tuple ``(short_mpaa, audio, genre, movie_id, equivalent_mpaa,
        is_3d_movie)``.
    """
    utils.log("_get_queued_video_info() Started")
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_id = plist[feature]['id']
        movie_detail = _movie_details(movie_id)
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        genre = utils.list_to_string(movie_detail['genre'])
        stream_details = movie_detail['streamdetails']
        try:
            audio = stream_details['audio'][0]['codec']
        except:
            audio = "other"
        try:
            stereomode = stream_details['video'][0]['stereomode']
        except:
            stereomode = ""
        equivalent_mpaa, short_mpaa = get_equivalent_rating(mpaa)
    except:
        traceback.print_exc()
        movie_title = path = mpaa = audio = genre = ""
        equivalent_mpaa = short_mpaa = stereomode = ""
        movie_id = 0

    if stereomode == "":
        # if database still has an empty stereomode, test filename
        is_3d_movie = test_for_3d(path)
    else:
        # any stereo mode other than "mono" counts as 3D
        is_3d_movie = stereomode != "mono"

    # spew queued video info to log
    utils.log("Queued Movie Information")
    utils.log("%s" % log_sep)
    for template, value in (
            ("Title: %s", movie_title),
            ("Database ID: %s", movie_id),
            ("Path: %s", path),
            ("Genre: %s", genre),
            ("Rating: %s", short_mpaa),
            ("Audio: %s", audio),
            ("Stereo Mode: %s", stereomode),
    ):
        utils.log(template % value)
    utils.log("3D Movie: %s" % ("False", "True")[is_3d_movie])
    if video_settings["audio_videos_folder"]:
        # prefer the dedicated 3D folder when the movie is 3D and one is set
        if is_3d_movie and _3d_settings["3d_audio_videos_folder"]:
            base_folder = _3d_settings["3d_audio_videos_folder"]
        else:
            base_folder = video_settings["audio_videos_folder"]
        utils.log("Folder: %s" % (base_folder + audio_formats.get(audio, "Other") + base_folder[-1], ))
    utils.log("%s" % log_sep)
    # return results
    return short_mpaa, audio, genre, movie_id, equivalent_mpaa, is_3d_movie
def retrieve_service_ids(project, table, library=None):
    """Fill the ``Service_IDs`` column from each row's ``Collection_IDs``.

    :param project: project identifier passed through to the services.
    :param table: object exposing ``iterrows()`` that yields (index, row).
    :param library: accepted for signature parity; not used by this function.
    :return: whatever ``write_table`` returns for the extended rows.
    """
    extended_rows = []
    # Walk through all packages.
    for _, entry in table.iterrows():
        entry['Service_IDs'] = list_to_string(
            alma_electronic_service.retrieve_service_ids(
                project=project, collection_ids=entry['Collection_IDs']))
        extended_rows.append(entry)
    # Save the extended table and return it.
    return write_table(project=project, rows=extended_rows)
def retrieve_portfolio_ids(project, table, library=None):
    """Fill the ``Portfolio_IDs`` column from each row's ``Portfolio_MMS``.

    :param project: project identifier passed through to the services.
    :param table: object exposing ``iterrows()`` that yields (index, row).
    :param library: forwarded unchanged to
        ``alma_bib_service.retrieve_portfolio_ids``.
    :return: whatever ``write_table`` returns for the extended rows.
    """
    extended_rows = []
    # Walk through all packages.
    for _, entry in table.iterrows():
        entry['Portfolio_IDs'] = list_to_string(
            alma_bib_service.retrieve_portfolio_ids(
                project=project, mms_ids=entry['Portfolio_MMS'], library=library))
        extended_rows.append(entry)
    # Save the extended table and return it.
    return write_table(project=project, rows=extended_rows)
def __repr__(self):
    """Render the matrix as text: a header row, a rule, then one line per row.

    ``self[0]`` supplies the header values; each following item of ``self``
    is one matrix row, prefixed with the header value at the same position
    and rendered through ``utils.replace_list_to_plus_minus``.
    """
    header_values = self[0]
    # horizontal rule sized from the number of header values
    rule = "-" * ((len(header_values) * 2) + 3)
    header = " | " + utils.list_to_string(header_values) + "\n" + rule + "\n"
    body_lines = []
    for position, matrix_row in enumerate(self[1:]):
        row_label = str(header_values[position])
        body_lines.append(
            row_label + " | " + utils.replace_list_to_plus_minus(matrix_row) + "\n")
    return header + "".join(body_lines)
def get_category_from_categories(self, phrases_list):
    """Extract ranked key phrases from a list of category phrases.

    Every phrase is split on whitespace; words that are English stop words
    or in a small block list are dropped. The remaining words of each
    phrase are followed by a ``'.'`` token, the tokens are joined with
    ``list_to_string`` and handed to ``Rake``.

    :param phrases_list: iterable of phrase strings.
    :return: the result of ``Rake.get_ranked_phrases()`` for the filtered text.
    """
    # Build the exclusion set once; the stop-word list used to be fetched
    # again for every single word.
    excluded_words = set(['articles', 'links', 'containing'])
    excluded_words.update(stopwords.words("english"))

    lst = []
    for phrase in phrases_list:
        lst.extend(word for word in phrase.split() if word not in excluded_words)
        # Separate phrases from one another in the joined text.
        lst.append('.')
    text = list_to_string(lst)
    rec = Rake()
    rec.extract_keywords_from_text(text)
    return rec.get_ranked_phrases()
def _get_queued_video_info( feature = 0 ):
    """Collect and log details about the queued movie at playlist index ``feature``.

    Reads the playlist entry, fetches its movie record and derives rating,
    audio codec, stereo mode and a 3D flag. If the lookup fails, the
    traceback is printed and every reported text field falls back to an
    empty string.

    :param feature: index into the stored playlist (default 0).
    :return: tuple ``(short_mpaa, audio, genre, path, equivalent_mpaa,
        is_3d_movie)``.
    """
    utils.log( "_get_queued_video_info() Started" )
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_detail = _movie_details( plist[feature]['id'] )
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        genre = utils.list_to_string( movie_detail['genre'] )
        try:
            audio = movie_detail['streamdetails']['audio'][0]['codec']
        except Exception:
            # no audio stream details available
            audio = "other"
        try:
            stereomode = movie_detail['streamdetails']['video'][0]['stereomode']
        except Exception:
            # no video stream details available
            stereomode = ""
        equivalent_mpaa, short_mpaa = get_equivalent_rating( mpaa )
    except Exception:
        traceback.print_exc()
        # The old chained assignment ended in
        # ``equivalent_mpaa, short_mpaa, stereomode = ""``, which tries to
        # unpack an empty string and raises ValueError inside the handler.
        movie_title = path = mpaa = audio = genre = ""
        equivalent_mpaa = short_mpaa = stereomode = ""
    if stereomode not in ( "mono", "" ):
        is_3d_movie = True
    elif stereomode == "":
        # if database still has an empty stereomode, test filename
        is_3d_movie = test_for_3d( path )
    else:
        is_3d_movie = False
    # spew queued video info to log
    utils.log( "Queued Movie Information" )
    utils.log( "%s" % log_sep )
    utils.log( "Title: %s" % movie_title )
    utils.log( "Path: %s" % path )
    utils.log( "Genre: %s" % genre )
    utils.log( "Rating: %s" % short_mpaa )
    utils.log( "Audio: %s" % audio )
    utils.log( "Stereo Mode: %s" % stereomode )
    utils.log( "3D Movie: %s" % ( "False", "True" )[ is_3d_movie ] )
    if video_settings[ "audio_videos_folder" ]:
        # prefer the dedicated 3D folder when the movie is 3D and one is set
        if is_3d_movie and _3d_settings[ "3d_audio_videos_folder" ]:
            utils.log( "Folder: %s" % ( _3d_settings[ "3d_audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + _3d_settings[ "3d_audio_videos_folder" ][ -1 ], ) )
        else:
            utils.log( "Folder: %s" % ( video_settings[ "audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + video_settings[ "audio_videos_folder" ][ -1 ], ) )
    utils.log( "%s" % log_sep )
    # return results
    return short_mpaa, audio, genre, path, equivalent_mpaa, is_3d_movie
def retrieve_collection_ids(project, table, library=None):
    """Fill the ``Collection_IDs`` column from each row's ``Paket_MMS``.

    :param project: project identifier passed through to the services.
    :param table: object exposing ``iterrows()`` that yields (index, row).
    :param library: only forwarded to the lookup when it is not ``None``.
    :return: whatever ``write_table`` returns for the extended rows.
    """
    extended_rows = []
    # Walk through all packages.
    for _, entry in table.iterrows():
        lookup_kwargs = {'project': project, 'mms_ids': entry['Paket_MMS']}
        if library is not None:
            lookup_kwargs['library'] = library
        found_ids = alma_bib_service.retrieve_collection_id_for_mms_id(**lookup_kwargs)
        entry['Collection_IDs'] = list_to_string(found_ids)
        extended_rows.append(entry)
    # Save the extended table and return it.
    return write_table(project=project, rows=extended_rows)
def _connect_widgets(self):
    """Bind settings to their widgets and wire up the power-off controls.

    Five settings keys are bound to the widget of the same name, the
    power-off intervals entry is filled from the settings and given a
    ``Validator``, and the shutdown/suspend/hibernate widgets are
    initialised and connected to ``on_poweroff_type_activate``.
    """
    # (settings key == widget name, bound widget property)
    bound_properties = (
        ("enable-notifications", "active"),
        ("monitor-networks", "active"),
        ("notify-before-poweroff", "value"),
        ("player-poweroff-timeout", "value"),
        ("actions-file", "text"),
    )
    for key, widget_property in bound_properties:
        self.settings.bind(
            key,
            self.ui.get_widget(key),
            widget_property,
            Gio.SettingsBindFlags.DEFAULT,
        )

    intervals_text = utils.list_to_string(self.settings.get_unpacked("poweroff-intervals"))
    self.ui.get_widget("power-off-intervals").set_text(intervals_text)
    self._validator_po = Validator(
        self.ui.get_widget("power-off-intervals"),
        allowed_chars="0123456789, ",
        regexp=r"(\d{1,6}\s?,?\s?){1,20}",
        min_length=1,
        on_check=self.on_power_off_intervals_changed,
        no_cb_inittialy=True,
    )

    power_widgets = ("shutdown", "suspend", "hibernate")
    # Set the initial state first, then connect, so the handler is not
    # attached while the initial state is being applied.
    for widget_name in power_widgets:
        if self.power_actions.get_action(widget_name).get_active():
            self.ui.get_widget(widget_name).set_active(True)
    for widget_name in power_widgets:
        self.ui.get_widget(widget_name).connect(
            "toggled", self.on_poweroff_type_activate, widget_name)
def fetch_trailers( self ):
    """Query the video library for movie trailers and append them to ``self.trailers``.

    Sends a ``VideoLibrary.GetMovies`` JSON-RPC request filtered by
    ``self.mpaa`` and ``self.genre`` (plus ``playcount == 0`` when the
    ``trailer_unwatched_movie_only`` setting is on), shuffles the result,
    shortens each rating and collects trailers until ``trailer_count`` is
    reached.

    Returns ``self.trailers`` when the response has a ``result`` key,
    otherwise an empty list.

    NOTE(review): nesting was reconstructed from a flattened source -
    confirm against the original file. ``dict.has_key`` exists only on
    Python 2.
    """
    # get watched list
    self._get_watched()
    count = 0
    if self.settings[ "trailer_unwatched_movie_only" ]:
        # same query as below plus a playcount == 0 filter
        jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [ { "field": "playcount", "operator": "is", "value": "0" }, { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] } }, "id": 1}''' % ( self.mpaa, self.genre )
    else:
        jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [ { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] } }, "id": 1}''' % ( self.mpaa, self.genre )
    jsonresponse = xbmc.executeJSONRPC( jsonquery )
    data = simplejson.loads( jsonresponse )
    if data.has_key('result'):
        if data['result'].has_key('movies'):
            trailers = data['result']['movies']
            shuffle( trailers )
            for trailer in trailers:
                trailer_rating = trailer['mpaa']
                # shorten MPAA/BBFC ratings
                # The split(...)[ 1 - ( len(...) == 1 ) ] idiom below picks the
                # second token, or the first when there is only one token.
                # ( value, "NR" )[ value not in (...) ] yields "NR" for any
                # value outside the listed ratings.
                if trailer_rating == "":
                    trailer_rating = "NR"
                #MPAA
                if trailer_rating.startswith("Rated"):
                    trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "G", "PG", "PG-13", "R", "NC-17", "Unrated", ) ]
                #BBFC
                elif trailer_rating.startswith("UK"):
                    if trailer_rating.startswith( "UK:" ):
                        trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                    else:
                        trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "12", "12A", "PG", "15", "18", "R18", "MA", "U", ) ]
                #FSK
                elif trailer_rating.startswith("FSK"):
                    if trailer_rating.startswith( "FSK:" ):
                        trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                    else:
                        trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                #DEJUS
                elif trailer_rating in ( "Livre", "10 Anos", "12 Anos", "14 Anos", "16 Anos", "18 Anos" ):
                    # these labels are kept unchanged
                    trailer_rating = trailer_rating
                else:
                    # adding this just in case there is some with different labels in database
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "0", "6", "12", "12A", "PG", "15", "16", "18", "R18", "MA", "U", ) ]
                # add trailer to our final list
                if trailer[ 'trailer' ]:
                    if trailer['trailer'].startswith( 'plugin://' ) and self.settings['trailer_skip_youtube']:
                        # plugin:// trailers are skipped when the setting asks for it
                        continue
                    else:
                        trailer_info = ( xbmc.getCacheThumbName( trailer['trailer'] ), # id
                                         trailer['label'], # title
                                         trailer['trailer'], # trailer
                                         trailer['thumbnail'], # thumb
                                         trailer['plot'], # plot
                                         '', # runtime
                                         trailer_rating, # mpaa
                                         '', # release date
                                         '', # studio
                                         utils.list_to_string( trailer['genre'] ), # genre
                                         'Trailer', # writer
                                         '', # director 32613
                                         )
                        self.trailers += [ trailer_info ]
                        # add id to watched file TODO: maybe don't add if not user preference
                        self.watched += [ xbmc.getCacheThumbName( trailer['trailer'] ) ]
                        # increment counter
                        count += 1
                        # if we have enough exit
                        if count == self.settings[ "trailer_count" ]:
                            break
        else:
            utils.log( "No Movie Trailers found", xbmc.LOGNOTICE )
        self._save_watched()
        return self.trailers
    else:
        utils.log( "No results found", xbmc.LOGNOTICE )
        return []
# Build the label and feature vocabularies from the training set: every new
# language gets the next label index (with a reverse lookup), every new
# element of a text gets the next feature index.
i = 0
j = 0
for [language, text] in ut.TRAIN:
    if language not in lang:
        lang[language] = i
        lang_previous[i] = language
        i += 1
    for element in text:
        if element not in bigrams:
            bigrams[element] = j
            j += 1

in_dim = len(bigrams)
out_dim = len(lang)

train_data = build_data(ut.TRAIN, bigrams, lang)
dev_data = build_data(ut.DEV, bigrams, lang)
test_data = build_data(ut.TEST, bigrams, lang)

num_iterations = 500
learning_rate = 0.0001
hid_dim = int(np.log(in_dim * out_dim))

# NOTE(review): the classifier is created through ``ml1`` but predictions
# below go through ``ml`` - confirm both modules are meant to be used.
params = ml1.create_classifier(in_dim, hid_dim, out_dim)
trained_params = train_classifier(train_data, dev_data, num_iterations,
                                  learning_rate, params)

# Map each predicted label index back to its language name.
results = []
for [label, data] in test_data:
    results.append(lang_previous[ml.predict(data, trained_params)])

# A context manager closes the file even if writing fails; the previous
# open()/close() pair leaked the handle on error.
with open('test.pred', 'w') as output_file:
    output_file.write(ut.list_to_string(results))
""" Example solution to "Finding a Motif in DNA", from rosalind.info. """ from utils import list_to_string def find(string, motif): pos = [] for i in range(len(string) - len(motif) + 1): if string[i:].startswith(motif): pos.append(i + 1) return pos if __name__ == "__main__": print(find("GATATATGCATATACTTATAT", "ATAT")) with open("inps/rosalind_subs.txt") as f: string, motif = f.read().split() print(list_to_string(find(string, motif)))
def main(): parser = argparse.ArgumentParser() parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning Rate') parser.add_argument('--batch_size', type=int, default=1, help='Learning Rate') parser.add_argument('--max_epochs', type=int, default=1000, help='Max Epochs') parser.add_argument('--beta1', type=float, default=0.5, help='Momentum for Adam Update') parser.add_argument('--resume_model', type=str, default=None, help='Pre-Trained Model Path, to resume from') parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory') args = parser.parse_args() # model_config = json.loads( open('model_config.json').read() ) config = model_config.predictor_config model_options = { 'n_source_quant': config['n_source_quant'], 'n_target_quant': config['n_target_quant'], 'residual_channels': config['residual_channels'], 'decoder_dilations': config['decoder_dilations'], 'sample_size': config['sample_size'], 'decoder_filter_width': config['decoder_filter_width'], 'batch_size': args.batch_size, } byte_net = model.Byte_net_model(model_options) bn_tensors = byte_net.build_prediction_model() optim = tf.train.AdamOptimizer(args.learning_rate, beta1=args.beta1).minimize( bn_tensors['loss'], var_list=bn_tensors['variables']) sess = tf.InteractiveSession() tf.initialize_all_variables().run() saver = tf.train.Saver() if args.resume_model: saver.restore(sess, args.resume_model) dl = data_loader.Data_Loader({ 'model_type': 'generator', 'dir_name': args.data_dir }) text_samples = dl.load_generator_data(config['sample_size']) print text_samples.shape for i in range(args.max_epochs): batch_no = 0 batch_size = args.batch_size while (batch_no + 1) * batch_size < text_samples.shape[0]: text_batch = text_samples[batch_no * batch_size:(batch_no + 1) * batch_size, :] _, loss, prediction = sess.run( [optim, bn_tensors['loss'], bn_tensors['prediction']], feed_dict={bn_tensors['sentence']: text_batch}) print 
"-------------------------------------------------------" print utils.list_to_string(prediction) print "Loss", i, batch_no, loss print "********************************************************" # print prediction batch_no += 1 if (batch_no % 500) == 0: save_path = saver.save( sess, "Data/Models/model_epoch_{}.ckpt".format(i))
def prob(seq, probs):
    """Log10 probability of seq being randomly built with given probabilities.

    :param seq: iterable of symbols.
    :param probs: mapping from symbol to its probability.
    :return: sum of ``log10(probs[symbol])`` over ``seq`` (``0`` when empty).
    """
    return sum(log10(probs[symb]) for symb in seq)


if __name__ == "__main__":
    print(
        prob("ACGATACAA", {
            "A": 0.871 / 2,
            "T": 0.871 / 2,
            "C": 0.129 / 2,
            "G": 0.129 / 2
        }))
    with open("inps/rosalind_prob.txt", "r") as f:
        # splitlines()/strip() tolerate CRLF line endings.
        contents = f.read().splitlines()
    dna = contents[0].strip()
    floats = contents[1]
    probs = []
    # split() without an argument ignores repeated or trailing whitespace,
    # which used to produce float('') errors.
    for s in floats.split():
        # Named gc_content so it no longer shadows the file handle ``f``.
        gc_content = float(s)
        p, p_ = gc_content / 2, (1 - gc_content) / 2
        d = {"A": p_, "T": p_, "C": p, "G": p}
        probs.append(round(prob(dna, d), 3))
    print(list_to_string(probs))
def main():
    """Train the ByteNet prediction model and log summaries for TensorBoard.

    Parses the training options, builds the prediction model from
    ``model_config.predictor_config``, optionally restores a checkpoint and
    runs ``max_epochs`` passes over the loaded text samples. Every batch
    prints the prediction and loss and writes a summary; a checkpoint is
    saved under ``Data/Models/`` every 500 steps.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help='Learning Rate')
    # help text previously read 'Learning Rate' (copy/paste slip)
    parser.add_argument('--batch_size', type=int, default=1,
                        help='Batch Size')
    parser.add_argument('--max_epochs', type=int, default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1', type=float, default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model', type=str, default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir', type=str, default='Data',
                        help='Data Directory')
    parser.add_argument('--log_dir', type=str, default='logs',
                        help='Path to TensorBoard logs')
    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )
    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    # Set up logging for TensorBoard
    writer = tf.summary.FileWriter(args.log_dir, graph=tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    # NOTE(review): merge_all() returns None when the graph defines no
    # summaries, which would make sess.run() below fail - confirm the model
    # registers at least one summary op.
    summaries = tf.summary.merge_all()

    optim = tf.train.AdamOptimizer(args.learning_rate, beta1=args.beta1) \
        .minimize(bn_tensors['loss'], var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print(text_samples.shape)

    models_path = "Data/Models/"
    if not os.path.exists(models_path):
        os.makedirs(models_path)

    # Step counter that keeps increasing across epochs. The per-epoch
    # ``step`` restarts at 0, so using it for add_summary() wrote every
    # epoch's summaries onto the same x-axis positions.
    global_step = 0
    for epoch in range(args.max_epochs):
        step = 0
        batch_size = args.batch_size
        # ``<=`` so the last complete batch is used too; ``<`` dropped it.
        while (step + 1) * batch_size <= text_samples.shape[0]:
            text_batch = text_samples[step * batch_size:(step + 1) * batch_size, :]
            _, summary, loss, prediction = sess.run([
                optim, summaries, bn_tensors['loss'], bn_tensors['prediction']
            ], feed_dict={
                bn_tensors['sentence']: text_batch
            })

            print("-------------------------------------------------------")
            print(utils.list_to_string(prediction))
            print("Epoch", epoch, " Step", step, " Loss", loss)
            print("********************************************************")

            writer.add_summary(summary, global_step)
            writer.add_run_metadata(
                run_metadata, 'epoch_{:04d}, step_{:04d}'.format(epoch, step))

            step += 1
            global_step += 1

            if step % 500 == 0:
                saver.save(sess, models_path + "model_epoch_{}.ckpt".format(epoch))

    # Flush pending events and release the event file.
    writer.close()
def fetch_trailers( self ):
    """Query the video library for movie trailers and append them to ``self.trailers``.

    Sends a ``VideoLibrary.GetMovies`` JSON-RPC request filtered by
    ``self.mpaa`` and ``self.genre`` (plus ``playcount == 0`` when the
    ``trailer_unwatched_movie_only`` setting is on), shuffles the result,
    shortens each rating and collects trailers until ``trailer_count`` is
    reached. Two kinds of trailer are skipped: ``plugin://`` trailers when
    ``trailer_skip_youtube`` is set, and the trailer whose ``movieid``
    equals ``self.movie`` when ``trailer_not_movie`` is set.

    Returns ``self.trailers`` when the response has a ``result`` key,
    otherwise an empty list.

    NOTE(review): nesting was reconstructed from a flattened source -
    confirm against the original file. ``dict.has_key`` exists only on
    Python 2. ``movieid`` is read from each movie but is not listed in the
    requested "properties" - presumably the API always returns it; verify.
    """
    # get watched list
    self._get_watched()
    count = 0
    if self.settings[ "trailer_unwatched_movie_only" ]:
        # same query as below plus a playcount == 0 filter
        jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [ { "field": "playcount", "operator": "is", "value": "0" }, { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] } }, "id": 1}''' % ( self.mpaa, self.genre )
    else:
        jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [ { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] } }, "id": 1}''' % ( self.mpaa, self.genre )
    jsonresponse = xbmc.executeJSONRPC( jsonquery )
    data = simplejson.loads( jsonresponse )
    if data.has_key('result'):
        if data['result'].has_key('movies'):
            trailers = data['result']['movies']
            shuffle( trailers )
            for trailer in trailers:
                trailer_rating = trailer['mpaa']
                # shorten MPAA/BBFC ratings
                # The split(...)[ 1 - ( len(...) == 1 ) ] idiom below picks the
                # second token, or the first when there is only one token.
                # ( value, "NR" )[ value not in (...) ] yields "NR" for any
                # value outside the listed ratings.
                if trailer_rating == "":
                    trailer_rating = "NR"
                #MPAA
                if trailer_rating.startswith("Rated"):
                    trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "G", "PG", "PG-13", "R", "NC-17", "Unrated", ) ]
                #BBFC
                elif trailer_rating.startswith("UK"):
                    if trailer_rating.startswith( "UK:" ):
                        trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                    else:
                        trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "12", "12A", "PG", "15", "18", "R18", "MA", "U", ) ]
                #FSK
                elif trailer_rating.startswith("FSK"):
                    if trailer_rating.startswith( "FSK:" ):
                        trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                    else:
                        trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                #DEJUS
                elif trailer_rating in ( "Livre", "10 Anos", "12 Anos", "14 Anos", "16 Anos", "18 Anos" ):
                    # these labels are kept unchanged
                    trailer_rating = trailer_rating
                else:
                    # adding this just in case there is some with different labels in database
                    trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "0", "6", "12", "12A", "PG", "15", "16", "18", "R18", "MA", "U", ) ]
                # add trailer to our final list
                if trailer[ 'trailer' ]:
                    if trailer['trailer'].startswith( 'plugin://' ) and self.settings['trailer_skip_youtube']:
                        utils.log( "Trailer contains plugin://. Skipping", xbmc.LOGNOTICE )
                        continue
                    else:
                        # do not queue the trailer of the movie that is about to play
                        if trailer[ "movieid" ] == self.movie and self.settings[ 'trailer_not_movie' ]:
                            utils.log( "Trailer matches selected movie. Skipping", xbmc.LOGNOTICE )
                            continue
                        else:
                            trailer_info = ( xbmc.getCacheThumbName( trailer['trailer'] ), # id
                                             trailer['label'], # title
                                             trailer['trailer'], # trailer
                                             trailer['thumbnail'], # thumb
                                             trailer['plot'], # plot
                                             '', # runtime
                                             trailer_rating, # mpaa
                                             '', # release date
                                             '', # studio
                                             utils.list_to_string( trailer['genre'] ), # genre
                                             'Trailer', # writer
                                             '', # director 32613
                                             )
                            self.trailers += [ trailer_info ]
                            # add id to watched file TODO: maybe don't add if not user preference
                            self.watched += [ xbmc.getCacheThumbName( trailer['trailer'] ) ]
                            # increment counter
                            count += 1
                            # if we have enough exit
                            if count == self.settings[ "trailer_count" ]:
                                break
        else:
            utils.log( "No Movie Trailers found", xbmc.LOGNOTICE )
        self._save_watched()
        return self.trailers
    else:
        utils.log( "No results found", xbmc.LOGNOTICE )
        return []
def get_score_dict(self):
    """Score the user's posts against every entry of ``INTERESTS``.

    For each post in ``self.user_dict['posts']`` (the loop stops after the
    post with index 10) two passes are made:

    * image pass - topics from ``TextApi`` over the post's ``img_api`` data
      are compared with each interest keyword via ``SimilarApi``; scores
      are accumulated under the key ``'p_' + interest[0]``.
    * text pass - the same for the hashtags (or, failing that, the
      de-emojified caption), accumulated under ``interest[0]``.

    At the end each accumulated score is divided by ``self.photo_count``
    (``p_`` keys) or ``self.content_count`` (other keys).

    Side effects: increments ``self.photo_count`` / ``self.content_count``
    and stores ``img_topic`` / ``content_topic`` in each post dict.

    NOTE(review): nesting was reconstructed from a flattened source; in
    particular the ``if j == ...`` normalisation is assumed to run after
    the keyword loop - confirm against the original file.

    :return: dict mapping score keys to averaged scores.
    """
    score_dict = {}
    for counter, (post, post_dict) in enumerate(self.user_dict['posts'].items()):
        # image processing
        self.photo_count += 1
        post_dict['img_topic'] = TextApi(
            list_to_string(post_dict['img_api'])).get_response()
        for interest in INTERESTS:
            # the first keyword of an interest doubles as its score key
            key = interest[0]
            interest_score = 0
            if post_dict.get('img_topic', None):
                for j, keyword in enumerate(interest):
                    print(j)
                    for i, (topic, prob) in enumerate(
                            post_dict['img_topic'].items()):
                        try:
                            interest_score += SimilarApi(
                                topic, keyword).get_response() * prob
                        except:
                            # failed similarity lookups contribute nothing
                            pass
                        # only the first three topics are considered
                        if i == 2:
                            break
                # j holds the index of the last keyword here
                if j == 1:
                    interest_score = interest_score / 6
                else:
                    interest_score = interest_score / 3
            if score_dict.get('p_' + key, None):
                score_dict['p_' + key] = score_dict['p_' + key] + interest_score
            else:
                score_dict['p_' + key] = interest_score
        # caption/tags processing
        text = None
        if post_dict['hashtags']:
            text = ' '.join(post_dict['hashtags'])
            self.content_count += 1
        elif post_dict['caption']:
            text = deemojify(post_dict['caption'])
            self.content_count += 1
        if text:
            post_dict['content_topic'] = TextApi(text).get_response()
            for interest in INTERESTS:
                key = interest[0]
                interest_score = 0
                if post_dict.get('content_topic', None):
                    for j, keyword in enumerate(interest):
                        for i, (topic, prob) in enumerate(
                                post_dict['content_topic'].items()):
                            # unlike the image pass, errors propagate here
                            interest_score += SimilarApi(
                                topic, keyword).get_response() * prob
                            # only the first two topics are considered
                            if i == 1:
                                break
                    # j holds the index of the last keyword here
                    if j == 2:
                        interest_score = interest_score / 6
                    else:
                        interest_score = interest_score / 3
                if score_dict.get(key, None):
                    score_dict[key] = score_dict[key] + interest_score
                else:
                    score_dict[key] = interest_score
        # stop after the post with index 10
        if counter == 10:
            break
    for interest, score in score_dict.items():
        try:
            if interest.startswith('p_'):
                score_dict[interest] = score / self.photo_count
            else:
                score_dict[interest] = score / self.content_count
        except:
            # e.g. a zero count: the raw score is left as it is
            pass
    return score_dict
def main(): parser = argparse.ArgumentParser() parser.add_argument('--model_path', type=str, default="Data/Models/model_epoch_6.ckpt", help='Pre-Trained Model Path') parser.add_argument('--data_dir', type=str, default='Data', help='Data Directory') parser.add_argument('--seed', type=str, default="ANTONIO", help='seed') parser.add_argument('--num_char', type=int, default=1000, help='seed') parser.add_argument('--output_file', type=str, default='sample.txt', help='Output File') args = parser.parse_args() config = model_config.config model_options = { 'n_source_quant': config['n_source_quant'], 'n_target_quant': config['n_target_quant'], 'residual_channels': config['residual_channels'], 'decoder_dilations': config['decoder_dilations'], 'sample_size': args.num_char, 'decoder_filter_width': config['decoder_filter_width'], 'batch_size': 1, } seed_ = [ord(s) for s in args.seed ] + [0 for i in range(args.num_char - len(args.seed))] seed_ = np.array(seed_, dtype='int32') seed_ = seed_.reshape([1, -1]) byte_net = model.Byte_net_model(model_options) generator = byte_net.build_generator(args.num_char) sess = tf.InteractiveSession() saver = tf.train.Saver() saver.restore(sess, args.model_path) input_batch = seed_ print "INPUT", input_batch for i in range(0, args.num_char - len(args.seed)): prediction, probs = sess.run( [generator['prediction'], generator['probs']], feed_dict={generator['source_sentence']: input_batch}) last_prediction = np.array( [utils.weighted_pick(probs[i + len(args.seed) - 1])]) last_prediction = last_prediction.reshape([1, -1]) input_batch[:, i + len(args.seed)] = last_prediction res = utils.list_to_string(input_batch[0, 0:i + len(args.seed) + 1]) if i % 100 == 0: print res with open(args.output_file, 'wb') as f: f.write(res)
def __init__(self, dataset='ansim', ov=3, split=1, nfft=1024, db=30, wav_extra_name='', desc_extra_name=''):
    """Set up folder names and signal-processing constants for one dataset.

    :param dataset: dataset name, used as the folder under ``../datasets``;
        a ``'c'`` in the name selects 8 channels instead of 4, ``'real'``
        selects the numeric sound-class table.
    :param ov: overlap value(s), converted with ``list_to_string`` for use
        in folder and file names.
    :param split: split value(s), converted the same way.
    :param nfft: FFT size; also used as the window length, with half of it
        as the hop length.
    :param db: value used in the audio folder and feature file names.
    :param wav_extra_name: suffix appended to the audio folder name.
    :param desc_extra_name: suffix appended to the description folder name.

    Logs an error and calls ``exit()`` if the default azimuth or elevation
    is part of the corresponding grid.
    """
    # TODO: Change the path according to your machine.
    # TODO: It should point to a folder which consists of sub-folders for audio and metada
    datasets_dir = os.path.join('..', 'datasets')
    self._base_folder = os.path.join(datasets_dir, dataset)
    logger.info(f"self._base_folder {self._base_folder}")

    # Output directories
    self._label_dir = None
    self._feat_dir = None
    self._feat_dir_norm = None

    # Local parameters
    self._mode = None
    self._ov = list_to_string(ov)
    self._split = list_to_string(split)
    self._db = db
    self._nfft = nfft
    self._win_len = self._nfft
    self._hop_len = self._nfft // 2
    self._dataset = dataset
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy, so the builtin is used directly (same value).
    self._eps = np.spacing(float(1e-16))

    # Input directories
    self._aud_dir = os.path.join(
        self._base_folder,
        'wav_ov{}_split{}_{}db{}'.format(self._ov, self._split, db, wav_extra_name))
    self._desc_dir = os.path.join(
        self._base_folder,
        'desc_ov{}_split{}{}'.format(self._ov, self._split, desc_extra_name))
    logger.info(f"self._aud_dir {self._aud_dir}")
    logger.info(f"self._desc_dir {self._desc_dir}")

    self.feat_formatted_name = 'ov{}_split{}_{}db_nfft{}_'.format(
        self._ov, self._split, self._db, self._nfft)
    self.label_formatted_name = 'ov{}_split{}_nfft{}_'.format(
        self._ov, self._split, self._nfft)

    # If circular-array 8 channels else 4 for Ambisonic
    if 'c' in self._dataset:
        self._nb_channels = 8
    else:
        self._nb_channels = 4

    # Sound event classes dictionary
    self._unique_classes = dict()
    if 'real' in self._dataset:
        # Urbansound8k sound events
        self._unique_classes = \
            {
                '1': 0,
                '3': 1,
                '4': 2,
                '5': 3,
                '6': 4,
                '7': 5,
                '8': 6,
                '9': 7
            }
    else:
        # DCASE 2016 Task 2 sound events
        self._unique_classes = \
            {
                'clearthroat': 2,
                'cough': 8,
                'doorslam': 9,
                'drawer': 1,
                'keyboard': 6,
                'keysDrop': 4,
                'knock': 0,
                'laughter': 10,
                'pageturn': 7,
                'phone': 3,
                'speech': 5
            }

    self._fs = 44100
    self._frame_res = self._fs / float(self._hop_len)
    self._hop_len_s = self._nfft / 2.0 / self._fs
    self._nb_frames_1s = int(1 / self._hop_len_s)
    self._fade_win_size = 0.01 * self._fs

    # Azimuth/elevation grids with a step of ``self._resolution``.
    self._resolution = 10
    self._azi_list = range(-180, 180, self._resolution)
    self._length = len(self._azi_list)
    self._ele_list = range(-60, 60, self._resolution)
    self._height = len(self._ele_list)
    self._weakness = None

    # For regression task only
    self._default_azi = 180
    self._default_ele = 60

    # The defaults must lie outside the grids above.
    if self._default_azi in self._azi_list:
        logger.error(
            'ERROR: chosen default_azi value {} should not exist in azi_list'
            .format(self._default_azi))
        exit()
    if self._default_ele in self._ele_list:
        logger.error(
            'ERROR: chosen default_ele value {} should not exist in ele_list'
            .format(self._default_ele))
        exit()

    # TODO: Fix the audio synthesis code to always generate 30s of
    # audio. Currently it generates audio till the last active sound event, which is not always 30s long. This is a
    # quick fix to overcome that. We need this because, for processing and training we need the length of features
    # to be fixed.
    self._audio_max_len_samples = 30 * self._fs

    self._max_frames = int(
        np.ceil((self._audio_max_len_samples - self._win_len) / float(self._hop_len)))
def main(argv):
    """
    Main wrapper for training or evaluating a sound event localization and
    detection network.

    :param argv: command-line style argument list; every position after
        ``argv[0]`` is optional.

        * ``argv[1]`` - job_id. Defaults to ``1``. In training mode it is the
          last component of the generated run name; in evaluation mode it is
          used as the run name itself.
        * ``argv[2]`` - task_id, passed to ``parameter.get_params`` to choose
          the parameter set. Defaults to ``'1'``.
        * ``argv[3]`` - mode. ``'train'`` selects training, any other value
          selects evaluation. Defaults to training when omitted.
        * ``argv[4]`` - log_dir_name, the sub-folder of the run folder.
          Required in evaluation mode; in training mode it defaults to
          ``<dnn_type>-<timestamp>``.

    :raises ValueError: if evaluation mode is requested without log_dir_name.
    :raises FileNotFoundError: if evaluation mode is requested and no model
        exists at ``<log_dir>/model``.
    """
    # NOTE(review): the block nesting below was reconstructed from source whose
    # indentation had been lost - confirm against the original layout.
    job_id = 1 if len(argv) < 2 else argv[1]

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[2]
    params = parameter.get_params(task_id)

    isTraining = True if len(argv) < 4 else (True if argv[3] == 'train' else False)
    logger.info(f"isTraining {isTraining}")

    log_dir_name = None if len(argv) < 5 else argv[4]
    # Evaluation needs an explicit folder to locate the trained model.
    if not log_dir_name and not isTraining:
        raise ValueError("Specify log_dir if evaluation mode")

    model_dir = os.path.join(os.pardir, 'models')
    if isTraining:
        utils.create_folder(model_dir)

    unique_name = '{}_ov{}_train{}_val{}_{}'.format(
        params['dataset'], list_to_string(params['overlap']), list_to_string(params['train_split']),
        list_to_string(params['val_split']), job_id)
    # In evaluation mode the job id is taken as the run folder name as-is.
    if not isTraining:
        unique_name = job_id
    logger.info(f"unique_name: {unique_name}")

    dnn_type = 'QTCN' if params['use_quaternions'] else params['recurrent_type']
    # No folder name given (training only): generate one from network type and timestamp.
    if not log_dir_name:
        log_dir_name = "-".join([dnn_type, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
    logger.info(f"log_dir_name: {log_dir_name}")

    log_dir = os.path.join(model_dir, unique_name, log_dir_name)
    logger.info(f"log_dir: {log_dir}")

    if isTraining:
        utils.create_folder(log_dir)

    utils.setup_logger(log_dir, console_logger_level=logging.INFO)

    # Logged again after setup_logger has been pointed at log_dir.
    logger.info(f"log_dir {log_dir}")
    logger.info("unique_name: {}\n".format(unique_name))

    data_gen_train = None
    data_gen_val = None
    data_gen_test = None
    if isTraining:
        # With train_val_split == 1.0 no splitting point is passed; otherwise
        # the training generator is given 'before' and the validation one 'after'.
        load_files_train_splitting_point = None if params['train_val_split'] == 1.0 else 'before'
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['train_split'], db=params['db'], nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params,
            load_files_before_after_splitting_point=load_files_train_splitting_point
        )

        if not params['quick_test']:
            load_files_val_splitting_point = None if params['train_val_split'] == 1.0 else 'after'
            data_gen_val = cls_data_generator.DataGenerator(
                dataset=params['dataset'], ov=params['overlap'], split=params['val_split'], db=params['db'], nfft=params['nfft'],
                batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
                weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
                azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
                data_format=params['data_format'], params=params,
                load_files_before_after_splitting_point=load_files_val_splitting_point
            )
        else:
            # Quick test: reuse the training data for validation instead of
            # building a second generator.
            import copy
            data_gen_val = copy.deepcopy(data_gen_train)
            logger.warning(f"Quick test, validation set is a deep copy of training set.")
    else:
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['test_split'], db=params['db'], nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'], xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params
        )

    # Input/output sizes come from whichever generator exists in this mode.
    data_gen_for_shapes = data_gen_train if isTraining else data_gen_test
    data_in, data_out = data_gen_for_shapes.get_data_sizes()
    logger.info(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    logger.info(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'],
            params['pool_size'],
            params['rnn_size'],
            params['fnn_size']
        )
    )

    network.set_global_num_classes(params)
    keras.backend.set_image_data_format(params['data_format'])
    logger.info(f"Data format set to {params['data_format']}")

    # A new model is only built for training; evaluation relies on the load below.
    model = None
    if isTraining:
        if params['use_quaternions']:
            assert (params['data_format'] == 'channels_last')

        # NOTE(review): this `else` is assumed to pair with the use_giusenso
        # check (not with `if isTraining`) - confirm.
        if params['use_giusenso']:
            assert (params['data_format'] == 'channels_first')
            model = keras_model_giusenso.get_model_giusenso(data_in, data_out, params['dropout_rate'], params['nb_cnn2d_filt'],
                                                            params['pool_size'], params['fnn_size'], params['loss_weights'])
        else:
            model = network.get_model(input_shape=data_in, output_shape=data_out, dropout_rate=params['dropout_rate'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'], data_format=params['data_format'],
                                      params=params)

    model_path = os.path.join(log_dir, 'model')
    logger.info(f"model_path {model_path}")
    # A saved model in log_dir takes precedence over the freshly built one.
    if os.path.exists(model_path):
        logger.info(f"Loading pretrained model from {model_path}")
        model = network.load_seld_model(model_path, params['doa_objective'])
    else:
        if not isTraining:
            raise FileNotFoundError(f"test mode but model was not found at {os.path.abspath(model_path)}")

    # Plotting is best-effort: an ImportError is logged and skipped.
    try:
        dot_img_file = os.path.join(log_dir, 'model_plot.png')
        keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    except ImportError:
        logger.warning(f"Failed to import pydot, skip plotting")

    if isTraining:
        utils.copy_source_code(log_dir)
        train(model, data_gen_train, data_gen_val, params, log_dir=log_dir, unique_name=unique_name)
    else:
        evaluate(model, data_gen_test, params, log_dir=log_dir, unique_name=unique_name)