Example #1
    def get_wiki_urls_top_n_phrases(self, n):
        # TODO get proper categories
        for i in range(n):
            phrase = self.string_phrases_nouns[i][1]
            try:
                url, content, cats = get_wiki_url_and_content_by_keyphrase(
                    phrase)
                category = get_shortest_in_list(cats, phrase)

                root_list = {}
                root_list["article_url"] = url
                root_list["content"] = quote(str(content))
                root_list["title"] = quote(
                    str(list_to_string(url.split('/')[-1].split('_'))))
                root_list["topic"] = quote(str(category))
                self.mapping.append(root_list)
            except Exception:
                # fall back to a shortened keyphrase: the slice keeps roughly
                # half as many characters as the phrase has words
                try:
                    url, content, cats = get_wiki_url_and_content_by_keyphrase(
                        phrase[:len(phrase.split(' ')) // 2])
                    category = get_shortest_in_list(
                        cats, phrase[:len(phrase.split(' ')) // 2])

                    root_list = {}
                    root_list["article_url"] = url
                    root_list["content"] = quote(str(content))
                    root_list["title"] = quote(
                        str(list_to_string(url.split('/')[-1].split('_'))))
                    root_list["topic"] = quote(str(category))
                    self.mapping.append(root_list)
                except Exception:
                    pass
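Every snippet in this collection calls a small list_to_string helper that joins the elements of a list into a single string; the helper itself is never shown. Below is a minimal sketch of what such a function might look like. The separator is an assumption and clearly differs between projects (a space for the Rosalind answers, a comma or semicolon for the Alma ID columns), so treat it as a hypothetical stand-in rather than any project's actual implementation.

def list_to_string(items, separator=' '):
    # Hypothetical stand-in: join the elements of items into one string.
    # str() each element so it also works for lists of ints,
    # e.g. list_to_string([2, 4, 10, 18]) -> '2 4 10 18'
    return separator.join(str(item) for item in items)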
Example #2
def collect_mms_ids(project, table):
    logging.info('project {}: starting collection of mms ids'.format(project))
    new_table_rows = []
    for index, row in table.iterrows():
        portfolios, packages = alma_primo_service.collect_portfolios_and_packages(project=project, field='lds03',
                                                                                  search_term=row['Kennzeichen'],
                                                                                  number_of_hits=row['total_in_Alma'])
        row['Paket_MMS'] = list_to_string(packages)
        row['Portfolio_MMS'] = list_to_string(portfolios)
        new_table_rows.append(row)

    # save the extended table and return it
    return write_table(project=project, rows=new_table_rows)
Example #3
def update_collections(project, table):
    # iterate over all packages
    new_table_rows = []
    for index, row in table.iterrows():
        service_ids = []
        # iterate over all collection IDs
        for collection_id in row['Collection_IDs'].split(';'):
            logging.info('updating collection {}'.format(collection_id))

            # change the type to selective package
            success_type_change = alma_electronic_service.set_type_to_selective_package(project=project,
                                                                                        collection_id=collection_id)
            # if the change succeeded, add a full-text service
            if success_type_change:
                service = None
                service_all = alma_electronic_service.retrieve_services(collection_id=collection_id)
                if service_all is not None:
                    for service_individual in service_all:
                        if service_individual['type']['value'] == 'getFullText':
                            service = service_individual
                # if no active full-text service was found, add one
                if service is None:
                    service = alma_electronic_service.add_active_full_text_service(project=project, collection_id=collection_id)
                service_ids.append(service['id'])
        row['Service_IDs'] = list_to_string(service_ids)
        new_table_rows.append(row)

    # save the extended table and return it
    return write_table(project=project, rows=new_table_rows)
Example #4
def _get_queued_video_info( feature = 0 ):
    log( "_get_queued_video_info() Started" )
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_detail = _movie_details( plist[feature]['id'] )
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        genre = list_to_string( movie_detail['genre'] )
        try:
            audio = movie_detail['streamdetails']['audio'][0]['codec']
        except:
            audio = "other"
        equivalent_mpaa, short_mpaa = get_equivalent_rating( mpaa )
    except:
        traceback.print_exc()
        movie_title = path = mpaa = audio = genre = movie = equivalent_mpaa = short_mpaa = ""
    # spew queued video info to log
    log( "Queued Movie Information" )
    log( "%s" % log_sep )
    log( "Title: %s" % movie_title )
    log( "Path: %s" % path )
    log( "Genre: %s" % genre )
    log( "MPAA: %s" % short_mpaa )
    log( "Audio: %s" % audio )
    if video_settings[ "audio_videos_folder" ]:
        log( "Folder: %s" % ( video_settings[ "audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + video_settings[ "audio_videos_folder" ][ -1 ], ) )
    log( "%s" % log_sep )
    # return results
    return short_mpaa, audio, genre, path, equivalent_mpaa
Example #5
def _get_queued_video_info(feature=0):
    utils.log("_get_queued_video_info() Started")
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_detail = _movie_details(plist[feature]['id'])
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        movie_id = plist[feature]['id']
        genre = utils.list_to_string(movie_detail['genre'])
        try:
            audio = movie_detail['streamdetails']['audio'][0]['codec']
        except:
            audio = "other"
        try:
            stereomode = movie_detail['streamdetails']['video'][0][
                'stereomode']
        except:
            stereomode = ""
        equivalent_mpaa, short_mpaa = get_equivalent_rating(mpaa)
    except:
        traceback.print_exc()
        movie_title = path = mpaa = audio = genre = movie = equivalent_mpaa = short_mpaa = stereomode = ""
        movie_id = 0
    if stereomode not in ("mono", ""):
        is_3d_movie = True
    elif stereomode == "":  # if database still has an empty stereomode, test filename
        is_3d_movie = test_for_3d(path)
    else:
        is_3d_movie = False
    # spew queued video info to log
    utils.log("Queued Movie Information")
    utils.log("%s" % log_sep)
    utils.log("Title: %s" % movie_title)
    utils.log("Database ID: %s" % movie_id)
    utils.log("Path: %s" % path)
    utils.log("Genre: %s" % genre)
    utils.log("Rating: %s" % short_mpaa)
    utils.log("Audio: %s" % audio)
    utils.log("Stereo Mode: %s" % stereomode)
    utils.log("3D Movie: %s" % ("False", "True")[is_3d_movie])
    if video_settings["audio_videos_folder"]:
        if is_3d_movie and _3d_settings["3d_audio_videos_folder"]:
            utils.log("Folder: %s" %
                      (_3d_settings["3d_audio_videos_folder"] +
                       audio_formats.get(audio, "Other") +
                       _3d_settings["3d_audio_videos_folder"][-1], ))
        else:
            utils.log("Folder: %s" %
                      (video_settings["audio_videos_folder"] +
                       audio_formats.get(audio, "Other") +
                       video_settings["audio_videos_folder"][-1], ))
    utils.log("%s" % log_sep)
    # return results
    return short_mpaa, audio, genre, movie_id, equivalent_mpaa, is_3d_movie
Example #6
def retrieve_service_ids(project, table, library=None):
    new_table_rows = []

    # iterate over all packages
    for index, row in table.iterrows():
        service_ids = alma_electronic_service.retrieve_service_ids(project=project,
                                                                   collection_ids=row['Collection_IDs'])
        row['Service_IDs'] = list_to_string(service_ids)
        new_table_rows.append(row)

    # save the extended table and return it
    return write_table(project=project, rows=new_table_rows)
Example #7
def retrieve_portfolio_ids(project, table, library=None):
    new_table_rows = []

    # iterate over all packages
    for index, row in table.iterrows():
        portfolio_ids = alma_bib_service.retrieve_portfolio_ids(project=project, mms_ids=row['Portfolio_MMS'],
                                                                library=library)
        row['Portfolio_IDs'] = list_to_string(portfolio_ids)
        new_table_rows.append(row)

    # save the extended table and return it
    return write_table(project=project, rows=new_table_rows)
Example #8
    def __repr__(self):

        cseg = self[0]
        hline = "{0}".format("-" * ((len(cseg) * 2) + 3))
        cseg_str = utils.list_to_string(cseg)
        com_matrix = self[1:]
        com_matrix_str = [(str(cseg[i]) + " | " + \
                           utils.replace_list_to_plus_minus(line)) \
                          for (i, line) in enumerate(com_matrix)]
        half_matrix_1 = "  | " + cseg_str + "\n" + hline + "\n"
        half_matrix_2 = "".join([x + "\n" for x in com_matrix_str])
        return half_matrix_1 + half_matrix_2
Example #9
    def get_category_from_categories(self, phrases_list):
        lst = []
        bad_words = ['articles', 'links', 'containing']
        for phrase in phrases_list:
            phrase_words = phrase.split()
            for word in phrase_words:
                if word not in bad_words and word not in stopwords.words(
                        "english"):
                    lst.append(word)
            lst.append('.')

        text = list_to_string(lst)
        rec = Rake()
        rec.extract_keywords_from_text(text)
        cat = rec.get_ranked_phrases()
        return cat
Example #10
def _get_queued_video_info( feature = 0 ):
    utils.log( "_get_queued_video_info() Started" )
    equivalent_mpaa = "NR"
    try:
        # get movie name
        plist = _store_playlist()
        movie_detail = _movie_details( plist[feature]['id'] )
        movie_title = movie_detail['title']
        path = movie_detail['file']
        mpaa = movie_detail['mpaa']
        genre = utils.list_to_string( movie_detail['genre'] )
        try:
            audio = movie_detail['streamdetails']['audio'][0]['codec']
        except:
            audio = "other"
        try:
            stereomode = movie_detail['streamdetails']['video'][0]['stereomode']
        except:
            stereomode = ""
        equivalent_mpaa, short_mpaa = get_equivalent_rating( mpaa )
    except:
        traceback.print_exc()
        movie_title = path = mpaa = audio = genre = movie = equivalent_mpaa = short_mpaa = stereomode = ""
    if stereomode not in ( "mono", "" ):
        is_3d_movie = True
    elif stereomode == "": # if database still has an empty stereomode, test filename
        is_3d_movie = test_for_3d( path )
    else:
        is_3d_movie = False
    # spew queued video info to log
    utils.log( "Queued Movie Information" )
    utils.log( "%s" % log_sep )
    utils.log( "Title: %s" % movie_title )
    utils.log( "Path: %s" % path )
    utils.log( "Genre: %s" % genre )
    utils.log( "Rating: %s" % short_mpaa )
    utils.log( "Audio: %s" % audio )
    utils.log( "Stereo Mode: %s" % stereomode )
    utils.log( "3D Movie: %s" % ( "False", "True" )[ is_3d_movie ] )
    if video_settings[ "audio_videos_folder" ]:
        if is_3d_movie and _3d_settings[ "3d_audio_videos_folder" ]:
            utils.log( "Folder: %s" % ( _3d_settings[ "3d_audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + _3d_settings[ "3d_audio_videos_folder" ][ -1 ], ) )
        else:
            utils.log( "Folder: %s" % ( video_settings[ "audio_videos_folder" ] + audio_formats.get( audio, "Other" ) + video_settings[ "audio_videos_folder" ][ -1 ], ) )
    utils.log( "%s" % log_sep )
    # return results
    return short_mpaa, audio, genre, path, equivalent_mpaa, is_3d_movie
Example #11
def retrieve_collection_ids(project, table, library=None):
    new_table_rows = []

    # iterate over all packages
    for index, row in table.iterrows():
        if library is None:
            collection_ids = alma_bib_service.retrieve_collection_id_for_mms_id(project=project,
                                                                                mms_ids=row['Paket_MMS'])
        else:
            collection_ids = alma_bib_service.retrieve_collection_id_for_mms_id(project=project,
                                                                                mms_ids=row['Paket_MMS'],
                                                                                library=library)

        row['Collection_IDs'] = list_to_string(collection_ids)
        new_table_rows.append(row)

    # save the extended table and return it
    return write_table(project=project, rows=new_table_rows)
Example #12
    def _connect_widgets(self):
        self.settings.bind(
            "enable-notifications", self.ui.get_widget("enable-notifications"), "active", Gio.SettingsBindFlags.DEFAULT
        )
        self.settings.bind(
            "monitor-networks", self.ui.get_widget("monitor-networks"), "active", Gio.SettingsBindFlags.DEFAULT
        )

        self.settings.bind(
            "notify-before-poweroff",
            self.ui.get_widget("notify-before-poweroff"),
            "value",
            Gio.SettingsBindFlags.DEFAULT,
        )
        self.settings.bind(
            "player-poweroff-timeout",
            self.ui.get_widget("player-poweroff-timeout"),
            "value",
            Gio.SettingsBindFlags.DEFAULT,
        )
        self.settings.bind("actions-file", self.ui.get_widget("actions-file"), "text", Gio.SettingsBindFlags.DEFAULT)

        self.ui.get_widget("power-off-intervals").set_text(
            utils.list_to_string(self.settings.get_unpacked("poweroff-intervals"))
        )
        self._validator_po = Validator(
            self.ui.get_widget("power-off-intervals"),
            allowed_chars="0123456789, ",
            regexp=r"(\d{1,6}\s?,?\s?){1,20}",
            min_length=1,
            on_check=self.on_power_off_intervals_changed,
            no_cb_inittialy=True,
        )

        wnames = ("shutdown", "suspend", "hibernate")
        for name in wnames:
            if self.power_actions.get_action(name).get_active():
                self.ui.get_widget(name).set_active(True)
        for name in wnames:
            self.ui.get_widget(name).connect("toggled", self.on_poweroff_type_activate, name)
Example #13
File: scraper.py Project: camster1/RTOTV
 def fetch_trailers( self ):        
     # get watched list
     self._get_watched()
     count = 0
     if self.settings[ "trailer_unwatched_movie_only" ]:
         jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [  { "field": "playcount", "operator": "is", "value": "0" }, { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] }  }, "id": 1}''' % ( self.mpaa, self.genre )
     else:
         jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [  { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] }  }, "id": 1}''' % ( self.mpaa, self.genre )
     jsonresponse = xbmc.executeJSONRPC( jsonquery )
     data = simplejson.loads( jsonresponse )
     if data.has_key('result'):
         if data['result'].has_key('movies'):
             trailers = data['result']['movies']
             shuffle( trailers )
             for trailer in trailers:
                 trailer_rating = trailer['mpaa']
                 # shorten MPAA/BBFC ratings
                 if trailer_rating == "":
                     trailer_rating = "NR"
                 #MPAA    
                 if trailer_rating.startswith("Rated"):
                     trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "G", "PG", "PG-13", "R", "NC-17", "Unrated", ) ]
                 #BBFC
                 elif trailer_rating.startswith("UK"):
                     if trailer_rating.startswith( "UK:" ):
                         trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                     else:
                         trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "12", "12A", "PG", "15", "18", "R18", "MA", "U", ) ]
                 #FSK
                 elif trailer_rating.startswith("FSK"):
                     if trailer_rating.startswith( "FSK:" ):
                         trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                     else:
                         trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                 #DEJUS
                 elif trailer_rating in ( "Livre", "10 Anos", "12 Anos", "14 Anos", "16 Anos", "18 Anos" ):
                     trailer_rating = trailer_rating   # adding this just in case there is some with different labels in database
                 else:
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "0", "6", "12", "12A", "PG", "15", "16", "18", "R18", "MA", "U", ) ]
                 # add trailer to our final list
                 if trailer[ 'trailer' ]:
                     if trailer['trailer'].startswith( 'plugin://' ) and self.settings['trailer_skip_youtube']:
                         continue
                     else:
                         trailer_info = ( xbmc.getCacheThumbName( trailer['trailer'] ), # id
                                          trailer['label'], # title
                                          trailer['trailer'], # trailer
                                          trailer['thumbnail'], # thumb
                                          trailer['plot'], # plot
                                          '', # runtime
                                          trailer_rating, # mpaa
                                          '', # release date
                                          '', # studio
                                          utils.list_to_string( trailer['genre'] ), # genre
                                          'Trailer', # writer
                                          '', # director 32613
                                         )
                         self.trailers += [ trailer_info ]
                         # add id to watched file TODO: maybe don't add if not user preference
                         self.watched += [ xbmc.getCacheThumbName( trailer['trailer'] ) ]
                         # increment counter
                         count += 1
                         # if we have enough exit
                         if count == self.settings[ "trailer_count" ]:
                            break
         else:
             utils.log( "No Movie Trailers found", xbmc.LOGNOTICE )
         self._save_watched()
         return self.trailers
     else:
         utils.log( "No results found", xbmc.LOGNOTICE )
         return []
Example #14
    i = 0
    j = 0
    for [language, text] in ut.TRAIN:
        if language not in lang:
            lang[language] = i
            lang_previous[i] = language
            i += 1
        for element in text:
            if element not in bigrams:
                bigrams[element] = j
                j += 1
    in_dim = len(bigrams)
    out_dim = len(lang)
    train_data = build_data(ut.TRAIN, bigrams, lang)
    dev_data = build_data(ut.DEV, bigrams, lang)
    test_data = build_data(ut.TEST, bigrams, lang)
    num_iterations = 500
    learning_rate = 0.0001
    hid_dim = int(np.log(in_dim * out_dim))
    params = ml1.create_classifier(in_dim, hid_dim, out_dim)
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)

    results = []
    for [label, data] in test_data:
        results.append(lang_previous[ml.predict(data, trained_params)])

    with open('test.pred', 'w') as output_file:
        output_file.write(ut.list_to_string(results))
Example #15
"""
Example solution to "Finding a Motif in DNA", from rosalind.info.
"""

from utils import list_to_string


def find(string, motif):
    pos = []
    for i in range(len(string) - len(motif) + 1):
        if string[i:].startswith(motif):
            pos.append(i + 1)
    return pos


if __name__ == "__main__":
    print(find("GATATATGCATATACTTATAT", "ATAT"))

    with open("inps/rosalind_subs.txt") as f:
        string, motif = f.read().split()

    print(list_to_string(find(string, motif)))
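For the hard-coded sample above, the motif ATAT occurs at 1-based positions 2, 4, 10 and 18 of GATATATGCATATACTTATAT, so the first print statement outputs [2, 4, 10, 18]; the second print then uses list_to_string to emit the answer for the downloaded Rosalind input, presumably as one space-separated line, which is the format rosalind.info expects.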
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='Batch Size')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )

    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    optim = tf.train.AdamOptimizer(args.learning_rate,
                                   beta1=args.beta1).minimize(
                                       bn_tensors['loss'],
                                       var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print text_samples.shape

    for i in range(args.max_epochs):
        batch_no = 0
        batch_size = args.batch_size
        while (batch_no + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[batch_no * batch_size:(batch_no + 1) *
                                      batch_size, :]
            _, loss, prediction = sess.run(
                [optim, bn_tensors['loss'], bn_tensors['prediction']],
                feed_dict={bn_tensors['sentence']: text_batch})
            print "-------------------------------------------------------"
            print utils.list_to_string(prediction)
            print "Loss", i, batch_no, loss
            print "********************************************************"
            # print prediction
            batch_no += 1

            if (batch_no % 500) == 0:
                save_path = saver.save(
                    sess, "Data/Models/model_epoch_{}.ckpt".format(i))
Example #17
def prob(seq, probs):
    """Log10 probability of seq being randomly built with given probabilities."""
    acc = 0
    for symb in seq:
        acc += log10(probs[symb])
    return acc


if __name__ == "__main__":
    print(
        prob("ACGATACAA", {
            "A": 0.871 / 2,
            "T": 0.871 / 2,
            "C": 0.129 / 2,
            "G": 0.129 / 2
        }))

    with open("inps/rosalind_prob.txt", "r") as f:
        contents = f.read().split("\n")
    dna = contents[0]
    floats = contents[1]

    probs = []
    for s in floats.split(" "):
        f = float(s)
        p, p_ = f / 2, (1 - f) / 2
        d = {"A": p_, "T": p_, "C": p, "G": p}
        probs.append(round(prob(dna, d), 3))
    print(list_to_string(probs))
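The per-symbol probabilities fed to prob above come from the GC content x of each line of the input: C and G each get x/2 and A and T each get (1 - x)/2, so the reported value for a string s is log10 P(s) = sum over the characters c of s of log10 p(c). The hard-coded check uses exactly this with x = 0.129, i.e. p(C) = p(G) = 0.0645 and p(A) = p(T) = 0.4355.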
Example #18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Learning Rate')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='Batch Size')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=1000,
                        help='Max Epochs')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Momentum for Adam Update')
    parser.add_argument('--resume_model',
                        type=str,
                        default=None,
                        help='Pre-Trained Model Path, to resume from')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')
    parser.add_argument('--log_dir',
                        type=str,
                        default='logs',
                        help='Path to TensorBoard logs')

    args = parser.parse_args()

    # model_config = json.loads( open('model_config.json').read() )

    config = model_config.predictor_config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': config['sample_size'],
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': args.batch_size,
    }

    byte_net = model.Byte_net_model(model_options)
    bn_tensors = byte_net.build_prediction_model()

    # Set up logging for TensorBoard
    writer = tf.summary.FileWriter(args.log_dir, graph=tf.get_default_graph())
    run_metadata = tf.RunMetadata()
    summaries = tf.summary.merge_all()

    optim = tf.train.AdamOptimizer(args.learning_rate, beta1 = args.beta1) \
     .minimize(bn_tensors['loss'], var_list=bn_tensors['variables'])

    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    saver = tf.train.Saver()

    if args.resume_model:
        saver.restore(sess, args.resume_model)

    dl = data_loader.Data_Loader({
        'model_type': 'generator',
        'dir_name': args.data_dir
    })
    text_samples = dl.load_generator_data(config['sample_size'])
    print(text_samples.shape)

    models_path = "Data/Models/"
    if not os.path.exists(models_path): os.makedirs(models_path)

    for epoch in range(args.max_epochs):
        step = 0
        batch_size = args.batch_size
        while (step + 1) * batch_size < text_samples.shape[0]:
            text_batch = text_samples[step * batch_size:(step + 1) *
                                      batch_size, :]
            _, summary, loss, prediction = sess.run([
                optim, summaries, bn_tensors['loss'], bn_tensors['prediction']
            ],
                                                    feed_dict={
                                                        bn_tensors['sentence']:
                                                        text_batch
                                                    })

            print("-------------------------------------------------------")
            print(utils.list_to_string(prediction))
            print("Epoch", epoch, "  Step", step, "  Loss", loss)
            print("********************************************************")

            writer.add_summary(summary, step)
            writer.add_run_metadata(
                run_metadata, 'epoch_{:04d}, step_{:04d}'.format(epoch, step))

            step += 1

            if step % 500 == 0:
                saver.save(sess,
                           models_path + "model_epoch_{}.ckpt".format(epoch))
Example #19
 def fetch_trailers( self ):        
     # get watched list
     self._get_watched()
     count = 0
     if self.settings[ "trailer_unwatched_movie_only" ]:
         jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [  { "field": "playcount", "operator": "is", "value": "0" }, { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] }  }, "id": 1}''' % ( self.mpaa, self.genre )
     else:
         jsonquery = '''{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": { "sort": { "order": "ascending", "method": "title", "ignorearticle": true }, "properties" : ["trailer", "mpaa", "genre", "thumbnail", "plot"], "filter": { "and": [  { "field": "mpaarating", "operator": "contains", "value": "%s" }, { "field": "genre", "operator": "contains", "value": "%s" }, { "field": "hastrailer", "operator": "true", "value": "true" } ] }  }, "id": 1}''' % ( self.mpaa, self.genre )
     jsonresponse = xbmc.executeJSONRPC( jsonquery )
     data = simplejson.loads( jsonresponse )
     if data.has_key('result'):
         if data['result'].has_key('movies'):
             trailers = data['result']['movies']
             shuffle( trailers )
             for trailer in trailers:
                 trailer_rating = trailer['mpaa']
                 # shorten MPAA/BBFC ratings
                 if trailer_rating == "":
                     trailer_rating = "NR"
                 #MPAA    
                 if trailer_rating.startswith("Rated"):
                     trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "G", "PG", "PG-13", "R", "NC-17", "Unrated", ) ]
                 #BBFC
                 elif trailer_rating.startswith("UK"):
                     if trailer_rating.startswith( "UK:" ):
                         trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                     else:
                         trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "12", "12A", "PG", "15", "18", "R18", "MA", "U", ) ]
                 #FSK
                 elif trailer_rating.startswith("FSK"):
                     if trailer_rating.startswith( "FSK:" ):
                         trailer_rating = trailer_rating.split( ":" )[ 1 - ( len( trailer_rating.split( ":" ) ) == 1 ) ]
                     else:
                         trailer_rating = trailer_rating.split( " " )[ 1 - ( len( trailer_rating.split( " " ) ) == 1 ) ]
                 #DEJUS
                 elif trailer_rating in ( "Livre", "10 Anos", "12 Anos", "14 Anos", "16 Anos", "18 Anos" ):
                     trailer_rating = trailer_rating   # adding this just in case there is some with different labels in database
                 else:
                     trailer_rating = ( trailer_rating, "NR", )[ trailer_rating not in ( "0", "6", "12", "12A", "PG", "15", "16", "18", "R18", "MA", "U", ) ]
                 # add trailer to our final list
                 if trailer[ 'trailer' ]:
                     if trailer['trailer'].startswith( 'plugin://' ) and self.settings['trailer_skip_youtube']:
                         utils.log( "Trailer contains plugin://.  Skipping", xbmc.LOGNOTICE )
                         continue
                     else:
                         if trailer[ "movieid" ] == self.movie and self.settings[ 'trailer_not_movie' ]:
                             utils.log( "Trailer matches selected movie.  Skipping", xbmc.LOGNOTICE )
                             continue
                         else:
                             trailer_info = ( xbmc.getCacheThumbName( trailer['trailer'] ), # id
                                              trailer['label'], # title
                                              trailer['trailer'], # trailer
                                              trailer['thumbnail'], # thumb
                                              trailer['plot'], # plot
                                              '', # runtime
                                              trailer_rating, # mpaa
                                              '', # release date
                                              '', # studio
                                              utils.list_to_string( trailer['genre'] ), # genre
                                              'Trailer', # writer
                                              '', # director 32613
                                             )
                             self.trailers += [ trailer_info ]
                             # add id to watched file TODO: maybe don't add if not user preference
                             self.watched += [ xbmc.getCacheThumbName( trailer['trailer'] ) ]
                             # increment counter
                             count += 1
                             # if we have enough exit
                             if count == self.settings[ "trailer_count" ]:
                                break
         else:
             utils.log( "No Movie Trailers found", xbmc.LOGNOTICE )
         self._save_watched()
         return self.trailers
     else:
         utils.log( "No results found", xbmc.LOGNOTICE )
         return []
Example #20
 def get_score_dict(self):
     score_dict = {}
     for counter, (post,
                   post_dict) in enumerate(self.user_dict['posts'].items()):
         # image processing
         self.photo_count += 1
         post_dict['img_topic'] = TextApi(
             list_to_string(post_dict['img_api'])).get_response()
         for interest in INTERESTS:
             key = interest[0]
             interest_score = 0
             if post_dict.get('img_topic', None):
                 for j, keyword in enumerate(interest):
                     print(j)
                     for i, (topic, prob) in enumerate(
                             post_dict['img_topic'].items()):
                         try:
                             interest_score += SimilarApi(
                                 topic, keyword).get_response() * prob
                         except:
                             pass
                         if i == 2:
                             break
                     if j == 1:
                         interest_score = interest_score / 6
                     else:
                         interest_score = interest_score / 3
             if score_dict.get('p_' + key, None):
                 score_dict['p_' +
                            key] = score_dict['p_' + key] + interest_score
             else:
                 score_dict['p_' + key] = interest_score
         # caption/tags processing
         text = None
         if post_dict['hashtags']:
             text = ' '.join(post_dict['hashtags'])
             self.content_count += 1
         elif post_dict['caption']:
             text = deemojify(post_dict['caption'])
             self.content_count += 1
         if text:
             post_dict['content_topic'] = TextApi(text).get_response()
             for interest in INTERESTS:
                 key = interest[0]
                 interest_score = 0
                 if post_dict.get('content_topic', None):
                     for j, keyword in enumerate(interest):
                         for i, (topic, prob) in enumerate(
                                 post_dict['content_topic'].items()):
                             interest_score += SimilarApi(
                                 topic, keyword).get_response() * prob
                             if i == 1:
                                 break
                         if j == 2:
                             interest_score = interest_score / 6
                         else:
                             interest_score = interest_score / 3
                 if score_dict.get(key, None):
                     score_dict[key] = score_dict[key] + interest_score
                 else:
                     score_dict[key] = interest_score
         if counter == 10:
             break
     for interest, score in score_dict.items():
         try:
             if interest.startswith('p_'):
                 score_dict[interest] = score / self.photo_count
             else:
                 score_dict[interest] = score / self.content_count
         except:
             pass
     return score_dict
Example #21
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('--model_path',
                        type=str,
                        default="Data/Models/model_epoch_6.ckpt",
                        help='Pre-Trained Model Path')
    parser.add_argument('--data_dir',
                        type=str,
                        default='Data',
                        help='Data Directory')
    parser.add_argument('--seed', type=str, default="ANTONIO", help='seed')
    parser.add_argument('--num_char', type=int, default=1000, help='Number of characters to generate')

    parser.add_argument('--output_file',
                        type=str,
                        default='sample.txt',
                        help='Output File')

    args = parser.parse_args()

    config = model_config.config

    model_options = {
        'n_source_quant': config['n_source_quant'],
        'n_target_quant': config['n_target_quant'],
        'residual_channels': config['residual_channels'],
        'decoder_dilations': config['decoder_dilations'],
        'sample_size': args.num_char,
        'decoder_filter_width': config['decoder_filter_width'],
        'batch_size': 1,
    }

    seed_ = [ord(s) for s in args.seed
             ] + [0 for i in range(args.num_char - len(args.seed))]
    seed_ = np.array(seed_, dtype='int32')
    seed_ = seed_.reshape([1, -1])

    byte_net = model.Byte_net_model(model_options)
    generator = byte_net.build_generator(args.num_char)

    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    saver.restore(sess, args.model_path)

    input_batch = seed_
    print "INPUT", input_batch
    for i in range(0, args.num_char - len(args.seed)):

        prediction, probs = sess.run(
            [generator['prediction'], generator['probs']],
            feed_dict={generator['source_sentence']: input_batch})

        last_prediction = np.array(
            [utils.weighted_pick(probs[i + len(args.seed) - 1])])
        last_prediction = last_prediction.reshape([1, -1])
        input_batch[:, i + len(args.seed)] = last_prediction
        res = utils.list_to_string(input_batch[0, 0:i + len(args.seed) + 1])

        if i % 100 == 0:
            print res

        with open(args.output_file, 'wb') as f:
            f.write(res)
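The generation loop above relies on utils.weighted_pick, which is not shown in any of these snippets. In similar character-level samplers it draws one index at random in proportion to a probability vector; the sketch below is an assumption about that utility, not its actual source.

import numpy as np

def weighted_pick(probs):
    # Sample an index in proportion to the (possibly unnormalised) weights in probs.
    # Hypothetical stand-in for the utils.weighted_pick call used above.
    cumulative = np.cumsum(probs)
    return int(np.searchsorted(cumulative, np.random.rand() * cumulative[-1]))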
Example #22
    def __init__(self,
                 dataset='ansim',
                 ov=3,
                 split=1,
                 nfft=1024,
                 db=30,
                 wav_extra_name='',
                 desc_extra_name=''):

        # TODO: Change the path according to your machine.
        # TODO: It should point to a folder which consists of sub-folders for audio and metada
        datasets_dir = os.path.join('..', 'datasets')
        self._base_folder = os.path.join(datasets_dir, dataset)
        logger.info(f"self._base_folder {self._base_folder}")

        # Output directories
        self._label_dir = None
        self._feat_dir = None
        self._feat_dir_norm = None

        # Local parameters
        self._mode = None
        self._ov = list_to_string(ov)
        self._split = list_to_string(split)
        self._db = db
        self._nfft = nfft
        self._win_len = self._nfft
        self._hop_len = self._nfft // 2
        self._dataset = dataset
        self._eps = np.spacing(np.float(1e-16))

        # Input directories
        self._aud_dir = os.path.join(
            self._base_folder,
            'wav_ov{}_split{}_{}db{}'.format(self._ov, self._split, db,
                                             wav_extra_name))
        self._desc_dir = os.path.join(
            self._base_folder,
            'desc_ov{}_split{}{}'.format(self._ov, self._split,
                                         desc_extra_name))
        logger.info(f"self._aud_dir {self._aud_dir}")
        logger.info(f"self._desc_dir {self._desc_dir}")

        self.feat_formatted_name = 'ov{}_split{}_{}db_nfft{}_'.format(
            self._ov, self._split, self._db, self._nfft)
        self.label_formatted_name = 'ov{}_split{}_nfft{}_'.format(
            self._ov, self._split, self._nfft)

        # If circular-array 8 channels else 4 for Ambisonic
        if 'c' in self._dataset:
            self._nb_channels = 8
        else:
            self._nb_channels = 4

        # Sound event classes dictionary
        self._unique_classes = dict()
        if 'real' in self._dataset:
            # Urbansound8k sound events
            self._unique_classes = \
                {
                    '1': 0,
                    '3': 1,
                    '4': 2,
                    '5': 3,
                    '6': 4,
                    '7': 5,
                    '8': 6,
                    '9': 7
                }
        else:
            # DCASE 2016 Task 2 sound events
            self._unique_classes = \
                {
                    'clearthroat': 2,
                    'cough': 8,
                    'doorslam': 9,
                    'drawer': 1,
                    'keyboard': 6,
                    'keysDrop': 4,
                    'knock': 0,
                    'laughter': 10,
                    'pageturn': 7,
                    'phone': 3,
                    'speech': 5
                }

        self._fs = 44100
        self._frame_res = self._fs / float(self._hop_len)
        self._hop_len_s = self._nfft / 2.0 / self._fs
        self._nb_frames_1s = int(1 / self._hop_len_s)
        self._fade_win_size = 0.01 * self._fs

        self._resolution = 10
        self._azi_list = range(-180, 180, self._resolution)
        self._length = len(self._azi_list)
        self._ele_list = range(-60, 60, self._resolution)
        self._height = len(self._ele_list)
        self._weakness = None

        # For regression task only
        self._default_azi = 180
        self._default_ele = 60

        if self._default_azi in self._azi_list:
            logger.error(
                'ERROR: chosen default_azi value {} should not exist in azi_list'
                .format(self._default_azi))
            exit()
        if self._default_ele in self._ele_list:
            logger.error(
                'ERROR: chosen default_ele value {} should not exist in ele_list'
                .format(self._default_ele))
            exit()

        self._audio_max_len_samples = 30 * self._fs  # TODO: Fix the audio synthesis code to always generate 30s of
        # audio. Currently it generates audio till the last active sound event, which is not always 30s long. This is a
        # quick fix to overcome that. We need this because, for processing and training we need the length of features
        # to be fixed.

        self._max_frames = int(
            np.ceil((self._audio_max_len_samples - self._win_len) /
                    float(self._hop_len)))
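The timing constants at the end of __init__ follow directly from the STFT settings: with nfft = 1024 the hop length is 512 samples, so at fs = 44100 Hz one hop lasts 512 / 44100 ≈ 0.0116 s, _nb_frames_1s works out to 86, and a padded 30-second clip gives _max_frames = ceil((30 * 44100 - 1024) / 512) = 2582.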
Example #23
def main(argv):
    """
    Main wrapper for training sound event localization and detection network.
    
    :param argv: expects two optional inputs. 
        first input: job_id - (optional) all the output files will be uniquely represented with this. (default) 1
        second input: task_id - (optional) To choose the system configuration in parameters.py.
                                (default) uses default parameters
    
    if len(argv) != 4:
        logger.info('\n\n')
        logger.info('-------------------------------------------------------------------------------------------------------')
        logger.info('The code expected three inputs')
        logger.info('\t>> python seld.py <job-id> <train-test> <task-id>')
        logger.info('\t\t<job-id> is a unique identifier which is used for output filenames (models, training plots). '
              'You can use any number or string for this.')
        logger.info('\t\t<task-id> is used to choose the user-defined parameter set from parameter.py')
        logger.info('Using default inputs for now')
        logger.info('-------------------------------------------------------------------------------------------------------')
        logger.info('\n\n')
	"""
    job_id = 1 if len(argv) < 2 else argv[1]

    # use parameter set defined by user
    task_id = '1' if len(argv) < 3 else argv[2]
    params = parameter.get_params(task_id)

    isTraining = len(argv) < 4 or argv[3] == 'train'
    logger.info(f"isTraining {isTraining}")

    log_dir_name = None if len(argv) < 5 else argv[4]
    if not log_dir_name and not isTraining:
        raise ValueError("Specify log_dir if evaluation mode")

    model_dir = os.path.join(os.pardir, 'models')
    if isTraining:
        utils.create_folder(model_dir)

    unique_name = '{}_ov{}_train{}_val{}_{}'.format(
        params['dataset'], list_to_string(params['overlap']), list_to_string(params['train_split']),
        list_to_string(params['val_split']),
        job_id)
    
    if not isTraining:
        unique_name = job_id
        
    logger.info(f"unique_name: {unique_name}")

    dnn_type = 'QTCN' if params['use_quaternions'] else params['recurrent_type']
    if not log_dir_name:
        log_dir_name = "-".join([dnn_type, datetime.datetime.now().strftime("%Y%m%d-%H%M%S")])
    logger.info(f"log_dir_name: {log_dir_name}")

    log_dir = os.path.join(model_dir, unique_name, log_dir_name)
    logger.info(f"log_dir: {log_dir}")

    if isTraining:
        utils.create_folder(log_dir)

    utils.setup_logger(log_dir, console_logger_level=logging.INFO)
    
    logger.info(f"log_dir {log_dir}")
    logger.info("unique_name: {}\n".format(unique_name))
    
    data_gen_train = None
    data_gen_val = None
    data_gen_test = None
    if isTraining:
        load_files_train_splitting_point = None if params['train_val_split'] == 1.0 else 'before'
        data_gen_train = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['train_split'], db=params['db'],
            nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params,
            load_files_before_after_splitting_point=load_files_train_splitting_point
        )

        if not params['quick_test']:
            load_files_val_splitting_point = None if params['train_val_split'] == 1.0 else 'after'
            data_gen_val = cls_data_generator.DataGenerator(
                dataset=params['dataset'], ov=params['overlap'], split=params['val_split'], db=params['db'],
                nfft=params['nfft'],
                batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
                weakness=params['weakness'], datagen_mode='train', cnn3d=params['cnn_3d'],
                xyz_def_zero=params['xyz_def_zero'],
                azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
                data_format=params['data_format'], params=params,
                load_files_before_after_splitting_point=load_files_val_splitting_point
            )
        else:
            import copy
            data_gen_val = copy.deepcopy(data_gen_train)
            logger.warning(f"Quick test, validation set is a deep copy of training set.")

    else:
        data_gen_test = cls_data_generator.DataGenerator(
            dataset=params['dataset'], ov=params['overlap'], split=params['test_split'], db=params['db'],
            nfft=params['nfft'],
            batch_size=params['batch_size'], seq_len=params['sequence_length'], classifier_mode=params['mode'],
            weakness=params['weakness'], datagen_mode='test', cnn3d=params['cnn_3d'],
            xyz_def_zero=params['xyz_def_zero'],
            azi_only=params['azi_only'], shuffle=False, debug_load_single_batch=params['debug_load_single_batch'],
            data_format=params['data_format'], params=params
        )

    data_gen_for_shapes = data_gen_train if isTraining else data_gen_test
    data_in, data_out = data_gen_for_shapes.get_data_sizes()
    logger.info(
        'FEATURES:\n'
        '\tdata_in: {}\n'
        '\tdata_out: {}\n'.format(
            data_in, data_out
        )
    )

    logger.info(
        'MODEL:\n'
        '\tdropout_rate: {}\n'
        '\tCNN: nb_cnn_filt: {}, pool_size{}\n'
        '\trnn_size: {}, fnn_size: {}\n'.format(
            params['dropout_rate'],
            params['nb_cnn3d_filt'] if params['cnn_3d'] else params['nb_cnn2d_filt'], params['pool_size'],
            params['rnn_size'], params['fnn_size']
        )
    )

    network.set_global_num_classes(params)
    keras.backend.set_image_data_format(params['data_format'])
    logger.info(f"Data format set to {params['data_format']}")
    
    model = None
    if isTraining:
        if params['use_quaternions']:
            assert (params['data_format'] == 'channels_last')

        if params['use_giusenso']:
            assert (params['data_format'] == 'channels_first')
            model = keras_model_giusenso.get_model_giusenso(data_in, data_out, params['dropout_rate'],
                                                            params['nb_cnn2d_filt'],
                                                            params['pool_size'], params['fnn_size'], params['loss_weights'])
        else:
            model = network.get_model(input_shape=data_in, output_shape=data_out, dropout_rate=params['dropout_rate'],
                                      pool_size=params['pool_size'],
                                      rnn_size=params['rnn_size'], fnn_size=params['fnn_size'],
                                      weights=params['loss_weights'], data_format=params['data_format'],
                                      params=params)
    
    model_path = os.path.join(log_dir, 'model')
    logger.info(f"model_path {model_path}")
    if os.path.exists(model_path):
        logger.info(f"Loading pretrained model from {model_path}")
        model = network.load_seld_model(model_path, params['doa_objective'])
    else:
        if not isTraining:
            raise FileNotFoundError(f"test mode but model was not found at {os.path.abspath(model_path)}")

    try:
        dot_img_file = os.path.join(log_dir, 'model_plot.png')
        keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True)
    except ImportError:
        logger.warning(f"Failed to import pydot, skip plotting")

    if isTraining:
        utils.copy_source_code(log_dir)
        train(model, data_gen_train, data_gen_val, params, log_dir=log_dir, unique_name=unique_name)
    else:
        evaluate(model, data_gen_test, params, log_dir=log_dir, unique_name=unique_name)