示例#1
0
def _save_durations_csv(input_path, durs_path, data_kind):
    """Tabulate duration and sampling frequency per recording and pickle the table.

    The table is indexed by every (patient, trial) combination for patients
    1..133 and trials 'a' and 'b'. Despite the function name, the result is
    written with ``DataFrame.to_pickle``.

    Args:
        input_path: Directory joined with each file's ``name`` to form its path.
        durs_path: Destination passed to ``DataFrame.to_pickle``.
        data_kind: Selector forwarded to ``files_builder``.

    Returns:
        The DataFrame with the ``duration_s`` and ``sfreq`` columns filled in
        for every file yielded by ``files_builder``.
    """
    patients = list(range(1, 134))
    rows = pd.MultiIndex.from_product([patients, ['a', 'b']],
                                      names=['patient', 'trial'])
    df_durs = pd.DataFrame(index=rows, columns=['duration_s', 'sfreq'])

    for entry in files_builder(data_kind):
        path = os.path.join(input_path, entry.name)
        # Both columns of a recording share the same (patient, trial) row.
        key = (get_index(path), get_trial(path))
        df_durs.loc[key, 'duration_s'] = get_duration(path, entry.df)
        df_durs.loc[key, 'sfreq'] = float(get_sampling_frequency(path))

    df_durs.to_pickle(durs_path)
    return df_durs
示例#2
0
    def check_results(self):
        """Settle the final result of the test once the run is over.

        * Result is PASSED or NOT_RUN and no scenario is set: the result
          comes from ``check_encoded_file()``.
        * Result is TIMEOUT: if the destination file's duration equals the
          project's duration, the timeout is re-reported with a message
          hinting at a missing EOS.
        * Anything else is delegated to ``GstValidateTest.check_results``.
        """
        ran_without_scenario = (
            self.result in [Result.PASSED, Result.NOT_RUN]
            and self.scenario is None)
        if ran_without_scenario:
            res, msg = self.check_encoded_file()
            self.set_result(res, msg)
            return

        if not self.result == utils.Result.TIMEOUT:
            GstValidateTest.check_results(self)
            return

        try:
            missing_eos = bool(
                utils.get_duration(self.dest_file) == self.project.get_duration())
        except Exception:
            # Best effort: a failure while reading either duration means the
            # timeout is left as it was reported.
            missing_eos = False

        if missing_eos:
            self.set_result(utils.Result.TIMEOUT, "The rendered file add right duration, MISSING EOS?\n",
                            "failure")
示例#3
0
    def check_results(self):
        """Settle the final result of the test once the run is over.

        * Result is PASSED and no scenario is set: the result comes from
          ``utils.compare_rendered_with_original(self.duration, self.dest_file)``.
        * Result is TIMEOUT: if the destination file's duration equals
          ``self.duration``, the timeout is re-reported with a message
          hinting at a missing EOS.
        * Anything else is delegated to ``GstValidateTest.check_results``.
        """
        if self.result is Result.PASSED and self.scenario is None:
            res, msg = utils.compare_rendered_with_original(self.duration, self.dest_file)
            self.set_result(res, msg)
        elif self.result == utils.Result.TIMEOUT:
            missing_eos = False
            try:
                if utils.get_duration(self.dest_file) == self.duration:
                    missing_eos = True
            except Exception:
                # Best effort: if the duration cannot be read, the timeout is
                # left as it was reported.
                pass

            if missing_eos:
                # This branch is only reached when the try block completed
                # without raising, so there is no exception object to pass on;
                # referencing the except-clause name here raised
                # UnboundLocalError.
                self.set_result(utils.Result.TIMEOUT, "The rendered file add right duration, MISSING EOS?\n",
                                "failure")
        else:
            GstValidateTest.check_results(self)
    def day_conferences_fct(self, obj):
        """Return one plain dict per DayConference whose conference is ``obj``.

        Each dict carries the record's ``id`` and ``is_full`` flag as stored,
        plus ``day``, ``hour_start``, ``hour_end`` and ``duration`` values
        produced by the ``cal_utils`` helpers.
        """
        return [
            {
                'id': entry.id,
                'day': cal_utils.get_day(entry.day),
                'hour_start': cal_utils.get_hour(entry.hour_start),
                'hour_end': cal_utils.get_hour(entry.hour_end),
                'duration': cal_utils.get_duration(entry.hour_start,
                                                   entry.hour_end),
                'is_full': entry.is_full,
            }
            for entry in DayConference.objects.filter(conference=obj)
        ]
示例#5
0
    def check_results(self):
        """Pick the final test result after the run.

        A PASSED or NOT_RUN result without a scenario is replaced by the
        outcome of ``check_encoded_file()``. A TIMEOUT whose rendered file has
        the same duration as the project is re-reported with a hint that the
        EOS is missing. Every other case goes to
        ``GstValidateTest.check_results``.
        """
        if self.result in [Result.PASSED, Result.NOT_RUN
                           ] and self.scenario is None:
            outcome, message = self.check_encoded_file()
            self.set_result(outcome, message)
        elif self.result == utils.Result.TIMEOUT:
            durations_match = False
            try:
                rendered = utils.get_duration(self.dest_file)
                durations_match = bool(rendered == self.project.get_duration())
            except Exception:
                # Best effort: leave the timeout untouched when either
                # duration cannot be obtained.
                pass

            if durations_match:
                self.set_result(
                    utils.Result.TIMEOUT,
                    "The rendered file add right duration, MISSING EOS?\n",
                    "failure")
        else:
            GstValidateTest.check_results(self)
示例#6
0
def create_clips(video_path, output_folder, interval_seconds, clip_length):
    """Cut a clip out of a video at every interval and list the clips in a text file.

    For each step, ``ffmpeg`` is invoked to encode ``clip_length`` seconds
    starting at ``step * interval_seconds`` into ``clip<step>.mkv`` inside
    ``output_folder``. A ``clips.txt`` file in the same folder receives one
    ``file '<clip name>'`` line per clip.

    Args:
        video_path: Path of the source video; must exist.
        output_folder: Folder for the clips and the list file; created
            (including missing parents) when absent.
        interval_seconds: Distance in seconds between clip start points.
        clip_length: Length in seconds of each clip.

    Returns:
        The path of the ``clips.txt`` list file.

    Raises:
        ClipError: If the video does not exist, the interval is not positive,
            or the interval is longer than the video.
    """
    if not os.path.exists(video_path):
        raise ClipError('The specified video file does not exist.')

    # A zero interval would otherwise surface as a ZeroDivisionError below.
    if interval_seconds <= 0:
        raise ClipError(f'The interval ({interval_seconds}s) must be greater than zero.')

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    duration = int(float(get_duration(video_path)))

    if interval_seconds > duration:
        raise ClipError(f'The interval ({interval_seconds}s) may not be longer than the video ({duration}s).')

    number_steps = math.trunc(duration / interval_seconds)
    output_clip_names = 'clips.txt'
    output_file_path = f'{output_folder}/{output_clip_names}'

    # The context manager closes the list file on every exit path, including
    # failures while printing the banner.
    with open(output_file_path, 'w') as clip_file:
        line()
        print(f'Creating a {clip_length} second clip every {interval_seconds} seconds from {video_path}...')
        line()

        # Steps start at 1 and stop before number_steps, so no clip is cut at
        # offset 0 or at the final interval boundary.
        for step in range(1, number_steps):
            clip_name = f'clip{step}.mkv'
            clip_file.write(f'file \'{clip_name}\'\n')
            output_filename = os.path.join(output_folder, clip_name)
            clip_offset = step_to_movie_timestamp(step * interval_seconds)
            print(f'Creating clip {step} which starts at {clip_offset}...')
            subprocess_cut_args = [
                "ffmpeg", "-loglevel", "warning", "-stats", "-y",
                "-ss", str(clip_offset), "-i", video_path,
                "-map", "0", "-t", str(clip_length),
                "-c:v", "libx264", "-crf", "0", "-preset", "ultrafast",
                "-an", "-sn", output_filename
            ]
            # The exit status of ffmpeg is not checked; a failed clip is
            # still listed in the clip file.
            subprocess.run(subprocess_cut_args)

    return output_file_path
示例#7
0
def main(corpus_dir,
         labels_dir,
         output_dir,
         sample_rate=16000,
         use_reference=False):
    """Write text, wav.scp, utt2spk, spk2utt and utt2dur for every subset.

    For each subset in the speaker map, every ``.wav`` file of every speaker
    under ``<corpus_dir>/core/<speaker>`` is processed, except utterances
    whose name ends in D, E or F. The transcription is read from
    ``<labels_dir>/transcriptions/<speaker>-<name>.txt`` and each word is
    reduced to the token ``SLT`` or ``CHILD``. The five files are written to
    ``<output_dir>/<subset>`` and ``./utils/validate_data_dir.sh`` is run on
    that directory.

    Args:
        corpus_dir: Corpus root holding ``core`` and ``doc/speakers``.
        labels_dir: Root holding the ``transcriptions`` directory.
        output_dir: Root of the generated data directories; created if absent.
        sample_rate: Rate inserted into the sox command of each wav.scp line.
        use_reference: Accepted for interface compatibility; not used here.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datadir = os.path.join(corpus_dir, 'core')
    transdir = os.path.join(labels_dir, 'transcriptions')
    speaker_map = read_speaker_map(os.path.join(corpus_dir, 'doc', 'speakers'))
    wav_ext = '.wav'

    # skip utterances of types D (articulatory), E (non-speech), and F (other)
    skip_tasks = ('D', 'E', 'F')

    for subset in speaker_map:
        print('Processing {0} data'.format(subset))
        subset_outdir = os.path.join(output_dir, subset)

        if not os.path.exists(subset_outdir):
            os.makedirs(subset_outdir)

        speaker_utts = {}
        text, wav = [], []
        utt2spk, utt2dur = [], []

        for speaker in speaker_map[subset]:
            speaker_dir = os.path.join(datadir, speaker)

            for wav_name in os.listdir(speaker_dir):
                if not wav_name.endswith(wav_ext):
                    continue

                # Strip only the trailing extension; str.replace would also
                # remove a '.wav' occurring earlier in the name.
                stem = wav_name[:-len(wav_ext)]
                if stem.endswith(skip_tasks):
                    continue

                # read transcription and convert to SLT/CHILD tokens
                fileid = '-'.join([speaker, stem])
                txt_f = os.path.join(transdir, fileid + '.txt')
                with open(txt_f, 'r') as fid:
                    txt = fid.readline().rstrip()

                tokens = ['SLT' if 'SLT' in w.upper() else 'CHILD'
                          for w in txt.split()]
                text.append(' '.join([fileid] + tokens))

                # prepare wav.scp; formatting in one step keeps the file id
                # and path from being re-scanned for template placeholders
                wavpath = os.path.join(speaker_dir, wav_name)
                wav.append('{0} sox {1} -r {2} -t .wav - |'.format(
                    fileid, wavpath, sample_rate))

                # prepare utt2dur
                utt2dur.append('{0} {1}'.format(fileid, get_duration(wavpath)))

                # prepare utt2spk
                utt2spk.append('{0} {1}'.format(fileid, speaker))

                speaker_utts.setdefault(speaker, []).append(fileid)

        # prepare spk2utt
        spk2utt = [
            '{0} {1}'.format(speaker, ' '.join(sorted(utts)))
            for speaker, utts in speaker_utts.items()
        ]

        write_data(text, os.path.join(subset_outdir, 'text'))
        write_data(wav, os.path.join(subset_outdir, 'wav.scp'))
        write_data(utt2spk, os.path.join(subset_outdir, 'utt2spk'))
        write_data(spk2utt, os.path.join(subset_outdir, 'spk2utt'))
        write_data(utt2dur, os.path.join(subset_outdir, 'utt2dur'))

        # validate data directory
        validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(
            subset_outdir)
        os.system(validate_cmd)
示例#8
0
def upload():
    """Save the uploaded video and audio blobs, decode the audio and render its stats.

    The ``video-blob`` and ``audio-blob`` parts of the request are saved to
    the configured ``UPLOAD_FOLDER``. The audio is passed to
    ``stereo_to_mono``, the resulting ``_mono`` file is decoded with
    ``decode_speech``, and the decoding stats, together with the elapsed time
    and the audio duration, are rendered with the ``decoded_speech.html``
    template.

    Returns:
        The rendered template when the audio blob is truthy.
    """
    log_request_info(request)

    # video is a werkzeug.datastructures.FileStorage object
    video = request.files['video-blob']
    app.logger.warning(video)
    app.logger.warning(type(video))
    app.logger.warning("filename: {0}".format(video.filename))
    audio = request.files['audio-blob']
    app.logger.warning(audio)

    # let's time it
    start = time.time()

    video_filename = ''
    audio_filename = ''

    if video:
        # Make the filename safe, remove unsupported chars
        video_filename = secure_filename(video.filename) + '_video' + '.webm'
        video.save(os.path.join(app.config['UPLOAD_FOLDER'], video_filename))

    if audio:
        audio_filename = secure_filename(
            audio.filename) + '_audio' + '.wav'
        print("AUDIO_1!!!: {0}".format(audio_filename))
        uploaded_audio = os.path.join(app.config['UPLOAD_FOLDER'],
                                      audio_filename)
        audio.save(uploaded_audio)
        app.logger.warning("filename: {0}".format(audio_filename))
        stereo_to_mono(uploaded_audio)

        # From here on audio_filename is the full path of the "_mono" file.
        audio_filename = os.path.join(app.config['UPLOAD_FOLDER'],
                                      new_filename(audio_filename, "_mono"))

        # decode the speech in the file
        ling_stats = decode_speech(audio_filename)

        stats = {'time_to_analyze': round(time.time() - start)}
        print("AUDIO_2!!!: {0}".format(audio_filename))
        stats['total speech time'] = get_duration(audio_filename)
        # combine the different stats to display in the template; update()
        # works on Python 2 and 3, whereas adding two items() views raises
        # TypeError on Python 3
        stats.update(ling_stats)

        app.logger.warning('stats: {0}'.format(stats))

        # render the speech as text on a different page
        return render_template('decoded_speech.html',
                               stats=stats,
                               video_filename=video_filename,
                               audio_filename=audio_filename)
    # NOTE(review): nothing is returned when the audio blob is falsy;
    # confirm what the caller should receive in that case.
示例#9
0
def generate_planets(theta, stars=stlr, mes_threshold=10):
    """
    Simulate a catalog of detected planets around the given stars.

    theta = (lnf0, alpha, beta, fB, gamma)

    For every star with a finite radius and mass, the number of planets is
    drawn from a Poisson distribution with mean ``exp(lnf0)`` and a companion
    is obtained from ``get_companion``. Each planet is kept only if it passes
    a random draw against its geometric transit probability and its MES is at
    least ``mes_threshold``.

    Args:
        theta: Five-element parameter tuple; passed whole to
            ``get_companion`` and ``draw_planet``.
        stars: DataFrame of stars; the rows must provide ``radius``,
            ``mass`` and ``kepid``.
        mes_threshold: Minimum MES for a planet to enter the catalog.

    Returns:
        DataFrame with the columns ``kepid``, ``koi_prad``, ``koi_period``,
        ``koi_prad_true`` and ``koi_max_mult_ev``, one row per kept planet.

    Raises:
        RuntimeError: If ``get_a`` returns NaN for a planet.
    """
    lnf0, _, _, _, _ = theta

    columns = ['kepid', 'koi_prad', 'koi_period',
               'koi_prad_true', 'koi_max_mult_ev']
    # Rows are collected in a list and turned into a DataFrame once at the
    # end; DataFrame.append copied the frame per row and no longer exists in
    # pandas 2.x.
    rows = []
    n_skipped = 0
    planet_count = poisson(np.exp(lnf0))

    for _, star in stars.iterrows():
        if np.isnan(star.radius) or np.isnan(star.mass):
            n_skipped += 1
            continue

        n_planets = planet_count.rvs()
        if n_planets == 0:
            continue

        try:
            star2, flux_ratio = get_companion(theta, star)
        except ValueError:
            n_skipped += 1
            continue

        for _ in range(n_planets):
            # First, figure out true & observed properties of planet
            radius, period = draw_planet(theta)
            observed_radius, host_star = diluted_radius(radius, star, star2, flux_ratio)

            logging.debug('True: {:.2f}, Observed: {:.2f} ({})'.format(
                radius, observed_radius, flux_ratio))

            # Then, is it detected?
            # First, geometric:
            aor = get_a(period, host_star.mass)
            if np.isnan(aor):
                raise RuntimeError('aor is nan: P={} M={}'.format(period, host_star.mass))
            transit_prob = get_pgeom(aor / host_star.radius, 0.)  # no ecc.

            if np.random.random() > transit_prob:
                continue

            # Then depth and MES:
            depth = get_delta(observed_radius * R_EARTH / star.radius)
            # presumably converts days to hours — TODO confirm units
            tau = get_duration(period, aor, 0.) * 24  # no ecc.
            try:
                mes = get_mes(star, period, depth, tau)
            except ValueError:
                # Without a MES the planet cannot be tested against the
                # threshold; skip it instead of falling through with an
                # unbound or stale value.
                n_skipped += 1
                continue

            if mes < mes_threshold:
                continue

            # Add planet to catalog
            rows.append({'kepid': star.kepid,
                         'koi_prad': observed_radius,
                         'koi_period': period,
                         'koi_prad_true': radius,
                         'koi_max_mult_ev': mes})

    planets = pd.DataFrame(rows, columns=columns)
    print('{} planets generated ({} of {} stars skipped.)'.format(len(planets),
                                                                 n_skipped, len(stars)))
    return planets
示例#10
0
def get_kodidb_setdata(metadatautils, set_id):
    '''Collect the details of a movie set and of each of its movies from the Kodi DB.

    The movies are processed in order of their year. Per-movie values are
    stored under keys prefixed with the movie's position ("0.label",
    "0.art.poster", ...); set-wide values (plots, titles, runtime, merged
    writer/director/genre/country/studio lists, years, watched counters,
    studio logo, art) are stored under plain keys.
    '''
    movieset = metadatautils.kodidb.movieset(set_id, FIELDS_MOVIES)
    total_movies = len(movieset['movies'])
    title_header = "[B]%s %s[/B][CR]" % (total_movies, xbmc.getLocalizedString(20342))

    # details["art"] is the same dict object as movieset["art"], so artwork
    # filled in from the individual movies below shows up in both.
    details = {"art": movieset["art"]}

    watched = 0
    unwatched = 0
    total_runtime = 0
    writer, director, genre, countries, studio = [], [], [], [], []
    years = []
    all_fanarts = []
    plot = ""
    title_list = ""

    # (key in the movie item, list collecting the values across the set)
    credit_buckets = (
        ("writer", writer),
        ("director", director),
        ("genre", genre),
        ("country", countries),
        ("studio", studio),
    )
    art_types = ("poster", "fanart", "landscape", "clearlogo", "clearart", "banner", "discart")
    # (max width, max height, label), checked in this order
    resolution_limits = (
        (720, 480, "480"),
        (768, 576, "576"),
        (960, 544, "540"),
        (1280, 720, "720"),
        (1920, 1080, "1080"),
    )

    for idx, movie in enumerate(sorted(movieset['movies'], key=itemgetter("year"))):
        if movie["playcount"] == 0:
            unwatched += 1
        else:
            watched += 1

        # generic labels
        for label in ("label", "plot", "year", "rating"):
            details['%s.%s' % (idx, label)] = movie[label]
        details["%s.DBID" % idx] = movie["movieid"]
        details["%s.duration" % idx] = movie['runtime'] / 60

        # art labels
        art = movie['art']
        for art_type in art_types:
            if not art.get(art_type):
                continue
            details['%s.art.%s' % (idx, art_type)] = get_clean_image(art[art_type])
            # the set inherits artwork it does not have from its movies
            if not movieset["art"].get(art_type):
                movieset["art"][art_type] = get_clean_image(art[art_type])
        all_fanarts.append(get_clean_image(art.get("fanart")))

        # streamdetails
        streamdetails = movie.get('streamdetails', '')
        if streamdetails:
            audiostreams = streamdetails.get('audio', [])
            videostreams = streamdetails.get('video', [])
            subtitles = streamdetails.get('subtitle', [])

            if len(videostreams) > 0:
                # grab details of first video stream
                video = videostreams[0]
                height = video.get("height", "")
                width = video.get("width", "")
                if height and width:
                    resolution = ""
                    for max_width, max_height, name in resolution_limits:
                        if width <= max_width and height <= max_height:
                            resolution = name
                            break
                    else:
                        if width * height >= 6000000:
                            resolution = "4K"
                    details["%s.resolution" % idx] = resolution
                details["%s.Codec" % idx] = video.get("codec", "")
                if video.get("aspect", ""):
                    details["%s.aspectratio" % idx] = round(video["aspect"], 2)

            if len(audiostreams) > 0:
                # grab details of first audio stream
                audio = audiostreams[0]
                details["%s.audiocodec" % idx] = audio.get('codec', '')
                details["%s.audiochannels" % idx] = audio.get('channels', '')
                details["%s.audiolanguage" % idx] = audio.get('language', '')

            if len(subtitles) > 0:
                # grab details of first subtitle
                details["%s.SubTitle" % idx] = subtitles[0].get('language', '')

        title_list += "%s (%s)[CR]" % (movie['label'], movie['year'])
        # the plot outline is preferred; the full plot is the fallback
        plot += "[B]%s (%s)[/B][CR]%s[CR][CR]" % (
            movie['label'], movie['year'], movie['plotoutline'] or movie['plot'])
        total_runtime += movie['runtime']

        for key, bucket in credit_buckets:
            values = movie.get(key)
            if values:
                # only values not yet collected from earlier movies are added
                bucket += [value for value in values if value and value not in bucket]

        years.append(str(movie['year']))

    details["plots"] = plot
    if total_movies > 1:
        details["extendedplots"] = title_header + title_list + "[CR]" + plot
    else:
        details["extendedplots"] = plot
    details["titles"] = title_list

    runtime_minutes = total_runtime / 60
    details["runtime"] = runtime_minutes
    details.update(get_duration(runtime_minutes))

    details["writer"] = writer
    details["director"] = director
    details["genre"] = genre
    details["studio"] = studio
    details["years"] = years
    if len(years) > 1:
        details["year"] = "%s - %s" % (years[0], years[-1])
    elif years:
        details["year"] = years[0]
    else:
        details["year"] = ""
    details["country"] = countries
    details["watchedcount"] = str(watched)
    details["unwatchedcount"] = str(unwatched)
    details.update(metadatautils.studiologos.get_studio_logo(studio, metadatautils.studiologos_path))
    details["count"] = total_movies
    details["art"]["fanarts"] = all_fanarts
    return details
示例#11
0
from utils import get_duration

# End timestamps to measure against one fixed start ('2018-11-20 00:00:00').
# The gaps grow from a single second up to several weeks.
ends = (
    '2018-11-20 00:00:01',
    '2018-11-20 00:01:00',
    '2018-11-20 00:02:02',
    '2018-11-20 03:00:04',
    '2018-11-20 03:04:05',
    '2018-11-21 00:00:00',
    '2018-11-22 16:00:00',
    '2018-11-23 00:56:00',
    '2018-11-27 00:14:45',
    '2018-12-30 12:34:56',
)

# Print what get_duration returns for each start/end pair.
for end in ends:
    print(get_duration(start='2018-11-20 00:00:00', end=end))
示例#12
0
def main(corpus_dir,
         labels_dir,
         output_dir,
         sample_rate=16000,
         use_reference=False):
    """Write text, wav.scp, utt2spk, spk2utt and utt2dur for the whole corpus.

    Every ``.wav`` file of every speaker under ``<corpus_dir>/core/<speaker>``
    is processed, across all subsets of the speaker map. The upper-cased
    prompt from the ``.txt`` file next to the wav is used as the text. All
    five files are written to ``output_dir`` and
    ``./utils/validate_data_dir.sh`` is run on that directory.

    Args:
        corpus_dir: Corpus root holding ``core`` and ``doc/speakers``.
        labels_dir: Root holding ``reference_labels/speaker_labels/lab``;
            only read when ``use_reference`` is true.
        output_dir: Directory for the generated files; created if absent.
        sample_rate: Rate inserted into the sox command of each wav.scp line.
        use_reference: When true, keep only utterances whose id matches a
            file name in the reference label directory.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    datadir = os.path.join(corpus_dir, 'core')
    speaker_map = read_speaker_map(os.path.join(corpus_dir, 'doc', 'speakers'))
    wav_ext = '.wav'

    reference_ids = None
    if use_reference:
        ref_dir = os.path.join(labels_dir, 'reference_labels',
                               'speaker_labels', 'lab')
        # A set keeps the per-utterance membership test constant-time.
        reference_ids = {f.replace('.lab', '') for f in os.listdir(ref_dir)}

    speaker_utts = {}
    text, wav = [], []
    utt2spk, utt2dur = [], []

    for subset in speaker_map:
        print('Processing {0} data'.format(subset))

        for speaker in speaker_map[subset]:
            speaker_dir = os.path.join(datadir, speaker)

            for wav_name in os.listdir(speaker_dir):
                if not wav_name.endswith(wav_ext):
                    continue

                # Strip only the trailing extension; str.replace would also
                # remove a '.wav' occurring earlier in the name.
                stem = wav_name[:-len(wav_ext)]
                fileid = '-'.join([speaker, stem])

                if use_reference and fileid not in reference_ids:
                    continue

                # use prompt for text, although it will be ignored for decoding
                txt_f = os.path.join(speaker_dir, stem + '.txt')
                with open(txt_f, 'r') as fid:
                    txt = fid.readline().rstrip()

                text.append(' '.join([fileid] + [w.upper() for w in txt.split()]))

                # prepare wav.scp; formatting in one step keeps the file id
                # and path from being re-scanned for template placeholders
                wavpath = os.path.join(speaker_dir, wav_name)
                wav.append('{0} sox {1} -r {2} -t .wav - |'.format(
                    fileid, wavpath, sample_rate))

                # prepare utt2dur
                utt2dur.append('{0} {1}'.format(fileid, get_duration(wavpath)))

                # prepare utt2spk
                utt2spk.append('{0} {1}'.format(fileid, speaker))

                speaker_utts.setdefault(speaker, []).append(fileid)

    # prepare spk2utt
    spk2utt = [
        '{0} {1}'.format(speaker, ' '.join(sorted(utts)))
        for speaker, utts in speaker_utts.items()
    ]

    write_data(text, os.path.join(output_dir, 'text'))
    write_data(wav, os.path.join(output_dir, 'wav.scp'))
    write_data(utt2spk, os.path.join(output_dir, 'utt2spk'))
    write_data(spk2utt, os.path.join(output_dir, 'spk2utt'))
    write_data(utt2dur, os.path.join(output_dir, 'utt2dur'))

    # validate data directory
    validate_cmd = './utils/validate_data_dir.sh --no-feats {0}'.format(
        output_dir)
    os.system(validate_cmd)
    def process(self):
        """Fill missing values, derive features and encode columns of ``self.df_all``.

        Steps, in order:
          1. Fill NaN with -1 in ``self.nanum_columns`` and with "" in
             ``self.nastr_columns``.
          2. Add IP scope, IP zone and concatenated categorical features; the
             concatenated columns are registered in ``self.le_columns``.
          3. Add the ``timestamp_hour`` and ``end_hour/minute/second``
             features derived from ``timestamp_dist`` and the start time.
          4. Add sum and ratio features.
          5. Label-encode every column in ``self.le_columns`` and replace
             every column in ``self.oe_columns`` by its one-hot columns.

        All row-wise computations are delegated to helpers in ``utils``.
        """
        # fill na
        # The filled column is assigned back: fillna(inplace=True) on a
        # selected column is chained assignment and is not guaranteed to
        # update df_all itself.
        for column in self.nanum_columns:
            print("Fill NA {}".format(column))
            self.df_all[column] = self.df_all[column].fillna(-1)

        for column in self.nastr_columns:
            print("Fill NA {}".format(column))
            self.df_all[column] = self.df_all[column].fillna("")

        # new features
        self.df_all["dstipscope_dominate"] = self.df_all.apply(
            lambda row: utils.get_ip_scope(row["dstipcategory_dominate"]),
            axis=1)
        self.df_all["srcipscope_dominate"] = self.df_all.apply(
            lambda row: utils.get_ip_scope(row["srcipcategory_dominate"]),
            axis=1)

        # ip zone features
        # Loop values are bound as lambda defaults so each lambda keeps the
        # value of its own iteration.
        for zone in range(1, 5):
            self.df_all["ip_zone_{}".format(zone)] = self.df_all.apply(
                lambda row, zone=zone: utils.get_ip_zone(row["ip"], zone),
                axis=1)

        # concatenation features
        zone_combo_columns = []
        for zones in ((1, 2), (1, 2, 3), (3, 4), (2, 3, 4)):
            combo_name = "ip_zone_" + "".join(str(zone) for zone in zones)
            sources = ["ip_zone_{}".format(zone) for zone in zones]
            self.df_all[combo_name] = self.df_all.apply(
                lambda row, sources=sources: utils.concatenate_values(
                    [row[source] for source in sources]),
                axis=1)
            zone_combo_columns.append(combo_name)
        self.le_columns.extend(zone_combo_columns)

        feature_pairs = [("categoryname", "ipcategory_scope"),
                         ("categoryname", "overallseverity"),
                         ("srcipscope_dominate", "dstipscope_dominate")]

        for f1, f2 in feature_pairs:
            fn = f1 + "_" + f2
            self.df_all[fn] = self.df_all.apply(
                lambda row, f1=f1, f2=f2: utils.concatenate_values(
                    [row[f1], row[f2]]),
                axis=1)
            self.le_columns.append(fn)

        # timestamp_dist in hour and minute
        self.df_all["timestamp_hour"] = self.df_all.apply(
            lambda row: utils.get_duration(row["timestamp_dist"]), axis=1)

        # ending time features
        for unit in ("hour", "minute", "second"):
            self.df_all["end_{}".format(unit)] = self.df_all.apply(
                lambda row, unit=unit: utils.get_end_time(
                    row["start_hour"], row["start_minute"],
                    row["start_second"], row["timestamp_dist"], unit),
                axis=1)

        # sum score features
        sum_features = (
            ("sum_score", ["{}score".format(score)
                           for score in ["untrust", "flow", "trust", "enforcement"]]),
            ("sum_n", ["n{}".format(i) for i in range(1, 11)]),
            ("sum_p5", ["p5{}".format(p5) for p5 in ["m", "w", "d"]]),
            ("sum_p8", ["p8{}".format(p8) for p8 in ["m", "w", "d"]]),
        )
        for sum_name, sources in sum_features:
            self.df_all[sum_name] = self.df_all.apply(
                lambda row, sources=sources: utils.get_sum(
                    [row[source] for source in sources]),
                axis=1)

        # get ratio features
        for period in ("month", "week"):
            self.df_all["thrcnt_{}_day".format(period)] = self.df_all.apply(
                lambda row, period=period: utils.get_ratio(
                    row["thrcnt_{}".format(period)], row["thrcnt_day"]),
                axis=1)

        # encode features with label encoder
        label_encoder = LabelEncoder()
        for column in self.le_columns:
            print("Label encoding {}".format(column))
            label_encoder.fit(self.df_all[column])
            self.df_all[column] = label_encoder.transform(self.df_all[column])

        # encode features with one-hot encoder
        for column in self.oe_columns:
            print("One-hot encoding {}".format(column))
            pd_encoded = pd.get_dummies(self.df_all[column])
            # One-hot column names: "<column>_<value>", with the value
            # lower-cased and its whitespace replaced by underscores.
            pd_encoded.columns = [
                "{}_{}".format(column, "_".join(str(col).lower().split()))
                for col in pd_encoded.columns
            ]
            self.df_all.drop(column, axis=1, inplace=True)
            self.df_all = pd.concat([self.df_all, pd_encoded], axis=1)