Exemplo n.º 1
0
    def run(self, context):
        """Create this step's directory inside the package tree.

        Relative names go under the project dir; absolute names are
        re-rooted under the package root.
        """
        if not self.directory_name.isabsolute():
            directory = Path(Path(context['package_project_dir']), self.directory_name)
        else:
            # NOTE(review): strip_root() presumably drops the leading separator
            # so the absolute path can be re-anchored — confirm at its definition.
            directory = Path(context['package_root'], Path(strip_root(self.directory_name)))

        directory.mkdir(parents=True, mode=self.mode)
Exemplo n.º 2
0
def sort_episode(series_name, episode, torrent_path):
    """Copy a downloaded episode into SERIES_DIR/<series_name>/.

    *torrent_path* may be a single video file or a directory containing
    one; the chosen file is copied as '<series> - <episode><ext>'.
    """
    # Ensure the series directory exists
    series_dir = Path(SERIES_DIR, series_name)
    series_dir.mkdir(True)

    if torrent_path.isdir():
        # Collect candidate videos by extension, flatten, and drop samples.
        files = [torrent_path.listdir('*.' + ext) for ext in VIDEO_FILES]
        files = [f for sublist in files for f in sublist]
        files = remove_samples(files)
        logging.debug('List of files: {}'.format(files))

        if len(files) == 0:
            logging.critical('No video file found in series directory!')
            sys.exit(1)
        # NOTE(review): if several videos remain, only the first is copied.
        src_file = files[0]
        dst_file = Path(series_dir,
            series_name + ' - ' + episode + files[0].ext)
    else:
        # .ext includes the leading dot; VIDEO_FILES entries do not.
        if torrent_path.ext.replace('.', '') not in VIDEO_FILES:
            logging.warning('Unknown video file extention: {}'.format(
                torrent_path.ext))
        src_file = torrent_path
        dst_file = Path(series_dir, series_name + ' - ' + episode + \
            torrent_path.ext)

    logging.info('Copying single file to destination: {}'.format(
            dst_file))
    copy_file(src_file, dst_file)
Exemplo n.º 3
0
    def append(self, key):
        """Add a public key for this user and commit it to the keydir.

        *key* may be the key text itself or a path to a file containing it.
        """
        key_path = Path(str(key))

        # If a readable file was passed, load the key material from it.
        if key_path.isfile():
            with open(str(key_path)) as f:
                key = f.read()

        # Normalize to bytes so hashing and comparison are consistent.
        if not isinstance(key, bytes):
            key = key.encode('utf-8')

        # Already registered — nothing to do.
        if key in self:
            return

        # Keys are stored under an md5 fingerprint of the key body (the
        # second whitespace-separated field of a public-key line).
        directory = Path(self.user.path, 'keydir', self.user.name,
                         hashlib.md5(key.strip().split()[1]).hexdigest())
        directory.mkdir(parents=True)

        key_file = Path(directory, "%s.pub" % self.user.name)
        # Identical key already on disk — skip the write and the commit.
        if key_file.exists() and key_file.read_file() == key:
            return

        key_file.write_file(key, mode='wb')

        self.user.git.commit(['keydir'],
                             'Added new key for user %s' % self.user.name)

        super(ListKeys, self).append(key)
Exemplo n.º 4
0
def create(dataset, work_dir):
    """Preprocess and segment every page of *dataset*, collecting stats."""
    # Find all the pages in the dataset
    img_dir = Path(Path.cwd().ancestor(1), 'data/hwr_data/pages', dataset)
    ann_dir = Path(Path.cwd().ancestor(1), 'data/charannotations')
    images = img_dir.listdir('*.jpg')
    annotations = ann_dir.listdir(dataset + '*.words')
    # Pair each page image with its annotation file.
    files = merge(images, annotations)

    # Create character segmentations
    stats = {}
    for f in files:
        # Preprocess
        logging.info("Preprocessing %s", str(f[0]))
        pagesPathFolder = Path(work_dir, 'pages')
        pagesPathFolder.mkdir()
        pagePath = Path(pagesPathFolder, f[0].stem + '.ppm')
        img = cv2.imread(f[0], cv2.IMREAD_GRAYSCALE)
        img = preprocess(img)
        cv2.imwrite(pagePath, img)

        # Segment
        segmentPathFolder = Path(work_dir, 'segments')
        segmentPathFolder.mkdir()
        e = ET.parse(f[1]).getroot()
        logging.info("Segmenting %s", str(f[0]))
        segment(img, e, segmentPathFolder, stats)

    print_statistics(stats, dataset)
Exemplo n.º 5
0
    def __init__(self, subject):
        """Set up an experiment session for *subject* (a dict of subject vars)."""
        # Session metadata: copy of the subject info plus date/computer stamps.
        self.session = subject.copy()
        self.session['date'] = data.getDateStr()
        self.session['computer'] = socket.gethostname()

        self.trials = Trials(**subject)
        self.load_sounds('stimuli/sounds')
        # Auditory feedback keyed by correctness (0 = incorrect, 1 = correct).
        self.feedback = {}
        self.feedback['audio'] = {
            0: sound.Sound('stimuli/feedback/incorrect.wav'),
            1: sound.Sound('stimuli/feedback/correct.wav'),
        }
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input; acceptable only because texts.yaml ships locally.
        self.texts = yaml.load(open('texts.yaml'))
        # Map raw device inputs to responses (1 = yes, 0 = no).
        self.device = ResponseDevice(gamepad={
            0: 1,
            3: 0
        },
                                     keyboard={
                                         'y': 1,
                                         'n': 0
                                     })

        data_dir = Path(DATA_FILE.format(**subject)).parent
        if not data_dir.isdir():
            data_dir.mkdir()
        # Python 2 idiom: buffering=0 opens the data file unbuffered so each
        # trial row hits disk immediately.
        self.data_file = open(DATA_FILE.format(**subject), 'w', 0)
        self.write_trial()  # write header
Exemplo n.º 6
0
def savejson(d):
    """Write record *d* as pretty JSON under exportpath/<status>/<year>/<month>/<id>.json."""
    # Collapse every 'Pendent...' variant into the single 'Pendent' bucket.
    status = 'Pendent' if d['status'].startswith('Pendent') else d['status']
    target_dir = Path(exportpath, status, d['year'], d['month'])
    target_dir.mkdir(parents=True)
    out_path = target_dir.child('{}.json'.format(d['id']))
    with open(out_path, 'w') as handle:
        json.dump(d, handle, sort_keys=True, indent=4, separators=(',', ': '))
Exemplo n.º 7
0
    def get_backups_folder(self) -> Path:
        """Return the local backup folder, creating it on first use.

        Returns:
            Path: the backup directory configured in settings.BACKUP['folder'].
        """
        folder = Path(settings.BACKUP['folder'])
        if not folder.exists():
            folder.mkdir()
        return folder
Exemplo n.º 8
0
def get(project=None):
    """Get the data from different experiments.

    Warning! Experiment directories are expected in a particular location
    outside of this (words-in-transition) directory.

    Options are:

        telephone-app
        acoustic-similarity
        learning-sound-names

    """
    if project is None or project == 'telephone-app':
        app_dir = Path('../telephone-app')
        snapshot_dir = Path(app_dir, 'words-in-transition')
        # NOTE(review): src_dir is not defined in this function — presumably a
        # module-level global; verify it is bound before this runs.
        if src_dir.exists():
            src_dir.rmtree()
        copytree(snapshot_dir, src_dir)

    if project is None or project == 'acoustic-similarity':
        # src
        proj_dir = Path('../acoustic-similarity/data')
        judgments = Path(proj_dir, 'judgments')

        # dst
        acoustic_similarity_dir = Path(data_raw, 'acoustic-similarity')
        if not acoustic_similarity_dir.isdir():
            acoustic_similarity_dir.mkdir()

        # copy the csvs in the root proj data dir
        for csv in proj_dir.listdir('*.csv'):
            csv.copy(Path(acoustic_similarity_dir, csv.name))

        # concat and save judgments files
        # 'judgments' is rebound below from a directory Path to the list of
        # DataFrames read out of that directory.
        judgments_csv = Path(acoustic_similarity_dir, 'judgments.csv')
        judgments = [pd.read_csv(x) for x in judgments.listdir('*.csv')]
        if judgments:
            (pd.concat(judgments, ignore_index=True).to_csv(judgments_csv,
                                                            index=False))

    if project is None or project == 'learning-sound-names':
        src = Path('../learning-sound-names/data')
        dst = Path(data_raw, 'learning_sound_names.csv')
        data = pd.concat([pd.read_csv(x) for x in src.listdir('LSN*.csv')])
        data['is_correct'] = data.is_correct.astype(int)
        data.to_csv(dst, index=False)

        # also get subject info and questionnaire data
        to_get = ['questionnaire_v1', 'subject_info']
        for x in to_get:
            src_file = Path(src, '{}.csv'.format(x))
            dst_file = Path(data_raw, 'learning_sound_names_{}.csv'.format(x))
            run('cp {} {}'.format(src_file, dst_file))
Exemplo n.º 9
0
        def copy_files(context):
            """Mirror the deployment files tree into the package project dir."""
            # copy files from deployments
            if context["deployment_files_dir"].exists() and context["deployment_files_dir"].isdir():
                for i in context["deployment_files_dir"].walk():
                    rel_path = get_relative_path(i, context["deployment_files_dir"])
                    target = Path(context["package_project_dir"], rel_path)

                    if i.isdir():
                        # Recreate directories with this step's configured mode.
                        target.mkdir(parents=True, mode=self.mode)
                    elif i.isfile():
                        # Shell out to cp -R to preserve file attributes.
                        local("cp -R %(i)s %(target)s" % locals())
Exemplo n.º 10
0
def get(project=None):
    """Get the data from different experiments.

    Warning! Experiment directories are expected in a particular location
    outside of this (words-in-transition) directory.

    Options are:

        telephone-app
        acoustic-similarity
        learning-sound-names

    """
    if project is None or project == 'telephone-app':
        app_dir = Path('../telephone-app')
        snapshot_dir = Path(app_dir, 'words-in-transition')
        # NOTE(review): src_dir is not defined in this function — presumably a
        # module-level global; verify it is bound before this runs.
        if src_dir.exists():
            src_dir.rmtree()
        copytree(snapshot_dir, src_dir)

    if project is None or project == 'acoustic-similarity':
        # src
        proj_dir = Path('../acoustic-similarity/data')
        judgments = Path(proj_dir, 'judgments')

        # dst
        acoustic_similarity_dir = Path(data_raw, 'acoustic-similarity')
        if not acoustic_similarity_dir.isdir():
            acoustic_similarity_dir.mkdir()

        # copy the csvs in the root proj data dir
        for csv in proj_dir.listdir('*.csv'):
            csv.copy(Path(acoustic_similarity_dir, csv.name))

        # concat and save judgments files
        # 'judgments' is rebound below from a directory Path to the list of
        # DataFrames read out of that directory.
        judgments_csv = Path(acoustic_similarity_dir, 'judgments.csv')
        judgments = [pd.read_csv(x) for x in judgments.listdir('*.csv')]
        if judgments:
            (pd.concat(judgments, ignore_index=True)
               .to_csv(judgments_csv, index=False))

    if project is None or project == 'learning-sound-names':
        src = Path('../learning-sound-names/data')
        dst = Path(data_raw, 'learning_sound_names.csv')
        data = pd.concat([pd.read_csv(x) for x in src.listdir('LSN*.csv')])
        data['is_correct'] = data.is_correct.astype(int)
        data.to_csv(dst, index=False)

        # also get subject info and questionnaire data
        to_get = ['questionnaire_v1', 'subject_info']
        for x in to_get:
            src_file = Path(src, '{}.csv'.format(x))
            dst_file = Path(data_raw, 'learning_sound_names_{}.csv'.format(x))
            run('cp {} {}'.format(src_file, dst_file))
Exemplo n.º 11
0
def dict2dir(dir, dic, mode="w"):
    """Materialize nested dict *dic* as a directory tree rooted at *dir*.

    String values become file contents; dict values become subdirectories,
    created recursively. *mode* is the open() mode used for the files.
    """
    dir = Path(dir)
    if not dir.exists():
        dir.mkdir()
    for filename, content in dic.items():
        p = Path(dir, filename)
        if isinstance(content, dict):
            # Bug fix: propagate *mode* into the recursion (it was dropped,
            # so nested files were always written in the default "w" mode).
            dict2dir(p, content, mode)
            continue
        # Context manager guarantees the handle is closed even on error.
        with open(p, mode) as f:
            f.write(content)
Exemplo n.º 12
0
def dict2dir(dir, dic, mode="w"):
    """Materialize nested dict *dic* as a directory tree rooted at *dir*.

    String values become file contents; dict values become subdirectories,
    created recursively. *mode* is the open() mode used for the files.
    """
    dir = Path(dir)
    if not dir.exists():
        dir.mkdir()
    for filename, content in dic.items():
        p = Path(dir, filename)
        if isinstance(content, dict):
            # Bug fix: propagate *mode* into the recursion (it was dropped,
            # so nested files were always written in the default "w" mode).
            dict2dir(p, content, mode)
            continue
        # Context manager guarantees the handle is closed even on error.
        with open(p, mode) as f:
            f.write(content)
Exemplo n.º 13
0
def process(appname):
    """Scaffold per-app static/, templates/ and urls.py for a Django app."""
    appdir = Path(appname)
    if not appdir.isdir():
        print("Error: there is no app called {0}.".format(appdir))
        sys.exit(1)
    # else
    # mkdir(True) means mkdir(parents=True) in this Path library.
    static = Path(appname, 'static', appname)
    static.mkdir(True)
    templates = Path(appname, 'templates', appname)
    templates.mkdir(True)
    urls = Path(appname, 'urls.py')
    # Only write the default urls.py template if the app lacks one.
    if not urls.isfile():
        urls.write_file(urls_py)
Exemplo n.º 14
0
class cwd(object):
    """Context manager: chdir into *cwd* (creating it if needed) and
    restore the previous working directory on exit."""

    def __init__(self, cwd):
        self.prev_cwd = FSPath.cwd()
        self.cwd = Path(cwd)
        if not self.cwd.exists():
            self.cwd.mkdir(parents=True)

    def __enter__(self):
        self.cwd.chdir()
        return self.cwd

    def __exit__(self, type_, value, traceback):
        # Always restore the original directory, even when an error escaped.
        self.prev_cwd.chdir()
Exemplo n.º 15
0
def segment(img, annotation, work_dir, stats):
    """Crop each annotated character from *img* into work_dir/<char>/,
    trimming white borders, and collect width/height stats per character."""
    sides = ['left', 'top', 'right', 'bottom']
    # Parse the given sentences
    for sentence in annotation:
        for word in sentence:
            for char in word:
                c = char.get('text')
                if c in '!-,.':
                     continue # Skip stupid labels
                cdir = Path(work_dir, c)
                cdir.mkdir()
                # Unique output filename per crop.
                f = Path(cdir, str(uuid.uuid1()) + '.ppm')

                rect = {side: int(char.get(side)) for side in sides}
                # Correct for swapped coordinates
                if rect['top'] > rect['bottom']:
                    rect['top'], rect['bottom'] = rect['bottom'], rect['top']
                if rect['left'] > rect['right']:
                    rect['left'], rect['right'] = rect['right'], rect['left']
                cropped_im = img[rect['top']:rect['bottom'], rect['left']:rect['right']]

                # Remove rows from the top if they're white
                while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \
                        and min(cropped_im[0,:]) == 255:
                    cropped_im = cropped_im[1:,:]
                # Remove from the bottom
                while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \
                        and min(cropped_im[-1,:]) == 255:
                    cropped_im = cropped_im[:-1,:]
                # Remove from the left
                while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \
                        and min(cropped_im[:,0]) == 255:
                    cropped_im = cropped_im[:,1:]
                # Remove from the right
                while cropped_im.shape[1] > 0 and cropped_im.shape[0] > 0 \
                        and min(cropped_im[:,-1]) == 255:
                    cropped_im = cropped_im[:,:-1]
                # Discard crops that are too small or badly proportioned.
                if cropped_im.shape[0] <= 5 or cropped_im.shape[1] <= 5:
                    print "Discarding image"
                    continue
                aspect_ratio = cropped_im.shape[0] / float(cropped_im.shape[1])
                if not (1/3.0) <= aspect_ratio <= 5.5:
                    print "Image with wrong aspect ratio:", aspect_ratio, c
                    continue
                cv2.imwrite(f, cropped_im)

                # Add to statistics
                if c not in stats.keys():
                    stats[c] = {'width': [], 'height': []}
                stats[c]['width'].append(cropped_im.shape[1])
                stats[c]['height'].append(cropped_im.shape[0])
Exemplo n.º 16
0
def download_survey_responses(survey_name):
    """Download the survey data.

    Args:
        survey_name: 'sound_similarity_6' or 'sound_similarity_4'
    """
    client = Qualtrics(**get_creds())
    frame = client.get_survey_responses(survey_name)

    out_dir = Path(exp_dir, survey_name)
    if not out_dir.exists():
        out_dir.mkdir()

    frame.to_csv(Path(out_dir, survey_name + '.csv'), index=False)
Exemplo n.º 17
0
def download_survey_responses(survey_name):
    """Download the survey data.

    Args:
        survey_name: 'sound_similarity_6' or 'sound_similarity_4'
    """
    creds = get_creds()
    responses = Qualtrics(**creds).get_survey_responses(survey_name)

    survey_dir = Path(exp_dir, survey_name)
    if not survey_dir.exists():
        survey_dir.mkdir()

    csv_name = '{}.csv'.format(survey_name)
    responses.to_csv(Path(survey_dir, csv_name), index=False)
Exemplo n.º 18
0
def create():
    """Build the lexicon and persist it to tmp/lexicon.csv.

    Returns:
        dict: the lexicon produced by create_lexicon().
    """
    # create a clean temporary directory
    work_dir = Path("tmp")
    work_dir.mkdir()

    lexicon = create_lexicon()

    lexicon_path = Path(work_dir, "lexicon.csv")
    # 'with' closes the handle (the original leaked it); the unreachable
    # duplicate 'return lexicon' after the first return was removed.
    with open(lexicon_path, "w") as fh:
        writer = csv.writer(fh)
        for key, val in lexicon.items():
            writer.writerow([key, val])
    return lexicon
Exemplo n.º 19
0
def setup_report_dir(app_name, random_dir=True):
    """Create and return a report directory for *app_name*.

    With random_dir=True, a date-stamped directory with a random 6-char
    suffix is created under <cwd>/<app_name>; otherwise <cwd>/<app_name>
    itself is used.
    """
    if random_dir:
        date_dir = time.strftime('%y-%m-%d')
        date_dir += "-"
        date_dir += ''.join(
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(6))
    else:
        date_dir = ""

    # Bug fix: the components were passed as one list, which Path stringifies
    # into a literal "['app', 'date']" directory name.
    report_dir = Path(Path.cwd(), app_name, date_dir)
    report_dir.mkdir(parents=True)

    return report_dir
Exemplo n.º 20
0
def gather(ctx):
    """Gather the experiment data and put it in the R pkg data-raw folder.

    Currently set to get the fourth run experiment data.
    """
    dest_dir = Path('propertyverificationdata', 'data-raw', 'question_first',
                    'fourth_run', 'data')
    if not dest_dir.exists():
        dest_dir.mkdir(parents=True)
    data_files = Path('experiment/data').listdir('PV*csv')
    for data_file in data_files:
        dest = Path(dest_dir, data_file.name)
        run('cp {src} {dest}'.format(src=data_file, dest=dest))
    # Bug fix: copy the subj info sheet once, after the loop — it was inside
    # the loop body and ran once per data file.
    run('cp experiment/subj_info.csv {}'.format(dest_dir.parent))
Exemplo n.º 21
0
def copy_sounds(ctx, force=False):
    """Copy sounds from acoustic-similarity to use in this experiment."""
    src_dir = Path('../acoustic-similarity/data/sounds')
    assert src_dir.isdir(), 'expecting sounds to be in {}'.format(src_dir)

    dst_dir = Path('stimuli/sounds')

    if not dst_dir.isdir():
        dst_dir.mkdir()

    trials = pandas.read_csv('stimuli/messages.csv')
    # One wav per unique seed; existing files are kept unless force=True.
    for seed_id in trials.seed_id.unique():
        seed_name = '{}.wav'.format(seed_id)
        dst = Path(dst_dir, seed_name)
        if force or not dst.exists():
            Path(src_dir, seed_name).copy(dst)
Exemplo n.º 22
0
def main():
    """CLI entry: dump referer and search-term reports for a site.

    Expects exactly three arguments: site, database URL, output directory.
    """
    global conn
    opts, args = parser.parse_args()
    common.init_logging(opts.log_sql)
    if opts.debug:
        log.setLevel(logging.DEBUG)
    if len(args) != 3:
        parser.error("wrong number of command-line arguments")
    site, dburl, output_dir = args
    engine = sa.create_engine(dburl)
    log.debug("Starting")
    conn = engine.connect()
    output_dir = Path(output_dir)
    output_dir.mkdir()
    # Two referer variants toggled by the boolean flag —
    # NOTE(review): confirm the flag's meaning at output_raw_referers.
    output_raw_referers(site, conn, output_dir, True)
    output_raw_referers(site, conn, output_dir, False)
    output_search_terms(site, conn, output_dir)
Exemplo n.º 23
0
def main():
    """CLI entry: dump referer and search-term reports for a site.

    Expects exactly three arguments: site, database URL, output directory.
    """
    global conn
    opts, args = parser.parse_args()
    common.init_logging(opts.log_sql)
    if opts.debug:
        log.setLevel(logging.DEBUG)
    if len(args) != 3:
        parser.error("wrong number of command-line arguments")
    site, dburl, output_dir = args
    engine = sa.create_engine(dburl)
    log.debug("Starting")
    conn = engine.connect()
    output_dir = Path(output_dir)
    output_dir.mkdir()
    # Two referer variants toggled by the boolean flag —
    # NOTE(review): confirm the flag's meaning at output_raw_referers.
    output_raw_referers(site, conn, output_dir, True)
    output_raw_referers(site, conn, output_dir, False)
    output_search_terms(site, conn, output_dir)
Exemplo n.º 24
0
def main():
    """CLI entry point: validate the dataset argument and run create().

    Usage: python <script> <dataset>, where dataset is 'KNMP', 'Stanford'
    or 'both'.
    """
    if len(sys.argv) != 2 or sys.argv[1] not in ['KNMP', 'Stanford', 'both']:
        print("Usage: python %s <dataset>" % sys.argv[0])
        print("\tDataset should be either 'KNMP' or 'Stanford' or 'both'")
        sys.exit(1)

    # Create a clean temporary directory. (The original deleted and
    # recreated "tmp" twice — once via os/shutil, once via Path.)
    if os.path.exists("tmp"):
        shutil.rmtree("tmp")
    os.makedirs("tmp")
    work_dir = Path("tmp")

    if sys.argv[1] in ['KNMP', 'both']:
        create('KNMP', work_dir)
    if sys.argv[1] in ['Stanford', 'both']:
        create('Stanford', work_dir)
Exemplo n.º 25
0
def convert_wav_to_mp3(src_dir=None, dst_dir=None):
    """Convert wav sounds to mp3."""
    # Defaults: read from the module-level seeds_dir, write alongside source.
    if src_dir is None:
        src_dir = seeds_dir
    if dst_dir is None:
        dst_dir = src_dir

    src_dir = Path(src_dir)
    assert src_dir.exists()

    dst_dir = Path(dst_dir)
    if not dst_dir.exists():
        dst_dir.mkdir(True)

    # names_only yields bare filenames; full paths are rebuilt below.
    wav_seeds = Path(src_dir).listdir('*.wav', names_only=True)
    for wav in wav_seeds:
        mp3 = Path(wav).stem + '.mp3'
        # -qscale:a 2 selects high-quality VBR lame encoding.
        cmd = 'ffmpeg -i {} -codec:a libmp3lame -qscale:a 2 {}'
        local_run(cmd.format(Path(src_dir, wav), Path(dst_dir, mp3)))
Exemplo n.º 26
0
def convert_wav_to_mp3(src_dir=None, dst_dir=None):
    """Convert wav sounds to mp3."""
    # Defaults: read from the module-level seeds_dir, write alongside source.
    if src_dir is None:
        src_dir = seeds_dir
    if dst_dir is None:
        dst_dir = src_dir

    src_dir = Path(src_dir)
    assert src_dir.exists()

    dst_dir = Path(dst_dir)
    if not dst_dir.exists():
        dst_dir.mkdir(True)

    # names_only yields bare filenames; full paths are rebuilt below.
    wav_seeds = Path(src_dir).listdir('*.wav', names_only=True)
    for wav in wav_seeds:
        mp3 = Path(wav).stem + '.mp3'
        # -qscale:a 2 selects high-quality VBR lame encoding.
        cmd = 'ffmpeg -i {} -codec:a libmp3lame -qscale:a 2 {}'
        local_run(cmd.format(Path(src_dir, wav), Path(dst_dir, mp3)))
Exemplo n.º 27
0
def env_create_package_files(workspace, packages, files):
    """Build fake sdist tarballs (with PKG-INFO/egg-info) for test packages.

    For each package in *packages*, every entry in *files* becomes a
    directory that is tarred into <workspace>/test/<package>/<file>.tar.gz
    and then removed. With no *files*, just the package directory is made.
    """
    for p in packages:
        if files:
            for f in files:
                f = Path(workspace, 'test', p, f)
                f.mkdir(parents=True)
                # Version = the digits of the directory name, dot-joined.
                version = '.'.join([x for x in f.name if x.isdigit()])
                i = env_create_package_pkginfo(p, version)
                Path(f, '%s.egg-info' % f.name).mkdir()
                Path(f, 'PKG-INFO').write_file(i)
                Path(f, '%s.egg-info' % f.name, 'PKG-INFO').write_file(i)

                # Tar from the parent so archive member paths are relative.
                os.chdir(f.parent)
                tar = tarfile.open('%s.tar.gz' % f, 'w:gz')
                tar.add('%s' % f.name)
                tar.close()
                f.rmtree()
        else:
            Path(workspace, 'test', p).mkdir(parents=True)
Exemplo n.º 28
0
def env_create_package_files(workspace, packages, files):
    """Build fake sdist tarballs (with PKG-INFO/egg-info) for test packages.

    For each package in *packages*, every entry in *files* becomes a
    directory that is tarred into <workspace>/test/<package>/<file>.tar.gz
    and then removed. With no *files*, just the package directory is made.
    """
    for p in packages:
        if files:
            for f in files:
                f = Path(workspace, 'test', p, f)
                f.mkdir(parents=True)
                # Version = the digits of the directory name, dot-joined.
                version = '.'.join([x for x in f.name if x.isdigit()])
                i = env_create_package_pkginfo(p, version)
                Path(f, '%s.egg-info' % f.name).mkdir()
                Path(f, 'PKG-INFO').write_file(i)
                Path(f, '%s.egg-info' % f.name, 'PKG-INFO').write_file(i)

                # Tar from the parent so archive member paths are relative.
                os.chdir(f.parent)
                tar = tarfile.open('%s.tar.gz' % f, 'w:gz')
                tar.add('%s' % f.name)
                tar.close()
                f.rmtree()
        else:
            Path(workspace, 'test', p).mkdir(parents=True)
Exemplo n.º 29
0
def sort_movie(movie_name, movie_year, torrent_path):
    """Copy a downloaded movie into MOVIE_DIR/'<name> (<year>)'/.

    *torrent_path* may be a single video file or a directory; multi-part
    movies are copied as '<name> - CD<n><ext>'.
    """
    movie_dir = Path(MOVIE_DIR, movie_name + ' (' + movie_year + ')')
    movie_dir.mkdir(True)

    if torrent_path.isdir():
        files = list_files(torrent_path, VIDEO_FILES)
        logging.debug('List of files: {}'.format(files))

        # Remove videos that are not part of the movie
        videos = []
        for f in files:
            if any(n.lower() in f.stem.lower() for n in movie_name.split()):
                videos.append(f)
        if len(videos) < len(files) and len(videos) > 0:
            files = videos

        if len(files) == 0:
            logging.critical('No video files found in movie directory!')
            sys.exit(1)
        elif len(files) == 1:
            src_file = files[0]
            dst_file = Path(movie_dir, movie_name + files[0].ext)
            logging.info('Copying single file to destination: {}'.format(
                dst_file))
            copy_file(src_file, dst_file)
        elif len(files) > 1:
            # Bug fix: the part counter was never incremented, so every part
            # overwrote ' - CD1'. enumerate() numbers the parts correctly.
            for i, f in enumerate(files, start=1):
                dst_file = Path(movie_dir, movie_name + ' - CD' + str(i) + \
                    f.ext)
                logging.info('Copying part {} from {} to {}'.format(i,
                    f, dst_file))
                copy_file(f, dst_file)
    else:
        # Consistency fix: strip the leading dot before comparing against
        # VIDEO_FILES, matching how sort_episode performs this check.
        if torrent_path.ext.replace('.', '') not in VIDEO_FILES:
            logging.warning('Unknown video file extention: {}'.format(
                torrent_path.ext))
        src_file = torrent_path
        dst_file = Path(movie_dir, movie_name + torrent_path.ext)
        logging.info('Copying single file to destination: {}'.format(
            dst_file))
        copy_file(src_file, dst_file)
Exemplo n.º 30
0
def setup_test_sdist():
    """Create a scratch dir plus a shell script that tests the sdist
    against every demo database, then run the script."""
    if len(env.demo_databases) == 0:
        return
    ve_path = Path(env.temp_dir, 'test_sdist')
    #~ if ve_path.exists():
    ve_path.rmtree()
    #~ rmtree_after_confirm(ve_path)
    ve_path.mkdir()
    script = ve_path.child('tmp.sh')

    context = dict(name=env.SETUP_INFO['name'], sdist_dir=env.sdist_dir,
                   ve_path=ve_path)
    #~ file(script,'w').write(TEST_SDIST_TEMPLATE % context)
    txt = TEST_SDIST_TEMPLATE % context
    # Append one django test invocation per demo database settings module.
    for db in env.demo_databases:
        txt += "django-admin.py test --settings=%s --traceback\n" % db
    script.write_file(txt)
    script.chmod(0o777)
    with lcd(ve_path):
        local(script)
Exemplo n.º 31
0
def get_audio_features(mp3_name, interval_size = 5000.0):
    """Return (time_seconds, loudness) pairs for an mp3, sampled every
    *interval_size* milliseconds.

    Loudness is dBFS shifted by +120 and floored at 0 so values are
    non-negative.
    """
    analyses_dir = Path(settings.ANALYSES_DIR)
    if not analyses_dir.exists():
        analyses_dir.mkdir()

    converted_wav = _convert_mp3_to_wav(mp3_name)
    chunks_of_wav = _break_wav_into_chunks(converted_wav)

    values = list()
    for chunk in chunks_of_wav:
        segment = pydub.AudioSegment.from_file(chunk, format = 'wav')
        # Split this chunk into fixed-size moments (milliseconds).
        moments = pydub.utils.make_chunks(segment, interval_size)
        del segment

        # NOTE(review): the comprehension variable shadows the outer 'chunk';
        # harmless here since 'chunk' is not reused afterwards in this body.
        dbfs_of_peaks = [chunk.dBFS for chunk in moments]
        del moments

        values.extend([max(0, loudness + 120) for loudness in dbfs_of_peaks])

    times = [i * (interval_size / 1000.0) for i in range(len(values))]
    return zip(times, values)
Exemplo n.º 32
0
def create_folder():
    """Create a folder under the package repo from the submitted form, or
    redirect to an existing/matching folder."""
    base = Path(current_app.config.get('INUPYPI_REPO', Path('.', 'packages')))
    path = sanitize_path(request.form.get('path', ''))
    folder = sanitize_path(request.form.get('folder_name', ''))

    # NOTE(review): this truthiness test is on the joined path value, not on
    # filesystem existence — confirm it correctly guards empty form input.
    if Path(path, folder):
        temp_base = Path(base, path).absolute()
        create_path = Path(temp_base, folder)

        if create_path.exists():
            return redirect('%s' % Path(path, folder))
        search = search_path(folder, temp_base)

        if search:
            base = Path(base).absolute()
            return redirect('%s' % sanitize_path(search.replace(base, '')))

        try:
            create_path.mkdir(parents=True)
        except Exception, e:
            status = 'Failed to create %s' % e
            abort(500, status)
Exemplo n.º 33
0
def create_folder():
    """Create a folder under the package repo from the submitted form, or
    redirect to an existing/matching folder."""
    base = Path(current_app.config.get('INUPYPI_REPO',
                Path('.', 'packages')))
    path = sanitize_path(request.form.get('path', ''))
    folder = sanitize_path(request.form.get('folder_name', ''))

    # NOTE(review): this truthiness test is on the joined path value, not on
    # filesystem existence — confirm it correctly guards empty form input.
    if Path(path, folder):
        temp_base = Path(base, path).absolute()
        create_path = Path(temp_base, folder)

        if create_path.exists():
            return redirect('%s' % Path(path, folder))
        search = search_path(folder, temp_base)

        if search:
            base = Path(base).absolute()
            return redirect('%s' % sanitize_path(search.replace(base, '')))

        try:
            create_path.mkdir(parents=True)
        except Exception, e:
            status = 'Failed to create %s' % e
            abort(500, status)
Exemplo n.º 34
0
def download_file(downloadable_url):
    """ File is only downloaded if it doesn't exist in DOWNLOADS_DIR

    This function does not rename the file. It only downloads the file
    if the expected name is not present in the DOWNLOADS_DIR.

    Returns a django.core.files.File object that can be stored in a FileField.
    """
    download_dir = Path(settings.DOWNLOADS_DIR)
    if not download_dir.exists():
        download_dir.mkdir()

    # Local filename is the last path component of the URL.
    name_in_url = Path(downloadable_url).name
    expected_loc = Path(download_dir, name_in_url)

    # only download if necessary
    if not expected_loc.exists():
        # Stream in 1 KiB chunks so large files are never fully in memory.
        response = requests.get(downloadable_url, stream = True)
        with open(expected_loc, 'wb') as expected_loc_handle:
            for chunk in response.iter_content(chunk_size = 1024):
                expected_loc_handle.write(chunk)

    return File(open(expected_loc, 'rb'))
Exemplo n.º 35
0
def setup_dir(dir):
    """Create *dir* together with its 'prod' and 'sandbox' subdirectories.

    Returns the report directory as a Path.
    """
    report_dir = Path(dir)
    report_dir.mkdir(parents=True)
    for subdir in ("prod", "sandbox"):
        Path(report_dir, subdir).mkdir()
    return report_dir
Exemplo n.º 36
0
    def append(self, key):
        """Add a public key for this user and commit it to the keydir.

        *key* may be the key text itself or a path to a file containing it.
        Made consistent with the other ListKeys.append implementation in
        this codebase: the path argument is stringified, text keys are
        normalized to bytes, and the key file is written in binary mode.
        """
        key_path = Path(str(key))

        if key_path.isfile():
            with open(str(key_path)) as f:
                key = f.read()

        # Normalize to bytes so hashing and comparison are consistent.
        if not isinstance(key, bytes):
            key = key.encode('utf-8')

        if key in self:
            return

        # Keys are stored under an md5 fingerprint of the key body (the
        # second whitespace-separated field of a public-key line).
        directory = Path(self.user.path, 'keydir', self.user.name,
                         hashlib.md5(key.strip().split()[1]).hexdigest())
        directory.mkdir(parents=True)

        key_file = Path(directory, "%s.pub" % self.user.name)
        # Identical key already on disk — skip the write and the commit.
        if key_file.exists() and key_file.read_file() == key:
            return

        key_file.write_file(key, mode='wb')

        self.user.git.commit(['keydir'],
                             'Added new key for user %s' % self.user.name)

        super(ListKeys, self).append(key)
Exemplo n.º 37
0
def create_photo_directory(headline, *datetime_obj):
    """
    Given a datetime object, create a directory in
    portfolio/static/portfolio/media labeled YEAR_MONTH_DAY_HEADLINE

    input: headline string and an optional python datetime object
    output: result of the directory-creation call
    """
    # TODO: This should create a dictionary in the project root
    # --> Make sure that the test is updated
    # Bug fix: *datetime_obj is a tuple; the original called strftime() on
    # the tuple itself whenever a caller supplied a datetime argument.
    when = datetime_obj[0] if datetime_obj else datetime.now()
    underscore_headline = headline.replace(' ', '_')
    post_dir = when.strftime('%Y_%m_%d_{}'.format(underscore_headline))
    media_dir = Path('./portfolio/static/portfolio/media/{}'.format(post_dir))
    return media_dir.mkdir()
Exemplo n.º 38
0
        print('Usage: python {} <image> <words>'.format(sys.argv[0]))
        sys.exit(1)

    img_file = sys.argv[1]
    word_file = sys.argv[2]

    logger.info('Loading pretrained models...')
    with open('tmp/svm.pickle', 'r') as f:
        svm = pickle.load(f)

    logger.info('Loading image and words file')
    img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    img = prep.preprocess(img)
    xml = ET.parse(word_file).getroot()

    out_dir.mkdir()
    for f in out_dir.walk('*'):
        f.remove()

    logger.info('Starting to create a split file')
    for sentence in xml:
        for word in sentence:
            text = word.get('text')
            print(text)
            #if '@' in text or len(text) < 6:
            if text != 'buton':
                continue # Skip short words

            # Get the word image
            rect = {side: int(word.get(side)) for side in sides}
            word_img = img[rect['top']:rect['bottom'], rect['left']:rect['right']]
Exemplo n.º 39
0
class BatchWriter(object):
    """Write image datasets to disk as cuda-convnet style pickled batches.

    Produces numbered ``data_batch_<n>`` files plus a ``batches.meta`` file
    holding the per-pixel mean, label names and batch size.
    """

    def __init__(self,
                 data_path,
                 img_size=32,
                 channels=3,
                 max_batch_size=10000):
        # All batches live under <data_path>/batches, created on demand.
        self.data_path = Path(data_path).child('batches')
        if not self.data_path.exists():
            self.data_path.mkdir(parents=True)
        self.img_size = img_size
        self.channels = channels
        self.max_batch_size = max_batch_size
        self.datasets = {}
        self.next_batch = 1       # index of the next data_batch_<n> to write
        self.train_range = None   # (first, last) training batch numbers
        self.test_range = None    # (first, last) test batch numbers

    def prepare_training(self, train_dset, test_dset):
        """Dump train (optional) and test datasets, then the meta batch.

        The meta batch's mean is computed over train+test data when a
        training set is given, otherwise over the test data alone.
        """
        if train_dset is not None:
            train_data = self.preprocess_data(train_dset.data)
            train_batch_size = self.calculate_batch_size(train_data.shape[0])
            self.train_range = self.dump_batches(train_data, train_dset.output,
                                                 train_dset.filenames,
                                                 train_batch_size)
        test_data = self.preprocess_data(test_dset.data)
        test_batch_size = self.calculate_batch_size(test_data.shape[0])
        self.test_range = self.dump_batches(test_data, test_dset.output,
                                            test_dset.filenames,
                                            test_batch_size)
        if train_dset is None:
            label_names = test_dset.labels.copy()
            data = test_data
            batch_size = test_batch_size
        else:
            # Merge label maps; training labels take precedence order-wise.
            label_names = train_dset.labels.copy()
            label_names.update(test_dset.labels)
            data = np.vstack((train_data, test_data))
            batch_size = train_batch_size
        self.dump_meta_batch(data, label_names, batch_size)

    def dump_meta_batch(self, data, label_names, batch_size):
        """Write 'batches.meta' with the per-pixel mean as a column vector."""
        mean = data.transpose().mean(axis=1).reshape((-1, 1))
        data_path = self.data_path.child('batches.meta')
        self.write_cifar_meta_batch(data_path, mean, label_names, batch_size)

    def dump_batches(self, data, output, filenames, batch_size):
        """Slice the dataset into batches of `batch_size` rows and write each.

        Returns the inclusive (first, last) batch-number range written.
        """
        start_batch = self.next_batch
        for i, mark in enumerate(range(0, data.shape[0], batch_size)):
            slice_ = slice(mark, mark + batch_size)
            self.write_cifar_batch(
                self.data_path.child('data_batch_' + str(self.next_batch)),
                data[slice_].transpose(), output[slice_], filenames[slice_])
            self.next_batch += 1
        return start_batch, self.next_batch - 1

    def write_cifar_batch(self, data_path, data, labels, filenames):
        """Pickle one data batch in the cuda-convnet dict layout."""
        data = {
            'batch_label': '',
            'labels': labels,
            'data': data,
            'filenames': filenames,
        }
        # NOTE: cPickle is the Python 2 module; on Python 3 this name must
        # be provided (e.g. `import pickle as cPickle`) at module level.
        with open(data_path, 'wb') as f:
            cPickle.dump(data, f)

    def write_cifar_meta_batch(self, data_path, mean, label_names, batch_size):
        """Pickle the meta batch (mean, label names, sizing info)."""
        data = {
            'data_mean': mean,
            'label_names': label_names,
            'num_cases_per_batch': batch_size,
            'num_vis': mean.shape[0]
        }
        with open(data_path, 'wb') as f:
            cPickle.dump(data, f)

    def preprocess_data(self, dset_data):
        """Resize flattened images to img_size if they come in another size.

        Rows are flattened channel-interleaved images; returns uint8 data
        of shape (rows, img_size**2 * channels).
        """
        dset_size = int(np.sqrt(dset_data.shape[1] / self.channels))
        data = np.empty((dset_data.shape[0], self.img_size**2 * self.channels),
                        dtype=np.uint8)
        if self.img_size != dset_size:
            for i in range(data.shape[0]):
                image = ConvImage.from_array(dset_data[i], self.channels,
                                             dset_size)
                image.to_size(self.img_size)
                data[i] = image.to_array()
            return data
        return dset_data

    def get_data_options(self):
        """Command-line options for a cuda-convnet training run."""
        return [
            '--data-path=%s' % self.data_path,
            '--train-range=%s-%s' % self.train_range,
            '--test-range=%s-%s' % self.test_range,
            '--img-size=%s' % self.img_size, '--data-provider=cifar'
        ]

    def get_data_options_test(self):
        """Command-line options for a test/prediction run."""
        return [
            '--data-dir=%s' % self.data_path,
            '--test-range=%s-%s' % self.test_range, '--is-dataset=1'
        ]

    def calculate_batch_size(self, data_size):
        """Pick a batch size that splits data_size into roughly equal
        batches, none larger than max_batch_size.

        Bug fix: the original used true division, which on Python 3 yields
        a float and breaks range()/slicing in dump_batches. Floor division
        (//) is identical to Python 2's int `/` here.
        """
        if data_size % self.max_batch_size == 0:
            return self.max_batch_size
        c = data_size // self.max_batch_size + 1
        return data_size // c
Exemplo n.º 40
0
 def test_mkdir_and_rmdir_with_parents(self):
     """mkdir(parents=True) builds the whole a/b/c chain under self.d,
     and rmdir(parents=True) removes the chain back up again."""
     abc = Path(self.d, "a", "b", "c")
     abc.mkdir(parents=True)
     assert abc.isdir()
     abc.rmdir(parents=True)
     # The topmost created ancestor must be gone as well.
     assert not Path(self.d, "a").exists()
Exemplo n.º 41
0
#! coding=utf-8

import datetime, smtplib, imp, os
from email.mime.text import MIMEText

import requests
from git import Repo
from git.errors import *
from unipath import Path
from jinja2 import Template

# Local settings are stored in $HOME/.ometrics/settings.py
# Find and load module there, creating files/dirs where appropriate
om = Path(os.environ['HOME']).absolute().child('.ometrics')
om.mkdir()
module_path = om.absolute()
if not om.child('settings.py').exists():
    # First run: seed the user's settings from the bundled template.
    # (Fixed the 'setitngs.py' typo in this user-facing message.)
    print('Copying settings.py into %s' % str(om.child('settings.py')))
    Path(__file__).parent.child('settings.py').copy(om.child('settings.py'))
imp.load_source('settings', module_path.child('settings.py'))

from settings import *

from template import HTML_MAIL


# Reporting window: the last INTERVAL_DAYS days (from settings), in UTC.
now = datetime.datetime.utcnow()
gap = datetime.timedelta(days=INTERVAL_DAYS)
begins = now - gap
class ProjectInstaller(Installer):
    """Install a Django project from a git skeleton repository.

    Clones `git_repo`, copies the chosen `flavor` skeleton into the install
    path, installs requirements, then delegates database and Django setup
    to DatabaseInstaller / DjangoInstaller.
    """

    flavor = 'django_custom'
    git_repo = 'https://github.com/Libermentix/project_skeletton_directory.git'

    def __init__(self, project_dir, project_name,
                 db_sudo=False, db_sudo_user=None, *args, **kwargs):
        """Set up template variables and child installers.

        WARNING: if the install path already exists it is deleted wholesale.
        """
        super(ProjectInstaller, self).__init__(
            project_dir, project_name, *args, **kwargs
        )

        self.var_dict = dict(
            project_dir=add_trailing_slash(project_dir),
            project_name=add_trailing_slash(project_name)
        )

        self._tmp_dir = None

        # Start from a clean slate: remove any previous installation.
        if self.install_path.exists():
            self.install_path.rmtree()

        self.db_installer = DatabaseInstaller(
            project_dir=project_dir, project_name=project_name,
            sudo=db_sudo, sudo_user=db_sudo_user
        )
        self.django_installer = DjangoInstaller(
            project_dir=project_dir, project_name=project_name
        )

    def run(self):
        """Execute the three installation phases in order."""
        self.run_prepare_configuration()
        self.run_create_configuration()
        self.run_post_create_configuration()

    def run_prepare_configuration(self):
        """Fetch the skeleton repo, copy it in place, install requirements."""
        self.get_git_repo()
        self.install_skeletton()
        self.install_requirements()


    def run_post_create_configuration(self):
        """
        run the post_run_command_stack
        """
        self.db_installer()
        self.django_installer()

        self.move_to_venv(which_one='postactivate')
        self.move_to_venv(which_one='activate')

        self.finish_queued_commands()

        #run the post create configuration command for the children
        for item in self.db_installer.post_run_command_stack:
            # should be a callable or None
            if item: item()

        for item in self.django_installer.post_run_command_stack:
            # should be a callable or None
            if item:
                logger.info('%s: Executing a django_installer_script ...' % item)
                item()


    @property
    def requirements_file(self):
        # Absolute path to the skeleton's base pip requirements file.
        return Path(
            self.install_path, 'requirements', 'base.txt'
        ).absolute()

    @property
    def repo_dir(self):
        """Local checkout directory of `git_repo` inside the tmp dir."""
        #get last
        directory = self.git_repo.split('/')[-1:][0]
        #remove .git
        directory = directory.split('.')[0]
        return Path(self._tmp_dir, directory)

    def create_tmp_dir(self):
        # Create <install_path>/tmp and chdir into it for the clone.
        # TODO:
        # Account for existing project paths, here it should ask to remove
        # or abort.
        self._tmp_dir = Path(self.install_path, 'tmp')
        self._tmp_dir.mkdir()
        self._tmp_dir.chdir()

    def delete_tmp_dir(self):
        # Leave the tmp dir before deleting it.
        self.project_dir.chdir()
        self._tmp_dir.rmtree()

    def get_git_repo(self):
        """Clone `git_repo` into a fresh tmp directory."""
        self.create_tmp_dir()
        logger.info('Cloning repository ...')

        if self.repo_dir.exists():
            logger.info('Repo dir exists removing it...')
            self.repo_dir.rmtree()

        # NOTE(review): uses `git.Git()` although only `Repo` is imported at
        # module top in sibling examples — confirm the `git` module binding.
        git.Git().clone(self.git_repo)
        logger.info('..done')

    def install_skeletton(self):
        """Move the flavor's skeleton files into the install path."""
        logger.info('Installing %s' % self.flavor)

        source = Path(self.repo_dir, self.flavor)

        #move all items in the directory into the install_path
        for item in source.listdir():
            item.move(self.install_path)
        self.delete_tmp_dir()
        logger.info('...done')

    def install_virtualenv(self):
        """
        Calls a script that creates the virtual environment and installs
        its dependencies, currently only sports python2.7 support.
        """
        exec_path = Path(Path(__file__).parent, 'bash', 'installer.sh')

        command = '%s %s %s %s' % (exec_path,
                                   self.install_path,
                                   self.project_name,
                                   self.requirements_file)

        logger.info('Installing virtualenv... (calling %s)' % command)
        self.run_command(command)

    def install_requirements(self):
        """Install requirements, creating a virtualenv first if needed."""
        if not self.is_envwrapper:
            self.install_virtualenv()
        else:
            # we can assume that we are in the virtualenv now, and mkproject
            # was called
            command = 'pip install -r %s' % self.requirements_file
            self.run_command(command)
Exemplo n.º 43
0
    def set_data_dir(self):
        """Create the standard data directory tree and describe it.

        Builds <dataPath>/{INPUT,OUTPUT,TMP}/<APP_ID>/<AIMS_ID>/<HOST_ID>/
        {CSV,JSON,IMAGE,VIDEO} (plus CROP/ITENSITY leaves), then returns a
        DataFrame (also stored on self.df_datafile) listing every leaf path.

        Initial setup, 2018-11 (translated from the original Korean note).
        """
        def _make_subdir(parent, name):
            # Build "<parent>/<name>" exactly as the original did (string
            # concatenation on the unipath Path, which subclasses str),
            # create it, and return the new Path.
            child = Path(parent.rstrip("/") + "/" + name)
            child.mkdir()
            return child

        # DATA ROOT (Level 1)
        data_root = Path(self.dataPath)
        data_root.mkdir()

        # DATA IN/OUT/TMP (Level 2)
        data_input_path = _make_subdir(data_root, "INPUT")
        data_output_path = _make_subdir(data_root, "OUTPUT")
        data_tmp_path = _make_subdir(data_root, "TMP")

        # DATA (INPUT): app / aims / host id levels
        data_input_eid_path = _make_subdir(data_input_path, self.app_id.upper())
        data_input_rid_path = _make_subdir(data_input_eid_path, self.aimsId.upper())
        data_input_lid_path = _make_subdir(data_input_rid_path, self.hostId.upper())

        # DATA (INPUT) CSV, JSON, IMAGE, VIDEO (Level 3)
        data_input_csv_path = _make_subdir(data_input_lid_path, "CSV")
        data_input_json_path = _make_subdir(data_input_lid_path, "JSON")
        data_input_image_path = _make_subdir(data_input_lid_path, "IMAGE")
        data_input_video_path = _make_subdir(data_input_lid_path, "VIDEO")

        # DATA (TMP): app / aims / host id levels
        data_tmp_eid_path = _make_subdir(data_tmp_path, self.app_id.upper())
        data_tmp_rid_path = _make_subdir(data_tmp_eid_path, self.aimsId.upper())
        data_tmp_lid_path = _make_subdir(data_tmp_rid_path, self.hostId.upper())

        # DATA (TMP) CSV, JSON, IMAGE (+CROP/ITENSITY), VIDEO (Level 3)
        data_tmp_csv_path = _make_subdir(data_tmp_lid_path, "CSV")
        data_tmp_json_path = _make_subdir(data_tmp_lid_path, "JSON")
        data_tmp_image_path = _make_subdir(data_tmp_lid_path, "IMAGE")
        data_tmp_imagecrp_path = _make_subdir(data_tmp_image_path, "CROP")
        # 'ITENSITY' is an existing typo for INTENSITY — kept as-is because
        # consumers of these paths may already depend on the exact name.
        data_tmp_imageinten_path = _make_subdir(data_tmp_image_path, "ITENSITY")
        data_tmp_video_path = _make_subdir(data_tmp_lid_path, "VIDEO")

        # DATA (OUTPUT): app / aims / host id levels
        data_output_eid_path = _make_subdir(data_output_path, self.app_id.upper())
        data_output_rid_path = _make_subdir(data_output_eid_path, self.aimsId.upper())
        data_output_lid_path = _make_subdir(data_output_rid_path, self.hostId.upper())

        # DATA (OUTPUT) CSV, JSON, IMAGE, VIDEO (+ITENSITY) (Level 3)
        data_output_csv_path = _make_subdir(data_output_lid_path, "CSV")
        data_output_json_path = _make_subdir(data_output_lid_path, "JSON")
        data_output_image_path = _make_subdir(data_output_lid_path, "IMAGE")
        data_output_video_path = _make_subdir(data_output_lid_path, "VIDEO")
        data_out_videointen_path = _make_subdir(data_output_video_path, "ITENSITY")

        # Catalogue every leaf directory for downstream lookups.
        data = {
            "DATA_NAME": ["INPUT_CSV_PATH", "INPUT_JSON_PATH", "INPUT_IMAGE_PATH", "INPUT_VIDEO_PATH",
                          "OUTPUT_CSV_PATH", "OUTPUT_JSON_PATH", "OUTPUT_IMAGE_PATH",
                          "OUTPUT_VIDEO_PATH",
                          "TMP_CSV_PATH", "TMP_JSON_PATH", "TMP_IMAGE_PATH", "TMP_VIDEO_PATH", "TMP_IMAGE_CROP_PATH",
                          "TMP_IMAGE_INTEN_PATH", "OUTPUT_VIDEO_INTEN_PATH"
                          ],
            "DATA_PATH": [data_input_csv_path, data_input_json_path, data_input_image_path,
                          data_input_video_path,
                          data_output_csv_path, data_output_json_path,
                          data_output_image_path, data_output_video_path,
                          data_tmp_csv_path, data_tmp_json_path, data_tmp_image_path,
                          data_tmp_video_path, data_tmp_imagecrp_path, data_tmp_imageinten_path,
                          data_out_videointen_path
                          ],
            "USE_YN": ["Y", "Y", "Y", "Y",
                       "Y", "Y", "Y", "Y",
                       "Y", "Y", "Y", "Y", "Y", "Y", "Y"
                       ]}
        self.df_datafile = pd.DataFrame(data, columns=["DATA_NAME", "DATA_PATH", "USE_YN"])

        return self.df_datafile
Exemplo n.º 44
0
from unipath import Path

# proj_root/py_pkg/settings.py
# PROJ_ROOT: the project root, two directory levels above this module.
PROJ_ROOT = Path(__file__).ancestor(2).absolute()
# Ensure the data directory exists before building paths into it.
DATA_DIR = Path(PROJ_ROOT, 'data')
if not DATA_DIR.isdir():
    DATA_DIR.mkdir()

# SQLite database file used for article quality scores.
SQLITE_PATH = Path(DATA_DIR, 'article_qualities.sqlite')
Exemplo n.º 45
0
class Installer(object):
    """Base installer: creates the install directory and renders
    postactivate/postdeactivate shell snippets from Jinja2 templates
    into the virtualenv's bin folder.

    Subclasses implement run_prepare_configuration() and may append
    callables to post_run_command_stack.
    """

    install_path = None
    postactivate = None
    postdeactivate = None

    project_name = None
    project_dir = None

    # NOTE(review): mutable *class* attributes — shared across all
    # instances/subclasses unless reassigned. Kept for backward
    # compatibility; be careful when mutating them in place.
    var_dict = {}

    post_run_command_stack = []


    def __init__(self, project_dir, project_name, envwrapper=False,
                 *args, **kwargs):
        """Create <project_dir>/<project_name> and chdir into it.

        envwrapper: True when running inside virtualenvwrapper's mkproject,
        in which case no standalone virtualenv is created later.
        """
        self.project_dir = Path(project_dir).absolute()
        self.project_name = project_name
        self.is_envwrapper = envwrapper

        # make all attributes overridable so that external applications
        # can make use of the pattern and reset the variable names
        for k, v in six.iteritems(kwargs):
            setattr(self, k, v)

        self.install_path = Path(self.project_dir, project_name)
        self.install_path.mkdir()
        self.install_path.chdir()

        # Lazy caches; False means "not yet computed".
        self._environment_cache = False
        self._template_dir_cache = False
        self._template_cache = False

    @property
    def venv_folder(self):
        """
        extracts the venv folder from the environment variables ($WORKON_HOME,
        to be precise and combines it with the project name.
        """
        path = os.environ.copy().get('WORKON_HOME')
        if path:
            return Path(path)
        else:
            return None


    @property
    def template_env(self):
        """
        provides the template environment
        """
        if not getattr(self, '_environment_cache', False):
            self._environment_cache = Environment(
                loader=FileSystemLoader(self.get_template_dir())
            )

        return self._environment_cache

    def run_command(self, command, blocking=False):
        """Execute a shell command; optionally wait for it to finish."""
        command = Command(command)

        command()

        if blocking:
            logger.debug('Waiting for command to finish...')
            command.wait()

        return True

    def finish_queued_commands(self):
        finish_queued_commands()

    def get_installer_name(self):
        # e.g. 'projectinstaller' for ProjectInstaller; used to pick
        # template file names.
        return self.__class__.__name__.lower()

    def get_template_dir(self):
        """Return (and cache) the templates directory next to this file.

        Bug fix: the original tested the unrelated '_template_cache' flag
        here, so once that cache was populated this method could return
        the uninitialized False value of '_template_dir_cache'.
        """
        if not getattr(self, '_template_dir_cache', False):
            self._template_dir_cache = Path(Path(__file__).parent, 'templates')
        return self._template_dir_cache

    def get_template(self, which_one):
        """
        provides a wrapper around jinja2 get_template. Caches the result.
        returns a cached template
        """
        if not getattr(self, '_template_cache', False):
            self._template_cache = dict()

        if not self._template_cache.get(which_one, False):
            template_file = '%s.%s.sh' % (self.get_installer_name(), which_one)
            self._template_cache[which_one] = \
                self.template_env.get_template(template_file)

        return self._template_cache[which_one]

    def run_prepare_configuration(self):
        raise NotImplementedError('Must be implemented in subclass')

    def render_config_for_file_template(self, which_one):
        """Render template `which_one` with var_dict and store the text on
        the attribute of the same name (e.g. self.postactivate)."""
        logger.info('preparing config variables for %s ...' % which_one)

        template = self.get_template(which_one=which_one)
        contents = template.render(**self.var_dict)

        setattr(self, '%s' % which_one, contents)

        logger.info('...done')

    def create_file(self, which_one):
        """Render `which_one` and append it to a file in the install path."""
        self.render_config_for_file_template(which_one=which_one)

        logger.info('Creating config files in parent dir: %s'
                    % self.install_path)

        #gets self.postdeactivate if which_one=postdeactivate
        contents = getattr(self, which_one)

        logger.info('%s: Writing contents to file ...' % which_one)

        p = Path(self.install_path, which_one)
        #write configuration and append it to the file
        p.write_file(contents, 'a+')
        logger.info('...done')

    def move_to_venv(self, which_one):
        """
        Moves the created config_files into the bin folder to be executed.
        Does this by first pasting all the contents of the temporary file
        into the new or existing target file and then deleting the temp file.
        """
        target = Path(self.venv_folder, self.project_name, 'bin', which_one)
        source = Path(self.install_path, which_one)
        logger.info('target: %s, move_orig: %s' % (target, source))

        if source.exists():
            logger.info('Moving %s into place ...' % which_one)
            content = source.read_file()

            #make sure the directory exists
            if not target.parent.exists():
                target.parent.mkdir(parents=True)
            target.write_file(content, 'w+')

            source.remove()

        logger.info('...done')

    def run_create_configuration(self):
        """Write both postactivate and postdeactivate files."""
        self.create_file(which_one='postactivate')
        self.create_file(which_one='postdeactivate')

    def run_post_create_configuration(self):
        # Hook for subclasses; base class has nothing to do.
        pass

    def run(self):
        """Execute the three installation phases in order."""
        self.run_prepare_configuration()
        self.run_create_configuration()
        self.run_post_create_configuration()

    def __call__(self, *args, **kwargs):
        self.run()
Exemplo n.º 46
0
class Base(object):
    """
    Hold a home directory and create or remove it on demand.

    Parameters
    ----------
    home : str
        set a directory as home
    """

    def __init__(self, home='.'):
        """
        Remember *home* as an absolute path.

        Parameters
        ----------
        home : str
            set a directory as home
        """
        self._home = Path(home).absolute()

    def __str__(self):
        return self.home

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.home)

    @property
    def home(self):
        # Expose the stored path as a plain string.
        return self._home.__str__()

    @home.setter
    def home(self, path):
        self._home = Path(path).absolute()

    def make_home(self, force=False):
        """
        Create the home directory.

        Parameters
        ----------
        force : bool
            if True, an existing directory is wiped and re-created; an
            existing non-directory is removed and replaced by a directory

        Returns
        -------

        """
        self.__mkdir(force)

    def __mkdir(self, force=False):
        # Nothing at the path yet: create the whole tree and stop.
        if not self._home.exists():
            self._home.mkdir(parents=True)
            return
        # Something exists but it is not a directory.
        if not self._home.isdir():
            if not force:
                raise Exception('%s exists but is not a dir' % self.home)
            self._home.remove()
            self._home.mkdir()
        # With force, recreate the directory from scratch.
        if force:
            self._home.rmtree()
            self._home.mkdir()

    def __rmdir(self, force=False):
        # Nothing to do when the path does not exist.
        if not self._home.exists():
            return
        # A non-directory is only removed when forced.
        if not self._home.isdir():
            if not force:
                raise Exception('%s exists but is not a dir' % self.home)
            self._home.remove()
        if force:
            self._home.rmtree()
        else:
            self._home.rmdir()

    def rm_home(self, force=False):
        """
        Remove the home directory.

        Parameters
        ----------
        force : bool
            if True, the directory is deleted together with its contents;
            a non-directory at the path is removed as well

        Returns
        -------

        """
        self.__rmdir(force)
Exemplo n.º 47
0
def _tidy_survey(survey_name):
    """Tidy one Qualtrics survey export into long format.

    Reads <exp_dir>/<survey_name>/{<survey_name>.csv, loop_merge.csv} and
    writes <out_dir>/<survey_name>/{bad_subjs.csv, odd_one_out.csv}.

    Updated to drop pandas APIs that have since been removed:
    .ix -> .loc, DataFrame.sort -> sort_values, and str.extract with an
    explicit expand=False (the old default) so it keeps returning a Series.
    """
    # Inputs
    survey_csv = Path(exp_dir, survey_name, survey_name + '.csv')
    survey = pd.read_csv(survey_csv, skiprows=[
        0,
    ])

    loop_merge_csv = Path(exp_dir, survey_name, 'loop_merge.csv')
    loop_merge = pd.read_csv(loop_merge_csv)

    # Outputs
    survey_dir = Path(out_dir, survey_name)
    if not survey_dir.exists():
        survey_dir.mkdir()

    bad_subjs_csv = Path(survey_dir, 'bad_subjs.csv')
    odd_one_out_csv = Path(survey_dir, 'odd_one_out.csv')

    # Begin tidying
    id_col = 'workerId'

    # label the workers who passed the catch trial; plain assignment instead
    # of chained inplace fillna on a .loc slice (which acts on a copy)
    survey['describe_catch'] = survey['describe_catch'].fillna('')
    survey['failed_catch_trial'] = ~survey.describe_catch.str.contains(
        'piano', case=False)

    # export the subjects to deny payment (.ix was removed; .loc handles
    # the boolean mask identically)
    survey.loc[survey.failed_catch_trial].to_csv(bad_subjs_csv, index=False)

    # label the workers who reported problems with audio
    is_problem_col = survey.columns.str.contains(r'problems\ ')
    problem_cols = survey.columns[is_problem_col].tolist()
    problem = pd.melt(survey,
                      id_col,
                      problem_cols,
                      var_name='qualtrics',
                      value_name='problem_with_audio')

    problem['loop_merge_row'] = problem.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)
    problem['problem_with_audio'] = problem.problem_with_audio.fillna(
        False).astype(bool)
    problem.drop('qualtrics', axis=1, inplace=True)

    # combine filters
    subjs = pd.merge(survey[[id_col, 'failed_catch_trial']], problem)
    subjs['failed_catch_trial'] = subjs.failed_catch_trial.astype(int)
    subjs['problem_with_audio'] = subjs.problem_with_audio.astype(int)

    # tidy the survey data
    is_odd_col = survey.columns.str.contains(r'odd_one_out\ ')
    odd_cols = survey.columns[is_odd_col].tolist()
    odd = pd.melt(survey,
                  id_col,
                  odd_cols,
                  var_name='qualtrics',
                  value_name='odd_one_out')

    odd['loop_merge_row'] = odd.qualtrics.str.extract(
        r'\((\d)\)$', expand=False).astype(int)

    # map odd_one_out indices to filenames via loop_merge URLs
    file_map = pd.melt(loop_merge.drop('loop_merge_row', axis=1),
                       'category',
                       var_name='odd_one_out',
                       value_name='url')
    file_map['odd_one_out'] = file_map.odd_one_out.astype(int)
    file_map['filename'] = file_map.url.apply(lambda x: Path(x).name)
    file_map.drop('url', axis=1, inplace=True)

    odd = odd.merge(loop_merge[['category', 'loop_merge_row']])
    odd = odd.merge(file_map)
    odd = odd.merge(subjs)
    # DataFrame.sort was removed; sort_values is the stable replacement
    odd.sort_values(['workerId', 'category'], inplace=True)

    odd = odd[[
        'workerId', 'failed_catch_trial', 'problem_with_audio', 'category',
        'filename'
    ]]
    odd.to_csv(odd_one_out_csv, index=False)
Exemplo n.º 48
0
import re
import urllib
import logging
import httplib2
import datetime
from unipath import Path
from lxml.html import document_fromstring
from dateutil.parser import parse as date_parse

from ..config import Config

# HTTP response cache directory: ~/newsclips2/ expanded to an absolute
# path and created (with parents) before httplib2 uses it as its on-disk
# cache location.
CACHE = Path("~/newsclips2/").expand()
CACHE.mkdir(parents=True)
HTTP = httplib2.Http(CACHE)

class Article(object):
    def __init__(self, line):
        self.log = logging.getLogger('newsclips.article')
        self.url, self.notes = re.search(r'^(https?://[^ ]+) ?(.*)$', line).groups()
        self.tree = self.get_tree()

        self.config = Config()

    def get_tree(self):
        """
        Return the DOM for the article content.

        Note this actually returns the XPATH method on the tree, so
        you can do: a.tree(<xpath>) directly.
        """
        quoted_url = urllib.quote(self.url, safe='')
Exemplo n.º 49
0
def _tidy_survey(survey_name):
    """Tidy one Qualtrics survey export into long format.

    Reads <exp_dir>/<survey_name>/{<survey_name>.csv, loop_merge.csv} and
    writes <out_dir>/<survey_name>/{bad_subjs.csv, odd_one_out.csv}.
    """
    # Inputs
    survey_csv = Path(exp_dir, survey_name, survey_name + '.csv')
    survey = pd.read_csv(survey_csv, skiprows=[0, ])

    loop_merge_csv = Path(exp_dir, survey_name, 'loop_merge.csv')
    loop_merge = pd.read_csv(loop_merge_csv)

    # Outputs
    survey_dir = Path(out_dir, survey_name)
    if not survey_dir.exists():
        survey_dir.mkdir()

    bad_subjs_csv = Path(survey_dir, 'bad_subjs.csv')
    odd_one_out_csv = Path(survey_dir, 'odd_one_out.csv')

    # Begin tidying
    id_col = 'workerId'

    # label the workers who passed the catch trial
    # NOTE(review): inplace fillna on a .loc slice operates on a copy in
    # modern pandas — confirm this still mutates `survey` on the pinned
    # pandas version.
    survey.loc[:, 'describe_catch'].fillna('', inplace=True)
    survey['failed_catch_trial'] = ~survey.describe_catch.str.contains(
        'piano', case=False
    )

    # export the subjects to deny payment
    # NOTE(review): .ix was removed in pandas 1.0; needs .loc when upgrading.
    survey.ix[survey.failed_catch_trial].to_csv(bad_subjs_csv, index=False)

    # label the workers who reported problems with audio
    is_problem_col = survey.columns.str.contains('problems\ ')
    problem_cols = survey.columns[is_problem_col].tolist()
    problem = pd.melt(survey, id_col, problem_cols,
                      var_name = 'qualtrics', value_name = 'problem_with_audio')

    # the loop-merge row index is the digit in the trailing "(N)" of the
    # Qualtrics column name
    problem['loop_merge_row'] = problem.qualtrics.str.extract('\((\d)\)$').astype(int)
    problem['problem_with_audio'] = problem.problem_with_audio.fillna(False).astype(bool)
    problem.drop('qualtrics', axis=1, inplace=True)

    # combine filters
    subjs = pd.merge(survey[[id_col, 'failed_catch_trial']], problem)
    subjs['failed_catch_trial'] = subjs.failed_catch_trial.astype(int)
    subjs['problem_with_audio'] = subjs.problem_with_audio.astype(int)

    # tidy the survey data
    is_odd_col = survey.columns.str.contains('odd_one_out\ ')
    odd_cols = survey.columns[is_odd_col].tolist()
    odd = pd.melt(survey, id_col, odd_cols,
                  var_name = 'qualtrics', value_name = 'odd_one_out')

    odd['loop_merge_row'] = odd.qualtrics.str.extract('\((\d)\)$').astype(int)

    # map odd_one_out indices to filenames via the loop_merge URLs
    file_map = pd.melt(loop_merge.drop('loop_merge_row', axis=1),
                       'category', var_name='odd_one_out', value_name='url')
    file_map['odd_one_out'] = file_map.odd_one_out.astype(int)
    file_map['filename'] = file_map.url.apply(lambda x: Path(x).name)
    file_map.drop('url', axis=1, inplace=True)

    odd = odd.merge(loop_merge[['category', 'loop_merge_row']])
    odd = odd.merge(file_map)
    odd = odd.merge(subjs)
    # NOTE(review): DataFrame.sort was removed in pandas 0.20+; use
    # sort_values when upgrading.
    odd.sort(['workerId', 'category'], inplace=True)

    odd = odd[['workerId', 'failed_catch_trial', 'problem_with_audio', 'category', 'filename']]
    odd.to_csv(odd_one_out_csv, index=False)