示例#1
0
def btnExportClick():
    selectId = int(selectVar.get())
    exportDir = inputBox.get()
    config["exportDir"] = exportDir
    config["selectId"] = selectId
    print "SVN UPDATE"
    client = pysvn.Client()
    client.update('./')
    curItemName = dirs[selectId]
    ExportList = [curItemName]
    getParent(curItemName,ExportList)
    targetDir = "../skinlocal_"+curItemName
    Util.ensureDir(targetDir)
    for itemName in reversed(ExportList):
		print "copy=",itemName
		utils.copytree(itemName, targetDir)
	
    print u"拷贝资源成功"
    os.chdir("tools")
    json_str = json.dumps(config)
    Util.writeStringToFile("export.db",json_str)
    targetDir = "../../skinlocal_" + curItemName
	#发布资源
    print(u"发布资源中")
    ccsexport.export(targetDir, os.path.join(exportDir, "res", "ui"), True)
    print(u"发布成功")
    print(u"按任意键退出")
    raw_input()
    window.destroy()
示例#2
0
文件: build.py 项目: skydark/matools
def build_ons_data(root_dir, out_dir):
    """Assemble the ONS data tree under *out_dir* from *root_dir* downloads."""
    build_ons_script(root_dir, out_dir)

    download = os.path.join(root_dir, 'download')
    sound_src = os.path.join(download, 'sound')
    print("复制bgm中...")
    copyfiles(sound_src, os.path.join(out_dir, "bgm"),
              lambda n: n.startswith('bgm'))
    print("复制音效中...")
    copyfiles(sound_src, os.path.join(out_dir, "se"),
              lambda n: n.startswith('se'))

    adv_src = os.path.join(download, 'image', 'adv')
    bg_dest = os.path.join(out_dir, 'bgimage')
    print("解密背景图片中...")
    copyfiles(adv_src, bg_dest,
              lambda n: n.startswith('adv_bg'),
              decrypt=decrypt, tranformer=lambda n: n + '.png')
    copyfiles(os.path.join(download, 'rest'), bg_dest,
              lambda n: n == 'exp_map_bg',
              decrypt=decrypt, tranformer=lambda n: 'map.png')
    print("解密角色图片中...")
    copyfiles(adv_src, os.path.join(out_dir, 'chara'),
              lambda n: n.startswith('adv_chara'),
              decrypt=decrypt, tranformer=lambda n: n + '.png')

    print("生成对话框中...")
    image_dest = os.path.join(out_dir, 'image')
    os.makedirs(image_dest, exist_ok=True)
    # The dialogue-box image ships encrypted; decode it, then render.
    with open(os.path.join(download, 'rest', 'que_adv'), 'rb') as fh:
        build_que_adv(decrypt(fh.read()),
                      os.path.join(image_dest, 'que_adv.png'))

    # Voice data is optional; skip quietly when absent.
    voice_src = os.path.join(download, 'voice')
    if not os.path.isdir(voice_src):
        print("语音目录不存在,已忽略")
    else:
        print("复制语音目录中...")
        copytree(voice_src, os.path.join(out_dir, 'voice'))
    def _install_from(self, fromPath, fromLoc, toLocation=None, ignore=None):
        """Copy a file or directory from a location to the droplet.

        Directories are copied recursively, but specific files in those
        directories can be skipped by specifying the ignore parameter.

            fromPath   -> file to copy, relative to the build pack
            fromLoc    -> root of the from path.  Full path to the file or
                          directory to be copied is fromLoc + fromPath
            toLocation -> optional destination relative to the app droplet;
                          defaults to fromPath when not specified
            ignore     -> optional callable passed to the ignore argument
                          of shutil.copytree
        """
        self._log.debug("Install file [%s] from [%s]", fromPath, fromLoc)
        fullPathFrom = os.path.join(fromLoc, fromPath)
        if os.path.exists(fullPathFrom):
            # Explicit conditional replaces the old `and/or` idiom, which
            # fell through to toLocation (None) whenever fromPath was falsy.
            destRel = fromPath if toLocation is None else toLocation
            fullPathTo = os.path.join(self._ctx['BUILD_DIR'], destRel)
            safe_makedirs(os.path.dirname(fullPathTo))
            self._log.debug("Copying [%s] to [%s]", fullPathFrom, fullPathTo)
            if os.path.isfile(fullPathFrom):
                shutil.copy(fullPathFrom, fullPathTo)
            else:
                utils.copytree(fullPathFrom, fullPathTo, ignore=ignore)
示例#4
0
    def _install_from(self, fromPath, fromLoc, toLocation=None, ignore=None):
        """Copy a file or directory from a location to the droplet.

        Directories are copied recursively, but specific files in those
        directories can be skipped by specifying the ignore parameter.

            fromPath   -> file to copy, relative to the build pack
            fromLoc    -> root of the from path.  Full path to the file or
                          directory to be copied is fromLoc + fromPath
            toLocation -> optional destination relative to the app droplet;
                          defaults to fromPath when not specified
            ignore     -> optional callable passed to the ignore argument
                          of shutil.copytree
        """
        self._log.debug("Install file [%s] from [%s]", fromPath, fromLoc)
        fullPathFrom = os.path.join(fromLoc, fromPath)
        if os.path.exists(fullPathFrom):
            # Fixed: the previous `(cond) and a or b` expression resolved
            # to toLocation (i.e. None) when fromPath was falsy.
            destRel = fromPath if toLocation is None else toLocation
            fullPathTo = os.path.join(self._ctx['BUILD_DIR'], destRel)
            safe_makedirs(os.path.dirname(fullPathTo))
            self._log.debug("Copying [%s] to [%s]", fullPathFrom, fullPathTo)
            if os.path.isfile(fullPathFrom):
                shutil.copy(fullPathFrom, fullPathTo)
            else:
                utils.copytree(fullPathFrom, fullPathTo, ignore=ignore)
def copy_specfem_stuff(specfemdir, targetdir):
    """Mirror the DATA, OUTPUT_FILES and bin trees from *specfemdir* into *targetdir*."""
    for subdir in ("DATA", "OUTPUT_FILES", "bin"):
        source = os.path.join(specfemdir, subdir)
        destination = os.path.join(targetdir, subdir)
        # Wipe any stale copy before mirroring the fresh one.
        cleantree(destination)
        copytree(source, destination)
def copy_specfem_stuff(specfemdir, targetdir):
    """Copy the standard specfem sub-directories into *targetdir*."""
    dir_list = ["DATA", "OUTPUT_FILES", "bin"]
    pairs = [(os.path.join(specfemdir, d), os.path.join(targetdir, d))
             for d in dir_list]
    for fromdir, todir in pairs:
        cleantree(todir)  # ensure the destination starts empty
        copytree(fromdir, todir)
示例#7
0
def __main__(args=None):
    """Entry point: configure, back up, transform and write Dwarf Fortress raws.

    args -> parsed command-line namespace, also fed to getconf() to build
            the effective configuration.
            NOTE(review): args.list / args.meta are accessed directly, so
            passing the default None would raise AttributeError — confirm
            callers always supply a namespace.
    """
    conf = getconf(args)
    pydwarf.log.debug('Proceeding with configuration: %s.' % conf)

    # Report versions
    pydwarf.log.info('Running PyDwarf manager version %s.' % __version__)
    pydwarf.log.debug('With PyDwarf version %s.' % pydwarf.__version__)
    pydwarf.log.debug('With raws version %s.' % raws.__version__)

    # Handle flags that completely change behavior
    if args.list:
        pydwarf.urist.list()
        exit(0)
    elif args.meta is not None:
        pydwarf.urist.doclist(args.meta)
        exit(0)

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' % conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s.' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit and
            # KeyboardInterrupt are no longer swallowed; a real backup
            # failure is still fatal.
            pydwarf.log.error('Failed to create backup.')
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s.' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts.')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s.' % outputdir)
        # Only .txt files are raws; leave everything else in place.
        for removefile in [os.path.join(outputdir, f) for f in os.listdir(outputdir)]:
            pydwarf.log.debug('Removing file %s.' % removefile)
            if removefile.endswith('.txt'): os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s.' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s.' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
示例#8
0
def __main__():
    """Entry point: run the configured PyDwarf export pipeline.

    Reads configuration from config.export, optionally backs up the raws,
    applies the configured scripts, and writes the results back out.
    """
    # Get configuration
    conf = config.export
    if not conf:
        pydwarf.log.error('No configuration specified. Imported config package must contain an export variable.')
        exit(1)

    # Things to do with versions
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if conf.version is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % conf.version)
        pydwarf.urist.session.dfversion = conf.version
    else:
        pydwarf.log.error('No Dwarf Fortress version was specified in conf. Scripts will be run regardless of their indicated compatibility.')

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' % conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s...' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        except Exception:
            # Narrowed from a bare `except:` so SystemExit and
            # KeyboardInterrupt propagate; any real failure stays fatal.
            pydwarf.log.error('Failed to create backup.')
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s...' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts...')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s...' % outputdir)
        # Only .txt files are raws; leave everything else in place.
        for removefile in [os.path.join(outputdir, f) for f in os.listdir(outputdir)]:
            pydwarf.log.debug('Removing file %s...' % removefile)
            if removefile.endswith('.txt'): os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s...' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
示例#9
0
 def run(self, env):
     """Copy the contents of self.fromDir into the task's destination.

     env -> task environment mapping; WORK_DIR is used as the destination
            when self.toDir is not set.

     Raises TaskError when no destination directory can be determined.
     """
     toDir = self.toDir

     if toDir is None:
         # Fall back to the environment-provided working directory.
         # (Idiom fix: `x not in env` instead of `not x in env`.)
         if "WORK_DIR" not in env:
             logging.error("WORK_DIR not defined")
             raise TaskError("WORK_DIR not defined")
         toDir = env["WORK_DIR"]

     logging.info("Copying " + self.fromDir + " to " + toDir)
     utils.copytree(self.fromDir, toDir)
     logging.info("Successfully copied folder contents")
示例#10
0
 def _initialize(self):
     """Reset the HTML and PDF output directories to a clean state."""
     colored_print('Initializing... ', 'OKGREEN')

     def recreate(path):
         # Drop any previous build output, then start from an empty dir.
         if os.path.isdir(path):
             shutil.rmtree(path)
         os.mkdir(path)

     # HTML destination: fresh dir plus static assets and a css dir.
     recreate(self.html_output)
     copytree(self.assets, self.html_assets)
     os.mkdir(self.html_css)
     # PDF destination.
     recreate(self.pdf_output)
示例#11
0
def addApplication(appName,
                   childOf='',
                   appShortName='',
                   templateDir=None,
                   targetDir=None):
    """Generate a new application from the APP template directory.

    appName      -> application name; upper-cased for file names and vars
    childOf      -> optional parent application name
    appShortName -> short display name substituted into the template
    templateDir  -> template root; defaults to ./templates next to this file
    targetDir    -> destination root; defaults to ../Apps
    """
    if targetDir is None:
        targetDir = os.path.join(dirname(dirname(__file__)), 'Apps')

    if templateDir is None:
        templateDir = os.path.join(dirname(__file__), 'templates')

    # Template files to rename after copying, (source, destination) pairs.
    toMoveFiles = [('APP.app.template', '%s.app' % (appName.upper())),
                   ('APP_init.php.in.template',
                    '%s_init.php.in' % (appName.upper()))]

    # Files whose contents get variable substitution applied.
    toParseFiles = ['%s.app' % (appName.upper())]

    # create tmp dir
    tempDir = mkdtemp()
    #print "working in %s"%(tempDir)
    # copy files to tmp dir
    copytree(os.path.join(templateDir, 'APP'), tempDir, symlinks=False)
    # rename files in tmp dir
    for (fromFilePath, toFilePath) in toMoveFiles:
        fromFileFullPath = os.path.join(tempDir, fromFilePath)
        toFileFullPath = os.path.join(tempDir, toFilePath)
        #print "move %s to %s"%(fromFileFullPath, toFileFullPath)
        shutil.move(fromFileFullPath, toFileFullPath)
    # parse files in tmp dir
    for parsedFilePath in toParseFiles:
        parsedFileFullPath = os.path.join(tempDir, parsedFilePath)
        #print "parsing %s"%(parsedFileFullPath)

        # fileinput inplace mode redirects stdout into the file, so this
        # `print` rewrites each line after template substitution.
        for line in fileinput.input(parsedFileFullPath, inplace=1):
            print Template(line).safe_substitute({
                'APPNAME':
                appName.upper(),
                'CHILDOF':
                childOf.upper(),
                'appShortName':
                appShortName,
                'appIcon':
                "%s.png" % (appName.lower())
            }).rstrip()  #strip to remove EOL duplication

    # move tmp dir to target dir
    shutil.move(tempDir, os.path.join(targetDir, appName.upper()))
    return
示例#12
0
文件: parsing.py 项目: yabirgb/arcade
def copy_static_assets(base_path, theme_folder):
    """Collect static assets into <base_path>/public/static.

    Copies the theme's static directory first, then overlays the project's
    top-level static directory on top of it.
    """
    dest = os.path.join(base_path, 'public', 'static')

    # copy files from theme folder
    orig = os.path.join(base_path, theme_folder, 'static')
    if not os.path.exists(dest):
        os.makedirs(dest)
    # (Removed a dead `else: pass` branch holding commented-out rmtree code.)
    copytree(orig, dest)

    # copy files from static folder; dir_util.copy_tree merges into an
    # already-existing destination.
    orig = os.path.join(base_path, 'static')
    distutils.dir_util.copy_tree(orig, dest)
示例#13
0
def main(argv=None):
    """Restore a Jenkins home directory from a backup repository.

    argv -> optional argument list; defaults to sys.argv when None.
            (Bug fix: argv was previously ignored — parse_args() always
            read sys.argv regardless of the parameter.)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-t',
                        '--targetdir',
                        help='the target directory (Jenkins home directory)',
                        metavar='DIR',
                        required=True)
    parser.add_argument('-b',
                        '--backuprepodir',
                        help='the backup repository',
                        metavar='DIR',
                        default='SapMachine-Infrastructure')
    parser.add_argument('--install-plugins',
                        help='install the Jenkins plugins',
                        action='store_true',
                        default=False)
    parser.add_argument(
        '--plugins-only',
        help='install only the Jenkins plugins (implies --install-plugins)',
        action='store_true',
        default=False)
    # Pass argv through so callers can supply arguments programmatically.
    args = parser.parse_args(argv)

    if args.plugins_only:
        args.install_plugins = True

    source = os.path.realpath(args.backuprepodir)
    target = os.path.realpath(args.targetdir)

    if not os.path.exists(target):
        os.mkdir(target)

    if not args.plugins_only:
        utils.copytree(join(source, jenkins_configuration), target)

    if args.install_plugins:
        with open(join(source, jenkins_configuration, 'plugin_list.json'),
                  'r') as plugin_list_json:
            plugin_list = json.loads(plugin_list_json.read())

            install_cmd = ['/usr/local/bin/install-plugins.sh']

            # Each entry becomes a "name:version" argument for the installer.
            for plugin in plugin_list:
                install_cmd.append(
                    str.format('{0}:{1}', plugin['Extension-Name'],
                               plugin['Plugin-Version']))

            utils.run_cmd(install_cmd)
def copy_cmtfiles(_event, cmtfolder, targetcmtdir, generate_deriv_cmt,
                  deriv_cmt_list):
    """Copy the CMT file for *_event* — and either its pre-built derivative
    files or the scripts that generate them — into *targetcmtdir*."""
    copyfile(os.path.join(cmtfolder, _event),
             os.path.join(targetcmtdir, _event), verbose=False)
    if generate_deriv_cmt:
        # Ship the scripts that will produce the derivative CMT files.
        copytree("job_running_template/perturb_cmt",
                 os.path.dirname(targetcmtdir))
    else:
        # Derivative CMT files already exist; copy each one over.
        for deriv_type in deriv_cmt_list:
            name = "%s_%s" % (_event, deriv_type)
            copyfile(os.path.join(cmtfolder, name),
                     os.path.join(targetcmtdir, name), verbose=False)
def copy_cmtfiles(_event, cmtfolder, targetcmtdir, generate_deriv_cmt,
                  deriv_cmt_list):
    """Stage CMT inputs for one event into the job folder."""
    src = os.path.join(cmtfolder, _event)
    dst = os.path.join(targetcmtdir, _event)
    copyfile(src, dst, verbose=False)
    if not generate_deriv_cmt:
        # Pre-built derivative CMTs: copy them alongside the original.
        names = ["%s_%s" % (_event, d) for d in deriv_cmt_list]
        for name in names:
            copyfile(os.path.join(cmtfolder, name),
                     os.path.join(targetcmtdir, name),
                     verbose=False)
    else:
        # Otherwise install the perturbation scripts that create them.
        copytree("job_running_template/perturb_cmt",
                 os.path.dirname(targetcmtdir))
示例#16
0
def addApplication(appName, childOf='', appShortName='', templateDir=None, targetDir=None):
    """Generate a new application from the APP template directory.

    appName      -> application name; upper-cased for file names and vars
    childOf      -> optional parent application name
    appShortName -> short display name substituted into the template
    templateDir  -> template root; defaults to ./templates next to this file
    targetDir    -> destination root; defaults to ../<APPNAME>
    """
    appNameUpper = appName.upper()

    if targetDir is None:
        targetDir = os.path.join(dirname(dirname(__file__)), appNameUpper)

    if templateDir is None:
        templateDir = os.path.join(dirname(__file__), 'templates')

    # Template files to rename after copying, (source, destination) pairs.
    toMoveFiles = [
        ('APP.app.template', '%s.app' % (appNameUpper)),
        ('APP_init.php.in.template', '%s_init.php.in' % (appNameUpper))
    ]

    # Files whose contents get variable substitution applied.
    toParseFiles = [
        '%s.app' % (appNameUpper)
    ]

    # create tmp dir
    tempDir = mkdtemp()
    #print "working in %s"%(tempDir)
    # copy files to tmp dir
    copytree(os.path.join(templateDir, 'APP'), tempDir, symlinks=False)
    # rename files in tmp dir
    for (fromFilePath, toFilePath) in toMoveFiles:
        fromFileFullPath = os.path.join(tempDir, fromFilePath)
        toFileFullPath = os.path.join(tempDir,toFilePath)
        #print "move %s to %s"%(fromFileFullPath, toFileFullPath)
        shutil.move(fromFileFullPath, toFileFullPath)
    # parse files in tmp dir
    for parsedFilePath in toParseFiles:
        parsedFileFullPath = os.path.join(tempDir, parsedFilePath)
        #print "parsing %s"%(parsedFileFullPath)

        # fileinput inplace mode redirects stdout into the file, so this
        # `print` rewrites each line after template substitution.
        for line in fileinput.input(parsedFileFullPath, inplace=1):
            print Template(line).safe_substitute({
            'APPNAME': appNameUpper,
            'CHILDOF': childOf.upper(),
            'appShortName': appShortName,
            'appIcon': "%s.png" % (appName.lower())
        }).rstrip() #strip to remove EOL duplication

    # move tmp dir to target dir
    shutil.move(tempDir, os.path.join(targetDir, appNameUpper))
    return
def createModule(moduleName, appName, outputDir, childOf='', ignoreList=[], appShortName=''):



    toMoveFiles = [
        ('APP_en.po', '%s_en.po'%(appName.upper())),
        ('APP_fr.po', '%s_fr.po'%(appName.upper()))
    ]

    toParseFiles = [
        'configure.in',
        'info.xml.in'
    ]

    # create tmp dir
    tempDir = mkdtemp()
    #print "working in %s"%(tempDir)
    # copy files to tmp dir (exclude some)
    ignoreList = tuple(ignoreList) + ('createModule.py', '.git', '.gitmodules', '*.md')
    #print "ignoring '%s'"%("', '".join(ignoreList))
    ignore = shutil.ignore_patterns(*ignoreList)
    copytree(os.path.dirname(__file__), tempDir, symlinks=False, ignore=ignore)
    # rename files in tmp dir
    for (fromFilePath, toFilePath) in toMoveFiles:
        fromFileFullPath = os.path.join(tempDir, fromFilePath)
        toFileFullPath = os.path.join(tempDir,toFilePath)
        #print "move %s to %s"%(fromFileFullPath, toFileFullPath)
        shutil.move(fromFileFullPath, toFileFullPath)
    # parse files in tmp dir
    for parsedFilePath in toParseFiles:
        parsedFileFullPath = os.path.join(tempDir, parsedFilePath)
        #print "parsing %s"%(parsedFileFullPath)

        for line in fileinput.input(parsedFileFullPath, inplace=1):
            print Template(line).safe_substitute({
            'APPNAME': appName.upper(),
            'modulename': moduleName
        }).rstrip() #strip to remove EOL duplication

    addApplication(appName, childOf=childOf, appShortName=appShortName, targetDir=os.path.join(tempDir, 'Apps'))

    # move tmp dir to target dir
    copytree(tempDir, outputDir)
    shutil.rmtree(tempDir)
    return
def main():
	"""Register the package with setuptools and install its config files.

	NOTE(review): copytree() targets an absolute system path under /etc,
	so this requires appropriate filesystem permissions.
	"""
	# Mirror etc/apache-vhost-manager from the source tree into /etc.
	conf_path_list = os.path.join("etc", "apache-vhost-manager")
	etc_config_path_src =	os.path.join( here_path, conf_path_list )
	etc_config_path_dst = os.path.join( "/", conf_path_list )


	setup(
		name='apache_vhost_manager',
		version='0.0.3',
		author="airtonix",
		maintainer="Airtonix",
		maintainer_email="*****@*****.**",
		url="airtonix.net/projects/apache_vhost_creator",
		scripts = [
			'usr/bin/apache-vhost-manager'
		],
		license = read_file('LICENSE.md'),
		description='A helper script to manage apache subdomain based virtualhosts. It inserts BIND dns records, sets up django projects and LDAP authentication directives.',
		long_description = read_file('README.md')
	)
	# Copy the packaged configuration into the system config directory.
	copytree( etc_config_path_src,  etc_config_path_dst)
 def _edit_and_commit(self, commit_date, message, change_file_dir):
     """Copy files from change_file_dir into the working tree and commit
     them with author/commit timestamps forced to commit_date."""
     #for every file in the change directory, copy to the new place
     print("copytree {0} {1}".format(change_file_dir, os.getcwd()))

     # NOTE(review): destination uses a hard-coded Windows separator
     # ("\\test"); confirm this only ever runs on Windows.
     new_files = copytree(change_file_dir, os.getcwd() + "\\test")

     for file in new_files:
         print ("Adding file {0}".format(file))

     self.repo.index.add(new_files)
     # Backdate both the author and the committer timestamps.
     date_in_iso = commit_date.strftime("%Y-%m-%d %H:%M:%S")
     self.repo.index.commit(message, author_date=date_in_iso, commit_date=date_in_iso)
     print("{0}{1}".format(commit_date, message))
示例#20
0
def main(argv=None):
    """Copy a Jenkins configuration into a target directory and optionally
    install the plugins it lists.

    argv -> optional argument list; defaults to sys.argv when None.
            (Bug fix: argv was previously ignored — parse_args() always
            read sys.argv regardless of the parameter.)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        '--srcdir',
                        help='the source directory to copy from',
                        metavar='DIR',
                        required=True)
    parser.add_argument('-t',
                        '--targetdir',
                        help='the target directory to copy to',
                        metavar='DIR',
                        required=True)
    parser.add_argument('--install-plugins',
                        help='install the Jenkins plugins',
                        action='store_true',
                        default=False)
    # Pass argv through so callers can supply arguments programmatically.
    args = parser.parse_args(argv)

    source = os.path.realpath(args.srcdir)
    target = os.path.realpath(args.targetdir)

    if not os.path.exists(target):
        os.mkdir(target)

    utils.copytree(join(source, 'jenkins_configuration'), target)

    if args.install_plugins:
        with open(join(source, 'jenkins_configuration', 'plugin_list.json'),
                  'r') as plugin_list_json:
            plugin_list = json.loads(plugin_list_json.read())

            install_cmd = ['/usr/local/bin/install-plugins.sh']

            # Each entry becomes a "name:version" argument for the installer.
            for plugin in plugin_list:
                install_cmd.append(
                    str.format('{0}:{1}', plugin['Extension-Name'],
                               plugin['Plugin-Version']))

            utils.run_cmd(install_cmd)
def create_job_folder(template_folder, tag, eventlist_dict, cmtfolder,
                      stafolder, generate_deriv_cmt, deriv_cmt_list):
    """Create one job_<tag>_NN folder per event list and populate each with
    its event list, CMT/station files, script templates and config."""
    print("*" * 20 + "\nCreat job sub folder")
    targetdir_list = ["job_" + tag + "_%02d" % (i + 1)
                      for i in range(len(eventlist_dict))]

    check_job_folder_exist(targetdir_list)

    for _i, targetdir in enumerate(targetdir_list):
        idx = _i + 1
        print("=" * 5 + "\nJob id: %d" % idx)
        # event list for this job
        eventlist_file = eventlist_dict[idx]
        copyfile(eventlist_file, os.path.join(targetdir, "XEVENTID"))

        # original cmt file and station file for every event in the list
        targetcmtdir = os.path.join(targetdir, "cmtfile")
        targetstadir = os.path.join(targetdir, "station")
        print("copy cmt:[%s --> %s]" % (cmtfolder, targetcmtdir))
        print("copy stattion:[%s --> %s]" % (stafolder, targetstadir))
        for _event in read_txt_into_list(eventlist_file):
            copy_cmtfiles(_event, cmtfolder, targetcmtdir, generate_deriv_cmt,
                          deriv_cmt_list)
            copy_stations(_event, stafolder, targetstadir)

        # scripts template
        print("Copy dir:[%s --> %s]" % (template_folder, targetdir))
        copytree(template_folder, targetdir)

        # config.yaml file
        copyfile("config.yml", os.path.join(targetdir, "config.yml"))
def create_job_folder(template_folder, tag, eventlist_dict, cmtfolder,
                      stafolder, generate_deriv_cmt, deriv_cmt_list):
    """Set up numbered job folders with event lists, CMT/station files,
    script templates and the config file."""
    job_dirs = []
    print("*" * 20 + "\nCreat job sub folder")
    for job_id in range(1, len(eventlist_dict) + 1):
        job_dirs.append("job_" + tag + "_%02d" % job_id)

    check_job_folder_exist(job_dirs)

    for job_id, job_dir in enumerate(job_dirs, start=1):
        print("=" * 5 + "\nJob id: %d" % job_id)
        # event list for this job
        events_file = eventlist_dict[job_id]
        copyfile(events_file, os.path.join(job_dir, "XEVENTID"))

        # CMT and station files for every event in the list
        cmt_dest = os.path.join(job_dir, "cmtfile")
        sta_dest = os.path.join(job_dir, "station")
        print("copy cmt:[%s --> %s]" % (cmtfolder, cmt_dest))
        print("copy stattion:[%s --> %s]" % (stafolder, sta_dest))
        for event in read_txt_into_list(events_file):
            copy_cmtfiles(event, cmtfolder, cmt_dest, generate_deriv_cmt,
                          deriv_cmt_list)
            copy_stations(event, stafolder, sta_dest)

        # scripts template
        print("Copy dir:[%s --> %s]" % (template_folder, job_dir))
        copytree(template_folder, job_dir)

        # config.yaml file
        copyfile("config.yml", os.path.join(job_dir, "config.yml"))
    def _edit_and_commit(self, commit_date, message, change_file_dir):
        """Copy every file from *change_file_dir* into the working tree and
        commit them with author/commit timestamps forced to *commit_date*."""
        cwd = os.getcwd()
        print("copytree {0} {1}".format(change_file_dir, cwd))

        # NOTE(review): destination uses a hard-coded Windows separator.
        new_files = copytree(change_file_dir, cwd + "\\test")
        for added in new_files:
            print("Adding file {0}".format(added))

        index = self.repo.index
        index.add(new_files)
        # Backdate both the author and the committer timestamps.
        stamp = commit_date.strftime("%Y-%m-%d %H:%M:%S")
        index.commit(message, author_date=stamp, commit_date=stamp)
        print("{0}{1}".format(commit_date, message))
示例#24
0
    def perform_random_validation_split(self, split_size):
        """Perform a random split into training and validation sets.

        Rebuilds self.train_dir from self.full_train_dir, then moves a
        random fraction of images into self.valid_dir, preserving the
        per-class sub-directory layout.

        # Arguments
            split_size: (float), size of validation set in percents

        NOTE(review): the method changes the process working directory and
        assumes self.train_dir / self.valid_dir end with a path separator
        (they are concatenated with '+') — confirm against the callers.
        """

        print('Performing random split with split size: {}'.format(split_size))
        # Step out of train_dir before deleting it, then recreate both
        # directories empty and restore a fresh copy of the full data.
        os.chdir(self.train_dir)
        os.chdir('../')
        shutil.rmtree(self.train_dir)
        shutil.rmtree(self.valid_dir)
        os.makedirs(self.train_dir, exist_ok=True)
        os.makedirs(self.valid_dir, exist_ok=True)
        copytree(self.full_train_dir, self.train_dir)
        os.chdir(self.train_dir)

        # Mirror the class sub-directory layout into the validation dir.
        for _class in glob.glob('*'):
            os.mkdir(self.valid_dir + _class)

        # Deterministic for a fixed self.seed.
        train_images_names, valid_images_names = train_test_split(
            glob.glob(self.train_dir + '*/*.*'),
            test_size=split_size,
            random_state=self.seed)

        print('Number of training set images: {}, validation set images: {}'.
              format(len(train_images_names), len(valid_images_names)))

        # Move the selected images, keeping their <class>/<file> tail.
        for i in range(len(valid_images_names)):
            os.rename(
                valid_images_names[i], '{}/{}'.format(
                    self.valid_dir,
                    '/'.join(valid_images_names[i].split('/')[-2:])))

        return
示例#25
0
# -*- coding: utf-8 -*-
# Build script: bundles DXLib and Effekseer runtime files into a single
# distributable package directory.

import utils

# Destination package directory and the source locations being bundled.
targetDir = 'EffekseerForDXLib_160b_322c/'
dxlibDir = 'DXLib_VC/'
effekseerDir = '../Effekseer/'
effekseerVSDir = effekseerDir + 'EffekseerRuntime_DXLib/Compiled/'

# Work relative to this script's own directory.
utils.cdToScript()

# Start from a clean target directory.
utils.rmdir(targetDir)
utils.mkdir(targetDir)

# The Japanese directory name below means "files to add to your VC project".
utils.copytree(dxlibDir + 'プロジェクトに追加すべきファイル_VC用/',
               targetDir + 'プロジェクトに追加すべきファイル_VC用/')

# Public headers for Effekseer and its DX9/DX11 renderers.
utils.copy(effekseerVSDir + 'include/Effekseer.h',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/')
utils.copy(effekseerVSDir + 'include/Effekseer.Modules.h',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/')
utils.copy(effekseerVSDir + 'include/Effekseer.SIMD.h',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/')
utils.copy(effekseerVSDir + 'include/EffekseerRendererDX9.h',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/')
utils.copy(effekseerVSDir + 'include/EffekseerRendererDX11.h',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/')

# Prebuilt VS2017 x86 debug library, renamed with the build-flavor suffix.
utils.copy(effekseerVSDir + 'lib/VS2017/Debug/Effekseer.lib',
           targetDir + 'プロジェクトに追加すべきファイル_VC用/Effekseer_vs2017_x86_d.lib')
utils.copy(
示例#26
0
def main(argv=sys.argv):
    """Train and evaluate the EntityLSTM sequence-labelling model.

    Parses CLI arguments, loads parameters and the dataset, builds a
    TensorFlow graph/session, then runs a train/evaluate loop with early
    stopping; checkpoints, TensorBoard logs, embedding-projector metadata
    and per-epoch results are written under the configured output folder.

    NOTE(review): the ``argv=sys.argv`` default is evaluated once at
    definition time; callers can pass an explicit list for programmatic use.
    """

    arguments = parse_arguments(argv[1:])

    parameters, conf_parameters = load_parameters(
        arguments['parameters_filepath'], arguments=arguments)
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(
        parameters)
    # (sic: the helper name misspells "compatibility" — defined elsewhere.)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'],
                         debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            # allow_soft_placement lets TF fall back to another device when
            # the pinned '/gpu:0' placement is impossible.
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                device_count={
                    'CPU': 1,
                    'GPU': parameters['number_of_gpus']
                },
                allow_soft_placement=True,
                log_device_placement=False)

            sess = tf.Session(config=session_conf)

            with sess.as_default():
                # Bookkeeping for run metadata and per-epoch metrics.
                start_time = time.time()
                experiment_timestamp = utils.get_current_time_in_miliseconds()
                results = {}
                results['epoch'] = {}
                results['execution_details'] = {}
                results['execution_details']['train_start'] = start_time
                results['execution_details'][
                    'time_stamp'] = experiment_timestamp
                results['execution_details']['early_stop'] = False
                results['execution_details']['keyboard_interrupt'] = False
                results['execution_details']['num_epochs'] = 0
                # Shallow copy: nested values are shared with `parameters`.
                results['model_options'] = copy.copy(parameters)

                # Output-folder layout: <output>/<model>/{model,tensorboard_logs}
                # plus a shared <output>/weights folder.
                dataset_name = utils.get_basename_without_extension(
                    parameters['dataset_text_folder'])
                model_name = dataset_name
                utils.create_folder_if_not_exists(parameters['output_folder'])
                stats_graph_folder = os.path.join(
                    parameters['output_folder'],
                    model_name)  # Folder where to save graphs
                final_weights_folder = os.path.join(
                    parameters['output_folder'], 'weights')
                utils.create_folder_if_not_exists(stats_graph_folder)
                utils.create_folder_if_not_exists(final_weights_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                # Save the effective configuration next to the checkpoints so
                # the run can be reproduced.
                with open(os.path.join(model_folder, 'parameters.ini'),
                          'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder,
                                                      'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                # One TensorBoard log folder per dataset split (train/valid/...).
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(
                        stats_graph_folder, 'tensorboard_logs', dataset_type)
                    utils.create_folder_if_not_exists(
                        tensorboard_log_folders[dataset_type])
                # Persist the dataset object (vocabulary, indices) for reuse
                # when restoring the model later.
                pickle.dump(
                    dataset,
                    open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

                model = EntityLSTM(dataset, parameters)

                # TensorBoard writers: one per split, plus an embedding writer
                # that must live in model_folder for the projector to find it.
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(
                        tensorboard_log_folders[dataset_type],
                        graph=sess.graph)
                embedding_writer = tf.summary.FileWriter(model_folder)

                # Embedding-projector metadata (token and character vocab TSVs).
                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(
                    token_list_file_path, '..')

                tensorboard_character_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                character_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_characters.tsv')
                tensorboard_character_embeddings.metadata_path = os.path.relpath(
                    character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer,
                                               embeddings_projector_config)

                # NOTE(review): metadata is written as latin-1 here; TensorBoard
                # generally expects UTF-8 — confirm for non-latin vocabularies.
                token_list_file = codecs.open(token_list_file_path, 'w',
                                              'latin-1')
                for token_index in range(dataset.vocabulary_size):
                    token_list_file.write('{0}\n'.format(
                        dataset.index_to_token[token_index]))
                token_list_file.close()

                character_list_file = codecs.open(character_list_file_path,
                                                  'w', 'latin-1')
                for character_index in range(dataset.alphabet_size):
                    if character_index == dataset.PADDING_CHARACTER_INDEX:
                        character_list_file.write('PADDING\n')
                    else:
                        character_list_file.write('{0}\n'.format(
                            dataset.index_to_character[character_index]))
                character_list_file.close()

                # Initialize the model
                sess.run(tf.global_variables_initializer())
                # Pretrained token embeddings are only loaded when training
                # from scratch; otherwise they come with the restored model.
                if not parameters['use_pretrained_model']:
                    model.load_pretrained_token_embeddings(
                        sess, dataset, parameters)

                patience_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
                f1_score_best = 0
                f1_scores = {'train-F1': [], 'valid-F1': [], 'test-F1': []}
                # CRF transition parameters; +2 presumably for the start/end
                # states — TODO confirm against the CRF implementation.
                transition_params_trained = np.random.rand(
                    len(dataset.unique_labels) + 2,
                    len(dataset.unique_labels) + 2)
                model_saver = tf.train.Saver(
                    max_to_keep=parameters['num_of_model_to_keep']
                )  #, reshape= True)  # defaults to saving all variables
                epoch_number = -1
                try:
                    while True:
                        step = 0
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))

                        epoch_start_time = time.time()

                        # Epoch 0 with a pretrained model: restore weights
                        # instead of training.
                        if parameters[
                                'use_pretrained_model'] and epoch_number == 0:

                            # Two-step corrector mode: first restore the step-1
                            # model (corrector disabled) to collect its 3-label
                            # predictions, then re-enable the corrector.
                            if parameters['use_corrector']:
                                parameters['use_corrector'] = False
                                transition_params_trained = train.restore_pretrained_model(
                                    parameters, dataset, sess, model,
                                    model_saver)
                                print(
                                    'Getting the 3-label predictions from the step1 model.'
                                )
                                all_pred_labels, y_pred_for_corrector, y_true_for_corrector, \
                                output_filepaths = train.predict_labels(sess, model,
                                                                        transition_params_trained,
                                                                        parameters, dataset,
                                                                        epoch_number,
                                                                        stats_graph_folder,
                                                                        dataset_filepaths,
                                                                        for_corrector = True)
                                # Map predicted label strings to corrector
                                # label indices, per split and per sequence.
                                all_pred_indices = {}  #defaultdict(list)
                                for dataset_type in dataset_filepaths.keys():
                                    all_pred_indices[dataset_type] = []
                                    for i in range(
                                            len(all_pred_labels[dataset_type])
                                    ):
                                        indices = [
                                            dataset.
                                            label_corrector_to_index[label]
                                            for label in
                                            all_pred_labels[dataset_type][i]
                                        ]
                                        all_pred_indices[dataset_type].append(
                                            indices)

                                # One-hot encode the predicted corrector labels.
                                label_binarizer_corrector = sklearn.preprocessing.LabelBinarizer(
                                )
                                label_binarizer_corrector.fit(
                                    range(
                                        max(dataset.index_to_label_corrector.
                                            keys()) + 1))
                                predicted_label_corrector_vector_indices = {}
                                for dataset_type in dataset_filepaths.keys():
                                    predicted_label_corrector_vector_indices[
                                        dataset_type] = []
                                    for label_indices_sequence in all_pred_indices[
                                            dataset_type]:
                                        predicted_label_corrector_vector_indices[
                                            dataset_type].append(
                                                label_binarizer_corrector.
                                                transform(
                                                    label_indices_sequence))
                                parameters['use_corrector'] = True

                            transition_params_trained, model, glo_step = \
                                train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver)

                            # Recreate the writers so they attach to the
                            # restored graph.
                            for dataset_type in dataset_filepaths.keys():
                                writers[dataset_type] = tf.summary.FileWriter(
                                    tensorboard_log_folders[dataset_type],
                                    graph=sess.graph)
                                embedding_writer = tf.summary.FileWriter(
                                    model_folder)
                            # Only the (new) global-step variable needs
                            # initializing after the restore.
                            init_new_vars_op = tf.initialize_variables(
                                [glo_step])
                            sess.run(init_new_vars_op)

                        elif epoch_number != 0:
                            # Regular training epoch: one step per sequence,
                            # in shuffled order.
                            sequence_numbers = list(
                                range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            for sequence_number in sequence_numbers:
                                transition_params_trained, W_before_crf = train.train_step(
                                    sess, dataset, sequence_number, model,
                                    transition_params_trained, parameters)
                                step += 1

                        epoch_elapsed_training_time = time.time(
                        ) - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(
                            epoch_elapsed_training_time),
                              flush=False)
                        if parameters['use_corrector']:
                            # Temporarily swap in the step-1 predictions as the
                            # corrector input, then restore the originals.
                            original_label_corrector_vector_indices = dataset.label_corrector_vector_indices
                            dataset.label_corrector_vector_indices = predicted_label_corrector_vector_indices
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)

                            # Evaluate model: save and plot results
                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true, stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)
                            dataset.label_corrector_vector_indices = original_label_corrector_vector_indices
                        else:
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)

                            # Evaluate model: save and plot results
                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true, stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)

                        # Mirror the train summaries into model_folder.
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(),
                                       model_folder)

                        # Early stopping
                        train_f1_score = results['epoch'][epoch_number][0][
                            'train']['f1_score']['micro']
                        valid_f1_score = results['epoch'][epoch_number][0][
                            'valid']['f1_score']['micro']
                        test_f1_score = results['epoch'][epoch_number][0][
                            'test']['f1_score']['micro']
                        f1_scores['train-F1'].append(train_f1_score)
                        f1_scores['valid-F1'].append(valid_f1_score)
                        f1_scores['test-F1'].append(test_f1_score)

                        if valid_f1_score > f1_score_best:
                            patience_counter = 0
                            f1_score_best = valid_f1_score
                            # Save the best model
                            model_saver.save(
                                sess,
                                os.path.join(model_folder, 'best_model.ckpt'))
                            print(
                                'updated model to current epoch : epoch {:d}'.
                                format(epoch_number))
                            print('the model is saved in: {:s}'.format(
                                model_folder))
                            ### newly deleted
                        else:
                            patience_counter += 1
                        print("In epoch {:d}, the valid F1 is : {:f}".format(
                            epoch_number, valid_f1_score))
                        print(
                            "The last {0} epochs have not shown improvements on the validation set."
                            .format(patience_counter))

                        # NOTE(review): early stop only sets the flag; the loop
                        # actually exits via the epoch-count check below.
                        if patience_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True

                        # Optional CRF refinement phase after the main epochs.
                        if epoch_number >= parameters[
                                'maximum_number_of_epochs'] and parameters[
                                    'refine_with_crf']:
                            model = train.refine_with_crf(
                                parameters, sess, model, model_saver)
                            print('refine model with CRF ...')

                            for additional_epoch in range(
                                    parameters['additional_epochs_with_crf']):
                                print('Additional {:d}th epoch'.format(
                                    additional_epoch))
                                sequence_numbers = list(
                                    range(len(dataset.token_indices['train'])))
                                random.shuffle(sequence_numbers)
                                for sequence_number in sequence_numbers:
                                    transition_params_trained, W_before_crf = train.train_step(
                                        sess, dataset, sequence_number, model,
                                        transition_params_trained, parameters)
                                    step += 1
                                epoch_elapsed_training_time = time.time(
                                ) - epoch_start_time
                                print(
                                    'Additional training completed in {0:.2f} seconds'
                                    .format(epoch_elapsed_training_time),
                                    flush=False)

                                y_pred, y_true, output_filepaths = train.predict_labels(
                                    sess, model, transition_params_trained,
                                    parameters, dataset, epoch_number,
                                    stats_graph_folder, dataset_filepaths)

                                evaluate.evaluate_model(
                                    results, dataset, y_pred, y_true,
                                    stats_graph_folder, epoch_number,
                                    epoch_start_time, output_filepaths,
                                    parameters)

                                summary = sess.run(model.summary_op,
                                                   feed_dict=None)
                                writers['train'].add_summary(
                                    summary, epoch_number)
                                writers['train'].flush()
                                utils.copytree(writers['train'].get_logdir(),
                                               model_folder)

                        if epoch_number >= parameters[
                                'maximum_number_of_epochs'] and not parameters[
                                    'refine_with_crf']:
                            break
                    # Plot the F1 history for whichever training step this was.
                    if not parameters['use_pretrained_model']:
                        plot_name = 'F1-summary-step1.svg'
                    else:
                        plot_name = 'F1-summary-step2.svg'
                    for k, l in f1_scores.items():
                        print(k, l)
                    utils_plots.plot_f1(
                        f1_scores,
                        os.path.join(stats_graph_folder, '..', plot_name),
                        'F1 score summary')

                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')

                # Final bookkeeping: record duration, persist results, close
                # the per-split writers.
                print('Finishing the experiment')
                end_time = time.time()
                results['execution_details'][
                    'train_duration'] = end_time - start_time
                results['execution_details']['train_end'] = end_time
                evaluate.save_results(results, stats_graph_folder)
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type].close()

    sess.close()
示例#27
0
File: _base.py  Project: CMGS/ellen
 def setUp(self):
     """Prepare a fresh temp working copy of the bare test repository."""
     self.init_temp_path()
     self.path = self.get_temp_path()
     # Seed the temp location with the bare-repo fixture so each test
     # mutates its own copy.
     copytree(BARE_REPO_PATH, self.path)
示例#28
0
File: _base.py  Project: tclh123/ellen
 def setUp(self):
     """Prepare a fresh temp working copy of the bare test repository."""
     self.init_temp_path()
     self.path = self.get_temp_path()
     # Seed the temp location with the bare-repo fixture so each test
     # mutates its own copy.
     copytree(BARE_REPO_PATH, self.path)
# -*- coding: utf-8 -*-

import utils

# Packaging script: assembles the EffekseerForDXLib 1.43 / 3.20a distribution
# from the docs, the DX library files and the prebuilt Effekseer runtime.
targetDir = 'EffekseerForDXLib_143_320a/'
dxlibDir = 'DXLib_VC/'
effekseerDir = '../Effekseer/'
effekseerVSDir = effekseerDir + 'EffekseerRuntime143/Compiled/'

# Shared destination folder (Japanese name: "files that should be added
# to the project (for VC)").
vcDir = targetDir + 'プロジェクトに追加すべきファイル_VC用/'

utils.cdToScript()

# Rebuild the output directory from scratch.
utils.rmdir(targetDir)
utils.mkdir(targetDir)

utils.copytree('docs/', targetDir + 'Help/')

utils.copytree(dxlibDir + 'プロジェクトに追加すべきファイル_VC用/', vcDir)

# Public headers shipped with the runtime.
for headerName in ('Effekseer.h',
                   'EffekseerRendererDX9.h',
                   'EffekseerRendererDX11.h'):
    utils.copy(effekseerVSDir + 'include/' + headerName, vcDir)

# x86 static libraries, renamed on copy to encode toolchain/arch/config
# ('_d' marks debug builds); Debug first, then Release, matching the
# original copy order.
for buildName, configSuffix in (('Debug', '_d'), ('Release', '')):
    for libName in ('Effekseer',
                    'EffekseerRendererDX9',
                    'EffekseerRendererDX11'):
        utils.copy(
            effekseerVSDir + 'lib/VS2015/' + buildName + '/' + libName + '.lib',
            vcDir + libName + '_vs2015_x86' + configSuffix + '.lib')

# x64 debug runtime library.
utils.copy(effekseerVSDir + 'lib/VS2015WIN64/Debug/Effekseer.lib',
           vcDir + 'Effekseer_vs2015_x64_d.lib')
示例#30
0
def main(languages):
    #embeddings_type = ['polyglot', 'fasttext']
    #embeddings_type = ['fasttext', 'fasttext_noOOV']
    embeddings_type = ['fasttext_noOOV']
    character_lstm = [True]
    embedding_language = ['target', 'source']
    combination = product(languages, embeddings_type, embedding_language, character_lstm)
    create_folder_if_not_exists(os.path.join("..", "log"))
    experiment_timestamp = utils.get_current_time_in_miliseconds()
    log_file = os.path.join("..", "log", "experiment-{}.log".format(experiment_timestamp))

    for language, emb_type, emb_language, char_lstm in combination:
        conf_parameters = load_parameters()
        conf_parameters = set_datasets(conf_parameters, language)
        conf_parameters.set('ann','use_character_lstm', str(char_lstm))
        conf_parameters.set('ann','embedding_type', emb_type)
        conf_parameters.set('ann','embedding_language', emb_language)
        if emb_type == 'polyglot':
            conf_parameters.set('ann', 'embedding_dimension', str(64))
        elif 'fasttext' in emb_type:
            conf_parameters.set('ann', 'embedding_dimension', str(300))
        else:
            raise("Uknown embedding type")
        if emb_language == 'source':
            conf_parameters.set('dataset', 'language', constants.MAPPING_LANGUAGE[language])
        else:
            conf_parameters.set('dataset', 'language', language)
        parameters, conf_parameters = parse_parameters(conf_parameters)

        start_time = time.time()
        experiment_timestamp = utils.get_current_time_in_miliseconds()

        results = {}
        results['epoch'] = {}
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time
        results['execution_details']['time_stamp'] = experiment_timestamp
        results['execution_details']['early_stop'] = False
        results['execution_details']['keyboard_interrupt'] = False
        results['execution_details']['num_epochs'] = 0
        results['model_options'] = copy.copy(parameters)

        dataset_name = utils.get_basename_without_extension(parameters['dataset_train'])
        model_name = '{0}_{1}_{2}_{3}_{4}'.format(language, emb_type, char_lstm, emb_language,
                                                  results['execution_details']['time_stamp'])

        sys.stdout = open(os.path.join("..", "log", model_name), "w")
        print(language, emb_type, char_lstm, emb_language)

        with open(log_file, "a") as file:
            file.write("Experiment: {}\n".format(model_name))
            file.write("Start time:{}\n".format(experiment_timestamp))
            file.write("-------------------------------------\n\n")
        pprint(parameters)
        dataset_filepaths = get_valid_dataset_filepaths(parameters)
        check_parameter_compatiblity(parameters, dataset_filepaths)
        previous_best_valid_epoch = -1

        # Load dataset
        dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug'])
        dataset.load_vocab_word_embeddings(parameters)
        dataset.load_dataset(dataset_filepaths, parameters)

        # Create graph and session
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters['number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters['number_of_cpu_threads'],
                device_count={'CPU': 1, 'GPU': parameters['number_of_gpus']},
                allow_soft_placement=True,
                # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False
            )

            session_conf.gpu_options.allow_growth = True

            sess = tf.Session(config=session_conf)

            with sess.as_default():
                # Initialize and save execution details

                print(model_name)
                output_folder = os.path.join('..', 'output')
                utils.create_folder_if_not_exists(output_folder)
                stats_graph_folder = os.path.join(output_folder, model_name)  # Folder where to save graphs
                utils.create_folder_if_not_exists(stats_graph_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs',
                                                                         dataset_type)
                    utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
                # del dataset.embeddings_matrix
                if not parameters['use_pretrained_model']:
                    pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))
                # dataset.load_pretrained_word_embeddings(parameters)
                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type],
                                                                  graph=sess.graph)
                embedding_writer = tf.summary.FileWriter(
                    model_folder)  # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

                if parameters['use_character_lstm']:
                    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
                    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                    character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
                    tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w', 'UTF-8')
                for token_index in range(len(dataset.index_to_token)):
                    token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
                token_list_file.close()

                if parameters['use_character_lstm']:
                    character_list_file = codecs.open(character_list_file_path, 'w', 'UTF-8')
                    for character_index in range(dataset.alphabet_size):
                        if character_index == dataset.PADDING_CHARACTER_INDEX:
                            character_list_file.write('PADDING\n')
                        else:
                            character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
                    character_list_file.close()

                try:
                    # Initialize the model
                    sess.run(tf.global_variables_initializer())
                    if not parameters['use_pretrained_model']:
                        model.load_pretrained_token_embeddings(sess, dataset, parameters)

                    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                    bad_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
                    previous_best_valid_f1_score = -1
                    transition_params_trained = np.random.rand(len(dataset.unique_labels), len(
                        dataset.unique_labels))  # TODO np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
                    model_saver = tf.train.Saver(
                        max_to_keep=None)  # parameters['maximum_number_of_epochs'])  # defaults to saving all variables
                    epoch_number = 0

                    while True:
                        step = 0
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))

                        epoch_start_time = time.time()

                        if parameters['use_pretrained_model'] and epoch_number == 1:
                            # Restore pretrained model parameters
                            transition_params_trained = train.restore_model_parameters_from_pretrained_model(parameters,
                                                                                                             dataset,
                                                                                                             sess,
                                                                                                             model,
                                                                                                             model_saver)
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            data_counter = 0
                            sub_id = 0
                            for i in tqdm(range(0, len(sequence_numbers), parameters['batch_size']), "Training epoch {}".format(epoch_number),
                                          mininterval=1):
                                data_counter += parameters['batch_size']
                                if data_counter >= 20000:
                                    data_counter = 0
                                    sub_id += 0.001
                                    print("Intermediate evaluation number: ", sub_id)
                                    epoch_elapsed_training_time = time.time() - epoch_start_time
                                    print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time),
                                          flush=True)

                                    y_pred, y_true, output_filepaths = train.predict_labels(sess, model,
                                                                                            transition_params_trained,
                                                                                            parameters, dataset,
                                                                                            epoch_number + sub_id,
                                                                                            stats_graph_folder,
                                                                                            dataset_filepaths)
                                    # Evaluate model: save and plot results
                                    evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder,
                                                            epoch_number, epoch_start_time, output_filepaths,
                                                            parameters)
                                    # Save model
                                    model_saver.save(sess, os.path.join(model_folder,
                                                                        'model_{0:07.3f}.ckpt'.format(
                                                                            epoch_number + sub_id)))
                                    # Save TensorBoard logs
                                    summary = sess.run(model.summary_op, feed_dict=None)
                                    writers['train'].add_summary(summary, epoch_number)
                                    writers['train'].flush()
                                    utils.copytree(writers['train'].get_logdir(), model_folder)
                                    # Early stop
                                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                                    if valid_f1_score > previous_best_valid_f1_score:
                                        bad_counter = 0
                                        previous_best_valid_f1_score = valid_f1_score
                                    else:
                                        bad_counter += 1

                                sequence_number = sequence_numbers[i: i + parameters['batch_size']]
                                transition_params_trained, loss = train.train_step(sess, dataset, sequence_number,
                                                                                   model, transition_params_trained,
                                                                                   parameters)
                        epoch_elapsed_training_time = time.time() - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                        y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained,
                                                                                parameters, dataset, epoch_number,
                                                                                stats_graph_folder, dataset_filepaths)

                        # Evaluate model: save and plot results
                        evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number,
                                                epoch_start_time, output_filepaths, parameters)

                        # Save model
                        model_saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                        # Save TensorBoard logs
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(), model_folder)

                        # Early stop
                        valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                        if valid_f1_score > previous_best_valid_f1_score:
                            bad_counter = 0
                            previous_best_valid_f1_score = valid_f1_score
                            previous_best_valid_epoch = epoch_number
                        else:
                            bad_counter += 1
                        print("The last {0} epochs have not shown improvements on the validation set.".format(
                            bad_counter))

                        if bad_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            break

                        if epoch_number >= parameters['maximum_number_of_epochs']: break

                    keep_only_best_model(model_folder,previous_best_valid_epoch ,parameters['maximum_number_of_epochs']+1)

                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')
                    # remove the experiment
                    remove_experiment = input("Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    else:
                        print('Finishing the experiment')
                        end_time = time.time()
                        results['execution_details']['train_duration'] = end_time - start_time
                        results['execution_details']['train_end'] = end_time
                        evaluate.save_results(results, stats_graph_folder)
                    sys.stdout.close()
                except Exception:
                    logging.exception("")
                    remove_experiment = input("Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    sys.stdout.close()

            sess.close()  # release the session's resources
            sys.stdout.close()
Example #31
0
    def fit(self):
        """Train the model with per-epoch evaluation and early stopping.

        Each loop iteration is one epoch: shuffle and train over the full
        training set, predict labels on every dataset split, evaluate and
        record results, checkpoint the model, and write TensorBoard logs.
        Training stops when the validation micro F1-score has not improved
        for ``parameters['patience']`` consecutive epochs, when
        ``parameters['maximum_number_of_epochs']`` is reached, or on
        Ctrl-C.  All artifacts are written under the stats-graph folder
        created for this run.
        """
        parameters = self.parameters
        conf_parameters = self.conf_parameters
        dataset_filepaths = self.dataset_filepaths
        dataset = self.dataset
        dataset_brat_folders = self.dataset_brat_folders
        sess = self.sess
        model = self.model
        transition_params_trained = self.transition_params_trained
        stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

        # Initialize and save execution details
        start_time = time.time()
        results = {}
        results['epoch'] = {}
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time
        results['execution_details']['time_stamp'] = experiment_timestamp
        results['execution_details']['early_stop'] = False
        results['execution_details']['keyboard_interrupt'] = False
        results['execution_details']['num_epochs'] = 0
        results['model_options'] = copy.copy(parameters)

        # Persist the run configuration and the dataset object so the model
        # can later be reloaded with the exact same preprocessing.
        model_folder = os.path.join(stats_graph_folder, 'model')
        utils.create_folder_if_not_exists(model_folder)
        with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
            conf_parameters.write(parameters_file)
        # Fix: close the pickle file handle deterministically (the original
        # passed an anonymous open() to pickle.dump and never closed it).
        with open(os.path.join(model_folder, 'dataset.pickle'), 'wb') as dataset_file:
            pickle.dump(dataset, dataset_file)

        # One TensorBoard log folder per dataset split (train/valid/...).
        tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
        utils.create_folder_if_not_exists(tensorboard_log_folder)
        tensorboard_log_folders = {}
        for dataset_type in dataset_filepaths.keys():
            tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
            utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])

        # Instantiate the writers for TensorBoard
        writers = {}
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
        # embedding_writer has to write in model_folder, otherwise TensorBoard
        # won't be able to view embeddings.
        embedding_writer = tf.summary.FileWriter(model_folder)

        # Register the token and character embedding tensors with
        # TensorBoard's embedding projector; the .tsv metadata files map
        # each embedding row to its human-readable label.
        embeddings_projector_config = projector.ProjectorConfig()
        tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
        token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
        tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

        tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
        character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
        tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

        projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

        # Write metadata for TensorBoard embeddings: one token per line, and
        # one character per line (with a placeholder for the padding index).
        with codecs.open(token_list_file_path, 'w', 'UTF-8') as token_list_file:
            for token_index in range(dataset.vocabulary_size):
                token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))

        with codecs.open(character_list_file_path, 'w', 'UTF-8') as character_list_file:
            for character_index in range(dataset.alphabet_size):
                if character_index == dataset.PADDING_CHARACTER_INDEX:
                    character_list_file.write('PADDING\n')
                else:
                    character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))

        # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
        # bad_counter: number of consecutive epochs with no improvement on the
        # validation set in terms of F1-score.
        bad_counter = 0
        previous_best_valid_f1_score = 0
        epoch_number = -1
        try:
            while True:
                step = 0
                epoch_number += 1
                print('\nStarting epoch {0}'.format(epoch_number))

                epoch_start_time = time.time()

                # Epoch 0 is evaluation-only (no training pass): it records a
                # baseline for the freshly initialized or pretrained model.
                if epoch_number != 0:
                    # Train model: loop over all sequences of training set with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    for sequence_number in sequence_numbers:
                        # NOTE(review): this train.train_step signature differs
                        # from the batched variant used elsewhere in this file;
                        # confirm it returns only the transition parameters.
                        transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                        step += 1
                        if step % 10 == 0:
                            print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                epoch_elapsed_training_time = time.time() - epoch_start_time
                print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                # Evaluate model: save and plot results
                evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                # In pure-prediction mode, emit brat output once and stop.
                if parameters['use_pretrained_model'] and not parameters['train_model']:
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                    break

                # Save model
                model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                # Save TensorBoard logs
                summary = sess.run(model.summary_op, feed_dict=None)
                writers['train'].add_summary(summary, epoch_number)
                writers['train'].flush()
                utils.copytree(writers['train'].get_logdir(), model_folder)

                # Early stop on the validation micro-averaged F1-score.
                valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                if valid_f1_score > previous_best_valid_f1_score:
                    bad_counter = 0
                    previous_best_valid_f1_score = valid_f1_score
                    # Keep the brat output and transition parameters of the
                    # best epoch seen so far.
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    self.transition_params_trained = transition_params_trained
                else:
                    bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                if bad_counter >= parameters['patience']:
                    print('Early Stop!')
                    results['execution_details']['early_stop'] = True
                    break

                if epoch_number >= parameters['maximum_number_of_epochs']: break

        except KeyboardInterrupt:
            results['execution_details']['keyboard_interrupt'] = True
            print('Training interrupted')

        # Always record timing/results and release the TensorBoard writers,
        # whether training completed, early-stopped, or was interrupted.
        print('Finishing the experiment')
        end_time = time.time()
        results['execution_details']['train_duration'] = end_time - start_time
        results['execution_details']['train_end'] = end_time
        evaluate.save_results(results, stats_graph_folder)
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type].close()
    def _wait_until_job_stable(self, job_name, max_attempts=90):
        """Poll Jenkins until *job_name*'s lastBuild number equals its
        lastSuccessfulBuild number, or *max_attempts* one-second polls
        have elapsed.

        Returns the last observed ``(last_build_num,
        last_success_build_num)`` pair so the caller can assert equality.
        """
        last_build_num, last_success_build_num = 0, 1
        attempt = 0
        while last_build_num != last_success_build_num:
            if attempt >= max_attempts:
                break
            time.sleep(1)
            last_build_num = \
                self.ju.get_last_build_number(job_name, "lastBuild")
            last_success_build_num = \
                self.ju.get_last_build_number(job_name, "lastSuccessfulBuild")
            attempt += 1
        return last_build_num, last_success_build_num

    def test_check_project_test_workflow(self):
        """ Validate new project to test via zuul layout.yaml
        """
        # We want to create a project, provide project source
        # code with tests. We then configure zuul/jjb to handle the
        # run of the test cases. We then validate Gerrit has been
        # updated about the test results
        # We use the sample-project (that already exists)

        pname = 'test_workflow_%s' % create_random_str()
        # Be sure the project does not exist
        self.msu.deleteProject(pname,
                               config.ADMIN_USER)
        # Create it
        self.create_project(pname, config.ADMIN_USER)

        # Add the sample-project to the empty repository
        clone_dir = self.clone_as_admin(pname)
        copytree(self.sample_project_dir, clone_dir)
        self.commit_direct_push_as_admin(clone_dir, "Add the sample project")

        # Change to config/zuul/layout.yaml and jobs/projects.yaml
        # in order to test the new project.
        # Fix: the original used the Python-2-only file() builtin and never
        # closed the handles; use open() inside context managers instead.
        zuul_projects_path = os.path.join(
            self.config_clone_dir, "zuul/projects.yaml")
        with open(zuul_projects_path) as yfile:
            ycontent = yfile.read()
        with open(zuul_projects_path, 'w') as yfile:
            yfile.write(ycontent.replace("zuul-demo", pname))

        # Duplicate the zuul-demo JJB project entry under the new name.
        jjb_projects_path = os.path.join(
            self.config_clone_dir, "jobs/projects.yaml")
        with open(jjb_projects_path) as yfile:
            # NOTE(review): yaml.load without an explicit Loader is unsafe on
            # untrusted input; this reads a repo-local file — confirm.
            ycontent2 = load(yfile.read())
        sp2 = copy.deepcopy(
            [p for p in ycontent2 if 'project' in p and
                p['project']['name'] == 'zuul-demo'][0])
        sp2['project']['name'] = pname
        ycontent2.append(sp2)
        with open(jjb_projects_path, 'w') as yfile:
            yfile.write(dump(ycontent2))

        # Retrieve the previous build number for config-check
        last_success_build_num_ch = \
            self.ju.get_last_build_number("config-check",
                                          "lastSuccessfulBuild")
        # Retrieve the previous build number for config-update
        last_success_build_num_cu = \
            self.ju.get_last_build_number("config-update",
                                          "lastSuccessfulBuild")

        # Send review (config-check) will be triggered
        self.push_review_as_admin(
            self.config_clone_dir,
            "Add config definition in Zuul/JJB config for %s" % pname)

        # Wait for config-check to finish and verify the success
        self.ju.wait_till_job_completes("config-check",
                                        last_success_build_num_ch,
                                        "lastSuccessfulBuild")

        last_build_num_ch, last_success_build_num_ch = \
            self._wait_until_job_stable("config-check")
        self.assertEqual(last_build_num_ch, last_success_build_num_ch)
        # let some time to Zuul to update the test result to Gerrit.
        time.sleep(2)

        # Get the change id
        change_ids = self.gu.get_my_changes_for_project("config")
        self.assertGreater(len(change_ids), 0)
        change_id = change_ids[0]

        # Check whether zuul sets verified to +1 after running the tests
        # let some time to Zuul to update the test result to Gerrit.
        self.assert_reviewer_approvals(change_id, '+1')

        # review the change
        self.gu2.submit_change_note(change_id, "current", "Code-Review", "2")
        self.gu2.submit_change_note(change_id, "current", "Workflow", "1")

        # now zuul processes gate pipeline and runs config-check job
        # Wait for config-check to finish and verify the success
        self.ju.wait_till_job_completes("config-check",
                                        last_success_build_num_ch,
                                        "lastSuccessfulBuild")

        last_build_num_ch, last_success_build_num_ch = \
            self._wait_until_job_stable("config-check")
        self.assertEqual(last_build_num_ch, last_success_build_num_ch)

        # Check whether zuul sets verified to +2 after running the tests
        # let some time to Zuul to update the test result to Gerrit.
        self.assert_reviewer_approvals(change_id, '+2')

        # verify whether zuul merged the patch
        change = self.gu.get_change('config', 'master', change_id)
        change_status = change['status']
        attempt = 0
        while change_status != 'MERGED':
            if attempt >= 90:
                break
            time.sleep(1)
            change = self.gu.get_change('config', 'master', change_id)
            change_status = change['status']
            attempt += 1
        self.assertEqual(change_status, 'MERGED')

        # Test post pipe line
        # as the patch is merged, post pieline should run config-update job
        # Wait for config-update to finish and verify the success
        self.ju.wait_till_job_completes("config-update",
                                        last_success_build_num_cu,
                                        "lastSuccessfulBuild")
        last_build_num_cu = \
            self.ju.get_last_build_number("config-update",
                                          "lastBuild")
        last_success_build_num_cu = \
            self.ju.get_last_build_number("config-update",
                                          "lastSuccessfulBuild")
        self.assertEqual(last_build_num_cu, last_success_build_num_cu)

        # Retrieve the prev build number for pname-unit-tests
        # Retrieve the prev build number for pname-functional-tests
        last_success_build_num_sp_ut = \
            self.ju.get_last_build_number("%s-unit-tests" % pname,
                                          "lastSuccessfulBuild")
        last_success_build_num_sp_ft = \
            self.ju.get_last_build_number("%s-functional-tests" % pname,
                                          "lastSuccessfulBuild")
        # Test config-update
        # config-update should have created jobs for pname
        # Trigger tests on pname
        # Send a review and check tests has been run
        self.gitu_admin.add_commit_and_publish(
            clone_dir, 'master', "Add useless file",
            self.un)
        # Wait for pname-unit-tests to finish and verify the success
        self.ju.wait_till_job_completes("%s-unit-tests" % pname,
                                        last_success_build_num_sp_ut,
                                        "lastSuccessfulBuild")
        # Wait for pname-functional-tests to end and check the success
        self.ju.wait_till_job_completes("%s-functional-tests" % pname,
                                        last_success_build_num_sp_ft,
                                        "lastSuccessfulBuild")
        # Check the unit tests succeed
        last_build_num_sp_ut = \
            self.ju.get_last_build_number("%s-unit-tests" % pname,
                                          "lastBuild")
        last_success_build_num_sp_ut = \
            self.ju.get_last_build_number("%s-unit-tests" % pname,
                                          "lastSuccessfulBuild")
        self.assertEqual(last_build_num_sp_ut, last_success_build_num_sp_ut)
        # Check the functional tests succeed
        last_build_num_sp_ft = \
            self.ju.get_last_build_number("%s-functional-tests" % pname,
                                          "lastBuild")
        last_success_build_num_sp_ft = \
            self.ju.get_last_build_number("%s-functional-tests" % pname,
                                          "lastSuccessfulBuild")
        self.assertEqual(last_build_num_sp_ft, last_success_build_num_sp_ft)

        # Get the change id
        change_ids = self.gu.get_my_changes_for_project(pname)
        self.assertGreater(len(change_ids), 0)
        change_id = change_ids[0]

        # let some time to Zuul to update the test result to Gerrit.
        for i in range(90):
            if "jenkins" in self.gu.get_reviewers(change_id):
                break
            time.sleep(1)

        self.assert_reviewer_approvals(change_id, '+1')
Example #33
0
def __main__():
    """Run a full PyDwarf session driven by the imported ``config.export``.

    Reads the raws from the configured input directory, optionally backs
    them up, applies every configured script, then writes the transformed
    raws to the output directory (or back to the input directory when no
    output is configured).  Exits with status 1 on any unrecoverable
    configuration or I/O problem.
    """

    # Get configuration
    conf = config.export
    if not conf:
        pydwarf.log.error(
            'No configuration specified. Imported config package must contain an export variable.'
        )
        exit(1)

    # Things to do with versions
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if conf.version is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % conf.version)
        pydwarf.urist.session.dfversion = conf.version
    else:
        pydwarf.log.error(
            'No Dwarf Fortress version was specified in conf. Scripts will be run regardless of their indicated compatibility.'
        )

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' %
                          conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s...' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        # Fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; catch only genuine errors and log the cause.
        except Exception as error:
            pydwarf.log.error('Failed to create backup.')
            pydwarf.log.debug('Backup failure cause: %s' % error)
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s...' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts...')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present.
    # Only .txt files are deleted; other files in the directory are kept.
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s...' % outputdir)
        for removefile in [
                os.path.join(outputdir, f) for f in os.listdir(outputdir)
        ]:
            pydwarf.log.debug('Removing file %s...' % removefile)
            if removefile.endswith('.txt'): os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s...' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
def main():
    """Driver for the preprocessing stage of an exam-scoring pipeline.

    Usage: ``python <script> parameters.json [snp_update.csv]``

    Reads a JSON parameter file (``sys.argv[1]``), validates the expected
    folder layout under the current working directory, extracts the ``.zip``
    inputs, applies filters to the ``.dat`` files described by each ``.con``
    file, joins modules via a Perl script, and distributes the results into
    ``output/calibracion`` and ``output/calificacion`` folder trees.

    When a second CLI argument is given (an SNP update CSV) the run switches
    to "update" mode (``flagUpdate``): filtering/joining is skipped and only
    new people are kept for scoring.

    All progress is written to ``preprocessing.log`` in the main directory.
    Any exception is caught at the top level and its traceback logged.

    NOTE(review): relies on module-level names (``utils``, ``filtrado``,
    ``ensure_dir_exists``, ``filtroActualizacion``, ``sep`` — presumably a
    log-separator string — TODO confirm) defined elsewhere in the file.
    """
    try:
        # Parameter file is mandatory; bail out early if it is missing.
        if not os.path.exists(sys.argv[1]):
            sys.exit("O_O No existe el archivo de parametros: ./" +
                     sys.argv[1])

        with open(sys.argv[1], 'r') as f:
            dic = json.load(f)
        f.close()
        # Normalize Windows backslashes so all paths use forward slashes.
        dic['main_dir'] = os.getcwd().replace('\\', '/')
        dic['copy_file'] = os.getcwd().replace('\\',
                                               '/') + '/' + dic['copy_file']
        dic['Filters'] = ['EstPresente', 'Copia', 'Omision']

        # # Logging file: start each run with a fresh preprocessing.log.
        if os.path.exists(dic['main_dir'] + '/preprocessing.log'):
            os.remove(dic['main_dir'] + '/preprocessing.log')

        logging.basicConfig(
            filename=dic['main_dir'] + '/preprocessing.log',
            level=logging.INFO,
            format='%(asctime)s %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p')

        # Validate the optional SNP update file (second CLI argument).
        # NOTE(review): if the file does not exist, flagUpdate stays True but
        # pdSNP is never bound — downstream code guards with 'pdSNP' in locals().
        if len(sys.argv) == 3:
            flagUpdate = True
            if not os.path.exists(dic['main_dir'] + '/input/' + sys.argv[2]):
                logging.info("ADVERTENCIA .... No existe el archivo (" +
                             sys.argv[2] + ") No se filtraran los .dat")
            else:
                pdSNP = pd.read_csv(
                    dic['main_dir'] + '/input/' + sys.argv[2], dtype="str")
        else:
            flagUpdate = False

        print('REVISANDO PARAMETROS...')

        # Sanity-check the required folder layout and the copy file.
        if not os.path.exists(dic['main_dir'] + '/src/'):
            sys.exit("FALTA CARPETA SRC")

        if not os.path.exists(dic['main_dir'] + '/src/bin/'):
            sys.exit(
                "CARPETA BIN PARA BILOG NO EXISTE, COPIAR CARPETA EN 'src/'")

        if not os.path.exists(dic['main_dir'] + '/input'):
            sys.exit(
                "CARPETA 'input' NO EXISTE --> CREAR CARPETA INPUT CON .zip Y ARCHIVO DE COPIA"
            )

        if not os.path.isfile(dic['copy_file']):
            sys.exit(
                "ARCHIVO DE COPIA NO ENCONTRADO --> REVISAR ARCHIVO DE PARAMETROS 'src/parameters.json' --> parametro 'copy_file'"
            )

        # 'Pruebas' and 'Codigos' must cover exactly the same set of keys;
        # report each direction of mismatch separately.
        isMissing = [
            prueba for prueba in dic['Pruebas'].keys()
            if not prueba in dic['Codigos'].keys()
        ]
        if len(isMissing) > 0:
            sys.exit(
                "PARAMETROS INCORRECTOS DE JUNTURAS FALTA AGREGAR: ---->\n" +
                ', '.join(isMissing) +
                "\n-------------------------------------------------------" +
                "\ncambiar en 'src/parameters.json' --> parametro 'Codigos'")

        isMissing = [
            prueba for prueba in dic['Codigos'].keys()
            if not prueba in dic['Pruebas'].keys()
        ]
        if len(isMissing) > 0:
            sys.exit(
                "PARAMETROS INCORRECTOS DE FORMAS FALTA AGREGAR: ---->\n" +
                ', '.join(isMissing) +
                "\n-------------------------------------------------------" +
                "\ncambiar en 'src/parameters.json' --> parametro 'Codigos'")

        logging.info(
            '#######################################################################'
        )
        logging.info('COMENZO: ' +
                     datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        logging.info('Corriendo Preprocessing_stage.py')
        logging.info(
            '#######################################################################'
        )

        # Reconcile the sub-block flag with the sub-block list: disable the
        # flag when no sub-blocks were actually defined.
        if dic['subloque']:
            if ("subloques" not in dic.keys()):
                logging.info(
                    'NO DEFINIO SUBLOQUES, no se correran sub-bloques...')
                dic['subloques'] = []
                dic['subloque'] = False
            if len(dic['subloques']) == 0:
                logging.info(
                    'NO DEFINIO SUBLOQUES, no se correran sub-bloques...')
                dic['subloque'] = False

        else:
            logging.info(
                'NO DEFINIO BANDERA SUBLOQUE, no se correran sub-bloques...')

        # Clean previous run artifacts (Descargas, output, doc) before
        # re-extracting; the sleep presumably lets the OS release file
        # handles after rmtree — TODO confirm.
        logging.info('Limpiando carpeta principal')
        if os.path.exists(dic['main_dir'] + '/input/Descargas'):
            shutil.rmtree(
                dic['main_dir'] + '/input/Descargas', ignore_errors=True)
        if os.path.exists(dic['main_dir'] + '/output'):
            shutil.rmtree(dic['main_dir'] + '/output', ignore_errors=True)
        if os.path.exists(dic['main_dir'] + '/doc'):
            shutil.rmtree(dic['main_dir'] + '/doc', ignore_errors=True)
        time.sleep(15)
        logging.info('\tLeyendo archivo de parametros: ' + sys.argv[1])

        # Collect the input .zip archives; at least one is required.
        zipfiles = []
        logging.info(
            "ARCHIVOS .ZIP ENCONTRADOS en 'input'--> ALGUNA INCONSISTENCIA EN LOS ARCHIVOS --> REVISAR CARPETA INPUT Y EJECUTAR NUEVAMENTE EL PROCESO"
        )
        for file in os.listdir(dic['main_dir'] + '/input/'):
            if file.endswith('.zip'):
                zipfiles.append(file)
        if len(zipfiles) == 0:
            sys.exit(
                "NINGUN ARCHIVO ZIP ENCONTRADO -- > COPIAR ARCHIVOS ZIP EN 'input/'"
            )
        else:
            logging.info(str(zipfiles))
        logging.info(sep)

        # The Perl join script is a hard requirement for the join step below.
        logging.info('REVISANDO SCRIPTS NECESARIOS...')
        dirs = os.listdir(dic['main_dir'] + '/src')
        if not 'JuntarModulosSaber11.pl' in dirs:
            logging.info("FALTA SCRIPT 'src/JuntarModulosSaber11.pl'")
            sys.exit("FALTA SCRIPT 'src/JuntarModulosSaber11.pl'")

        # Nested dict describing the output folder tree; None = leaf folder.
        groups = {
            '01_Estudiantes': None,
            '02_NoEstudiantes': None,
            '03_Discapacitados': None
        }
        folder_structure = {
            'input': None,
            'output': {
                'calibracion': groups,
                'calificacion': groups
            },
            'doc': None
        }
        logging.info(sep)

        logging.info('CREANDO ESTRUCTURA DE CARPETAS EN: ' + dic['main_dir'] +
                     '/' + str(folder_structure) + '...')
        utils.make_dirs_from_dict(folder_structure, dic['main_dir'] + '/')
        logging.info('\tTerminado...')
        logging.info(sep)

        logging.info('EXTRAYENDO ARCHIVOS...')
        utils.extract_files(dic['main_dir'] + '/')
        logging.info('\tTerminado...')
        logging.info(sep)

        if dic['subloque']:
            # Delete every file whose 'sblq' tag is not in the configured
            # sub-block list.
            logging.info('ELIMINANDO SUBLOQUES 0...')
            for root, dirs, files in os.walk(dic["main_dir"]):
                for file in files:
                    pattern = re.compile(r".*sblq(.*)\.|\-.*", re.I)
                    matcher = pattern.match(file)
                    if not matcher == None and not matcher.groups()[0].split(
                            '-')[0] in dic['subloques']:
                        print('ARCHIVO A ELIMINAR..', file)
                        os.remove(os.path.join(root, file))
            logging.info('\tTerminado...')
            logging.info(sep)

            # For each sub-block .con file, materialize a sibling folder
            # (root + '_' + tag) with its own 'salidas' dir and filtered data.
            logging.info('GENERANDO FORMAS DE SUBLOQUES...')
            print('Generando formas de subloques')
            pattern = re.compile(r'.*sblq(.*).con', re.I)
            ext = dic['out_f'][:]
            ext.remove('.con')
            for root, dirs, files in os.walk(dic["main_dir"]):
                for file in files:
                    matcher = pattern.search(file)
                    if not matcher == None:
                        newPath = root + '_' + matcher.groups()[0]
                        if not os.path.exists(newPath):
                            print('NEW_FOLDER: ' + newPath)
                            ensure_dir_exists(newPath + '/salidas')
                            params = {
                                'main_path': root + '/',
                                'con_file': file.replace('.con', '')
                            }
                            utils.filterISELECT(params, newPath)
            logging.info('\tTerminado...')
            logging.info(sep)

            logging.info('ADICIONANDO SUBLOQUES A DICCIONARIO...')
            print('Adicionando subloques a diccionario...')
            utils.add_subloques(dic)
            logging.info(dic['Pruebas'])
            logging.info('\tTerminado...')
            logging.info(sep)

        # Remove the remaining raw sub-block files now that the per-sub-block
        # folders have been generated.
        logging.info('ARCHIVOS DE SUBLOQUES...')
        print('Eliminando archivos de subloques')
        for root, dirs, files in os.walk(dic["main_dir"]):
            for file in files:
                if re.match("(.*)sblq(.*)", file):
                    print('ARCHIVO A ELIMINAR..', file)
                    os.remove(os.path.join(root, file))
        logging.info('\tTerminado...')
        logging.info(sep)

        # For every .con file found, build a parameter dict and apply the
        # configured filters to the matching .dat data (skipped in update mode).
        logging.info('APLICANDO FILTROS A ARCHIVOS .DAT...')
        mpath = ''
        for root, dirs, files in os.walk(dic["main_dir"]):
            for file in files:
                if file.endswith(".con"):
                    flagFiltro = True
                    print('CONFILE ENCONTRADO: ' + file)
                    path = (os.path.join(root, file)).replace('\\',
                                                              '/').split('/')
                    mpath = ('/').join((os.path.join(root, file)).replace(
                        '\\', '/').split('/')[:-1]) + '/'

                    if dic['subloque']:
                        # Drop whole folders whose .con belongs to no
                        # configured sub-block.
                        indSubl = [
                            bloque in file for bloque in dic['subloques']
                        ]
                        print(indSubl)
                        if not any(indSubl):
                            flagFiltro = False
                            shutil.rmtree(root)
                    if flagFiltro:
                        confile = path[-2]
                        dic['con_file'] = path[-2]
                        logging.info('\tAplicando filtros a : ' + confile)
                        dic['filtered_data'] = ''
                        dic['main_path'] = mpath
                        dic['count_log'] = dic['main_path'] + '/registro.log'
                        con = utils.create_dict_from_con(dic)
                        # NOTE(review): dict.items() + dict.items() is
                        # Python-2-only; under Python 3 this raises TypeError
                        # (items() returns a view). Confirm target interpreter.
                        params = dict(dic.items() + con.items())
                        params['id_form'] = confile
                        params['aplicacion'] = con['DATA'].split('-')[0][0:7]
                        logging.info('\t\tAplicacion: ' + params['aplicacion'])

                        f_g = {}
                        utils.set_f_g(params['Pruebas'].copy(), confile, f_g)

                        if not f_g == {}:
                            params['curr_group'] = f_g[confile]
                            logging.info('\t\tGrupo: ' + params['curr_group'])
                            if not flagUpdate:
                                filtrado.apply_filters(params)
                            mpath = mpath.replace(confile, '')
                            logging.info('\t\tTerminado...')
        logging.info(sep)

        if not flagUpdate:
            # Build configuracion.txt from the extracted folder names, then
            # run the Perl join script against it.
            logging.info('CREANDO ARCHIVO DE CONFIGURACION.TXT...')
            f = []
            for root, dirs, files in os.walk(dic['main_dir'] +
                                             '/input/Descargas/'):
                for dir in dirs:
                    f.append(dir)
            config_file = []
            utils.create_config_file(dic['Pruebas'], dic['Codigos'],
                                     dic['Pruebas'].keys(), '', config_file,
                                     [], f)
            np.savetxt(
                dic['main_dir'] + '/output/configuracion.txt',
                config_file,
                delimiter=",",
                fmt="%s")
            logging.info('\t\tTerminado...')
            logging.info(sep)

            logging.info('JUNTANDO ARCHIVOS DAT (JUNTAR.pl)...')
            os.chdir(mpath)  #Change to forms_path
            p = subprocess.Popen([
                'perl', dic['main_dir'] + '/src/JuntarModulosSaber11.pl',
                '-com', '-dat', '-conf',
                dic['main_dir'] + '/output/configuracion.txt'
            ])
            # Block until the Perl join finishes before moving files around.
            p.communicate()
            logging.info('\t\tTerminado...')
            logging.info(sep)

            ext = dic['out_f']
            logging.info(
                'MOVIENDO ARCHIVOS A SUS RESPECTIVAS CARPETAS DE SALIDA...')
            config = []
            logging.info(
                'SACANDO CARPETAS DE JUNTURA...(Leyendo archivo de configuracion.txt)'
            )
            with open(dic['main_dir'] + '/output/configuracion.txt') as f:
                config = f.readlines()
                f.close()
            J = []  # Forms already present in the JUNTAS folder
            for line in config:
                if line.startswith("PRUEBA"):
                    J.append(line.split()[2])
            logging.info('FORMAS DE JUNTURA: ' + str(J))
            # Copy calibration outputs per folder: JUNTAS wholesale, other
            # folders file-by-file filtered by the configured extensions.
            dirs = os.listdir(os.getcwd())
            for d in dirs:
                if not d in J and os.path.isdir(os.getcwd() + '/' + d):
                    logging.info(sep)
                    logging.info('PATH DE INPUT - OUTPUT PARA CARPETA: ' + d)
                    if d == "JUNTAS":
                        f_g = {'JUNTAS': '01_Estudiantes'}
                    else:
                        f_g = {}
                        utils.set_f_g(dic['Pruebas'].copy(), d, f_g)

                    if not f_g == {}:
                        path_output = dic[
                            'main_dir'] + '/output/calibracion/' + f_g[
                                d] + '/' + d
                        ensure_dir_exists(path_output)
                        if d == 'JUNTAS':
                            path_input = os.getcwd() + '/' + d
                            logging.info('PATH INPUT: ' + path_input)
                            logging.info('PATH OUTPUT: ' + path_output)
                            utils.copytree(path_input, path_output)
                        else:
                            for root, dirs, files in os.walk(os.getcwd() +
                                                             '/' + d):
                                for file in files:
                                    for ex in dic['out_f']:
                                        if ex in file:
                                            path_input = os.path.join(
                                                root, file).replace('\\', '/')
                                            logging.info('PATH INPUT: ' +
                                                         path_input)
                                            logging.info('PATH OUTPUT: ' +
                                                         path_output + '/' +
                                                         file)
                                            shutil.copyfile(
                                                path_input,
                                                path_output + '/' + file)

                        if not d == 'JUNTAS':
                            ensure_dir_exists(path_output + '/salidas')
                        logging.info('SALIDAS: ' + path_output + '/salidas')
            logging.info(sep)

        # Copy scoring (calificacion) files. In non-update mode .DAT entries
        # are matched via their '.O' counterparts and renamed back to .DAT.
        logging.info(
            'MOVIENDO ARCHIVOS DE CALIFICACION A SUS RESPECTIVAS CARPETAS...')
        ext = []
        for e in dic['out_f']:
            if e.endswith('.DAT') and not flagUpdate:
                ext.append(e.replace('.DAT', '.O'))
            else:
                ext.append(e)
        removePaths = []
        for root, dirs, files in os.walk(os.getcwd()):
            for d in dirs:
                if not d == 'JUNTAS':
                    f_g = {}
                    utils.set_f_g(dic['Pruebas'].copy(), d, f_g)
                    if not f_g == {}:
                        path_output = dic[
                            'main_dir'] + '/output/calificacion/' + f_g[
                                d] + '/' + d
                        ensure_dir_exists(path_output)
                        if not os.path.exists(path_output + '/salidas'):
                            os.makedirs(path_output + '/salidas')

                        for file in os.listdir(os.path.join(root, d)):
                            for e in ext:
                                if e in file:
                                    path_input = os.path.join(root,
                                                              d) + '/' + file
                                    if file.endswith('.O'):
                                        output = path_output + '/' + file.replace(
                                            '.O', '.DAT')
                                    else:
                                        output = path_output + '/' + file
                                    if not os.path.exists(output):
                                        shutil.copyfile(path_input, output)
                                        if flagUpdate and file.endswith(
                                                '.DAT'):
                                            # With an SNP table: keep only new
                                            # people; folders with zero updates
                                            # are queued for removal. Without
                                            # it: score everyone, but drop
                                            # folders whose input is empty.
                                            if ('pdSNP' in locals()):
                                                nUpdate = filtroActualizacion(
                                                    output, pdSNP,
                                                    dic['id_len'])
                                                if nUpdate == 0:
                                                    removePaths.append(
                                                        path_output)
                                                logging.info(
                                                    'FILTRANDO NUEVAS PERSONAS PARA CALIFICAR: '
                                                    + file)
                                            else:
                                                statFile = os.stat(path_input)
                                                if str(statFile.
                                                       st_size) == '0':
                                                    removePaths.append(
                                                        path_output)
                                                logging.info(
                                                    'SE CALIFICARAN TODAS LAS PERSONAS: '
                                                    + file)

        # # Remove scoring folders that ended up empty (update mode only).
        if flagUpdate:
            logging.info(sep)
            logging.info('ELIMANDO CARPETAS EN BLANCO DE LA CALIFICACION...')
            for path in removePaths:
                logging.info('Eliminando: ' + path)
                shutil.rmtree(path)
            logging.info(sep)

        logging.info('\tTerminado...')
        logging.info(sep)

        logging.info('TERMINO: ' +
                     datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    except Exception as e:
        # NOTE(review): logging.error() returns None, so this print emits
        # 'None' to stdout; the traceback itself still reaches the log file.
        print(logging.error(traceback.format_exc()))
示例#35
0
    def make_output_dirs(self):
        """Build platform-specific application packages into the output dir.

        Pipeline: wipe any previous output and temp dirs, regenerate
        package.json / global.json, zip the project into a ``.nw`` archive,
        then for each enabled export setting copy the NW.js runtime files and
        assemble the final artifact (a renamed ``.app`` bundle with patched
        Info.plist on Mac; a joined executable + ``.nw`` payload elsewhere).

        Errors are captured into ``self.output_err`` instead of propagating;
        progress is reported through ``self.progress_text``.  The temp dir is
        always removed in the ``finally`` block.

        NOTE(review): if an exception fires before ``temp_dir`` is bound
        (the very first statements), the ``finally`` clause raises NameError.
        NOTE(review): uses the Python 2 ``unicode`` builtin in the error
        handler — this method is not Python-3 compatible as written.
        """
        self.output_err = ''
        try:
            self.progress_text = 'Removing old output directory...\n'

            # Clear previous build output for this project.
            output_dir = utils.path_join(self.output_dir(), self.project_name())
            if os.path.exists(output_dir):
                utils.rmtree(output_dir, ignore_errors=True)

            # Scratch area for the intermediate .nw archive and folder copy.
            temp_dir = utils.path_join(TEMP_DIR, 'webexectemp')
            if os.path.exists(temp_dir):
                utils.rmtree(temp_dir, ignore_errors=True)

            self.progress_text = 'Making new directories...\n'

            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            os.makedirs(temp_dir)

            self.copy_files_to_project_folder()

            json_file = utils.path_join(self.project_dir(), 'package.json')

            global_json = utils.get_data_file_path('files/global.json')

            # package.json is only written when the user opted in.
            if self.output_package_json:
                with codecs.open(json_file, 'w+', encoding='utf-8') as f:
                    f.write(self.generate_json())


            with codecs.open(global_json, 'w+', encoding='utf-8') as f:
                f.write(self.generate_json(global_json=True))

            # Zipped payload (.nw) and an uncompressed folder copy (.nwf),
            # both excluding the output directory itself.
            zip_file = utils.path_join(temp_dir, self.project_name()+'.nw')

            app_nw_folder = utils.path_join(temp_dir, self.project_name()+'.nwf')

            utils.copytree(self.project_dir(), app_nw_folder,
                           ignore=shutil.ignore_patterns(output_dir))

            zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
            for ex_setting in self.settings['export_settings'].values():
                if ex_setting.value:
                    self.progress_text = '\n'
                    name = ex_setting.display_name
                    self.progress_text = u'Making files for {}...'.format(name)
                    export_dest = utils.path_join(output_dir, ex_setting.name)
                    versions = re.findall('(\d+)\.(\d+)\.(\d+)', self.selected_version())[0]

                    # The project was renamed node-webkit -> nwjs at 0.12.
                    minor = int(versions[1])
                    if minor >= 12:
                        export_dest = export_dest.replace('node-webkit', 'nwjs')

                    if os.path.exists(export_dest):
                        utils.rmtree(export_dest, ignore_errors=True)

                    # shutil will make the directory for us
                    utils.copytree(get_data_path('files/'+ex_setting.name),
                                   export_dest,
                                    ignore=shutil.ignore_patterns('place_holder.txt'))
                    utils.rmtree(get_data_path('files/'+ex_setting.name), ignore_errors=True)
                    self.progress_text += '.'

                    if 'mac' in ex_setting.name:
                        # Mac: rename the stock bundle, patch Info.plist,
                        # then drop the payload into Contents/Resources.
                        uncomp_setting = self.get_setting('uncompressed_folder')
                        uncompressed = uncomp_setting.value
                        app_path = utils.path_join(export_dest,
                                                self.project_name()+'.app')

                        # Newer runtimes ship nwjs.app; fall back to the old
                        # node-webkit.app name.
                        try:
                            utils.move(utils.path_join(export_dest,
                                                     'nwjs.app'),
                                       app_path)
                        except IOError:
                            utils.move(utils.path_join(export_dest,
                                                     'node-webkit.app'),
                                       app_path)

                        plist_path = utils.path_join(app_path, 'Contents', 'Info.plist')

                        plist_dict = plistlib.readPlist(plist_path)

                        # Brand the bundle with the project name and version.
                        plist_dict['CFBundleDisplayName'] = self.project_name()
                        plist_dict['CFBundleName'] = self.project_name()
                        version_setting = self.get_setting('version')
                        plist_dict['CFBundleShortVersionString'] = version_setting.value
                        plist_dict['CFBundleVersion'] = version_setting.value

                        plistlib.writePlist(plist_dict, plist_path)


                        self.progress_text += '.'

                        app_nw_res = utils.path_join(app_path,
                                                  'Contents',
                                                  'Resources',
                                                  'app.nw')

                        # Payload: folder copy when 'uncompressed' is chosen,
                        # otherwise the zipped .nw archive.
                        if uncompressed:
                            utils.copytree(app_nw_folder, app_nw_res)
                        else:
                            utils.copy(zip_file, app_nw_res)
                        self.create_icns_for_app(utils.path_join(app_path,
                                                              'Contents',
                                                              'Resources',
                                                              'nw.icns'))

                        self.progress_text += '.'
                    else:
                        # Windows/Linux: concatenate the runtime binary with
                        # the .nw archive into a single executable.
                        ext = ''
                        windows = False
                        if 'windows' in ex_setting.name:
                            ext = '.exe'
                            windows = True

                        nw_path = utils.path_join(export_dest,
                                               ex_setting.dest_files[0])

                        if windows:
                            self.replace_icon_in_exe(nw_path)

                        self.compress_nw(nw_path)

                        dest_binary_path = utils.path_join(export_dest,
                                                        self.project_name() +
                                                        ext)
                        if 'linux' in ex_setting.name:
                            self.make_desktop_file(dest_binary_path, export_dest)

                        join_files(dest_binary_path, nw_path, zip_file)

                        # 0755: rwx for owner, r-x for group and others.
                        sevenfivefive = (stat.S_IRWXU |
                                         stat.S_IRGRP |
                                         stat.S_IXGRP |
                                         stat.S_IROTH |
                                         stat.S_IXOTH)
                        os.chmod(dest_binary_path, sevenfivefive)

                        self.progress_text += '.'

                        # The bare runtime was merged into the final binary;
                        # remove the leftover.
                        if os.path.exists(nw_path):
                            os.remove(nw_path)

        except Exception:
            error = u''.join([unicode(x) for x in traceback.format_exception(sys.exc_info()[0],
                                                                             sys.exc_info()[1],
                                                                             sys.exc_info()[2])])
            self.logger.error(error)
            self.output_err += error
        finally:
            utils.rmtree(temp_dir, ignore_errors=True)
示例#36
0
def __main__():
    """Entry point: back up, read, transform, and write Dwarf Fortress raws.

    Configuration comes from the module-level ``settings`` object: raws are
    optionally backed up, parsed, each entry of ``settings.runscripts`` is
    resolved and executed against them, and the result is written to
    ``settings.outputdir`` (or back into ``settings.rawsdir``).
    """
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if settings.dfversion is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % settings.dfversion)
    else:
        pydwarf.log.error('No Dwarf Fortress version was specified in settings. Scripts will be run regardless of their indicated compatibility.')

    if not os.path.exists(settings.rawsdir):
        # A missing raws directory aborts the run: log it at error level
        # (consistent with the conf-based variant of this runner), not info.
        pydwarf.log.error('Specified raws directory does not exist.')
        return

    # Back up the raws before touching them, when configured to do so.
    if settings.backup and settings.backupdir:
        pydwarf.log.info('Backing up raws to %s...' % settings.backupdir)
        copytree(settings.rawsdir, settings.backupdir)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    pydwarf.log.info('Reading raws from %s...' % settings.rawsdir)
    r = raws().read(settings.rawsdir, pydwarf.log)

    pydwarf.log.info('Running scripts...')
    for script in settings.runscripts:
        pydwarf.log.debug('Handling script %s...' % script)

        # Each runscripts entry may be a (script, args) pair, a dict with
        # lookup options, a callable, or a plain script name.
        urist = None
        scriptname = None
        scriptfunc = None
        scriptargs = None
        if isinstance(script, (tuple, list)):
            scriptargs = script[1]
            script = script[0]
        elif isinstance(script, dict):
            scriptname = script.get('name')
            scriptargs = script.get('args')
            scriptmatch = script.get('match')
            scriptignoreversion = script.get('ignore_df_version')
            # Skip the DF-version compatibility check when requested.
            checkversion = None if scriptignoreversion else settings.dfversion
            candidates = pydwarf.urist.get(scriptname, version=checkversion, match=scriptmatch)
            if candidates:
                urist = candidates[0]
                scriptname = urist.name
                if len(candidates) > 1: pydwarf.log.warning('More than one fitting script has been specified, using a best guess.')
        elif callable(script):
            scriptname = script.__name__
            scriptfunc = script
        else:
            # Plain name: resolve against the registry for the configured version.
            scriptname = script
            candidates = pydwarf.urist.get(scriptname, version=settings.dfversion)
            if candidates:
                urist = candidates[0]
                scriptname = urist.name
                if len(candidates) > 1: pydwarf.log.warning('More than one fitting script has been specified, using a best guess.')
        if urist and scriptfunc is None:
            scriptfunc = urist.fn

        if scriptfunc:
            scriptinfo = 'Running script %s' % scriptname
            if scriptargs: scriptinfo = '%s with args %s' % (scriptinfo, scriptargs)
            pydwarf.log.info('%s...' % scriptinfo)

            try:
                # Scripts receive the parsed raws and report via a response dict.
                response = scriptfunc(r, **scriptargs) if scriptargs else scriptfunc(r)
                if response:
                    success = response.get('success')
                    status = response['status'] if 'status' in response else ('Script %s ran %ssuccessfully.' % (scriptname, '' if success else 'un'))
                    pydwarf.log.info('%s: %s' % ('SUCCESS' if success else 'FAILURE', status))
                else:
                    pydwarf.log.error('Received no response from script %s.' % scriptname)
            except Exception:
                # One failing script must not abort the remaining ones.
                pydwarf.log.exception('Unhandled exception while running script %s.' % scriptname)
            else:
                pydwarf.log.info('Finished running script %s.' % scriptname)

        else:
            pydwarf.log.error('Failed to retrieve script %s.' % scriptname)

    # Write in place when no separate output directory is configured.
    outputdir = settings.outputdir if settings.outputdir else settings.rawsdir
    pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
    if not os.path.exists(outputdir): os.makedirs(outputdir)
    r.write(outputdir, pydwarf.log)

    pydwarf.log.info('All done!')
示例#37
0
def main():
    """Train and evaluate an EntityLSTM sequence-labelling model end to end.

    Loads parameters and the dataset, builds the TensorFlow graph and
    session, wires up TensorBoard summary writers and embedding-projector
    metadata, then runs the train/evaluate loop one epoch at a time until
    early stopping, the configured epoch limit, or a keyboard interrupt.
    """

    parameters, conf_parameters = load_parameters()
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            intra_op_parallelism_threads=parameters['number_of_cpu_threads'],
            inter_op_parallelism_threads=parameters['number_of_cpu_threads'],
            device_count={'CPU': 1, 'GPU': parameters['number_of_gpus']},
            allow_soft_placement=True, # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
            log_device_placement=False
            )

        sess = tf.Session(config=session_conf)

        with sess.as_default():
            # Initialize and save execution details
            start_time = time.time()
            experiment_timestamp = utils.get_current_time_in_miliseconds()
            results = {}
            results['epoch'] = {}
            results['execution_details'] = {}
            results['execution_details']['train_start'] = start_time
            results['execution_details']['time_stamp'] = experiment_timestamp
            results['execution_details']['early_stop'] = False
            results['execution_details']['keyboard_interrupt'] = False
            results['execution_details']['num_epochs'] = 0
            results['model_options'] = copy.copy(parameters)

            # Experiment folder layout: ../output/<dataset>_<timestamp>/{model, tensorboard_logs}
            dataset_name = utils.get_basename_without_extension(parameters['dataset_text_folder'])
            model_name = '{0}_{1}'.format(dataset_name, results['execution_details']['time_stamp'])

            output_folder=os.path.join('..', 'output')
            utils.create_folder_if_not_exists(output_folder)
            stats_graph_folder=os.path.join(output_folder, model_name) # Folder where to save graphs
            utils.create_folder_if_not_exists(stats_graph_folder)
            model_folder = os.path.join(stats_graph_folder, 'model')
            utils.create_folder_if_not_exists(model_folder)
            # Persist the run configuration and the preprocessed dataset next to the checkpoints
            with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
                conf_parameters.write(parameters_file)
            tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
            utils.create_folder_if_not_exists(tensorboard_log_folder)
            # One TensorBoard log folder per dataset split (train/valid/test/...)
            tensorboard_log_folders = {}
            for dataset_type in dataset_filepaths.keys():
                tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
                utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
            pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

            # Instantiate the model
            # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
            model = EntityLSTM(dataset, parameters)

            # Instantiate the writers for TensorBoard
            writers = {}
            for dataset_type in dataset_filepaths.keys():
                writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
            embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

            # Register the token and character embedding tensors with the TensorBoard projector
            embeddings_projector_config = projector.ProjectorConfig()
            tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
            token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
            tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

            tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
            character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
            tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

            # Saves a configuration file that TensorBoard reads during startup
            projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

            # Write metadata for TensorBoard embeddings
            token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
            for token_index in range(dataset.vocabulary_size):
                token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
            token_list_file.close()

            character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
            for character_index in range(dataset.alphabet_size):
                if character_index == dataset.PADDING_CHARACTER_INDEX:
                    character_list_file.write('PADDING\n')
                else:
                    character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
            character_list_file.close()


            # Initialize the model
            sess.run(tf.global_variables_initializer())
            # NOTE(review): pretrained token embeddings are loaded only when NOT
            # restoring a pretrained model -- presumably the restored checkpoint
            # already contains them; confirm against restore_model_parameters_from_pretrained_model.
            if not parameters['use_pretrained_model']:
                model.load_pretrained_token_embeddings(sess, dataset, parameters)

            # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
            bad_counter = 0 # number of epochs with no improvement on the validation test in terms of F1-score
            previous_best_valid_f1_score = 0
            # CRF transition parameters start random and are replaced by train_step /
            # the restored pretrained model below (unique labels + 2 extra states).
            transition_params_trained = np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
            model_saver = tf.train.Saver(max_to_keep=parameters['maximum_number_of_epochs'])  # defaults to saving all variables
            epoch_number = -1
            try:
                while True:
                    step = 0
                    epoch_number += 1
                    print('\nStarting epoch {0}'.format(epoch_number))

                    epoch_start_time = time.time()

                    if parameters['use_pretrained_model'] and epoch_number == 0:
                        # Restore pretrained model parameters
                        transition_params_trained = train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver)
                    elif epoch_number != 0:
                        # Train model: loop over all sequences of training set with shuffling
                        # (epoch 0 without a pretrained model is evaluation-only: both branches are skipped)
                        sequence_numbers=list(range(len(dataset.token_indices['train'])))
                        random.shuffle(sequence_numbers)
                        for sequence_number in sequence_numbers:
                            transition_params_trained = train.train_step(sess, dataset, sequence_number, model, transition_params_trained, parameters)
                            step += 1
                            if step % 10 == 0:
                                print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                    epoch_elapsed_training_time = time.time() - epoch_start_time
                    print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                    y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                    # Evaluate model: save and plot results
                    evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                    # Prediction-only mode: emit brat output once, then stop
                    if parameters['use_pretrained_model'] and not parameters['train_model']:
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                        break

                    # Save model
                    model_saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                    # Save TensorBoard logs
                    summary = sess.run(model.summary_op, feed_dict=None)
                    writers['train'].add_summary(summary, epoch_number)
                    writers['train'].flush()
                    utils.copytree(writers['train'].get_logdir(), model_folder)


                    # Early stop: count epochs without improvement of the validation micro F1-score
                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                    if  valid_f1_score > previous_best_valid_f1_score:
                        bad_counter = 0
                        previous_best_valid_f1_score = valid_f1_score
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    else:
                        bad_counter += 1
                    print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                    if bad_counter >= parameters['patience']:
                        print('Early Stop!')
                        results['execution_details']['early_stop'] = True
                        break

                    if epoch_number >= parameters['maximum_number_of_epochs']: break


            except KeyboardInterrupt:
                results['execution_details']['keyboard_interrupt'] = True
                print('Training interrupted')

            print('Finishing the experiment')
            end_time = time.time()
            results['execution_details']['train_duration'] = end_time - start_time
            results['execution_details']['train_end'] = end_time
            print('ok1')  # leftover debug tracing
            evaluate.save_results(results, stats_graph_folder)
            print('ok2')  # leftover debug tracing
        print('ok3')  # leftover debug tracing
        #sess.close() # release the session's resources
    print('ok4')  # leftover debug tracing
示例#38
0
    def fit(self):
        '''
        Train the model on the training split.

        Runs the train/evaluate loop one epoch at a time -- writing
        checkpoints, TensorBoard logs and evaluation results under a
        per-experiment stats folder -- until early stopping, the epoch
        limit, or a keyboard interrupt. Updates
        self.transition_params_trained whenever the validation F1 improves.
        '''
        parameters = self.parameters
        conf_parameters = self.conf_parameters
        dataset_filepaths = self.dataset_filepaths
        dataset = self.dataset
        dataset_brat_folders = self.dataset_brat_folders
        sess = self.sess
        model = self.model
        transition_params_trained = self.transition_params_trained
        stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

        # Initialize and record the execution details of this run
        start_time = time.time()
        results = {}
        results['epoch'] = {}
        '''
        An epoch, in Machine Learning, is the entire processing by the learning algorithm of the entire train-set.
        Ex:
        The MNIST train set is composed by 55000 samples. Once the algorithm processed all those 55000 samples an epoch is passed.
        '''
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time                # Run start time
        results['execution_details']['time_stamp'] = experiment_timestamp       # Experiment timestamp
        results['execution_details']['early_stop'] = False                      # Whether training stopped early
        results['execution_details']['keyboard_interrupt'] = False              # Whether training was interrupted from the keyboard
        results['execution_details']['num_epochs'] = 0                          # Number of epochs run
        results['model_options'] = copy.copy(parameters)                        # Copy of the run parameters

        model_folder = os.path.join(stats_graph_folder, 'model')                # e.g. output/<experiment>/model
        utils.create_folder_if_not_exists(model_folder)
        # Save the parameter values into parameters.ini
        with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
            conf_parameters.write(parameters_file)                                          # Log the parameters to file
        pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))      # Pickle the dataset so later runs can reuse it

        # Create the tensorboard_logs folder, used for plotting later on
        tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')       # folder holding TensorBoard log files
        utils.create_folder_if_not_exists(tensorboard_log_folder)
        tensorboard_log_folders = {}
        for dataset_type in dataset_filepaths.keys():
            tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
            utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])

        # Instantiate the writers for TensorBoard
        writers = {} # At most one writer per split: train, test, valid, deploy
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
        embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

        # Used for visualizing embeddings with the TensorBoard projector
        embeddings_projector_config = projector.ProjectorConfig()
        tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
        token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
        tensorboard_token_embeddings.metadata_path = 'tensorboard_metadata_tokens.tsv'#os.path.relpath(token_list_file_path, '..')

        tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
        tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
        character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
        tensorboard_character_embeddings.metadata_path = 'tensorboard_metadata_characters.tsv'#os.path.relpath(character_list_file_path, '..')

        # Saves a configuration file that TensorBoard will read during startup.
        projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

        # Write the tokens to a tsv file used as metadata for the embeddings
        token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
        for token_index in range(dataset.vocabulary_size):
            token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
        token_list_file.close()

        # Write the characters to a tsv file used as metadata for the embeddings
        character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
        for character_index in range(dataset.alphabet_size):
            if character_index == dataset.PADDING_CHARACTER_INDEX:
                character_list_file.write('PADDING\n')
            else:
                character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
        character_list_file.close()


        # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
        bad_counter = 0 # number of epochs with no improvement on the validation test in terms of F1-score
        previous_best_valid_f1_score = 0 # best F1-score seen in earlier epochs
        epoch_number = -1
        try:
            while True:
                step = 0
                epoch_number += 1
                print('\nStarting epoch {0}'.format(epoch_number))

                epoch_start_time = time.time()

                if epoch_number != 0:
                    # Train model: loop over all sequences of training set with shuffling
                    # (epoch 0 is evaluation-only)
                    sequence_numbers=list(range(len(dataset.token_indices['train'])))
                    # Leftover debug output
                    print("----****____")
                    print(dataset.token_indices['train'][:10])
                    random.shuffle(sequence_numbers)
                    # Perform the training steps
                    # NOTE(review): unlike other variants of this loop, train_step here
                    # is not passed transition_params_trained -- confirm its signature.
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                        step += 1
                        if step % 10 == 0:
                            print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                # Measure how long one epoch took
                epoch_elapsed_training_time = time.time() - epoch_start_time
                print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                # Evaluate model: save and plot results
                evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                # Prediction-only mode: emit brat output once, then stop
                if parameters['use_pretrained_model'] and not parameters['train_model']:
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                    break

                # Save model
                model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                # Save TensorBoard logs
                summary = sess.run(model.summary_op, feed_dict=None)
                writers['train'].add_summary(summary, epoch_number)
                writers['train'].flush()
                utils.copytree(writers['train'].get_logdir(), model_folder)


                # Early stop
                valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                # If this epoch's validation score beats the previous best, reset the counter
                if  valid_f1_score > previous_best_valid_f1_score:
                    bad_counter = 0
                    previous_best_valid_f1_score = valid_f1_score
                    conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    self.transition_params_trained = transition_params_trained
                else:
                    bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                # If bad_counter reaches the limit parameters['patience'], finish training
                if bad_counter >= parameters['patience']:
                    print('Early Stop!')
                    results['execution_details']['early_stop'] = True
                    break

                # If the epoch count reaches the configured maximum, stop training
                if epoch_number >= parameters['maximum_number_of_epochs']: break


        except KeyboardInterrupt:
            results['execution_details']['keyboard_interrupt'] = True
            print('Training interrupted')

        # Training is done: record timings and results, then close the writers
        print('Finishing the experiment')
        end_time = time.time()
        results['execution_details']['train_duration'] = end_time - start_time
        results['execution_details']['train_end'] = end_time
        evaluate.save_results(results, stats_graph_folder)
        for dataset_type in dataset_filepaths.keys():
            writers[dataset_type].close()
示例#39
0
    def make_output_dirs(self):
        """Build the project exports: stage files into a temp dir, zip them,
        and produce one export per enabled platform setting (mac/windows/linux).

        Errors are accumulated into self.output_err instead of being raised;
        the temp directory is always removed in the finally block.
        """
        self.output_err = ''
        try:
            self.progress_text = 'Removing old output directory...\n'

            output_dir = utils.path_join(self.output_dir(), self.project_name())
            if os.path.exists(output_dir):
                utils.rmtree(output_dir, ignore_errors=True)

            temp_dir = utils.path_join(TEMP_DIR, 'webexectemp')
            if os.path.exists(temp_dir):
                utils.rmtree(temp_dir, ignore_errors=True)

            self.progress_text = 'Making new directories...\n'

            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            os.makedirs(temp_dir)

            self.copy_files_to_project_folder()

            json_file = utils.path_join(self.project_dir(), 'package.json')

            global_json = utils.get_data_file_path('files/global.json')

            # package.json is only written when requested; global.json always is
            if self.output_package_json:
                with codecs.open(json_file, 'w+', encoding='utf-8') as f:
                    f.write(self.generate_json())


            with codecs.open(global_json, 'w+', encoding='utf-8') as f:
                f.write(self.generate_json(global_json=True))

            zip_file = utils.path_join(temp_dir, self.project_name()+'.nw')

            app_nw_folder = utils.path_join(temp_dir, self.project_name()+'.nwf')

            # NOTE(review): shutil.ignore_patterns matches basenames against glob
            # patterns; passing the full output_dir path here likely never matches
            # anything -- confirm the intended exclusion.
            utils.copytree(self.project_dir(), app_nw_folder,
                           ignore=shutil.ignore_patterns(output_dir))

            zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
            for ex_setting in self.settings['export_settings'].values():
                if ex_setting.value:
                    self.progress_text = '\n'
                    name = ex_setting.display_name
                    self.progress_text = u'Making files for {}...'.format(name)
                    export_dest = utils.path_join(output_dir, ex_setting.name)
                    # Extract major.minor.patch from the selected version string
                    versions = re.findall('(\d+)\.(\d+)\.(\d+)', self.selected_version())[0]

                    # From minor version 12 on, the export folder is named 'nwjs'
                    # instead of 'node-webkit'
                    minor = int(versions[1])
                    if minor >= 12:
                        export_dest = export_dest.replace('node-webkit', 'nwjs')

                    if os.path.exists(export_dest):
                        utils.rmtree(export_dest, ignore_errors=True)

                    # shutil will make the directory for us
                    utils.copytree(get_data_path('files/'+ex_setting.name),
                                   export_dest,
                                    ignore=shutil.ignore_patterns('place_holder.txt'))
                    utils.rmtree(get_data_path('files/'+ex_setting.name), ignore_errors=True)
                    self.progress_text += '.'

                    if 'mac' in ex_setting.name:
                        # Mac export: rename the .app bundle, patch its Info.plist,
                        # then embed the app payload and icon under Contents/Resources
                        uncomp_setting = self.get_setting('uncompressed_folder')
                        uncompressed = uncomp_setting.value
                        app_path = utils.path_join(export_dest,
                                                self.project_name()+'.app')

                        # Newer runtimes ship nwjs.app; fall back to node-webkit.app
                        try:
                            utils.move(utils.path_join(export_dest,
                                                     'nwjs.app'),
                                       app_path)
                        except IOError:
                            utils.move(utils.path_join(export_dest,
                                                     'node-webkit.app'),
                                       app_path)

                        plist_path = utils.path_join(app_path, 'Contents', 'Info.plist')

                        plist_dict = plistlib.readPlist(plist_path)

                        plist_dict['CFBundleDisplayName'] = self.project_name()
                        plist_dict['CFBundleName'] = self.project_name()
                        version_setting = self.get_setting('version')
                        plist_dict['CFBundleShortVersionString'] = version_setting.value
                        plist_dict['CFBundleVersion'] = version_setting.value

                        plistlib.writePlist(plist_dict, plist_path)


                        self.progress_text += '.'

                        app_nw_res = utils.path_join(app_path,
                                                  'Contents',
                                                  'Resources',
                                                  'app.nw')

                        # Ship the payload either as a plain folder or as the zipped .nw
                        if uncompressed:
                            utils.copytree(app_nw_folder, app_nw_res)
                        else:
                            utils.copy(zip_file, app_nw_res)
                        self.create_icns_for_app(utils.path_join(app_path,
                                                              'Contents',
                                                              'Resources',
                                                              'nw.icns'))

                        self.progress_text += '.'
                    else:
                        # Windows/linux export: join the runtime binary and the
                        # zipped app into a single executable
                        ext = ''
                        windows = False
                        if 'windows' in ex_setting.name:
                            ext = '.exe'
                            windows = True

                        nw_path = utils.path_join(export_dest,
                                               ex_setting.dest_files[0])

                        if windows:
                            self.replace_icon_in_exe(nw_path)

                        self.compress_nw(nw_path)

                        dest_binary_path = utils.path_join(export_dest,
                                                        self.project_name() +
                                                        ext)
                        if 'linux' in ex_setting.name:
                            self.make_desktop_file(dest_binary_path, export_dest)

                        join_files(dest_binary_path, nw_path, zip_file)

                        # 755: rwx for owner, r-x for group and others
                        sevenfivefive = (stat.S_IRWXU |
                                         stat.S_IRGRP |
                                         stat.S_IXGRP |
                                         stat.S_IROTH |
                                         stat.S_IXOTH)
                        os.chmod(dest_binary_path, sevenfivefive)

                        self.progress_text += '.'

                        # The runtime stub was merged into dest_binary_path; drop it
                        if os.path.exists(nw_path):
                            os.remove(nw_path)

        except Exception:
            # Collect the full traceback for display instead of re-raising
            # (note: the `unicode` builtin makes this block Python 2 only)
            error = u''.join([unicode(x) for x in traceback.format_exception(sys.exc_info()[0],
                                                                             sys.exc_info()[1],
                                                                             sys.exc_info()[2])])
            self.logger.error(error)
            self.output_err += error
        finally:
            utils.rmtree(temp_dir, ignore_errors=True)
示例#40
0
    def test_check_project_test_workflow(self):
        """ Validate new project to test via zuul
        """
        # We want to create a project, provide project source
        # code with tests. We then configure zuul/jjb to handle the
        # run of the test cases. We then validate Gerrit has been
        # updated about the test results
        # We use the sample-project (that already exists)
        # NOTE: file(), xrange and the 0755 octal literal make this Python 2 only.

        pname = 'test_workflow_%s' % create_random_str()
        logger.info("Creating project %s" % pname)

        # Create it
        self.create_project(pname)

        logger.info("Populating the project with %s" %
                    self.sample_project_dir)
        # Add the sample-project to the empty repository
        clone_dir = self.clone_as_admin(pname)
        copytree(self.sample_project_dir, clone_dir)
        self.commit_direct_push_as_admin(clone_dir, "Add the sample project")

        # Change to config/{zuul,jobs}/projects.yaml
        # in order to test the new project
        logger.info("Adding config-repo configuration")
        # Point the zuul config at the new project by swapping in its name
        ycontent = file(os.path.join(
            self.config_clone_dir, "zuul/projects.yaml")).read()
        file(os.path.join(
            self.config_clone_dir, "zuul/projects.yaml"), 'w').write(
            ycontent.replace("zuul-demo", pname),
        )
        # Duplicate the zuul-demo job definition under the new project's name
        # and append it to jobs/projects.yaml
        ycontent2 = load(file(os.path.join(
            self.config_clone_dir, "jobs/projects.yaml")).read())
        sp2 = copy.deepcopy(
            [p for p in ycontent2 if 'project' in p and
                p['project']['name'] == 'zuul-demo'][0])
        sp2['project']['name'] = pname
        ycontent2.append(sp2)
        file(os.path.join(
            self.config_clone_dir, "jobs/projects.yaml"), 'w').write(
            dump(ycontent2))

        # Send review (config-check) will be triggered
        logger.info("Submitting the config review")
        change_sha = self.push_review_as_admin(
            self.config_clone_dir,
            "Add config definition in Zuul/JJB config for %s" % pname)

        change_nr = self.gu.get_change_number(change_sha)

        logger.info("Waiting for verify +1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), 1)

        # review the config change as a member from the config-core group
        logger.info("Approving and waiting for verify +2")
        self.gu2.submit_change_note(change_nr, "current", "Code-Review", "2")
        self.gu2.submit_change_note(change_nr, "current", "Workflow", "1")

        # Poll up to ~60s for the gate job's Verified +2 vote
        for retry in xrange(60):
            jenkins_vote = self.gu.get_vote(change_nr, "Verified")
            if jenkins_vote == 2:
                break
            time.sleep(1)
        self.assertEquals(jenkins_vote, 2)

        # verify whether zuul merged the patch
        logger.info("Waiting for change to be merged")
        for retry in xrange(60):
            change_status = self.gu.get_info(change_nr)['status']
            if change_status == "MERGED":
                break
            time.sleep(1)
        self.assertEqual(change_status, 'MERGED')
        # The config repo was modified; flag it for restoration in teardown
        self.need_restore_config_repo = True

        logger.info("Waiting for config-update")
        config_update_log = self.ju.wait_for_config_update(change_sha)
        self.assertIn("Finished: SUCCESS", config_update_log)

        # Propose a change on a the repo and expect a Verified +1
        logger.info("Submiting a test change to %s" % pname)
        change_sha = self.gitu_admin.add_commit_and_publish(
            clone_dir, 'master', "Add useless file",
            self.un)

        change_nr = self.gu.get_change_number(change_sha)

        logger.info("Waiting for verify +1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), 1)

        # Update the change on a the repo and expect a Verified -1
        # (run_tests.sh exiting 1 makes the job fail deliberately)
        logger.info("Submiting a test change to %s suppose to fail" % pname)
        data = "#!/bin/bash\nexit 1\n"
        file(os.path.join(clone_dir, "run_tests.sh"), 'w').write(data)
        os.chmod(os.path.join(clone_dir, "run_tests.sh"), 0755)
        self.gitu_admin.add_commit_and_publish(
            clone_dir, "master", None, fnames=["run_tests.sh"])

        logger.info("Waiting for verify -1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), -1)

        logger.info("Validate jobs ran via the job api %s" % pname)
        # This piece of code is there by convenience ...
        # TODO: Should be moved in the job api tests file.
        # Test the manageSF jobs API: query per patch & revision
        change_ids = self.gu.get_my_changes_for_project(pname)
        self.assertGreater(len(change_ids), 0)
        change_id = change_ids[0]
        patch = self.gu.get_change_last_patchset(change_id)['_number']
        cookie = get_cookie(config.ADMIN_USER, config.ADMIN_PASSWORD)
        cookies = {"auth_pubtkt": cookie}
        base_url = config.GATEWAY_URL + "/manage/jobs/"
        # Both the functional-tests and unit-tests jobs must be reported by Jenkins
        for j in ["%s-functional-tests" % pname, "%s-unit-tests" % pname]:
            job = requests.get(base_url + '%s/?change=%s' % (j, patch),
                               cookies=cookies).json()
            self.assertTrue("jenkins" in job.keys(),
                            job)
            self.assertTrue(len(job["jenkins"]) > 1,
                            job)
示例#41
0
文件: main.py 项目: braemy/NeuroNER
def main() -> None:
    """Train and evaluate the NeuroNER EntityLSTM model, optionally with
    k-fold cross-validation.

    Loads parameters and dataset file paths, then for each fold: builds a
    fresh TensorFlow graph and session, trains with periodic intermediate
    evaluation, saves checkpoints / results / TensorBoard logs under an
    output folder, and early-stops on the validation micro F1-score.
    When cross-validation is enabled, prints and writes the mean F1-score
    across folds at the end.
    """

    parameters, conf_parameters = load_parameters()
    pprint(parameters)
    dataset_filepaths = get_valid_dataset_filepaths(parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Number of cross-validation folds; defaults to 1 (single train/valid run).
    cross_validation = parameters[
        'cross_validation'] if 'cross_validation' in parameters else 1
    valid_fscores = []
    valid_precisions = []
    valid_recalls = []
    for cv in range(0, cross_validation):
        # NOTE(review): fold splitting only triggers when the train path
        # contains "als" — presumably a dataset-naming convention where the
        # train corpus is pre-split into files "<train>_0", "<train>_1", ...;
        # confirm against how dataset_filepaths['train'] is produced.
        if "als" in dataset_filepaths['train'] and cross_validation > 1:
            # Concatenate every fold except `cv` into a temporary train file
            # and use fold `cv` alone as the temporary validation file.
            train_files = list(range(0, cv)) + list(
                range(cv + 1, cross_validation))
            test_file = cv
            file_train = "tmp_combined.train"
            file_valid = "tmp_combined.test"
            output = []
            for i in train_files:
                with open(dataset_filepaths['train'] + "_" + str(i),
                          "r",
                          encoding="utf-8") as file:
                    output.append(file.read())
            with open(file_train, "w", encoding="utf-8") as file:
                file.write("\n\n".join(output))
            output = []
            with open(dataset_filepaths['train'] + "_" + str(test_file),
                      "r",
                      encoding="utf-8") as file:
                output.append(file.read())
            with open(file_valid, "w", encoding="utf-8") as file:
                file.write("\n\n".join(output))
            # Point the loaders at the temporary per-fold files.
            dataset_filepaths['train'] = file_train
            dataset_filepaths['valid'] = file_valid
        # Load dataset
        dataset = ds.Dataset(verbose=parameters['verbose'],
                             debug=parameters['debug'])
        dataset.load_vocab_word_embeddings(parameters)
        dataset.load_dataset(dataset_filepaths, parameters)

        # Create graph and session (a fresh graph per fold)
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                device_count={
                    'CPU': 1,
                    'GPU': parameters['number_of_gpus']
                },
                allow_soft_placement=
                True,  # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False)

            # Allocate GPU memory on demand instead of reserving it all up front.
            session_conf.gpu_options.allow_growth = True

            sess = tf.Session(config=session_conf)

            with sess.as_default():
                # Initialize and save execution details
                start_time = time.time()
                experiment_timestamp = utils.get_current_time_in_miliseconds()
                results = {}
                results['epoch'] = {}
                results['execution_details'] = {}
                results['execution_details']['train_start'] = start_time
                results['execution_details'][
                    'time_stamp'] = experiment_timestamp
                results['execution_details']['early_stop'] = False
                results['execution_details']['keyboard_interrupt'] = False
                results['execution_details']['num_epochs'] = 0
                results['model_options'] = copy.copy(parameters)

                dataset_name = utils.get_basename_without_extension(
                    parameters['dataset_train'])
                # NOTE(review): 'data_to_use' tags the model name with
                # "_small" — presumably a reduced-data run; confirm upstream.
                if 'data_to_use' in parameters:
                    model_name = '{0}_{1}'.format(
                        parameters['language'] + "_" + dataset_name + "_small",
                        results['execution_details']['time_stamp'])
                else:
                    model_name = '{0}_{1}'.format(
                        parameters['language'] + "_" + dataset_name,
                        results['execution_details']['time_stamp'])

                output_folder = os.path.join('..', 'output')
                utils.create_folder_if_not_exists(output_folder)
                stats_graph_folder = os.path.join(
                    output_folder, model_name)  # Folder where to save graphs
                utils.create_folder_if_not_exists(stats_graph_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                # Save the parameter settings alongside the model for later reuse.
                with open(os.path.join(model_folder, 'parameters.ini'),
                          'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder,
                                                      'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(
                        stats_graph_folder, 'tensorboard_logs', dataset_type)
                    utils.create_folder_if_not_exists(
                        tensorboard_log_folders[dataset_type])
                #del dataset.embeddings_matrix
                # Persist the dataset (vocabulary/index mappings) so a
                # pretrained model can be reloaded against the same indices.
                if not parameters['use_pretrained_model']:
                    pickle.dump(
                        dataset,
                        open(os.path.join(model_folder, 'dataset.pickle'),
                             'wb'))
                #dataset.load_pretrained_word_embeddings(parameters)
                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(
                        tensorboard_log_folders[dataset_type],
                        graph=sess.graph)
                embedding_writer = tf.summary.FileWriter(
                    model_folder
                )  # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(
                    token_list_file_path, '..')

                if parameters['use_character_lstm']:
                    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add(
                    )
                    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                    character_list_file_path = os.path.join(
                        model_folder, 'tensorboard_metadata_characters.tsv')
                    tensorboard_character_embeddings.metadata_path = os.path.relpath(
                        character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer,
                                               embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w',
                                              'UTF-8')
                for token_index in range(len(dataset.index_to_token)):
                    token_list_file.write('{0}\n'.format(
                        dataset.index_to_token[token_index]))
                token_list_file.close()

                if parameters['use_character_lstm']:
                    character_list_file = codecs.open(character_list_file_path,
                                                      'w', 'UTF-8')
                    for character_index in range(dataset.alphabet_size):
                        if character_index == dataset.PADDING_CHARACTER_INDEX:
                            character_list_file.write('PADDING\n')
                        else:
                            character_list_file.write('{0}\n'.format(
                                dataset.index_to_character[character_index]))
                    character_list_file.close()

                try:
                    # Initialize the model
                    sess.run(tf.global_variables_initializer())
                    if not parameters['use_pretrained_model']:
                        model.load_pretrained_token_embeddings(
                            sess, dataset, parameters)

                    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                    bad_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
                    previous_best_valid_f1_score = 0
                    transition_params_trained = np.random.rand(
                        len(dataset.unique_labels), len(dataset.unique_labels)
                    )  #TODO np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
                    model_saver = tf.train.Saver(
                        max_to_keep=None
                    )  #parameters['maximum_number_of_epochs'])  # defaults to saving all variables
                    epoch_number = 0

                    while True:
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))

                        epoch_start_time = time.time()

                        if parameters[
                                'use_pretrained_model'] and epoch_number == 1:
                            # Restore pretrained model parameters
                            transition_params_trained = train.restore_model_parameters_from_pretrained_model(
                                parameters, dataset, sess, model, model_saver)
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(
                                range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            data_counter = 0
                            sub_id = 0
                            for i in tqdm(range(0, len(sequence_numbers),
                                                parameters['batch_size']),
                                          "Training",
                                          mininterval=1):
                                data_counter += parameters['batch_size']
                                # Every ~20000 training sequences, run an
                                # intermediate evaluation and checkpoint.
                                if data_counter >= 20000:
                                    data_counter = 0
                                    # Fractional epoch id so intermediate
                                    # checkpoints sort between integer epochs.
                                    sub_id += 0.001
                                    print("Intermediate evaluation number: ",
                                          sub_id)

                                    #model_saver.save(sess,
                                    #                 os.path.join(model_folder, 'model_{0:05d}_{1}.ckpt'.format(epoch_number, len(sequence_numbers)/4/len(sequence_numbers))))
                                    epoch_elapsed_training_time = time.time(
                                    ) - epoch_start_time
                                    print(
                                        'Training completed in {0:.2f} seconds'
                                        .format(epoch_elapsed_training_time),
                                        flush=True)

                                    y_pred, y_true, output_filepaths = train.predict_labels(
                                        sess, model, transition_params_trained,
                                        parameters, dataset,
                                        epoch_number + sub_id,
                                        stats_graph_folder, dataset_filepaths)

                                    # Evaluate model: save and plot results
                                    evaluate.evaluate_model(
                                        results, dataset, y_pred, y_true,
                                        stats_graph_folder, epoch_number,
                                        epoch_start_time, output_filepaths,
                                        parameters)

                                    # Save model
                                    model_saver.save(
                                        sess,
                                        os.path.join(
                                            model_folder,
                                            'model_{0:07.3f}.ckpt'.format(
                                                epoch_number + sub_id)))

                                    # Save TensorBoard logs
                                    summary = sess.run(model.summary_op,
                                                       feed_dict=None)
                                    writers['train'].add_summary(
                                        summary, epoch_number)
                                    writers['train'].flush()
                                    utils.copytree(
                                        writers['train'].get_logdir(),
                                        model_folder)

                                    # Early stop
                                    valid_f1_score = results['epoch'][
                                        epoch_number][0]['valid']['f1_score'][
                                            'micro']
                                    # valid_precision = results['epoch'][epoch_number][0]['valid']['precision']['micro']
                                    # valid_recall = results['epoch'][epoch_number][0]['valid']['recall']['micro']

                                    # valid_fscores.append(valid_f1_score)
                                    if valid_f1_score > previous_best_valid_f1_score:
                                        bad_counter = 0
                                        previous_best_valid_f1_score = valid_f1_score
                                        # previous_best_valid_precision = valid_precision
                                        # previous_best_valid_recall = valid_recall
                                    else:
                                        bad_counter += 1

                                sequence_number = sequence_numbers[
                                    i:i + parameters['batch_size']]
                                transition_params_trained, loss = train.train_step(
                                    sess, dataset, sequence_number, model,
                                    transition_params_trained, parameters)
                        epoch_elapsed_training_time = time.time(
                        ) - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(
                            epoch_elapsed_training_time),
                              flush=True)

                        # End-of-epoch prediction on all dataset splits.
                        y_pred, y_true, output_filepaths = train.predict_labels(
                            sess, model, transition_params_trained, parameters,
                            dataset, epoch_number, stats_graph_folder,
                            dataset_filepaths)

                        # Evaluate model: save and plot results
                        evaluate.evaluate_model(results, dataset, y_pred,
                                                y_true, stats_graph_folder,
                                                epoch_number, epoch_start_time,
                                                output_filepaths, parameters)

                        # Save model
                        model_saver.save(
                            sess,
                            os.path.join(
                                model_folder,
                                'model_{0:05d}.ckpt'.format(epoch_number)))

                        # Save TensorBoard logs
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(),
                                       model_folder)

                        # Early stop
                        valid_f1_score = results['epoch'][epoch_number][0][
                            'valid']['f1_score']['micro']
                        #valid_precision = results['epoch'][epoch_number][0]['valid']['precision']['micro']
                        #valid_recall = results['epoch'][epoch_number][0]['valid']['recall']['micro']

                        #valid_fscores.append(valid_f1_score)
                        if valid_f1_score > previous_best_valid_f1_score:
                            bad_counter = 0
                            previous_best_valid_f1_score = valid_f1_score
                            #previous_best_valid_precision = valid_precision
                            #previous_best_valid_recall = valid_recall
                        else:
                            bad_counter += 1
                        print(
                            "The last {0} epochs have not shown improvements on the validation set."
                            .format(bad_counter))

                        # Stop when validation F1 has not improved for
                        # `patience` consecutive epochs.
                        if bad_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            break

                        if epoch_number >= parameters[
                                'maximum_number_of_epochs']:
                            break

                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')
                    # remove the experiment
                    remove_experiment = input(
                        "Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    else:
                        print('Finishing the experiment')
                        end_time = time.time()
                        results['execution_details'][
                            'train_duration'] = end_time - start_time
                        results['execution_details']['train_end'] = end_time
                        evaluate.save_results(results, stats_graph_folder)
                except Exception:
                    # Log the full traceback, then let the user decide
                    # whether to keep the (possibly partial) experiment output.
                    logging.exception("")
                    remove_experiment = input(
                        "Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")

        sess.close()  # release the session's resources
        # Record this fold's best validation F1 for the cross-fold summary.
        if 'cross_validation' in parameters and parameters[
                'cross_validation'] > 1:
            valid_fscores.append(previous_best_valid_f1_score)
            #valid_precisions.append(previous_best_valid_precision)
            #valid_recalls.append(previous_best_valid_recall)
    # Cross-validation summary: mean F1 across folds, also written to disk.
    if 'cross_validation' in parameters and parameters['cross_validation'] > 1:
        print("mean f1score:", np.mean(valid_fscores))
        #print("mean precision:", np.mean(valid_precisions))
        #print("mean recall:", np.mean(valid_recalls))
        with codecs.open(os.path.join(stats_graph_folder, "result_cv.txt"),
                         "w") as file:
            file.write("F1score " + ", ".join(map(str, valid_fscores)))
            # file.write("Precision " + valid_precisions)
            # file.write("Recall " + valid_recalls)
            file.write("Mean F1score " + str(np.mean(valid_fscores)))
Example #42
0
def main(argv=sys.argv):
    ''' NeuroNER main method

    Args:
        parameters_filepath the path to the parameters file
        output_folder the path to the output folder
    '''
    arguments = parse_arguments(argv[1:])
    parameters, conf_parameters = load_parameters(
        arguments['parameters_filepath'], arguments=arguments)
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(
        parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'],
                         debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                device_count={
                    'CPU': 1,
                    'GPU': parameters['number_of_gpus']
                },
                allow_soft_placement=True,
                # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False)

            sess = tf.Session(config=session_conf)

            with sess.as_default():

                start_time = time.time()
                experiment_timestamp = utils.get_current_time_in_miliseconds()
                results = {}
                results['epoch'] = {}
                results['execution_details'] = {}
                results['execution_details']['train_start'] = start_time
                results['execution_details'][
                    'time_stamp'] = experiment_timestamp
                results['execution_details']['early_stop'] = False
                results['execution_details']['keyboard_interrupt'] = False
                results['execution_details']['num_epochs'] = 0
                results['model_options'] = copy.copy(parameters)

                dataset_name = utils.get_basename_without_extension(
                    parameters['dataset_text_folder'])
                model_name = dataset_name
                utils.create_folder_if_not_exists(parameters['output_folder'])
                stats_graph_folder = os.path.join(
                    parameters['output_folder'],
                    model_name)  # Folder where to save graphs
                final_weights_folder = os.path.join(
                    parameters['output_folder'], 'weights')
                utils.create_folder_if_not_exists(stats_graph_folder)
                utils.create_folder_if_not_exists(final_weights_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                # saving the parameter setting to the output model dir. For later resuming training
                with open(os.path.join(model_folder, 'parameters.ini'),
                          'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder,
                                                      'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(
                        stats_graph_folder, 'tensorboard_logs', dataset_type)
                    utils.create_folder_if_not_exists(
                        tensorboard_log_folders[dataset_type])
                pickle.dump(
                    dataset,
                    open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(
                        tensorboard_log_folders[dataset_type],
                        graph=sess.graph)
                # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings
                embedding_writer = tf.summary.FileWriter(model_folder)

                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(
                    token_list_file_path, '..')

                tensorboard_character_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                character_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_characters.tsv')
                tensorboard_character_embeddings.metadata_path = os.path.relpath(
                    character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer,
                                               embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w',
                                              'latin-1')
                for token_index in range(dataset.vocabulary_size):
                    token_list_file.write('{0}\n'.format(
                        dataset.index_to_token[token_index]))
                token_list_file.close()

                character_list_file = codecs.open(character_list_file_path,
                                                  'w', 'latin-1')
                for character_index in range(dataset.alphabet_size):
                    if character_index == dataset.PADDING_CHARACTER_INDEX:
                        character_list_file.write('PADDING\n')
                    else:
                        character_list_file.write('{0}\n'.format(
                            dataset.index_to_character[character_index]))
                character_list_file.close()

                # Initialize the model
                sess.run(tf.global_variables_initializer())
                if not parameters['use_pretrained_model']:
                    model.load_pretrained_token_embeddings(
                        sess, dataset, parameters)

                # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                patience_counter = 0
                f1_score_best = 0
                f1_scores = {'train-F1': [], 'valid-F1': [], 'test-F1': []}
                f1_scores_conll = {
                    'train-F1': [],
                    'valid-F1': [],
                    'test-F1': []
                }
                transition_params_trained = np.random.rand(
                    len(dataset.unique_labels) + 2,
                    len(dataset.unique_labels) + 2)
                model_saver = tf.train.Saver(
                    max_to_keep=parameters['num_of_model_to_keep'])
                epoch_number = -1
                try:
                    while True:
                        step = 0
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))

                        epoch_start_time = time.time()

                        # use pre-trained model and epoch_number = 0
                        if parameters[
                                'use_pretrained_model'] and epoch_number == 0:

                            if parameters['use_adapter']:
                                parameters['use_adapter'] = False
                                transition_params_trained = train.restore_pretrained_model(
                                    parameters, dataset, sess, model,
                                    model_saver)
                                print(
                                    'Getting the 3-label predictions from the step1 model.'
                                )
                                all_pred_labels, y_pred_for_adapter, y_true_for_adapter, \
                                output_filepaths = train.predict_labels(sess, model,
                                                                        transition_params_trained,
                                                                        parameters, dataset,
                                                                        epoch_number,
                                                                        stats_graph_folder,
                                                                        dataset_filepaths,
                                                                        for_adapter=True)
                                # use the label2idx mapping (for adapter) in the dataset to transform all_pred_labels
                                all_pred_indices = {}
                                # Map the step-1 predicted label strings to
                                # integer indices using the adapter label
                                # vocabulary, one list of sequences per split.
                                for dataset_type in dataset_filepaths.keys():
                                    all_pred_indices[dataset_type] = []
                                    for i in range(
                                            len(all_pred_labels[dataset_type])
                                    ):
                                        indices = [
                                            dataset.
                                            label_adapter_to_index[label]
                                            for label in
                                            all_pred_labels[dataset_type][i]
                                        ]
                                        all_pred_indices[dataset_type].append(
                                            indices)

                                # and use binarizer to transform to ndarray
                                # Fit over the full contiguous index range
                                # [0, max_index] so every adapter label gets a
                                # one-hot column, even if unseen in predictions.
                                label_binarizer_adapter = sklearn.preprocessing.LabelBinarizer(
                                )
                                label_binarizer_adapter.fit(
                                    range(
                                        max(dataset.index_to_label_adapter.
                                            keys()) + 1))
                                # One-hot encode each predicted index sequence,
                                # per split; these vectors later stand in for
                                # dataset.label_adapter_vector_indices during
                                # evaluation.
                                predicted_label_adapter_vector_indices = {}
                                for dataset_type in dataset_filepaths.keys():
                                    predicted_label_adapter_vector_indices[
                                        dataset_type] = []
                                    for label_indices_sequence in all_pred_indices[
                                            dataset_type]:
                                        predicted_label_adapter_vector_indices[
                                            dataset_type].append(
                                                label_binarizer_adapter.
                                                transform(
                                                    label_indices_sequence))
                                # From here on the adapter path is active.
                                parameters['use_adapter'] = True

                            # Warm-start from a pretrained model. When new
                            # classes are added the restore also returns the
                            # (re-built) model and its global-step variable.
                            if parameters['train_model'] and parameters[
                                    'add_class']:
                                transition_params_trained, model, glo_step = \
                                    train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess,
                                                                                         model, model_saver)
                                # Re-initialize the restored global step so
                                # training restarts counting from zero.
                                # tf.initialize_variables is deprecated (and
                                # removed in later TF); tf.variables_initializer
                                # is the documented drop-in replacement.
                                init_new_vars_op = tf.variables_initializer(
                                    [glo_step])
                                sess.run(init_new_vars_op)
                            else:
                                # No new classes: restore weights only and keep
                                # the pretrained CRF transition parameters.
                                transition_params_trained = \
                                    train.restore_pretrained_model(parameters, dataset, sess, model, model_saver)

                            # One TensorBoard writer per dataset split, all
                            # sharing the current session graph.
                            for dataset_type in dataset_filepaths.keys():
                                writers[dataset_type] = tf.summary.FileWriter(
                                    tensorboard_log_folders[dataset_type],
                                    graph=sess.graph)
                                # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings
                                # NOTE(review): this writer is re-created on every
                                # iteration although it does not depend on
                                # dataset_type — it could be hoisted out of the
                                # loop; confirm nothing relies on the extra
                                # event files before changing.
                                embedding_writer = tf.summary.FileWriter(
                                    model_folder)

                        # epoch_number != 0, no matter use or not use pre-trained model
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(
                                range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            for sequence_number in sequence_numbers:
                                # One update per shuffled training sequence;
                                # the CRF transition parameters are refreshed
                                # after every step.
                                transition_params_trained, W_before_crf = train.train_step(
                                    sess, dataset, sequence_number, model,
                                    transition_params_trained, parameters)
                                step += 1
                        # Wall-clock time for this epoch's training phase only
                        # (evaluation below is timed separately via epoch_start_time).
                        epoch_elapsed_training_time = time.time(
                        ) - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(
                            epoch_elapsed_training_time),
                              flush=False)

                        if parameters[
                                'use_adapter']:  # model evaluation, using adapter
                            # pass the pred_for_adapter as label_indices vector
                            # Temporarily swap the gold adapter vectors for the
                            # predicted ones so prediction/evaluation consume
                            # them; the originals are restored right after.
                            original_label_adapter_vector_indices = dataset.label_adapter_vector_indices
                            dataset.label_adapter_vector_indices = predicted_label_adapter_vector_indices
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)

                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true, stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)
                            # Put the gold adapter vectors back in place.
                            dataset.label_adapter_vector_indices = original_label_adapter_vector_indices

                        else:  # model evaluation,  not using adapter
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)

                            # Evaluate model: save and plot results
                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true, stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)

                        # Log this epoch's summaries (train split only) and
                        # mirror the train log dir into the model folder.
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(),
                                       model_folder)

                        # Early stopping
                        # Collect this epoch's weighted F1 (sklearn) per split.
                        train_f1_score = results['epoch'][epoch_number][0][
                            'train']['f1_score']['weighted']
                        valid_f1_score = results['epoch'][epoch_number][0][
                            'valid']['f1_score']['weighted']
                        test_f1_score = results['epoch'][epoch_number][0][
                            'test']['f1_score']['weighted']
                        f1_scores['train-F1'].append(train_f1_score)
                        f1_scores['valid-F1'].append(valid_f1_score)
                        f1_scores['test-F1'].append(test_f1_score)

                        # Also track the conll-style micro F1 per split.
                        train_f1_score_conll = results['epoch'][epoch_number][
                            0]['train']['f1_conll']['micro']
                        valid_f1_score_conll = results['epoch'][epoch_number][
                            0]['valid']['f1_conll']['micro']
                        test_f1_score_conll = results['epoch'][epoch_number][
                            0]['test']['f1_conll']['micro']
                        f1_scores_conll['train-F1'].append(
                            train_f1_score_conll)
                        f1_scores_conll['valid-F1'].append(
                            valid_f1_score_conll)
                        f1_scores_conll['test-F1'].append(test_f1_score_conll)

                        # Early stopping is keyed on the weighted valid F1:
                        # any improvement resets patience and checkpoints the
                        # best model; otherwise patience accumulates.
                        if valid_f1_score > f1_score_best:
                            patience_counter = 0
                            f1_score_best = valid_f1_score
                            # Save the best model
                            model_saver.save(
                                sess,
                                os.path.join(model_folder, 'best_model.ckpt'))
                            print(
                                'updated model to current epoch : epoch {:d}'.
                                format(epoch_number))
                            print('the model is saved in: {:s}'.format(
                                model_folder))
                        else:
                            patience_counter += 1
                        print("In epoch {:d}, the valid F1 is : {:f}".format(
                            epoch_number, valid_f1_score))
                        print(
                            "The last {0} epochs have not shown improvements on the validation set."
                            .format(patience_counter))

                        if patience_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            # save last model
                            model_saver.save(
                                sess,
                                os.path.join(model_folder, 'last_model.ckpt'))
                            print('the last model is saved in: {:s}'.format(
                                model_folder))

                            break

                        # Hard stop once the epoch budget is exhausted, unless
                        # CRF refinement is configured to keep training.
                        if epoch_number >= parameters[
                                'maximum_number_of_epochs'] and not parameters[
                                    'refine_with_crf']:
                            break
                    # The plot filename encodes which training stage this run
                    # was: step1 = from scratch, step2 = from a pretrained model.
                    if not parameters['use_pretrained_model']:
                        plot_name = 'F1-summary-step1.svg'
                    else:
                        plot_name = 'F1-summary-step2.svg'

                    # Dump the per-epoch F1 curves (sklearn-weighted and
                    # conll-micro) and render the summary plot next to the
                    # stats folder.
                    print('Sklearn result:')
                    for k, l in f1_scores.items():
                        print(k, l)

                    print('Conll result:')
                    for k, l in f1_scores_conll.items():
                        print(k, l)
                    utils_plots.plot_f1(
                        f1_scores,
                        os.path.join(stats_graph_folder, '..', plot_name),
                        'F1 score summary')

                    # TODO: in step 1, for task a, add the best deploy data to step 2 train set, and call script
                    # Comma-joined per-epoch test F1 (micro then macro) for
                    # easy copy-paste into spreadsheets.
                    print('(sklearn micro) test F1:')
                    micro_f1 = ','.join([
                        str(results['epoch'][ep][0]['test']['f1_score']
                            ['micro']) for ep in range(epoch_number + 1)
                    ])
                    print(micro_f1)
                    print('(sklearn macro) test F1:')
                    macro_f1 = ','.join([
                        str(results['epoch'][ep][0]['test']['f1_score']
                            ['macro']) for ep in range(epoch_number + 1)
                    ])
                    print(macro_f1)

                except KeyboardInterrupt:
                    # Ctrl-C aborts training but still falls through to the
                    # result-saving / cleanup code below.
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')

                # Runs on normal completion, early stop, and interrupt alike:
                # record timings, persist results, close all split writers.
                print('Finishing the experiment')
                end_time = time.time()
                results['execution_details'][
                    'train_duration'] = end_time - start_time
                results['execution_details']['train_end'] = end_time
                evaluate.save_results(results, stats_graph_folder)
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type].close()

    sess.close()  # release the session's resources