    def __configure_master_host(self):
        # Clear the slaves file and write the master host's address to the Hadoop masters file
        commands = [
            settings.HADOOP_CLEAN_SLAVES_FILE,
            "echo "
            + settings.SYSTEM_HOSTNAME_PREFIX
            + remove_dots(self.master_host)
            + " > "
            + settings.HADOOP_MASTER_FILE,
        ]

        # Append each slave host's address to the Hadoop slaves file
        for host in self.slave_hosts:
            commands.append(
                "echo " + settings.SYSTEM_HOSTNAME_PREFIX + remove_dots(host) + " >> " + settings.HADOOP_SLAVES_FILE
            )

        if self.__storage_mode == "nfs":
            commands.append(settings.HADOOP_START_MAPRED)
        elif self.__storage_mode == "hdfs":
            commands.append(settings.HADOOP_FORMAT_DFS)
            commands.append(settings.HADOOP_START_ALL_SERVICES)

        # Run the commands on the master host as the Hadoop user
        self.sshClient.run_commands_on_host(
            self.master_host, commands, settings.SYSTEM_HADOOP_USER_NAME, settings.SYSTEM_HADOOP_USER_PASSWORD
        )

        print "Waiting %s seconds for nodes to become ready" % (settings.HADOOP_WAIT_TIME)
        time.sleep(settings.HADOOP_WAIT_TIME)
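
# A minimal sketch of the remove_dots helper these methods rely on; it is not
# shown in this listing, so this is an assumption inferred from usage: it
# strips the dots from an address so the result can be embedded in a hostname.
def remove_dots(address):
    # e.g. "10.0.0.5" -> "10005", later prefixed with SYSTEM_HOSTNAME_PREFIX
    return address.replace(".", "")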
    def prepare_environment(self):
        """
        Prepares the system environment: updates the hosts list,
        sets the hostname, and applies the urandom and ulimit fixes.
        """

        hosts_file_update_command = self.__generate_hosts_update_command()
        hosts_dict = {}
        for host in self.__hosts:
            commands = [settings.SYSTEM_URANDOM_FIX, settings.SYSTEM_ULIMIT_FIX]
            commands.append("echo " + settings.SYSTEM_HOSTNAME_PREFIX + remove_dots(host) + " > /etc/hostname")
            commands.append("hostname -v " + settings.SYSTEM_HOSTNAME_PREFIX + remove_dots(host))
            commands.extend(hosts_file_update_command)
            hosts_dict.update({host: commands})

        self.sshClient.run_distinct_commands_on_hosts(
            hosts_dict, settings.SYSTEM_ROOT_USER_NAME, settings.SYSTEM_ROOT_USER_PASSWORD
        )
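
# Hypothetical values for the system-fix constants used above; the settings
# module is not part of this listing, so these are illustrative assumptions
# only. The urandom fix points Java's entropy source at the non-blocking pool,
# and the ulimit fix raises the open-file limit for the hadoop user.
SYSTEM_URANDOM_FIX = "rm -f /dev/random && ln -s /dev/urandom /dev/random"
SYSTEM_ULIMIT_FIX = "echo 'hadoop - nofile 65536' >> /etc/security/limits.conf"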
    def __start_task_trackers(self):
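        # Point the masters file at the current master, re-register the compute
        # hosts as slaves, and start the MapReduce daemons on all of them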
        commands = [
            settings.HADOOP_CLEAN_SLAVES_FILE,
            "echo "
            + settings.SYSTEM_HOSTNAME_PREFIX
            + remove_dots(self.master_host)
            + " > "
            + settings.HADOOP_MASTER_FILE,
        ]

        for compute_host in self.__compute_hosts:
            commands.append(
                "echo "
                + settings.SYSTEM_HOSTNAME_PREFIX
                + remove_dots(compute_host)
                + " >> "
                + settings.HADOOP_SLAVES_FILE
            )

        commands.append(settings.HADOOP_START_MAPRED)
        self.sshClient.run_commands_on_host(
            self.master_host, commands, settings.SYSTEM_HADOOP_USER_NAME, settings.SYSTEM_HADOOP_USER_PASSWORD
        )
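
# A minimal paramiko-based sketch of the run_commands_on_host interface that
# sshClient is assumed to provide; the real client is not shown here, so the
# names and behaviour below are assumptions, not the project's implementation.
import paramiko

def run_commands_on_host(host, commands, username, password):
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(host, username=username, password=password)
    try:
        for command in commands:
            _, stdout, _ = client.exec_command(command)
            stdout.channel.recv_exit_status()  # wait for the command to finish
    finally:
        client.close()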
    def __generate_hosts_update_command(self):
        """
        Generates the commands that rebuild the /etc/hosts file on each host
        """

        hosts_file_update = [settings.SYSTEM_CLEAN_HOSTS_FILE]
        for host in self.__hosts:
            hosts_file_update.append(
                "echo '"
                + host
                + settings.WHITESPACE
                + settings.SYSTEM_HOSTNAME_PREFIX
                + remove_dots(host)
                + "' >> /etc/hosts"
            )
        return hosts_file_update
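
# Illustrative expansion of __generate_hosts_update_command, assuming two hosts
# and SYSTEM_HOSTNAME_PREFIX = "vm" with WHITESPACE = " " (both assumptions;
# the settings module is not shown). Each command appends one address-to-
# hostname mapping:
#
#   echo '10.0.0.5 vm10005' >> /etc/hosts
#   echo '10.0.0.6 vm10006' >> /etc/hosts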
    def __start_data_hosts(self):
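        # Register the data hosts as slaves, then format the namenode and
        # bring up the HDFS daemons from the master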
        commands = [settings.HADOOP_CLEAN_SLAVES_FILE]
        for data_host in self.__data_hosts:
            commands.append(
                "echo "
                + settings.SYSTEM_HOSTNAME_PREFIX
                + remove_dots(data_host)
                + " >> "
                + settings.HADOOP_SLAVES_FILE
            )

        commands.append(settings.HADOOP_FORMAT_DFS)
        commands.append(settings.HADOOP_START_DFS)
        self.sshClient.run_commands_on_host(
            self.master_host, commands, settings.SYSTEM_HADOOP_USER_NAME, settings.SYSTEM_HADOOP_USER_PASSWORD
        )

        print "Waiting %s seconds for nodes to become ready" % (settings.HADOOP_WAIT_TIME)
        time.sleep(settings.HADOOP_WAIT_TIME)
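
# Hypothetical shell commands behind the Hadoop control constants used above,
# matching the Hadoop 0.20/1.x-era control scripts; the actual settings module
# is not shown, so the install path and exact values are assumptions.
HADOOP_FORMAT_DFS = "/opt/hadoop/bin/hadoop namenode -format"
HADOOP_START_DFS = "/opt/hadoop/bin/start-dfs.sh"
HADOOP_START_MAPRED = "/opt/hadoop/bin/start-mapred.sh"
HADOOP_START_ALL_SERVICES = "/opt/hadoop/bin/start-all.sh"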
    def configure_slave_hosts_hdfs(self):
        logger.info("Preparing the following VMs with HDFS: %s" % self.__hosts)
        commands = [
            settings.SYSTEM_KILL_JAVA,
            settings.SYSTEM_CLEAN_TMP,
            settings.HADOOP_DISABLE_HOST_KEY_CHECK,
            settings.HADOOP_UPDATE_ENV,
        ]

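        # Write conf/hdfs-site.xml: block size, xciever limit, replication factor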
        commands.append(
            """cat >"""
            + settings.HADOOP_INSTALL_DIR
            + """/conf/hdfs-site.xml <<EOF 
<?xml version="1.0"?> 
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>  
<configuration> 
    <property> 
        <name>dfs.block.size</name> 
        <value>"""
            + settings.HADOOP_BLOCK_SIZE
            + """</value> 
        <final>true</final>
    </property>

    <property>
       <name>dfs.datanode.max.xcievers</name>
       <value>"""
            + settings.HADOOP_MAX_XCIEVERS
            + """</value>
    </property>
      
    <property> 
        <name>dfs.replication</name> 
        <value>"""
            + settings.HADOOP_RELICATION_FACTOR
            + """</value> 
        <final>true</final>
    </property>
</configuration>
EOF"""
        )

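        # Write conf/mapred-site.xml: job tracker address, child JVM heap, task slots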
        commands.append(
            """cat >"""
            + settings.HADOOP_INSTALL_DIR
            + """/conf/mapred-site.xml <<EOF 
<?xml version="1.0"?> 
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>  
<configuration>
  <property> 
    <name>mapred.job.tracker</name> 
    <value>"""
            + settings.SYSTEM_HOSTNAME_PREFIX
            + remove_dots(self.master_host)
            + """:8021</value> 
  </property> 

  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx"""
            + settings.HADOOP_XMX_SIZE
            + """m -Xmn"""
            + settings.HADOOP_XMN_SIZE
            + """m</value>
  </property>

  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>"""
            + settings.HADOOP_MAX_NUMBER_OF_MAP_SLOTS
            + """</value>
  </property>

  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>"""
            + settings.HADOOP_MAX_NUMBER_OF_REDUCE_SLOTS
            + """</value>
  </property>
</configuration> 
EOF"""
        )

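        # Write conf/core-site.xml: default filesystem URI and I/O buffer size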
        commands.append(
            """cat >"""
            + settings.HADOOP_INSTALL_DIR
            + """/conf/core-site.xml <<EOF 
<?xml version="1.0"?> 
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>  
<configuration> 
  <property> 
    <name>fs.default.name</name> 
    <value>hdfs://"""
            + settings.SYSTEM_HOSTNAME_PREFIX
            + remove_dots(self.master_host)
            + """</value> 
  </property>
  
  <property> 
    <name>io.file.buffer.size</name> 
    <value>"""
            + settings.HADOOP_IO_FILE_BUFFER_SIZE
            + """</value> 
  </property>
</configuration>
EOF"""
        )

        self.sshClient.run_same_commands_on_hosts(
            self.__hosts, commands, settings.SYSTEM_HADOOP_USER_NAME, settings.SYSTEM_HADOOP_USER_PASSWORD
        )
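
# An alternative sketch showing the same hdfs-site.xml heredoc built from a
# template string instead of concatenation; purely illustrative, not the
# author's code, and trimmed to two of the three properties for brevity.
HDFS_SITE_COMMAND = """cat >%(install_dir)s/conf/hdfs-site.xml <<EOF
<?xml version="1.0"?>
<configuration>
  <property><name>dfs.block.size</name><value>%(block_size)s</value></property>
  <property><name>dfs.replication</name><value>%(replication)s</value></property>
</configuration>
EOF"""

commands.append(HDFS_SITE_COMMAND % {
    "install_dir": settings.HADOOP_INSTALL_DIR,
    "block_size": settings.HADOOP_BLOCK_SIZE,
    "replication": settings.HADOOP_RELICATION_FACTOR,
})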