Context Navigation

← Previous Change
Next Change →

Changeset 5bf359d for fedd/federation

Timestamp:

May 28, 2010 3:16:46 AM (15 years ago)

Author:

Ted Faber <faber@…>

Branches:

axis_example, compt_changes, info-ops, master, version-3.01, version-3.02

Children:

Parents:

Message:

More refactoring. Neaten up the code for creating segments in emulab and make the local and proxy class structures parallel. The code is more readable this way, I hope.

Location:

fedd/federation

Files:

: 1 added
: 3 edited

local_emulab_segment.py (modified) (7 diffs)
local_segment.py (added)
proxy_emulab_segment.py (modified) (7 diffs)
proxy_segment.py (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

fedd/federation/local_emulab_segment.py

-                      r06cc65b
+                      r5bf359d
 import util
+class local_emulab_segment:
+    class cmd_timeout(RuntimeError): pass
+from local_segment import local_segment
+class start_segment(local_segment):
     def __init__(self, log=None, keyfile=None, debug=False):
+        self.log = log or logging.getLogger(\
+                'fedd.access.proxy_emulab_segment')
+        self.certfile = keyfile
+        self.debug = debug
+        self.cmd_timeout = local_emulab_segment.cmd_timeout
+    def copy_file(self, src, dest, size=1024):
+        """
+        Exceedingly simple file copy.
+        """
+        if not self.debug:
+            util.copy_file(src, dest, size)
+        else:
+            self.log.debug("Copy %s to %s" % (src, dest))
+    def cmd_with_timeout(self, cmd, wname=None, timeout=None):
+        """
+        Run a command.  If debug is set, the action
+        is only logged.  Commands are run without stdin, to avoid stray
+        SIGTTINs. If timeout is given and the command runs longer, a
+        cmd_timeout exception is thrown.
+        """
+        try:
+            dnull = open("/dev/null", "w")
+        except EnvironmentError:
+            self.log.debug("[cmd_with_timeout]: failed to open /dev/null " + \
+                    "for redirect")
+            dnull = Null
+        self.log.debug("[cmd_with_timeout]: %s" % cmd)
+        if not self.debug:
+            if dnull:
+                sub = subprocess.Popen(cmd, shell=True, stdout=dnull,
+                        stderr=dnull, close_fds=True)
+            else:
+                sub = subprocess.Popen(cmd, shell=True, close_fds=True)
+            if timeout:
+                i = 0
+                rv = sub.poll()
+                while i < timeout:
+                    if rv is not None: break
+                    else:
+                        time.sleep(1)
+                        rv = sub.poll()
+                        i += 1
+                else:
+                    self.log.debug("Process exceeded runtime: %s" % cmd)
+                    os.kill(sub.pid, signal.SIGKILL)
+                    raise self.cmd_timeout();
+                return rv == 0
+            else:
+                return sub.wait() == 0
+        else:
+            if timeout == 0:
+                self.log.debug("debug timeout raised on %s " % cmd)
+                raise self.cmd_timeout()
+            else:
+                return True
+class start_segment(local_emulab_segment):
+    def __init__(self, log=None, keyfile=None, debug=False):
+        local_emulab_segment.__init__(self, log=log,
+                keyfile=keyfile, debug=debug)
+        local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
         self.null = """
 set ns [new Simulator]
 …
     def get_state(self, pid, eid):
+        """
+        Return the state of the experiment as reported by emulab
+        """
         # command to test experiment state
         expinfo_exec = "/usr/testbed/bin/expinfo"
 …
     def get_mapping(self, pid, eid):
+        """
+        Get the physical to virtual mapping from the expinfo command and save
+        it in the self.map member.
+        """
         # command to test experiment state
         expinfo_exec = "/usr/testbed/bin/expinfo"
 …
             return True
+    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
+        """
+        Start a sub-experiment on a federant.
+        Get the current state, modify or create as appropriate, ship data
+        and configs and start the experiment.  There are small ordering
+        differences based on the initial state of the sub-experiment.
+        """
+    def make_null_experiment(self, pid, eid, tmpdir):
+        """
+        Create a null copy of the experiment so that we capture any logs there
+        if the modify fails.  Emulab software discards the logs from a failed
+        startexp.
+        """
+        try:
+            f = open("%s/null.tcl" % tmpdir, "w")
+            print >>f, self.null
+            f.close()
+        except EnvironmentError, e:
+            raise service_error(service_error.internal,
+                    "Cannot stage null.tcl: %s" % e.strerror)
+        timedout = False
+        try:
+            if not self.cmd_with_timeout(
+                    ("/usr/testbed/bin/startexp -i -f -w -p %s " +
+                    "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",
+                    timeout=60 * 10):
+                return False
+        except self.cmd_timeout:
+            timedout = True
+        if timedout:
+            state = self.get_state(pid, eid)
+            return state == "swapped"
+        else:
+            return True
+    def set_up_experiment_filespace(self, pid, eid, tmpdir):
         # Configuration directories on this machine
         proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
 …
         lsoftdir = "%s/software" % tmpdir
-        state = self.get_state(pid, eid)
-        if state == 'none':
-            # Create a null copy of the experiment so that we capture any
-            # logs there if the modify fails.  Emulab software discards the
-            # logs from a failed startexp
-            try:
-                f = open("%s/null.tcl" % tmpdir, "w")
-                print >>f, self.null
-                f.close()
-            except EnvironmentError, e:
-                raise service_error(service_error.internal,
-                        "Cannot stage null.tcl: %s" % e.strerror)
-            timedout = False
-            try:
-                if not self.cmd_with_timeout(
-                        ("/usr/testbed/bin/startexp -i -f -w -p %s " +
-                        "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",
-                        timeout=60 * 10):
-                    return False
-            except self.cmd_timeout:
-                timedout = True
-            if timedout:
-                state = self.get_state(pid, eid)
-                if state != "swapped":
-                    return False
         # Set up the experiment's file space
         if not self.cmd_with_timeout("/bin/rm -rf %s" % proj_dir):
 …
             return False
+        return True
+    def swap_in(self, pid, eid):
+        """
+        Swap experiment in.  This includes code to cope with the experiment
+        swapping command timing out, but the experiment being swapped in
+        successfully.
+        """
+        self.log.info("[start_segment]: Swapping %s" % eid)
+        timedout = False
+        try:
+            if not self.cmd_with_timeout(
+                    "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
+                    "swapexp", timeout=25*60):
+                return False
+        except self.cmd_timeout:
+            timedout = True
+        # If the command was terminated, but completed successfully,
+        # report success.
+        if timedout:
+            self.log.debug("[start_segment]: swapin timed out " +\
+                    "checking state")
+            state = self.get_state(pid, eid)
+            self.log.debug("[start_segment]: state is %s" % state)
+            return state == 'active'
+        else:
+            return True
+    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
+        """
+        Start a sub-experiment on a federant.
+        Get the current state, modify or create as appropriate, ship data
+        and configs and start the experiment.  There are small ordering
+        differences based on the initial state of the sub-experiment.
+        """
+        state = self.get_state(pid, eid)
+        if state == 'none':
+            if not self.make_null_experiment(pid, eid, tmpdir):
+                return False
+        if not self.set_up_experiment_filespace(pid, eid, tmpdir):
+            return False
         # Stage the new configuration (active experiments will stay swapped
         # in now)
 …
         # Active experiments are still swapped, this swaps the others in.
         if state != 'active':
+            self.log.info("[start_segment]: Swapping %s" % eid)
+            timedout = False
+            try:
+                if not self.cmd_with_timeout(
+                        "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
+                        "swapexp", timeout=25*60):
+                    return False
+            except self.cmd_timeout:
+                timedout = True
+            # If the command was terminated, but completed successfully,
+            # report success.
+            if timedout:
+                self.log.debug("[start_segment]: swapin timed out " +\
+                        "checking state")
+                state = self.get_state(pid, eid)
+                self.log.debug("[start_segment]: state is %s" % state)
+                if state != 'active':
+                    return False
+            if not self.swap_in(pid, eid):
+                return False
         # Everything has gone OK.
         self.get_mapping(pid,eid)
         return True
 class stop_segment(local_emulab_segment):
+class stop_segment(local_segment):
     def __init__(self, log=None, keyfile=None, debug=False):
+        local_emulab_segment.__init__(self,
+                log=log, keyfile=keyfile, debug=debug)
+        local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
     def __call__(self, parent, user, pid, eid):

fedd/federation/proxy_emulab_segment.py

-                      r06cc65b
+                      r5bf359d
 class start_segment(proxy_segment):
+    """
+    This starts an experiment on an emulab accessed remotely via ssh.  Most of
+    the experiment construction has been done by the emulab_access object.  This
+    just does the wrangling of the emulab commands and collects the node to
+    physical mapping.  The routine throws service errors.
+    """
     def __init__(self, log=None, keyfile=None, debug=False):
         proxy_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
 …
     def get_state(self, user, host, pid, eid):
+        """
+        Return the state of the experiment as reported by emulab
+        """
         # command to test experiment state
         expinfo_exec = "/usr/testbed/bin/expinfo"
 …
     def get_mapping(self, user, host, pid, eid):
+        """
+        Get the physical to virtual mapping from the expinfo command and save
+        it in the self.map member.
+        """
         # command to test experiment state
         expinfo_exec = "/usr/testbed/bin/expinfo"
 …
+    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
+        """
+        Start a sub-experiment on a federant.
+        Get the current state, modify or create as appropriate, ship data
+        and configs and start the experiment.  There are small ordering
+        differences based on the initial state of the sub-experiment.
+        """
+        # ops node in the federant
+        host = "%s%s" % (parent.ops, parent.domain)
+    def make_null_experiment(self, user, host, pid, eid, tmpdir):
+        """
+        Create a null copy of the experiment so that we capture any logs there
+        if the modify fails.  Emulab software discards the logs from a failed
+        startexp
+        """
+        try:
+            f = open("%s/null.tcl" % tmpdir, "w")
+            print >>f, self.null
+            f.close()
+        except EnvironmentError, e:
+            raise service_error(service_error.internal,
+                    "Cannot stage tarfile/rpm: %s" % e.strerror)
+        if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
+            return False
+        self.log.info("[start_segment]: Creating %s" % eid)
+        timedout = False
+        try:
+            if not self.ssh_cmd(user, host,
+                    ("/usr/testbed/bin/startexp -i -f -w -p %s " +
+                    "-e %s null.tcl") % (pid, eid), "startexp",
+                    timeout=60 * 10):
+                return False
+        except self.ssh_cmd_timeout:
+            timedout = True
+        if timedout:
+            state = self.get_state(user, host, pid, eid)
+            if state != "swapped":
+                return False
+        return True
+    def set_up_experiment_filespace(self, user, host, pid, eid, tmpdir):
+        """
+        Send all the software and configuration files into the experiment's
+        file space.  To reduce the number of ssh connections, we script many
+        changes and execute the script.
+        """
         # Configuration directories on the remote machine
         proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
 …
         lsoftdir = "%s/software" % tmpdir
-        state = self.get_state(user, host, pid, eid)
-        if not self.scp_file(tclfile, user, host):
-            return False
-        if state == 'none':
-            # Create a null copy of the experiment so that we capture any
-            # logs there if the modify fails.  Emulab software discards the
-            # logs from a failed startexp
-            try:
-                f = open("%s/null.tcl" % tmpdir, "w")
-                print >>f, self.null
-                f.close()
-            except EnvironmentError, e:
-                raise service_error(service_error.internal,
-                        "Cannot stage tarfile/rpm: %s" % e.strerror)
-            if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
-                return False
-            self.log.info("[start_segment]: Creating %s" % eid)
-            timedout = False
-            try:
-                if not self.ssh_cmd(user, host,
-                        ("/usr/testbed/bin/startexp -i -f -w -p %s " +
-                        "-e %s null.tcl") % (pid, eid), "startexp",
-                        timeout=60 * 10):
-                    return False
-            except self.ssh_cmd_timeout:
-                timedout = True
-            if timedout:
-                state = self.get_state(user, host, pid, eid)
-                if state != "swapped":
-                    return False
         # Open up a temporary file to contain a script for setting up the
         # filespace for the new experiment.
 …
                             user, host, "%s/%s" % (softdir, f)):
                         return False
+        # Stage the new configuration (active experiments will stay swapped
+        # in now)
+        return True
+    def swap_in(self, user, host, pid, eid):
+        """
+        Swap experiment in.  This includes code to cope with the experiment
+        swapping command timing out, but the experiment being swapped in
+        successfully.
+        """
+        self.log.info("[start_segment]: Swapping %s in" % eid)
+        timedout = False
+        try:
+            if not self.ssh_cmd(user, host,
+                    "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
+                    "swapexp", timeout=25*60):
+                return False
+        except self.ssh_cmd_timeout:
+            timedout = True
+        # If the command was terminated, but completed successfully,
+        # report success.
+        if timedout:
+            self.log.debug("[start_segment]: swapin timed out " +\
+                    "checking state")
+            state = self.get_state(user, host, pid, eid)
+            self.log.debug("[start_segment]: state is %s" % state)
+            return state == 'active'
+        return True
+    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
+        """
+        Start a sub-experiment on a federant.
+        Get the current state, modify or create as appropriate, ship data
+        and configs and start the experiment.  There are small ordering
+        differences based on the initial state of the sub-experiment.
+        """
+        # ops node in the federant
+        host = "%s%s" % (parent.ops, parent.domain)
+        state = self.get_state(user, host, pid, eid)
+        if not self.scp_file(tclfile, user, host):
+            return False
+        if state == 'none':
+            # Put a dummy in place to capture logs, and establish an experiment
+            # directory.
+            if not self.make_null_experiment(user, host, pid, eid, tmpdir):
+                return False
+        if not self.set_up_experiment_filespace(user, host, pid, eid, tmpdir):
+            return False
+        # With the filespace in place, we can modify and swap in.
         self.log.info("[start_segment]: Modifying %s" % eid)
         try:
 …
         # Active experiments are still swapped, this swaps the others in.
         if state != 'active':
+            self.log.info("[start_segment]: Swapping %s" % eid)
+            timedout = False
+            try:
+                if not self.ssh_cmd(user, host,
+                        "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
+                        "swapexp", timeout=25*60):
+                    return False
+            except self.ssh_cmd_timeout:
+                timedout = True
+            # If the command was terminated, but completed successfully,
+            # report success.
+            if timedout:
+                self.log.debug("[start_segment]: swapin timed out " +\
+                        "checking state")
+                state = self.get_state(user, host, pid, eid)
+                self.log.debug("[start_segment]: state is %s" % state)
+                return state == 'active'
+            if not self.swap_in(user, host, pid, eid):
+                return False
         # Everything has gone OK.
         self.get_mapping(user, host, pid,eid)

fedd/federation/proxy_segment.py

-                      r06cc65b
+                      r5bf359d
 class proxy_segment:
+    """
+    Base class for segment starter classes that access their underlying testbed
+    remotely using ssh.  It is primarily a code repository for commonly used
+    ssh commands for moving code and logging in.
+    """
     class ssh_cmd_timeout(RuntimeError): pass
     def __init__(self, log=None, keyfile=None, debug=False):
+        self.log = log or logging.getLogger(\
+                'fedd.access.proxy_segment')
+        """
+        log is the logging.log to print messages to, keyfile is the private key
+        for ssh interactions and if debug is true, commands are not executed
+        using ssh.
+        """
+        self.log = log or logging.getLogger('fedd.access.proxy_segment')
         self.ssh_privkey_file = keyfile
         self.debug = debug

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: