Context Navigation

-                      rab37086
+                      rdb6b092
 from subprocess import *
+from urlparse import urlparse
+from urllib2 import urlopen
 from util import *
 from fedid import fedid, generate_fedid
 …
 from service_error import service_error
+import topdl
+from ip_allocator import ip_allocator
+from ip_addr import ip_addr
 class nullHandler(logging.Handler):
 …
     class list_log:
         """
         Provide an interface that lets logger.StreamHandler s write to a list
         of strings.
         """
         def __init__(self, l=[]):
             """
             Link to an existing list or just create a log
             """
             self.ll = l
             self.lock = Lock()
         def write(self, str):
             """
             Add the string to the log.  Lock for consistency.
             """
             self.lock.acquire()
             self.ll.append(str)
             self.lock.release()
         def flush(self):
             """
             No-op that StreamHandlers expect
             """
             pass
+        """
+        Provide an interface that lets logger.StreamHandler s write to a list
+        of strings.
+        """
+        def __init__(self, l=[]):
+            """
+            Link to an existing list or just create a log
+            """
+            self.ll = l
+            self.lock = Lock()
+        def write(self, str):
+            """
+            Add the string to the log.  Lock for consistency.
+            """
+            self.lock.acquire()
+            self.ll.append(str)
+            self.lock.release()
+        def flush(self):
+            """
+            No-op that StreamHandlers expect
+            """
+            pass
 …
         self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")
         self.overrides = set([])
         ovr = config.get('experiment_control', 'overrides')
         if ovr:
             for o in ovr.split(","):
                 o = o.strip()
                 if o.startswith('fedid:'): o = o[len('fedid:'):]
                 self.overrides.add(fedid(hexstr=o))
+        self.overrides = set([])
+        ovr = config.get('experiment_control', 'overrides')
+        if ovr:
+            for o in ovr.split(","):
+                o = o.strip()
+                if o.startswith('fedid:'): o = o[len('fedid:'):]
+                self.overrides.add(fedid(hexstr=o))
         self.state = { }
 …
         # Dispatch tables
         self.soap_services = {\
                 'Create': soap_handler('Create', self.create_experiment),
+                'Create': soap_handler('Create', self.new_create_experiment),
                 'Vtopo': soap_handler('Vtopo', self.get_vtopo),
                 'Vis': soap_handler('Vis', self.get_vis),
 …
         self.xmlrpc_services = {\
                 'Create': xmlrpc_handler('Create', self.create_experiment),
+                'Create': xmlrpc_handler('Create', self.new_create_experiment),
                 'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
                 'Vis': xmlrpc_handler('Vis', self.get_vis),
 …
                             if f.has_key('fedid') ]:
                     self.auth.set_attribute(self.state[k]['owner'], eid)
                     # allow overrides to control experiments as well
                     for o in self.overrides:
                         self.auth.set_attribute(o, eid)
+                    # allow overrides to control experiments as well
+                    for o in self.overrides:
+                        self.auth.set_attribute(o, eid)
             except KeyError, e:
                 self.log.warning("[read_state]: State ownership or identity " +\
 …
     class emulab_segment:
         def __init__(self, log=None, keyfile=None, debug=False):
             self.log = log or logging.getLogger(\
                     'fedd.experiment_control.emulab_segment')
             self.ssh_privkey_file = keyfile
             self.debug = debug
             self.ssh_exec="/usr/bin/ssh"
             self.scp_exec = "/usr/bin/scp"
             self.ssh_cmd_timeout = experiment_control_local.ssh_cmd_timeout
         def scp_file(self, file, user, host, dest=""):
             """
             scp a file to the remote host.  If debug is set the action is only
             logged.
             """
             scp_cmd = [self.scp_exec, '-o', 'IdentitiesOnly yes',
                     '-o', 'StrictHostKeyChecking yes', '-i',
                     self.ssh_privkey_file, file,
                     "%s@%s:%s" % (user, host, dest)]
             rv = 0
             try:
                 dnull = open("/dev/null", "w")
             except IOError:
                 self.log.debug("[ssh_file]: failed to open " + \
                         "/dev/null for redirect")
                 dnull = Null
             self.log.debug("[scp_file]: %s" % " ".join(scp_cmd))
             if not self.debug:
                 rv = call(scp_cmd, stdout=dnull, stderr=dnull, close_fds=True,
                         close_fds=True)
             return rv == 0
         def ssh_cmd(self, user, host, cmd, wname=None, timeout=None):
             """
             Run a remote command on host as user.  If debug is set, the action
             is only logged.  Commands are run without stdin, to avoid stray
             SIGTTINs.
             """
             sh_str = ("%s -n -o 'IdentitiesOnly yes' -o " + \
                     "'StrictHostKeyChecking yes' -i %s %s@%s %s") % \
                     (self.ssh_exec, self.ssh_privkey_file,
                             user, host, cmd)
             try:
                 dnull = open("/dev/null", "w")
             except IOError:
                 self.log.debug("[ssh_cmd]: failed to open /dev/null " + \
                         "for redirect")
                 dnull = Null
             self.log.debug("[ssh_cmd]: %s" % sh_str)
             if not self.debug:
                 if dnull:
                     sub = Popen(sh_str, shell=True, stdout=dnull, stderr=dnull,
                             close_fds=True)
                 else:
                     sub = Popen(sh_str, shell=True,
                             close_fds=True)
                 if timeout:
                     i = 0
                     rv = sub.poll()
                     while i < timeout:
                         if rv is not None: break
                         else:
                             time.sleep(1)
                             rv = sub.poll()
                             i += 1
                     else:
                         self.log.debug("Process exceeded runtime: %s" % sh_str)
                         os.kill(sub.pid, signal.SIGKILL)
                         raise self.ssh_cmd_timeout();
                     return rv == 0
                 else:
                     return sub.wait() == 0
             else:
                 if timeout == 0:
                     self.log.debug("debug timeout raised on %s " % sh_str)
                     raise self.ssh_cmd_timeout()
                 else:
                     return True
+        def __init__(self, log=None, keyfile=None, debug=False):
+            self.log = log or logging.getLogger(\
+                    'fedd.experiment_control.emulab_segment')
+            self.ssh_privkey_file = keyfile
+            self.debug = debug
+            self.ssh_exec="/usr/bin/ssh"
+            self.scp_exec = "/usr/bin/scp"
+            self.ssh_cmd_timeout = experiment_control_local.ssh_cmd_timeout
+        def scp_file(self, file, user, host, dest=""):
+            """
+            scp a file to the remote host.  If debug is set the action is only
+            logged.
+            """
+            scp_cmd = [self.scp_exec, '-o', 'IdentitiesOnly yes',
+                    '-o', 'StrictHostKeyChecking yes', '-i',
+                    self.ssh_privkey_file, file,
+                    "%s@%s:%s" % (user, host, dest)]
+            rv = 0
+            try:
+                dnull = open("/dev/null", "w")
+            except IOError:
+                self.log.debug("[ssh_file]: failed to open " + \
+                        "/dev/null for redirect")
+                dnull = Null
+            self.log.debug("[scp_file]: %s" % " ".join(scp_cmd))
+            if not self.debug:
+                rv = call(scp_cmd, stdout=dnull, stderr=dnull, close_fds=True,
+                        close_fds=True)
+            return rv == 0
+        def ssh_cmd(self, user, host, cmd, wname=None, timeout=None):
+            """
+            Run a remote command on host as user.  If debug is set, the action
+            is only logged.  Commands are run without stdin, to avoid stray
+            SIGTTINs.
+            """
+            sh_str = ("%s -n -o 'IdentitiesOnly yes' -o " + \
+                    "'StrictHostKeyChecking yes' -i %s %s@%s %s") % \
+                    (self.ssh_exec, self.ssh_privkey_file,
+                            user, host, cmd)
+            try:
+                dnull = open("/dev/null", "w")
+            except IOError:
+                self.log.debug("[ssh_cmd]: failed to open /dev/null " + \
+                        "for redirect")
+                dnull = Null
+            self.log.debug("[ssh_cmd]: %s" % sh_str)
+            if not self.debug:
+                if dnull:
+                    sub = Popen(sh_str, shell=True, stdout=dnull, stderr=dnull,
+                            close_fds=True)
+                else:
+                    sub = Popen(sh_str, shell=True,
+                            close_fds=True)
+                if timeout:
+                    i = 0
+                    rv = sub.poll()
+                    while i < timeout:
+                        if rv is not None: break
+                        else:
+                            time.sleep(1)
+                            rv = sub.poll()
+                            i += 1
+                    else:
+                        self.log.debug("Process exceeded runtime: %s" % sh_str)
+                        os.kill(sub.pid, signal.SIGKILL)
+                        raise self.ssh_cmd_timeout();
+                    return rv == 0
+                else:
+                    return sub.wait() == 0
+            else:
+                if timeout == 0:
+                    self.log.debug("debug timeout raised on %s " % sh_str)
+                    raise self.ssh_cmd_timeout()
+                else:
+                    return True
     class start_segment(emulab_segment):
         def __init__(self, log=None, keyfile=None, debug=False):
             experiment_control_local.emulab_segment.__init__(self,
                     log=log, keyfile=keyfile, debug=debug)
         def create_config_tree(self, src_dir, dest_dir, script):
             """
             Append commands to script that will create the directory hierarchy
             on the remote federant.
             """
             if os.path.isdir(src_dir):
                 print >>script, "mkdir -p %s" % dest_dir
                 print >>script, "chmod 770 %s" % dest_dir
                 for f in os.listdir(src_dir):
                     if os.path.isdir(f):
                         self.create_config_tree("%s/%s" % (src_dir, f),
                                 "%s/%s" % (dest_dir, f), script)
             else:
                 self.log.debug("[create_config_tree]: Not a directory: %s" \
                         % src_dir)
         def ship_configs(self, host, user, src_dir, dest_dir):
             """
             Copy federant-specific configuration files to the federant.
             """
             for f in os.listdir(src_dir):
                 if os.path.isdir(f):
                     if not self.ship_configs(host, user, "%s/%s" % (src_dir, f),
                             "%s/%s" % (dest_dir, f)):
                         return False
                 else:
                     if not self.scp_file("%s/%s" % (src_dir, f),
                             user, host, dest_dir):
                         return False
             return True
         def get_state(self, user, host, tb, pid, eid):
             # command to test experiment state
             expinfo_exec = "/usr/testbed/bin/expinfo"
             # Regular expressions to parse the expinfo response
             state_re = re.compile("State:\s+(\w+)")
             no_exp_re = re.compile("^No\s+such\s+experiment")
             swapping_re = re.compile("^No\s+information\s+available.")
             state = None    # Experiment state parsed from expinfo
             # The expinfo ssh command.  Note the identity restriction to use
             # only the identity provided in the pubkey given.
             cmd = [self.ssh_exec, '-o', 'IdentitiesOnly yes', '-o',
                     'StrictHostKeyChecking yes', '-i',
                     self.ssh_privkey_file, "%s@%s" % (user, host),
                     expinfo_exec, pid, eid]
             dev_null = None
             try:
                 dev_null = open("/dev/null", "a")
             except IOError, e:
                 self.log.error("[get_state]: can't open /dev/null: %s" %e)
             if self.debug:
                 state = 'swapped'
                 rv = 0
             else:
                 status = Popen(cmd, stdout=PIPE, stderr=dev_null,
                         close_fds=True)
                 for line in status.stdout:
                     m = state_re.match(line)
                     if m: state = m.group(1)
                     else:
                         for reg, st in ((no_exp_re, "none"),
                                 (swapping_re, "swapping")):
                             m = reg.match(line)
                             if m: state = st
                 rv = status.wait()
             # If the experiment is not present the subcommand returns a
             # non-zero return value.  If we successfully parsed a "none"
             # outcome, ignore the return code.
             if rv != 0 and state != 'none':
                 raise service_error(service_error.internal,
                         "Cannot get status of segment %s:%s/%s" % \
                                 (tb, pid, eid))
             elif state not in ('active', 'swapped', 'swapping', 'none'):
                 raise service_error(service_error.internal,
                         "Cannot get status of segment %s:%s/%s" % \
                                 (tb, pid, eid))
             else: return state
         def __call__(self, tb, eid, tbparams, tmpdir, timeout=0):
             """
             Start a sub-experiment on a federant.
             Get the current state, modify or create as appropriate, ship data
             and configs and start the experiment.  There are small ordering
             differences based on the initial state of the sub-experiment.
             """
             # ops node in the federant
             host = "%s%s" % (tbparams[tb]['host'], tbparams[tb]['domain'])
             user = tbparams[tb]['user']     # federant user
             pid = tbparams[tb]['project']   # federant project
             # XXX
             base_confs = ( "hosts",)
             tclfile = "%s.%s.tcl" % (eid, tb)   # sub-experiment description
             # Configuration directories on the remote machine
             proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
             tarfiles_dir = "/proj/%s/tarfiles/%s" % (pid, eid)
             rpms_dir = "/proj/%s/rpms/%s" % (pid, eid)
             state = self.get_state(user, host, tb, pid, eid)
             self.log.debug("[start_segment]: %s: %s" % (tb, state))
             self.log.info("[start_segment]:transferring experiment to %s" % tb)
             if not self.scp_file("%s/%s/%s" % \
                     (tmpdir, tb, tclfile), user, host):
                 return False
             if state == 'none':
                 # Create a null copy of the experiment so that we capture any
                 # logs there if the modify fails.  Emulab software discards the
                 # logs from a failed startexp
                 if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
                     return False
                 self.log.info("[start_segment]: Creating %s on %s" % (eid, tb))
                 timedout = False
                 try:
                     if not self.ssh_cmd(user, host,
                             ("/usr/testbed/bin/startexp -i -f -w -p %s " +
                             "-e %s null.tcl") % (pid, eid), "startexp",
                             timeout=60 * 10):
                         return False
                 except self.ssh_cmd_timeout:
                     timedout = True
                 if timedout:
                     state = self.get_state(user, host, tb, pid, eid)
                     if state != "swapped":
                         return False
             # Open up a temporary file to contain a script for setting up the
             # filespace for the new experiment.
             self.log.info("[start_segment]: creating script file")
             try:
                 sf, scriptname = tempfile.mkstemp()
                 scriptfile = os.fdopen(sf, 'w')
             except IOError:
                 return False
             scriptbase = os.path.basename(scriptname)
             # Script the filesystem changes
             print >>scriptfile, "/bin/rm -rf %s" % proj_dir
             # Clear and create the tarfiles and rpm directories
             for d in (tarfiles_dir, rpms_dir):
                 print >>scriptfile, "/bin/rm -rf %s/*" % d
                 print >>scriptfile, "mkdir -p %s" % d
             print >>scriptfile, 'mkdir -p %s' % proj_dir
             self.create_config_tree("%s/%s" % (tmpdir, tb),
                     proj_dir, scriptfile)
             if os.path.isdir("%s/tarfiles" % tmpdir):
                 self.create_config_tree("%s/tarfiles" % tmpdir, tarfiles_dir,
                         scriptfile)
             if os.path.isdir("%s/rpms" % tmpdir):
                 self.create_config_tree("%s/rpms" % tmpdir, rpms_dir,
                         scriptfile)
             print >>scriptfile, "rm -f %s" % scriptbase
             scriptfile.close()
             # Move the script to the remote machine
             # XXX: could collide tempfile names on the remote host
             if self.scp_file(scriptname, user, host, scriptbase):
                 os.remove(scriptname)
             else:
                 return False
             # Execute the script (and the script's last line deletes it)
             if not self.ssh_cmd(user, host, "sh -x %s" % scriptbase):
                 return False
             for f in base_confs:
                 if not self.scp_file("%s/%s" % (tmpdir, f), user, host,
                         "%s/%s" % (proj_dir, f)):
                     return False
             if not self.ship_configs(host, user, "%s/%s" % (tmpdir, tb),
                     proj_dir):
                 return False
             if os.path.isdir("%s/tarfiles" % tmpdir):
                 if not self.ship_configs(host, user,
                         "%s/tarfiles" % tmpdir, tarfiles_dir):
                     return False
             if os.path.isdir("%s/rpms" % tmpdir):
                 if not self.ship_configs(host, user,
                         "%s/rpms" % tmpdir, tarfiles_dir):
                     return False
             # Stage the new configuration (active experiments will stay swapped
             # in now)
             self.log.info("[start_segment]: Modifying %s on %s" % (eid, tb))
             try:
                 if not self.ssh_cmd(user, host,
                         "/usr/testbed/bin/modexp -r -s -w %s %s %s" % \
                                 (pid, eid, tclfile),
                         "modexp", timeout= 60 * 10):
                     return False
             except self.ssh_cmd_timeout:
                 self.log.error("Modify command failed to complete in time")
                 # There's really no way to see if this succeeded or failed, so
                 # if it hangs, assume the worst.
                 return False
             # Active experiments are still swapped, this swaps the others in.
             if state != 'active':
                 self.log.info("[start_segment]: Swapping %s in on %s" % \
                         (eid, tb))
                 timedout = False
                 try:
                     if not self.ssh_cmd(user, host,
                             "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
                             "swapexp", timeout=10*60):
                         return False
                 except self.ssh_cmd_timeout:
                     timedout = True
                 # If the command was terminated, but completed successfully,
                 # report success.
                 if timedout:
                     self.log.debug("[start_segment]: swapin timed out " +\
                             "checking state")
                     state = self.get_state(user, host, tb, pid, eid)
                     self.log.debug("[start_segment]: state is %s" % state)
                     return state == 'active'
             # Everything has gone OK.
             return True
+        def __init__(self, log=None, keyfile=None, debug=False):
+            experiment_control_local.emulab_segment.__init__(self,
+                    log=log, keyfile=keyfile, debug=debug)
+        def create_config_tree(self, src_dir, dest_dir, script):
+            """
+            Append commands to script that will create the directory hierarchy
+            on the remote federant.
+            """
+            if os.path.isdir(src_dir):
+                print >>script, "mkdir -p %s" % dest_dir
+                print >>script, "chmod 770 %s" % dest_dir
+                for f in os.listdir(src_dir):
+                    if os.path.isdir(f):
+                        self.create_config_tree("%s/%s" % (src_dir, f),
+                                "%s/%s" % (dest_dir, f), script)
+            else:
+                self.log.debug("[create_config_tree]: Not a directory: %s" \
+                        % src_dir)
+        def ship_configs(self, host, user, src_dir, dest_dir):
+            """
+            Copy federant-specific configuration files to the federant.
+            """
+            for f in os.listdir(src_dir):
+                if os.path.isdir(f):
+                    if not self.ship_configs(host, user, "%s/%s" % (src_dir, f),
+                            "%s/%s" % (dest_dir, f)):
+                        return False
+                else:
+                    if not self.scp_file("%s/%s" % (src_dir, f),
+                            user, host, dest_dir):
+                        return False
+            return True
+        def get_state(self, user, host, tb, pid, eid):
+            # command to test experiment state
+            expinfo_exec = "/usr/testbed/bin/expinfo"
+            # Regular expressions to parse the expinfo response
+            state_re = re.compile("State:\s+(\w+)")
+            no_exp_re = re.compile("^No\s+such\s+experiment")
+            swapping_re = re.compile("^No\s+information\s+available.")
+            state = None    # Experiment state parsed from expinfo
+            # The expinfo ssh command.  Note the identity restriction to use
+            # only the identity provided in the pubkey given.
+            cmd = [self.ssh_exec, '-o', 'IdentitiesOnly yes', '-o',
+                    'StrictHostKeyChecking yes', '-i',
+                    self.ssh_privkey_file, "%s@%s" % (user, host),
+                    expinfo_exec, pid, eid]
+            dev_null = None
+            try:
+                dev_null = open("/dev/null", "a")
+            except IOError, e:
+                self.log.error("[get_state]: can't open /dev/null: %s" %e)
+            if self.debug:
+                state = 'swapped'
+                rv = 0
+            else:
+                status = Popen(cmd, stdout=PIPE, stderr=dev_null,
+                        close_fds=True)
+                for line in status.stdout:
+                    m = state_re.match(line)
+                    if m: state = m.group(1)
+                    else:
+                        for reg, st in ((no_exp_re, "none"),
+                                (swapping_re, "swapping")):
+                            m = reg.match(line)
+                            if m: state = st
+                rv = status.wait()
+            # If the experiment is not present the subcommand returns a
+            # non-zero return value.  If we successfully parsed a "none"
+            # outcome, ignore the return code.
+            if rv != 0 and state != 'none':
+                raise service_error(service_error.internal,
+                        "Cannot get status of segment %s:%s/%s" % \
+                                (tb, pid, eid))
+            elif state not in ('active', 'swapped', 'swapping', 'none'):
+                raise service_error(service_error.internal,
+                        "Cannot get status of segment %s:%s/%s" % \
+                                (tb, pid, eid))
+            else: return state
+        def __call__(self, tb, eid, tbparams, tmpdir, timeout=0):
+            """
+            Start a sub-experiment on a federant.
+            Get the current state, modify or create as appropriate, ship data
+            and configs and start the experiment.  There are small ordering
+            differences based on the initial state of the sub-experiment.
+            """
+            # ops node in the federant
+            host = "%s%s" % (tbparams[tb]['host'], tbparams[tb]['domain'])
+            user = tbparams[tb]['user']     # federant user
+            pid = tbparams[tb]['project']   # federant project
+            # XXX
+            base_confs = ( "hosts",)
+            tclfile = "%s.%s.tcl" % (eid, tb)   # sub-experiment description
+            # Configuration directories on the remote machine
+            proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
+            tarfiles_dir = "/proj/%s/tarfiles/%s" % (pid, eid)
+            rpms_dir = "/proj/%s/rpms/%s" % (pid, eid)
+            state = self.get_state(user, host, tb, pid, eid)
+            self.log.debug("[start_segment]: %s: %s" % (tb, state))
+            self.log.info("[start_segment]:transferring experiment to %s" % tb)
+            if not self.scp_file("%s/%s/%s" % \
+                    (tmpdir, tb, tclfile), user, host):
+                return False
+            if state == 'none':
+                # Create a null copy of the experiment so that we capture any
+                # logs there if the modify fails.  Emulab software discards the
+                # logs from a failed startexp
+                if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
+                    return False
+                self.log.info("[start_segment]: Creating %s on %s" % (eid, tb))
+                timedout = False
+                try:
+                    if not self.ssh_cmd(user, host,
+                            ("/usr/testbed/bin/startexp -i -f -w -p %s " +
+                            "-e %s null.tcl") % (pid, eid), "startexp",
+                            timeout=60 * 10):
+                        return False
+                except self.ssh_cmd_timeout:
+                    timedout = True
+                if timedout:
+                    state = self.get_state(user, host, tb, pid, eid)
+                    if state != "swapped":
+                        return False
+            # Open up a temporary file to contain a script for setting up the
+            # filespace for the new experiment.
+            self.log.info("[start_segment]: creating script file")
+            try:
+                sf, scriptname = tempfile.mkstemp()
+                scriptfile = os.fdopen(sf, 'w')
+            except IOError:
+                return False
+            scriptbase = os.path.basename(scriptname)
+            # Script the filesystem changes
+            print >>scriptfile, "/bin/rm -rf %s" % proj_dir
+            # Clear and create the tarfiles and rpm directories
+            for d in (tarfiles_dir, rpms_dir):
+                print >>scriptfile, "/bin/rm -rf %s/*" % d
+                print >>scriptfile, "mkdir -p %s" % d
+            print >>scriptfile, 'mkdir -p %s' % proj_dir
+            self.create_config_tree("%s/%s" % (tmpdir, tb),
+                    proj_dir, scriptfile)
+            if os.path.isdir("%s/tarfiles" % tmpdir):
+                self.create_config_tree("%s/tarfiles" % tmpdir, tarfiles_dir,
+                        scriptfile)
+            if os.path.isdir("%s/rpms" % tmpdir):
+                self.create_config_tree("%s/rpms" % tmpdir, rpms_dir,
+                        scriptfile)
+            print >>scriptfile, "rm -f %s" % scriptbase
+            scriptfile.close()
+            # Move the script to the remote machine
+            # XXX: could collide tempfile names on the remote host
+            if self.scp_file(scriptname, user, host, scriptbase):
+                os.remove(scriptname)
+            else:
+                return False
+            # Execute the script (and the script's last line deletes it)
+            if not self.ssh_cmd(user, host, "sh -x %s" % scriptbase):
+                return False
+            for f in base_confs:
+                if not self.scp_file("%s/%s" % (tmpdir, f), user, host,
+                        "%s/%s" % (proj_dir, f)):
+                    return False
+            if not self.ship_configs(host, user, "%s/%s" % (tmpdir, tb),
+                    proj_dir):
+                return False
+            if os.path.isdir("%s/tarfiles" % tmpdir):
+                if not self.ship_configs(host, user,
+                        "%s/tarfiles" % tmpdir, tarfiles_dir):
+                    return False
+            if os.path.isdir("%s/rpms" % tmpdir):
+                if not self.ship_configs(host, user,
+                        "%s/rpms" % tmpdir, tarfiles_dir):
+                    return False
+            # Stage the new configuration (active experiments will stay swapped
+            # in now)
+            self.log.info("[start_segment]: Modifying %s on %s" % (eid, tb))
+            try:
+                if not self.ssh_cmd(user, host,
+                        "/usr/testbed/bin/modexp -r -s -w %s %s %s" % \
+                                (pid, eid, tclfile),
+                        "modexp", timeout= 60 * 10):
+                    return False
+            except self.ssh_cmd_timeout:
+                self.log.error("Modify command failed to complete in time")
+                # There's really no way to see if this succeeded or failed, so
+                # if it hangs, assume the worst.
+                return False
+            # Active experiments are still swapped, this swaps the others in.
+            if state != 'active':
+                self.log.info("[start_segment]: Swapping %s in on %s" % \
+                        (eid, tb))
+                timedout = False
+                try:
+                    if not self.ssh_cmd(user, host,
+                            "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
+                            "swapexp", timeout=10*60):
+                        return False
+                except self.ssh_cmd_timeout:
+                    timedout = True
+                # If the command was terminated, but completed successfully,
+                # report success.
+                if timedout:
+                    self.log.debug("[start_segment]: swapin timed out " +\
+                            "checking state")
+                    state = self.get_state(user, host, tb, pid, eid)
+                    self.log.debug("[start_segment]: state is %s" % state)
+                    return state == 'active'
+            # Everything has gone OK.
+            return True
     class stop_segment(emulab_segment):
         def __init__(self, log=None, keyfile=None, debug=False):
             experiment_control_local.emulab_segment.__init__(self,
                     log=log, keyfile=keyfile, debug=debug)
         def __call__(self, tb, eid, tbparams):
             """
             Stop a sub experiment by calling swapexp on the federant
             """
             user = tbparams[tb]['user']
             host = "%s%s" % (tbparams[tb]['host'], tbparams[tb]['domain'])
             pid = tbparams[tb]['project']
             self.log.info("[stop_segment]: Stopping %s on %s" % (eid, tb))
             rv = False
             try:
                 # Clean out tar files: we've gone over quota in the past
                 self.ssh_cmd(user, host, "rm -rf /proj/%s/rpms/%s" % (pid, eid))
                 self.ssh_cmd(user, host, "rm -rf /proj/%s/tarfiles/%s" % \
                         (pid, eid))
                 rv = self.ssh_cmd(user, host,
                         "/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid))
             except self.ssh_cmd_timeout:
                 rv = False
             return rv
+        def __init__(self, log=None, keyfile=None, debug=False):
+            """
+            Pass log, keyfile and debug straight through to the
+            emulab_segment initializer.
+            """
+            experiment_control_local.emulab_segment.__init__(self,
+                    log=log, keyfile=keyfile, debug=debug)
+        def __call__(self, tb, eid, tbparams):
+            """
+            Stop a sub experiment by calling swapexp on the federant.
+            tb indexes tbparams; that entry must carry 'user', 'host',
+            'domain' and 'project'.  Returns the result of the swapexp
+            ssh_cmd, or False if ssh_cmd_timeout is raised by any of the
+            remote commands.
+            """
+            user = tbparams[tb]['user']
+            # host is the testbed's 'host' value with its 'domain' appended
+            host = "%s%s" % (tbparams[tb]['host'], tbparams[tb]['domain'])
+            pid = tbparams[tb]['project']
+            self.log.info("[stop_segment]: Stopping %s on %s" % (eid, tb))
+            rv = False
+            try:
+                # Clean out tar files: we've gone over quota in the past
+                # (the results of the two removals are not checked)
+                self.ssh_cmd(user, host, "rm -rf /proj/%s/rpms/%s" % (pid, eid))
+                self.ssh_cmd(user, host, "rm -rf /proj/%s/tarfiles/%s" % \
+                        (pid, eid))
+                rv = self.ssh_cmd(user, host,
+                        "/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid))
+            except self.ssh_cmd_timeout:
+                # A timeout anywhere above, cleanup included, reports failure
+                rv = False
+            return rv
 …
                     "Failed to open file in genviz")
         try:
             dnull = open('/dev/null', 'w')
         except IOError:
             service_error(service_error.internal,
+        try:
+            dnull = open('/dev/null', 'w')
+        except IOError:
+            service_error(service_error.internal,
                     "Failed to open /dev/null in genviz")
 …
         dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005', '-Gmaxiter=2000',
                 '-Gpack=true', dotname], stdout=PIPE, stderr=dnull,
                 close_fds=True)
         dnull.close()
+                close_fds=True)
+        dnull.close()
         # Translate dot to vis format
 …
     def allocate_resources(self, allocated, master, eid, expid, expcert,
             tbparams, tmpdir, alloc_log=None):
         started = { }           # Testbeds where a sub-experiment started
                                 # successfully
+            tbparams, tmpdir, alloc_log=None):
+        started = { }           # Testbeds where a sub-experiment started
+                                # successfully
         # XXX
         fail_soft = False
         log = alloc_log or self.log
+        log = alloc_log or self.log
         thread_pool = self.thread_pool(self.nthreads)
 …
             thread_pool.wait_for_slot()
             t  = self.pooled_thread(\
                     target=self.start_segment(log=log,
                         keyfile=self.ssh_privkey_file, debug=self.debug),
+                    target=self.start_segment(log=log,
+                        keyfile=self.ssh_privkey_file, debug=self.debug),
                     args=(tb, eid, tbparams, tmpdir, 0), name=tb,
                     pdata=thread_pool, trace_file=self.trace_file)
 …
         if len(failed) == 0:
             starter = self.start_segment(log=log,
                     keyfile=self.ssh_privkey_file, debug=self.debug)
+            starter = self.start_segment(log=log,
+                    keyfile=self.ssh_privkey_file, debug=self.debug)
             if not starter(master, eid, tbparams, tmpdir):
                 failed.append(master)
 …
                     thread_pool.wait_for_slot()
                     t  = self.pooled_thread(\
                             target=self.stop_segment(log=log,
                                 keyfile=self.ssh_privkey_file,
                                 debug=self.debug),
+                            target=self.stop_segment(log=log,
+                                keyfile=self.ssh_privkey_file,
+                                debug=self.debug),
                             args=(tb, eid, tbparams), name=tb,
                             pdata=thread_pool, trace_file=self.trace_file)
 …
                 self.state_lock.acquire()
                 self.state[eid]['experimentStatus'] = 'failed'
                 if self.state_filename: self.write_state()
+                if self.state_filename: self.write_state()
                 self.state_lock.release()
 …
                 #    "Swap in failed on %s" % ",".join(failed))
                 log.error("Swap in failed on %s" % ",".join(failed))
                 return
+                return
         else:
             log.info("[start_segment]: Experiment %s active" % eid)
 …
         # Insert the experiment into our state and update the disk copy
         self.state_lock.acquire()
         self.state[expid]['experimentStatus'] = 'active'
+        self.state[expid]['experimentStatus'] = 'active'
         self.state[eid] = self.state[expid]
         if self.state_filename: self.write_state()
         self.state_lock.release()
         return
+        return
     def create_experiment(self, req, fid):
 …
         if req.has_key('experimentID') and \
                 req['experimentID'].has_key('localname'):
             overwrite = False
+            overwrite = False
             eid = req['experimentID']['localname']
             # If there's an old failed experiment here with the same local name
             # and accessible by this user, we'll overwrite it, otherwise we'll
             # fall through and do the collision avoidance.
             old_expid = self.get_experiment_fedid(eid)
             if old_expid and self.check_experiment_access(fid, old_expid):
                 self.state_lock.acquire()
                 status = self.state[eid].get('experimentStatus', None)
                 if status and status == 'failed':
                     # remove the old access attribute
                     self.auth.unset_attribute(fid, old_expid)
                     overwrite = True
                     del self.state[eid]
                     del self.state[old_expid]
                 self.state_lock.release()
+            # If there's an old failed experiment here with the same local name
+            # and accessible by this user, we'll overwrite it, otherwise we'll
+            # fall through and do the collision avoidance.
+            old_expid = self.get_experiment_fedid(eid)
+            if old_expid and self.check_experiment_access(fid, old_expid):
+                self.state_lock.acquire()
+                status = self.state[eid].get('experimentStatus', None)
+                if status and status == 'failed':
+                    # remove the old access attribute
+                    self.auth.unset_attribute(fid, old_expid)
+                    overwrite = True
+                    del self.state[eid]
+                    del self.state[old_expid]
+                self.state_lock.release()
             self.state_lock.acquire()
             while (self.state.has_key(eid) and not overwrite):
                 eid += random.choice(string.ascii_letters)
             # Initial state
+            # Initial state
             self.state[eid] = {
                     'experimentID' : \
                             [ { 'localname' : eid }, {'fedid': expid } ],
                     'experimentStatus': 'starting',
                     'experimentAccess': { 'X509' : expcert },
                     'owner': fid,
                     'log' : [],
+                }
             self.state[expid] = self.state[eid]
+                    'experimentID' : \
+                            [ { 'localname' : eid }, {'fedid': expid } ],
+                    'experimentStatus': 'starting',
+                    'experimentAccess': { 'X509' : expcert },
+                    'owner': fid,
+                    'log' : [],
+                }
+            self.state[expid] = self.state[eid]
             if self.state_filename: self.write_state()
             self.state_lock.release()
 …
                 for i in range(0,5):
                     eid += random.choice(string.ascii_letters)
             # Initial state
+            # Initial state
             self.state[eid] = {
                     'experimentID' : \
                             [ { 'localname' : eid }, {'fedid': expid } ],
                     'experimentStatus': 'starting',
                     'experimentAccess': { 'X509' : expcert },
                     'owner': fid,
                     'log' : [],
+                }
             self.state[expid] = self.state[eid]
+                    'experimentID' : \
+                            [ { 'localname' : eid }, {'fedid': expid } ],
+                    'experimentStatus': 'starting',
+                    'experimentAccess': { 'X509' : expcert },
+                    'owner': fid,
+                    'log' : [],
+                }
+            self.state[expid] = self.state[eid]
             if self.state_filename: self.write_state()
             self.state_lock.release()
 …
                 self.log.debug("running local splitter %s", " ".join(tclcmd))
                 # This is just fantastic.  As a side effect the parser copies
                 # tb_compat.tcl into the current directory, so that directory
                 # must be writable by the fedd user.  Doing this in the
                 # temporary subdir ensures this is the case.
+                # This is just fantastic.  As a side effect the parser copies
+                # tb_compat.tcl into the current directory, so that directory
+                # must be writable by the fedd user.  Doing this in the
+                # temporary subdir ensures this is the case.
                 tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
                         cwd=tmpdir)
+                        cwd=tmpdir)
                 split_data = tclparser.stdout
 …
+                    }
             self.state_lock.acquire()
             self.state[eid]['vtopo'] = vtopo
             self.state[eid]['vis'] = vis
             self.state[expid]['federant'] = \
                     [ tbparams[tb]['federant'] for tb in tbparams.keys() \
                         if tbparams[tb].has_key('federant') ]
+            self.state_lock.acquire()
+            self.state[eid]['vtopo'] = vtopo
+            self.state[eid]['vis'] = vis
+            self.state[expid]['federant'] = \
+                    [ tbparams[tb]['federant'] for tb in tbparams.keys() \
+                        if tbparams[tb].has_key('federant') ]
             if self.state_filename: self.write_state()
             self.state_lock.release()
+            self.state_lock.release()
             # Copy tarfiles and rpms needed at remote sites into a staging area
 …
             # If something goes wrong in the parse (usually an access error)
             # clear the placeholder state.  From here on out the code delays
             # exceptions.  Failing at this point returns a fault to the remote
             # caller.
+            # exceptions.  Failing at this point returns a fault to the remote
+            # caller.
             self.state_lock.acquire()
             del self.state[eid]
 …
         # Start the background swapper and return the starting state.  From
         # here on out, the state will stick around a while.
         # Let users touch the state
+        # Start the background swapper and return the starting state.  From
+        # here on out, the state will stick around a while.
+        # Let users touch the state
         self.auth.set_attribute(fid, expid)
         self.auth.set_attribute(expid, expid)
+        # Override fedids can manipulate state as well
+        for o in self.overrides:
+            self.auth.set_attribute(o, expid)
+        # Create a logger that logs to the experiment's state object as well as
+        # to the main log file.
+        alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
+        h = logging.StreamHandler(self.list_log(self.state[eid]['log']))
+        # XXX: there should be a global one of these rather than repeating the
+        # code.
+        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
+                    '%d %b %y %H:%M:%S'))
+        alloc_log.addHandler(h)
+        # Start a thread to do the resource allocation
+        t  = Thread(target=self.allocate_resources,
+                args=(allocated, master, eid, expid, expcert, tbparams,
+                    tmpdir, alloc_log),
+                name=eid)
+        t.start()
+        rv = {
+                'experimentID': [
+                    {'localname' : eid }, { 'fedid': copy.copy(expid) }
+                ],
+                'experimentStatus': 'starting',
+                'experimentAccess': { 'X509' : expcert }
+            }
+        return rv
+        # Override fedids can manipulate state as well
+        for o in self.overrides:
+            self.auth.set_attribute(o, expid)
+        # Create a logger that logs to the experiment's state object as well as
+        # to the main log file.
+        alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
+        h = logging.StreamHandler(self.list_log(self.state[eid]['log']))
+        # XXX: there should be a global one of these rather than repeating the
+        # code.
+        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
+                    '%d %b %y %H:%M:%S'))
+        alloc_log.addHandler(h)
+        # Start a thread to do the resource allocation
+        t  = Thread(target=self.allocate_resources,
+                args=(allocated, master, eid, expid, expcert, tbparams,
+                    tmpdir, alloc_log),
+                name=eid)
+        t.start()
+        rv = {
+                'experimentID': [
+                    {'localname' : eid }, { 'fedid': copy.copy(expid) }
+                ],
+                'experimentStatus': 'starting',
+                'experimentAccess': { 'X509' : expcert }
+            }
+        return rv
+    def new_create_experiment(self, req, fid):
+        """
+        The external interface to experiment creation called from the
+        dispatcher.
+        Creates a working directory, splits the incoming description using the
+        splitter script and parses out the various subsections using the
+        classes above.  Once each sub-experiment is created, use pooled threads
+        to instantiate them and start it all up.
+        """
+        if not self.auth.check_attribute(fid, 'create'):
+            raise service_error(service_error.access, "Create access denied")
+        try:
+            tmpdir = tempfile.mkdtemp(prefix="split-")
+        except IOError:
+            raise service_error(service_error.internal, "Cannot create tmp dir")
+        gw_pubkey_base = "fed.%s.pub" % self.ssh_type
+        gw_secretkey_base = "fed.%s" % self.ssh_type
+        gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
+        gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
+        tclfile = tmpdir + "/experiment.tcl"
+        tbparams = { }
+        try:
+            access_user = self.accessdb[fid]
+        except KeyError:
+            raise service_error(service_error.internal,
+                    "Access map and authorizer out of sync in " + \
+                            "create_experiment for fedid %s"  % fid)
+        pid = "dummy"
+        gid = "dummy"
+        try:
+            os.mkdir(tmpdir+"/keys")
+        except OSError:
+            raise service_error(service_error.internal,
+                    "Can't make temporary dir")
+        req = req.get('CreateRequestBody', None)
+        if not req:
+            raise service_error(service_error.req,
+                    "Bad request format (no CreateRequestBody)")
+        # The tcl parser needs to read a file so put the content into that file
+        descr=req.get('experimentdescription', None)
+        if descr:
+            file_content=descr.get('ns2description', None)
+            if file_content:
+                try:
+                    f = open(tclfile, 'w')
+                    f.write(file_content)
+                    f.close()
+                except IOError:
+                    raise service_error(service_error.internal,
+                            "Cannot write temp experiment description")
+            else:
+                raise service_error(service_error.req,
+                        "Only ns2descriptions supported")
+        else:
+            raise service_error(service_error.req, "No experiment description")
+        # Generate an ID for the experiment (slice) and a certificate that the
+        # allocator can use to prove they own it.  We'll ship it back through
+        # the encrypted connection.
+        (expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)
+        if req.has_key('experimentID') and \
+                req['experimentID'].has_key('localname'):
+            overwrite = False
+            eid = req['experimentID']['localname']
+            # If there's an old failed experiment here with the same local name
+            # and accessible by this user, we'll overwrite it, otherwise we'll
+            # fall through and do the collision avoidance.
+            old_expid = self.get_experiment_fedid(eid)
+            if old_expid and self.check_experiment_access(fid, old_expid):
+                self.state_lock.acquire()
+                status = self.state[eid].get('experimentStatus', None)
+                if status and status == 'failed':
+                    # remove the old access attribute
+                    self.auth.unset_attribute(fid, old_expid)
+                    overwrite = True
+                    del self.state[eid]
+                    del self.state[old_expid]
+                self.state_lock.release()
+            self.state_lock.acquire()
+            while (self.state.has_key(eid) and not overwrite):
+                eid += random.choice(string.ascii_letters)
+            # Initial state
+            self.state[eid] = {
+                    'experimentID' : \
+                            [ { 'localname' : eid }, {'fedid': expid } ],
+                    'experimentStatus': 'starting',
+                    'experimentAccess': { 'X509' : expcert },
+                    'owner': fid,
+                    'log' : [],
+                }
+            self.state[expid] = self.state[eid]
+            if self.state_filename: self.write_state()
+            self.state_lock.release()
+        else:
+            eid = self.exp_stem
+            for i in range(0,5):
+                eid += random.choice(string.ascii_letters)
+            self.state_lock.acquire()
+            while (self.state.has_key(eid)):
+                eid = self.exp_stem
+                for i in range(0,5):
+                    eid += random.choice(string.ascii_letters)
+            # Initial state
+            self.state[eid] = {
+                    'experimentID' : \
+                            [ { 'localname' : eid }, {'fedid': expid } ],
+                    'experimentStatus': 'starting',
+                    'experimentAccess': { 'X509' : expcert },
+                    'owner': fid,
+                    'log' : [],
+                }
+            self.state[expid] = self.state[eid]
+            if self.state_filename: self.write_state()
+            self.state_lock.release()
+        try:
+            # This catches exceptions to clear the placeholder if necessary
+            try:
+                self.generate_ssh_keys(gw_secretkey, self.ssh_type)
+            except ValueError:
+                raise service_error(service_error.server_config,
+                        "Bad key type (%s)" % self.ssh_type)
+            user = req.get('user', None)
+            if user == None:
+                raise service_error(service_error.req, "No user")
+            master = req.get('master', None)
+            if not master:
+                raise service_error(service_error.req,
+                        "No master testbed label")
+            export_project = req.get('exportProject', None)
+            if not export_project:
+                raise service_error(service_error.req, "No export project")
+            if self.splitter_url:
+                self.log.debug("Calling remote splitter at %s" % \
+                        self.splitter_url)
+                split_data = self.remote_splitter(self.splitter_url,
+                        file_content, master)
+            else:
+                tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
+                    str(self.muxmax), '-m', master]
+                if self.fedkit:
+                    tclcmd.append('-k')
+                if self.gatewaykit:
+                    tclcmd.append('-K')
+                tclcmd.extend([pid, gid, eid, tclfile])
+                self.log.debug("running local splitter %s", " ".join(tclcmd))
+                # This is just fantastic.  As a side effect the parser copies
+                # tb_compat.tcl into the current directory, so that directory
+                # must be writable by the fedd user.  Doing this in the
+                # temporary subdir ensures this is the case.
+                tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
+                        cwd=tmpdir)
+                split_data = tclparser.stdout
+            allocated = { }         # Testbeds we can access
+# XXX here's where we're working
+            def out_topo(filename, t):
+                try:
+                    f = open("/tmp/%s" % filename, "w")
+                    print >> f, "%s" % \
+                            topdl.topology_to_xml(t, top="experiment")
+                    f.close()
+                except IOError, e:
+                    raise service_error(service_error.internal, "Can't open file")
+            try:
+                top = topdl.topology_from_xml(file=split_data, top="experiment")
+                subs = sorted(top.substrates,
+                        cmp=lambda x,y: cmp(len(x.interfaces), len(y.interfaces)),
+                        reverse=True)
+                ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
+                for s in subs:
+                    a = ips.allocate(len(s.interfaces)+2)
+                    if a :
+                        base, num = a
+                        if num < len(s.interfaces) +2 :
+                            raise service_error(service_error.internal,
+                                    "Allocator returned wrong number of IPs??")
+                    else:
+                        raise service_error(service_error.req,
+                                "Cannot allocate IP addresses")
+                    base += 1
+                    for i in s.interfaces:
+                        i.attribute.append(
+                                topdl.Attribute('ip4_address',
+                                    "%s" % ip_addr(base)))
+                        base += 1
+                testbeds = set([ a.value for e in top.elements \
+                        for a in e.attribute \
+                            if a.attribute == 'testbed'] )
+                topo ={ }
+                for tb in testbeds:
+                    self.get_access(tb, None, user, tbparams, master,
+                            export_project, access_user)
+                    topo[tb] = top.clone()
+                    to_delete = [ ]
+                    for e in topo[tb].elements:
+                        etb = e.get_attribute('testbed')
+                        if etb and etb != tb:
+                            for i in e.interface:
+                                for s in i.subs:
+                                    try:
+                                        s.interfaces.remove(i)
+                                    except ValueError:
+                                        raise service_error(service_error.internal,
+                                                "Can't remove interface??")
+                            to_delete.append(e)
+                    for e in to_delete:
+                        topo[tb].elements.remove(e)
+                    topo[tb].make_indices()
+                for s in top.substrates:
+                    tests = { }
+                    for i in s.interfaces:
+                        e = i.element
+                        tb = e.get_attribute('testbed')
+                        if tb and not tests.has_key(tb):
+                            for i in e.interface:
+                                if s in i.subs:
+                                    tests[tb]= \
+                                            i.get_attribute('ip4_address')
+                    if len(tests) < 2:
+                        continue
+                    # More than one testbed is on this substrate.  Insert
+                    # some gateways into the subtopologies.
+                    for st in tests.keys():
+                        for dt in [ t for t in tests.keys() if t != st]:
+                            myname =  "%stunnel" % dt
+                            desthost  =  "%stunnel" % st
+                            sproject = tbparams[st].get('project', 'project')
+                            dproject = tbparams[dt].get('project', 'project')
+                            sdomain = ".%s.%s%s" % (eid, sproject,
+                                    tbparams[st].get('domain', ".example.com"))
+                            ddomain = ".%s.%s%s" % (eid, dproject,
+                                    tbparams[dt].get('domain', ".example.com"))
+                            boss = tbparams[master].get('boss', "boss")
+                            fs = tbparams[master].get('fs', "fs")
+                            event_server = "%s%s" % \
+                                    (tbparams[st].get('eventserver', "event_server"),
+                                            tbparams[dt].get('domain', "example.com"))
+                            remote_event_server = "%s%s" % \
+                                    (tbparams[dt].get('eventserver', "event_server"),
+                                            tbparams[dt].get('domain', "example.com"))
+                            seer_control = "%s%s" % \
+                                    (tbparams[st].get('control', "control"), sdomain)
+                            local_key_dir = "/proj/%s/exp/%s/tmp" % ( sproject, eid)
+                            remote_conf_dir = "/proj/%s/exp/%s/tmp" % ( dproject, eid)
+                            conf_file = "%s%s.gw.conf" % (myname, sdomain)
+                            remote_conf_file = "%s%s.gw.conf" % (desthost, ddomain)
+                            # translate to lower case so the `hostname` hack for specifying
+                            # configuration files works.
+                            conf_file = conf_file.lower();
+                            remote_conf_file = remote_conf_file.lower();
+                            active = ("%s" % (st == master))
+                            portal = topdl.Computer(**{
+                                    'name': "%stunnel" % dt,
+                                    'attribute' : [{
+                                        'attribute': n,
+                                        'value': v,
+                                        } for n, v in (\
+                                                ('gateway', 'true'),
+                                                ('boss', boss),
+                                                ('fs', fs),
+                                                ('event_server', event_server),
+                                                ('remote_event_server', remote_event_server),
+                                                ('seer_control', seer_control),
+                                                ('local_key_dir', local_key_dir),
+                                                ('remote_conf_dir', remote_conf_dir),
+                                                ('conf_file', conf_file),
+                                                ('remote_conf_file', remote_conf_file),
+                                                ('remote_script_dir', "/usr/local/federation/bin"),
+                                                ('local_script_dir', "/usr/local/federation/bin"),
+                                                )],
+                                    'interface': [{
+                                        'substrate': s.name,
+                                        'attribute': [ {
+                                            'attribute': 'ip4_addreess',
+                                            'value': tests[dt],
+                                            }, ],
+                                        }, ],
+                                    })
+                            topo[st].elements.append(portal)
+                # Connect the gateway nodes into the topologies and clear out
+                # substrates that are not in the topologies
+                for tb in testbeds:
+                    topo[tb].incorporate_elements()
+                    topo[tb].substrates = \
+                            [s for s in topo[tb].substrates \
+                                if len(s.interfaces) >0]
+                softdir ="%s/software" % tmpdir
+                softmap = { }
+                os.mkdir(softdir)
+                pkgs = set([fedkit, gatewaykit])
+                pkgs.update([x.location for e in top.elements \
+                        for x in e.software])
+                for pkg in pkgs:
+                    loc = pkg
+                    scheme, host, path = urlparse(loc)[0:3]
+                    dest = os.path.basename(path)
+                    if not scheme:
+                        if not loc.startswith('/'):
+                            loc = "/%s" % loc
+                        loc = "file://%s" %loc
+                    try:
+                        u = urlopen(loc)
+                    except Exception, e:
+                        raise service_error(service_error.req,
+                                "Cannot open %s: %s" % (loc, e))
+                    try:
+                        f = open("%s/%s" % (softdir, dest) , "w")
+                        data = u.read(4096)
+                        while data:
+                            f.write(data)
+                            data = u.read(4096)
+                        f.close()
+                        u.close()
+                    except Exception, e:
+                        raise service_error(service_error.internal,
+                                "Could not copy %s: %s" % (loc, e))
+                    path = re.sub("/tmp", "", softdir)
+                    # XXX
+                    softmap[pkg] = \
+                            "https://users.isi.deterlab.net:23232/%s/%s" %\
+                            ( path, dest)
+                # Convert the software locations in the segments into the local
+                # copies on this host
+                for soft in [ s for tb in topo.values() \
+                        for e in tb.elements \
+                            for s in e.software ]:
+                    if softmap.has_key(soft.location):
+                        soft.location = softmap[soft.location]
+                for tb in testbeds:
+                    out_topo("%s.xml" %tb, topo[tb])
+                vtopo = topdl.topology_to_vtopo(top)
+                vis = self.genviz(vtopo)
+            except Exception, e:
+                traceback.print_exc()
+                raise service_error(service_error.internal, "%s"  % e)
+            # Build the testbed topologies:
+            if True:
+                raise service_error(service_error.internal, "Developing")
+# XXX old code
+            # Objects to parse the splitter output (defined above)
+            parse_current_testbed = self.current_testbed(eid, tmpdir,
+                    self.fedkit, self.gatewaykit)
+            parse_allbeds = self.allbeds(self.get_access)
+            parse_gateways = self.gateways(eid, master, tmpdir,
+                    gw_pubkey_base, gw_secretkey_base, self.copy_file,
+                    self.fedkit)
+            parse_vtopo = self.shunt_to_string("^#\s+Begin\s+Vtopo",
+                        "^#\s+End\s+Vtopo")
+            parse_hostnames = self.shunt_to_file("^#\s+Begin\s+hostnames",
+                        "^#\s+End\s+hostnames", tmpdir + "/hosts")
+            parse_tarfiles = self.shunt_to_list("^#\s+Begin\s+tarfiles",
+                    "^#\s+End\s+tarfiles")
+            parse_rpms = self.shunt_to_list("^#\s+Begin\s+rpms",
+                    "^#\s+End\s+rpms")
+            # Working on the split data
+            for line in split_data:
+                line = line.rstrip()
+                if parse_current_testbed(line, master, allocated, tbparams):
+                    continue
+                elif parse_allbeds(line, user, tbparams, master, export_project,
+                        access_user):
+                    continue
+                elif parse_gateways(line, allocated, tbparams):
+                    continue
+                elif parse_vtopo(line):
+                    continue
+                elif parse_hostnames(line):
+                    continue
+                elif parse_tarfiles(line):
+                    continue
+                elif parse_rpms(line):
+                    continue
+                else:
+                    raise service_error(service_error.internal,
+                            "Bad tcl parse? %s" % line)
+            # Virtual topology and visualization
+            vtopo = self.gentopo(parse_vtopo.str)
+            if not vtopo:
+                raise service_error(service_error.internal,
+                        "Failed to generate virtual topology")
+            vis = self.genviz(vtopo)
+            if not vis:
+                raise service_error(service_error.internal,
+                        "Failed to generate visualization")
+            # save federant information
+            for k in allocated.keys():
+                tbparams[k]['federant'] = {\
+                        'name': [ { 'localname' : eid} ],\
+                        'emulab': tbparams[k]['emulab'],\
+                        'allocID' : tbparams[k]['allocID'],\
+                        'master' : k == master,\
+                    }
+            self.state_lock.acquire()
+            self.state[eid]['vtopo'] = vtopo
+            self.state[eid]['vis'] = vis
+            self.state[expid]['federant'] = \
+                    [ tbparams[tb]['federant'] for tb in tbparams.keys() \
+                        if tbparams[tb].has_key('federant') ]
+            if self.state_filename: self.write_state()
+            self.state_lock.release()
+            # Copy tarfiles and rpms needed at remote sites into a staging area
+            try:
+                if self.fedkit:
+                    for t in self.fedkit:
+                        parse_tarfiles.list.append(t[1])
+                if self.gatewaykit:
+                    for t in self.gatewaykit:
+                        parse_tarfiles.list.append(t[1])
+                for t in parse_tarfiles.list:
+                    if not os.path.exists("%s/tarfiles" % tmpdir):
+                        os.mkdir("%s/tarfiles" % tmpdir)
+                    self.copy_file(t, "%s/tarfiles/%s" % \
+                            (tmpdir, os.path.basename(t)))
+                for r in parse_rpms.list:
+                    if not os.path.exists("%s/rpms" % tmpdir):
+                        os.mkdir("%s/rpms" % tmpdir)
+                    self.copy_file(r, "%s/rpms/%s" % \
+                            (tmpdir, os.path.basename(r)))
+                # A null experiment file in case we need to create a remote
+                # experiment from scratch
+                f = open("%s/null.tcl" % tmpdir, "w")
+                print >>f, """
+set ns [new Simulator]
+source tb_compat.tcl
+set a [$ns node]
+$ns rtproto Session
+$ns run
+"""
+                f.close()
+            except IOError, e:
+                raise service_error(service_error.internal,
+                        "Cannot stage tarfile/rpm: %s" % e.strerror)
+        except service_error, e:
+            # If something goes wrong in the parse (usually an access error)
+            # clear the placeholder state.  From here on out the code delays
+            # exceptions.  Failing at this point returns a fault to the remote
+            # caller.
+            self.state_lock.acquire()
+            del self.state[eid]
+            del self.state[expid]
+            if self.state_filename: self.write_state()
+            self.state_lock.release()
+            raise e
+        # Start the background swapper and return the starting state.  From
+        # here on out, the state will stick around a while.
+        # Let users touch the state
+        self.auth.set_attribute(fid, expid)
+        self.auth.set_attribute(expid, expid)
+        # Override fedids can manipulate state as well
+        for o in self.overrides:
+            self.auth.set_attribute(o, expid)
+        # Create a logger that logs to the experiment's state object as well as
+        # to the main log file.
+        alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
+        h = logging.StreamHandler(self.list_log(self.state[eid]['log']))
+        # XXX: there should be a global one of these rather than repeating the
+        # code.
+        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
+                    '%d %b %y %H:%M:%S'))
+        alloc_log.addHandler(h)
+        # Start a thread to do the resource allocation
+        t  = Thread(target=self.allocate_resources,
+                args=(allocated, master, eid, expid, expcert, tbparams,
+                    tmpdir, alloc_log),
+                name=eid)
+        t.start()
+        rv = {
+                'experimentID': [
+                    {'localname' : eid }, { 'fedid': copy.copy(expid) }
+                ],
+                'experimentStatus': 'starting',
+                'experimentAccess': { 'X509' : expcert }
+            }
+        return rv
     def get_experiment_fedid(self, key):
         """
         find the fedid associated with the localname key in the state database.
         """
         rv = None
         self.state_lock.acquire()
         if self.state.has_key(key):
             if isinstance(self.state[key], dict):
                 try:
                     kl = [ f['fedid'] for f in \
                             self.state[key]['experimentID']\
                                 if f.has_key('fedid') ]
                 except KeyError:
                     self.state_lock.release()
                     raise service_error(service_error.internal,
                             "No fedid for experiment %s when getting "+\
                                     "fedid(!?)" % key)
                 if len(kl) == 1:
                     rv = kl[0]
                 else:
                     self.state_lock.release()
                     raise service_error(service_error.internal,
                             "multiple fedids for experiment %s when " +\
                                     "getting fedid(!?)" % key)
             else:
                 self.state_lock.release()
                 raise service_error(service_error.internal,
                         "Unexpected state for %s" % key)
         self.state_lock.release()
         return rv
+        find the fedid associated with the localname key in the state database.
+        """
+        rv = None
+        self.state_lock.acquire()
+        if self.state.has_key(key):
+            if isinstance(self.state[key], dict):
+                try:
+                    kl = [ f['fedid'] for f in \
+                            self.state[key]['experimentID']\
+                                if f.has_key('fedid') ]
+                except KeyError:
+                    self.state_lock.release()
+                    raise service_error(service_error.internal,
+                            "No fedid for experiment %s when getting "+\
+                                    "fedid(!?)" % key)
+                if len(kl) == 1:
+                    rv = kl[0]
+                else:
+                    self.state_lock.release()
+                    raise service_error(service_error.internal,
+                            "multiple fedids for experiment %s when " +\
+                                    "getting fedid(!?)" % key)
+            else:
+                self.state_lock.release()
+                raise service_error(service_error.internal,
+                        "Unexpected state for %s" % key)
+        self.state_lock.release()
+        return rv
     def check_experiment_access(self, fid, key):
 …
         """
         if not isinstance(key, fedid):
             key = self.get_experiment_fedid(key)
+            key = self.get_experiment_fedid(key)
         if self.auth.check_attribute(fid, key):
 …
+    def get_handler(self, path, fid):
+        """
+        Placeholder handler: print the path and fid it was called with and
+        return a fixed two-element tuple.  Neither argument affects the
+        returned value.
+        """
+        # NOTE(review): the tuple looks like (file to serve, content type) --
+        # confirm against the caller.  The returned path is hard-coded under
+        # /users/faber.
+        print "in get_handler %s %s" % (path, fid)
+        return ("/users/faber/test.html", "text/html")
     def get_vtopo(self, req, fid):
 …
         """
         rv = None
         state = None
+        state = None
         req = req.get('VtopoRequestBody', None)
 …
         self.state_lock.acquire()
         if self.state.has_key(key):
             if self.state[key].has_key('vtopo'):
                 rv = { 'experiment' : {keytype: key },\
                         'vtopo': self.state[key]['vtopo'],\
+                    }
             else:
                 state = self.state[key]['experimentStatus']
+            if self.state[key].has_key('vtopo'):
+                rv = { 'experiment' : {keytype: key },\
+                        'vtopo': self.state[key]['vtopo'],\
+                    }
+            else:
+                state = self.state[key]['experimentStatus']
         self.state_lock.release()
         if rv: return rv
         else:
             if state:
                 raise service_error(service_error.partial,
                         "Not ready: %s" % state)
             else:
                 raise service_error(service_error.req, "No such experiment")
+            if state:
+                raise service_error(service_error.partial,
+                        "Not ready: %s" % state)
+            else:
+                raise service_error(service_error.req, "No such experiment")
     def get_vis(self, req, fid):
 …
         """
         rv = None
         state = None
+        state = None
         req = req.get('VisRequestBody', None)
 …
         self.state_lock.acquire()
         if self.state.has_key(key):
             if self.state[key].has_key('vis'):
                 rv =  { 'experiment' : {keytype: key },\
                         'vis': self.state[key]['vis'],\
+                        }
             else:
                 state = self.state[key]['experimentStatus']
+            if self.state[key].has_key('vis'):
+                rv =  { 'experiment' : {keytype: key },\
+                        'vis': self.state[key]['vis'],\
+                        }
+            else:
+                state = self.state[key]['experimentStatus']
         self.state_lock.release()
         if rv: return rv
         else:
             if state:
                 raise service_error(service_error.partial,
                         "Not ready: %s" % state)
             else:
                 raise service_error(service_error.req, "No such experiment")
+            if state:
+                raise service_error(service_error.partial,
+                        "Not ready: %s" % state)
+            else:
+                raise service_error(service_error.req, "No such experiment")
     def clean_info_response(self, rv):
         """
         Remove the information in the experiment's state object that is not in
         the info response.
         """
         # Remove the owner info (should always be there, but...)
         if rv.has_key('owner'): del rv['owner']
         # Convert the log into the allocationLog parameter and remove the
         # log entry (with defensive programming)
         if rv.has_key('log'):
             rv['allocationLog'] = "".join(rv['log'])
             del rv['log']
         else:
             rv['allocationLog'] = ""
         if rv['experimentStatus'] != 'active':
             if rv.has_key('federant'): del rv['federant']
         else:
             # remove the allocationID info from each federant
             for f in rv.get('federant', []):
                 if f.has_key('allocID'): del f['allocID']
         return rv
+        """
+        Remove the information in the experiment's state object that is not in
+        the info response.
+        """
+        # Remove the owner info (should always be there, but...)
+        if rv.has_key('owner'): del rv['owner']
+        # Convert the log into the allocationLog parameter and remove the
+        # log entry (with defensive programming)
+        if rv.has_key('log'):
+            rv['allocationLog'] = "".join(rv['log'])
+            del rv['log']
+        else:
+            rv['allocationLog'] = ""
+        if rv['experimentStatus'] != 'active':
+            if rv.has_key('federant'): del rv['federant']
+        else:
+            # remove the allocationID info from each federant
+            for f in rv.get('federant', []):
+                if f.has_key('allocID'): del f['allocID']
+        return rv
     def get_info(self, req, fid):
 …
         self.state_lock.release()
         if rv:
             return self.clean_info_response(rv)
+        if rv:
+            return self.clean_info_response(rv)
         else:
             raise service_error(service_error.req, "No such experiment")
+            raise service_error(service_error.req, "No such experiment")
     def get_multi_info(self, req, fid):
 …
         Return all the stored info that this fedid can access
         """
         rv = { 'info': [ ] }
         self.state_lock.acquire()
         for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
             self.check_experiment_access(fid, key)
             if self.state.has_key(key):
                 e = copy.deepcopy(self.state[key])
                 e = self.clean_info_response(e)
                 rv['info'].append(e)
+        rv = { 'info': [ ] }
+        self.state_lock.acquire()
+        for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
+            self.check_experiment_access(fid, key)
+            if self.state.has_key(key):
+                e = copy.deepcopy(self.state[key])
+                e = self.clean_info_response(e)
+                rv['info'].append(e)
         self.state_lock.release()
         return rv
+        return rv
 …
             raise service_error(service_error.req,
                     "Bad request format (no TerminateRequestBody)")
         force = req.get('force', False)
+        force = req.get('force', False)
         exp = req.get('experiment', None)
         if exp:
 …
         self.check_experiment_access(fid, key)
         dealloc_list = [ ]
         # Create a logger that logs to the dealloc_list as well as to the main
         # log file.
         dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
         h = logging.StreamHandler(self.list_log(dealloc_list))
         # XXX: there should be a global one of these rather than repeating the
         # code.
         h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                     '%d %b %y %H:%M:%S'))
         dealloc_log.addHandler(h)
+        dealloc_list = [ ]
+        # Create a logger that logs to the dealloc_list as well as to the main
+        # log file.
+        dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
+        h = logging.StreamHandler(self.list_log(dealloc_list))
+        # XXX: there should be a global one of these rather than repeating the
+        # code.
+        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
+                    '%d %b %y %H:%M:%S'))
+        dealloc_log.addHandler(h)
         self.state_lock.acquire()
 …
             # remove the experiment state when the termination is complete.
             # First make sure that the experiment creation is complete.
             status = fed_exp.get('experimentStatus', None)
             if status:
                 if status in ('starting', 'terminating'):
                     if not force:
                         self.state_lock.release()
                         raise service_error(service_error.partial,
                                 'Experiment still being created or destroyed')
                     else:
                         self.log.warning('Experiment in %s state ' % status + \
                                 'being terminated by force.')
             else:
                 # No status??? trouble
                 self.state_lock.release()
                 raise service_error(service_error.internal,
                         "Experiment has no status!?")
+            # First make sure that the experiment creation is complete.
+            status = fed_exp.get('experimentStatus', None)
+            if status:
+                if status in ('starting', 'terminating'):
+                    if not force:
+                        self.state_lock.release()
+                        raise service_error(service_error.partial,
+                                'Experiment still being created or destroyed')
+                    else:
+                        self.log.warning('Experiment in %s state ' % status + \
+                                'being terminated by force.')
+            else:
+                # No status??? trouble
+                self.state_lock.release()
+                raise service_error(service_error.internal,
+                        "Experiment has no status!?")
             ids = []
 …
                         'aid': aid,\
+                    }
             fed_exp['experimentStatus'] = 'terminating'
+            fed_exp['experimentStatus'] = 'terminating'
             if self.state_filename: self.write_state()
             self.state_lock.release()
             # Stop everyone.  NB, wait_for_all waits until a thread starts and
             # then completes, so we can't wait if nothing starts.  So, no
             # tbparams, no start.
             if len(tbparams) > 0:
                 thread_pool = self.thread_pool(self.nthreads)
                 for tb in tbparams.keys():
                     # Create and start a thread to stop the segment
                     thread_pool.wait_for_slot()
                     t  = self.pooled_thread(\
                             target=self.stop_segment(log=dealloc_log,
                                 keyfile=self.ssh_privkey_file, debug=self.debug),
                             args=(tb, tbparams[tb]['eid'], tbparams), name=tb,
                             pdata=thread_pool, trace_file=self.trace_file)
                     t.start()
                 # Wait for completions
                 thread_pool.wait_for_all_done()
             # release the allocations (failed experiments have done this
             # already, and starting experiments may be in odd states, so we
             # ignore errors releasing those allocations
             try:
                 for tb in tbparams.keys():
                     self.release_access(tb, tbparams[tb]['aid'])
             except service_error, e:
                 if status != 'failed' and not force:
                     raise e
+            # Stop everyone.  NB, wait_for_all waits until a thread starts and
+            # then completes, so we can't wait if nothing starts.  So, no
+            # tbparams, no start.
+            if len(tbparams) > 0:
+                thread_pool = self.thread_pool(self.nthreads)
+                for tb in tbparams.keys():
+                    # Create and start a thread to stop the segment
+                    thread_pool.wait_for_slot()
+                    t  = self.pooled_thread(\
+                            target=self.stop_segment(log=dealloc_log,
+                                keyfile=self.ssh_privkey_file, debug=self.debug),
+                            args=(tb, tbparams[tb]['eid'], tbparams), name=tb,
+                            pdata=thread_pool, trace_file=self.trace_file)
+                    t.start()
+                # Wait for completions
+                thread_pool.wait_for_all_done()
+            # release the allocations (failed experiments have done this
+            # already, and starting experiments may be in odd states, so we
+            # ignore errors releasing those allocations
+            try:
+                for tb in tbparams.keys():
+                    self.release_access(tb, tbparams[tb]['aid'])
+            except service_error, e:
+                if status != 'failed' and not force:
+                    raise e
             # Remove the terminated experiment
 …
             self.state_lock.release()
             return {
                     'experiment': exp ,
                     'deallocationLog': "".join(dealloc_list),
+                    }
+            return {
+                    'experiment': exp ,
+                    'deallocationLog': "".join(dealloc_list),
+                    }
         else:
             # Don't forget to release the lock

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset db6b092 for fedd/federation/experiment_control.py

Legend:

fedd/federation/experiment_control.py

Download in other formats: