Context Navigation

← Previous Change
Next Change →

Changeset cf0ff4f for fedd/federation

Timestamp:

Dec 6, 2010 4:50:57 PM (14 years ago)

Author:

Ted Faber <faber@…>

Branches:

axis_example, compt_changes, info-ops, master

Children:

35a5879

Parents:

5ecb9a3

Message:

End of a detangling pass.

There are still some functions that are too long, but overall this is a
cleaner version of the experiment controller that's somewhat easier to
read and maintain.

Closes #10

File:

: 1 edited

fedd/federation/experiment_control.py (modified) (15 diffs)

Legend:

: Unmodified
: Added
: Removed

fedd/federation/experiment_control.py

-                      r5ecb9a3
+                      rcf0ff4f
             self.log.error("Pickling problem (TypeError): %s" % e)
+    def remove_dirs(self, dir):
+        """
+        Remove the directory tree and all files rooted at dir.  Log any errors,
+        but continue.
+        """
+        self.log.debug("[removedirs]: removing %s" % dir)
+        try:
+            for path, dirs, files in os.walk(dir, topdown=False):
+                for f in files:
+                    os.remove(os.path.join(path, f))
+                for d in dirs:
+                    os.rmdir(os.path.join(path, d))
+            os.rmdir(dir)
+        except EnvironmentError, e:
+            self.log.error("Error deleting directory tree in %s" % e);
+    @staticmethod
+    def make_temp_certfile(expcert, tmpdir):
+        """
+        make a protected copy of the access certificate so the experiment
+        controller can act as the experiment principal.  mkstemp is the most
+        secure way to do that. The directory should be created by
+        mkdtemp.  Return the filename.
+        """
+        if expcert and tmpdir:
+            try:
+                certf, certfn = tempfile.mkstemp(suffix=".pem", dir=tmpdir)
+                f = os.fdopen(certf, 'w')
+                print >> f, expcert
+                f.close()
+            except EnvironmentError, e:
+                raise service_error(service_error.internal,
+                        "Cannot create temp cert file?")
+            return certfn
+        else:
+            return None
     def generate_ssh_keys(self, dest, type="rsa" ):
 …
                         (self.testbed, e))
                 return False
     def allocate_resources(self, allocated, masters, eid, expid,
 …
                             "new_experiment for fedid %s"  % fid)
-        pid = "dummy"
-        gid = "dummy"
         # Generate an ID for the experiment (slice) and a certificate that the
         # allocator can use to prove they own it.  We'll ship it back through
 …
         return rv
+    # create_experiment sub-functions
     @staticmethod
     def get_create_key(req):
+    def get_experiment_key(req, field='experimentID'):
         """
         Parse the experiment identifiers out of the request (the request body
 …
         """
         # Get the experiment access
         exp = req.get('experimentID', None)
+        exp = req.get(field, None)
         if exp:
             if exp.has_key('fedid'):
 …
         return masters, pmasters
+    def create_experiment(self, req, fid):
+        """
+        The external interface to experiment creation called from the
+        dispatcher.
+        Creates a working directory, splits the incoming description using the
+        splitter script and parses out the various subsections using the
+        classes above.  Once each sub-experiment is created, use pooled threads
+        to instantiate them and start it all up.
+        """
+        req = req.get('CreateRequestBody', None)
+        if req:
+            key = self.get_create_key(req)
+        else:
+            raise service_error(service_error.req,
+                    "Bad request format (no CreateRequestBody)")
+        # Import information from the requester
+        if self.auth.import_credentials(data_list=req.get('credential', [])):
+            self.auth.save()
+        # Make sure that the caller can talk to us
+        self.check_experiment_access(fid, key)
+        # Install the testbed map entries supplied with the request into a copy
+        # of the testbed map.
+        tbmap = dict(self.tbmap)
+        for m in req.get('testbedmap', []):
+            if 'testbed' in m and 'uri' in m:
+                tbmap[m['testbed']] = m['uri']
+        # a place to work
+        try:
+            tmpdir = tempfile.mkdtemp(prefix="split-")
+            os.mkdir(tmpdir+"/keys")
+        except EnvironmentError:
+            raise service_error(service_error.internal, "Cannot create tmp dir")
+    def generate_keys_and_hosts(self, tmpdir, expid, hosts, tbparams):
+        """
+        Create the ssh keys necessary for interconnecting the portal nodes and
+        the global hosts file for letting each segment know about the IP
+        addresses in play.  Save these into the repo.  Add attributes to the
+        authorizer allowing access controllers to download them and return a
+        set of attributes that inform the segments where to find this stuff.
+        May raise service_errors if there are problems.
+        """
         gw_pubkey_base = "fed.%s.pub" % self.ssh_type
         gw_secretkey_base = "fed.%s" % self.ssh_type
         gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
         gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
+        tbparams = { }
+        eid, expid, expcert_file = \
+                self.get_experiment_ids_and_start(key, tmpdir)
+        # This catches exceptions to clear the placeholder if necessary
+        try:
+            if not (eid and expid):
+                raise service_error(service_error.internal,
+                        "Cannot find local experiment info!?")
+            try:
+                self.generate_ssh_keys(gw_secretkey, self.ssh_type)
+            except ValueError:
+                raise service_error(service_error.server_config,
+                        "Bad key type (%s)" % self.ssh_type)
+            top = self.get_topology(req, tmpdir)
+            # Assign the IPs
+            hosts, ip_allocator = self.allocate_ips_to_topo(top)
+            # Find the testbeds to look up
+            tb_hosts = { }
+            testbeds = [ ]
+            for e in top.elements:
+                if isinstance(e, topdl.Computer):
+                    tb = e.get_attribute('testbed') or 'default'
+                    if tb in tb_hosts: tb_hosts[tb].append(e.name)
+                    else:
+                        tb_hosts[tb] = [ e.name ]
+                        testbeds.append(tb)
+            masters, pmasters = self.get_testbed_services(req)
+            allocated = { }         # Testbeds we can access
+            topo ={ }               # Sub topologies
+            connInfo = { }          # Connection information
+            self.get_access_to_testbeds(testbeds, fid, allocated,
+                    tbparams, masters, tbmap, expid, expcert_file)
+            self.split_topology(top, topo, testbeds)
+            # Copy configuration files into the remote file store
+            # The config urlpath
+            configpath = "/%s/config" % expid
+            # The config file system location
+            configdir ="%s%s" % ( self.repodir, configpath)
+            try:
+                os.makedirs(configdir)
+            except EnvironmentError, e:
+                raise service_error(service_error.internal,
+                        "Cannot create config directory: %s" % e)
+            try:
+                f = open("%s/hosts" % configdir, "w")
+                f.write('\n'.join(hosts))
+                f.close()
+            except EnvironmentError, e:
+                raise service_error(service_error.internal,
+                        "Cannot write hosts file: %s" % e)
+            try:
+                copy_file("%s" % gw_pubkey, "%s/%s" % \
+                        (configdir, gw_pubkey_base))
+                copy_file("%s" % gw_secretkey, "%s/%s" % \
+                        (configdir, gw_secretkey_base))
+            except EnvironmentError, e:
+                raise service_error(service_error.internal,
+                        "Cannot copy keyfiles: %s" % e)
+            # Allow the individual testbeds to access the configuration files.
+            for tb in tbparams.keys():
+                asignee = tbparams[tb]['allocID']['fedid']
+                for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
+                    self.auth.set_attribute(asignee, "%s/%s" % \
+                            (configpath, f))
+            part = experiment_partition(self.auth, self.store_url, tbmap,
+                    self.muxmax, self.direct_transit)
+            part.add_portals(top, topo, eid, pmasters, tbparams, ip_allocator,
+                    connInfo, expid)
+            # Now get access to the dynamic testbeds (those added above)
+            for tb in [ t for t in topo if t not in allocated]:
+                #XXX: ABAC
+                if self.auth_type =='legacy':
+                    self.get_legacy_access(tb, None, tbparams, access_user,
+                            masters, tbmap)
+                elif self.auth_type == 'abac':
+                    self.get_access(tb, tbparams, fid, masters, tbmap,
+                            expid, expcert_file)
+                else:
+                    raise service_error(service_error.internal,
+                            "Unknown auth_type %s" % self.auth_type)
+                allocated[tb] = 1
+                store_keys = topo[tb].get_attribute('store_keys')
+                # Give the testbed access to keys it exports or imports
+                if store_keys:
+                    for sk in store_keys.split(" "):
+                        self.auth.set_attribute(\
+                                tbparams[tb]['allocID']['fedid'], sk)
+        try:
+            self.generate_ssh_keys(gw_secretkey, self.ssh_type)
+        except ValueError:
+            raise service_error(service_error.server_config,
+                    "Bad key type (%s)" % self.ssh_type)
+        # Copy configuration files into the remote file store
+        # The config urlpath
+        configpath = "/%s/config" % expid
+        # The config file system location
+        configdir ="%s%s" % ( self.repodir, configpath)
+        try:
+            os.makedirs(configdir)
+        except EnvironmentError, e:
+            raise service_error(service_error.internal,
+                    "Cannot create config directory: %s" % e)
+        try:
+            f = open("%s/hosts" % configdir, "w")
+            print >> f, string.join(hosts, '\n')
+            f.close()
+        except EnvironmentError, e:
+            raise service_error(service_error.internal,
+                    "Cannot write hosts file: %s" % e)
+        try:
+            copy_file("%s" % gw_pubkey, "%s/%s" % \
+                    (configdir, gw_pubkey_base))
+            copy_file("%s" % gw_secretkey, "%s/%s" % \
+                    (configdir, gw_secretkey_base))
+        except EnvironmentError, e:
+            raise service_error(service_error.internal,
+                    "Cannot copy keyfiles: %s" % e)
+        # Allow the individual testbeds to access the configuration files.
+        for tb in tbparams.keys():
+            asignee = tbparams[tb]['allocID']['fedid']
+            for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
+                self.auth.set_attribute(asignee, "%s/%s" % \
+                        (configpath, f))
             self.auth.save()
-            self.wrangle_software(expid, top, topo, tbparams)
-            vtopo = topdl.topology_to_vtopo(top)
-            vis = self.genviz(vtopo)
-            # save federant information
-            for k in allocated.keys():
-                tbparams[k]['federant'] = {
-                        'name': [ { 'localname' : eid} ],
-                        'allocID' : tbparams[k]['allocID'],
-                        'uri': tbparams[k]['uri'],
+                    }
-            self.state_lock.acquire()
-            self.state[eid]['vtopo'] = vtopo
-            self.state[eid]['vis'] = vis
-            self.state[eid]['experimentdescription'] = \
-                    { 'topdldescription': top.to_dict() }
-            self.state[eid]['federant'] = \
-                    [ tbparams[tb]['federant'] for tb in tbparams.keys() \
-                        if tbparams[tb].has_key('federant') ]
-            if self.state_filename:
-                self.write_state()
-            self.state_lock.release()
-        except service_error, e:
-            # If something goes wrong in the parse (usually an access error)
-            # clear the placeholder state.  From here on out the code delays
-            # exceptions.  Failing at this point returns a fault to the remote
-            # caller.
-            self.state_lock.acquire()
-            del self.state[eid]
-            del self.state[expid]
-            if self.state_filename: self.write_state()
-            self.state_lock.release()
-            if tmpdir and self.cleanup:
-                self.remove_dirs(tmpdir)
-            raise e
-        # Start the background swapper and return the starting state.  From
-        # here on out, the state will stick around a while.
-        # Let users touch the state
-        self.auth.set_attribute(fid, expid)
-        self.auth.set_attribute(expid, expid)
-        # Override fedids can manipulate state as well
-        for o in self.overrides:
-            self.auth.set_attribute(o, expid)
-        self.auth.save()
-        # Create a logger that logs to the experiment's state object as well as
-        # to the main log file.
-        alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
-        alloc_collector = self.list_log(self.state[eid]['log'])
-        h = logging.StreamHandler(alloc_collector)
-        # XXX: there should be a global one of these rather than repeating the
-        # code.
-        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
-                    '%d %b %y %H:%M:%S'))
-        alloc_log.addHandler(h)
         attrs = [
+                {
 …
                 },
+            ]
+        # transit and disconnected testbeds may not have a connInfo entry.
+        # Fill in the blanks.
+        for t in allocated.keys():
+            if not connInfo.has_key(t):
+                connInfo[t] = { }
+        return attrs
+    def get_vtopo(self, req, fid):
+        """
+        Return the stored virtual topology for this experiment
+        """
+        rv = None
+        state = None
+        req = req.get('VtopoRequestBody', None)
+        if not req:
+            raise service_error(service_error.req,
+                    "Bad request format (no VtopoRequestBody)")
+        exp = req.get('experiment', None)
+        if exp:
+            if exp.has_key('fedid'):
+                key = exp['fedid']
+                keytype = "fedid"
+            elif exp.has_key('localname'):
+                key = exp['localname']
+                keytype = "localname"
+            else:
+                raise service_error(service_error.req, "Unknown lookup type")
+        else:
+            raise service_error(service_error.req, "No request?")
+        self.check_experiment_access(fid, key)
+        self.state_lock.acquire()
+        if self.state.has_key(key):
+            if self.state[key].has_key('vtopo'):
+                rv = { 'experiment' : {keytype: key },\
+                        'vtopo': self.state[key]['vtopo'],\
+                    }
+            else:
+                state = self.state[key]['experimentStatus']
+        self.state_lock.release()
+        if rv: return rv
+        else:
+            if state:
+                raise service_error(service_error.partial,
+                        "Not ready: %s" % state)
+            else:
+                raise service_error(service_error.req, "No such experiment")
+    def get_vis(self, req, fid):
+        """
+        Return the stored visualization for this experiment
+        """
+        rv = None
+        state = None
+        req = req.get('VisRequestBody', None)
+        if not req:
+            raise service_error(service_error.req,
+                    "Bad request format (no VisRequestBody)")
+        exp = req.get('experiment', None)
+        if exp:
+            if exp.has_key('fedid'):
+                key = exp['fedid']
+                keytype = "fedid"
+            elif exp.has_key('localname'):
+                key = exp['localname']
+                keytype = "localname"
+            else:
+                raise service_error(service_error.req, "Unknown lookup type")
+        else:
+            raise service_error(service_error.req, "No request?")
+        self.check_experiment_access(fid, key)
+        self.state_lock.acquire()
+        if self.state.has_key(key):
+            if self.state[key].has_key('vis'):
+                rv =  { 'experiment' : {keytype: key },\
+                        'vis': self.state[key]['vis'],\
+                        }
+            else:
+                state = self.state[key]['experimentStatus']
+        self.state_lock.release()
+        if rv: return rv
+        else:
+            if state:
+                raise service_error(service_error.partial,
+                        "Not ready: %s" % state)
+            else:
+                raise service_error(service_error.req, "No such experiment")
+    def save_federant_information(self, allocated, tbparams, eid, vtopo, vis,
+            top):
+        """
+        Store the various data that have changed in the experiment state
+        between when it was started and the beginning of resource allocation.
+        This is basically the information about each local allocation.  This
+        fills in the values of the placeholder allocation in the state.
+        """
+        # save federant information
+        for k in allocated.keys():
+            tbparams[k]['federant'] = {
+                    'name': [ { 'localname' : eid} ],
+                    'allocID' : tbparams[k]['allocID'],
+                    'uri': tbparams[k]['uri'],
+                }
+        self.state_lock.acquire()
+        self.state[eid]['vtopo'] = vtopo
+        self.state[eid]['vis'] = vis
+        self.state[eid]['experimentdescription'] = \
+                { 'topdldescription': top.to_dict() }
+        self.state[eid]['federant'] = \
+                [ tbparams[tb]['federant'] for tb in tbparams.keys() \
+                    if tbparams[tb].has_key('federant') ]
+        if self.state_filename:
+            self.write_state()
+        self.state_lock.release()
+    def clear_placeholder(self, eid, expid, tmpdir):
+        """
+        Clear the placeholder and remove any allocated temporary dir.
+        """
+        self.state_lock.acquire()
+        del self.state[eid]
+        del self.state[expid]
+        if self.state_filename: self.write_state()
+        self.state_lock.release()
+        if tmpdir and self.cleanup:
+            self.remove_dirs(tmpdir)
+    # end of create_experiment sub-functions
+    def create_experiment(self, req, fid):
+        """
+        The external interface to experiment creation called from the
+        dispatcher.
+        Creates a working directory, splits the incoming description using the
+        splitter script and parses out the various subsections using the
+        classes above.  Once each sub-experiment is created, use pooled threads
+        to instantiate them and start it all up.
+        """
+        req = req.get('CreateRequestBody', None)
+        if req:
+            key = self.get_experiment_key(req)
+        else:
+            raise service_error(service_error.req,
+                    "Bad request format (no CreateRequestBody)")
+        # Import information from the requester
+        if self.auth.import_credentials(data_list=req.get('credential', [])):
+            self.auth.save()
+        # Make sure that the caller can talk to us
+        self.check_experiment_access(fid, key)
+        # Install the testbed map entries supplied with the request into a copy
+        # of the testbed map.
+        tbmap = dict(self.tbmap)
+        for m in req.get('testbedmap', []):
+            if 'testbed' in m and 'uri' in m:
+                tbmap[m['testbed']] = m['uri']
+        # a place to work
+        try:
+            tmpdir = tempfile.mkdtemp(prefix="split-")
+            os.mkdir(tmpdir+"/keys")
+        except EnvironmentError:
+            raise service_error(service_error.internal, "Cannot create tmp dir")
+        tbparams = { }
+        eid, expid, expcert_file = \
+                self.get_experiment_ids_and_start(key, tmpdir)
+        # This catches exceptions to clear the placeholder if necessary
+        try:
+            if not (eid and expid):
+                raise service_error(service_error.internal,
+                        "Cannot find local experiment info!?")
+            top = self.get_topology(req, tmpdir)
+            # Assign the IPs
+            hosts, ip_allocator = self.allocate_ips_to_topo(top)
+            # Find the testbeds to look up
+            tb_hosts = { }
+            testbeds = [ ]
+            for e in top.elements:
+                if isinstance(e, topdl.Computer):
+                    tb = e.get_attribute('testbed') or 'default'
+                    if tb in tb_hosts: tb_hosts[tb].append(e.name)
+                    else:
+                        tb_hosts[tb] = [ e.name ]
+                        testbeds.append(tb)
+            masters, pmasters = self.get_testbed_services(req)
+            allocated = { }         # Testbeds we can access
+            topo ={ }               # Sub topologies
+            connInfo = { }          # Connection information
+            self.get_access_to_testbeds(testbeds, fid, allocated,
+                    tbparams, masters, tbmap, expid, expcert_file)
+            self.split_topology(top, topo, testbeds)
+            attrs = self.generate_keys_and_hosts(tmpdir, expid, hosts, tbparams)
+            part = experiment_partition(self.auth, self.store_url, tbmap,
+                    self.muxmax, self.direct_transit)
+            part.add_portals(top, topo, eid, pmasters, tbparams, ip_allocator,
+                    connInfo, expid)
+            # Now get access to the dynamic testbeds (those added above)
+            for tb in [ t for t in topo if t not in allocated]:
+                self.get_access(tb, tbparams, fid, masters, tbmap,
+                        expid, expcert_file)
+                allocated[tb] = 1
+                store_keys = topo[tb].get_attribute('store_keys')
+                # Give the testbed access to keys it exports or imports
+                if store_keys:
+                    for sk in store_keys.split(" "):
+                        self.auth.set_attribute(\
+                                tbparams[tb]['allocID']['fedid'], sk)
+            self.auth.save()
+            # transit and disconnected testbeds may not have a connInfo entry.
+            # Fill in the blanks.
+            for t in allocated.keys():
+                if not connInfo.has_key(t):
+                    connInfo[t] = { }
+            self.wrangle_software(expid, top, topo, tbparams)
+            vtopo = topdl.topology_to_vtopo(top)
+            vis = self.genviz(vtopo)
+            self.save_federant_information(allocated, tbparams, eid, vtopo,
+                    vis, top)
+        except service_error, e:
+            # If something goes wrong in the parse (usually an access error)
+            # clear the placeholder state.  From here on out the code delays
+            # exceptions.  Failing at this point returns a fault to the remote
+            # caller.
+            self.clear_placeholder(eid, expid, tmpdir)
+            raise e
+        # Start the background swapper and return the starting state.  From
+        # here on out, the state will stick around a while.
+        # Let users touch the state
+        self.auth.set_attribute(fid, expid)
+        self.auth.set_attribute(expid, expid)
+        # Override fedids can manipulate state as well
+        for o in self.overrides:
+            self.auth.set_attribute(o, expid)
+        self.auth.save()
+        # Create a logger that logs to the experiment's state object as well as
+        # to the main log file.
+        alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
+        alloc_collector = self.list_log(self.state[eid]['log'])
+        h = logging.StreamHandler(alloc_collector)
+        # XXX: there should be a global one of these rather than repeating the
+        # code.
+        h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
+                    '%d %b %y %H:%M:%S'))
+        alloc_log.addHandler(h)
         # Start a thread to do the resource allocation
 …
     def get_handler(self, path, fid):
+        """
+        Perhaps surprisingly named, this function handles HTTP GET requests to
+        this server (SOAP requests are POSTs).
+        """
         self.log.info("Get handler %s %s" % (path, fid))
         if self.auth.check_attribute(fid, path):
 …
         else:
             return (None, None)
-    def get_vtopo(self, req, fid):
-        """
-        Return the stored virtual topology for this experiment
-        """
-        rv = None
-        state = None
-        req = req.get('VtopoRequestBody', None)
-        if not req:
-            raise service_error(service_error.req,
-                    "Bad request format (no VtopoRequestBody)")
-        exp = req.get('experiment', None)
-        if exp:
-            if exp.has_key('fedid'):
-                key = exp['fedid']
-                keytype = "fedid"
-            elif exp.has_key('localname'):
-                key = exp['localname']
-                keytype = "localname"
-            else:
-                raise service_error(service_error.req, "Unknown lookup type")
-        else:
-            raise service_error(service_error.req, "No request?")
-        self.check_experiment_access(fid, key)
-        self.state_lock.acquire()
-        if self.state.has_key(key):
-            if self.state[key].has_key('vtopo'):
-                rv = { 'experiment' : {keytype: key },\
-                        'vtopo': self.state[key]['vtopo'],\
+                    }
-            else:
-                state = self.state[key]['experimentStatus']
-        self.state_lock.release()
-        if rv: return rv
-        else:
-            if state:
-                raise service_error(service_error.partial,
-                        "Not ready: %s" % state)
-            else:
-                raise service_error(service_error.req, "No such experiment")
-    def get_vis(self, req, fid):
-        """
-        Return the stored visualization for this experiment
-        """
-        rv = None
-        state = None
-        req = req.get('VisRequestBody', None)
-        if not req:
-            raise service_error(service_error.req,
-                    "Bad request format (no VisRequestBody)")
-        exp = req.get('experiment', None)
-        if exp:
-            if exp.has_key('fedid'):
-                key = exp['fedid']
-                keytype = "fedid"
-            elif exp.has_key('localname'):
-                key = exp['localname']
-                keytype = "localname"
-            else:
-                raise service_error(service_error.req, "Unknown lookup type")
-        else:
-            raise service_error(service_error.req, "No request?")
-        self.check_experiment_access(fid, key)
-        self.state_lock.acquire()
-        if self.state.has_key(key):
-            if self.state[key].has_key('vis'):
-                rv =  { 'experiment' : {keytype: key },\
-                        'vis': self.state[key]['vis'],\
+                        }
-            else:
-                state = self.state[key]['experimentStatus']
-        self.state_lock.release()
-        if rv: return rv
-        else:
-            if state:
-                raise service_error(service_error.partial,
-                        "Not ready: %s" % state)
-            else:
-                raise service_error(service_error.req, "No such experiment")
     def clean_info_response(self, rv):
 …
         return rv
+    def remove_dirs(self, dir):
+        """
+        Remove the directory tree and all files rooted at dir.  Log any errors,
+        but continue.
+        """
+        self.log.debug("[removedirs]: removing %s" % dir)
+    def check_termination_status(self, fed_exp, force):
+        """
+        Confirm that the experiment is in a valid state to stop (or force it)
+        and return the state - invalid states for deletion and force settings
+        cause exceptions.
+        """
+        self.state_lock.acquire()
+        status = fed_exp.get('experimentStatus', None)
+        if status:
+            if status in ('starting', 'terminating'):
+                if not force:
+                    self.state_lock.release()
+                    raise service_error(service_error.partial,
+                            'Experiment still being created or destroyed')
+                else:
+                    self.log.warning('Experiment in %s state ' % status + \
+                            'being terminated by force.')
+            self.state_lock.release()
+            return status
+        else:
+            # No status??? trouble
+            self.state_lock.release()
+            raise service_error(service_error.internal,
+                    "Experiment has no status!?")
+    def get_termination_info(self, fed_exp):
+        ids = []
+        term_params = { }
+        self.state_lock.acquire()
+        #  experimentID is a list of dicts that are self-describing
+        #  identifiers.  This finds all the fedids and localnames - the
+        #  keys of self.state - and puts them into ids, which is used to delete
+        #  the state after everything is swapped out.
+        for id in fed_exp.get('experimentID', []):
+            if 'fedid' in id:
+                ids.append(id['fedid'])
+                repo = "%s" % id['fedid']
+            if 'localname' in id: ids.append(id['localname'])
+        # Get the experimentAccess - the principal for this experiment.  It
+        # is this principal to which credentials have been delegated, and
+        # as which the experiment controller must act.
+        if 'experimentAccess' in fed_exp and \
+                'X509' in fed_exp['experimentAccess']:
+            expcert = fed_exp['experimentAccess']['X509']
+        else:
+            expcert = None
+        # Collect the allocation/segment ids into a dict keyed by the fedid
+        # of the allocation (or a monotonically increasing integer) that
+        # contains a tuple of uri, aid (which is a dict...)
+        for i, fed in enumerate(fed_exp.get('federant', [])):
+            try:
+                uri = fed['uri']
+                aid = fed['allocID']
+                k = fed['allocID'].get('fedid', i)
+            except KeyError, e:
+                continue
+            term_params[k] = (uri, aid)
+        # Change the experiment state
+        fed_exp['experimentStatus'] = 'terminating'
+        if self.state_filename: self.write_state()
+        self.state_lock.release()
+        return ids, term_params, expcert, repo
+    def deallocate_resources(self, term_params, expcert, status, force,
+            dealloc_log):
+        tmpdir = None
+        # This try block makes sure the tempdir is cleared
         try:
+            for path, dirs, files in os.walk(dir, topdown=False):
+                for f in files:
+                    os.remove(os.path.join(path, f))
+                for d in dirs:
+                    os.rmdir(os.path.join(path, d))
+            os.rmdir(dir)
+        except EnvironmentError, e:
+            self.log.error("Error deleting directory tree in %s" % e);
+    @staticmethod
+    def make_temp_certfile(expcert, tmpdir):
+        """
+        make a protected copy of the access certificate so the experiment
+        controller can act as the experiment principal.  mkstemp is the most
+        secure way to do that. The directory should be created by
+        mkdtemp.  Return the filename.
+        """
+        if expcert and tmpdir:
+            try:
+                certf, certfn = tempfile.mkstemp(suffix=".pem", dir=tmpdir)
+                f = os.fdopen(certf, 'w')
+                print >> f, expcert
+                f.close()
+            except EnvironmentError, e:
+                raise service_error(service_error.internal,
+                        "Cannot create temp cert file?")
+            return certfn
+        else:
+            return None
+            # If no expcert, try the deallocation as the experiment
+            # controller instance.
+            if expcert and self.auth_type != 'legacy':
+                try:
+                    tmpdir = tempfile.mkdtemp(prefix="term-")
+                except EnvironmentError:
+                    raise service_error(service_error.internal,
+                            "Cannot create tmp dir")
+                cert_file = self.make_temp_certfile(expcert, tmpdir)
+                pw = None
+            else:
+                cert_file = self.cert_file
+                pw = self.cert_pwd
+            # Stop everyone.  NB, wait_for_all_done waits until a thread
+            # starts and then completes, so we can't wait if nothing starts.
+            # So, no term_params, no start.
+            if len(term_params) > 0:
+                tp = thread_pool(self.nthreads)
+                for k, (uri, aid) in term_params.items():
+                    # Create and start a thread to stop the segment
+                    tp.wait_for_slot()
+                    t  = pooled_thread(\
+                            target=self.terminate_segment(log=dealloc_log,
+                                testbed=uri,
+                                cert_file=cert_file,
+                                cert_pwd=pw,
+                                trusted_certs=self.trusted_certs,
+                                caller=self.call_TerminateSegment),
+                            args=(uri, aid), name=k,
+                            pdata=tp, trace_file=self.trace_file)
+                    t.start()
+                # Wait for completions
+                tp.wait_for_all_done()
+            # Release the allocations.  Failed experiments have done this
+            # already, and starting experiments may be in odd states, so we
+            # ignore errors releasing those allocations.
+            try:
+                for k, (uri, aid)  in term_params.items():
+                    self.release_access(None, aid, uri=uri,
+                            cert_file=cert_file, cert_pwd=pw)
+            except service_error, e:
+                if status != 'failed' and not force:
+                    raise e
+        # Clean up the tmpdir no matter what
+        finally:
+            if tmpdir: self.remove_dirs(tmpdir)
     def terminate_experiment(self, req, fid):
 …
             raise service_error(service_error.req,
                     "Bad request format (no TerminateRequestBody)")
+        key = self.get_experiment_key(req, 'experiment')
+        self.check_experiment_access(fid, key)
+        exp = req.get('experiment', False)
         force = req.get('force', False)
-        exp = req.get('experiment', None)
-        if exp:
-            if exp.has_key('fedid'):
-                key = exp['fedid']
-                keytype = "fedid"
-            elif exp.has_key('localname'):
-                key = exp['localname']
-                keytype = "localname"
-            else:
-                raise service_error(service_error.req, "Unknown lookup type")
-        else:
-            raise service_error(service_error.req, "No request?")
-        self.check_experiment_access(fid, key)
         dealloc_list = [ ]
 …
         self.state_lock.acquire()
         fed_exp = self.state.get(key, None)
+        self.state_lock.release()
         repo = None
         if fed_exp:
+            # This branch of the conditional holds the lock to generate a
+            # consistent temporary tbparams variable to deallocate experiments.
+            # It releases the lock to do the deallocations and reacquires it to
+            # remove the experiment state when the termination is complete.
+            # First make sure that the experiment creation is complete.
+            status = fed_exp.get('experimentStatus', None)
+            if status:
+                if status in ('starting', 'terminating'):
+                    if not force:
+                        self.state_lock.release()
+                        raise service_error(service_error.partial,
+                                'Experiment still being created or destroyed')
+                    else:
+                        self.log.warning('Experiment in %s state ' % status + \
+                                'being terminated by force.')
+            else:
+                # No status??? trouble
+                self.state_lock.release()
+                raise service_error(service_error.internal,
+                        "Experiment has no status!?")
+            ids = []
+            #  experimentID is a list of dicts that are self-describing
+            #  identifiers.  This finds all the fedids and localnames - the
+            #  keys of self.state - and puts them into ids.
+            for id in fed_exp.get('experimentID', []):
+                if id.has_key('fedid'):
+                    ids.append(id['fedid'])
+                    repo = "%s" % id['fedid']
+                if id.has_key('localname'): ids.append(id['localname'])
+            # Get the experimentAccess - the principal for this experiment.  It
+            # is this principal to which credentials have been delegated, and
+            # as which the experiment controller must act.
+            if 'experimentAccess' in self.state[key] and \
+                    'X509' in self.state[key]['experimentAccess']:
+                expcert = self.state[key]['experimentAccess']['X509']
+            else:
+                expcert = None
+            # Collect the allocation/segment ids into a dict keyed by the fedid
+            # of the allocation (or a monotonically increasing integer) that
+            # contains a tuple of uri, aid (which is a dict...)
+            for i, fed in enumerate(fed_exp.get('federant', [])):
+                try:
+                    uri = fed['uri']
+                    aid = fed['allocID']
+                    k = fed['allocID'].get('fedid', i)
+                except KeyError, e:
+                    continue
+                tbparams[k] = (uri, aid)
+            fed_exp['experimentStatus'] = 'terminating'
+            if self.state_filename: self.write_state()
+            self.state_lock.release()
+            try:
+                tmpdir = tempfile.mkdtemp(prefix="split-")
+            except EnvironmentError:
+                raise service_error(service_error.internal,
+                        "Cannot create tmp dir")
+            # This try block makes sure the tempdir is cleared
+            try:
+                # If no expcert, try the deallocation as the experiment
+                # controller instance.
+                if expcert and self.auth_type != 'legacy':
+                    cert_file = self.make_temp_certfile(expcert, tmpdir)
+                    pw = None
+                else:
+                    cert_file = self.cert_file
+                    pw = self.cert_pwd
+                # Stop everyone.  NB, wait_for_all waits until a thread starts
+                # and then completes, so we can't wait if nothing starts.  So,
+                # no tbparams, no start.
+                if len(tbparams) > 0:
+                    tp = thread_pool(self.nthreads)
+                    for k in tbparams.keys():
+                        # Create and start a thread to stop the segment
+                        tp.wait_for_slot()
+                        uri, aid = tbparams[k]
+                        t  = pooled_thread(\
+                                target=self.terminate_segment(log=dealloc_log,
+                                    testbed=uri,
+                                    cert_file=cert_file,
+                                    cert_pwd=pw,
+                                    trusted_certs=self.trusted_certs,
+                                    caller=self.call_TerminateSegment),
+                                args=(uri, aid), name=k,
+                                pdata=tp, trace_file=self.trace_file)
+                        t.start()
+                    # Wait for completions
+                    tp.wait_for_all_done()
+                # Release the allocations.  Failed experiments have done this
+                # already, and starting experiments may be in odd states, so we
+                # ignore errors releasing those allocations.
+                try:
+                    for k in tbparams.keys():
+                        # This releases access by uri
+                        uri, aid = tbparams[k]
+                        self.release_access(None, aid, uri=uri,
+                                cert_file=cert_file, cert_pwd=pw)
+                except service_error, e:
+                    if status != 'failed' and not force:
+                        raise e
+            # Clean up the tmpdir no matter what
+            finally:
+                self.remove_dirs(tmpdir)
+            status = self.check_termination_status(fed_exp, force)
+            ids, term_params, expcert, repo = self.get_termination_info(fed_exp)
+            self.deallocate_resources(term_params, expcert, status, force,
+                    dealloc_log)
             # Remove the terminated experiment
             self.state_lock.acquire()
             for id in ids:
                 if self.state.has_key(id): del self.state[id]
+                if id in self.state: del self.state[id]
             if self.state_filename: self.write_state()
 …
             return {
                     'experiment': exp ,
                     'deallocationLog': "".join(dealloc_list),
+                    'deallocationLog': string.join(dealloc_list, ''),
+                    }
         else:
-            # Don't forget to release the lock
-            self.state_lock.release()
             raise service_error(service_error.req, "No saved state")
 …
                     "Bad request format (no GetValueRequestBody)")
         name = req['name']
         wait = req['wait']
+        name = req.get('name', None)
+        wait = req.get('wait', False)
         rv = { 'name': name }
         if self.auth.check_attribute(fid, name):
+        if name and self.auth.check_attribute(fid, name):
             self.log.debug("[GetValue] asking for %s " % name)
             try:
 …
                     "Bad request format (no SetValueRequestBody)")
         name = req['name']
         v = req['value']
         if self.auth.check_attribute(fid, name):
+        name = req.get('name', None)
+        v = req.get('value', '')
+        if name and self.auth.check_attribute(fid, name):
             try:
                 self.synch_store.set_value(name, v)

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset cf0ff4f for fedd/federation

Legend:

fedd/federation/experiment_control.py

Download in other formats: