Changeset 37ed9a5 for fedd


Ignore:
Timestamp:
May 30, 2010 10:46:00 AM (14 years ago)
Author:
Ted Faber <faber@…>
Branches:
axis_example, compt_changes, info-ops, master, version-3.01, version-3.02
Children:
60961f5
Parents:
42cd8a7
Message:

Much more nicely factored now.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • fedd/federation/protogeni_access.py

    r42cd8a7 r37ed9a5  
    366366            raise service_error(service_error.req, "No such allocation")
    367367
    368     # Turn the manifest into a dict were each virtual nodename (i.e. the topdl
    369     # name) has an entry with the allocated machine in hostname and the
    370     # interfaces in 'interfaces'.  I love having XML parser code lying around.
    371368    def manifest_to_dict(self, manifest, ignore_debug=False):
     369        """
     370        Turn the manifest into a dict where each virtual nodename (i.e. the
     371        topdl name) has an entry with the allocated machine in hostname and the
     372        interfaces in 'interfaces'.  I love having XML parser code lying
     373        around.
     374        """
    372375        if self.create_debug and not ignore_debug:
    373376            self.log.debug("Returning null manifest dict")
     
    416419
    417420    def fake_manifest(self, topo):
     421        """
     422        Fake the output of manifest_to_dict with a bunch of generic node and
     423        interface names, for debugging.
     424        """
    418425        node = { }
    419426        for i, e in enumerate([ e for e in topo.elements \
     
    648655                    continue
    649656
    650     def configure_nodes(self, segment_commands, topo, nodes, user, pubkey, secretkey,
    651             stagingdir, tmpdir):
    652 
    653         # These little functions/functors just make things more readable
     657    def write_node_config_script(self, elem, node, user, pubkey,
     658            secretkey, stagingdir, tmpdir):
     659        """
     660        Write out the configuration script that is to run on the node
     661        represented by elem in the topology.  This is called
     662        once per node to configure.
     663        """
     664        # These little functions/functors just make things more readable.  Each
     665        # one encapsulates a small task of copying software files or installing
     666        # them.
    654667        class stage_file_type:
     668            """
     669            Write code copying files from the staging host to the host on which
     670            this will run.
     671            """
    655672            def __init__(self, user, host, stagingdir):
    656673                self.user = user
     
    668685
    669686        def install_tar(script, loc, base):
     687            """
     688            Print code to script to install a tarfile in loc.
     689            """
    670690            tar = "/bin/tar"
    671691            mkdir="/bin/mkdir"
     
    675695
    676696        def install_rpm(script, base):
     697            """
     698            Print code to script to install an rpm
     699            """
    677700            rpm = "/bin/rpm"
    678701            print >>script, "%s --install %s" % (rpm, base)
    679702
     703        ifconfig = "/sbin/ifconfig"
     704        stage_file = stage_file_type(user, self.staging_host, stagingdir)
     705        pname = node.get('hostname', None)
    680706        fed_dir = "/usr/local/federation"
    681707        fed_etc_dir = "%s/etc" % fed_dir
     
    683709        fed_lib_dir = "%s/lib" % fed_dir
    684710
    685         ifconfig = "/sbin/ifconfig"
    686 
    687         stage_file = stage_file_type(user, self.staging_host, stagingdir)
    688 
    689         for e in [ e for e in topo.elements if isinstance(e, topdl.Computer)]:
    690             vname = e.name
    691             node = nodes.get(vname, { })
    692             pname = node.get('hostname', None)
    693             if pname:
    694                 script = open("%s/%s.startup" %(tmpdir, pname), "w")
    695                 # Reset the interfaces to the ones in the topo file
    696                 for i in [ i for i in e.interface \
    697                         if not i.get_attribute('portal')]:
    698                     pinf = node['interfaces'].get(i.name, None)
    699                     addr = i.get_attribute('ip4_address')
    700                     netmask = i.get_attribute('ip4_netmask') or '255.255.255.0'
    701                     if pinf and addr:
    702                         print >>script, \
    703                                 "%s %s %s netmask %s"  % \
    704                                 (ifconfig, pinf, addr, netmask)
    705                     else:
    706                         self.log.error("Missing interface or address for %s" \
    707                                 % i.name)
    708                    
    709                 for l, f in self.federation_software:
     711        if pname:
     712            sfile = "%s/%s.startup" % (tmpdir, pname)
     713            script = open(sfile, "w")
     714            # Reset the interfaces to the ones in the topo file
     715            for i in [ i for i in elem.interface \
     716                    if not i.get_attribute('portal')]:
     717                pinf = node['interfaces'].get(i.name, None)
     718                addr = i.get_attribute('ip4_address')
     719                netmask = i.get_attribute('ip4_netmask') or '255.255.255.0'
     720                if pinf and addr:
     721                    print >>script, \
     722                            "%s %s %s netmask %s"  % \
     723                            (ifconfig, pinf, addr, netmask)
     724                else:
     725                    self.log.error("Missing interface or address for %s" \
     726                            % i.name)
     727               
     728            for l, f in self.federation_software:
     729                base = os.path.basename(f)
     730                stage_file(script, base)
     731                if l: install_tar(script, l, base)
     732                else: install_rpm(script, base)
     733
     734            for s in elem.software:
     735                s_base = s.location.rpartition('/')[2]
     736                stage_file(script, s_base)
     737                if s.install: install_tar(script, s.install, s_base)
     738                else: install_rpm(script, s_base)
     739
     740            for f in ('hosts', pubkey, secretkey, 'client.conf',
     741                    'userconf'):
     742                stage_file(script, f, fed_etc_dir)
     743            if self.sshd:
     744                stage_file(script, self.sshd, fed_bin_dir)
     745            if self.sshd_config:
     746                stage_file(script, self.sshd_config, fed_etc_dir)
     747
     748            # Look in tmpdir to get the names.  They've all been copied
     749            # into the (remote) staging dir
     750            if os.access("%s/%s.gw.conf" % (tmpdir, elem.name), os.R_OK):
     751                stage_file(script, "%s.gw.conf" % elem.name, fed_etc_dir)
     752
     753            # Hackery dackery dock: the ProtoGENI python is really ancient.
     754            # A modern version (though packaged for Mandrake (remember
     755            # Mandrake?  good times, good times)) should be in the
     756            # federation_software list, but we need to move rename is for
     757            # SEER.
     758            print >>script, "rm /usr/bin/python"
     759            print >>script, "ln /usr/bin/python2.4 /usr/bin/python"
     760            # Back to less hacky stuff
     761
     762            # Start commands
     763            if elem.get_attribute('portal') and self.portal_startcommand:
     764                # Install portal software
     765                for l, f in self.portal_software:
    710766                    base = os.path.basename(f)
    711767                    stage_file(script, base)
     
    713769                    else: install_rpm(script, base)
    714770
    715                 for s in e.software:
    716                     s_base = s.location.rpartition('/')[2]
    717                     stage_file(script, s_base)
    718                     if s.install: install_tar(script, s.install, s_base)
    719                     else: install_rpm(script, s_base)
    720 
    721                 for f in ('hosts', pubkey, secretkey, 'client.conf',
    722                         'userconf'):
    723                     stage_file(script, f, fed_etc_dir)
    724                 if self.sshd:
    725                     stage_file(script, self.sshd, fed_bin_dir)
    726                 if self.sshd_config:
    727                     stage_file(script, self.sshd_config, fed_etc_dir)
    728 
    729                 # Look in tmpdir to get the names.  They've all been copied
    730                 # into the (remote) staging dir
    731                 if os.access("%s/%s.gw.conf" % (tmpdir, vname), os.R_OK):
    732                     stage_file(script, "%s.gw.conf" % vname, fed_etc_dir)
    733 
    734                 # Hackery dackery dock: the ProtoGENI python is really ancient.
    735                 # A modern version (though packaged for Mandrake (remember
    736                 # Mandrake?  good times, good times)) should be in the
    737                 # federation_software list, but we need to move rename is for
    738                 # SEER.
    739                 print >>script, "rm /usr/bin/python"
    740                 print >>script, "ln /usr/bin/python2.4 /usr/bin/python"
    741                 # Back to less hacky stuff
    742 
    743                 # Start commands
    744                 if e.get_attribute('portal') and self.portal_startcommand:
    745                     # Install portal software
    746                     for l, f in self.portal_software:
    747                         base = os.path.basename(f)
    748                         stage_file(script, base)
    749                         if l: install_tar(script, l, base)
    750                         else: install_rpm(script, base)
    751 
    752                     # Portals never have a user-specified start command
    753                     print >>script, self.portal_startcommand
    754                 elif self.node_startcommand:
    755                     # XXX: debug
    756                     print >>script, "sudo perl -I%s %simport_key.pl /users/%s/.ssh/authorized_keys /root/.ssh/authorized_keys" % (fed_lib_dir, fed_bin_dir, user)
    757                     # XXX: debug
    758                     if e.get_attribute('startup'):
    759                         print >>script, "%s \\$USER '%s'" % \
    760                                 (self.node_startcommand, e.get_attribute('startup'))
    761                     else:
    762                         print >>script, self.node_startcommand
    763                 script.close()
    764                 if not segment_commands.scp_file("%s/%s.startup" % (tmpdir, pname),
    765                         user, pname):
     771                # Portals never have a user-specified start command
     772                print >>script, self.portal_startcommand
     773            elif self.node_startcommand:
     774                # XXX: debug
     775                print >>script, "sudo perl -I%s %simport_key.pl /users/%s/.ssh/authorized_keys /root/.ssh/authorized_keys" % (fed_lib_dir, fed_bin_dir, user)
     776                # XXX: debug
     777                if elem.get_attribute('startup'):
     778                    print >>script, "%s \\$USER '%s'" % \
     779                            (self.node_startcommand,
     780                                    elem.get_attribute('startup'))
     781                else:
     782                    print >>script, self.node_startcommand
     783            script.close()
     784            return sfile, pname
     785        else:
     786            return None, None
     787
     788
     789    def configure_nodes(self, segment_commands, topo, nodes, user,
     790            pubkey, secretkey, stagingdir, tmpdir):
     791        """
     792        For each node in the topology, generate a script file that copies
     793        software onto it and installs it in the proper places and then runs the
     794        startup command (including the federation commands).
     795        """
     796
     797
     798
     799        for e in [ e for e in topo.elements if isinstance(e, topdl.Computer)]:
     800            vname = e.name
     801            sfile, pname = self.write_node_config_script(e,
     802                    nodes.get(vname, { }),
     803                    user, pubkey, secretkey, stagingdir, tmpdir)
     804            if sfile:
     805                if not segment_commands.scp_file(sfile, user, pname):
    766806                    self.log.error("Could not copy script to %s" % pname)
    767807            else:
     
    769809
    770810    def start_node(self, user, host, node, segment_commands):
     811        """
     812        Copy an identity to a node for the configuration script to be able to
     813        import data and then run the startup script remotely.
     814        """
    771815        # Place an identity on the node so that the copying can succeed
    772816        segment_commands.ssh_cmd(user, host, "scp .ssh/id_rsa %s:.ssh" % node)
     
    775819
    776820    def start_nodes(self, user, host, nodes, segment_commands):
     821        """
     822        Start a thread to initialize each node and wait for them to complete.
     823        Each thread runs start_node.
     824        """
    777825        threads = [ ]
    778826        for n in nodes:
     
    788836            done = [not t.isAlive() for t in threads]
    789837
    790 
    791 
    792 
    793     def start_segment(self, segment_commands, aid, user, rspec, pubkey,
    794             secretkey, ename, stagingdir, tmpdir, certfile, certpw,
    795             export_certfile, topo, connInfo, services, timeout=0):
    796         """
    797         Start a sub-experiment on a federant.
    798 
    799         Get the current state, modify or create as appropriate, ship data
    800         and configs and start the experiment.  There are small ordering
    801         differences based on the initial state of the sub-experiment.
    802         """
    803 
    804         def random_slicename(user):
    805             slicename = user
    806             for i in range(0,5):
    807                 slicename += random.choice(string.ascii_letters)
    808             return slicename
    809 
    810         host = self.staging_host
    811         if not os.access(certfile, os.R_OK):
    812             self.log.error("[start_segment]: Cannot read certfile: %s" % \
    813                     certfile)
    814             return False
    815         ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
    816         # Local software dir
    817         lsoftdir = "%s/software" % tmpdir
    818 
    819         # Open up a temporary file to contain a script for setting up the
    820         # filespace for the new experiment.
     838    def set_up_staging_filespace(self, segment_commands, user, host,
     839            stagingdir):
     840        """
     841        Set up the staging area on the staging machine.  To reduce the number
     842        of ssh commands, we compose a script and execute it remotely.
     843        """
     844
    821845        self.log.info("[start_segment]: creating script file")
    822846        try:
     
    845869            return False
    846870
     871    def initialize_protogeni_context(self, segment_commands, certfile, certpw):
     872        """
     873        Protogeni interactions take a context and a protogeni certificate.
     874        This establishes both for later calls and returns them.
     875        """
     876        if os.access(certfile, os.R_OK):
     877            ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
     878        else:
     879            self.log.error("[start_segment]: Cannot read certfile: %s" % \
     880                    certfile)
     881            return None, None
     882
    847883        try:
    848884            gcred = segment_commands.pg_call(self.sa_url,
     
    851887            raise service_error(service_error.federant,
    852888                    "ProtoGENI: %s" % e)
    853         # Find a slicename not in use
    854         slicename = "fabereGpgL"
     889
     890        return ctxt, gcred
     891
     892    def get_free_slicename(self, segment_commands, user, gcred, ctxt):
     893        """
     894        Find a usable slice name by trying random ones until there's no
     895        collision.
     896        """
     897
     898        def random_slicename(user):
     899            """
     900            Return a random slicename by appending 5 letters to the username.
     901            """
     902            slicename = user
     903            for i in range(0,5):
     904                slicename += random.choice(string.ascii_letters)
     905            return slicename
     906
    855907        while True:
    856908            slicename = random_slicename(user)
     
    866918                break
    867919
    868         self.log.info("Creating %s" % slicename)
    869         f = open("./rspec", "w")
    870         print >>f, "%s" % rspec
    871         f.close()
    872         # Create the slice and allocate resources.  If any of this stuff fails,
    873         # the allocations will time out on PG in short order, so we just raise
    874         # the service_error.
     920        return slicename
     921
     922    def allocate_slice(self, segment_commands, slicename, rspec, gcred, ctxt):
     923        """
     924        Create the slice and allocate resources.  If any of this stuff fails,
     925        the allocations will time out on PG in short order, so we just raise
     926        the service_error.  Return the slice and sliver credentials as well as
     927        the manifest.
     928        """
    875929        try:
    876930            param = {
     
    879933                    'type': 'Slice'
    880934                    }
    881             slice_cred = segment_commands.pg_call(self.sa_url, 'Register', param, ctxt)
     935            slice_cred = segment_commands.pg_call(self.sa_url, 'Register',
     936                    param, ctxt)
    882937            f = open("./slice_cred", "w")
    883938            print >>f, slice_cred
     
    887942                    'credential': gcred,
    888943                    }
    889             keys =  segment_commands.pg_call(self.sa_url, 'GetKeys', param, ctxt)
     944            keys =  segment_commands.pg_call(self.sa_url, 'GetKeys', param,
     945                    ctxt)
    890946            # Grab and redeem a ticket
    891947            param = {
     
    893949                    'rspec': rspec,
    894950                    }
    895             ticket = segment_commands.pg_call(self.cm_url, 'GetTicket', param, ctxt)
     951            ticket = segment_commands.pg_call(self.cm_url, 'GetTicket', param,
     952                    ctxt)
    896953            f = open("./ticket", "w")
    897954            print >>f, ticket
     
    919976                    "ProtoGENI: %s %s" % (e.code, e))
    920977
     978        return (slice_cred, sliver_cred, manifest)
     979
     980    def wait_for_slice(self, segment_commands, slice_cred, ctxt):
     981        """
     982        Wait for the given slice to finish its startup.  Return the final
     983        status.
     984        """
     985        status = 'notready'
     986        try:
     987            while status == 'notready':
     988                param = {
     989                        'credential': slice_cred
     990                        }
     991                r = segment_commands.pg_call(self.cm_url,
     992                        'SliceStatus', param, ctxt)
     993                status = r.get('status', 'notready')
     994                if status == 'notready':
     995                    time.sleep(30)
     996        except segment_commands.ProtoGENIError, e:
     997            raise service_error(service_error.federant,
     998                    "ProtoGENI: %s %s" % (e.code, e))
     999
     1000        return status
     1001
     1002    def delete_slice(self, segment_commands, slice_cred, ctxt):
     1003        """
     1004        Delete the slice resources.  An error from the service is ignored,
     1005        because the soft state will go away anyway.
     1006        """
     1007        try:
     1008            param = { 'credential': slice_cred }
     1009            segment_commands.pg_call(self.cm_url, 'DeleteSliver',
     1010                    param, ctxt)
     1011        except segment_commands.ProtoGENIError, e:
     1012            self.log.warn("ProtoGENI: %s" % e)
     1013
     1014
     1015
     1016    def start_segment(self, segment_commands, aid, user, rspec, pubkey,
     1017            secretkey, ename, stagingdir, tmpdir, certfile, certpw,
     1018            export_certfile, topo, connInfo, services, timeout=0):
     1019        """
     1020        Start a sub-experiment on a federant.
     1021
     1022        Get the current state, modify or create as appropriate, ship data
     1023        and configs and start the experiment.  There are small ordering
     1024        differences based on the initial state of the sub-experiment.
     1025        """
     1026
     1027        # Local software dir
     1028        lsoftdir = "%s/software" % tmpdir
     1029        host = self.staging_host
     1030
     1031        ctxt, gcred = self.initialize_protogeni_context(segment_commands,
     1032                certfile, certpw)
     1033
     1034        if not ctxt: return False
     1035
     1036        self.set_up_staging_filespace(segment_commands, user, host, stagingdir)
     1037        slicename = self.get_free_slicename(segment_commands, user, gcred, ctxt)
     1038        self.log.info("Creating %s" % slicename)
     1039        slice_cred, sliver_cred, manifest = self.allocate_slice(
     1040                segment_commands, slicename, rspec, gcred, ctxt)
     1041
    9211042        # With manifest in hand, we can export the portal node names.
    9221043        if self.create_debug: nodes = self.fake_manifest(topo)
     
    9411062
    9421063        # Now we wait for the nodes to start on PG
    943         status = 'notready'
    944         try:
    945             while status == 'notready':
    946                 param = {
    947                         'credential': slice_cred
    948                         }
    949                 r = segment_commands.pg_call(self.cm_url, 'SliceStatus', param, ctxt)
    950                 print r
    951                 status = r.get('status', 'notready')
    952                 if status == 'notready':
    953                     time.sleep(30)
    954         except segment_commands.ProtoGENIError, e:
    955             raise service_error(service_error.federant,
    956                     "ProtoGENI: %s %s" % (e.code, e))
    957 
     1064        status = self.wait_for_slice(segment_commands, slice_cred, ctxt)
    9581065        if status == 'failed':
    9591066            self.log.error('Sliver failed to start on ProtoGENI')
    960             try:
    961                 param = {
    962                         'credential': slice_cred
    963                         }
    964                 segment_commands.pg_call(self.cm_url, 'DeleteSliver', param, ctxt)
    965             except segment_commands.ProtoGENIError, e:
    966                 raise service_error(service_error.federant,
    967                     "ProtoGENI: %s" % e)
     1067            self.delete_slice(segment_commands, slice_cred, ctxt)
    9681068            return False
    9691069        else:
     1070            # All good: save ProtoGENI info in shared state
    9701071            self.state_lock.acquire()
    9711072            self.allocation[aid]['slice_name'] = slicename
     
    9791080
    9801081        # Now we have configuration to do for ProtoGENI
    981         self.configure_nodes(segment_commands, topo, nodes, user, pubkey, secretkey,
    982                 stagingdir, tmpdir)
     1082        self.configure_nodes(segment_commands, topo, nodes, user, pubkey,
     1083                secretkey, stagingdir, tmpdir)
    9831084
    9841085        self.start_nodes(user, self.staging_host,
     
    12291330                    "%s/%s" % (self.staging_dir, ename), tmpdir, cf, cpw,
    12301331                    certfile, topo, connInfo, services)
    1231         except EnvironmentError:
     1332        except EnvironmentError, e:
    12321333            err = service_error(service_error.internal, "%s" % e)
    12331334        except service_error, e:
     
    12641365                self.log.error('Removing Sliver on ProtoGENI')
    12651366                ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
    1266                 try:
    1267                     param = {
    1268                             'credential': slice_cred
    1269                             }
    1270                     segment_commands.pg_call(self.cm_url, 'DeleteSlice',
    1271                             param, ctxt)
    1272                 except segment_commands.ProtoGENIError, e:
    1273                     raise service_error(service_error.federant,
    1274                         "ProtoGENI: %s" % e)
     1367                self.delete_slice(segment_commands, slice_cred, ctxt)
    12751368            return True
    12761369        except self.ssh_cmd_timeout:
     
    13151408    def renew_segment(self, segment_commands, name, scred, interval,
    13161409            certfile, certpw):
     1410        """
     1411        Linear code through the segment renewal calls.
     1412        """
    13171413        ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
    13181414        try:
    13191415            expiration = time.strftime("%Y%m%dT%H:%M:%S",
    13201416                    time.gmtime(time.time() + interval))
    1321             cred = segment_commands.pg_call(self.sa_url, 'GetCredential', {}, ctxt)
     1417            cred = segment_commands.pg_call(self.sa_url, 'GetCredential',
     1418                    {}, ctxt)
    13221419
    13231420            param = {
     
    13311428                    'type': 'Slice',
    13321429                    }
    1333             slice = segment_commands.pg_call(self.sa_url, 'Resolve', param, ctxt)
     1430            slice = segment_commands.pg_call(self.sa_url, 'Resolve',
     1431                    param, ctxt)
    13341432            uuid = slice.get('uuid', None)
    13351433            if uuid == None:
     
    13421440                    'type': 'Slice',
    13431441                    }
    1344             new_scred = segment_commands.pg_call(self.sa_url, 'GetCredential', param, ctxt)
     1442            new_scred = segment_commands.pg_call(self.sa_url, 'GetCredential',
     1443                    param, ctxt)
    13451444            f = open('./new_slice_cred', 'w')
    13461445            print >>f, new_scred
Note: See TracChangeset for help on using the changeset viewer.