Changeset 5bf359d for fedd


Ignore:
Timestamp:
May 28, 2010 3:16:46 AM (15 years ago)
Author:
Ted Faber <faber@…>
Branches:
axis_example, compt_changes, info-ops, master, version-3.01, version-3.02
Children:
2f6820c
Parents:
06cc65b
Message:

More refactoring. Neaten up the code for creating segments in emulab and make the local and proxy class structures parallel. The code is more readable this way, I hope.

Location:
fedd/federation
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • fedd/federation/local_emulab_segment.py

    r06cc65b r5bf359d  
    1212import util
    1313
    14 class local_emulab_segment:
    15     class cmd_timeout(RuntimeError): pass
    16 
     14from local_segment import local_segment
     15
     16class start_segment(local_segment):
    1717    def __init__(self, log=None, keyfile=None, debug=False):
    18         self.log = log or logging.getLogger(\
    19                 'fedd.access.proxy_emulab_segment')
    20         self.certfile = keyfile
    21         self.debug = debug
    22         self.cmd_timeout = local_emulab_segment.cmd_timeout
    23 
    24     def copy_file(self, src, dest, size=1024):
    25         """
    26         Exceedingly simple file copy.
    27         """
    28 
    29         if not self.debug:
    30             util.copy_file(src, dest, size)
    31         else:
    32             self.log.debug("Copy %s to %s" % (src, dest))
    33 
    34     def cmd_with_timeout(self, cmd, wname=None, timeout=None):
    35         """
    36         Run a command.  If debug is set, the action
    37         is only logged.  Commands are run without stdin, to avoid stray
    38         SIGTTINs. If timeout is given and the command runs longer, a
    39         cmd_timeout exception is thrown.
    40         """
    41 
    42         try:
    43             dnull = open("/dev/null", "w")
    44         except EnvironmentError:
    45             self.log.debug("[cmd_with_timeout]: failed to open /dev/null " + \
    46                     "for redirect")
    47             dnull = Null
    48 
    49         self.log.debug("[cmd_with_timeout]: %s" % cmd)
    50         if not self.debug:
    51             if dnull:
    52                 sub = subprocess.Popen(cmd, shell=True, stdout=dnull,
    53                         stderr=dnull, close_fds=True)
    54             else:
    55                 sub = subprocess.Popen(cmd, shell=True, close_fds=True)
    56             if timeout:
    57                 i = 0
    58                 rv = sub.poll()
    59                 while i < timeout:
    60                     if rv is not None: break
    61                     else:
    62                         time.sleep(1)
    63                         rv = sub.poll()
    64                         i += 1
    65                 else:
    66                     self.log.debug("Process exceeded runtime: %s" % cmd)
    67                     os.kill(sub.pid, signal.SIGKILL)
    68                     raise self.cmd_timeout();
    69                 return rv == 0
    70             else:
    71                 return sub.wait() == 0
    72         else:
    73             if timeout == 0:
    74                 self.log.debug("debug timeout raised on %s " % cmd)
    75                 raise self.cmd_timeout()
    76             else:
    77                 return True
    78 
    79 class start_segment(local_emulab_segment):
    80     def __init__(self, log=None, keyfile=None, debug=False):
    81         local_emulab_segment.__init__(self, log=log,
    82                 keyfile=keyfile, debug=debug)
     18        local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
    8319        self.null = """
    8420set ns [new Simulator]
     
    9329
    9430    def get_state(self, pid, eid):
     31        """
     32        Return the state of the experiment as reported by emulab
     33        """
    9534        # command to test experiment state
    9635        expinfo_exec = "/usr/testbed/bin/expinfo" 
     
    14180
    14281    def get_mapping(self, pid, eid):
     82        """
     83        Get the physical to virtual mapping from the expinfo command and save
     84        it in the self.map member.
     85        """
    14386        # command to test experiment state
    14487        expinfo_exec = "/usr/testbed/bin/expinfo" 
     
    199142            return True
    200143
    201 
    202 
    203     def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
    204         """
    205         Start a sub-experiment on a federant.
    206 
    207         Get the current state, modify or create as appropriate, ship data
    208         and configs and start the experiment.  There are small ordering
    209         differences based on the initial state of the sub-experiment.
    210         """
     144    def make_null_experiment(self, pid, eid, tmpdir):
     145        """
     146        Create a null copy of the experiment so that we capture any logs there
     147        if the modify fails.  Emulab software discards the logs from a failed
     148        startexp.
     149        """
     150        try:
     151            f = open("%s/null.tcl" % tmpdir, "w")
     152            print >>f, self.null
     153            f.close()
     154        except EnvironmentError, e:
     155            raise service_error(service_error.internal,
     156                    "Cannot stage null.tcl: %s" % e.strerror)
     157
     158        timedout = False
     159        try:
     160            if not self.cmd_with_timeout(
     161                    ("/usr/testbed/bin/startexp -i -f -w -p %s " +
     162                    "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",
     163                    timeout=60 * 10):
     164                return False
     165        except self.cmd_timeout:
     166            timedout = True
     167
     168        if timedout:
     169            state = self.get_state(pid, eid)
     170            return state == "swapped"
     171        else:
     172            return True
     173
     174    def set_up_experiment_filespace(self, pid, eid, tmpdir):
    211175        # Configuration directories on this machine
    212176        proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
     
    215179        lsoftdir = "%s/software" % tmpdir
    216180
    217         state = self.get_state(pid, eid)
    218 
    219         if state == 'none':
    220             # Create a null copy of the experiment so that we capture any
    221             # logs there if the modify fails.  Emulab software discards the
    222             # logs from a failed startexp
    223             try:
    224                 f = open("%s/null.tcl" % tmpdir, "w")
    225                 print >>f, self.null
    226                 f.close()
    227             except EnvironmentError, e:
    228                 raise service_error(service_error.internal,
    229                         "Cannot stage null.tcl: %s" % e.strerror)
    230 
    231             timedout = False
    232             try:
    233                 if not self.cmd_with_timeout(
    234                         ("/usr/testbed/bin/startexp -i -f -w -p %s " +
    235                         "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",
    236                         timeout=60 * 10):
    237                     return False
    238             except self.cmd_timeout:
    239                 timedout = True
    240 
    241             if timedout:
    242                 state = self.get_state(pid, eid)
    243                 if state != "swapped":
    244                     return False
    245        
    246181        # Set up the experiment's file space
    247182        if not self.cmd_with_timeout("/bin/rm -rf %s" % proj_dir):
     
    270205            return False
    271206
     207        return True
     208
     209    def swap_in(self, pid, eid):
     210        """
     211        Swap experiment in.  This includes code to cope with the experiment
     212        swapping command timing out, but the experiment being swapped in
     213        successfully.
     214        """
     215        self.log.info("[start_segment]: Swapping %s" % eid)
     216        timedout = False
     217        try:
     218            if not self.cmd_with_timeout(
     219                    "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
     220                    "swapexp", timeout=25*60):
     221                return False
     222        except self.cmd_timeout:
     223            timedout = True
     224       
     225        # If the command was terminated, but completed successfully,
     226        # report success.
     227        if timedout:
     228            self.log.debug("[start_segment]: swapin timed out " +\
     229                    "checking state")
     230            state = self.get_state(pid, eid)
     231            self.log.debug("[start_segment]: state is %s" % state)
     232            return state == 'active'
     233        else:
     234            return True
     235
     236    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
     237        """
     238        Start a sub-experiment on a federant.
     239
     240        Get the current state, modify or create as appropriate, ship data
     241        and configs and start the experiment.  There are small ordering
     242        differences based on the initial state of the sub-experiment.
     243        """
     244
     245        state = self.get_state(pid, eid)
     246
     247        if state == 'none':
     248            if not self.make_null_experiment(pid, eid, tmpdir):
     249                return False
     250
     251        if not self.set_up_experiment_filespace(pid, eid, tmpdir):
     252            return False
     253       
    272254        # Stage the new configuration (active experiments will stay swapped
    273255        # in now)
     
    286268        # Active experiments are still swapped, this swaps the others in.
    287269        if state != 'active':
    288             self.log.info("[start_segment]: Swapping %s" % eid)
    289             timedout = False
    290             try:
    291                 if not self.cmd_with_timeout(
    292                         "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
    293                         "swapexp", timeout=25*60):
    294                     return False
    295             except self.cmd_timeout:
    296                 timedout = True
    297            
    298             # If the command was terminated, but completed successfully,
    299             # report success.
    300             if timedout:
    301                 self.log.debug("[start_segment]: swapin timed out " +\
    302                         "checking state")
    303                 state = self.get_state(pid, eid)
    304                 self.log.debug("[start_segment]: state is %s" % state)
    305                 if state != 'active':
    306                     return False
     270            if not self.swap_in(pid, eid):
     271                return False
    307272        # Everything has gone OK.
    308273        self.get_mapping(pid,eid)
    309274        return True
    310275
    311 class stop_segment(local_emulab_segment):
     276class stop_segment(local_segment):
    312277    def __init__(self, log=None, keyfile=None, debug=False):
    313         local_emulab_segment.__init__(self,
    314                 log=log, keyfile=keyfile, debug=debug)
     278        local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
    315279
    316280    def __call__(self, parent, user, pid, eid):
  • fedd/federation/proxy_emulab_segment.py

    r06cc65b r5bf359d  
    1414
    1515class start_segment(proxy_segment):
     16    """
     17    This starts an experiment on an emulab accessed remotely via ssh.  Most of
     18    the experiment construction has been done by the emulab_access object.  This
     19    just does the wrangling of the emulab commands and collects the node to
     20    physical mapping.  The routine throws service errors.
     21    """
     22
    1623    def __init__(self, log=None, keyfile=None, debug=False):
    1724        proxy_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
     
    2835
    2936    def get_state(self, user, host, pid, eid):
     37        """
     38        Return the state of the experiment as reported by emulab
     39        """
    3040        # command to test experiment state
    3141        expinfo_exec = "/usr/testbed/bin/expinfo" 
     
    8090
    8191    def get_mapping(self, user, host, pid, eid):
     92        """
     93        Get the physical to virtual mapping from the expinfo command and save
     94        it in the self.map member.
     95        """
    8296        # command to test experiment state
    8397        expinfo_exec = "/usr/testbed/bin/expinfo" 
     
    143157
    144158
    145     def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
    146         """
    147         Start a sub-experiment on a federant.
    148 
    149         Get the current state, modify or create as appropriate, ship data
    150         and configs and start the experiment.  There are small ordering
    151         differences based on the initial state of the sub-experiment.
    152         """
    153         # ops node in the federant
    154         host = "%s%s" % (parent.ops, parent.domain)
     159    def make_null_experiment(self, user, host, pid, eid, tmpdir):
     160        """
     161        Create a null copy of the experiment so that we capture any logs there
     162        if the modify fails.  Emulab software discards the logs from a failed
     163        startexp
     164        """
     165        try:
     166            f = open("%s/null.tcl" % tmpdir, "w")
     167            print >>f, self.null
     168            f.close()
     169        except EnvironmentError, e:
     170            raise service_error(service_error.internal,
     171                    "Cannot stage tarfile/rpm: %s" % e.strerror)
     172
     173        if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
     174            return False
     175        self.log.info("[start_segment]: Creating %s" % eid)
     176        timedout = False
     177        try:
     178            if not self.ssh_cmd(user, host,
     179                    ("/usr/testbed/bin/startexp -i -f -w -p %s " +
     180                    "-e %s null.tcl") % (pid, eid), "startexp",
     181                    timeout=60 * 10):
     182                return False
     183        except self.ssh_cmd_timeout:
     184            timedout = True
     185
     186        if timedout:
     187            state = self.get_state(user, host, pid, eid)
     188            if state != "swapped":
     189                return False
     190        return True
     191
     192    def set_up_experiment_filespace(self, user, host, pid, eid, tmpdir):
     193        """
     194        Send all the software and configuration files into the experiment's
     195        file space.  To reduce the number of ssh connections, we script many
     196        changes and execute the script.
     197        """
    155198        # Configuration directories on the remote machine
    156199        proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
     
    159202        lsoftdir = "%s/software" % tmpdir
    160203
    161         state = self.get_state(user, host, pid, eid)
    162 
    163         if not self.scp_file(tclfile, user, host):
    164             return False
    165        
    166         if state == 'none':
    167             # Create a null copy of the experiment so that we capture any
    168             # logs there if the modify fails.  Emulab software discards the
    169             # logs from a failed startexp
    170             try:
    171                 f = open("%s/null.tcl" % tmpdir, "w")
    172                 print >>f, self.null
    173                 f.close()
    174             except EnvironmentError, e:
    175                 raise service_error(service_error.internal,
    176                         "Cannot stage tarfile/rpm: %s" % e.strerror)
    177 
    178             if not self.scp_file("%s/null.tcl" % tmpdir, user, host):
    179                 return False
    180             self.log.info("[start_segment]: Creating %s" % eid)
    181             timedout = False
    182             try:
    183                 if not self.ssh_cmd(user, host,
    184                         ("/usr/testbed/bin/startexp -i -f -w -p %s " +
    185                         "-e %s null.tcl") % (pid, eid), "startexp",
    186                         timeout=60 * 10):
    187                     return False
    188             except self.ssh_cmd_timeout:
    189                 timedout = True
    190 
    191             if timedout:
    192                 state = self.get_state(user, host, pid, eid)
    193                 if state != "swapped":
    194                     return False
    195        
    196204        # Open up a temporary file to contain a script for setting up the
    197205        # filespace for the new experiment.
     
    237245                            user, host, "%s/%s" % (softdir, f)):
    238246                        return False
    239         # Stage the new configuration (active experiments will stay swapped
    240         # in now)
     247        return True
     248
     249    def swap_in(self, user, host, pid, eid):
     250        """
     251        Swap experiment in.  This includes code to cope with the experiment
     252        swapping command timing out, but the experiment being swapped in
     253        successfully.
     254        """
     255        self.log.info("[start_segment]: Swapping %s in" % eid)
     256        timedout = False
     257        try:
     258            if not self.ssh_cmd(user, host,
     259                    "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
     260                    "swapexp", timeout=25*60):
     261                return False
     262        except self.ssh_cmd_timeout:
     263            timedout = True
     264       
     265        # If the command was terminated, but completed successfully,
     266        # report success.
     267        if timedout:
     268            self.log.debug("[start_segment]: swapin timed out " +\
     269                    "checking state")
     270            state = self.get_state(user, host, pid, eid)
     271            self.log.debug("[start_segment]: state is %s" % state)
     272            return state == 'active'
     273
     274        return True
     275
     276
     277    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
     278        """
     279        Start a sub-experiment on a federant.
     280
     281        Get the current state, modify or create as appropriate, ship data
     282        and configs and start the experiment.  There are small ordering
     283        differences based on the initial state of the sub-experiment.
     284        """
     285        # ops node in the federant
     286        host = "%s%s" % (parent.ops, parent.domain)
     287        state = self.get_state(user, host, pid, eid)
     288
     289        if not self.scp_file(tclfile, user, host):
     290            return False
     291       
     292        if state == 'none':
     293            # Put a dummy in place to capture logs, and establish an experiment
     294            # directory.
     295            if not self.make_null_experiment(user, host, pid, eid, tmpdir):
     296                return False
     297
     298        if not self.set_up_experiment_filespace(user, host, pid, eid, tmpdir):
     299            return False
     300
     301        # With the filespace in place, we can modify and swap in.
    241302        self.log.info("[start_segment]: Modifying %s" % eid)
    242303        try:
     
    253314        # Active experiments are still swapped, this swaps the others in.
    254315        if state != 'active':
    255             self.log.info("[start_segment]: Swapping %s" % eid)
    256             timedout = False
    257             try:
    258                 if not self.ssh_cmd(user, host,
    259                         "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
    260                         "swapexp", timeout=25*60):
    261                     return False
    262             except self.ssh_cmd_timeout:
    263                 timedout = True
    264            
    265             # If the command was terminated, but completed successfully,
    266             # report success.
    267             if timedout:
    268                 self.log.debug("[start_segment]: swapin timed out " +\
    269                         "checking state")
    270                 state = self.get_state(user, host, pid, eid)
    271                 self.log.debug("[start_segment]: state is %s" % state)
    272                 return state == 'active'
     316            if not self.swap_in(user, host, pid, eid):
     317                return False
    273318        # Everything has gone OK.
    274319        self.get_mapping(user, host, pid,eid)
  • fedd/federation/proxy_segment.py

    r06cc65b r5bf359d  
    1313
    1414class proxy_segment:
     15    """
     16    Base class for segment starter classes that access their underlying testbed
     17    remotely using ssh.  It is primarily a code repository for commonly used
     18    ssh commands for moving code and logging in.
     19    """
    1520    class ssh_cmd_timeout(RuntimeError): pass
    1621
    1722    def __init__(self, log=None, keyfile=None, debug=False):
    18         self.log = log or logging.getLogger(\
    19                 'fedd.access.proxy_segment')
     23        """
     24        log is the logging.log to print messages to, keyfile is the private key
     25        for ssh interactions and if debug is true, commands are not executed
     26        using ssh.
     27        """
     28        self.log = log or logging.getLogger('fedd.access.proxy_segment')
    2029        self.ssh_privkey_file = keyfile
    2130        self.debug = debug
Note: See TracChangeset for help on using the changeset viewer.