Changeset 5bf359d for fedd/federation
- Timestamp:
- May 28, 2010 3:16:46 AM (14 years ago)
- Branches:
- axis_example, compt_changes, info-ops, master, version-3.01, version-3.02
- Children:
- 2f6820c
- Parents:
- 06cc65b
- Location:
- fedd/federation
- Files:
-
- 1 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
fedd/federation/local_emulab_segment.py
r06cc65b r5bf359d 12 12 import util 13 13 14 class local_emulab_segment: 15 class cmd_timeout(RuntimeError): pass 16 14 from local_segment import local_segment 15 16 class start_segment(local_segment): 17 17 def __init__(self, log=None, keyfile=None, debug=False): 18 self.log = log or logging.getLogger(\ 19 'fedd.access.proxy_emulab_segment') 20 self.certfile = keyfile 21 self.debug = debug 22 self.cmd_timeout = local_emulab_segment.cmd_timeout 23 24 def copy_file(self, src, dest, size=1024): 25 """ 26 Exceedingly simple file copy. 27 """ 28 29 if not self.debug: 30 util.copy_file(src, dest, size) 31 else: 32 self.log.debug("Copy %s to %s" % (src, dest)) 33 34 def cmd_with_timeout(self, cmd, wname=None, timeout=None): 35 """ 36 Run a command. If debug is set, the action 37 is only logged. Commands are run without stdin, to avoid stray 38 SIGTTINs. If timeout is given and the command runs longer, a 39 cmd_timeout exception is thrown. 40 """ 41 42 try: 43 dnull = open("/dev/null", "w") 44 except EnvironmentError: 45 self.log.debug("[cmd_with_timeout]: failed to open /dev/null " + \ 46 "for redirect") 47 dnull = Null 48 49 self.log.debug("[cmd_with_timeout]: %s" % cmd) 50 if not self.debug: 51 if dnull: 52 sub = subprocess.Popen(cmd, shell=True, stdout=dnull, 53 stderr=dnull, close_fds=True) 54 else: 55 sub = subprocess.Popen(cmd, shell=True, close_fds=True) 56 if timeout: 57 i = 0 58 rv = sub.poll() 59 while i < timeout: 60 if rv is not None: break 61 else: 62 time.sleep(1) 63 rv = sub.poll() 64 i += 1 65 else: 66 self.log.debug("Process exceeded runtime: %s" % cmd) 67 os.kill(sub.pid, signal.SIGKILL) 68 raise self.cmd_timeout(); 69 return rv == 0 70 else: 71 return sub.wait() == 0 72 else: 73 if timeout == 0: 74 self.log.debug("debug timeout raised on %s " % cmd) 75 raise self.cmd_timeout() 76 else: 77 return True 78 79 class start_segment(local_emulab_segment): 80 def __init__(self, log=None, keyfile=None, debug=False): 81 local_emulab_segment.__init__(self, 
log=log, 82 keyfile=keyfile, debug=debug) 18 local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug) 83 19 self.null = """ 84 20 set ns [new Simulator] … … 93 29 94 30 def get_state(self, pid, eid): 31 """ 32 Return the state of the experiment as reported by emulab 33 """ 95 34 # command to test experiment state 96 35 expinfo_exec = "/usr/testbed/bin/expinfo" … … 141 80 142 81 def get_mapping(self, pid, eid): 82 """ 83 Get the physical to virtual mapping from the expinfo command and save 84 it in the self.map member. 85 """ 143 86 # command to test experiment state 144 87 expinfo_exec = "/usr/testbed/bin/expinfo" … … 199 142 return True 200 143 201 202 203 def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0): 204 """ 205 Start a sub-experiment on a federant. 206 207 Get the current state, modify or create as appropriate, ship data 208 and configs and start the experiment. There are small ordering 209 differences based on the initial state of the sub-experiment. 210 """ 144 def make_null_experiment(self, pid, eid, tmpdir): 145 """ 146 Create a null copy of the experiment so that we capture any logs there 147 if the modify fails. Emulab software discards the logs from a failed 148 startexp. 
149 """ 150 try: 151 f = open("%s/null.tcl" % tmpdir, "w") 152 print >>f, self.null 153 f.close() 154 except EnvironmentError, e: 155 raise service_error(service_error.internal, 156 "Cannot stage null.tcl: %s" % e.strerror) 157 158 timedout = False 159 try: 160 if not self.cmd_with_timeout( 161 ("/usr/testbed/bin/startexp -i -f -w -p %s " + 162 "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp", 163 timeout=60 * 10): 164 return False 165 except self.cmd_timeout: 166 timedout = True 167 168 if timedout: 169 state = self.get_state(pid, eid) 170 return state == "swapped" 171 else: 172 return True 173 174 def set_up_experiment_filespace(self, pid, eid, tmpdir): 211 175 # Configuration directories on this machine 212 176 proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid) … … 215 179 lsoftdir = "%s/software" % tmpdir 216 180 217 state = self.get_state(pid, eid)218 219 if state == 'none':220 # Create a null copy of the experiment so that we capture any221 # logs there if the modify fails. Emulab software discards the222 # logs from a failed startexp223 try:224 f = open("%s/null.tcl" % tmpdir, "w")225 print >>f, self.null226 f.close()227 except EnvironmentError, e:228 raise service_error(service_error.internal,229 "Cannot stage null.tcl: %s" % e.strerror)230 231 timedout = False232 try:233 if not self.cmd_with_timeout(234 ("/usr/testbed/bin/startexp -i -f -w -p %s " +235 "-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",236 timeout=60 * 10):237 return False238 except self.cmd_timeout:239 timedout = True240 241 if timedout:242 state = self.get_state(pid, eid)243 if state != "swapped":244 return False245 246 181 # Set up the experiment's file space 247 182 if not self.cmd_with_timeout("/bin/rm -rf %s" % proj_dir): … … 270 205 return False 271 206 207 return True 208 209 def swap_in(self, pid, eid): 210 """ 211 Swap experiment in. This includes code to cope with the experiment 212 swaping command timing out, but the experiment being swapped in 213 successfully. 
214 """ 215 self.log.info("[start_segment]: Swapping %s" % eid) 216 timedout = False 217 try: 218 if not self.cmd_with_timeout( 219 "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid), 220 "swapexp", timeout=25*60): 221 return False 222 except self.cmd_timeout: 223 timedout = True 224 225 # If the command was terminated, but completed successfully, 226 # report success. 227 if timedout: 228 self.log.debug("[start_segment]: swapin timed out " +\ 229 "checking state") 230 state = self.get_state(pid, eid) 231 self.log.debug("[start_segment]: state is %s" % state) 232 return state == 'active' 233 else: 234 return True 235 236 def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0): 237 """ 238 Start a sub-experiment on a federant. 239 240 Get the current state, modify or create as appropriate, ship data 241 and configs and start the experiment. There are small ordering 242 differences based on the initial state of the sub-experiment. 243 """ 244 245 state = self.get_state(pid, eid) 246 247 if state == 'none': 248 if not self.make_null_experiment(pid, eid, tmpdir): 249 return False 250 251 if not self.set_up_experiment_filespace(pid, eid, tmpdir): 252 return False 253 272 254 # Stage the new configuration (active experiments will stay swapped 273 255 # in now) … … 286 268 # Active experiments are still swapped, this swaps the others in. 287 269 if state != 'active': 288 self.log.info("[start_segment]: Swapping %s" % eid) 289 timedout = False 290 try: 291 if not self.cmd_with_timeout( 292 "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid), 293 "swapexp", timeout=25*60): 294 return False 295 except self.cmd_timeout: 296 timedout = True 297 298 # If the command was terminated, but completed successfully, 299 # report success. 
300 if timedout: 301 self.log.debug("[start_segment]: swapin timed out " +\ 302 "checking state") 303 state = self.get_state(pid, eid) 304 self.log.debug("[start_segment]: state is %s" % state) 305 if state != 'active': 306 return False 270 if not self.swap_in(pid, eid): 271 return False 307 272 # Everything has gone OK. 308 273 self.get_mapping(pid,eid) 309 274 return True 310 275 311 class stop_segment(local_ emulab_segment):276 class stop_segment(local_segment): 312 277 def __init__(self, log=None, keyfile=None, debug=False): 313 local_emulab_segment.__init__(self, 314 log=log, keyfile=keyfile, debug=debug) 278 local_segment.__init__(self, log=log, keyfile=keyfile, debug=debug) 315 279 316 280 def __call__(self, parent, user, pid, eid): -
fedd/federation/proxy_emulab_segment.py
r06cc65b r5bf359d 14 14 15 15 class start_segment(proxy_segment): 16 """ 17 This starts an experiment on an emulab accessed remotely via ssh. Most of 18 the experiment construction has been done by the emulab_access object. This 19 just does the wrangling of the emulab commands and collects the node to 20 physical mapping. The routine throws service errors. 21 """ 22 16 23 def __init__(self, log=None, keyfile=None, debug=False): 17 24 proxy_segment.__init__(self, log=log, keyfile=keyfile, debug=debug) … … 28 35 29 36 def get_state(self, user, host, pid, eid): 37 """ 38 Return the state of the experiment as reported by emulab 39 """ 30 40 # command to test experiment state 31 41 expinfo_exec = "/usr/testbed/bin/expinfo" … … 80 90 81 91 def get_mapping(self, user, host, pid, eid): 92 """ 93 Get the physical to virtual mapping from the expinfo command and save 94 it in the self.map member. 95 """ 82 96 # command to test experiment state 83 97 expinfo_exec = "/usr/testbed/bin/expinfo" … … 143 157 144 158 145 def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0): 146 """ 147 Start a sub-experiment on a federant. 148 149 Get the current state, modify or create as appropriate, ship data 150 and configs and start the experiment. There are small ordering 151 differences based on the initial state of the sub-experiment. 152 """ 153 # ops node in the federant 154 host = "%s%s" % (parent.ops, parent.domain) 159 def make_null_experiment(self, user, host, pid, eid, tmpdir): 160 """ 161 Create a null copy of the experiment so that we capture any logs there 162 if the modify fails. 
Emulab software discards the logs from a failed 163 startexp 164 """ 165 try: 166 f = open("%s/null.tcl" % tmpdir, "w") 167 print >>f, self.null 168 f.close() 169 except EnvironmentError, e: 170 raise service_error(service_error.internal, 171 "Cannot stage tarfile/rpm: %s" % e.strerror) 172 173 if not self.scp_file("%s/null.tcl" % tmpdir, user, host): 174 return False 175 self.log.info("[start_segment]: Creating %s" % eid) 176 timedout = False 177 try: 178 if not self.ssh_cmd(user, host, 179 ("/usr/testbed/bin/startexp -i -f -w -p %s " + 180 "-e %s null.tcl") % (pid, eid), "startexp", 181 timeout=60 * 10): 182 return False 183 except self.ssh_cmd_timeout: 184 timedout = True 185 186 if timedout: 187 state = self.get_state(user, host, pid, eid) 188 if state != "swapped": 189 return False 190 return True 191 192 def set_up_experiment_filespace(self, user, host, pid, eid, tmpdir): 193 """ 194 Send all the software and configuration files into the experiment's 195 file space. To reduce the number of ssh connections, we script many 196 changes and execute the script. 197 """ 155 198 # Configuration directories on the remote machine 156 199 proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid) … … 159 202 lsoftdir = "%s/software" % tmpdir 160 203 161 state = self.get_state(user, host, pid, eid)162 163 if not self.scp_file(tclfile, user, host):164 return False165 166 if state == 'none':167 # Create a null copy of the experiment so that we capture any168 # logs there if the modify fails. 
Emulab software discards the169 # logs from a failed startexp170 try:171 f = open("%s/null.tcl" % tmpdir, "w")172 print >>f, self.null173 f.close()174 except EnvironmentError, e:175 raise service_error(service_error.internal,176 "Cannot stage tarfile/rpm: %s" % e.strerror)177 178 if not self.scp_file("%s/null.tcl" % tmpdir, user, host):179 return False180 self.log.info("[start_segment]: Creating %s" % eid)181 timedout = False182 try:183 if not self.ssh_cmd(user, host,184 ("/usr/testbed/bin/startexp -i -f -w -p %s " +185 "-e %s null.tcl") % (pid, eid), "startexp",186 timeout=60 * 10):187 return False188 except self.ssh_cmd_timeout:189 timedout = True190 191 if timedout:192 state = self.get_state(user, host, pid, eid)193 if state != "swapped":194 return False195 196 204 # Open up a temporary file to contain a script for setting up the 197 205 # filespace for the new experiment. … … 237 245 user, host, "%s/%s" % (softdir, f)): 238 246 return False 239 # Stage the new configuration (active experiments will stay swapped 240 # in now) 247 return True 248 249 def swap_in(self, user, host, pid, eid): 250 """ 251 Swap experiment in. This includes code to cope with the experiment 252 swaping command timing out, but the experiment being swapped in 253 successfully. 254 """ 255 self.log.info("[start_segment]: Swapping %s in" % eid) 256 timedout = False 257 try: 258 if not self.ssh_cmd(user, host, 259 "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid), 260 "swapexp", timeout=25*60): 261 return False 262 except self.ssh_cmd_timeout: 263 timedout = True 264 265 # If the command was terminated, but completed successfully, 266 # report success. 
267 if timedout: 268 self.log.debug("[start_segment]: swapin timed out " +\ 269 "checking state") 270 state = self.get_state(user, host, pid, eid) 271 self.log.debug("[start_segment]: state is %s" % state) 272 return state == 'active' 273 274 return True 275 276 277 def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0): 278 """ 279 Start a sub-experiment on a federant. 280 281 Get the current state, modify or create as appropriate, ship data 282 and configs and start the experiment. There are small ordering 283 differences based on the initial state of the sub-experiment. 284 """ 285 # ops node in the federant 286 host = "%s%s" % (parent.ops, parent.domain) 287 state = self.get_state(user, host, pid, eid) 288 289 if not self.scp_file(tclfile, user, host): 290 return False 291 292 if state == 'none': 293 # Put a dummy in place to capture logs, and establish an experiment 294 # directory. 295 if not self.make_null_experiment(user, host, pid, eid, tmpdir): 296 return False 297 298 if not self.set_up_experiment_filespace(user, host, pid, eid, tmpdir): 299 return False 300 301 # With the filespace in place, we can modify and swap in. 241 302 self.log.info("[start_segment]: Modifying %s" % eid) 242 303 try: … … 253 314 # Active experiments are still swapped, this swaps the others in. 254 315 if state != 'active': 255 self.log.info("[start_segment]: Swapping %s" % eid) 256 timedout = False 257 try: 258 if not self.ssh_cmd(user, host, 259 "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid), 260 "swapexp", timeout=25*60): 261 return False 262 except self.ssh_cmd_timeout: 263 timedout = True 264 265 # If the command was terminated, but completed successfully, 266 # report success. 
267 if timedout: 268 self.log.debug("[start_segment]: swapin timed out " +\ 269 "checking state") 270 state = self.get_state(user, host, pid, eid) 271 self.log.debug("[start_segment]: state is %s" % state) 272 return state == 'active' 316 if not self.swap_in(user, host, pid, eid): 317 return False 273 318 # Everything has gone OK. 274 319 self.get_mapping(user, host, pid,eid) -
fedd/federation/proxy_segment.py
r06cc65b r5bf359d 13 13 14 14 class proxy_segment: 15 """ 16 Base class for segment starter classes that access their underlying testbed 17 remotely using ssh. It is primarily a code repository for commonly used 18 ssh commands for moving code and logging in. 19 """ 15 20 class ssh_cmd_timeout(RuntimeError): pass 16 21 17 22 def __init__(self, log=None, keyfile=None, debug=False): 18 self.log = log or logging.getLogger(\ 19 'fedd.access.proxy_segment') 23 """ 24 log is the logging.log to print messages to, keyfile is the private key 25 for ssh interactions and if debug is true, commands are not executed 26 using ssh. 27 """ 28 self.log = log or logging.getLogger('fedd.access.proxy_segment') 20 29 self.ssh_privkey_file = keyfile 21 30 self.debug = debug
Note: See TracChangeset for help on using the changeset viewer.