Context Navigation

source: fedd/federation/local_emulab_segment.py @ 9b3627e

Branches: axis_example, compt_changes, info-ops, version-3.01, version-3.02

Last change on this file since 9b3627e was 595191c, checked in by Ted Faber <faber@…>, 15 years ago
longer timeout
Property mode set to `100644`
File size: 7.7 KB

Rev	Line
[11860f52]	1	#!/usr/local/bin/python
	2
	3	import sys, os
	4	import re
	5
	6	import tempfile
	7	import subprocess
	8	import logging
	9	import time
	10	import signal
	11
	12	import util
	13
	14	class local_emulab_segment:
	15	class cmd_timeout(RuntimeError): pass
	16
	17	def __init__(self, log=None, keyfile=None, debug=False):
	18	self.log = log or logging.getLogger(\
	19	'fedd.access.proxy_emulab_segment')
	20	self.certfile = keyfile
	21	self.debug = debug
	22	self.cmd_timeout = local_emulab_segment.cmd_timeout
	23
	24	def copy_file(self, src, dest, size=1024):
	25	"""
	26	Exceedingly simple file copy.
	27	"""
	28
	29	if not self.debug:
	30	util.copy_file(src, dest, size)
	31	else:
	32	self.log.debug("Copy %s to %s" % (src, dest))
	33
	34	def cmd_with_timeout(self, cmd, wname=None, timeout=None):
	35	"""
	36	Run a command. If debug is set, the action
	37	is only logged. Commands are run without stdin, to avoid stray
	38	SIGTTINs. If timeout is given and the command runs longer, a
	39	cmd_timeout exception is thrown.
	40	"""
	41
	42	try:
	43	dnull = open("/dev/null", "w")
	44	except IOError:
	45	self.log.debug("[cmd_with_timeout]: failed to open /dev/null " + \
	46	"for redirect")
	47	dnull = Null
	48
	49	self.log.debug("[cmd_with_timeout]: %s" % cmd)
	50	if not self.debug:
	51	if dnull:
	52	sub = subprocess.Popen(cmd, shell=True, stdout=dnull,
	53	stderr=dnull, close_fds=True)
	54	else:
	55	sub = subprocess.Popen(cmd, shell=True, close_fds=True)
	56	if timeout:
	57	i = 0
	58	rv = sub.poll()
	59	while i < timeout:
	60	if rv is not None: break
	61	else:
	62	time.sleep(1)
	63	rv = sub.poll()
	64	i += 1
	65	else:
	66	self.log.debug("Process exceeded runtime: %s" % cmd)
	67	os.kill(sub.pid, signal.SIGKILL)
	68	raise self.cmd_timeout();
	69	return rv == 0
	70	else:
	71	return sub.wait() == 0
	72	else:
	73	if timeout == 0:
	74	self.log.debug("debug timeout raised on %s " % cmd)
	75	raise self.cmd_timeout()
	76	else:
	77	return True
	78
	79	class start_segment(local_emulab_segment):
	80	def __init__(self, log=None, keyfile=None, debug=False):
	81	local_emulab_segment.__init__(self, log=log,
	82	keyfile=keyfile, debug=debug)
	83	self.null = """
	84	set ns [new Simulator]
	85	source tb_compat.tcl
	86
	87	set a [$ns node]
	88
	89	$ns rtproto Session
	90	$ns run
	91	"""
	92
	93	def get_state(self, pid, eid):
	94	# command to test experiment state
	95	expinfo_exec = "/usr/testbed/bin/expinfo"
	96	# Regular expressions to parse the expinfo response
	97	state_re = re.compile("State:\s+(\w+)")
	98	no_exp_re = re.compile("^No\s+such\s+experiment")
	99	swapping_re = re.compile("^No\s+information\s+available.")
	100	state = None # Experiment state parsed from expinfo
	101	# The expinfo ssh command. Note the identity restriction to use
	102	# only the identity provided in the pubkey given.
	103	cmd = [ expinfo_exec, pid, eid]
	104
	105	dev_null = None
	106	try:
	107	dev_null = open("/dev/null", "a")
	108	except IOError, e:
	109	self.log.error("[get_state]: can't open /dev/null: %s" %e)
	110
	111	if self.debug:
	112	state = 'swapped'
	113	rv = 0
	114	else:
	115	self.log.debug("Checking state")
	116	status = subprocess.Popen(cmd, stdout=subprocess.PIPE,
	117	stderr=dev_null, close_fds=True)
	118	for line in status.stdout:
	119	m = state_re.match(line)
	120	if m: state = m.group(1)
	121	else:
	122	for reg, st in ((no_exp_re, "none"),
	123	(swapping_re, "swapping")):
	124	m = reg.match(line)
	125	if m: state = st
	126	rv = status.wait()
	127
	128	# If the experiment is not present the subcommand returns a
	129	# non-zero return value. If we successfully parsed a "none"
	130	# outcome, ignore the return code.
	131	if rv != 0 and state != 'none':
	132	raise service_error(service_error.internal,
	133	"Cannot get status of segment:%s/%s" % (pid, eid))
	134	elif state not in ('active', 'swapped', 'swapping', 'none'):
	135	raise service_error(service_error.internal,
	136	"Cannot get status of segment:%s/%s" % (pid, eid))
	137	else:
	138	self.log.debug("State is %s" % state)
	139	return state
	140
	141
	142	def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
	143	"""
	144	Start a sub-experiment on a federant.
	145
	146	Get the current state, modify or create as appropriate, ship data
	147	and configs and start the experiment. There are small ordering
	148	differences based on the initial state of the sub-experiment.
	149	"""
	150	# Configuration directories on this machine
	151	proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
	152	softdir = "/proj/%s/software/%s" % (pid, eid)
	153	# Softwrae staging directory software dir
	154	lsoftdir = "%s/software" % tmpdir
	155
	156	state = self.get_state(pid, eid)
	157
	158	if state == 'none':
	159	# Create a null copy of the experiment so that we capture any
	160	# logs there if the modify fails. Emulab software discards the
	161	# logs from a failed startexp
	162	try:
	163	f = open("%s/null.tcl" % tmpdir, "w")
	164	print >>f, self.null
	165	f.close()
	166	except IOError, e:
	167	raise service_error(service_error.internal,
	168	"Cannot stage null.tcl: %s" % e.strerror)
	169
	170	timedout = False
	171	try:
	172	if not self.cmd_with_timeout(
	173	("/usr/testbed/bin/startexp -i -f -w -p %s " +
	174	"-e %s %s/null.tcl") % (pid, eid, tmpdir), "startexp",
	175	timeout=60 * 10):
	176	return False
	177	except self.cmd_timeout:
	178	timedout = True
	179
	180	if timedout:
	181	state = self.get_state(pid, eid)
	182	if state != "swapped":
	183	return False
	184
	185	# Set up the experiment's file space
	186	if not self.cmd_with_timeout("/bin/rm -rf %s" % proj_dir):
	187	return False
	188	# Clear and create the software and configuration directories
	189	if not self.cmd_with_timeout("/bin/rm -rf %s/*" % softdir):
	190	return False
	191	if not self.cmd_with_timeout('mkdir -p %s' % proj_dir):
	192	return False
	193	if os.path.isdir(lsoftdir):
	194	if not self.cmd_with_timeout('mkdir -p %s' % softdir):
	195	return False
	196
	197	try:
	198	for f in os.listdir(tmpdir):
	199	if not os.path.isdir("%s/%s" % (tmpdir, f)):
	200	self.copy_file("%s/%s" % (tmpdir, f),
	201	"%s/%s" % (proj_dir, f))
	202	if os.path.isdir(lsoftdir):
	203	for f in os.listdir(lsoftdir):
	204	if not os.path.isdir("%s/%s" % (lsoftdir, f)):
	205	self.copy_file("%s/%s" % (lsoftdir, f),
	206	"%s/%s" % (softdir, f))
	207	except IOError, e:
	208	self.log.error("Error copying file: %s" %e)
	209	return False
	210
	211	# Stage the new configuration (active experiments will stay swapped
	212	# in now)
	213	self.log.info("[start_segment]: Modifying %s" % eid)
	214	try:
	215	if not self.cmd_with_timeout(
	216	"/usr/testbed/bin/modexp -r -s -w %s %s %s" % \
	217	(pid, eid, tclfile),
	218	"modexp", timeout= 60 * 10):
	219	return False
	220	except self.cmd_timeout:
	221	self.log.error("Modify command failed to complete in time")
	222	# There's really no way to see if this succeeded or failed, so
	223	# if it hangs, assume the worst.
	224	return False
	225	# Active experiments are still swapped, this swaps the others in.
	226	if state != 'active':
	227	self.log.info("[start_segment]: Swapping %s" % eid)
	228	timedout = False
	229	try:
	230	if not self.cmd_with_timeout(
	231	"/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
[595191c]	232	"swapexp", timeout=25*60):
[11860f52]	233	return False
	234	except self.cmd_timeout:
	235	timedout = True
	236
	237	# If the command was terminated, but completed successfully,
	238	# report success.
	239	if timedout:
	240	self.log.debug("[start_segment]: swapin timed out " +\
	241	"checking state")
[6e44258]	242	state = self.get_state(pid, eid)
[11860f52]	243	self.log.debug("[start_segment]: state is %s" % state)
	244	return state == 'active'
	245	# Everything has gone OK.
	246	return True
	247
	248	class stop_segment(local_emulab_segment):
	249	def __init__(self, log=None, keyfile=None, debug=False):
	250	local_emulab_segment.__init__(self,
	251	log=log, keyfile=keyfile, debug=debug)
	252
	253	def __call__(self, parent, user, pid, eid):
	254	"""
	255	Stop a sub experiment by calling swapexp on the federant
	256	"""
	257	self.log.info("[stop_segment]: Stopping %s" % eid)
	258	rv = False
	259	try:
	260	# Clean out tar files: we've gone over quota in the past
	261	self.cmd_with_timeout("rm -rf /proj/%s/software/%s" % (pid, eid))
	262	rv = self.cmd_with_timeout(
	263	"/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid),
	264	timeout = 60*10)
	265	except self.cmd_timeout:
	266	rv = False
	267	return rv
	268

Note: See TracBrowser for help on using the repository browser.

Download in other formats: