Context Navigation

source: fedd/federation/proxy_emulab_segment.py @ 6280b1f

axis_examplecompt_changesinfo-opsversion-3.01version-3.02

Last change on this file since 6280b1f was 37776ea, checked in by Ted Faber <faber@…>, 15 years ago
Split out proxy code for use with other proxies - e.g. protogeni proxy
Property mode set to `100644`
File size: 7.1 KB

Rev	Line
[11860f52]	1	#!/usr/local/bin/python
	2
	3	import sys, os
	4	import re
	5
	6	import tempfile
	7	import subprocess
	8	import logging
	9	import time
	10	import signal
	11
[37776ea]	12	from proxy_segment import proxy_segment
[bbd0039]	13	from service_error import service_error
	14
class start_segment(proxy_segment):
    """
    Callable that brings up a sub-experiment on a remote Emulab federant.

    All remote work goes through helpers inherited from proxy_segment
    (ssh_cmd, scp_file, ssh_cmd_timeout, ssh_exec, ssh_privkey_file); their
    exact semantics are defined there, not in this file.
    """

    # Upper bounds, in seconds, handed to ssh_cmd for the long-running
    # testbed commands.
    _STARTEXP_TIMEOUT = 60 * 10
    _MODEXP_TIMEOUT = 60 * 10
    _SWAPEXP_TIMEOUT = 25 * 60

    # The only experiment states get_state() will return.
    _KNOWN_STATES = ('active', 'swapped', 'swapping', 'none')

    def __init__(self, log=None, keyfile=None, debug=False):
        """
        Forward log, keyfile and debug to proxy_segment and set up the Tcl
        text of the placeholder ("null") experiment.
        """
        proxy_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)
        # Minimal one-node experiment description.  It is used to create an
        # experiment that does not exist yet, before the real configuration
        # is applied with modexp.
        self.null = """
set ns [new Simulator]
source tb_compat.tcl

set a [$ns node]

$ns rtproto Session
$ns run
"""

    def get_state(self, user, host, pid, eid):
        """
        Return the state of experiment pid/eid as reported by expinfo.

        Runs /usr/testbed/bin/expinfo on host as user over ssh and parses
        its output.  The result is one of 'active', 'swapped', 'swapping'
        or 'none' (no such experiment).  When self.debug is set no command
        is run and 'swapped' is returned.

        Raises service_error (internal) if expinfo exits non-zero without
        reporting a missing experiment, or if no known state was parsed.
        """
        # command to test experiment state
        expinfo_exec = "/usr/testbed/bin/expinfo"
        # Regular expressions to parse the expinfo response
        state_re = re.compile(r"State:\s+(\w+)")
        no_exp_re = re.compile(r"^No\s+such\s+experiment")
        swapping_re = re.compile(r"^No\s+information\s+available.")
        # Lines that carry no "State:" field but still imply a state
        implied_states = ((no_exp_re, "none"), (swapping_re, "swapping"))
        state = None    # Experiment state parsed from expinfo
        # The expinfo ssh command.  Note the identity restriction to use
        # only the identity provided in the pubkey given.
        cmd = [self.ssh_exec, '-o', 'IdentitiesOnly yes', '-o',
                'StrictHostKeyChecking no', '-i',
                self.ssh_privkey_file, "%s@%s" % (user, host),
                expinfo_exec, pid, eid]

        if self.debug:
            state = 'swapped'
            rv = 0
        else:
            # stderr of the ssh command is discarded.  If /dev/null cannot
            # be opened the child simply inherits our stderr.
            dev_null = None
            try:
                dev_null = open("/dev/null", "a")
            except IOError as e:
                self.log.error("[get_state]: can't open /dev/null: %s" % e)

            try:
                self.log.debug("Checking state")
                # universal_newlines makes stdout yield text lines, so the
                # str patterns above match on Python 2 and Python 3 alike.
                status = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                        stderr=dev_null, close_fds=True,
                        universal_newlines=True)
                for line in status.stdout:
                    m = state_re.match(line)
                    if m:
                        state = m.group(1)
                    else:
                        for reg, st in implied_states:
                            if reg.match(line):
                                state = st
                rv = status.wait()
            finally:
                # Do not leak one descriptor per state check.
                if dev_null is not None:
                    dev_null.close()

        # If the experiment is not present the subcommand returns a
        # non-zero return value.  If we successfully parsed a "none"
        # outcome, ignore the return code.
        if (rv != 0 and state != 'none') or state not in self._KNOWN_STATES:
            raise service_error(service_error.internal,
                    "Cannot get status of segment:%s/%s" % (pid, eid))

        self.log.debug("State is %s" % state)
        return state

    def _create_null_experiment(self, user, host, pid, eid, tmpdir):
        """
        Create pid/eid on the federant from the placeholder Tcl file.

        Returns True if startexp succeeded, or if it timed out but the
        experiment is nevertheless reported as 'swapped'; False otherwise.
        Raises service_error (internal) if null.tcl cannot be written
        locally.
        """
        null_tcl = "%s/null.tcl" % tmpdir
        try:
            with open(null_tcl, "w") as f:
                f.write(self.null + "\n")
        except IOError as e:
            raise service_error(service_error.internal,
                    "Cannot write null experiment file: %s" % e.strerror)

        if not self.scp_file(null_tcl, user, host):
            return False

        self.log.info("[start_segment]: Creating %s" % eid)
        try:
            if not self.ssh_cmd(user, host,
                    ("/usr/testbed/bin/startexp -i -f -w -p %s " +
                        "-e %s null.tcl") % (pid, eid), "startexp",
                    timeout=self._STARTEXP_TIMEOUT):
                return False
        except self.ssh_cmd_timeout:
            # The command may have completed even though we gave up
            # waiting for it; ask the testbed.
            return self.get_state(user, host, pid, eid) == "swapped"
        return True

    def _prepare_remote_dirs(self, user, host, proj_dir, softdir,
            make_softdir):
        """
        Reset the experiment's remote directories.

        Writes a small shell script locally, copies it to the federant and
        runs it there.  The script removes proj_dir, empties softdir,
        recreates proj_dir (and softdir when make_softdir is true), and
        finally deletes itself.  Returns True on success, False otherwise.
        """
        self.log.info("[start_segment]: creating script file")
        try:
            fd, scriptname = tempfile.mkstemp()
        except (IOError, OSError) as e:
            self.log.error(
                    "[start_segment]: cannot create script file: %s" % e)
            return False

        scriptbase = os.path.basename(scriptname)
        try:
            try:
                scriptfile = os.fdopen(fd, 'w')
            except (IOError, OSError) as e:
                os.close(fd)
                self.log.error(
                        "[start_segment]: cannot open script file: %s" % e)
                return False

            # Script the filesystem changes
            lines = [
                    "/bin/rm -rf %s" % proj_dir,
                    # Clear and create the software directory
                    "/bin/rm -rf %s/*" % softdir,
                    "mkdir -p %s" % proj_dir,
                ]
            if make_softdir:
                lines.append("mkdir -p %s" % softdir)
            # The script's last line deletes the remote copy of itself.
            lines.append("rm -f %s" % scriptbase)
            try:
                scriptfile.write("\n".join(lines) + "\n")
            finally:
                scriptfile.close()

            # Move the script to the remote machine
            # XXX: could collide tempfile names on the remote host
            if not self.scp_file(scriptname, user, host, scriptbase):
                return False
        finally:
            # The local copy is never needed again, whether or not the
            # transfer worked.
            os.remove(scriptname)

        # Execute the script (and the script's last line deletes it)
        if not self.ssh_cmd(user, host, "sh -x %s" % scriptbase):
            return False
        return True

    def _copy_plain_files(self, srcdir, user, host, destdir):
        """
        Copy every non-directory entry of local srcdir into destdir on the
        federant.  Stops at, and returns False on, the first failed copy.
        """
        for name in os.listdir(srcdir):
            local = "%s/%s" % (srcdir, name)
            if os.path.isdir(local):
                continue
            if not self.scp_file(local, user, host,
                    "%s/%s" % (destdir, name)):
                return False
        return True

    def __call__(self, parent, eid, pid, user, tclfile, tmpdir, timeout=0):
        """
        Start a sub-experiment on a federant.

        Get the current state, modify or create as appropriate, ship data
        and configs and start the experiment.  There are small ordering
        differences based on the initial state of the sub-experiment.

        parent supplies the ops host name (parent.ops + parent.domain);
        tclfile is the local experiment description and tmpdir the local
        directory whose files (and optional software/ subdirectory) are
        shipped.  timeout is accepted for interface compatibility but is
        not used here.

        Returns True if the experiment ends up swapped in, False on any
        failure.  May raise service_error from get_state or when null.tcl
        cannot be written.
        """
        # ops node in the federant
        host = "%s%s" % (parent.ops, parent.domain)
        # Configuration directories on the remote machine
        proj_dir = "/proj/%s/exp/%s/tmp" % (pid, eid)
        softdir = "/proj/%s/software/%s" % (pid, eid)
        # Local software dir
        lsoftdir = "%s/software" % tmpdir
        have_software = os.path.isdir(lsoftdir)

        state = self.get_state(user, host, pid, eid)

        if not self.scp_file(tclfile, user, host):
            return False

        if state == 'none':
            # Create a null copy of the experiment so that we capture any
            # logs there if the modify fails.  Emulab software discards the
            # logs from a failed startexp
            if not self._create_null_experiment(user, host, pid, eid,
                    tmpdir):
                return False

        # Set up the filespace for the new experiment, then ship the
        # configuration files and any software.
        if not self._prepare_remote_dirs(user, host, proj_dir, softdir,
                have_software):
            return False
        if not self._copy_plain_files(tmpdir, user, host, proj_dir):
            return False
        if have_software:
            if not self._copy_plain_files(lsoftdir, user, host, softdir):
                return False

        # Stage the new configuration (active experiments will stay swapped
        # in now)
        self.log.info("[start_segment]: Modifying %s" % eid)
        try:
            if not self.ssh_cmd(user, host,
                    "/usr/testbed/bin/modexp -r -s -w %s %s %s" %
                    (pid, eid, tclfile.rpartition('/')[2]),
                    "modexp", timeout=self._MODEXP_TIMEOUT):
                return False
        except self.ssh_cmd_timeout:
            self.log.error("Modify command failed to complete in time")
            # There's really no way to see if this succeeded or failed, so
            # if it hangs, assume the worst.
            return False

        # Active experiments are still swapped in; this swaps the others in.
        if state != 'active':
            self.log.info("[start_segment]: Swapping %s" % eid)
            try:
                if not self.ssh_cmd(user, host,
                        "/usr/testbed/bin/swapexp -w %s %s in" % (pid, eid),
                        "swapexp", timeout=self._SWAPEXP_TIMEOUT):
                    return False
            except self.ssh_cmd_timeout:
                # If the command was terminated, but completed
                # successfully, report success.
                self.log.debug("[start_segment]: swapin timed out " +
                        "checking state")
                state = self.get_state(user, host, pid, eid)
                self.log.debug("[start_segment]: state is %s" % state)
                return state == 'active'

        # Everything has gone OK.
        return True
	210
class stop_segment(proxy_segment):
    """
    Callable that swaps a sub-experiment out on a remote Emulab federant.

    Remote commands go through ssh_cmd, inherited from proxy_segment.
    """

    def __init__(self, log=None, keyfile=None, debug=False):
        """Forward log, keyfile and debug to proxy_segment unchanged."""
        proxy_segment.__init__(self, log=log, keyfile=keyfile, debug=debug)

    def __call__(self, parent, user, pid, eid):
        """
        Stop a sub experiment by calling swapexp on the federant.

        The experiment's software directory is removed first; the result
        of that removal is not checked.  Returns whatever ssh_cmd returns
        for the swapexp command, or False if ssh_cmd_timeout is raised by
        either command.
        """
        ops_host = "%s%s" % (parent.ops, parent.domain)
        self.log.info("[stop_segment]: Stopping %s" % eid)

        cleanup_cmd = "rm -rf /proj/%s/software/%s" % (pid, eid)
        swapout_cmd = "/usr/testbed/bin/swapexp -w %s %s out" % (pid, eid)
        try:
            # Clean out tar files: we've gone over quota in the past
            self.ssh_cmd(user, ops_host, cleanup_cmd)
            return self.ssh_cmd(user, ops_host, swapout_cmd)
        except self.ssh_cmd_timeout:
            return False
	231

Note: See TracBrowser for help on using the repository browser.

Download in other formats: