Context Navigation

source: fedd/federation/experiment_control.py @ e794984

axis_examplecompt_changesinfo-opsversion-2.00version-3.01version-3.02

Last change on this file since e794984 was e794984, checked in by Ted Faber <faber@…>, 15 years ago
remove debugging
Property mode set to `100644`
File size: 71.9 KB

Rev	Line
[6679c122]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4
	5	import re
	6	import random
	7	import string
	8	import subprocess
	9	import tempfile
	10	import copy
[eee2b2e]	11	import pickle
[c971895]	12	import logging
[79b6596]	13	import signal
	14	import time
[6679c122]	15
[3441fe3]	16	import traceback
[c971895]	17	# For parsing visualization output and splitter output
	18	import xml.parsers.expat
[3441fe3]	19
[6c57fe9]	20	from threading import Lock, Thread, Condition
	21	from subprocess import call, Popen, PIPE
[6679c122]	22
[db6b092]	23	from urlparse import urlparse
	24	from urllib2 import urlopen
	25
[ec4fb42]	26	from util import *
[51cc9df]	27	from fedid import fedid, generate_fedid
[9460b1e]	28	from remote_service import xmlrpc_handler, soap_handler, service_caller
[c971895]	29	from service_error import service_error
[6679c122]	30
[db6b092]	31	import topdl
	32	from ip_allocator import ip_allocator
	33	from ip_addr import ip_addr
	34
[11a08b0]	35
	36	class nullHandler(logging.Handler):
	37	def emit(self, record): pass
	38
	39	fl = logging.getLogger("fedd.experiment_control")
	40	fl.addHandler(nullHandler())
	41
[ec4fb42]	42	class experiment_control_local:
[0ea11af]	43	"""
	44	Control of experiments that this system can directly access.
	45
	46	Includes experiment creation, termination and information dissemination.
	47	Thread safe.
	48	"""
[79b6596]	49
# Exception type derived from RuntimeError.  Judging by the name it signals
# an ssh command that timed out; no raise site is visible in this part of
# the file -- TODO confirm against the callers.
class ssh_cmd_timeout(RuntimeError): pass
[4b362df]	51
	52	class list_log:
[db6b092]	53	"""
	54	Provide an interface that lets logger.StreamHandler s write to a list
	55	of strings.
	56	"""
	57	def __init__(self, l=[]):
	58	"""
	59	Link to an existing list or just create a log
	60	"""
	61	self.ll = l
	62	self.lock = Lock()
	63	def write(self, str):
	64	"""
	65	Add the string to the log. Lock for consistency.
	66	"""
	67	self.lock.acquire()
	68	self.ll.append(str)
	69	self.lock.release()
	70
	71	def flush(self):
	72	"""
	73	No-op that StreamHandlers expect
	74	"""
	75	pass
[4b362df]	76
[6679c122]	77
class thread_pool:
    """
    Bookkeeping shared by a set of threads that were all launched for the
    same task.  Counts how many have started and how many have finished,
    and guards those counters with a single condition variable.
    """
    def __init__(self, nthreads):
        """
        Create an empty pool that allows nthreads concurrent threads.
        """
        self.nthreads = nthreads
        self.started = 0
        self.terminated = 0
        self.changed = Condition()

    def acquire(self):
        """
        Take the lock protecting the counters.
        """
        self.changed.acquire()

    def release(self):
        """
        Give the counters' lock back.
        """
        self.changed.release()

    def wait(self, timeout = None):
        """
        Block until a pool thread reports a start or a stop, or until
        timeout expires.  The lock must be held by the caller.
        """
        self.changed.wait(timeout)

    def start(self):
        """
        Pool threads call this to announce that they are running.
        """
        self.acquire()
        self.started = self.started + 1
        self.changed.notifyAll()
        self.release()

    def terminate(self):
        """
        Pool threads call this to announce that they are done.
        """
        self.acquire()
        self.terminated = self.terminated + 1
        self.changed.notifyAll()
        self.release()

    def clear(self):
        """
        Reset both counters to zero and wake any waiters.
        """
        self.acquire()
        self.started, self.terminated = 0, 0
        self.changed.notifyAll()
        self.release()

    def wait_for_slot(self):
        """
        Return once fewer than nthreads pool threads are running, i.e.
        once another pooled thread may be started.
        """
        self.acquire()
        while True:
            running = self.started - self.terminated
            if running < self.nthreads:
                break
            self.wait()
        self.release()

    def wait_for_all_done(self):
        """
        Return once at least one thread has started and every started
        thread has also terminated.
        """
        self.acquire()
        while not (self.started != 0 and self.started <= self.terminated):
            self.wait()
        self.release()
[8bc5754]	155
class pooled_thread(Thread):
    """
    One of a set of threads dedicated to a specific task.  Uses an object
    with start() and terminate() methods (the thread_pool class) for
    coordination, and records the target's return value or exception.
    """
    def __init__(self, group=None, target=None, name=None, args=(),
            kwargs=None, pdata=None, trace_file=None):
        """
        Same parameters as threading.Thread, plus:

        pdata -- pool object told when this thread starts and finishes
                 (may be None)
        trace_file -- accepted for callers that pass it; not used here
        """
        # Avoid a shared mutable default for kwargs.
        if kwargs is None:
            kwargs = {}
        Thread.__init__(self, group, target, name, args, kwargs)
        self.rv = None          # Return value of the ops in this thread
        self.exception = None   # Exception that terminated this thread
        self.target = target    # Target function to run on start()
        self.args = args        # Args to pass to target
        self.kwargs = kwargs    # Additional kw args
        self.pdata = pdata      # thread_pool for this class
        # Logger for this thread
        self.log = logging.getLogger("fedd.experiment_control")

    def run(self):
        """
        Emulate Thread.run, except add pool data manipulation and error
        logging.  The target's result is left in self.rv; an exception that
        ends the target is logged and left in self.exception.
        """
        if self.pdata:
            self.pdata.start()

        try:
            if self.target:
                try:
                    # Unpack the positional and keyword arguments; passing
                    # the tuple itself would hand the target one argument.
                    self.rv = self.target(*self.args, **self.kwargs)
                except service_error as s:
                    self.exception = s
                    self.log.error("Thread exception: %s %s" % \
                            (s.code_string(), s.desc))
                except:
                    # Deliberately broad: nothing may escape the thread
                    # without being recorded and logged.
                    self.exception = sys.exc_info()[1]
                    self.log.error(("Unexpected thread exception: %s" +\
                            "Trace %s") % (self.exception,\
                            traceback.format_exc()))
        finally:
            # Always report completion so that waiters on the pool are
            # not left blocked.
            if self.pdata:
                self.pdata.terminate()
[6679c122]	195
# Class-level service_caller objects, one for each remote operation named by
# the string argument.  Elsewhere in this class they are invoked as
# self.call_X(uri, request, cert_file, cert_pwd, trusted_certs).
call_RequestAccess = service_caller('RequestAccess')
call_ReleaseAccess = service_caller('ReleaseAccess')
call_StartSegment = service_caller('StartSegment')
call_TerminateSegment = service_caller('TerminateSegment')
call_Ns2Split = service_caller('Ns2Split')
[058f58e]	201
def __init__(self, config=None, auth=None):
    """
    Initialize the various attributes, most from the config object.

    config -- configuration object queried with get(section, option
              [, default]) and getboolean(section, option).  It is used
              unconditionally, so despite the default a real object must
              be passed.
    auth -- authorizer to use; when false a local one is created.

    Raises service_error (internal) when the SSH public key file is not
    configured or cannot be read, when no SSH private key file is
    configured, or when no accessdb is configured.
    """

    def parse_tarfile_list(tf):
        """
        Parse a tarfile list from the configuration.  This is a set of
        paths and tarfiles separated by spaces.  Returns a list of
        (path, tarfile) pairs; a trailing unpaired token is ignored.
        """
        rv = [ ]
        if tf is not None:
            tl = tf.split()
            for i in range(0, len(tl) - 1, 2):
                rv.append((tl[i], tl[i + 1]))
        return rv

    self.thread_with_rv = experiment_control_local.pooled_thread
    self.thread_pool = experiment_control_local.thread_pool
    self.list_log = experiment_control_local.list_log

    # Prefer the experiment_control certificate, fall back to the global
    # one.
    self.cert_file = config.get("experiment_control", "cert_file")
    if self.cert_file:
        self.cert_pwd = config.get("experiment_control", "cert_pwd")
    else:
        self.cert_file = config.get("globals", "cert_file")
        self.cert_pwd = config.get("globals", "cert_pwd")

    self.trusted_certs = config.get("experiment_control", "trusted_certs") \
            or config.get("globals", "trusted_certs")

    self.repodir = config.get("experiment_control", "repodir")

    self.exp_stem = "fed-stem"
    self.log = logging.getLogger("fedd.experiment_control")
    set_log_level(config, "experiment_control", self.log)
    self.muxmax = 2
    self.nthreads = 2
    self.randomize_experiments = False

    self.splitter = None
    self.ssh_keygen = "/usr/bin/ssh-keygen"
    self.ssh_identity_file = None

    self.debug = config.getboolean("experiment_control", "create_debug")
    self.state_filename = config.get("experiment_control",
            "experiment_state")
    self.splitter_url = config.get("experiment_control", "splitter_uri")
    self.fedkit = parse_tarfile_list(\
            config.get("experiment_control", "fedkit"))
    self.gatewaykit = parse_tarfile_list(\
            config.get("experiment_control", "gatewaykit"))
    accessdb_file = config.get("experiment_control", "accessdb")

    self.ssh_pubkey_file = config.get("experiment_control",
            "ssh_pubkey_file")
    self.ssh_privkey_file = config.get("experiment_control",
            "ssh_privkey_file")
    # NB for internal master/slave ops, not experiment setup
    self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")

    # fedids that are allowed to control every experiment
    self.overrides = set([])
    ovr = config.get('experiment_control', 'overrides')
    if ovr:
        for o in ovr.split(","):
            o = o.strip()
            if o.startswith('fedid:'): o = o[len('fedid:'):]
            self.overrides.add(fedid(hexstr=o))

    self.state = { }
    self.state_lock = Lock()
    self.tclsh = "/usr/local/bin/otclsh"
    self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
            config.get("experiment_control", "tcl_splitter",
                    "/usr/testbed/lib/ns2ir/parse.tcl")
    mapdb_file = config.get("experiment_control", "mapdb")
    self.trace_file = sys.stderr

    self.def_expstart = \
            "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
            "/tmp/federate"
    self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
            "FEDDIR/hosts"
    self.def_gwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
            "/tmp/bridge.log"
    self.def_mgwstart = \
            "sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
            "/tmp/bridge.log"
    self.def_gwimage = "FBSD61-TUNNEL2"
    self.def_gwtype = "pc"
    self.local_access = { }

    if auth:
        self.auth = auth
    else:
        self.log.error(\
                "[access]: No authorizer initialized, creating local one.")
        # Must land in self.auth: read_accessdb and read_state below use
        # self.auth, and a local variable would leave it unset.
        self.auth = authorizer()

    if self.ssh_pubkey_file:
        try:
            f = open(self.ssh_pubkey_file, 'r')
            try:
                self.ssh_pubkey = f.read()
            finally:
                f.close()
        except IOError:
            raise service_error(service_error.internal,
                    "Cannot read sshpubkey")
    else:
        raise service_error(service_error.internal,
                "No SSH public key file?")

    if not self.ssh_privkey_file:
        raise service_error(service_error.internal,
                "No SSH private key file?")

    if mapdb_file:
        self.read_mapdb(mapdb_file)
    else:
        self.log.warning(
                "[experiment_control] No testbed map, using defaults")
        self.tbmap = {
                'deter':'https://users.isi.deterlab.net:23235',
                'emulab':'https://users.isi.deterlab.net:23236',
                'ucb':'https://users.isi.deterlab.net:23237',
                }

    if accessdb_file:
        self.read_accessdb(accessdb_file)
    else:
        raise service_error(service_error.internal,
                "No accessdb specified in config")

    # Grab saved state.  OK to do this w/o locking because it's read only
    # and only one thread should be in existence that can see self.state at
    # this point.
    if self.state_filename:
        self.read_state()

    # Dispatch tables
    self.soap_services = {\
        'Create': soap_handler('Create', self.create_experiment),
        'Vtopo': soap_handler('Vtopo', self.get_vtopo),
        'Vis': soap_handler('Vis', self.get_vis),
        'Info': soap_handler('Info', self.get_info),
        'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
        'Terminate': soap_handler('Terminate',
            self.terminate_experiment),
    }

    self.xmlrpc_services = {\
        'Create': xmlrpc_handler('Create', self.create_experiment),
        'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
        'Vis': xmlrpc_handler('Vis', self.get_vis),
        'Info': xmlrpc_handler('Info', self.get_info),
        'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
        'Terminate': xmlrpc_handler('Terminate',
            self.terminate_experiment),
    }
[19cc408]	365
def copy_file(self, src, dest, size=1024):
    """
    Exceedingly simple file copy.

    src -- path of the file to read
    dest -- path of the file to create or overwrite
    size -- number of bytes moved per read

    Both files are opened in binary mode so the bytes are copied unchanged,
    and both are closed even when the copy fails.  Errors from open, read
    or write propagate to the caller.
    """
    import shutil   # local: the rest of the file does not need it

    s = open(src, 'rb')
    try:
        d = open(dest, 'wb')
        try:
            shutil.copyfileobj(s, d, size)
        finally:
            d.close()
    finally:
        s.close()
[6679c122]	379
# Call while holding self.state_lock
def write_state(self):
    """
    Write a new copy of experiment state after copying the existing state
    to a backup.

    The backup is the state file's name with ".bak" appended and is only
    made when the state file is already writable.  State format is a simple
    pickling of the state dictionary.  Failures to open or pickle are
    logged, not raised.
    """
    if os.access(self.state_filename, os.W_OK):
        self.copy_file(self.state_filename, \
                "%s.bak" % self.state_filename)
    try:
        f = open(self.state_filename, 'wb')
        try:
            pickle.dump(self.state, f)
        finally:
            # Close explicitly so the pickle is flushed to disk before
            # returning, even if dump fails part way through.
            f.close()
    except IOError as e:
        self.log.error("Can't write file %s: %s" % \
                (self.state_filename, e))
    except pickle.PicklingError as e:
        self.log.error("Pickling problem: %s" % e)
    except TypeError as e:
        self.log.error("Pickling problem (TypeError): %s" % e)
[eee2b2e]	401
# Call while holding self.state_lock
def read_state(self):
    """
    Read a new copy of experiment state.  Old state is overwritten.

    State format is a simple pickling of the state dictionary.  After
    loading, authorization attributes are re-established for every
    experiment in the state.  A missing or unreadable state file is logged
    and leaves self.state as it was.
    """

    def get_experiment_id(state):
        """
        Pull the fedid experimentID out of the saved state.  This is kind
        of a gross walk through the dict.  Returns None when there is no
        experimentID entry carrying a fedid.
        """
        for e in state.get('experimentID', []):
            if 'fedid' in e:
                return e['fedid']
        return None

    def get_alloc_ids(state):
        """
        Pull the fedids of the identifiers of each allocation from the
        state.  Again, a dict dive that's best isolated.
        """
        return [ f['allocID']['fedid']
                for f in state.get('federant',[]) \
                    if 'allocID' in f and \
                        'fedid' in f['allocID']]

    try:
        f = open(self.state_filename, "rb")
        try:
            # NOTE: unpickling runs code chosen by whoever wrote the file;
            # the state file must only be writable by trusted users.
            self.state = pickle.load(f)
        finally:
            f.close()
        self.log.debug("[read_state]: Read state from %s" % \
                self.state_filename)
    except IOError as e:
        self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
                % (self.state_filename, e))
    except pickle.UnpicklingError as e:
        self.log.warning(("[read_state]: No saved state: " + \
                "Unpickling failed: %s") % e)

    for s in self.state.values():
        try:
            eid = get_experiment_id(s)
            if eid:
                # Give the owner rights to the experiment
                self.auth.set_attribute(s['owner'], eid)
                # And holders of the eid as well
                self.auth.set_attribute(eid, eid)
                # allow overrides to control experiments as well
                for o in self.overrides:
                    self.auth.set_attribute(o, eid)
                # Set permissions to allow reading of the software repo, if
                # any, as well.
                for a in get_alloc_ids(s):
                    self.auth.set_attribute(a, 'repo/%s' % eid)
            else:
                raise KeyError("No experiment id")
        except KeyError as e:
            self.log.warning("[read_state]: State ownership or identity " +\
                    "misformatted in %s: %s" % (self.state_filename, e))
[4064742]	470
	471
	472	def read_accessdb(self, accessdb_file):
[866c983]	473	"""
	474	Read the mapping from fedids that can create experiments to their name
	475	in the 3-level access namespace. All will be asserted from this
	476	testbed and can include the local username and porject that will be
	477	asserted on their behalf by this fedd. Each fedid is also added to the
	478	authorization system with the "create" attribute.
	479	"""
	480	self.accessdb = {}
	481	# These are the regexps for parsing the db
	482	name_expr = "[" + string.ascii_letters + string.digits + "\.\-]+"
	483	project_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	484	"\s->\(\s("+name_expr+")\s,\s("+name_expr+")\s\)\s$")
	485	user_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	486	"\s->\s(" + name_expr + ")\s*$")
	487	lineno = 0
	488
	489	# Parse the mappings and store in self.authdb, a dict of
	490	# fedid -> (proj, user)
	491	try:
	492	f = open(accessdb_file, "r")
	493	for line in f:
	494	lineno += 1
	495	line = line.strip()
	496	if len(line) == 0 or line.startswith('#'):
	497	continue
	498	m = project_line.match(line)
	499	if m:
	500	fid = fedid(hexstr=m.group(1))
	501	project, user = m.group(2,3)
	502	if not self.accessdb.has_key(fid):
	503	self.accessdb[fid] = []
	504	self.accessdb[fid].append((project, user))
	505	continue
	506
	507	m = user_line.match(line)
	508	if m:
	509	fid = fedid(hexstr=m.group(1))
	510	project = None
	511	user = m.group(2)
	512	if not self.accessdb.has_key(fid):
	513	self.accessdb[fid] = []
	514	self.accessdb[fid].append((project, user))
	515	continue
	516	self.log.warn("[experiment_control] Error parsing access " +\
	517	"db %s at line %d" % (accessdb_file, lineno))
	518	except IOError:
	519	raise service_error(service_error.internal,
	520	"Error opening/reading %s as experiment " +\
	521	"control accessdb" % accessdb_file)
	522	f.close()
	523
	524	# Initialize the authorization attributes
	525	for fid in self.accessdb.keys():
	526	self.auth.set_attribute(fid, 'create')
[34bc05c]	527
	528	def read_mapdb(self, file):
[866c983]	529	"""
	530	Read a simple colon separated list of mappings for the
	531	label-to-testbed-URL mappings. Clears or creates self.tbmap.
	532	"""
	533
	534	self.tbmap = { }
	535	lineno =0
	536	try:
	537	f = open(file, "r")
	538	for line in f:
	539	lineno += 1
	540	line = line.strip()
	541	if line.startswith('#') or len(line) == 0:
	542	continue
	543	try:
	544	label, url = line.split(':', 1)
	545	self.tbmap[label] = url
	546	except ValueError, e:
	547	self.log.warn("[read_mapdb] Ignored bad line (%d) in " +\
	548	"map db: %s %s" % (lineno, line, e))
	549	except IOError, e:
	550	self.log.warning("[read_mapdb]: No saved map database: Can't " +\
	551	"open %s: %s" % (file, e))
	552	f.close()
	553
def generate_ssh_keys(self, dest, type="rsa" ):
    """
    Generate a set of keys for the gateways to use to talk.

    Keys are of type type and are stored in the required dest file.

    dest -- file name handed to ssh-keygen's -f option
    type -- "rsa" or "dsa", case insensitive

    Raises ValueError for any other key type, and service_error (internal)
    if /dev/null cannot be opened or ssh-keygen exits non-zero.
    """
    valid_types = ("rsa", "dsa")
    t = type.lower()
    if t not in valid_types:
        raise ValueError("Unsupported ssh key type: %s" % type)
    # Empty passphrase (-N ''), so the command never prompts.
    cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]

    try:
        trace = open("/dev/null", "w")
    except IOError:
        raise service_error(service_error.internal,
                "Cannot open /dev/null??")

    try:
        # call() reports failure through its return code; it raises
        # OSError only when the program cannot be executed at all.
        self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
        rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
    finally:
        # Don't leak the /dev/null descriptor on every key generation.
        trace.close()

    if rv != 0:
        raise service_error(service_error.internal,
                "Cannot generate nonce ssh keys.  %s return code %d" \
                        % (self.ssh_keygen, rv))
[6679c122]	578
def gentopo(self, str):
    """
    Build the topology data structure from the XML that the splitter
    produces.

    The input looks like:
        <experiment>
            <nodes>
                <node><vname></vname><ips>ip1:ip2</ips></node>
            </nodes>
            <lans>
                <lan>
                    <vname></vname><vnode></vnode><ip></ip>
                    <bandwidth></bandwidth><member>node:port</member>
                </lan>
            </lans>

    The result is a dict with a 'node' list and a 'lan' list; each entry is
    a dict of the sub-elements seen inside one <node> or <lan>.
    """
    class topo_parse:
        """
        Expat callbacks that accumulate the topology.
        """
        def __init__(self):
            # How the text of each recognized sub-element is converted
            # before it is stored.
            self.convert = { }
            for tag in ('vname', 'vnode', 'ips', 'ip', 'member'):
                self.convert[tag] = lambda text: text
            self.convert['bandwidth'] = int
            self.convert['delay'] = float
            # The final data structure, keyed by container element name
            self.topo = { 'node': [ ], 'lan': [ ] }
            self.element = { }  # Element currently being filled in
            self.chars = ""     # Text collected since the last store

        def end_element(self, name):
            if name in self.topo:
                # A container closed: file the finished element and start
                # a new one.
                self.topo[name].append(self.element)
                self.element = { }
            elif name in self.convert:
                # A typed sub-element closed: store its text and reset.
                self.element[name] = self.convert[name](self.chars)
                self.chars = ""

        def found_chars(self, data):
            self.chars = self.chars + data.rstrip()

    handler = topo_parse()
    xml_parser = xml.parsers.expat.ParserCreate()
    xml_parser.CharacterDataHandler = handler.found_chars
    xml_parser.EndElementHandler = handler.end_element

    xml_parser.Parse(str)

    return handler.topo
	646
[0d830de]	647
	648	def genviz(self, topo):
[866c983]	649	"""
	650	Generate the visualization the virtual topology
	651	"""
	652
	653	neato = "/usr/local/bin/neato"
	654	# These are used to parse neato output and to create the visualization
	655	# file.
	656	vis_re = re.compile('^\s"?([\w\-]+)"?\s+\[.pos="(\d+),(\d+)"')
	657	vis_fmt = "<node><name>%s</name><x>%s</x><y>%s</y><type>" + \
	658	"%s</type></node>"
	659
	660	try:
	661	# Node names
	662	nodes = [ n['vname'] for n in topo['node'] ]
	663	topo_lans = topo['lan']
[cc8d8e9]	664	except KeyError, e:
	665	raise service_error(service_error.internal, "Bad topology: %s" %e)
[866c983]	666
	667	lans = { }
	668	links = { }
	669
	670	# Walk through the virtual topology, organizing the connections into
	671	# 2-node connections (links) and more-than-2-node connections (lans).
	672	# When a lan is created, it's added to the list of nodes (there's a
	673	# node in the visualization for the lan).
	674	for l in topo_lans:
	675	if links.has_key(l['vname']):
	676	if len(links[l['vname']]) < 2:
	677	links[l['vname']].append(l['vnode'])
	678	else:
	679	nodes.append(l['vname'])
	680	lans[l['vname']] = links[l['vname']]
	681	del links[l['vname']]
	682	lans[l['vname']].append(l['vnode'])
	683	elif lans.has_key(l['vname']):
	684	lans[l['vname']].append(l['vnode'])
	685	else:
	686	links[l['vname']] = [ l['vnode'] ]
	687
	688
	689	# Open up a temporary file for dot to turn into a visualization
	690	try:
	691	df, dotname = tempfile.mkstemp()
	692	dotfile = os.fdopen(df, 'w')
	693	except IOError:
	694	raise service_error(service_error.internal,
	695	"Failed to open file in genviz")
	696
[db6b092]	697	try:
	698	dnull = open('/dev/null', 'w')
	699	except IOError:
	700	service_error(service_error.internal,
[886307f]	701	"Failed to open /dev/null in genviz")
	702
[866c983]	703	# Generate a dot/neato input file from the links, nodes and lans
	704	try:
	705	print >>dotfile, "graph G {"
	706	for n in nodes:
	707	print >>dotfile, '\t"%s"' % n
	708	for l in links.keys():
	709	print >>dotfile, '\t"%s" -- "%s"' % tuple(links[l])
	710	for l in lans.keys():
	711	for n in lans[l]:
	712	print >>dotfile, '\t "%s" -- "%s"' % (n,l)
	713	print >>dotfile, "}"
	714	dotfile.close()
	715	except TypeError:
	716	raise service_error(service_error.internal,
	717	"Single endpoint link in vtopo")
	718	except IOError:
	719	raise service_error(service_error.internal, "Cannot write dot file")
	720
	721	# Use dot to create a visualization
	722	dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005', '-Gmaxiter=2000',
[886307f]	723	'-Gpack=true', dotname], stdout=PIPE, stderr=dnull,
[db6b092]	724	close_fds=True)
	725	dnull.close()
[866c983]	726
	727	# Translate dot to vis format
	728	vis_nodes = [ ]
	729	vis = { 'node': vis_nodes }
	730	for line in dot.stdout:
	731	m = vis_re.match(line)
	732	if m:
	733	vn = m.group(1)
	734	vis_node = {'name': vn, \
	735	'x': float(m.group(2)),\
	736	'y' : float(m.group(3)),\
	737	}
	738	if vn in links.keys() or vn in lans.keys():
	739	vis_node['type'] = 'lan'
	740	else:
	741	vis_node['type'] = 'node'
	742	vis_nodes.append(vis_node)
	743	rv = dot.wait()
	744
	745	os.remove(dotname)
	746	if rv == 0 : return vis
	747	else: return None
[d0ae12d]	748
def get_access(self, tb, nodes, user, tbparam, master, export_project,
        access_user):
    """
    Get access to testbed through fedd and set the parameters for that tb.

    tb -- testbed label, looked up in self.tbmap
    nodes -- "image:hardware:count" strings describing requested
             nodes, or None
    user -- list of user dicts; entries of their 'access' lists that carry
            'sshPubkey' become the service access keys
    tbparam -- dict that receives the parameters under tbparam[tb]
    master -- label of the master testbed; the export project is only sent
              when tb is the master
    export_project -- dict sent as 'exportProject' (includes the type)
    access_user -- sequence of (project, user) pairs tried in order until
                   one is granted access; project may be false

    Raises service_error: server_config for an unknown testbed, req when no
    service key is available, access when every attempt is denied (or none
    could be made), protocol for a malformed response and internal when the
    response names no experiment creation user.
    """
    uri = self.tbmap.get(tb, None)
    if not uri:
        raise service_error(service_error.server_config,
                "Unknown testbed: %s" % tb)

    # currently this lumps all users into one service access group
    service_keys = [ a for u in user \
            for a in u.get('access', []) \
                if 'sshPubkey' in a]

    if len(service_keys) == 0:
        raise service_error(service_error.req,
                "Must have at least one SSH pubkey for services")

    # node resources if any; the same for every access attempt
    rnodes = [ ]
    if nodes:
        for n in nodes:
            rn = { }
            image, hw, count = n.split(":")
            if image: rn['image'] = [ image ]
            if hw: rn['hardware'] = [ hw ]
            if count and int(count) >0 : rn['count'] = int(count)
            rnodes.append(rn)

    # Stays None if access_user is empty or every attempt is denied
    r = None
    for p, u in access_user:
        self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
                "to %s") %  ((p or "None"), u, uri))

        if p:
            # Request with user and project specified
            req = {\
                    'destinationTestbed' : { 'uri' : uri },
                    'project': {
                        'name': {'localname': p},
                        'user': [ {'userID': { 'localname': u } } ],
                        },
                    'user':  user,
                    'allocID' : { 'localname': 'test' },
                    'createAccess' : [ { 'sshPubkey' : self.ssh_pubkey } ],
                    'serviceAccess' : service_keys
                }
        else:
            # Request with only user specified
            req = {\
                    'destinationTestbed' : { 'uri' : uri },
                    'user':  [ {'userID': { 'localname': u } } ],
                    'allocID' : { 'localname': 'test' },
                    'createAccess' : [ { 'sshPubkey' : self.ssh_pubkey } ],
                    'serviceAccess' : service_keys
                }

        if tb == master:
            # NB, the export_project parameter is a dict that includes
            # the type
            req['exportProject'] = export_project

        if rnodes:
            req['resources'] = { 'node': rnodes }

        try:
            if uri in self.local_access:
                # Local access call
                req = { 'RequestAccessRequestBody' : req }
                r = self.local_access[uri].RequestAccess(req,
                        fedid(file=self.cert_file))
                r = { 'RequestAccessResponseBody' : r }
            else:
                r = self.call_RequestAccess(uri, req,
                        self.cert_file, self.cert_pwd, self.trusted_certs)
        except service_error as err:
            if err.code == service_error.access:
                # Denied for this (project, user); try the next pair
                self.log.debug("[get_access] Access denied")
                r = None
                continue
            else:
                raise

        if 'RequestAccessResponseBody' in r:
            # Through to here we have a valid response, not a fault.
            # Access denied is a fault, so something better or worse than
            # access denied has happened.
            r = r['RequestAccessResponseBody']
            self.log.debug("[get_access] Access granted")
            break
        else:
            raise service_error(service_error.protocol,
                    "Bad proxy response")

    if not r:
        raise service_error(service_error.access,
                "Access denied by %s (%s)" % (tb, uri))

    e = r['emulab']
    p = e['project']
    tbparam[tb] = {
            "boss": e['boss'],
            "host": e['ops'],
            "domain": e['domain'],
            "fs": e['fileServer'],
            "eventserver": e['eventServer'],
            "project": unpack_id(p['name']),
            "emulab" : e,
            "allocID" : r['allocID'],
            }
    # Make the testbed name be the label the user applied
    p['testbed'] = {'localname': tb }

    for u in p['user']:
        role = u.get('role', None)
        if role == 'experimentCreation':
            tbparam[tb]['user'] = unpack_id(u['userID'])
            break
    else:
        # Loop finished without finding an experiment creation user
        raise service_error(service_error.internal,
                "No createExperimentUser from %s" %tb)

    # Add attributes to parameter space.  We don't allow attributes to
    # overlay any parameters already installed.
    for a in e.get('fedAttr', []):
        try:
            if a['attribute'] and isinstance(a['attribute'], basestring)\
                    and a['attribute'].lower() not in tbparam[tb]:
                tbparam[tb][a['attribute'].lower()] = a['value']
        except KeyError:
            self.log.error("Bad attribute in response: %s" % a)
	880
	881	def release_access(self, tb, aid):
	882	"""
	883	Release access to testbed through fedd
	884	"""
[db6b092]	885
[e19b75c]	886	uri = self.tbmap.get(tb, None)
	887	if not uri:
	888	raise service_error(serice_error.server_config,
	889	"Unknown testbed: %s" % tb)
[db6b092]	890
[e19b75c]	891	if self.local_access.has_key(uri):
	892	resp = self.local_access[uri].ReleaseAccess(\
	893	{ 'ReleaseAccessRequestBody' : {'allocID': aid},},
	894	fedid(file=self.cert_file))
	895	resp = { 'ReleaseAccessResponseBody': resp }
	896	else:
	897	resp = self.call_ReleaseAccess(uri, {'allocID': aid},
	898	self.cert_file, self.cert_pwd, self.trusted_certs)
[db6b092]	899
[e19b75c]	900	# better error coding
[db6b092]	901
def remote_splitter(self, uri, desc, master):
    """
    Ask the splitter service at uri to split the ns2 description desc and
    return the lines of its output.

    Raises service_error (protocol) when the reply has no
    Ns2SplitResponseBody or that body has no output.
    """
    request = {
            'description' : { 'ns2description': desc },
            'master': master,
            'include_fedkit': bool(self.fedkit),
            'include_gatewaykit': bool(self.gatewaykit)
        }

    reply = self.call_Ns2Split(uri, request, self.cert_file, self.cert_pwd,
            self.trusted_certs)

    # Guard clauses: reject malformed replies first
    if 'Ns2SplitResponseBody' not in reply:
        raise service_error(service_error.protocol, "Bad splitter response")

    body = reply['Ns2SplitResponseBody']
    if 'output' not in body:
        raise service_error(service_error.protocol,
                "Bad splitter response (no output)")

    return body['output'].splitlines()
[cc8d8e9]	923
class start_segment:
    """
    Callable that asks one testbed to start its segment of an experiment.
    Instances hold the credentials and the service caller so that they can
    be used directly or as a thread target.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        """
        debug -- stored debug flag
        log -- logger that failures are reported to
        testbed -- testbed label, used in the failure message
        cert_file, cert_pwd, trusted_certs -- passed to caller on each call
        caller -- callable invoked as
                  caller(uri, req, cert_file, cert_pwd, trusted_certs)
        """
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep what the creator supplied; forcing None here would drop the
        # trusted certificates from every call.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid, topo, master, attrs=None):
        """
        Send the start request for allocation aid to uri.

        topo must provide to_dict(); attrs, when true, is sent as
        'fedAttr'.  Returns True on success and False (after logging) when
        the call raises service_error.
        """
        req = {
                'allocID': { 'fedid' : aid },
                'segmentdescription': {
                    'topdldescription': topo.to_dict(),
                },
                'master': master,
            }
        if attrs:
            req['fedAttr'] = attrs

        try:
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            return True
        except service_error as e:
            self.log.error("Start segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[cc8d8e9]	954
	955
[5ae3857]	956
class terminate_segment:
    """
    Callable that asks one testbed to terminate its segment of an
    experiment.  Instances hold the credentials and the service caller.
    """
    def __init__(self, debug=False, log=None, testbed="", cert_file=None,
            cert_pwd=None, trusted_certs=None, caller=None):
        """
        debug -- stored debug flag
        log -- logger that failures are reported to
        testbed -- testbed label, used in the failure message
        cert_file, cert_pwd, trusted_certs -- passed to caller on each call
        caller -- callable invoked as
                  caller(uri, req, cert_file, cert_pwd, trusted_certs)
        """
        self.log = log
        self.debug = debug
        self.cert_file = cert_file
        self.cert_pwd = cert_pwd
        # Keep what the creator supplied; forcing None here would drop the
        # trusted certificates from every call.
        self.trusted_certs = trusted_certs
        self.caller = caller
        self.testbed = testbed

    def __call__(self, uri, aid ):
        """
        Send the terminate request for allocation aid to uri.  Returns True
        on success and False (after logging) when the call raises
        service_error.
        """
        req = {
                'allocID': aid ,
            }
        try:
            self.caller(uri, req, self.cert_file, self.cert_pwd,
                    self.trusted_certs)
            return True
        except service_error as e:
            self.log.error("Terminate segment failed on %s: %s" % \
                    (self.testbed, e))
            return False
[db6b092]	980
	981
def allocate_resources(self, allocated, master, eid, expid, expcert,
        tbparams, topo, tmpdir, alloc_log=None, attrs=None):
    """
    Start the segments of an experiment and record the result in the state.

    Every testbed in allocated other than master is started from a pooled
    thread; once those have all finished, and only if none of them failed,
    the master segment is started in this thread.  Afterwards tmpdir is
    removed and the experiment (stored under both eid and expid) is marked
    'active'.

    tbparams maps testbed name to its parameters (the allocation id is read
    from tbparams[tb]['allocID']['fedid']), topo maps testbed name to the
    topology passed to that segment, attrs is handed to each start request,
    and alloc_log, if given, replaces self.log for progress messages.
    Raises service_error if a testbed has no known URI or allocation id.
    """

    def segment_target(tb):
        """
        Return (uri, allocation fedid) for testbed tb or raise
        service_error if either is unknown.
        """
        uri = self.tbmap.get(tb, None)
        if not uri:
            raise service_error(service_error.internal,
                    "Unknown testbed %s !?" % tb)
        if 'allocID' in tbparams[tb] and 'fedid' in tbparams[tb]['allocID']:
            return uri, tbparams[tb]['allocID']['fedid']
        raise service_error(service_error.internal,
                "No alloc id for testbed %s !?" % tb)

    # XXX
    fail_soft = False

    log = alloc_log or self.log

    thread_pool = self.thread_pool(self.nthreads)
    threads = [ ]

    for tb in [ k for k in allocated.keys() if k != master]:
        # Create and start a thread to start the segment, and save it to
        # get the return value later
        thread_pool.wait_for_slot()
        uri, aid = segment_target(tb)

        t = self.pooled_thread(
                target=self.start_segment(log=log, debug=self.debug,
                    testbed=tb, cert_file=self.cert_file,
                    cert_pwd=self.cert_pwd,
                    trusted_certs=self.trusted_certs,
                    caller=self.call_StartSegment),
                args=(uri, aid, topo[tb], False, attrs), name=tb,
                pdata=thread_pool, trace_file=self.trace_file)
        threads.append(t)
        t.start()

    # Wait until all finish
    thread_pool.wait_for_all_done()

    # If none failed, start the master
    failed = [ t.getName() for t in threads if not t.rv ]

    if len(failed) == 0:
        uri, aid = segment_target(master)
        starter = self.start_segment(log=log, debug=self.debug,
                testbed=master, cert_file=self.cert_file,
                cert_pwd=self.cert_pwd, trusted_certs=self.trusted_certs,
                caller=self.call_StartSegment)
        if not starter(uri, aid, topo[master], True, attrs):
            failed.append(master)

    succeeded = [tb for tb in allocated.keys() if tb not in failed]
    # If one failed clean up, unless fail_soft is set
    # NOTE(review): the "and False" disables this failure path, so when a
    # segment fails control falls through to the else branch below and the
    # experiment is still recorded as active.  The branch also depends on
    # self.stop_segment and self.release_access, neither of which is
    # defined in the part of the file reviewed here - confirm both before
    # re-enabling it.
    if failed and False:
        if not fail_soft:
            thread_pool.clear()
            for tb in succeeded:
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                t = self.pooled_thread(
                        target=self.stop_segment(log=log,
                            testbed=tb,
                            keyfile=self.ssh_privkey_file,
                            debug=self.debug),
                        args=(tb, eid, tbparams), name=tb,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait until all finish
            thread_pool.wait_for_all_done()

            # release the allocations
            for tb in tbparams.keys():
                self.release_access(tb, tbparams[tb]['allocID'])
            # Remove the placeholder
            self.state_lock.acquire()
            try:
                self.state[eid]['experimentStatus'] = 'failed'
                if self.state_filename: self.write_state()
            finally:
                self.state_lock.release()

            log.error("Swap in failed on %s" % ",".join(failed))
            return
    else:
        log.info("[start_segment]: Experiment %s active" % eid)

    log.debug("[start_experiment]: removing %s" % tmpdir)

    # Walk up tmpdir, deleting as we go
    for path, dirs, files in os.walk(tmpdir, topdown=False):
        for f in files:
            os.remove(os.path.join(path, f))
        for d in dirs:
            os.rmdir(os.path.join(path, d))
    os.rmdir(tmpdir)

    # Insert the experiment into our state and update the disk copy.  The
    # lock is released even if writing the state fails, so one failed write
    # cannot block every later request.
    self.state_lock.acquire()
    try:
        self.state[expid]['experimentStatus'] = 'active'
        self.state[eid] = self.state[expid]
        if self.state_filename: self.write_state()
    finally:
        self.state_lock.release()
    return
	1097
	1098
def create_experiment(self, req, fid):
    """
    The external interface to experiment creation called from the
    dispatcher.

    Creates a working directory, splits the incoming description using the
    splitter script and parses out the various subsections using the
    classes above.  Once each sub-experiment is created, use pooled threads
    to instantiate them and start it all up.

    req is the decoded request and must contain a CreateRequestBody; fid
    identifies the requester.  Returns a dict holding the experimentID
    (local name and fedid), the experimentStatus 'starting' and the
    experimentAccess certificate; the segments themselves are started by a
    background thread running allocate_resources.  Raises service_error on
    access failures, malformed requests and local errors.
    """

    def add_kit(e, kit):
        """
        Add a Software object created from the list of (install, location)
        tuples passed as kit to the software attribute of an object e.  We
        do this enough to break out the code, but it's kind of a hack to
        avoid changing the old tuple rep.
        """

        s = [ topdl.Software(install=i, location=l) for i, l in kit]

        if isinstance(e.software, list): e.software.extend(s)
        else: e.software = s

    def random_eid():
        """
        Return the experiment stem followed by five random letters.
        """
        return self.exp_stem + \
                "".join([random.choice(string.ascii_letters) \
                    for i in range(0, 5)])

    def install_placeholder(name):
        """
        Record the initial 'starting' state under both name and expid and
        update the disk copy.  The caller must hold state_lock.
        """
        self.state[name] = {
                'experimentID' : \
                        [ { 'localname' : name }, {'fedid': expid } ],
                'experimentStatus': 'starting',
                'experimentAccess': { 'X509' : expcert },
                'owner': fid,
                'log' : [],
            }
        self.state[expid] = self.state[name]
        if self.state_filename: self.write_state()

    if not self.auth.check_attribute(fid, 'create'):
        raise service_error(service_error.access, "Create access denied")

    try:
        tmpdir = tempfile.mkdtemp(prefix="split-")
    except (IOError, OSError):
        # mkdtemp reports failures as OSError, which "except IOError" alone
        # does not catch under Python 2
        raise service_error(service_error.internal, "Cannot create tmp dir")

    gw_pubkey_base = "fed.%s.pub" % self.ssh_type
    gw_secretkey_base = "fed.%s" % self.ssh_type
    gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
    gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
    tclfile = tmpdir + "/experiment.tcl"
    tbparams = { }
    try:
        access_user = self.accessdb[fid]
    except KeyError:
        raise service_error(service_error.internal,
                "Access map and authorizer out of sync in " + \
                        "create_experiment for fedid %s" % fid)

    pid = "dummy"
    gid = "dummy"
    try:
        os.mkdir(tmpdir+"/keys")
    except OSError:
        raise service_error(service_error.internal,
                "Can't make temporary dir")

    req = req.get('CreateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no CreateRequestBody)")
    # The tcl parser needs to read a file so put the content into that file
    descr = req.get('experimentdescription', None)
    if descr:
        file_content = descr.get('ns2description', None)
        if file_content:
            try:
                f = open(tclfile, 'w')
                try:
                    f.write(file_content)
                finally:
                    f.close()
            except IOError:
                raise service_error(service_error.internal,
                        "Cannot write temp experiment description")
        else:
            raise service_error(service_error.req,
                    "Only ns2descriptions supported")
    else:
        raise service_error(service_error.req, "No experiment description")

    # Generate an ID for the experiment (slice) and a certificate that the
    # allocator can use to prove they own it.  We'll ship it back through
    # the encrypted connection.
    (expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)

    if 'experimentID' in req and 'localname' in req['experimentID']:
        overwrite = False
        eid = req['experimentID']['localname']
        # If there's an old failed experiment here with the same local name
        # and accessible by this user, we'll overwrite it, otherwise we'll
        # fall through and do the collision avoidance.
        # NOTE(review): check_experiment_access raises service_error on
        # denial rather than returning False, so a name collision with an
        # experiment this user cannot access aborts the request here instead
        # of reaching the collision avoidance below - confirm which is
        # intended.
        old_expid = self.get_experiment_fedid(eid)
        if old_expid and self.check_experiment_access(fid, old_expid):
            self.state_lock.acquire()
            try:
                status = self.state[eid].get('experimentStatus', None)
                if status == 'failed':
                    # remove the old access attribute
                    self.auth.unset_attribute(fid, old_expid)
                    overwrite = True
                    del self.state[eid]
                    del self.state[old_expid]
            finally:
                self.state_lock.release()
        self.state_lock.acquire()
        try:
            while eid in self.state and not overwrite:
                eid += random.choice(string.ascii_letters)
            # Initial state
            install_placeholder(eid)
        finally:
            self.state_lock.release()
    else:
        eid = random_eid()
        self.state_lock.acquire()
        try:
            while eid in self.state:
                eid = random_eid()
            # Initial state
            install_placeholder(eid)
        finally:
            self.state_lock.release()

    try:
        # This catches exceptions to clear the placeholder if necessary
        try:
            self.generate_ssh_keys(gw_secretkey, self.ssh_type)
        except ValueError:
            raise service_error(service_error.server_config,
                    "Bad key type (%s)" % self.ssh_type)

        user = req.get('user', None)
        if user is None:
            raise service_error(service_error.req, "No user")

        master = req.get('master', None)
        if not master:
            raise service_error(service_error.req,
                    "No master testbed label")
        export_project = req.get('exportProject', None)
        if not export_project:
            raise service_error(service_error.req, "No export project")

        if self.splitter_url:
            self.log.debug("Calling remote splitter at %s" % \
                    self.splitter_url)
            split_data = self.remote_splitter(self.splitter_url,
                    file_content, master)
        else:
            tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
                str(self.muxmax), '-m', master]

            if self.fedkit:
                tclcmd.append('-k')

            if self.gatewaykit:
                tclcmd.append('-K')

            tclcmd.extend([pid, gid, eid, tclfile])

            self.log.debug("running local splitter %s", " ".join(tclcmd))
            # This is just fantastic.  As a side effect the parser copies
            # tb_compat.tcl into the current directory, so that directory
            # must be writable by the fedd user.  Doing this in the
            # temporary subdir ensures this is the case.
            # NOTE(review): the splitter process is never waited for, so
            # its exit status is ignored.
            tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
                    cwd=tmpdir)
            split_data = tclparser.stdout

        allocated = { }         # Testbeds we can access
        # Allocate IP addresses: The allocator is a buddy system memory
        # allocator.  Allocate from the largest substrate to the
        # smallest to make the packing more likely to work - i.e.
        # avoiding internal fragmentation.
        top = topdl.topology_from_xml(file=split_data, top="experiment")
        subs = sorted(top.substrates,
                key=lambda sub: len(sub.interfaces),
                reverse=True)
        ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
        ifs = { }
        hosts = [ ]
        # The config urlpath
        configpath = "/%s/config" % expid
        # The config file system location
        configdir = "%s%s" % ( self.repodir, configpath)

        for s in subs:
            a = ips.allocate(len(s.interfaces)+2)
            if a:
                base, num = a
                if num < len(s.interfaces) + 2:
                    raise service_error(service_error.internal,
                            "Allocator returned wrong number of IPs??")
            else:
                raise service_error(service_error.req,
                        "Cannot allocate IP addresses")

            base += 1
            for i in s.interfaces:
                i.attribute.append(
                        topdl.Attribute('ip4_address',
                            "%s" % ip_addr(base)))
                hname = i.element.name[0]
                if hname in ifs:
                    hosts.append("%s\t%s-%s %s-%d" % \
                            (ip_addr(base), hname, s.name, hname,
                                ifs[hname]))
                else:
                    # The first interface seen for a host also carries the
                    # bare host name
                    ifs[hname] = 0
                    hosts.append("%s\t%s-%s %s-%d %s" % \
                            (ip_addr(base), hname, s.name, hname,
                                ifs[hname], hname))

                ifs[hname] += 1
                base += 1
        # save config files
        try:
            os.makedirs(configdir)
        except (IOError, OSError) as e:
            # makedirs raises OSError; service_error needs an error code as
            # its first argument
            raise service_error(service_error.internal,
                    "Cannot create config directory: %s" % e)
        # Find the testbeds to look up
        testbeds = set([ a.value for e in top.elements \
                for a in e.attribute \
                    if a.attribute == 'testbed'] )

        # Make per testbed topologies.  Copy the main topo and remove
        # interfaces and nodes that don't live in the testbed.
        topo = { }
        for tb in testbeds:
            self.get_access(tb, None, user, tbparams, master,
                    export_project, access_user)
            allocated[tb] = 1
            topo[tb] = top.clone()
            to_delete = [ ]
            for e in topo[tb].elements:
                etb = e.get_attribute('testbed')
                if etb and etb != tb:
                    for i in e.interface:
                        for s in i.subs:
                            try:
                                s.interfaces.remove(i)
                            except ValueError:
                                raise service_error(service_error.internal,
                                        "Can't remove interface??")
                    to_delete.append(e)
            for e in to_delete:
                topo[tb].elements.remove(e)
            topo[tb].make_indices()

            for e in topo[tb].elements:
                if tb == master:
                    cmd = ('sudo -H /usr/local/federation/bin/make_hosts '
                            '/proj/%s/exp/%s/tmp/hosts >& /tmp/federate') % \
                            (tbparams[tb].get('project', 'project'), eid)
                else:
                    cmd = "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& /tmp/federate"
                scmd = e.get_attribute('startup')
                if scmd:
                    cmd = "%s \\$USER '%s'" % (cmd, scmd)

                e.set_attribute('startup', cmd)
                if self.fedkit: add_kit(e, self.fedkit)

        # Copy configuration files into the remote file store
        try:
            f = open("%s/hosts" % configdir, "w")
            try:
                f.write('\n'.join(hosts))
            finally:
                f.close()
        except IOError as e:
            raise service_error(service_error.internal,
                    "Cannot write hosts file: %s" % e)
        try:
            self.copy_file("%s" % gw_pubkey, "%s/%s" % \
                    (configdir, gw_pubkey_base))
            self.copy_file("%s" % gw_secretkey, "%s/%s" % \
                    (configdir, gw_secretkey_base))
        except IOError as e:
            raise service_error(service_error.internal,
                    "Cannot copy keyfiles: %s" % e)

        # Allow the individual testbeds to access the configuration files.
        for tb in tbparams.keys():
            asignee = tbparams[tb]['allocID']['fedid']
            for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
                self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))

        # Now, for each substrate in the main topology, find those that
        # have nodes on more than one testbed.  Insert portal nodes
        # into the copies of those substrates on the sub topologies.
        for s in top.substrates:
            # tbs will contain an ip address on this substrate that is in
            # each testbed.
            tbs = { }
            for i in s.interfaces:
                e = i.element
                tb = e.get_attribute('testbed')
                if tb and tb not in tbs:
                    for ei in e.interface:
                        if s in ei.subs:
                            tbs[tb] = ei.get_attribute('ip4_address')
            if len(tbs) < 2:
                continue

            # More than one testbed is on this substrate.  Insert
            # some portals into the subtopologies.  st == source testbed,
            # dt == destination testbed.
            segment_substrate = { }
            for st in tbs.keys():
                segment_substrate[st] = { }
                for dt in [ t for t in tbs.keys() if t != st]:
                    myname = "%stunnel" % dt
                    desthost = "%stunnel" % st
                    sproject = tbparams[st].get('project', 'project')
                    dproject = tbparams[dt].get('project', 'project')
                    mproject = tbparams[master].get('project', 'project')
                    sdomain = tbparams[st].get('domain', ".example.com")
                    ddomain = tbparams[dt].get('domain', ".example.com")
                    mdomain = tbparams[master].get('domain', '.example.com')
                    muser = tbparams[master].get('user', 'root')
                    smbshare = tbparams[master].get('smbshare', 'USERS')
                    # XXX: active and type need to be unkludged
                    active = ("%s" % (st == master))
                    if dt not in segment_substrate[st]:
                        # Put a substrate and a segment for the connected
                        # testbed in there.
                        tsubstrate = \
                                topdl.Substrate(name='%s-%s' % (st, dt),
                                        attribute= [
                                            topdl.Attribute(
                                                attribute='portal',
                                                value='true')
                                            ]
                                        )
                        segment_element = topdl.Segment(
                                id= tbparams[dt]['allocID'],
                                type='emulab',
                                uri = self.tbmap.get(dt, None),
                                interface=[
                                    topdl.Interface(
                                        substrate=tsubstrate.name),
                                    ],
                                attribute = [
                                    topdl.Attribute(attribute=n, value=v)
                                        for n, v in (\
                                            ('domain', ddomain),
                                            ('experiment', "%s/%s" % \
                                                    (dproject, eid)),)
                                    ],
                                )
                        segment_substrate[st][dt] = tsubstrate
                        topo[st].substrates.append(tsubstrate)
                        topo[st].elements.append(segment_element)
                    gw_startup = ('sudo -H /usr/local/federation/bin/fed-tun.pl '
                            '-f /proj/%s/exp/%s/tmp/%s.%s.%s%s.gw.conf '
                            '>& /tmp/bridge.log') % \
                            (sproject, eid, myname.lower(), eid.lower(),
                                sproject.lower(), sdomain.lower())
                    portal = topdl.Computer(
                            name="%stunnel" % dt,
                            attribute=[
                                topdl.Attribute(attribute=n,value=v)
                                    for n, v in (\
                                        ('portal', 'true'),
                                        ('domain', sdomain),
                                        ('masterdomain', mdomain),
                                        ('masterexperiment', "%s/%s" % \
                                                (mproject, eid)),
                                        ('masteruser', muser),
                                        ('smbshare', smbshare),
                                        ('experiment', "%s/%s" % \
                                                (sproject, eid)),
                                        ('peer', "%s" % desthost),
                                        ('peer_segment', "%s" % \
                                                tbparams[dt]['allocID']['fedid']),
                                        ('scriptdir',
                                            "/usr/local/federation/bin"),
                                        ('active', "%s" % active),
                                        ('portal_type', 'both'),
                                        ('startup', gw_startup))
                                ],
                            interface=[
                                topdl.Interface(
                                    substrate=s.name,
                                    attribute=[
                                        topdl.Attribute(
                                            attribute='ip4_address',
                                            value=tbs[dt]
                                        )
                                    ]),
                                topdl.Interface(
                                    substrate=\
                                        segment_substrate[st][dt].name,
                                    attribute=[
                                        topdl.Attribute(attribute='portal',
                                            value='true')
                                        ]
                                    ),
                                ],
                            )
                    if self.fedkit: add_kit(portal, self.fedkit)
                    if self.gatewaykit: add_kit(portal, self.gatewaykit)

                    topo[st].elements.append(portal)

        # Connect the gateway nodes into the topologies and clear out
        # substrates that are not in the topologies
        for tb in testbeds:
            topo[tb].incorporate_elements()
            topo[tb].substrates = \
                    [sub for sub in topo[tb].substrates \
                        if len(sub.interfaces) > 0]

        # Copy the rpms and tarfiles to a distribution directory from
        # which the federants can retrieve them
        linkpath = "%s/software" % expid
        softdir = "%s/%s" % ( self.repodir, linkpath)
        softmap = { }
        # Either kit may be unset (both are tested for truth above), so
        # treat a missing kit as empty.
        pkgs = set([ t for l in [self.fedkit or [], self.gatewaykit or []] \
                for p, t in l ])
        pkgs.update([x.location for e in top.elements \
                for x in e.software])
        try:
            os.makedirs(softdir)
        except (IOError, OSError) as e:
            raise service_error(service_error.internal,
                    "Cannot create software directory: %s" % e)
        urlpath = re.sub("/tmp", "", linkpath)
        for pkg in pkgs:
            loc = pkg

            scheme, host, path = urlparse(loc)[0:3]
            dest = os.path.basename(path)
            if not scheme:
                if not loc.startswith('/'):
                    loc = "/%s" % loc
                loc = "file://%s" % loc
            try:
                u = urlopen(loc)
            except Exception as e:
                raise service_error(service_error.req,
                        "Cannot open %s: %s" % (loc, e))
            try:
                try:
                    # Packages are rpms/tarfiles, so copy them as bytes
                    f = open("%s/%s" % (softdir, dest), "wb")
                    try:
                        self.log.debug("Writing %s/%s" % (softdir,dest) )
                        data = u.read(4096)
                        while data:
                            f.write(data)
                            data = u.read(4096)
                    finally:
                        f.close()
                finally:
                    u.close()
            except Exception as e:
                raise service_error(service_error.internal,
                        "Could not copy %s: %s" % (loc, e))
            # XXX
            softmap[pkg] = \
                    "https://users.isi.deterlab.net:23232/%s/%s" %\
                    ( urlpath, dest)

            # Allow the individual testbeds to access the software.  This
            # is done per package so that every copied file is authorized.
            for tb in tbparams.keys():
                self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
                        "/%s/%s" % ( urlpath, dest))

        # Convert the software locations in the segments into the local
        # copies on this host
        for soft in [ sw for seg in topo.values() \
                for e in seg.elements \
                    if getattr(e, 'software', False) \
                        for sw in e.software ]:
            if soft.location in softmap:
                soft.location = softmap[soft.location]

        vtopo = topdl.topology_to_vtopo(top)
        vis = self.genviz(vtopo)

        # save federant information
        for k in allocated.keys():
            tbparams[k]['federant'] = {
                    'name': [ { 'localname' : eid} ],
                    'emulab': tbparams[k]['emulab'],
                    'allocID' : tbparams[k]['allocID'],
                    'master' : k == master,
                }

        self.state_lock.acquire()
        try:
            self.state[eid]['vtopo'] = vtopo
            self.state[eid]['vis'] = vis
            self.state[expid]['federant'] = \
                    [ tbparams[tb]['federant'] for tb in tbparams.keys() \
                        if 'federant' in tbparams[tb] ]
            if self.state_filename:
                self.write_state()
        finally:
            self.state_lock.release()
    except service_error:
        # If something goes wrong in the parse (usually an access error)
        # clear the placeholder state.  From here on out the code delays
        # exceptions.  Failing at this point returns a fault to the remote
        # caller.

        self.state_lock.acquire()
        try:
            del self.state[eid]
            del self.state[expid]
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()
        # Re-raise the same exception, keeping the original traceback
        raise

    # Start the background swapper and return the starting state.  From
    # here on out, the state will stick around a while.

    # Let users touch the state
    self.auth.set_attribute(fid, expid)
    self.auth.set_attribute(expid, expid)
    # Override fedids can manipulate state as well
    for o in self.overrides:
        self.auth.set_attribute(o, expid)

    # Create a logger that logs to the experiment's state object as well as
    # to the main log file.
    alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
    h = logging.StreamHandler(self.list_log(self.state[eid]['log']))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
                '%d %b %y %H:%M:%S'))
    alloc_log.addHandler(h)

    # XXX
    url_base = 'https://users.isi.deterlab.net:23232'
    attrs = [
            {
                'attribute': 'ssh_pubkey',
                'value': '%s/%s/config/%s' % \
                        (url_base, expid, gw_pubkey_base)
            },
            {
                'attribute': 'ssh_secretkey',
                'value': '%s/%s/config/%s' % \
                        (url_base, expid, gw_secretkey_base)
            },
            {
                'attribute': 'hosts',
                'value': '%s/%s/config/hosts' % \
                        (url_base, expid)
            },
            {
                'attribute': 'experiment_name',
                'value': eid,
            },
        ]

    # Start a thread to do the resource allocation
    t = Thread(target=self.allocate_resources,
            args=(allocated, master, eid, expid, expcert, tbparams,
                topo, tmpdir, alloc_log, attrs),
            name=eid)
    t.start()

    rv = {
            'experimentID': [
                {'localname' : eid }, { 'fedid': copy.copy(expid) }
            ],
            'experimentStatus': 'starting',
            'experimentAccess': { 'X509' : expcert }
        }

    return rv
[9479343]	1675
	1676	def get_experiment_fedid(self, key):
	1677	"""
[db6b092]	1678	find the fedid associated with the localname key in the state database.
[9479343]	1679	"""
	1680
[db6b092]	1681	rv = None
	1682	self.state_lock.acquire()
	1683	if self.state.has_key(key):
	1684	if isinstance(self.state[key], dict):
	1685	try:
	1686	kl = [ f['fedid'] for f in \
	1687	self.state[key]['experimentID']\
	1688	if f.has_key('fedid') ]
	1689	except KeyError:
	1690	self.state_lock.release()
	1691	raise service_error(service_error.internal,
	1692	"No fedid for experiment %s when getting "+\
	1693	"fedid(!?)" % key)
	1694	if len(kl) == 1:
	1695	rv = kl[0]
	1696	else:
	1697	self.state_lock.release()
	1698	raise service_error(service_error.internal,
	1699	"multiple fedids for experiment %s when " +\
	1700	"getting fedid(!?)" % key)
	1701	else:
	1702	self.state_lock.release()
	1703	raise service_error(service_error.internal,
	1704	"Unexpected state for %s" % key)
	1705	self.state_lock.release()
	1706	return rv
[a97394b]	1707
def check_experiment_access(self, fid, key):
    """
    Verify that fid may access the experiment named by key.

    The access attribute is always the experiment's fedid, so a key that is
    not already a fedid is first translated with get_experiment_fedid.
    Returns True when the authorizer grants access and raises service_error
    (access) otherwise; it never returns False.
    """
    expid = key
    if not isinstance(expid, fedid):
        expid = self.get_experiment_fedid(expid)

    if not self.auth.check_attribute(fid, expid):
        raise service_error(service_error.access, "Access Denied")
    return True
[4064742]	1721
	1722
def get_handler(self, path, fid):
    """
    Map a requested path to a file under the repository directory.

    Returns (file location, "application/binary") when the authorizer
    grants fid the attribute path, and (None, None) otherwise.
    """
    if not self.auth.check_attribute(fid, path):
        return (None, None)
    location = "%s/%s" % (self.repodir, path)
    return (location, "application/binary")
[987aaa1]	1728
	1729	def get_vtopo(self, req, fid):
[866c983]	1730	"""
	1731	Return the stored virtual topology for this experiment
	1732	"""
	1733	rv = None
[db6b092]	1734	state = None
[866c983]	1735
	1736	req = req.get('VtopoRequestBody', None)
	1737	if not req:
	1738	raise service_error(service_error.req,
	1739	"Bad request format (no VtopoRequestBody)")
	1740	exp = req.get('experiment', None)
	1741	if exp:
	1742	if exp.has_key('fedid'):
	1743	key = exp['fedid']
	1744	keytype = "fedid"
	1745	elif exp.has_key('localname'):
	1746	key = exp['localname']
	1747	keytype = "localname"
	1748	else:
	1749	raise service_error(service_error.req, "Unknown lookup type")
	1750	else:
	1751	raise service_error(service_error.req, "No request?")
	1752
	1753	self.check_experiment_access(fid, key)
	1754
	1755	self.state_lock.acquire()
	1756	if self.state.has_key(key):
[db6b092]	1757	if self.state[key].has_key('vtopo'):
	1758	rv = { 'experiment' : {keytype: key },\
	1759	'vtopo': self.state[key]['vtopo'],\
	1760	}
	1761	else:
	1762	state = self.state[key]['experimentStatus']
[866c983]	1763	self.state_lock.release()
	1764
	1765	if rv: return rv
[bd3e314]	1766	else:
[db6b092]	1767	if state:
	1768	raise service_error(service_error.partial,
	1769	"Not ready: %s" % state)
	1770	else:
	1771	raise service_error(service_error.req, "No such experiment")
[987aaa1]	1772
	1773	def get_vis(self, req, fid):
[866c983]	1774	"""
	1775	Return the stored visualization for this experiment
	1776	"""
	1777	rv = None
[db6b092]	1778	state = None
[866c983]	1779
	1780	req = req.get('VisRequestBody', None)
	1781	if not req:
	1782	raise service_error(service_error.req,
	1783	"Bad request format (no VisRequestBody)")
	1784	exp = req.get('experiment', None)
	1785	if exp:
	1786	if exp.has_key('fedid'):
	1787	key = exp['fedid']
	1788	keytype = "fedid"
	1789	elif exp.has_key('localname'):
	1790	key = exp['localname']
	1791	keytype = "localname"
	1792	else:
	1793	raise service_error(service_error.req, "Unknown lookup type")
	1794	else:
	1795	raise service_error(service_error.req, "No request?")
	1796
	1797	self.check_experiment_access(fid, key)
	1798
	1799	self.state_lock.acquire()
	1800	if self.state.has_key(key):
[db6b092]	1801	if self.state[key].has_key('vis'):
	1802	rv = { 'experiment' : {keytype: key },\
	1803	'vis': self.state[key]['vis'],\
	1804	}
	1805	else:
	1806	state = self.state[key]['experimentStatus']
[866c983]	1807	self.state_lock.release()
	1808
	1809	if rv: return rv
[bd3e314]	1810	else:
[db6b092]	1811	if state:
	1812	raise service_error(service_error.partial,
	1813	"Not ready: %s" % state)
	1814	else:
	1815	raise service_error(service_error.req, "No such experiment")
[987aaa1]	1816
def clean_info_response(self, rv):
    """
    Strip an experiment state dict down to what an info response carries.

    rv is modified in place and returned: 'owner' is dropped, 'log' is
    replaced by its concatenation under 'allocationLog' (an empty string
    when there is no log), and federant data is either removed entirely
    (experiment not active) or kept without each federant's 'allocID'.
    """
    # The owner should always be present, but tolerate its absence
    rv.pop('owner', None)

    # Fold the log lines into a single allocationLog string
    rv['allocationLog'] = "".join(rv.pop('log', []))

    if rv['experimentStatus'] == 'active':
        # Federants are reported, minus their allocation ids
        for fed in rv.get('federant', []):
            fed.pop('allocID', None)
    else:
        rv.pop('federant', None)
    return rv
[65f3f29]	1840
def get_info(self, req, fid):
    """
    Return a cleaned copy of everything stored about one experiment.

    req must contain an InfoRequestBody whose 'experiment' entry names the
    experiment by 'fedid' or 'localname'; fid is checked with
    check_experiment_access.  Raises service_error (req) for a malformed
    request or when nothing is stored for the experiment.
    """
    body = req.get('InfoRequestBody', None)
    if not body:
        raise service_error(service_error.req,
                "Bad request format (no InfoRequestBody)")

    exp = body.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")
    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    # The state may be massaged by the service function that called
    # get_info (e.g., encoded for XMLRPC transport) so work on a copy.
    info = None
    self.state_lock.acquire()
    if key in self.state:
        info = copy.deepcopy(self.state[key])
    self.state_lock.release()

    if not info:
        raise service_error(service_error.req, "No such experiment")
    return self.clean_info_response(info)
[7a8d667]	1878
def get_multi_info(self, req, fid):
    """
    Return all the stored info that this fedid can access

    Walks every experiment stored under a fedid key and returns
    { 'info': [...] } holding a cleaned deep copy of each one that fid may
    access.  Experiments that fid cannot access are skipped.  req is not
    consulted.
    """
    rv = { 'info': [ ] }

    self.state_lock.acquire()
    try:
        for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
            # check_experiment_access raises on denial; treat that as "not
            # visible to this caller" rather than failing the whole request
            # (and leaving state_lock held).
            try:
                allowed = self.check_experiment_access(fid, key)
            except service_error:
                allowed = False

            if allowed and key in self.state:
                e = copy.deepcopy(self.state[key])
                e = self.clean_info_response(e)
                rv['info'].append(e)
    finally:
        self.state_lock.release()
    return rv
[65f3f29]	1895
def terminate_experiment(self, req, fid):
    """
    Swap this experiment out on the federants and delete the shared
    information

    req must contain a 'TerminateRequestBody' dict holding an 'experiment'
    dict that names the experiment by 'fedid' or by 'localname', and
    optionally a 'force' flag.  fid is the identity of the caller and is
    checked with check_experiment_access.

    Returns a dict with the 'experiment' identifier from the request and a
    'deallocationLog' string made of everything logged while the segments
    were being terminated.

    Raises service_error:
        req      - malformed request, or no saved state for the experiment
        partial  - the experiment is starting or terminating and force is
                   not set
        internal - the stored experiment has no status
    A service_error from release_access is re-raised unless the experiment
    had status 'failed' or force is set.
    """
    req = req.get('TerminateRequestBody', None)
    if not req:
        raise service_error(service_error.req,
                "Bad request format (no TerminateRequestBody)")
    force = req.get('force', False)
    exp = req.get('experiment', None)
    if not exp:
        raise service_error(service_error.req, "No request?")

    if 'fedid' in exp:
        key = exp['fedid']
    elif 'localname' in exp:
        key = exp['localname']
    else:
        raise service_error(service_error.req, "Unknown lookup type")

    self.check_experiment_access(fid, key)

    dealloc_list = [ ]

    # Create a logger that logs to the dealloc_list as well as to the main
    # log file.
    dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
    h = logging.StreamHandler(self.list_log(dealloc_list))
    # XXX: there should be a global one of these rather than repeating the
    # code.
    h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
        '%d %b %y %H:%M:%S'))
    dealloc_log.addHandler(h)

    try:
        tbparams = { }
        ids = [ ]

        # Hold the lock to generate a consistent tbparams variable to
        # deallocate experiments.  The lock is released to do the
        # deallocations and reacquired to remove the experiment state when
        # the termination is complete.  try/finally guarantees the release
        # on every exit path, including the error returns below.
        self.state_lock.acquire()
        try:
            fed_exp = self.state.get(key, None)
            if not fed_exp:
                raise service_error(service_error.req, "No saved state")

            # First make sure that the experiment creation is complete.
            status = fed_exp.get('experimentStatus', None)
            if not status:
                # No status??? trouble
                raise service_error(service_error.internal,
                        "Experiment has no status!?")

            if status in ('starting', 'terminating'):
                if not force:
                    raise service_error(service_error.partial,
                            'Experiment still being created or destroyed')
                self.log.warning(
                        'Experiment in %s state being terminated by force.',
                        status)

            # experimentID is a list of dicts that are self-describing
            # identifiers.  This finds all the fedids and localnames - the
            # keys of self.state - and puts them into ids.
            for eid in fed_exp.get('experimentID', []):
                if 'fedid' in eid: ids.append(eid['fedid'])
                if 'localname' in eid: ids.append(eid['localname'])

            # Collect the allocation/segment ids.  Federants that lack the
            # testbed name or the allocation id are skipped.
            for fed in fed_exp.get('federant', []):
                try:
                    tb = fed['emulab']['project']['testbed']['localname']
                    aid = fed['allocID']
                except KeyError:
                    continue
                tbparams[tb] = aid

            fed_exp['experimentStatus'] = 'terminating'
            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        # Stop everyone.  NB, wait_for_all waits until a thread starts and
        # then completes, so we can't wait if nothing starts.  So, no
        # tbparams, no start.
        if len(tbparams) > 0:
            thread_pool = self.thread_pool(self.nthreads)
            for tb in tbparams.keys():
                # Create and start a thread to stop the segment
                thread_pool.wait_for_slot()
                uri = self.tbmap.get(tb, None)
                t = self.pooled_thread(
                        target=self.terminate_segment(log=dealloc_log,
                            testbed=tb,
                            cert_file=self.cert_file,
                            cert_pwd=self.cert_pwd,
                            trusted_certs=self.trusted_certs,
                            caller=self.call_TerminateSegment),
                        args=(uri, tbparams[tb]), name=tb,
                        pdata=thread_pool, trace_file=self.trace_file)
                t.start()
            # Wait for completions
            thread_pool.wait_for_all_done()

        # Release the allocations (failed experiments have done this
        # already, and starting experiments may be in odd states, so we
        # ignore errors releasing those allocations).  Each testbed is
        # handled on its own so that an ignored error on one of them does
        # not skip the release of the others.
        for tb in tbparams.keys():
            try:
                self.release_access(tb, tbparams[tb])
            except service_error:
                if status != 'failed' and not force:
                    # Bare raise keeps the original traceback.
                    raise

        # Remove the terminated experiment
        self.state_lock.acquire()
        try:
            for k in ids:
                if k in self.state: del self.state[k]

            if self.state_filename: self.write_state()
        finally:
            self.state_lock.release()

        return {
                'experiment': exp ,
                'deallocationLog': "".join(dealloc_list),
                }
    finally:
        # Loggers live for the life of the process, so detach the per-call
        # handler; otherwise every call leaves one more handler (and its
        # list) attached to this experiment's logger.
        dealloc_log.removeHandler(h)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: