Context Navigation

source: fedd/federation/experiment_control.py @ 35aa3ae

axis_examplecompt_changesinfo-opsversion-3.01version-3.02

Last change on this file since 35aa3ae was 641bb66, checked in by Ted Faber <faber@…>, 15 years ago
Remove one instance of experiment controller setting startcmds. The others need to go as well.
Property mode set to `100644`
File size: 87.4 KB

Rev	Line
[6679c122]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4
	5	import re
	6	import random
	7	import string
	8	import subprocess
	9	import tempfile
	10	import copy
[eee2b2e]	11	import pickle
[c971895]	12	import logging
[79b6596]	13	import signal
	14	import time
[6679c122]	15
[3441fe3]	16	import traceback
[c971895]	17	# For parsing visualization output and splitter output
	18	import xml.parsers.expat
[3441fe3]	19
[6c57fe9]	20	from threading import Lock, Thread, Condition
	21	from subprocess import call, Popen, PIPE
[6679c122]	22
[db6b092]	23	from urlparse import urlparse
	24	from urllib2 import urlopen
	25
[ec4fb42]	26	from util import *
[51cc9df]	27	from fedid import fedid, generate_fedid
[9460b1e]	28	from remote_service import xmlrpc_handler, soap_handler, service_caller
[c971895]	29	from service_error import service_error
[6679c122]	30
[db6b092]	31	import topdl
[f07fa49]	32	import list_log
[db6b092]	33	from ip_allocator import ip_allocator
	34	from ip_addr import ip_addr
	35
[11a08b0]	36
	37	class nullHandler(logging.Handler):
	38	def emit(self, record): pass
	39
	40	fl = logging.getLogger("fedd.experiment_control")
	41	fl.addHandler(nullHandler())
	42
[ec4fb42]	43	class experiment_control_local:
[0ea11af]	44	"""
	45	Control of experiments that this system can directly access.
	46
	47	Includes experiment creation, termination and information dissemination.
	48	Thred safe.
	49	"""
[79b6596]	50
	51	class ssh_cmd_timeout(RuntimeError): pass
[6679c122]	52
[1af38d6]	53	class thread_pool:
[866c983]	54	"""
	55	A class to keep track of a set of threads all invoked for the same
	56	task. Manages the mutual exclusion of the states.
	57	"""
	58	def __init__(self, nthreads):
	59	"""
	60	Start a pool.
	61	"""
	62	self.changed = Condition()
	63	self.started = 0
	64	self.terminated = 0
	65	self.nthreads = nthreads
	66
	67	def acquire(self):
	68	"""
	69	Get the pool's lock.
	70	"""
	71	self.changed.acquire()
	72
	73	def release(self):
	74	"""
	75	Release the pool's lock.
	76	"""
	77	self.changed.release()
	78
	79	def wait(self, timeout = None):
	80	"""
	81	Wait for a pool thread to start or stop.
	82	"""
	83	self.changed.wait(timeout)
	84
	85	def start(self):
	86	"""
	87	Called by a pool thread to report starting.
	88	"""
	89	self.changed.acquire()
	90	self.started += 1
	91	self.changed.notifyAll()
	92	self.changed.release()
	93
	94	def terminate(self):
	95	"""
	96	Called by a pool thread to report finishing.
	97	"""
	98	self.changed.acquire()
	99	self.terminated += 1
	100	self.changed.notifyAll()
	101	self.changed.release()
	102
	103	def clear(self):
	104	"""
	105	Clear all pool data.
	106	"""
	107	self.changed.acquire()
	108	self.started = 0
	109	self.terminated =0
	110	self.changed.notifyAll()
	111	self.changed.release()
	112
	113	def wait_for_slot(self):
	114	"""
	115	Wait until we have a free slot to start another pooled thread
	116	"""
	117	self.acquire()
	118	while self.started - self.terminated >= self.nthreads:
	119	self.wait()
	120	self.release()
	121
[32e7d93]	122	def wait_for_all_done(self, timeout=None):
[866c983]	123	"""
[32e7d93]	124	Wait until all active threads finish (and at least one has
	125	started). If a timeout is given, return after waiting that long
	126	for termination. If all threads are done (and one has started in
	127	the since the last clear()) return True, otherwise False.
[866c983]	128	"""
[32e7d93]	129	if timeout:
	130	deadline = time.time() + timeout
[866c983]	131	self.acquire()
	132	while self.started == 0 or self.started > self.terminated:
[32e7d93]	133	self.wait(timeout)
	134	if timeout:
	135	if time.time() > deadline:
	136	break
	137	timeout = deadline - time.time()
[866c983]	138	self.release()
[32e7d93]	139	return not (self.started == 0 or self.started > self.terminated)
[8bc5754]	140
[1af38d6]	141	class pooled_thread(Thread):
[866c983]	142	"""
	143	One of a set of threads dedicated to a specific task. Uses the
	144	thread_pool class above for coordination.
	145	"""
	146	def __init__(self, group=None, target=None, name=None, args=(),
	147	kwargs={}, pdata=None, trace_file=None):
	148	Thread.__init__(self, group, target, name, args, kwargs)
	149	self.rv = None # Return value of the ops in this thread
	150	self.exception = None # Exception that terminated this thread
	151	self.target=target # Target function to run on start()
	152	self.args = args # Args to pass to target
	153	self.kwargs = kwargs # Additional kw args
	154	self.pdata = pdata # thread_pool for this class
	155	# Logger for this thread
	156	self.log = logging.getLogger("fedd.experiment_control")
	157
	158	def run(self):
	159	"""
	160	Emulate Thread.run, except add pool data manipulation and error
	161	logging.
	162	"""
	163	if self.pdata:
	164	self.pdata.start()
	165
	166	if self.target:
	167	try:
	168	self.rv = self.target(self.args, *self.kwargs)
	169	except service_error, s:
	170	self.exception = s
	171	self.log.error("Thread exception: %s %s" % \
	172	(s.code_string(), s.desc))
	173	except:
	174	self.exception = sys.exc_info()[1]
	175	self.log.error(("Unexpected thread exception: %s" +\
	176	"Trace %s") % (self.exception,\
	177	traceback.format_exc()))
	178	if self.pdata:
	179	self.pdata.terminate()
[6679c122]	180
[f069052]	181	call_RequestAccess = service_caller('RequestAccess')
	182	call_ReleaseAccess = service_caller('ReleaseAccess')
[cc8d8e9]	183	call_StartSegment = service_caller('StartSegment')
[5ae3857]	184	call_TerminateSegment = service_caller('TerminateSegment')
[f069052]	185	call_Ns2Split = service_caller('Ns2Split')
[058f58e]	186
[3f6bc5f]	187	def __init__(self, config=None, auth=None):
[866c983]	188	"""
	189	Intialize the various attributes, most from the config object
	190	"""
	191
	192	def parse_tarfile_list(tf):
	193	"""
	194	Parse a tarfile list from the configuration. This is a set of
	195	paths and tarfiles separated by spaces.
	196	"""
	197	rv = [ ]
	198	if tf is not None:
	199	tl = tf.split()
	200	while len(tl) > 1:
	201	p, t = tl[0:2]
	202	del tl[0:2]
	203	rv.append((p, t))
	204	return rv
	205
	206	self.thread_with_rv = experiment_control_local.pooled_thread
	207	self.thread_pool = experiment_control_local.thread_pool
[f07fa49]	208	self.list_log = list_log.list_log
[866c983]	209
	210	self.cert_file = config.get("experiment_control", "cert_file")
	211	if self.cert_file:
	212	self.cert_pwd = config.get("experiment_control", "cert_pwd")
	213	else:
	214	self.cert_file = config.get("globals", "cert_file")
	215	self.cert_pwd = config.get("globals", "cert_pwd")
	216
	217	self.trusted_certs = config.get("experiment_control", "trusted_certs") \
	218	or config.get("globals", "trusted_certs")
	219
[6c57fe9]	220	self.repodir = config.get("experiment_control", "repodir")
[7183b48]	221	self.repo_url = config.get("experiment_control", "repo_url",
	222	"https://users.isi.deterlab.net:23235");
[cc8d8e9]	223
[866c983]	224	self.exp_stem = "fed-stem"
	225	self.log = logging.getLogger("fedd.experiment_control")
	226	set_log_level(config, "experiment_control", self.log)
	227	self.muxmax = 2
	228	self.nthreads = 2
	229	self.randomize_experiments = False
	230
	231	self.splitter = None
	232	self.ssh_keygen = "/usr/bin/ssh-keygen"
	233	self.ssh_identity_file = None
	234
	235
	236	self.debug = config.getboolean("experiment_control", "create_debug")
[69692a9]	237	self.cleanup = not config.getboolean("experiment_control",
	238	"leave_tmpfiles")
[866c983]	239	self.state_filename = config.get("experiment_control",
	240	"experiment_state")
	241	self.splitter_url = config.get("experiment_control", "splitter_uri")
	242	self.fedkit = parse_tarfile_list(\
	243	config.get("experiment_control", "fedkit"))
	244	self.gatewaykit = parse_tarfile_list(\
	245	config.get("experiment_control", "gatewaykit"))
	246	accessdb_file = config.get("experiment_control", "accessdb")
	247
	248	self.ssh_pubkey_file = config.get("experiment_control",
	249	"ssh_pubkey_file")
	250	self.ssh_privkey_file = config.get("experiment_control",
	251	"ssh_privkey_file")
	252	# NB for internal master/slave ops, not experiment setup
	253	self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")
[ca489e8]	254
[db6b092]	255	self.overrides = set([])
	256	ovr = config.get('experiment_control', 'overrides')
	257	if ovr:
	258	for o in ovr.split(","):
	259	o = o.strip()
	260	if o.startswith('fedid:'): o = o[len('fedid:'):]
	261	self.overrides.add(fedid(hexstr=o))
[ca489e8]	262
[866c983]	263	self.state = { }
	264	self.state_lock = Lock()
	265	self.tclsh = "/usr/local/bin/otclsh"
	266	self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
	267	config.get("experiment_control", "tcl_splitter",
	268	"/usr/testbed/lib/ns2ir/parse.tcl")
	269	mapdb_file = config.get("experiment_control", "mapdb")
	270	self.trace_file = sys.stderr
	271
	272	self.def_expstart = \
	273	"sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
	274	"/tmp/federate";
	275	self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
	276	"FEDDIR/hosts";
	277	self.def_gwstart = \
	278	"sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
	279	"/tmp/bridge.log";
	280	self.def_mgwstart = \
	281	"sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
	282	"/tmp/bridge.log";
	283	self.def_gwimage = "FBSD61-TUNNEL2";
	284	self.def_gwtype = "pc";
	285	self.local_access = { }
	286
	287	if auth:
	288	self.auth = auth
	289	else:
	290	self.log.error(\
	291	"[access]: No authorizer initialized, creating local one.")
	292	auth = authorizer()
	293
	294
	295	if self.ssh_pubkey_file:
	296	try:
	297	f = open(self.ssh_pubkey_file, 'r')
	298	self.ssh_pubkey = f.read()
	299	f.close()
	300	except IOError:
	301	raise service_error(service_error.internal,
	302	"Cannot read sshpubkey")
	303	else:
	304	raise service_error(service_error.internal,
	305	"No SSH public key file?")
	306
	307	if not self.ssh_privkey_file:
	308	raise service_error(service_error.internal,
	309	"No SSH public key file?")
	310
	311
	312	if mapdb_file:
	313	self.read_mapdb(mapdb_file)
	314	else:
	315	self.log.warn("[experiment_control] No testbed map, using defaults")
	316	self.tbmap = {
	317	'deter':'https://users.isi.deterlab.net:23235',
	318	'emulab':'https://users.isi.deterlab.net:23236',
	319	'ucb':'https://users.isi.deterlab.net:23237',
	320	}
	321
	322	if accessdb_file:
	323	self.read_accessdb(accessdb_file)
	324	else:
	325	raise service_error(service_error.internal,
	326	"No accessdb specified in config")
	327
	328	# Grab saved state. OK to do this w/o locking because it's read only
	329	# and only one thread should be in existence that can see self.state at
	330	# this point.
	331	if self.state_filename:
	332	self.read_state()
	333
	334	# Dispatch tables
	335	self.soap_services = {\
[a3ad8bd]	336	'New': soap_handler('New', self.new_experiment),
[e19b75c]	337	'Create': soap_handler('Create', self.create_experiment),
[866c983]	338	'Vtopo': soap_handler('Vtopo', self.get_vtopo),
	339	'Vis': soap_handler('Vis', self.get_vis),
	340	'Info': soap_handler('Info', self.get_info),
[65f3f29]	341	'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
[866c983]	342	'Terminate': soap_handler('Terminate',
[e19b75c]	343	self.terminate_experiment),
[866c983]	344	}
	345
	346	self.xmlrpc_services = {\
[a3ad8bd]	347	'New': xmlrpc_handler('New', self.new_experiment),
[e19b75c]	348	'Create': xmlrpc_handler('Create', self.create_experiment),
[866c983]	349	'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
	350	'Vis': xmlrpc_handler('Vis', self.get_vis),
	351	'Info': xmlrpc_handler('Info', self.get_info),
[65f3f29]	352	'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
[866c983]	353	'Terminate': xmlrpc_handler('Terminate',
[e19b75c]	354	self.terminate_experiment),
[866c983]	355	}
[19cc408]	356
[a97394b]	357	# Call while holding self.state_lock
[eee2b2e]	358	def write_state(self):
[866c983]	359	"""
	360	Write a new copy of experiment state after copying the existing state
	361	to a backup.
	362
	363	State format is a simple pickling of the state dictionary.
	364	"""
	365	if os.access(self.state_filename, os.W_OK):
[40dd8c1]	366	copy_file(self.state_filename, \
	367	"%s.bak" % self.state_filename)
[866c983]	368	try:
	369	f = open(self.state_filename, 'w')
	370	pickle.dump(self.state, f)
	371	except IOError, e:
	372	self.log.error("Can't write file %s: %s" % \
	373	(self.state_filename, e))
	374	except pickle.PicklingError, e:
	375	self.log.error("Pickling problem: %s" % e)
	376	except TypeError, e:
	377	self.log.error("Pickling problem (TypeError): %s" % e)
[eee2b2e]	378
[a97394b]	379	# Call while holding self.state_lock
[eee2b2e]	380	def read_state(self):
[866c983]	381	"""
	382	Read a new copy of experiment state. Old state is overwritten.
	383
	384	State format is a simple pickling of the state dictionary.
	385	"""
[cc8d8e9]	386
	387	def get_experiment_id(state):
	388	"""
	389	Pull the fedid experimentID out of the saved state. This is kind
	390	of a gross walk through the dict.
	391	"""
	392
	393	if state.has_key('experimentID'):
	394	for e in state['experimentID']:
	395	if e.has_key('fedid'):
	396	return e['fedid']
	397	else:
	398	return None
	399	else:
	400	return None
	401
	402	def get_alloc_ids(state):
	403	"""
	404	Pull the fedids of the identifiers of each allocation from the
	405	state. Again, a dict dive that's best isolated.
	406	"""
	407
	408	return [ f['allocID']['fedid']
	409	for f in state.get('federant',[]) \
	410	if f.has_key('allocID') and \
	411	f['allocID'].has_key('fedid')]
	412
	413
[866c983]	414	try:
	415	f = open(self.state_filename, "r")
	416	self.state = pickle.load(f)
	417	self.log.debug("[read_state]: Read state from %s" % \
	418	self.state_filename)
	419	except IOError, e:
	420	self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
	421	% (self.state_filename, e))
	422	except pickle.UnpicklingError, e:
	423	self.log.warning(("[read_state]: No saved state: " + \
	424	"Unpickling failed: %s") % e)
	425
[cc8d8e9]	426	for s in self.state.values():
[866c983]	427	try:
[cc8d8e9]	428
	429	eid = get_experiment_id(s)
	430	if eid :
	431	# Give the owner rights to the experiment
	432	self.auth.set_attribute(s['owner'], eid)
	433	# And holders of the eid as well
	434	self.auth.set_attribute(eid, eid)
[db6b092]	435	# allow overrides to control experiments as well
	436	for o in self.overrides:
	437	self.auth.set_attribute(o, eid)
[cc8d8e9]	438	# Set permissions to allow reading of the software repo, if
	439	# any, as well.
	440	for a in get_alloc_ids(s):
	441	self.auth.set_attribute(a, 'repo/%s' % eid)
	442	else:
	443	raise KeyError("No experiment id")
[866c983]	444	except KeyError, e:
	445	self.log.warning("[read_state]: State ownership or identity " +\
	446	"misformatted in %s: %s" % (self.state_filename, e))
[4064742]	447
	448
	449	def read_accessdb(self, accessdb_file):
[866c983]	450	"""
	451	Read the mapping from fedids that can create experiments to their name
	452	in the 3-level access namespace. All will be asserted from this
	453	testbed and can include the local username and porject that will be
	454	asserted on their behalf by this fedd. Each fedid is also added to the
	455	authorization system with the "create" attribute.
	456	"""
	457	self.accessdb = {}
	458	# These are the regexps for parsing the db
	459	name_expr = "[" + string.ascii_letters + string.digits + "\.\-]+"
	460	project_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	461	"\s->\(\s("+name_expr+")\s,\s("+name_expr+")\s\)\s$")
	462	user_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	463	"\s->\s(" + name_expr + ")\s*$")
	464	lineno = 0
	465
	466	# Parse the mappings and store in self.authdb, a dict of
	467	# fedid -> (proj, user)
	468	try:
	469	f = open(accessdb_file, "r")
	470	for line in f:
	471	lineno += 1
	472	line = line.strip()
	473	if len(line) == 0 or line.startswith('#'):
	474	continue
	475	m = project_line.match(line)
	476	if m:
	477	fid = fedid(hexstr=m.group(1))
	478	project, user = m.group(2,3)
	479	if not self.accessdb.has_key(fid):
	480	self.accessdb[fid] = []
	481	self.accessdb[fid].append((project, user))
	482	continue
	483
	484	m = user_line.match(line)
	485	if m:
	486	fid = fedid(hexstr=m.group(1))
	487	project = None
	488	user = m.group(2)
	489	if not self.accessdb.has_key(fid):
	490	self.accessdb[fid] = []
	491	self.accessdb[fid].append((project, user))
	492	continue
	493	self.log.warn("[experiment_control] Error parsing access " +\
	494	"db %s at line %d" % (accessdb_file, lineno))
	495	except IOError:
	496	raise service_error(service_error.internal,
	497	"Error opening/reading %s as experiment " +\
	498	"control accessdb" % accessdb_file)
	499	f.close()
	500
	501	# Initialize the authorization attributes
	502	for fid in self.accessdb.keys():
	503	self.auth.set_attribute(fid, 'create')
[a3ad8bd]	504	self.auth.set_attribute(fid, 'new')
[34bc05c]	505
	506	def read_mapdb(self, file):
[866c983]	507	"""
	508	Read a simple colon separated list of mappings for the
	509	label-to-testbed-URL mappings. Clears or creates self.tbmap.
	510	"""
	511
	512	self.tbmap = { }
	513	lineno =0
	514	try:
	515	f = open(file, "r")
	516	for line in f:
	517	lineno += 1
	518	line = line.strip()
	519	if line.startswith('#') or len(line) == 0:
	520	continue
	521	try:
	522	label, url = line.split(':', 1)
	523	self.tbmap[label] = url
	524	except ValueError, e:
	525	self.log.warn("[read_mapdb] Ignored bad line (%d) in " +\
	526	"map db: %s %s" % (lineno, line, e))
	527	except IOError, e:
	528	self.log.warning("[read_mapdb]: No saved map database: Can't " +\
	529	"open %s: %s" % (file, e))
	530	f.close()
	531
[6679c122]	532	def generate_ssh_keys(self, dest, type="rsa" ):
[866c983]	533	"""
	534	Generate a set of keys for the gateways to use to talk.
	535
	536	Keys are of type type and are stored in the required dest file.
	537	"""
	538	valid_types = ("rsa", "dsa")
	539	t = type.lower();
	540	if t not in valid_types: raise ValueError
	541	cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]
	542
	543	try:
	544	trace = open("/dev/null", "w")
	545	except IOError:
	546	raise service_error(service_error.internal,
	547	"Cannot open /dev/null??");
	548
	549	# May raise CalledProcessError
	550	self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
[4ea1e22]	551	rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
[866c983]	552	if rv != 0:
	553	raise service_error(service_error.internal,
	554	"Cannot generate nonce ssh keys. %s return code %d" \
	555	% (self.ssh_keygen, rv))
[6679c122]	556
[0d830de]	557	def gentopo(self, str):
[866c983]	558	"""
	559	Generate the topology dtat structure from the splitter's XML
	560	representation of it.
	561
	562	The topology XML looks like:
	563	<experiment>
	564	<nodes>
	565	<node><vname></vname><ips>ip1:ip2</ips></node>
	566	</nodes>
	567	<lans>
	568	<lan>
	569	<vname></vname><vnode></vnode><ip></ip>
	570	<bandwidth></bandwidth><member>node:port</member>
	571	</lan>
	572	</lans>
	573	"""
	574	class topo_parse:
	575	"""
	576	Parse the topology XML and create the dats structure.
	577	"""
	578	def __init__(self):
	579	# Typing of the subelements for data conversion
	580	self.str_subelements = ('vname', 'vnode', 'ips', 'ip', 'member')
	581	self.int_subelements = ( 'bandwidth',)
	582	self.float_subelements = ( 'delay',)
	583	# The final data structure
	584	self.nodes = [ ]
	585	self.lans = [ ]
	586	self.topo = { \
	587	'node': self.nodes,\
	588	'lan' : self.lans,\
	589	}
	590	self.element = { } # Current element being created
	591	self.chars = "" # Last text seen
	592
	593	def end_element(self, name):
	594	# After each sub element the contents is added to the current
	595	# element or to the appropriate list.
	596	if name == 'node':
	597	self.nodes.append(self.element)
	598	self.element = { }
	599	elif name == 'lan':
	600	self.lans.append(self.element)
	601	self.element = { }
	602	elif name in self.str_subelements:
	603	self.element[name] = self.chars
	604	self.chars = ""
	605	elif name in self.int_subelements:
	606	self.element[name] = int(self.chars)
	607	self.chars = ""
	608	elif name in self.float_subelements:
	609	self.element[name] = float(self.chars)
	610	self.chars = ""
	611
	612	def found_chars(self, data):
	613	self.chars += data.rstrip()
	614
	615
	616	tp = topo_parse();
	617	parser = xml.parsers.expat.ParserCreate()
	618	parser.EndElementHandler = tp.end_element
	619	parser.CharacterDataHandler = tp.found_chars
	620
	621	parser.Parse(str)
	622
	623	return tp.topo
	624
[0d830de]	625
	626	def genviz(self, topo):
[866c983]	627	"""
	628	Generate the visualization the virtual topology
	629	"""
	630
	631	neato = "/usr/local/bin/neato"
	632	# These are used to parse neato output and to create the visualization
	633	# file.
	634	vis_re = re.compile('^\s"?([\w\-]+)"?\s+\[.pos="(\d+),(\d+)"')
	635	vis_fmt = "<node><name>%s</name><x>%s</x><y>%s</y><type>" + \
	636	"%s</type></node>"
	637
	638	try:
	639	# Node names
	640	nodes = [ n['vname'] for n in topo['node'] ]
	641	topo_lans = topo['lan']
[cc8d8e9]	642	except KeyError, e:
	643	raise service_error(service_error.internal, "Bad topology: %s" %e)
[866c983]	644
	645	lans = { }
	646	links = { }
	647
	648	# Walk through the virtual topology, organizing the connections into
	649	# 2-node connections (links) and more-than-2-node connections (lans).
	650	# When a lan is created, it's added to the list of nodes (there's a
	651	# node in the visualization for the lan).
	652	for l in topo_lans:
	653	if links.has_key(l['vname']):
	654	if len(links[l['vname']]) < 2:
	655	links[l['vname']].append(l['vnode'])
	656	else:
	657	nodes.append(l['vname'])
	658	lans[l['vname']] = links[l['vname']]
	659	del links[l['vname']]
	660	lans[l['vname']].append(l['vnode'])
	661	elif lans.has_key(l['vname']):
	662	lans[l['vname']].append(l['vnode'])
	663	else:
	664	links[l['vname']] = [ l['vnode'] ]
	665
	666
	667	# Open up a temporary file for dot to turn into a visualization
	668	try:
	669	df, dotname = tempfile.mkstemp()
	670	dotfile = os.fdopen(df, 'w')
	671	except IOError:
	672	raise service_error(service_error.internal,
	673	"Failed to open file in genviz")
	674
[db6b092]	675	try:
	676	dnull = open('/dev/null', 'w')
	677	except IOError:
	678	service_error(service_error.internal,
[886307f]	679	"Failed to open /dev/null in genviz")
	680
[866c983]	681	# Generate a dot/neato input file from the links, nodes and lans
	682	try:
	683	print >>dotfile, "graph G {"
	684	for n in nodes:
	685	print >>dotfile, '\t"%s"' % n
	686	for l in links.keys():
	687	print >>dotfile, '\t"%s" -- "%s"' % tuple(links[l])
	688	for l in lans.keys():
	689	for n in lans[l]:
	690	print >>dotfile, '\t "%s" -- "%s"' % (n,l)
	691	print >>dotfile, "}"
	692	dotfile.close()
	693	except TypeError:
	694	raise service_error(service_error.internal,
	695	"Single endpoint link in vtopo")
	696	except IOError:
	697	raise service_error(service_error.internal, "Cannot write dot file")
	698
	699	# Use dot to create a visualization
	700	dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005', '-Gmaxiter=2000',
[886307f]	701	'-Gpack=true', dotname], stdout=PIPE, stderr=dnull,
[db6b092]	702	close_fds=True)
	703	dnull.close()
[866c983]	704
	705	# Translate dot to vis format
	706	vis_nodes = [ ]
	707	vis = { 'node': vis_nodes }
	708	for line in dot.stdout:
	709	m = vis_re.match(line)
	710	if m:
	711	vn = m.group(1)
	712	vis_node = {'name': vn, \
	713	'x': float(m.group(2)),\
	714	'y' : float(m.group(3)),\
	715	}
	716	if vn in links.keys() or vn in lans.keys():
	717	vis_node['type'] = 'lan'
	718	else:
	719	vis_node['type'] = 'node'
	720	vis_nodes.append(vis_node)
	721	rv = dot.wait()
	722
	723	os.remove(dotname)
	724	if rv == 0 : return vis
	725	else: return None
[d0ae12d]	726
[99eb8cf]	727	def get_access(self, tb, nodes, tbparam, master, export_project,
[866c983]	728	access_user):
	729	"""
	730	Get access to testbed through fedd and set the parameters for that tb
	731	"""
	732	uri = self.tbmap.get(tb, None)
	733	if not uri:
	734	raise service_error(serice_error.server_config,
	735	"Unknown testbed: %s" % tb)
	736
[8218a3b]	737	# Tweak search order so that if there are entries in access_user that
	738	# have a project matching the export project, we try them first
	739	if export_project and export_project.has_key('localname'):
	740	pn = export_project['localname']
	741
	742	access_sequence = [ (p, u) for p, u in access_user if p == pn]
	743	access_sequence.extend([(p, u) for p, u in access_user if p != pn])
	744	else:
	745	access_sequence = access_user
	746
	747	for p, u in access_sequence:
[866c983]	748	self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
	749	"to %s") % ((p or "None"), u, uri))
	750
	751	if p:
	752	# Request with user and project specified
	753	req = {\
	754	'destinationTestbed' : { 'uri' : uri },
[3bddd24]	755	'credential': [ "project: %s" % p, "user: %s" % u],
[866c983]	756	'allocID' : { 'localname': 'test' },
	757	}
	758	else:
	759	# Request with only user specified
	760	req = {\
	761	'destinationTestbed' : { 'uri' : uri },
[3bddd24]	762	'credential': [ 'user: %s' % u ],
[866c983]	763	'user': [ {'userID': { 'localname': u } } ],
	764	'allocID' : { 'localname': 'test' },
	765	}
	766
	767	if tb == master:
	768	# NB, the export_project parameter is a dict that includes
	769	# the type
	770	req['exportProject'] = export_project
	771
	772	# node resources if any
	773	if nodes != None and len(nodes) > 0:
	774	rnodes = [ ]
	775	for n in nodes:
	776	rn = { }
	777	image, hw, count = n.split(":")
	778	if image: rn['image'] = [ image ]
	779	if hw: rn['hardware'] = [ hw ]
	780	if count and int(count) >0 : rn['count'] = int(count)
	781	rnodes.append(rn)
	782	req['resources']= { }
	783	req['resources']['node'] = rnodes
	784
	785	try:
	786	if self.local_access.has_key(uri):
	787	# Local access call
	788	req = { 'RequestAccessRequestBody' : req }
	789	r = self.local_access[uri].RequestAccess(req,
	790	fedid(file=self.cert_file))
	791	r = { 'RequestAccessResponseBody' : r }
	792	else:
	793	r = self.call_RequestAccess(uri, req,
	794	self.cert_file, self.cert_pwd, self.trusted_certs)
	795	except service_error, e:
	796	if e.code == service_error.access:
	797	self.log.debug("[get_access] Access denied")
	798	r = None
	799	continue
	800	else:
	801	raise e
	802
[e19b75c]	803	if r.has_key('RequestAccessResponseBody'):
	804	# Through to here we have a valid response, not a fault.
	805	# Access denied is a fault, so something better or worse than
	806	# access denied has happened.
	807	r = r['RequestAccessResponseBody']
	808	self.log.debug("[get_access] Access granted")
	809	break
	810	else:
	811	raise service_error(service_error.protocol,
	812	"Bad proxy response")
	813
	814	if not r:
	815	raise service_error(service_error.access,
	816	"Access denied by %s (%s)" % (tb, uri))
[db6b092]	817
[4afcfc4]	818	tbparam[tb] = {
[69692a9]	819	"allocID" : r['allocID'],
	820	"uri": uri,
[4afcfc4]	821	}
	822
	823	# Add attributes to parameter space. We don't allow attributes to
	824	# overlay any parameters already installed.
	825	for a in r['fedAttr']:
	826	try:
	827	if a['attribute'] and \
	828	isinstance(a['attribute'], basestring)\
	829	and not tbparam[tb].has_key(a['attribute'].lower()):
	830	tbparam[tb][a['attribute'].lower()] = a['value']
	831	except KeyError:
	832	self.log.error("Bad attribute in response: %s" % a)
[db6b092]	833
[69692a9]	834	def release_access(self, tb, aid, uri=None):
[e19b75c]	835	"""
	836	Release access to testbed through fedd
	837	"""
[db6b092]	838
[69692a9]	839	if not uri:
	840	uri = self.tbmap.get(tb, None)
[e19b75c]	841	if not uri:
[69692a9]	842	raise service_error(service_error.server_config,
[e19b75c]	843	"Unknown testbed: %s" % tb)
[db6b092]	844
[e19b75c]	845	if self.local_access.has_key(uri):
	846	resp = self.local_access[uri].ReleaseAccess(\
	847	{ 'ReleaseAccessRequestBody' : {'allocID': aid},},
	848	fedid(file=self.cert_file))
	849	resp = { 'ReleaseAccessResponseBody': resp }
	850	else:
	851	resp = self.call_ReleaseAccess(uri, {'allocID': aid},
	852	self.cert_file, self.cert_pwd, self.trusted_certs)
[db6b092]	853
[e19b75c]	854	# better error coding
[db6b092]	855
[e19b75c]	856	def remote_splitter(self, uri, desc, master):
[db6b092]	857
[e19b75c]	858	req = {
	859	'description' : { 'ns2description': desc },
	860	'master': master,
	861	'include_fedkit': bool(self.fedkit),
	862	'include_gatewaykit': bool(self.gatewaykit)
[db6b092]	863	}
	864
[e19b75c]	865	r = self.call_Ns2Split(uri, req, self.cert_file, self.cert_pwd,
	866	self.trusted_certs)
	867
	868	if r.has_key('Ns2SplitResponseBody'):
	869	r = r['Ns2SplitResponseBody']
	870	if r.has_key('output'):
	871	return r['output'].splitlines()
	872	else:
	873	raise service_error(service_error.protocol,
	874	"Bad splitter response (no output)")
	875	else:
	876	raise service_error(service_error.protocol, "Bad splitter response")
[cc8d8e9]	877
[e19b75c]	878	class start_segment:
[fd556d1]	879	def __init__(self, debug=False, log=None, testbed="", cert_file=None,
[f07fa49]	880	cert_pwd=None, trusted_certs=None, caller=None,
	881	log_collector=None):
[cc8d8e9]	882	self.log = log
	883	self.debug = debug
	884	self.cert_file = cert_file
	885	self.cert_pwd = cert_pwd
	886	self.trusted_certs = None
	887	self.caller = caller
[fd556d1]	888	self.testbed = testbed
[f07fa49]	889	self.log_collector = log_collector
[69692a9]	890	self.response = None
[cc8d8e9]	891
[ecca6eb]	892	def __call__(self, uri, aid, topo, master, attrs=None):
[cc8d8e9]	893	req = {
	894	'allocID': { 'fedid' : aid },
	895	'segmentdescription': {
	896	'topdldescription': topo.to_dict(),
	897	},
[ecca6eb]	898	'master': master,
[cc8d8e9]	899	}
[6c57fe9]	900	if attrs:
	901	req['fedAttr'] = attrs
[cc8d8e9]	902
[fd556d1]	903	try:
[13e3dd2]	904	self.log.debug("Calling StartSegment at %s " % uri)
[fd556d1]	905	r = self.caller(uri, req, self.cert_file, self.cert_pwd,
	906	self.trusted_certs)
[f07fa49]	907	if r.has_key('StartSegmentResponseBody'):
	908	lval = r['StartSegmentResponseBody'].get('allocationLog',
	909	None)
	910	if lval and self.log_collector:
	911	for line in lval.splitlines(True):
	912	self.log_collector.write(line)
[69692a9]	913	self.response = r
[f07fa49]	914	else:
	915	raise service_error(service_error.internal,
	916	"Bad response!?: %s" %r)
[fd556d1]	917	return True
	918	except service_error, e:
	919	self.log.error("Start segment failed on %s: %s" % \
	920	(self.testbed, e))
	921	return False
[cc8d8e9]	922
	923
[5ae3857]	924
[e19b75c]	925	class terminate_segment:
[fd556d1]	926	def __init__(self, debug=False, log=None, testbed="", cert_file=None,
[5ae3857]	927	cert_pwd=None, trusted_certs=None, caller=None):
	928	self.log = log
	929	self.debug = debug
	930	self.cert_file = cert_file
	931	self.cert_pwd = cert_pwd
	932	self.trusted_certs = None
	933	self.caller = caller
[fd556d1]	934	self.testbed = testbed
[5ae3857]	935
	936	def __call__(self, uri, aid ):
	937	req = {
	938	'allocID': aid ,
	939	}
[fd556d1]	940	try:
	941	r = self.caller(uri, req, self.cert_file, self.cert_pwd,
	942	self.trusted_certs)
	943	return True
	944	except service_error, e:
	945	self.log.error("Terminate segment failed on %s: %s" % \
	946	(self.testbed, e))
	947	return False
[db6b092]	948
	949
[7183b48]	950	def allocate_resources(self, allocated, master, eid, expid,
[f07fa49]	951	tbparams, topo, tmpdir, alloc_log=None, log_collector=None,
	952	attrs=None):
[69692a9]	953	def get_vlan(r):
	954	if r.has_key('StartSegmentResponseBody'):
	955	srb = r['StartSegmentResponseBody']
	956	if srb.has_key('fedAttr'):
	957	for k, v in [ (a['attribute'], a['value']) \
	958	for a in srb['fedAttr']]:
	959	if k == 'vlan': return v
	960	return None
	961
[cc8d8e9]	962	started = { } # Testbeds where a sub-experiment started
	963	# successfully
	964
	965	# XXX
	966	fail_soft = False
	967
[69692a9]	968	slaves = [ k for k in allocated.keys() \
	969	if k != master and not topo[k].get_attribute('transit')]
	970	transit = [ k for k in allocated.keys() \
	971	if topo[k].get_attribute('transit')]
	972
[cc8d8e9]	973	log = alloc_log or self.log
	974
	975	thread_pool = self.thread_pool(self.nthreads)
	976	threads = [ ]
	977
[69692a9]	978	for tb in transit:
	979	uri = tbparams[tb]['uri']
[cc8d8e9]	980	if tbparams[tb].has_key('allocID') and \
	981	tbparams[tb]['allocID'].has_key('fedid'):
	982	aid = tbparams[tb]['allocID']['fedid']
	983	else:
	984	raise service_error(service_error.internal,
	985	"No alloc id for testbed %s !?" % tb)
	986
[69692a9]	987	m = re.search('(\d+)', tb)
	988	if m:
	989	to_repl = "unassigned%s" % m.group(1)
	990	else:
	991	raise service_error(service_error.internal,
	992	"Bad dynamic allocation name")
	993	break
	994
	995	ss = self.start_segment(log=log, debug=self.debug,
[6409cc5]	996	testbed=tb, cert_file=self.cert_file,
[69692a9]	997	cert_pwd=self.cert_pwd,
	998	trusted_certs=self.trusted_certs,
	999	caller=self.call_StartSegment,
	1000	log_collector=log_collector)
	1001	t = self.pooled_thread(
	1002	target=ss,
	1003	args =(uri, aid, topo[tb], False, attrs),
	1004	name=tb, pdata=thread_pool, trace_file=self.trace_file)
	1005	threads.append(t)
	1006	t.start()
	1007	# Wait until the this transit node finishes (keep pinging the log,
	1008	# though)
	1009
	1010	mins = 0
	1011	while not thread_pool.wait_for_all_done(60.0):
	1012	mins += 1
[a588632]	1013	alloc_log.info("Waiting for transit (it has been %d mins)" \
[69692a9]	1014	% mins)
	1015
	1016	if t.rv:
	1017	vlan = get_vlan(ss.response)
	1018	if vlan is not None:
	1019	for k, t in topo.items():
	1020	for e in t.elements:
	1021	for i in e.interface:
	1022	vl = i.get_attribute('dragon_vlan')
	1023	if vl is not None and vl == to_repl:
	1024	i.set_attribute('dragon_vlan', vlan)
	1025	else:
	1026	break
	1027	thread_pool.clear()
	1028
[cc8d8e9]	1029
[69692a9]	1030	failed = [ t.getName() for t in threads if not t.rv ]
[32e7d93]	1031
[69692a9]	1032	if len(failed) == 0:
	1033	for tb in slaves:
	1034	# Create and start a thread to start the segment, and save it
	1035	# to get the return value later
	1036	thread_pool.wait_for_slot()
	1037	uri = self.tbmap.get(tb, None)
	1038	if not uri:
	1039	raise service_error(service_error.internal,
	1040	"Unknown testbed %s !?" % tb)
	1041
	1042	if tbparams[tb].has_key('allocID') and \
	1043	tbparams[tb]['allocID'].has_key('fedid'):
	1044	aid = tbparams[tb]['allocID']['fedid']
	1045	else:
	1046	raise service_error(service_error.internal,
	1047	"No alloc id for testbed %s !?" % tb)
	1048
	1049	t = self.pooled_thread(\
	1050	target=self.start_segment(log=log, debug=self.debug,
	1051	testbed=tb, cert_file=self.cert_file,
	1052	cert_pwd=self.cert_pwd,
	1053	trusted_certs=self.trusted_certs,
	1054	caller=self.call_StartSegment,
	1055	log_collector=log_collector),
	1056	args=(uri, aid, topo[tb], False, attrs), name=tb,
	1057	pdata=thread_pool, trace_file=self.trace_file)
	1058	threads.append(t)
	1059	t.start()
	1060
	1061	# Wait until all finish (keep pinging the log, though)
	1062	mins = 0
	1063	while not thread_pool.wait_for_all_done(60.0):
	1064	mins += 1
	1065	alloc_log.info("Waiting for sub threads (it has been %d mins)" \
	1066	% mins)
	1067
	1068	thread_pool.clear()
[cc8d8e9]	1069
	1070	# If none failed, start the master
	1071	failed = [ t.getName() for t in threads if not t.rv ]
	1072
	1073	if len(failed) == 0:
	1074	uri = self.tbmap.get(master, None)
	1075	if not uri:
	1076	raise service_error(service_error.internal,
	1077	"Unknown testbed %s !?" % master)
	1078
	1079	if tbparams[master].has_key('allocID') and \
	1080	tbparams[master]['allocID'].has_key('fedid'):
	1081	aid = tbparams[master]['allocID']['fedid']
	1082	else:
	1083	raise service_error(service_error.internal,
	1084	"No alloc id for testbed %s !?" % master)
[32e7d93]	1085	t = self.pooled_thread(
	1086	target=self.start_segment(log=log, debug=self.debug,
	1087	testbed=master, cert_file=self.cert_file,
	1088	cert_pwd=self.cert_pwd,
	1089	trusted_certs=self.trusted_certs,
	1090	caller=self.call_StartSegment,
	1091	log_collector=log_collector),
	1092	args =(uri, aid, topo[master], True, attrs),
	1093	name=master, pdata=thread_pool, trace_file=self.trace_file)
	1094	threads.append(t)
	1095	t.start()
	1096	# Wait until the master finishes (keep pinging the log, though)
	1097	mins = 0
	1098	while not thread_pool.wait_for_all_done(60.0):
	1099	mins += 1
	1100	alloc_log.info("Waiting for master (it has been %d mins)" \
	1101	% mins)
	1102	# update failed to include the master, if it failed
	1103	failed = [ t.getName() for t in threads if not t.rv ]
[cc8d8e9]	1104
	1105	succeeded = [tb for tb in allocated.keys() if tb not in failed]
	1106	# If one failed clean up, unless fail_soft is set
[32e7d93]	1107	if failed:
[cc8d8e9]	1108	if not fail_soft:
	1109	thread_pool.clear()
	1110	for tb in succeeded:
	1111	# Create and start a thread to stop the segment
	1112	thread_pool.wait_for_slot()
[0fa1729]	1113	uri = tbparams[tb]['uri']
[cc8d8e9]	1114	t = self.pooled_thread(\
[32e7d93]	1115	target=self.terminate_segment(log=log,
[fd556d1]	1116	testbed=tb,
[32e7d93]	1117	cert_file=self.cert_file,
	1118	cert_pwd=self.cert_pwd,
	1119	trusted_certs=self.trusted_certs,
	1120	caller=self.call_TerminateSegment),
	1121	args=(uri, tbparams[tb]['federant']['allocID']),
	1122	name=tb,
[cc8d8e9]	1123	pdata=thread_pool, trace_file=self.trace_file)
	1124	t.start()
	1125	# Wait until all finish
	1126	thread_pool.wait_for_all_done()
	1127
	1128	# release the allocations
	1129	for tb in tbparams.keys():
[69692a9]	1130	self.release_access(tb, tbparams[tb]['allocID'],
	1131	tbparams[tb].get('uri', None))
[cc8d8e9]	1132	# Remove the placeholder
	1133	self.state_lock.acquire()
	1134	self.state[eid]['experimentStatus'] = 'failed'
	1135	if self.state_filename: self.write_state()
	1136	self.state_lock.release()
	1137
	1138	log.error("Swap in failed on %s" % ",".join(failed))
	1139	return
	1140	else:
	1141	log.info("[start_segment]: Experiment %s active" % eid)
	1142
	1143
	1144	# Walk up tmpdir, deleting as we go
[69692a9]	1145	if self.cleanup:
	1146	log.debug("[start_experiment]: removing %s" % tmpdir)
	1147	for path, dirs, files in os.walk(tmpdir, topdown=False):
	1148	for f in files:
	1149	os.remove(os.path.join(path, f))
	1150	for d in dirs:
	1151	os.rmdir(os.path.join(path, d))
	1152	os.rmdir(tmpdir)
	1153	else:
	1154	log.debug("[start_experiment]: not removing %s" % tmpdir)
[cc8d8e9]	1155
	1156	# Insert the experiment into our state and update the disk copy
	1157	self.state_lock.acquire()
	1158	self.state[expid]['experimentStatus'] = 'active'
	1159	self.state[eid] = self.state[expid]
	1160	if self.state_filename: self.write_state()
	1161	self.state_lock.release()
	1162	return
	1163
	1164
[895a133]	1165	def add_kit(self, e, kit):
	1166	"""
	1167	Add a Software object created from the list of (install, location)
	1168	tuples passed as kit to the software attribute of an object e. We
	1169	do this enough to break out the code, but it's kind of a hack to
	1170	avoid changing the old tuple rep.
	1171	"""
	1172
	1173	s = [ topdl.Software(install=i, location=l) for i, l in kit]
	1174
	1175	if isinstance(e.software, list): e.software.extend(s)
	1176	else: e.software = s
	1177
	1178
[a3ad8bd]	1179	def create_experiment_state(self, fid, req, expid, expcert,
	1180	state='starting'):
[895a133]	1181	"""
	1182	Create the initial entry in the experiment's state. The expid and
	1183	expcert are the experiment's fedid and certifacte that represents that
	1184	ID, which are installed in the experiment state. If the request
	1185	includes a suggested local name that is used if possible. If the local
	1186	name is already taken by an experiment owned by this user that has
[a3ad8bd]	1187	failed, it is overwritten. Otherwise new letters are added until a
[895a133]	1188	valid localname is found. The generated local name is returned.
	1189	"""
	1190
	1191	if req.has_key('experimentID') and \
	1192	req['experimentID'].has_key('localname'):
	1193	overwrite = False
	1194	eid = req['experimentID']['localname']
	1195	# If there's an old failed experiment here with the same local name
	1196	# and accessible by this user, we'll overwrite it, otherwise we'll
	1197	# fall through and do the collision avoidance.
	1198	old_expid = self.get_experiment_fedid(eid)
	1199	if old_expid and self.check_experiment_access(fid, old_expid):
	1200	self.state_lock.acquire()
	1201	status = self.state[eid].get('experimentStatus', None)
	1202	if status and status == 'failed':
	1203	# remove the old access attribute
	1204	self.auth.unset_attribute(fid, old_expid)
	1205	overwrite = True
	1206	del self.state[eid]
	1207	del self.state[old_expid]
	1208	self.state_lock.release()
	1209	self.state_lock.acquire()
	1210	while (self.state.has_key(eid) and not overwrite):
	1211	eid += random.choice(string.ascii_letters)
	1212	# Initial state
	1213	self.state[eid] = {
	1214	'experimentID' : \
	1215	[ { 'localname' : eid }, {'fedid': expid } ],
[a3ad8bd]	1216	'experimentStatus': state,
[895a133]	1217	'experimentAccess': { 'X509' : expcert },
	1218	'owner': fid,
	1219	'log' : [],
	1220	}
	1221	self.state[expid] = self.state[eid]
	1222	if self.state_filename: self.write_state()
	1223	self.state_lock.release()
	1224	else:
	1225	eid = self.exp_stem
	1226	for i in range(0,5):
	1227	eid += random.choice(string.ascii_letters)
	1228	self.state_lock.acquire()
	1229	while (self.state.has_key(eid)):
	1230	eid = self.exp_stem
	1231	for i in range(0,5):
	1232	eid += random.choice(string.ascii_letters)
	1233	# Initial state
	1234	self.state[eid] = {
	1235	'experimentID' : \
	1236	[ { 'localname' : eid }, {'fedid': expid } ],
[a3ad8bd]	1237	'experimentStatus': state,
[895a133]	1238	'experimentAccess': { 'X509' : expcert },
	1239	'owner': fid,
	1240	'log' : [],
	1241	}
	1242	self.state[expid] = self.state[eid]
	1243	if self.state_filename: self.write_state()
	1244	self.state_lock.release()
	1245
	1246	return eid
	1247
	1248
	1249	def allocate_ips_to_topo(self, top):
	1250	"""
[69692a9]	1251	Add an ip4_address attribute to all the hosts in the topology, based on
[895a133]	1252	the shared substrates on which they sit. An /etc/hosts file is also
[69692a9]	1253	created and returned as a list of hostfiles entries. We also return
	1254	the allocator, because we may need to allocate IPs to portals
	1255	(specifically DRAGON portals).
[895a133]	1256	"""
	1257	subs = sorted(top.substrates,
	1258	cmp=lambda x,y: cmp(len(x.interfaces), len(y.interfaces)),
	1259	reverse=True)
	1260	ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
	1261	ifs = { }
	1262	hosts = [ ]
	1263
	1264	for idx, s in enumerate(subs):
	1265	a = ips.allocate(len(s.interfaces)+2)
	1266	if a :
	1267	base, num = a
	1268	if num < len(s.interfaces) +2 :
	1269	raise service_error(service_error.internal,
	1270	"Allocator returned wrong number of IPs??")
	1271	else:
	1272	raise service_error(service_error.req,
	1273	"Cannot allocate IP addresses")
	1274
	1275	base += 1
	1276	for i in s.interfaces:
	1277	i.attribute.append(
	1278	topdl.Attribute('ip4_address',
	1279	"%s" % ip_addr(base)))
	1280	hname = i.element.name[0]
	1281	if ifs.has_key(hname):
	1282	hosts.append("%s\t%s-%s %s-%d" % \
	1283	(ip_addr(base), hname, s.name, hname,
	1284	ifs[hname]))
	1285	else:
	1286	ifs[hname] = 0
	1287	hosts.append("%s\t%s-%s %s-%d %s" % \
	1288	(ip_addr(base), hname, s.name, hname,
	1289	ifs[hname], hname))
	1290
	1291	ifs[hname] += 1
	1292	base += 1
[69692a9]	1293	return hosts, ips
[895a133]	1294
[99eb8cf]	1295	def get_access_to_testbeds(self, testbeds, access_user,
[895a133]	1296	export_project, master, allocated, tbparams):
	1297	"""
	1298	Request access to the various testbeds required for this instantiation
	1299	(passed in as testbeds). User, access_user, expoert_project and master
	1300	are used to construct the correct requests. Per-testbed parameters are
	1301	returned in tbparams.
	1302	"""
	1303	for tb in testbeds:
[99eb8cf]	1304	self.get_access(tb, None, tbparams, master,
[895a133]	1305	export_project, access_user)
	1306	allocated[tb] = 1
	1307
	1308	def split_topology(self, top, topo, testbeds, eid, master, tbparams):
	1309	"""
	1310	Create the sub-topologies that are needed for experimetn instantiation.
	1311	Along the way attach startup commands to the computers in the
	1312	subtopologies.
	1313	"""
	1314	for tb in testbeds:
	1315	topo[tb] = top.clone()
	1316	to_delete = [ ]
	1317	for e in topo[tb].elements:
	1318	etb = e.get_attribute('testbed')
	1319	if etb and etb != tb:
	1320	for i in e.interface:
	1321	for s in i.subs:
	1322	try:
	1323	s.interfaces.remove(i)
	1324	except ValueError:
	1325	raise service_error(service_error.internal,
	1326	"Can't remove interface??")
	1327	to_delete.append(e)
	1328	for e in to_delete:
	1329	topo[tb].elements.remove(e)
	1330	topo[tb].make_indices()
	1331
	1332	for e in [ e for e in topo[tb].elements \
	1333	if isinstance(e,topdl.Computer)]:
	1334	if tb == master:
	1335	cmd = 'sudo -H /usr/local/federation/bin/make_hosts /proj/%s/exp/%s/tmp/hosts >& /tmp/federate' % (tbparams[tb].get('project', 'project'), eid)
	1336	else:
	1337	cmd = "sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& /tmp/federate"
	1338	scmd = e.get_attribute('startup')
	1339	if scmd:
	1340	cmd = "%s \\$USER '%s'" % (cmd, scmd)
	1341
	1342	e.set_attribute('startup', cmd)
	1343	if self.fedkit: self.add_kit(e, self.fedkit)
	1344
[13e3dd2]	1345	def new_portal_node(self, st, dt, tbparams, master, eid, myname, desthost,
[69692a9]	1346	portal_type, iface_desc=()):
[13e3dd2]	1347	sproject = tbparams[st].get('project', 'project')
	1348	dproject = tbparams[dt].get('project', 'project')
	1349	mproject = tbparams[master].get('project', 'project')
	1350	sdomain = tbparams[st].get('domain', ".example.com")
	1351	ddomain = tbparams[dt].get('domain', ".example.com")
	1352	mdomain = tbparams[master].get('domain', '.example.com')
	1353	muser = tbparams[master].get('user', 'root')
	1354	smbshare = tbparams[master].get('smbshare', 'USERS')
	1355	aid = tbparams[dt]['allocID']['fedid']
	1356	if st == master or dt == master:
	1357	active = ("%s" % (st == master))
	1358	else:
	1359	active = ("%s" %(st > dt))
	1360
[69692a9]	1361	ifaces = [ ]
	1362	for sub, attrs in iface_desc:
	1363	inf = topdl.Interface(
	1364	substrate=sub,
	1365	attribute=[
	1366	topdl.Attribute(
	1367	attribute=n,
	1368	value = v)
	1369	for n, v in attrs
[13e3dd2]	1370	]
[69692a9]	1371	)
	1372	ifaces.append(inf)
[13e3dd2]	1373	return topdl.Computer(
	1374	name=myname,
	1375	attribute=[
	1376	topdl.Attribute(attribute=n,value=v)
	1377	for n, v in (\
	1378	('portal', 'true'),
	1379	('domain', sdomain),
	1380	('masterdomain', mdomain),
	1381	('masterexperiment', "%s/%s" % \
	1382	(mproject, eid)),
	1383	('masteruser', muser),
	1384	('smbshare', smbshare),
	1385	('experiment', "%s/%s" % \
	1386	(sproject, eid)),
	1387	('peer', "%s" % desthost),
	1388	('peer_segment', "%s" % aid),
	1389	('scriptdir',
	1390	"/usr/local/federation/bin"),
	1391	('active', "%s" % active),
	1392	('portal_type', portal_type),
[641bb66]	1393	)
[13e3dd2]	1394	],
	1395	interface=ifaces,
	1396	)
	1397
	1398	def new_portal_substrate(self, st, dt, eid, tbparams):
	1399	ddomain = tbparams[dt].get('domain', ".example.com")
	1400	dproject = tbparams[dt].get('project', 'project')
	1401	tsubstrate = \
	1402	topdl.Substrate(name='%s-%s' % (st, dt),
	1403	attribute= [
	1404	topdl.Attribute(
	1405	attribute='portal',
	1406	value='true')
	1407	]
	1408	)
	1409	segment_element = topdl.Segment(
	1410	id= tbparams[dt]['allocID'],
	1411	type='emulab',
	1412	uri = self.tbmap.get(dt, None),
	1413	interface=[
	1414	topdl.Interface(
	1415	substrate=tsubstrate.name),
	1416	],
	1417	attribute = [
	1418	topdl.Attribute(attribute=n, value=v)
	1419	for n, v in (\
	1420	('domain', ddomain),
	1421	('experiment', "%s/%s" % \
	1422	(dproject, eid)),)
	1423	],
	1424	)
	1425
	1426	return (tsubstrate, segment_element)
	1427
[69692a9]	1428	def new_dragon_topo(self, idx, sub, topo, tbs, tbparams):
	1429	if sub.capacity is None:
	1430	raise service_error(service_error.internal,
	1431	"Cannot DRAGON split substrate w/o capacity")
	1432	segs = [ ]
	1433	substr = topdl.Substrate(name="dragon%d" % idx,
	1434	capacity=sub.capacity.clone(),
	1435	attribute=[ topdl.Attribute(attribute=n, value=v)
	1436	for n, v, in (\
	1437	('vlan', 'unassigned%d' % idx),)])
	1438	for tb in tbs.keys():
	1439	seg = topdl.Segment(
	1440	id = tbparams[tb]['allocID'],
	1441	type='emulab',
	1442	uri = self.tbmap.get(tb, None),
	1443	interface=[
	1444	topdl.Interface(
	1445	substrate=substr.name),
	1446	],
	1447	attribute=[ topdl.Attribute(
[ecf679e]	1448	attribute='dragon_endpoint',
	1449	value=tbparams[tb]['dragon']),
[69692a9]	1450	]
	1451	)
	1452	if tbparams[tb].has_key('vlans'):
	1453	seg.set_attribute('vlans', tbparams[tb]['vlans'])
	1454	segs.append(seg)
	1455
	1456	topo["dragon%d" %idx] = \
	1457	topdl.Topology(substrates=[substr], elements=segs,
	1458	attribute=[
	1459	topdl.Attribute(attribute="transit", value='true'),
	1460	topdl.Attribute(attribute="dynamic", value='true'),
	1461	topdl.Attribute(attribute="testbed", value='dragon'),
	1462	]
	1463	)
	1464
	1465	def create_dragon_substrate(self, sub, topo, tbs, tbparams, master, eid):
	1466	"""
	1467	Add attribiutes to the various elements indicating that they are to be
	1468	dragon connected and create a dragon segment in tops to be
	1469	instantiated.
	1470	"""
	1471
	1472	def get_substrate_from_topo(name, t):
	1473	for s in t.substrates:
	1474	if s.name == name: return s
	1475	else: return None
	1476
	1477	dn = len([x for x in topo.keys() if x.startswith('dragon')])
	1478	elements = [ i.element for i in sub.interfaces ]
	1479	count = { }
	1480	for e in elements:
	1481	tb = e.get_attribute('testbed')
	1482	count[tb] = count.get(tb, 0) + 1
	1483
	1484	for tb in tbs.keys():
	1485	s = get_substrate_from_topo(sub.name, topo[tb])
	1486	if s:
	1487	for i in s.interfaces:
	1488	i.set_attribute('dragon_vlan', 'unassigned%d' % dn)
	1489	if count[tb] > 1: i.set_attribute('dragon_type', 'lan')
	1490	else: i.set_attribute('dragon_type', 'link')
	1491	else:
	1492	raise service_error(service_error.internal,
	1493	"No substrate %s in testbed %s" % (sub.name, tb))
	1494
	1495	self.new_dragon_topo(dn, sub, topo, tbs, tbparams)
	1496
	1497	def insert_internet_portals(self, sub, topo, tbs, tbparams, master, eid,
	1498	segment_substrate, portals):
	1499	# More than one testbed is on this substrate. Insert
	1500	# some portals into the subtopologies. st == source testbed,
	1501	# dt == destination testbed.
	1502	for st in tbs.keys():
	1503	if not segment_substrate.has_key(st):
	1504	segment_substrate[st] = { }
	1505	if not portals.has_key(st):
	1506	portals[st] = { }
	1507	for dt in [ t for t in tbs.keys() if t != st]:
	1508	sproject = tbparams[st].get('project', 'project')
	1509	dproject = tbparams[dt].get('project', 'project')
	1510	mproject = tbparams[master].get('project', 'project')
	1511	sdomain = tbparams[st].get('domain', ".example.com")
	1512	ddomain = tbparams[dt].get('domain', ".example.com")
	1513	mdomain = tbparams[master].get('domain', '.example.com')
	1514	muser = tbparams[master].get('user', 'root')
	1515	smbshare = tbparams[master].get('smbshare', 'USERS')
	1516	aid = tbparams[dt]['allocID']['fedid']
	1517	if st == master or dt == master:
	1518	active = ("%s" % (st == master))
	1519	else:
	1520	active = ("%s" %(st > dt))
	1521	if not segment_substrate[st].has_key(dt):
	1522	# Put a substrate and a segment for the connected
	1523	# testbed in there.
	1524	tsubstrate, segment_element = \
	1525	self.new_portal_substrate(st, dt, eid, tbparams)
	1526	segment_substrate[st][dt] = tsubstrate
	1527	topo[st].substrates.append(tsubstrate)
	1528	topo[st].elements.append(segment_element)
	1529
	1530	new_portal = False
	1531	if portals[st].has_key(dt):
	1532	# There's a portal set up to go to this destination.
	1533	# See if there's room to multiples this connection on
	1534	# it. If so, add an interface to the portal; if not,
	1535	# set up to add a portal below.
	1536	# [This little festival of braces is just a pop of the
	1537	# last element in the list of portals between st and
	1538	# dt.]
	1539	portal = portals[st][dt][-1]
	1540	mux = len([ i for i in portal.interface \
	1541	if not i.get_attribute('portal')])
	1542	if mux == self.muxmax:
	1543	new_portal = True
	1544	portal_type = "experiment"
	1545	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
	1546	desthost = "%stunnel%d" % (st, len(portals[st][dt]))
	1547	else:
	1548	new_i = topdl.Interface(
[6e44258]	1549	substrate=sub.name,
[69692a9]	1550	attribute=[
	1551	topdl.Attribute(
	1552	attribute='ip4_address',
	1553	value=tbs[dt]
	1554	)
	1555	])
	1556	portal.interface.append(new_i)
	1557	else:
	1558	# First connection to this testbed, make an empty list
	1559	# and set up to add the new portal below
	1560	new_portal = True
	1561	portals[st][dt] = [ ]
	1562	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
	1563	desthost = "%stunnel%d" % (st, len(portals[st][dt]))
	1564
	1565	if dt == master or st == master: portal_type = "both"
	1566	else: portal_type = "experiment"
	1567
	1568	if new_portal:
	1569	infs = (
	1570	(segment_substrate[st][dt].name,
	1571	(('portal', 'true'),)),
	1572	(sub.name,
	1573	(('ip4_address', tbs[dt]),))
	1574	)
	1575	portal = self.new_portal_node(st, dt, tbparams,
	1576	master, eid, myname, desthost, portal_type,
	1577	infs)
	1578	if self.fedkit:
	1579	self.add_kit(portal, self.fedkit)
	1580	if self.gatewaykit:
	1581	self.add_kit(portal, self.gatewaykit)
	1582
	1583	topo[st].elements.append(portal)
	1584	portals[st][dt].append(portal)
	1585
	1586	def add_control_portal(self, st, dt, master, eid, topo, tbparams):
	1587	# Add to the master testbed
	1588	tsubstrate, segment_element = \
	1589	self.new_portal_substrate(st, dt, eid, tbparams)
	1590	myname = "%stunnel" % dt
	1591	desthost = "%stunnel" % st
	1592
	1593	portal = self.new_portal_node(st, dt, tbparams, master,
	1594	eid, myname, desthost, "control",
	1595	((tsubstrate.name,(('portal','true'),)),))
	1596	if self.fedkit:
	1597	self.add_kit(portal, self.fedkit)
	1598	if self.gatewaykit:
	1599	self.add_kit(portal, self.gatewaykit)
	1600
	1601	topo[st].substrates.append(tsubstrate)
	1602	topo[st].elements.append(segment_element)
	1603	topo[st].elements.append(portal)
	1604
[6e44258]	1605	def new_dragon_portal(self, st, dt, master, eid, myip, dip, idx,
[69692a9]	1606	substrate, tbparams):
	1607	# Add to the master testbed
	1608	myname = "%stunnel" % dt
	1609	desthost = "%s" % ip_addr(dip)
	1610
	1611	portal = self.new_portal_node(st, dt, tbparams, master,
	1612	eid, myname, desthost, "control",
	1613	((substrate.name,(
	1614	('portal','true'),
[6e44258]	1615	('ip4_address', "%s" % ip_addr(myip)),
[69692a9]	1616	('dragon_vlan', 'unassigned%d' % idx),
	1617	('dragon_type', 'link'),)),))
	1618	if self.fedkit:
	1619	self.add_kit(portal, self.fedkit)
	1620	if self.gatewaykit:
	1621	self.add_kit(portal, self.gatewaykit)
	1622
	1623	return portal
	1624
	1625	def add_portals(self, top, topo, eid, master, tbparams, ip_allocator):
[895a133]	1626	"""
	1627	For each substrate in the main topology, find those that
	1628	have nodes on more than one testbed. Insert portal nodes
	1629	into the copies of those substrates on the sub topologies.
	1630	"""
[13e3dd2]	1631	segment_substrate = { }
	1632	portals = { }
[895a133]	1633	for s in top.substrates:
	1634	# tbs will contain an ip address on this subsrate that is in
	1635	# each testbed.
	1636	tbs = { }
	1637	for i in s.interfaces:
	1638	e = i.element
	1639	tb = e.get_attribute('testbed')
	1640	if tb and not tbs.has_key(tb):
	1641	for i in e.interface:
	1642	if s in i.subs:
	1643	tbs[tb]= i.get_attribute('ip4_address')
	1644	if len(tbs) < 2:
	1645	continue
	1646
[69692a9]	1647	# DRAGON will not create multi-site vlans yet
	1648	if len(tbs) == 2 and \
	1649	all([tbparams[x].has_key('dragon') for x in tbs]):
	1650	self.create_dragon_substrate(s, topo, tbs, tbparams,
	1651	master, eid)
	1652	else:
	1653	self.insert_internet_portals(s, topo, tbs, tbparams, master,
	1654	eid, segment_substrate, portals)
[13e3dd2]	1655
	1656	# Make sure that all the slaves have a control portal back to the
	1657	# master.
	1658	for tb in [ t for t in tbparams.keys() if t != master ]:
	1659	if len([e for e in topo[tb].elements \
	1660	if isinstance(e, topdl.Computer) and \
	1661	e.get_attribute('portal') and \
	1662	e.get_attribute('portal_type') == 'both']) == 0:
	1663
[69692a9]	1664	if tbparams[master].has_key('dragon') \
	1665	and tbparams[tb].has_key('dragon'):
	1666
	1667	idx = len([x for x in topo.keys() \
	1668	if x.startswith('dragon')])
	1669	dip, leng = ip_allocator.allocate(4)
	1670	dip += 1
[6e44258]	1671	mip = dip+1
[69692a9]	1672	csub = topdl.Substrate(
	1673	name="dragon-control-%s" % tb,
	1674	capacity=topdl.Capacity(100000.0, 'max'),
	1675	attribute=[
	1676	topdl.Attribute(
	1677	attribute='portal',
	1678	value='true'
	1679	)
	1680	]
	1681	)
	1682	seg = topdl.Segment(
	1683	id= tbparams[master]['allocID'],
	1684	type='emulab',
	1685	uri = self.tbmap.get(master, None),
	1686	interface=[
	1687	topdl.Interface(
	1688	substrate=csub.name),
	1689	],
	1690	attribute = [
	1691	topdl.Attribute(attribute=n, value=v)
	1692	for n, v in (\
	1693	('domain',
	1694	tbparams[master].get('domain',
	1695	".example.com")),
	1696	('experiment', "%s/%s" % \
	1697	(tbparams[master].get(
	1698	'project',
	1699	'project'),
	1700	eid)),)
	1701	],
	1702	)
	1703	topo[tb].substrates.append(csub)
	1704	topo[tb].elements.append(
	1705	self.new_dragon_portal(tb, master, master, eid,
[6e44258]	1706	dip, mip, idx, csub, tbparams))
[69692a9]	1707	topo[tb].elements.append(seg)
	1708
	1709	mcsub = csub.clone()
	1710	seg = topdl.Segment(
	1711	id= tbparams[tb]['allocID'],
	1712	type='emulab',
	1713	uri = self.tbmap.get(tb, None),
	1714	interface=[
	1715	topdl.Interface(
	1716	substrate=csub.name),
	1717	],
	1718	attribute = [
	1719	topdl.Attribute(attribute=n, value=v)
	1720	for n, v in (\
	1721	('domain',
	1722	tbparams[tb].get('domain',
	1723	".example.com")),
	1724	('experiment', "%s/%s" % \
	1725	(tbparams[tb].get('project',
	1726	'project'),
	1727	eid)),)
	1728	],
	1729	)
	1730	topo[master].substrates.append(mcsub)
	1731	topo[master].elements.append(
	1732	self.new_dragon_portal(master, tb, master, eid,
[6e44258]	1733	mip, dip, idx, mcsub, tbparams))
[69692a9]	1734	topo[master].elements.append(seg)
	1735
	1736	self.create_dragon_substrate(csub, topo,
	1737	{tb: 1, master:1}, tbparams, master, eid)
	1738	else:
	1739	self.add_control_portal(master, tb, master, eid, topo,
	1740	tbparams)
	1741	self.add_control_portal(tb, master, master, eid, topo,
	1742	tbparams)
[13e3dd2]	1743
	1744	# Connect the portal nodes into the topologies and clear out
[895a133]	1745	# substrates that are not in the topologies
	1746	for tb in tbparams.keys():
	1747	topo[tb].incorporate_elements()
	1748	topo[tb].substrates = \
	1749	[s for s in topo[tb].substrates \
	1750	if len(s.interfaces) >0]
	1751
	1752	def wrangle_software(self, expid, top, topo, tbparams):
	1753	"""
	1754	Copy software out to the repository directory, allocate permissions and
	1755	rewrite the segment topologies to look for the software in local
	1756	places.
	1757	"""
	1758
	1759	# Copy the rpms and tarfiles to a distribution directory from
	1760	# which the federants can retrieve them
	1761	linkpath = "%s/software" % expid
	1762	softdir ="%s/%s" % ( self.repodir, linkpath)
	1763	softmap = { }
	1764	# These are in a list of tuples format (each kit). This comprehension
	1765	# unwraps them into a single list of tuples that initilaizes the set of
	1766	# tuples.
	1767	pkgs = set([ t for l in [self.fedkit, self.gatewaykit] \
	1768	for p, t in l ])
	1769	pkgs.update([x.location for e in top.elements \
	1770	for x in e.software])
	1771	try:
	1772	os.makedirs(softdir)
	1773	except IOError, e:
	1774	raise service_error(
	1775	"Cannot create software directory: %s" % e)
	1776	# The actual copying. Everything's converted into a url for copying.
	1777	for pkg in pkgs:
	1778	loc = pkg
	1779
	1780	scheme, host, path = urlparse(loc)[0:3]
	1781	dest = os.path.basename(path)
	1782	if not scheme:
	1783	if not loc.startswith('/'):
	1784	loc = "/%s" % loc
	1785	loc = "file://%s" %loc
	1786	try:
	1787	u = urlopen(loc)
	1788	except Exception, e:
	1789	raise service_error(service_error.req,
	1790	"Cannot open %s: %s" % (loc, e))
	1791	try:
	1792	f = open("%s/%s" % (softdir, dest) , "w")
	1793	self.log.debug("Writing %s/%s" % (softdir,dest) )
	1794	data = u.read(4096)
	1795	while data:
	1796	f.write(data)
	1797	data = u.read(4096)
	1798	f.close()
	1799	u.close()
	1800	except Exception, e:
	1801	raise service_error(service_error.internal,
	1802	"Could not copy %s: %s" % (loc, e))
	1803	path = re.sub("/tmp", "", linkpath)
	1804	# XXX
	1805	softmap[pkg] = \
[7183b48]	1806	"%s/%s/%s" %\
	1807	( self.repo_url, path, dest)
[895a133]	1808
	1809	# Allow the individual segments to access the software.
	1810	for tb in tbparams.keys():
	1811	self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
	1812	"/%s/%s" % ( path, dest))
	1813
	1814	# Convert the software locations in the segments into the local
	1815	# copies on this host
	1816	for soft in [ s for tb in topo.values() \
	1817	for e in tb.elements \
	1818	if getattr(e, 'software', False) \
	1819	for s in e.software ]:
	1820	if softmap.has_key(soft.location):
	1821	soft.location = softmap[soft.location]
	1822
	1823
[a3ad8bd]	1824	def new_experiment(self, req, fid):
	1825	"""
	1826	The external interface to empty initial experiment creation called from
	1827	the dispatcher.
	1828
	1829	Creates a working directory, splits the incoming description using the
	1830	splitter script and parses out the avrious subsections using the
	1831	lcasses above. Once each sub-experiment is created, use pooled threads
	1832	to instantiate them and start it all up.
	1833	"""
	1834	if not self.auth.check_attribute(fid, 'new'):
	1835	raise service_error(service_error.access, "New access denied")
	1836
	1837	try:
	1838	tmpdir = tempfile.mkdtemp(prefix="split-")
	1839	except IOError:
	1840	raise service_error(service_error.internal, "Cannot create tmp dir")
	1841
	1842	try:
	1843	access_user = self.accessdb[fid]
	1844	except KeyError:
	1845	raise service_error(service_error.internal,
	1846	"Access map and authorizer out of sync in " + \
[7183b48]	1847	"new_experiment for fedid %s" % fid)
[a3ad8bd]	1848
	1849	pid = "dummy"
	1850	gid = "dummy"
	1851
	1852	req = req.get('NewRequestBody', None)
	1853	if not req:
	1854	raise service_error(service_error.req,
	1855	"Bad request format (no NewRequestBody)")
	1856
	1857	# Generate an ID for the experiment (slice) and a certificate that the
	1858	# allocator can use to prove they own it. We'll ship it back through
	1859	# the encrypted connection.
	1860	(expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)
	1861
	1862	#now we're done with the tmpdir, and it should be empty
	1863	if self.cleanup:
	1864	self.log.debug("[new_experiment]: removing %s" % tmpdir)
	1865	os.rmdir(tmpdir)
	1866	else:
	1867	self.log.debug("[new_experiment]: not removing %s" % tmpdir)
	1868
	1869	eid = self.create_experiment_state(fid, req, expid, expcert,
	1870	state='empty')
	1871
	1872	# Let users touch the state
	1873	self.auth.set_attribute(fid, expid)
	1874	self.auth.set_attribute(expid, expid)
	1875	# Override fedids can manipulate state as well
	1876	for o in self.overrides:
	1877	self.auth.set_attribute(o, expid)
	1878
	1879	rv = {
	1880	'experimentID': [
	1881	{'localname' : eid }, { 'fedid': copy.copy(expid) }
	1882	],
	1883	'experimentStatus': 'empty',
	1884	'experimentAccess': { 'X509' : expcert }
	1885	}
	1886
	1887	return rv
	1888
	1889
[e19b75c]	1890	def create_experiment(self, req, fid):
[db6b092]	1891	"""
	1892	The external interface to experiment creation called from the
	1893	dispatcher.
	1894
	1895	Creates a working directory, splits the incoming description using the
	1896	splitter script and parses out the avrious subsections using the
	1897	lcasses above. Once each sub-experiment is created, use pooled threads
	1898	to instantiate them and start it all up.
	1899	"""
[7183b48]	1900
	1901	req = req.get('CreateRequestBody', None)
	1902	if not req:
	1903	raise service_error(service_error.req,
	1904	"Bad request format (no CreateRequestBody)")
	1905
	1906	# Get the experiment access
	1907	exp = req.get('experimentID', None)
	1908	if exp:
	1909	if exp.has_key('fedid'):
	1910	key = exp['fedid']
	1911	expid = key
	1912	eid = None
	1913	elif exp.has_key('localname'):
	1914	key = exp['localname']
	1915	eid = key
	1916	expid = None
	1917	else:
	1918	raise service_error(service_error.req, "Unknown lookup type")
	1919	else:
	1920	raise service_error(service_error.req, "No request?")
	1921
	1922	self.check_experiment_access(fid, key)
[db6b092]	1923
	1924	try:
	1925	tmpdir = tempfile.mkdtemp(prefix="split-")
[895a133]	1926	os.mkdir(tmpdir+"/keys")
[db6b092]	1927	except IOError:
	1928	raise service_error(service_error.internal, "Cannot create tmp dir")
	1929
	1930	gw_pubkey_base = "fed.%s.pub" % self.ssh_type
	1931	gw_secretkey_base = "fed.%s" % self.ssh_type
	1932	gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
	1933	gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
	1934	tclfile = tmpdir + "/experiment.tcl"
	1935	tbparams = { }
	1936	try:
	1937	access_user = self.accessdb[fid]
	1938	except KeyError:
	1939	raise service_error(service_error.internal,
	1940	"Access map and authorizer out of sync in " + \
	1941	"create_experiment for fedid %s" % fid)
	1942
	1943	pid = "dummy"
	1944	gid = "dummy"
	1945
	1946	# The tcl parser needs to read a file so put the content into that file
	1947	descr=req.get('experimentdescription', None)
	1948	if descr:
	1949	file_content=descr.get('ns2description', None)
	1950	if file_content:
	1951	try:
	1952	f = open(tclfile, 'w')
	1953	f.write(file_content)
	1954	f.close()
	1955	except IOError:
	1956	raise service_error(service_error.internal,
	1957	"Cannot write temp experiment description")
	1958	else:
	1959	raise service_error(service_error.req,
	1960	"Only ns2descriptions supported")
	1961	else:
	1962	raise service_error(service_error.req, "No experiment description")
	1963
[7183b48]	1964	self.state_lock.acquire()
	1965	if self.state.has_key(key):
[4afcfc4]	1966	self.state[key]['experimentStatus'] = "starting"
[7183b48]	1967	for e in self.state[key].get('experimentID',[]):
	1968	if not expid and e.has_key('fedid'):
	1969	expid = e['fedid']
	1970	elif not eid and e.has_key('localname'):
	1971	eid = e['localname']
	1972	self.state_lock.release()
	1973
	1974	if not (eid and expid):
	1975	raise service_error(service_error.internal,
	1976	"Cannot find local experiment info!?")
[db6b092]	1977
	1978	try:
	1979	# This catches exceptions to clear the placeholder if necessary
	1980	try:
	1981	self.generate_ssh_keys(gw_secretkey, self.ssh_type)
	1982	except ValueError:
	1983	raise service_error(service_error.server_config,
	1984	"Bad key type (%s)" % self.ssh_type)
	1985
	1986	master = req.get('master', None)
	1987	if not master:
	1988	raise service_error(service_error.req,
	1989	"No master testbed label")
	1990	export_project = req.get('exportProject', None)
	1991	if not export_project:
	1992	raise service_error(service_error.req, "No export project")
[895a133]	1993
	1994	# Translate to topdl
[db6b092]	1995	if self.splitter_url:
[895a133]	1996	# XXX: need remote topdl translator
[db6b092]	1997	self.log.debug("Calling remote splitter at %s" % \
	1998	self.splitter_url)
	1999	split_data = self.remote_splitter(self.splitter_url,
	2000	file_content, master)
	2001	else:
	2002	tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
	2003	str(self.muxmax), '-m', master]
	2004
	2005	if self.fedkit:
	2006	tclcmd.append('-k')
	2007
	2008	if self.gatewaykit:
	2009	tclcmd.append('-K')
	2010
	2011	tclcmd.extend([pid, gid, eid, tclfile])
	2012
	2013	self.log.debug("running local splitter %s", " ".join(tclcmd))
	2014	# This is just fantastic. As a side effect the parser copies
	2015	# tb_compat.tcl into the current directory, so that directory
	2016	# must be writable by the fedd user. Doing this in the
	2017	# temporary subdir ensures this is the case.
[70caa72]	2018	tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
[db6b092]	2019	cwd=tmpdir)
[866c983]	2020	split_data = tclparser.stdout
	2021
[cc8d8e9]	2022	top = topdl.topology_from_xml(file=split_data, top="experiment")
[895a133]	2023
[69692a9]	2024	hosts, ip_allocator = self.allocate_ips_to_topo(top)
[895a133]	2025	# Find the testbeds to look up
	2026	testbeds = set([ a.value for e in top.elements \
	2027	for a in e.attribute \
	2028	if a.attribute == 'testbed'] )
	2029
	2030	allocated = { } # Testbeds we can access
	2031	topo ={ } # Sub topologies
[99eb8cf]	2032	self.get_access_to_testbeds(testbeds, access_user,
[895a133]	2033	export_project, master, allocated, tbparams)
	2034	self.split_topology(top, topo, testbeds, eid, master, tbparams)
	2035
	2036	# Copy configuration files into the remote file store
[6c57fe9]	2037	# The config urlpath
	2038	configpath = "/%s/config" % expid
	2039	# The config file system location
	2040	configdir ="%s%s" % ( self.repodir, configpath)
	2041	try:
	2042	os.makedirs(configdir)
	2043	except IOError, e:
	2044	raise service_error(
	2045	"Cannot create config directory: %s" % e)
	2046	try:
	2047	f = open("%s/hosts" % configdir, "w")
	2048	f.write('\n'.join(hosts))
	2049	f.close()
	2050	except IOError, e:
	2051	raise service_error(service_error.internal,
	2052	"Cannot write hosts file: %s" % e)
	2053	try:
[40dd8c1]	2054	copy_file("%s" % gw_pubkey, "%s/%s" % \
[6c57fe9]	2055	(configdir, gw_pubkey_base))
[40dd8c1]	2056	copy_file("%s" % gw_secretkey, "%s/%s" % \
[6c57fe9]	2057	(configdir, gw_secretkey_base))
	2058	except IOError, e:
	2059	raise service_error(service_error.internal,
	2060	"Cannot copy keyfiles: %s" % e)
[cc8d8e9]	2061
[6c57fe9]	2062	# Allow the individual testbeds to access the configuration files.
	2063	for tb in tbparams.keys():
	2064	asignee = tbparams[tb]['allocID']['fedid']
	2065	for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
	2066	self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))
[cc8d8e9]	2067
[69692a9]	2068	self.add_portals(top, topo, eid, master, tbparams, ip_allocator)
	2069	# Now get access to the dynamic testbeds
	2070	for k, t in topo.items():
	2071	if not t.get_attribute('dynamic'):
	2072	continue
	2073	tb = t.get_attribute('testbed')
	2074	if tb:
	2075	self.get_access(tb, None, user, tbparams, master,
	2076	export_project, access_user)
	2077	tbparams[k] = tbparams[tb]
	2078	del tbparams[tb]
	2079	allocated[k] = 1
	2080	else:
	2081	raise service_error(service_error.internal,
	2082	"Dynamic allocation from no testbed!?")
	2083
[895a133]	2084	self.wrangle_software(expid, top, topo, tbparams)
[cc8d8e9]	2085
	2086	vtopo = topdl.topology_to_vtopo(top)
	2087	vis = self.genviz(vtopo)
[db6b092]	2088
[866c983]	2089	# save federant information
	2090	for k in allocated.keys():
[ecf679e]	2091	tbparams[k]['federant'] = {
	2092	'name': [ { 'localname' : eid} ],
	2093	'allocID' : tbparams[k]['allocID'],
	2094	'master' : k == master,
	2095	'uri': tbparams[k]['uri'],
[866c983]	2096	}
[69692a9]	2097	if tbparams[k].has_key('emulab'):
	2098	tbparams[k]['federant']['emulab'] = \
	2099	tbparams[k]['emulab']
[866c983]	2100
[db6b092]	2101	self.state_lock.acquire()
	2102	self.state[eid]['vtopo'] = vtopo
	2103	self.state[eid]['vis'] = vis
	2104	self.state[expid]['federant'] = \
	2105	[ tbparams[tb]['federant'] for tb in tbparams.keys() \
	2106	if tbparams[tb].has_key('federant') ]
[cc8d8e9]	2107	if self.state_filename:
	2108	self.write_state()
[db6b092]	2109	self.state_lock.release()
[866c983]	2110	except service_error, e:
	2111	# If something goes wrong in the parse (usually an access error)
	2112	# clear the placeholder state. From here on out the code delays
[db6b092]	2113	# exceptions. Failing at this point returns a fault to the remote
	2114	# caller.
[cc8d8e9]	2115
[866c983]	2116	self.state_lock.acquire()
	2117	del self.state[eid]
[bd3e314]	2118	del self.state[expid]
	2119	if self.state_filename: self.write_state()
[866c983]	2120	self.state_lock.release()
	2121	raise e
	2122
	2123
[db6b092]	2124	# Start the background swapper and return the starting state. From
	2125	# here on out, the state will stick around a while.
[866c983]	2126
[db6b092]	2127	# Let users touch the state
[bd3e314]	2128	self.auth.set_attribute(fid, expid)
	2129	self.auth.set_attribute(expid, expid)
[db6b092]	2130	# Override fedids can manipulate state as well
	2131	for o in self.overrides:
	2132	self.auth.set_attribute(o, expid)
	2133
	2134	# Create a logger that logs to the experiment's state object as well as
	2135	# to the main log file.
	2136	alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
[f07fa49]	2137	alloc_collector = self.list_log(self.state[eid]['log'])
	2138	h = logging.StreamHandler(alloc_collector)
[db6b092]	2139	# XXX: there should be a global one of these rather than repeating the
	2140	# code.
	2141	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
	2142	'%d %b %y %H:%M:%S'))
	2143	alloc_log.addHandler(h)
	2144
[6c57fe9]	2145	attrs = [
	2146	{
	2147	'attribute': 'ssh_pubkey',
	2148	'value': '%s/%s/config/%s' % \
[7183b48]	2149	(self.repo_url, expid, gw_pubkey_base)
[6c57fe9]	2150	},
	2151	{
	2152	'attribute': 'ssh_secretkey',
	2153	'value': '%s/%s/config/%s' % \
[7183b48]	2154	(self.repo_url, expid, gw_secretkey_base)
[6c57fe9]	2155	},
	2156	{
	2157	'attribute': 'hosts',
	2158	'value': '%s/%s/config/hosts' % \
[7183b48]	2159	(self.repo_url, expid)
[6c57fe9]	2160	},
[ecca6eb]	2161	{
	2162	'attribute': 'experiment_name',
	2163	'value': eid,
	2164	},
[6c57fe9]	2165	]
	2166
[db6b092]	2167	# Start a thread to do the resource allocation
[e19b75c]	2168	t = Thread(target=self.allocate_resources,
[7183b48]	2169	args=(allocated, master, eid, expid, tbparams,
[f07fa49]	2170	topo, tmpdir, alloc_log, alloc_collector, attrs),
[db6b092]	2171	name=eid)
	2172	t.start()
	2173
	2174	rv = {
	2175	'experimentID': [
	2176	{'localname' : eid }, { 'fedid': copy.copy(expid) }
	2177	],
	2178	'experimentStatus': 'starting',
	2179	}
	2180
	2181	return rv
[9479343]	2182
	2183	def get_experiment_fedid(self, key):
	2184	"""
[db6b092]	2185	find the fedid associated with the localname key in the state database.
[9479343]	2186	"""
	2187
[db6b092]	2188	rv = None
	2189	self.state_lock.acquire()
	2190	if self.state.has_key(key):
	2191	if isinstance(self.state[key], dict):
	2192	try:
	2193	kl = [ f['fedid'] for f in \
	2194	self.state[key]['experimentID']\
	2195	if f.has_key('fedid') ]
	2196	except KeyError:
	2197	self.state_lock.release()
	2198	raise service_error(service_error.internal,
	2199	"No fedid for experiment %s when getting "+\
	2200	"fedid(!?)" % key)
	2201	if len(kl) == 1:
	2202	rv = kl[0]
	2203	else:
	2204	self.state_lock.release()
	2205	raise service_error(service_error.internal,
	2206	"multiple fedids for experiment %s when " +\
	2207	"getting fedid(!?)" % key)
	2208	else:
	2209	self.state_lock.release()
	2210	raise service_error(service_error.internal,
	2211	"Unexpected state for %s" % key)
	2212	self.state_lock.release()
	2213	return rv
[a97394b]	2214
[4064742]	2215	def check_experiment_access(self, fid, key):
[866c983]	2216	"""
	2217	Confirm that the fid has access to the experiment. Though a request
	2218	may be made in terms of a local name, the access attribute is always
	2219	the experiment's fedid.
	2220	"""
	2221	if not isinstance(key, fedid):
[db6b092]	2222	key = self.get_experiment_fedid(key)
[866c983]	2223
	2224	if self.auth.check_attribute(fid, key):
	2225	return True
	2226	else:
	2227	raise service_error(service_error.access, "Access Denied")
[4064742]	2228
	2229
[db6b092]	2230	def get_handler(self, path, fid):
[7183b48]	2231	self.log.info("Get handler %s %s" % (path, fid))
[6c57fe9]	2232	if self.auth.check_attribute(fid, path):
	2233	return ("%s/%s" % (self.repodir, path), "application/binary")
	2234	else:
	2235	return (None, None)
[987aaa1]	2236
	2237	def get_vtopo(self, req, fid):
[866c983]	2238	"""
	2239	Return the stored virtual topology for this experiment
	2240	"""
	2241	rv = None
[db6b092]	2242	state = None
[866c983]	2243
	2244	req = req.get('VtopoRequestBody', None)
	2245	if not req:
	2246	raise service_error(service_error.req,
	2247	"Bad request format (no VtopoRequestBody)")
	2248	exp = req.get('experiment', None)
	2249	if exp:
	2250	if exp.has_key('fedid'):
	2251	key = exp['fedid']
	2252	keytype = "fedid"
	2253	elif exp.has_key('localname'):
	2254	key = exp['localname']
	2255	keytype = "localname"
	2256	else:
	2257	raise service_error(service_error.req, "Unknown lookup type")
	2258	else:
	2259	raise service_error(service_error.req, "No request?")
	2260
	2261	self.check_experiment_access(fid, key)
	2262
	2263	self.state_lock.acquire()
	2264	if self.state.has_key(key):
[db6b092]	2265	if self.state[key].has_key('vtopo'):
	2266	rv = { 'experiment' : {keytype: key },\
	2267	'vtopo': self.state[key]['vtopo'],\
	2268	}
	2269	else:
	2270	state = self.state[key]['experimentStatus']
[866c983]	2271	self.state_lock.release()
	2272
	2273	if rv: return rv
[bd3e314]	2274	else:
[db6b092]	2275	if state:
	2276	raise service_error(service_error.partial,
	2277	"Not ready: %s" % state)
	2278	else:
	2279	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2280
	2281	def get_vis(self, req, fid):
[866c983]	2282	"""
	2283	Return the stored visualization for this experiment
	2284	"""
	2285	rv = None
[db6b092]	2286	state = None
[866c983]	2287
	2288	req = req.get('VisRequestBody', None)
	2289	if not req:
	2290	raise service_error(service_error.req,
	2291	"Bad request format (no VisRequestBody)")
	2292	exp = req.get('experiment', None)
	2293	if exp:
	2294	if exp.has_key('fedid'):
	2295	key = exp['fedid']
	2296	keytype = "fedid"
	2297	elif exp.has_key('localname'):
	2298	key = exp['localname']
	2299	keytype = "localname"
	2300	else:
	2301	raise service_error(service_error.req, "Unknown lookup type")
	2302	else:
	2303	raise service_error(service_error.req, "No request?")
	2304
	2305	self.check_experiment_access(fid, key)
	2306
	2307	self.state_lock.acquire()
	2308	if self.state.has_key(key):
[db6b092]	2309	if self.state[key].has_key('vis'):
	2310	rv = { 'experiment' : {keytype: key },\
	2311	'vis': self.state[key]['vis'],\
	2312	}
	2313	else:
	2314	state = self.state[key]['experimentStatus']
[866c983]	2315	self.state_lock.release()
	2316
	2317	if rv: return rv
[bd3e314]	2318	else:
[db6b092]	2319	if state:
	2320	raise service_error(service_error.partial,
	2321	"Not ready: %s" % state)
	2322	else:
	2323	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2324
[65f3f29]	2325	def clean_info_response(self, rv):
[db6b092]	2326	"""
	2327	Remove the information in the experiment's state object that is not in
	2328	the info response.
	2329	"""
	2330	# Remove the owner info (should always be there, but...)
	2331	if rv.has_key('owner'): del rv['owner']
	2332
	2333	# Convert the log into the allocationLog parameter and remove the
	2334	# log entry (with defensive programming)
	2335	if rv.has_key('log'):
	2336	rv['allocationLog'] = "".join(rv['log'])
	2337	del rv['log']
	2338	else:
	2339	rv['allocationLog'] = ""
	2340
	2341	if rv['experimentStatus'] != 'active':
	2342	if rv.has_key('federant'): del rv['federant']
	2343	else:
[69692a9]	2344	# remove the allocationID and uri info from each federant
[db6b092]	2345	for f in rv.get('federant', []):
	2346	if f.has_key('allocID'): del f['allocID']
[69692a9]	2347	if f.has_key('uri'): del f['uri']
[db6b092]	2348	return rv
[65f3f29]	2349
[c52c48d]	2350	def get_info(self, req, fid):
[866c983]	2351	"""
	2352	Return all the stored info about this experiment
	2353	"""
	2354	rv = None
	2355
	2356	req = req.get('InfoRequestBody', None)
	2357	if not req:
	2358	raise service_error(service_error.req,
[65f3f29]	2359	"Bad request format (no InfoRequestBody)")
[866c983]	2360	exp = req.get('experiment', None)
	2361	if exp:
	2362	if exp.has_key('fedid'):
	2363	key = exp['fedid']
	2364	keytype = "fedid"
	2365	elif exp.has_key('localname'):
	2366	key = exp['localname']
	2367	keytype = "localname"
	2368	else:
	2369	raise service_error(service_error.req, "Unknown lookup type")
	2370	else:
	2371	raise service_error(service_error.req, "No request?")
	2372
	2373	self.check_experiment_access(fid, key)
	2374
	2375	# The state may be massaged by the service function that called
	2376	# get_info (e.g., encoded for XMLRPC transport) so send a copy of the
	2377	# state.
	2378	self.state_lock.acquire()
	2379	if self.state.has_key(key):
	2380	rv = copy.deepcopy(self.state[key])
	2381	self.state_lock.release()
	2382
[db6b092]	2383	if rv:
	2384	return self.clean_info_response(rv)
[bd3e314]	2385	else:
[db6b092]	2386	raise service_error(service_error.req, "No such experiment")
[7a8d667]	2387
[65f3f29]	2388	def get_multi_info(self, req, fid):
	2389	"""
	2390	Return all the stored info that this fedid can access
	2391	"""
[db6b092]	2392	rv = { 'info': [ ] }
[65f3f29]	2393
[db6b092]	2394	self.state_lock.acquire()
	2395	for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
[829246e]	2396	try:
	2397	self.check_experiment_access(fid, key)
	2398	except service_error, e:
	2399	if e.code == service_error.access:
	2400	continue
	2401	else:
	2402	self.state_lock.release()
	2403	raise e
[65f3f29]	2404
[db6b092]	2405	if self.state.has_key(key):
	2406	e = copy.deepcopy(self.state[key])
	2407	e = self.clean_info_response(e)
	2408	rv['info'].append(e)
[65f3f29]	2409	self.state_lock.release()
[db6b092]	2410	return rv
[65f3f29]	2411
[7a8d667]	2412	def terminate_experiment(self, req, fid):
[866c983]	2413	"""
	2414	Swap this experiment out on the federants and delete the shared
	2415	information
	2416	"""
	2417	tbparams = { }
	2418	req = req.get('TerminateRequestBody', None)
	2419	if not req:
	2420	raise service_error(service_error.req,
	2421	"Bad request format (no TerminateRequestBody)")
[db6b092]	2422	force = req.get('force', False)
[866c983]	2423	exp = req.get('experiment', None)
	2424	if exp:
	2425	if exp.has_key('fedid'):
	2426	key = exp['fedid']
	2427	keytype = "fedid"
	2428	elif exp.has_key('localname'):
	2429	key = exp['localname']
	2430	keytype = "localname"
	2431	else:
	2432	raise service_error(service_error.req, "Unknown lookup type")
	2433	else:
	2434	raise service_error(service_error.req, "No request?")
	2435
	2436	self.check_experiment_access(fid, key)
	2437
[db6b092]	2438	dealloc_list = [ ]
[46e4682]	2439
	2440
[5ae3857]	2441	# Create a logger that logs to the dealloc_list as well as to the main
	2442	# log file.
	2443	dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
	2444	h = logging.StreamHandler(self.list_log(dealloc_list))
	2445	# XXX: there should be a global one of these rather than repeating the
	2446	# code.
	2447	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
	2448	'%d %b %y %H:%M:%S'))
	2449	dealloc_log.addHandler(h)
	2450
	2451	self.state_lock.acquire()
	2452	fed_exp = self.state.get(key, None)
	2453
	2454	if fed_exp:
	2455	# This branch of the conditional holds the lock to generate a
	2456	# consistent temporary tbparams variable to deallocate experiments.
	2457	# It releases the lock to do the deallocations and reacquires it to
	2458	# remove the experiment state when the termination is complete.
	2459
	2460	# First make sure that the experiment creation is complete.
	2461	status = fed_exp.get('experimentStatus', None)
	2462
	2463	if status:
	2464	if status in ('starting', 'terminating'):
	2465	if not force:
	2466	self.state_lock.release()
	2467	raise service_error(service_error.partial,
	2468	'Experiment still being created or destroyed')
	2469	else:
	2470	self.log.warning('Experiment in %s state ' % status + \
	2471	'being terminated by force.')
	2472	else:
	2473	# No status??? trouble
	2474	self.state_lock.release()
	2475	raise service_error(service_error.internal,
	2476	"Experiment has no status!?")
	2477
	2478	ids = []
	2479	# experimentID is a list of dicts that are self-describing
	2480	# identifiers. This finds all the fedids and localnames - the
	2481	# keys of self.state - and puts them into ids.
	2482	for id in fed_exp.get('experimentID', []):
	2483	if id.has_key('fedid'): ids.append(id['fedid'])
	2484	if id.has_key('localname'): ids.append(id['localname'])
	2485
[63a35b7]	2486	# Collect the allocation/segment ids into a dict keyed by the fedid
	2487	# of the allocation (or a monotonically increasing integer) that
	2488	# contains a tuple of uri, aid (which is a dict...)
	2489	for i, fed in enumerate(fed_exp.get('federant', [])):
[5ae3857]	2490	try:
[63a35b7]	2491	uri = fed['uri']
	2492	aid = fed['allocID']
	2493	k = fed['allocID'].get('fedid', i)
[5ae3857]	2494	except KeyError, e:
	2495	continue
[63a35b7]	2496	tbparams[k] = (uri, aid)
[5ae3857]	2497	fed_exp['experimentStatus'] = 'terminating'
	2498	if self.state_filename: self.write_state()
	2499	self.state_lock.release()
	2500
	2501	# Stop everyone. NB, wait_for_all waits until a thread starts and
	2502	# then completes, so we can't wait if nothing starts. So, no
	2503	# tbparams, no start.
	2504	if len(tbparams) > 0:
	2505	thread_pool = self.thread_pool(self.nthreads)
[63a35b7]	2506	for k in tbparams.keys():
[5ae3857]	2507	# Create and start a thread to stop the segment
	2508	thread_pool.wait_for_slot()
[63a35b7]	2509	uri, aid = tbparams[k]
[5ae3857]	2510	t = self.pooled_thread(\
[e19b75c]	2511	target=self.terminate_segment(log=dealloc_log,
[63a35b7]	2512	testbed=uri,
[5ae3857]	2513	cert_file=self.cert_file,
	2514	cert_pwd=self.cert_pwd,
	2515	trusted_certs=self.trusted_certs,
	2516	caller=self.call_TerminateSegment),
[63a35b7]	2517	args=(uri, aid), name=k,
[5ae3857]	2518	pdata=thread_pool, trace_file=self.trace_file)
	2519	t.start()
	2520	# Wait for completions
	2521	thread_pool.wait_for_all_done()
	2522
	2523	# release the allocations (failed experiments have done this
	2524	# already, and starting experiments may be in odd states, so we
	2525	# ignore errors releasing those allocations
	2526	try:
[63a35b7]	2527	for k in tbparams.keys():
[ecf679e]	2528	# This releases access by uri
[63a35b7]	2529	uri, aid = tbparams[k]
	2530	self.release_access(None, aid, uri=uri)
[5ae3857]	2531	except service_error, e:
	2532	if status != 'failed' and not force:
	2533	raise e
	2534
	2535	# Remove the terminated experiment
	2536	self.state_lock.acquire()
	2537	for id in ids:
	2538	if self.state.has_key(id): del self.state[id]
	2539
	2540	if self.state_filename: self.write_state()
	2541	self.state_lock.release()
	2542
	2543	return {
	2544	'experiment': exp ,
	2545	'deallocationLog': "".join(dealloc_list),
	2546	}
	2547	else:
	2548	# Don't forget to release the lock
	2549	self.state_lock.release()
	2550	raise service_error(service_error.req, "No saved state")

Note: See TracBrowser for help on using the repository browser.

Download in other formats: