Context Navigation

source: fedd/federation/experiment_control.py @ 5b74b63

axis_examplecompt_changesinfo-opsversion-3.01version-3.02

Last change on this file since 5b74b63 was 5b74b63, checked in by Ted Faber <faber@…>, 14 years ago
Initial commit of support for transit connection info
Property mode set to `100644`
File size: 87.9 KB

Rev	Line
[6679c122]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4
	5	import re
	6	import random
	7	import string
	8	import subprocess
	9	import tempfile
	10	import copy
[eee2b2e]	11	import pickle
[c971895]	12	import logging
[79b6596]	13	import signal
	14	import time
[6679c122]	15
[3441fe3]	16	import traceback
[c971895]	17	# For parsing visualization output and splitter output
	18	import xml.parsers.expat
[3441fe3]	19
[6c57fe9]	20	from threading import Lock, Thread, Condition
	21	from subprocess import call, Popen, PIPE
[6679c122]	22
[db6b092]	23	from urlparse import urlparse
	24	from urllib2 import urlopen
	25
[ec4fb42]	26	from util import *
[51cc9df]	27	from fedid import fedid, generate_fedid
[9460b1e]	28	from remote_service import xmlrpc_handler, soap_handler, service_caller
[c971895]	29	from service_error import service_error
[6679c122]	30
[db6b092]	31	import topdl
[f07fa49]	32	import list_log
[db6b092]	33	from ip_allocator import ip_allocator
	34	from ip_addr import ip_addr
	35
[11a08b0]	36
	37	class nullHandler(logging.Handler):
	38	def emit(self, record): pass
	39
	40	fl = logging.getLogger("fedd.experiment_control")
	41	fl.addHandler(nullHandler())
	42
[ec4fb42]	43	class experiment_control_local:
[0ea11af]	44	"""
	45	Control of experiments that this system can directly access.
	46
	47	Includes experiment creation, termination and information dissemination.
	48	Thred safe.
	49	"""
[79b6596]	50
	51	class ssh_cmd_timeout(RuntimeError): pass
[6679c122]	52
[1af38d6]	53	class thread_pool:
[866c983]	54	"""
	55	A class to keep track of a set of threads all invoked for the same
	56	task. Manages the mutual exclusion of the states.
	57	"""
	58	def __init__(self, nthreads):
	59	"""
	60	Start a pool.
	61	"""
	62	self.changed = Condition()
	63	self.started = 0
	64	self.terminated = 0
	65	self.nthreads = nthreads
	66
	67	def acquire(self):
	68	"""
	69	Get the pool's lock.
	70	"""
	71	self.changed.acquire()
	72
	73	def release(self):
	74	"""
	75	Release the pool's lock.
	76	"""
	77	self.changed.release()
	78
	79	def wait(self, timeout = None):
	80	"""
	81	Wait for a pool thread to start or stop.
	82	"""
	83	self.changed.wait(timeout)
	84
	85	def start(self):
	86	"""
	87	Called by a pool thread to report starting.
	88	"""
	89	self.changed.acquire()
	90	self.started += 1
	91	self.changed.notifyAll()
	92	self.changed.release()
	93
	94	def terminate(self):
	95	"""
	96	Called by a pool thread to report finishing.
	97	"""
	98	self.changed.acquire()
	99	self.terminated += 1
	100	self.changed.notifyAll()
	101	self.changed.release()
	102
	103	def clear(self):
	104	"""
	105	Clear all pool data.
	106	"""
	107	self.changed.acquire()
	108	self.started = 0
	109	self.terminated =0
	110	self.changed.notifyAll()
	111	self.changed.release()
	112
	113	def wait_for_slot(self):
	114	"""
	115	Wait until we have a free slot to start another pooled thread
	116	"""
	117	self.acquire()
	118	while self.started - self.terminated >= self.nthreads:
	119	self.wait()
	120	self.release()
	121
[32e7d93]	122	def wait_for_all_done(self, timeout=None):
[866c983]	123	"""
[32e7d93]	124	Wait until all active threads finish (and at least one has
	125	started). If a timeout is given, return after waiting that long
	126	for termination. If all threads are done (and one has started in
	127	the since the last clear()) return True, otherwise False.
[866c983]	128	"""
[32e7d93]	129	if timeout:
	130	deadline = time.time() + timeout
[866c983]	131	self.acquire()
	132	while self.started == 0 or self.started > self.terminated:
[32e7d93]	133	self.wait(timeout)
	134	if timeout:
	135	if time.time() > deadline:
	136	break
	137	timeout = deadline - time.time()
[866c983]	138	self.release()
[32e7d93]	139	return not (self.started == 0 or self.started > self.terminated)
[8bc5754]	140
[1af38d6]	141	class pooled_thread(Thread):
[866c983]	142	"""
	143	One of a set of threads dedicated to a specific task. Uses the
	144	thread_pool class above for coordination.
	145	"""
	146	def __init__(self, group=None, target=None, name=None, args=(),
	147	kwargs={}, pdata=None, trace_file=None):
	148	Thread.__init__(self, group, target, name, args, kwargs)
	149	self.rv = None # Return value of the ops in this thread
	150	self.exception = None # Exception that terminated this thread
	151	self.target=target # Target function to run on start()
	152	self.args = args # Args to pass to target
	153	self.kwargs = kwargs # Additional kw args
	154	self.pdata = pdata # thread_pool for this class
	155	# Logger for this thread
	156	self.log = logging.getLogger("fedd.experiment_control")
	157
	158	def run(self):
	159	"""
	160	Emulate Thread.run, except add pool data manipulation and error
	161	logging.
	162	"""
	163	if self.pdata:
	164	self.pdata.start()
	165
	166	if self.target:
	167	try:
	168	self.rv = self.target(self.args, *self.kwargs)
	169	except service_error, s:
	170	self.exception = s
	171	self.log.error("Thread exception: %s %s" % \
	172	(s.code_string(), s.desc))
	173	except:
	174	self.exception = sys.exc_info()[1]
	175	self.log.error(("Unexpected thread exception: %s" +\
	176	"Trace %s") % (self.exception,\
	177	traceback.format_exc()))
	178	if self.pdata:
	179	self.pdata.terminate()
[6679c122]	180
[f069052]	181	call_RequestAccess = service_caller('RequestAccess')
	182	call_ReleaseAccess = service_caller('ReleaseAccess')
[cc8d8e9]	183	call_StartSegment = service_caller('StartSegment')
[5ae3857]	184	call_TerminateSegment = service_caller('TerminateSegment')
[f069052]	185	call_Ns2Split = service_caller('Ns2Split')
[058f58e]	186
[3f6bc5f]	187	def __init__(self, config=None, auth=None):
[866c983]	188	"""
	189	Intialize the various attributes, most from the config object
	190	"""
	191
	192	def parse_tarfile_list(tf):
	193	"""
	194	Parse a tarfile list from the configuration. This is a set of
	195	paths and tarfiles separated by spaces.
	196	"""
	197	rv = [ ]
	198	if tf is not None:
	199	tl = tf.split()
	200	while len(tl) > 1:
	201	p, t = tl[0:2]
	202	del tl[0:2]
	203	rv.append((p, t))
	204	return rv
	205
	206	self.thread_with_rv = experiment_control_local.pooled_thread
	207	self.thread_pool = experiment_control_local.thread_pool
[f07fa49]	208	self.list_log = list_log.list_log
[866c983]	209
	210	self.cert_file = config.get("experiment_control", "cert_file")
	211	if self.cert_file:
	212	self.cert_pwd = config.get("experiment_control", "cert_pwd")
	213	else:
	214	self.cert_file = config.get("globals", "cert_file")
	215	self.cert_pwd = config.get("globals", "cert_pwd")
	216
	217	self.trusted_certs = config.get("experiment_control", "trusted_certs") \
	218	or config.get("globals", "trusted_certs")
	219
[6c57fe9]	220	self.repodir = config.get("experiment_control", "repodir")
[7183b48]	221	self.repo_url = config.get("experiment_control", "repo_url",
	222	"https://users.isi.deterlab.net:23235");
[cc8d8e9]	223
[866c983]	224	self.exp_stem = "fed-stem"
	225	self.log = logging.getLogger("fedd.experiment_control")
	226	set_log_level(config, "experiment_control", self.log)
	227	self.muxmax = 2
	228	self.nthreads = 2
	229	self.randomize_experiments = False
	230
	231	self.splitter = None
	232	self.ssh_keygen = "/usr/bin/ssh-keygen"
	233	self.ssh_identity_file = None
	234
	235
	236	self.debug = config.getboolean("experiment_control", "create_debug")
[69692a9]	237	self.cleanup = not config.getboolean("experiment_control",
	238	"leave_tmpfiles")
[866c983]	239	self.state_filename = config.get("experiment_control",
	240	"experiment_state")
	241	self.splitter_url = config.get("experiment_control", "splitter_uri")
	242	self.fedkit = parse_tarfile_list(\
	243	config.get("experiment_control", "fedkit"))
	244	self.gatewaykit = parse_tarfile_list(\
	245	config.get("experiment_control", "gatewaykit"))
	246	accessdb_file = config.get("experiment_control", "accessdb")
	247
	248	self.ssh_pubkey_file = config.get("experiment_control",
	249	"ssh_pubkey_file")
	250	self.ssh_privkey_file = config.get("experiment_control",
	251	"ssh_privkey_file")
	252	# NB for internal master/slave ops, not experiment setup
	253	self.ssh_type = config.get("experiment_control", "sshkeytype", "rsa")
[ca489e8]	254
[db6b092]	255	self.overrides = set([])
	256	ovr = config.get('experiment_control', 'overrides')
	257	if ovr:
	258	for o in ovr.split(","):
	259	o = o.strip()
	260	if o.startswith('fedid:'): o = o[len('fedid:'):]
	261	self.overrides.add(fedid(hexstr=o))
[ca489e8]	262
[866c983]	263	self.state = { }
	264	self.state_lock = Lock()
	265	self.tclsh = "/usr/local/bin/otclsh"
	266	self.tcl_splitter = config.get("splitter", "tcl_splitter") or \
	267	config.get("experiment_control", "tcl_splitter",
	268	"/usr/testbed/lib/ns2ir/parse.tcl")
	269	mapdb_file = config.get("experiment_control", "mapdb")
	270	self.trace_file = sys.stderr
	271
	272	self.def_expstart = \
	273	"sudo -H /bin/sh /usr/local/federation/bin/federate.sh >& " +\
	274	"/tmp/federate";
	275	self.def_mexpstart = "sudo -H /usr/local/federation/bin/make_hosts " +\
	276	"FEDDIR/hosts";
	277	self.def_gwstart = \
	278	"sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF>& " +\
	279	"/tmp/bridge.log";
	280	self.def_mgwstart = \
	281	"sudo -H /usr/local/federation/bin/fed-tun.pl -f GWCONF >& " +\
	282	"/tmp/bridge.log";
	283	self.def_gwimage = "FBSD61-TUNNEL2";
	284	self.def_gwtype = "pc";
	285	self.local_access = { }
	286
	287	if auth:
	288	self.auth = auth
	289	else:
	290	self.log.error(\
	291	"[access]: No authorizer initialized, creating local one.")
	292	auth = authorizer()
	293
	294
	295	if self.ssh_pubkey_file:
	296	try:
	297	f = open(self.ssh_pubkey_file, 'r')
	298	self.ssh_pubkey = f.read()
	299	f.close()
	300	except IOError:
	301	raise service_error(service_error.internal,
	302	"Cannot read sshpubkey")
	303	else:
	304	raise service_error(service_error.internal,
	305	"No SSH public key file?")
	306
	307	if not self.ssh_privkey_file:
	308	raise service_error(service_error.internal,
	309	"No SSH public key file?")
	310
	311
	312	if mapdb_file:
	313	self.read_mapdb(mapdb_file)
	314	else:
	315	self.log.warn("[experiment_control] No testbed map, using defaults")
	316	self.tbmap = {
	317	'deter':'https://users.isi.deterlab.net:23235',
	318	'emulab':'https://users.isi.deterlab.net:23236',
	319	'ucb':'https://users.isi.deterlab.net:23237',
	320	}
	321
	322	if accessdb_file:
	323	self.read_accessdb(accessdb_file)
	324	else:
	325	raise service_error(service_error.internal,
	326	"No accessdb specified in config")
	327
	328	# Grab saved state. OK to do this w/o locking because it's read only
	329	# and only one thread should be in existence that can see self.state at
	330	# this point.
	331	if self.state_filename:
	332	self.read_state()
	333
	334	# Dispatch tables
	335	self.soap_services = {\
[a3ad8bd]	336	'New': soap_handler('New', self.new_experiment),
[e19b75c]	337	'Create': soap_handler('Create', self.create_experiment),
[866c983]	338	'Vtopo': soap_handler('Vtopo', self.get_vtopo),
	339	'Vis': soap_handler('Vis', self.get_vis),
	340	'Info': soap_handler('Info', self.get_info),
[65f3f29]	341	'MultiInfo': soap_handler('MultiInfo', self.get_multi_info),
[866c983]	342	'Terminate': soap_handler('Terminate',
[e19b75c]	343	self.terminate_experiment),
[866c983]	344	}
	345
	346	self.xmlrpc_services = {\
[a3ad8bd]	347	'New': xmlrpc_handler('New', self.new_experiment),
[e19b75c]	348	'Create': xmlrpc_handler('Create', self.create_experiment),
[866c983]	349	'Vtopo': xmlrpc_handler('Vtopo', self.get_vtopo),
	350	'Vis': xmlrpc_handler('Vis', self.get_vis),
	351	'Info': xmlrpc_handler('Info', self.get_info),
[65f3f29]	352	'MultiInfo': xmlrpc_handler('MultiInfo', self.get_multi_info),
[866c983]	353	'Terminate': xmlrpc_handler('Terminate',
[e19b75c]	354	self.terminate_experiment),
[866c983]	355	}
[19cc408]	356
[a97394b]	357	# Call while holding self.state_lock
[eee2b2e]	358	def write_state(self):
[866c983]	359	"""
	360	Write a new copy of experiment state after copying the existing state
	361	to a backup.
	362
	363	State format is a simple pickling of the state dictionary.
	364	"""
	365	if os.access(self.state_filename, os.W_OK):
[40dd8c1]	366	copy_file(self.state_filename, \
	367	"%s.bak" % self.state_filename)
[866c983]	368	try:
	369	f = open(self.state_filename, 'w')
	370	pickle.dump(self.state, f)
	371	except IOError, e:
	372	self.log.error("Can't write file %s: %s" % \
	373	(self.state_filename, e))
	374	except pickle.PicklingError, e:
	375	self.log.error("Pickling problem: %s" % e)
	376	except TypeError, e:
	377	self.log.error("Pickling problem (TypeError): %s" % e)
[eee2b2e]	378
[a97394b]	379	# Call while holding self.state_lock
[eee2b2e]	380	def read_state(self):
[866c983]	381	"""
	382	Read a new copy of experiment state. Old state is overwritten.
	383
	384	State format is a simple pickling of the state dictionary.
	385	"""
[cc8d8e9]	386
	387	def get_experiment_id(state):
	388	"""
	389	Pull the fedid experimentID out of the saved state. This is kind
	390	of a gross walk through the dict.
	391	"""
	392
	393	if state.has_key('experimentID'):
	394	for e in state['experimentID']:
	395	if e.has_key('fedid'):
	396	return e['fedid']
	397	else:
	398	return None
	399	else:
	400	return None
	401
	402	def get_alloc_ids(state):
	403	"""
	404	Pull the fedids of the identifiers of each allocation from the
	405	state. Again, a dict dive that's best isolated.
	406	"""
	407
	408	return [ f['allocID']['fedid']
	409	for f in state.get('federant',[]) \
	410	if f.has_key('allocID') and \
	411	f['allocID'].has_key('fedid')]
	412
	413
[866c983]	414	try:
	415	f = open(self.state_filename, "r")
	416	self.state = pickle.load(f)
	417	self.log.debug("[read_state]: Read state from %s" % \
	418	self.state_filename)
	419	except IOError, e:
	420	self.log.warning("[read_state]: No saved state: Can't open %s: %s"\
	421	% (self.state_filename, e))
	422	except pickle.UnpicklingError, e:
	423	self.log.warning(("[read_state]: No saved state: " + \
	424	"Unpickling failed: %s") % e)
	425
[cc8d8e9]	426	for s in self.state.values():
[866c983]	427	try:
[cc8d8e9]	428
	429	eid = get_experiment_id(s)
	430	if eid :
	431	# Give the owner rights to the experiment
	432	self.auth.set_attribute(s['owner'], eid)
	433	# And holders of the eid as well
	434	self.auth.set_attribute(eid, eid)
[db6b092]	435	# allow overrides to control experiments as well
	436	for o in self.overrides:
	437	self.auth.set_attribute(o, eid)
[cc8d8e9]	438	# Set permissions to allow reading of the software repo, if
	439	# any, as well.
	440	for a in get_alloc_ids(s):
	441	self.auth.set_attribute(a, 'repo/%s' % eid)
	442	else:
	443	raise KeyError("No experiment id")
[866c983]	444	except KeyError, e:
	445	self.log.warning("[read_state]: State ownership or identity " +\
	446	"misformatted in %s: %s" % (self.state_filename, e))
[4064742]	447
	448
	449	def read_accessdb(self, accessdb_file):
[866c983]	450	"""
	451	Read the mapping from fedids that can create experiments to their name
	452	in the 3-level access namespace. All will be asserted from this
	453	testbed and can include the local username and porject that will be
	454	asserted on their behalf by this fedd. Each fedid is also added to the
	455	authorization system with the "create" attribute.
	456	"""
	457	self.accessdb = {}
	458	# These are the regexps for parsing the db
	459	name_expr = "[" + string.ascii_letters + string.digits + "\.\-]+"
	460	project_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	461	"\s->\(\s("+name_expr+")\s,\s("+name_expr+")\s\)\s$")
	462	user_line = re.compile("^\s*fedid:([" + string.hexdigits + "]+)"+ \
	463	"\s->\s(" + name_expr + ")\s*$")
	464	lineno = 0
	465
	466	# Parse the mappings and store in self.authdb, a dict of
	467	# fedid -> (proj, user)
	468	try:
	469	f = open(accessdb_file, "r")
	470	for line in f:
	471	lineno += 1
	472	line = line.strip()
	473	if len(line) == 0 or line.startswith('#'):
	474	continue
	475	m = project_line.match(line)
	476	if m:
	477	fid = fedid(hexstr=m.group(1))
	478	project, user = m.group(2,3)
	479	if not self.accessdb.has_key(fid):
	480	self.accessdb[fid] = []
	481	self.accessdb[fid].append((project, user))
	482	continue
	483
	484	m = user_line.match(line)
	485	if m:
	486	fid = fedid(hexstr=m.group(1))
	487	project = None
	488	user = m.group(2)
	489	if not self.accessdb.has_key(fid):
	490	self.accessdb[fid] = []
	491	self.accessdb[fid].append((project, user))
	492	continue
	493	self.log.warn("[experiment_control] Error parsing access " +\
	494	"db %s at line %d" % (accessdb_file, lineno))
	495	except IOError:
	496	raise service_error(service_error.internal,
	497	"Error opening/reading %s as experiment " +\
	498	"control accessdb" % accessdb_file)
	499	f.close()
	500
	501	# Initialize the authorization attributes
	502	for fid in self.accessdb.keys():
	503	self.auth.set_attribute(fid, 'create')
[a3ad8bd]	504	self.auth.set_attribute(fid, 'new')
[34bc05c]	505
	506	def read_mapdb(self, file):
[866c983]	507	"""
	508	Read a simple colon separated list of mappings for the
	509	label-to-testbed-URL mappings. Clears or creates self.tbmap.
	510	"""
	511
	512	self.tbmap = { }
	513	lineno =0
	514	try:
	515	f = open(file, "r")
	516	for line in f:
	517	lineno += 1
	518	line = line.strip()
	519	if line.startswith('#') or len(line) == 0:
	520	continue
	521	try:
	522	label, url = line.split(':', 1)
	523	self.tbmap[label] = url
	524	except ValueError, e:
	525	self.log.warn("[read_mapdb] Ignored bad line (%d) in " +\
	526	"map db: %s %s" % (lineno, line, e))
	527	except IOError, e:
	528	self.log.warning("[read_mapdb]: No saved map database: Can't " +\
	529	"open %s: %s" % (file, e))
	530	f.close()
	531
[6679c122]	532	def generate_ssh_keys(self, dest, type="rsa" ):
[866c983]	533	"""
	534	Generate a set of keys for the gateways to use to talk.
	535
	536	Keys are of type type and are stored in the required dest file.
	537	"""
	538	valid_types = ("rsa", "dsa")
	539	t = type.lower();
	540	if t not in valid_types: raise ValueError
	541	cmd = [self.ssh_keygen, '-t', t, '-N', '', '-f', dest]
	542
	543	try:
	544	trace = open("/dev/null", "w")
	545	except IOError:
	546	raise service_error(service_error.internal,
	547	"Cannot open /dev/null??");
	548
	549	# May raise CalledProcessError
	550	self.log.debug("[generate_ssh_keys]: %s" % " ".join(cmd))
[4ea1e22]	551	rv = call(cmd, stdout=trace, stderr=trace, close_fds=True)
[866c983]	552	if rv != 0:
	553	raise service_error(service_error.internal,
	554	"Cannot generate nonce ssh keys. %s return code %d" \
	555	% (self.ssh_keygen, rv))
[6679c122]	556
[0d830de]	557	def gentopo(self, str):
[866c983]	558	"""
	559	Generate the topology dtat structure from the splitter's XML
	560	representation of it.
	561
	562	The topology XML looks like:
	563	<experiment>
	564	<nodes>
	565	<node><vname></vname><ips>ip1:ip2</ips></node>
	566	</nodes>
	567	<lans>
	568	<lan>
	569	<vname></vname><vnode></vnode><ip></ip>
	570	<bandwidth></bandwidth><member>node:port</member>
	571	</lan>
	572	</lans>
	573	"""
	574	class topo_parse:
	575	"""
	576	Parse the topology XML and create the dats structure.
	577	"""
	578	def __init__(self):
	579	# Typing of the subelements for data conversion
	580	self.str_subelements = ('vname', 'vnode', 'ips', 'ip', 'member')
	581	self.int_subelements = ( 'bandwidth',)
	582	self.float_subelements = ( 'delay',)
	583	# The final data structure
	584	self.nodes = [ ]
	585	self.lans = [ ]
	586	self.topo = { \
	587	'node': self.nodes,\
	588	'lan' : self.lans,\
	589	}
	590	self.element = { } # Current element being created
	591	self.chars = "" # Last text seen
	592
	593	def end_element(self, name):
	594	# After each sub element the contents is added to the current
	595	# element or to the appropriate list.
	596	if name == 'node':
	597	self.nodes.append(self.element)
	598	self.element = { }
	599	elif name == 'lan':
	600	self.lans.append(self.element)
	601	self.element = { }
	602	elif name in self.str_subelements:
	603	self.element[name] = self.chars
	604	self.chars = ""
	605	elif name in self.int_subelements:
	606	self.element[name] = int(self.chars)
	607	self.chars = ""
	608	elif name in self.float_subelements:
	609	self.element[name] = float(self.chars)
	610	self.chars = ""
	611
	612	def found_chars(self, data):
	613	self.chars += data.rstrip()
	614
	615
	616	tp = topo_parse();
	617	parser = xml.parsers.expat.ParserCreate()
	618	parser.EndElementHandler = tp.end_element
	619	parser.CharacterDataHandler = tp.found_chars
	620
	621	parser.Parse(str)
	622
	623	return tp.topo
	624
[0d830de]	625
	626	def genviz(self, topo):
[866c983]	627	"""
	628	Generate the visualization the virtual topology
	629	"""
	630
	631	neato = "/usr/local/bin/neato"
	632	# These are used to parse neato output and to create the visualization
	633	# file.
	634	vis_re = re.compile('^\s"?([\w\-]+)"?\s+\[.pos="(\d+),(\d+)"')
	635	vis_fmt = "<node><name>%s</name><x>%s</x><y>%s</y><type>" + \
	636	"%s</type></node>"
	637
	638	try:
	639	# Node names
	640	nodes = [ n['vname'] for n in topo['node'] ]
	641	topo_lans = topo['lan']
[cc8d8e9]	642	except KeyError, e:
	643	raise service_error(service_error.internal, "Bad topology: %s" %e)
[866c983]	644
	645	lans = { }
	646	links = { }
	647
	648	# Walk through the virtual topology, organizing the connections into
	649	# 2-node connections (links) and more-than-2-node connections (lans).
	650	# When a lan is created, it's added to the list of nodes (there's a
	651	# node in the visualization for the lan).
	652	for l in topo_lans:
	653	if links.has_key(l['vname']):
	654	if len(links[l['vname']]) < 2:
	655	links[l['vname']].append(l['vnode'])
	656	else:
	657	nodes.append(l['vname'])
	658	lans[l['vname']] = links[l['vname']]
	659	del links[l['vname']]
	660	lans[l['vname']].append(l['vnode'])
	661	elif lans.has_key(l['vname']):
	662	lans[l['vname']].append(l['vnode'])
	663	else:
	664	links[l['vname']] = [ l['vnode'] ]
	665
	666
	667	# Open up a temporary file for dot to turn into a visualization
	668	try:
	669	df, dotname = tempfile.mkstemp()
	670	dotfile = os.fdopen(df, 'w')
	671	except IOError:
	672	raise service_error(service_error.internal,
	673	"Failed to open file in genviz")
	674
[db6b092]	675	try:
	676	dnull = open('/dev/null', 'w')
	677	except IOError:
	678	service_error(service_error.internal,
[886307f]	679	"Failed to open /dev/null in genviz")
	680
[866c983]	681	# Generate a dot/neato input file from the links, nodes and lans
	682	try:
	683	print >>dotfile, "graph G {"
	684	for n in nodes:
	685	print >>dotfile, '\t"%s"' % n
	686	for l in links.keys():
	687	print >>dotfile, '\t"%s" -- "%s"' % tuple(links[l])
	688	for l in lans.keys():
	689	for n in lans[l]:
	690	print >>dotfile, '\t "%s" -- "%s"' % (n,l)
	691	print >>dotfile, "}"
	692	dotfile.close()
	693	except TypeError:
	694	raise service_error(service_error.internal,
	695	"Single endpoint link in vtopo")
	696	except IOError:
	697	raise service_error(service_error.internal, "Cannot write dot file")
	698
	699	# Use dot to create a visualization
	700	dot = Popen([neato, '-Gstart=rand', '-Gepsilon=0.005', '-Gmaxiter=2000',
[886307f]	701	'-Gpack=true', dotname], stdout=PIPE, stderr=dnull,
[db6b092]	702	close_fds=True)
	703	dnull.close()
[866c983]	704
	705	# Translate dot to vis format
	706	vis_nodes = [ ]
	707	vis = { 'node': vis_nodes }
	708	for line in dot.stdout:
	709	m = vis_re.match(line)
	710	if m:
	711	vn = m.group(1)
	712	vis_node = {'name': vn, \
	713	'x': float(m.group(2)),\
	714	'y' : float(m.group(3)),\
	715	}
	716	if vn in links.keys() or vn in lans.keys():
	717	vis_node['type'] = 'lan'
	718	else:
	719	vis_node['type'] = 'node'
	720	vis_nodes.append(vis_node)
	721	rv = dot.wait()
	722
	723	os.remove(dotname)
	724	if rv == 0 : return vis
	725	else: return None
[d0ae12d]	726
[99eb8cf]	727	def get_access(self, tb, nodes, tbparam, master, export_project,
[e02cd14]	728	access_user, services):
[866c983]	729	"""
	730	Get access to testbed through fedd and set the parameters for that tb
	731	"""
	732	uri = self.tbmap.get(tb, None)
	733	if not uri:
	734	raise service_error(serice_error.server_config,
	735	"Unknown testbed: %s" % tb)
	736
[8218a3b]	737	# Tweak search order so that if there are entries in access_user that
	738	# have a project matching the export project, we try them first
	739	if export_project and export_project.has_key('localname'):
	740	pn = export_project['localname']
	741
	742	access_sequence = [ (p, u) for p, u in access_user if p == pn]
	743	access_sequence.extend([(p, u) for p, u in access_user if p != pn])
	744	else:
	745	access_sequence = access_user
	746
	747	for p, u in access_sequence:
[866c983]	748	self.log.debug(("[get_access] Attempting access from (%s, %s) " + \
	749	"to %s") % ((p or "None"), u, uri))
	750
	751	if p:
	752	# Request with user and project specified
	753	req = {\
	754	'destinationTestbed' : { 'uri' : uri },
[3bddd24]	755	'credential': [ "project: %s" % p, "user: %s" % u],
[866c983]	756	'allocID' : { 'localname': 'test' },
	757	}
	758	else:
	759	# Request with only user specified
	760	req = {\
	761	'destinationTestbed' : { 'uri' : uri },
[3bddd24]	762	'credential': [ 'user: %s' % u ],
[866c983]	763	'user': [ {'userID': { 'localname': u } } ],
	764	'allocID' : { 'localname': 'test' },
	765	}
	766
	767	if tb == master:
	768	# NB, the export_project parameter is a dict that includes
	769	# the type
	770	req['exportProject'] = export_project
[e02cd14]	771	req['service'] = [
	772	{ 'name': 'userconfig', 'visibility': 'export'},
	773	{ 'name': 'SMB', 'visibility': 'export'},
	774	{ 'name': 'seer', 'visibility': 'export'},
	775	{ 'name': 'tmcd', 'visibility': 'export'},
	776	]
[866c983]	777
	778	# node resources if any
	779	if nodes != None and len(nodes) > 0:
	780	rnodes = [ ]
	781	for n in nodes:
	782	rn = { }
	783	image, hw, count = n.split(":")
	784	if image: rn['image'] = [ image ]
	785	if hw: rn['hardware'] = [ hw ]
	786	if count and int(count) >0 : rn['count'] = int(count)
	787	rnodes.append(rn)
	788	req['resources']= { }
	789	req['resources']['node'] = rnodes
	790
	791	try:
	792	if self.local_access.has_key(uri):
	793	# Local access call
	794	req = { 'RequestAccessRequestBody' : req }
	795	r = self.local_access[uri].RequestAccess(req,
	796	fedid(file=self.cert_file))
	797	r = { 'RequestAccessResponseBody' : r }
	798	else:
	799	r = self.call_RequestAccess(uri, req,
	800	self.cert_file, self.cert_pwd, self.trusted_certs)
	801	except service_error, e:
	802	if e.code == service_error.access:
	803	self.log.debug("[get_access] Access denied")
	804	r = None
	805	continue
	806	else:
	807	raise e
	808
[e19b75c]	809	if r.has_key('RequestAccessResponseBody'):
	810	# Through to here we have a valid response, not a fault.
	811	# Access denied is a fault, so something better or worse than
	812	# access denied has happened.
	813	r = r['RequestAccessResponseBody']
	814	self.log.debug("[get_access] Access granted")
	815	break
	816	else:
	817	raise service_error(service_error.protocol,
	818	"Bad proxy response")
	819
	820	if not r:
	821	raise service_error(service_error.access,
	822	"Access denied by %s (%s)" % (tb, uri))
[db6b092]	823
[4afcfc4]	824	tbparam[tb] = {
[69692a9]	825	"allocID" : r['allocID'],
	826	"uri": uri,
[4afcfc4]	827	}
[e02cd14]	828	if 'service' in r:
	829	services.extend(r['service'])
[4afcfc4]	830
	831	# Add attributes to parameter space. We don't allow attributes to
	832	# overlay any parameters already installed.
	833	for a in r['fedAttr']:
	834	try:
	835	if a['attribute'] and \
	836	isinstance(a['attribute'], basestring)\
	837	and not tbparam[tb].has_key(a['attribute'].lower()):
	838	tbparam[tb][a['attribute'].lower()] = a['value']
	839	except KeyError:
	840	self.log.error("Bad attribute in response: %s" % a)
[db6b092]	841
[69692a9]	842	def release_access(self, tb, aid, uri=None):
[e19b75c]	843	"""
	844	Release access to testbed through fedd
	845	"""
[db6b092]	846
[69692a9]	847	if not uri:
	848	uri = self.tbmap.get(tb, None)
[e19b75c]	849	if not uri:
[69692a9]	850	raise service_error(service_error.server_config,
[e19b75c]	851	"Unknown testbed: %s" % tb)
[db6b092]	852
[e19b75c]	853	if self.local_access.has_key(uri):
	854	resp = self.local_access[uri].ReleaseAccess(\
	855	{ 'ReleaseAccessRequestBody' : {'allocID': aid},},
	856	fedid(file=self.cert_file))
	857	resp = { 'ReleaseAccessResponseBody': resp }
	858	else:
	859	resp = self.call_ReleaseAccess(uri, {'allocID': aid},
	860	self.cert_file, self.cert_pwd, self.trusted_certs)
[db6b092]	861
[e19b75c]	862	# better error coding
[db6b092]	863
[e19b75c]	864	def remote_splitter(self, uri, desc, master):
[db6b092]	865
[e19b75c]	866	req = {
	867	'description' : { 'ns2description': desc },
	868	'master': master,
	869	'include_fedkit': bool(self.fedkit),
	870	'include_gatewaykit': bool(self.gatewaykit)
[db6b092]	871	}
	872
[e19b75c]	873	r = self.call_Ns2Split(uri, req, self.cert_file, self.cert_pwd,
	874	self.trusted_certs)
	875
	876	if r.has_key('Ns2SplitResponseBody'):
	877	r = r['Ns2SplitResponseBody']
	878	if r.has_key('output'):
	879	return r['output'].splitlines()
	880	else:
	881	raise service_error(service_error.protocol,
	882	"Bad splitter response (no output)")
	883	else:
	884	raise service_error(service_error.protocol, "Bad splitter response")
[cc8d8e9]	885
[e19b75c]	886	class start_segment:
[fd556d1]	887	def __init__(self, debug=False, log=None, testbed="", cert_file=None,
[f07fa49]	888	cert_pwd=None, trusted_certs=None, caller=None,
	889	log_collector=None):
[cc8d8e9]	890	self.log = log
	891	self.debug = debug
	892	self.cert_file = cert_file
	893	self.cert_pwd = cert_pwd
	894	self.trusted_certs = None
	895	self.caller = caller
[fd556d1]	896	self.testbed = testbed
[f07fa49]	897	self.log_collector = log_collector
[69692a9]	898	self.response = None
[cc8d8e9]	899
[e02cd14]	900	def __call__(self, uri, aid, topo, master, attrs=None, connInfo=None,
	901	services=None):
[cc8d8e9]	902	req = {
	903	'allocID': { 'fedid' : aid },
	904	'segmentdescription': {
	905	'topdldescription': topo.to_dict(),
	906	},
[ecca6eb]	907	'master': master,
[cc8d8e9]	908	}
[e02cd14]	909
	910	if connInfo:
	911	req['connection'] = connInfo
	912	# Add services to request. The master exports, everyone else
	913	# imports.
	914	if services:
	915	svcs = [ x.copy() for x in services]
	916	for s in svcs:
	917	if master: s['visibility'] = 'export'
	918	else: s['visibility'] = 'import'
	919	req['service'] = svcs
[6c57fe9]	920	if attrs:
	921	req['fedAttr'] = attrs
[cc8d8e9]	922
[fd556d1]	923	try:
[13e3dd2]	924	self.log.debug("Calling StartSegment at %s " % uri)
[5b74b63]	925	print req
[fd556d1]	926	r = self.caller(uri, req, self.cert_file, self.cert_pwd,
	927	self.trusted_certs)
[f07fa49]	928	if r.has_key('StartSegmentResponseBody'):
	929	lval = r['StartSegmentResponseBody'].get('allocationLog',
	930	None)
	931	if lval and self.log_collector:
	932	for line in lval.splitlines(True):
	933	self.log_collector.write(line)
[69692a9]	934	self.response = r
[f07fa49]	935	else:
	936	raise service_error(service_error.internal,
	937	"Bad response!?: %s" %r)
[fd556d1]	938	return True
	939	except service_error, e:
	940	self.log.error("Start segment failed on %s: %s" % \
	941	(self.testbed, e))
	942	return False
[cc8d8e9]	943
	944
[5ae3857]	945
[e19b75c]	946	class terminate_segment:
[fd556d1]	947	def __init__(self, debug=False, log=None, testbed="", cert_file=None,
[5ae3857]	948	cert_pwd=None, trusted_certs=None, caller=None):
	949	self.log = log
	950	self.debug = debug
	951	self.cert_file = cert_file
	952	self.cert_pwd = cert_pwd
	953	self.trusted_certs = None
	954	self.caller = caller
[fd556d1]	955	self.testbed = testbed
[5ae3857]	956
	957	def __call__(self, uri, aid ):
	958	req = {
	959	'allocID': aid ,
	960	}
[fd556d1]	961	try:
	962	r = self.caller(uri, req, self.cert_file, self.cert_pwd,
	963	self.trusted_certs)
	964	return True
	965	except service_error, e:
	966	self.log.error("Terminate segment failed on %s: %s" % \
	967	(self.testbed, e))
	968	return False
[db6b092]	969
	970
[7183b48]	971	def allocate_resources(self, allocated, master, eid, expid,
[f07fa49]	972	tbparams, topo, tmpdir, alloc_log=None, log_collector=None,
[e02cd14]	973	attrs=None, connInfo={}, services=[]):
[69692a9]	974	def get_vlan(r):
	975	if r.has_key('StartSegmentResponseBody'):
	976	srb = r['StartSegmentResponseBody']
	977	if srb.has_key('fedAttr'):
	978	for k, v in [ (a['attribute'], a['value']) \
	979	for a in srb['fedAttr']]:
	980	if k == 'vlan': return v
	981	return None
	982
[cc8d8e9]	983	started = { } # Testbeds where a sub-experiment started
	984	# successfully
	985
	986	# XXX
	987	fail_soft = False
	988
[3132419]	989	non_transit = [ k for k in allocated.keys() \
	990	if not topo[k].get_attribute('transit')]
[69692a9]	991	transit = [ k for k in allocated.keys() \
	992	if topo[k].get_attribute('transit')]
	993
[cc8d8e9]	994	log = alloc_log or self.log
	995
	996	thread_pool = self.thread_pool(self.nthreads)
	997	threads = [ ]
	998
[69692a9]	999	for tb in transit:
	1000	uri = tbparams[tb]['uri']
[cc8d8e9]	1001	if tbparams[tb].has_key('allocID') and \
	1002	tbparams[tb]['allocID'].has_key('fedid'):
	1003	aid = tbparams[tb]['allocID']['fedid']
	1004	else:
	1005	raise service_error(service_error.internal,
	1006	"No alloc id for testbed %s !?" % tb)
	1007
[69692a9]	1008	m = re.search('(\d+)', tb)
	1009	if m:
	1010	to_repl = "unassigned%s" % m.group(1)
	1011	else:
	1012	raise service_error(service_error.internal,
	1013	"Bad dynamic allocation name")
	1014	break
	1015
	1016	ss = self.start_segment(log=log, debug=self.debug,
[6409cc5]	1017	testbed=tb, cert_file=self.cert_file,
[69692a9]	1018	cert_pwd=self.cert_pwd,
	1019	trusted_certs=self.trusted_certs,
	1020	caller=self.call_StartSegment,
	1021	log_collector=log_collector)
	1022	t = self.pooled_thread(
	1023	target=ss,
[e02cd14]	1024	args =(uri, aid, topo[tb], False, attrs, connInfo[tb],
	1025	services),
[69692a9]	1026	name=tb, pdata=thread_pool, trace_file=self.trace_file)
	1027	threads.append(t)
	1028	t.start()
	1029	# Wait until the this transit node finishes (keep pinging the log,
	1030	# though)
	1031
	1032	mins = 0
	1033	while not thread_pool.wait_for_all_done(60.0):
	1034	mins += 1
[a588632]	1035	alloc_log.info("Waiting for transit (it has been %d mins)" \
[69692a9]	1036	% mins)
	1037
	1038	if t.rv:
	1039	vlan = get_vlan(ss.response)
	1040	if vlan is not None:
	1041	for k, t in topo.items():
	1042	for e in t.elements:
	1043	for i in e.interface:
	1044	vl = i.get_attribute('dragon_vlan')
	1045	if vl is not None and vl == to_repl:
	1046	i.set_attribute('dragon_vlan', vlan)
	1047	else:
	1048	break
	1049	thread_pool.clear()
	1050
[cc8d8e9]	1051
[69692a9]	1052	failed = [ t.getName() for t in threads if not t.rv ]
[32e7d93]	1053
[69692a9]	1054	if len(failed) == 0:
[3132419]	1055	for tb in non_transit:
[69692a9]	1056	# Create and start a thread to start the segment, and save it
	1057	# to get the return value later
	1058	thread_pool.wait_for_slot()
	1059	uri = self.tbmap.get(tb, None)
	1060	if not uri:
	1061	raise service_error(service_error.internal,
	1062	"Unknown testbed %s !?" % tb)
	1063
	1064	if tbparams[tb].has_key('allocID') and \
	1065	tbparams[tb]['allocID'].has_key('fedid'):
	1066	aid = tbparams[tb]['allocID']['fedid']
	1067	else:
	1068	raise service_error(service_error.internal,
	1069	"No alloc id for testbed %s !?" % tb)
	1070
	1071	t = self.pooled_thread(\
	1072	target=self.start_segment(log=log, debug=self.debug,
	1073	testbed=tb, cert_file=self.cert_file,
	1074	cert_pwd=self.cert_pwd,
	1075	trusted_certs=self.trusted_certs,
	1076	caller=self.call_StartSegment,
	1077	log_collector=log_collector),
[3132419]	1078	args=(uri, aid, topo[tb], tb == master,
	1079	attrs, connInfo[tb], services),
[e02cd14]	1080	name=tb,
[69692a9]	1081	pdata=thread_pool, trace_file=self.trace_file)
	1082	threads.append(t)
	1083	t.start()
	1084
	1085	# Wait until all finish (keep pinging the log, though)
	1086	mins = 0
	1087	while not thread_pool.wait_for_all_done(60.0):
	1088	mins += 1
	1089	alloc_log.info("Waiting for sub threads (it has been %d mins)" \
	1090	% mins)
	1091
	1092	thread_pool.clear()
[cc8d8e9]	1093
	1094	failed = [ t.getName() for t in threads if not t.rv ]
	1095	succeeded = [tb for tb in allocated.keys() if tb not in failed]
[3132419]	1096
[cc8d8e9]	1097	# If one failed clean up, unless fail_soft is set
[32e7d93]	1098	if failed:
[cc8d8e9]	1099	if not fail_soft:
	1100	thread_pool.clear()
	1101	for tb in succeeded:
	1102	# Create and start a thread to stop the segment
	1103	thread_pool.wait_for_slot()
[0fa1729]	1104	uri = tbparams[tb]['uri']
[cc8d8e9]	1105	t = self.pooled_thread(\
[32e7d93]	1106	target=self.terminate_segment(log=log,
[fd556d1]	1107	testbed=tb,
[32e7d93]	1108	cert_file=self.cert_file,
	1109	cert_pwd=self.cert_pwd,
	1110	trusted_certs=self.trusted_certs,
	1111	caller=self.call_TerminateSegment),
	1112	args=(uri, tbparams[tb]['federant']['allocID']),
	1113	name=tb,
[cc8d8e9]	1114	pdata=thread_pool, trace_file=self.trace_file)
	1115	t.start()
	1116	# Wait until all finish
	1117	thread_pool.wait_for_all_done()
	1118
	1119	# release the allocations
	1120	for tb in tbparams.keys():
[69692a9]	1121	self.release_access(tb, tbparams[tb]['allocID'],
	1122	tbparams[tb].get('uri', None))
[cc8d8e9]	1123	# Remove the placeholder
	1124	self.state_lock.acquire()
	1125	self.state[eid]['experimentStatus'] = 'failed'
	1126	if self.state_filename: self.write_state()
	1127	self.state_lock.release()
	1128
	1129	log.error("Swap in failed on %s" % ",".join(failed))
	1130	return
	1131	else:
	1132	log.info("[start_segment]: Experiment %s active" % eid)
	1133
	1134
	1135	# Walk up tmpdir, deleting as we go
[69692a9]	1136	if self.cleanup:
	1137	log.debug("[start_experiment]: removing %s" % tmpdir)
	1138	for path, dirs, files in os.walk(tmpdir, topdown=False):
	1139	for f in files:
	1140	os.remove(os.path.join(path, f))
	1141	for d in dirs:
	1142	os.rmdir(os.path.join(path, d))
	1143	os.rmdir(tmpdir)
	1144	else:
	1145	log.debug("[start_experiment]: not removing %s" % tmpdir)
[cc8d8e9]	1146
	1147	# Insert the experiment into our state and update the disk copy
	1148	self.state_lock.acquire()
	1149	self.state[expid]['experimentStatus'] = 'active'
	1150	self.state[eid] = self.state[expid]
	1151	if self.state_filename: self.write_state()
	1152	self.state_lock.release()
	1153	return
	1154
	1155
[895a133]	1156	def add_kit(self, e, kit):
	1157	"""
	1158	Add a Software object created from the list of (install, location)
	1159	tuples passed as kit to the software attribute of an object e. We
	1160	do this enough to break out the code, but it's kind of a hack to
	1161	avoid changing the old tuple rep.
	1162	"""
	1163
	1164	s = [ topdl.Software(install=i, location=l) for i, l in kit]
	1165
	1166	if isinstance(e.software, list): e.software.extend(s)
	1167	else: e.software = s
	1168
	1169
[a3ad8bd]	1170	def create_experiment_state(self, fid, req, expid, expcert,
	1171	state='starting'):
[895a133]	1172	"""
	1173	Create the initial entry in the experiment's state. The expid and
	1174	expcert are the experiment's fedid and certifacte that represents that
	1175	ID, which are installed in the experiment state. If the request
	1176	includes a suggested local name that is used if possible. If the local
	1177	name is already taken by an experiment owned by this user that has
[a3ad8bd]	1178	failed, it is overwritten. Otherwise new letters are added until a
[895a133]	1179	valid localname is found. The generated local name is returned.
	1180	"""
	1181
	1182	if req.has_key('experimentID') and \
	1183	req['experimentID'].has_key('localname'):
	1184	overwrite = False
	1185	eid = req['experimentID']['localname']
	1186	# If there's an old failed experiment here with the same local name
	1187	# and accessible by this user, we'll overwrite it, otherwise we'll
	1188	# fall through and do the collision avoidance.
	1189	old_expid = self.get_experiment_fedid(eid)
	1190	if old_expid and self.check_experiment_access(fid, old_expid):
	1191	self.state_lock.acquire()
	1192	status = self.state[eid].get('experimentStatus', None)
	1193	if status and status == 'failed':
	1194	# remove the old access attribute
	1195	self.auth.unset_attribute(fid, old_expid)
	1196	overwrite = True
	1197	del self.state[eid]
	1198	del self.state[old_expid]
	1199	self.state_lock.release()
	1200	self.state_lock.acquire()
	1201	while (self.state.has_key(eid) and not overwrite):
	1202	eid += random.choice(string.ascii_letters)
	1203	# Initial state
	1204	self.state[eid] = {
	1205	'experimentID' : \
	1206	[ { 'localname' : eid }, {'fedid': expid } ],
[a3ad8bd]	1207	'experimentStatus': state,
[895a133]	1208	'experimentAccess': { 'X509' : expcert },
	1209	'owner': fid,
	1210	'log' : [],
	1211	}
	1212	self.state[expid] = self.state[eid]
	1213	if self.state_filename: self.write_state()
	1214	self.state_lock.release()
	1215	else:
	1216	eid = self.exp_stem
	1217	for i in range(0,5):
	1218	eid += random.choice(string.ascii_letters)
	1219	self.state_lock.acquire()
	1220	while (self.state.has_key(eid)):
	1221	eid = self.exp_stem
	1222	for i in range(0,5):
	1223	eid += random.choice(string.ascii_letters)
	1224	# Initial state
	1225	self.state[eid] = {
	1226	'experimentID' : \
	1227	[ { 'localname' : eid }, {'fedid': expid } ],
[a3ad8bd]	1228	'experimentStatus': state,
[895a133]	1229	'experimentAccess': { 'X509' : expcert },
	1230	'owner': fid,
	1231	'log' : [],
	1232	}
	1233	self.state[expid] = self.state[eid]
	1234	if self.state_filename: self.write_state()
	1235	self.state_lock.release()
	1236
	1237	return eid
	1238
	1239
	1240	def allocate_ips_to_topo(self, top):
	1241	"""
[69692a9]	1242	Add an ip4_address attribute to all the hosts in the topology, based on
[895a133]	1243	the shared substrates on which they sit. An /etc/hosts file is also
[69692a9]	1244	created and returned as a list of hostfiles entries. We also return
	1245	the allocator, because we may need to allocate IPs to portals
	1246	(specifically DRAGON portals).
[895a133]	1247	"""
	1248	subs = sorted(top.substrates,
	1249	cmp=lambda x,y: cmp(len(x.interfaces), len(y.interfaces)),
	1250	reverse=True)
	1251	ips = ip_allocator(int(ip_addr("10.0.0.0")), 2 **24)
	1252	ifs = { }
	1253	hosts = [ ]
	1254
	1255	for idx, s in enumerate(subs):
	1256	a = ips.allocate(len(s.interfaces)+2)
	1257	if a :
	1258	base, num = a
	1259	if num < len(s.interfaces) +2 :
	1260	raise service_error(service_error.internal,
	1261	"Allocator returned wrong number of IPs??")
	1262	else:
	1263	raise service_error(service_error.req,
	1264	"Cannot allocate IP addresses")
	1265
	1266	base += 1
	1267	for i in s.interfaces:
	1268	i.attribute.append(
	1269	topdl.Attribute('ip4_address',
	1270	"%s" % ip_addr(base)))
	1271	hname = i.element.name[0]
	1272	if ifs.has_key(hname):
	1273	hosts.append("%s\t%s-%s %s-%d" % \
	1274	(ip_addr(base), hname, s.name, hname,
	1275	ifs[hname]))
	1276	else:
	1277	ifs[hname] = 0
	1278	hosts.append("%s\t%s-%s %s-%d %s" % \
	1279	(ip_addr(base), hname, s.name, hname,
	1280	ifs[hname], hname))
	1281
	1282	ifs[hname] += 1
	1283	base += 1
[69692a9]	1284	return hosts, ips
[895a133]	1285
[99eb8cf]	1286	def get_access_to_testbeds(self, testbeds, access_user,
[e02cd14]	1287	export_project, master, allocated, tbparams, services):
[895a133]	1288	"""
	1289	Request access to the various testbeds required for this instantiation
	1290	(passed in as testbeds). User, access_user, expoert_project and master
	1291	are used to construct the correct requests. Per-testbed parameters are
	1292	returned in tbparams.
	1293	"""
	1294	for tb in testbeds:
[99eb8cf]	1295	self.get_access(tb, None, tbparams, master,
[e02cd14]	1296	export_project, access_user, services)
[895a133]	1297	allocated[tb] = 1
	1298
	1299	def split_topology(self, top, topo, testbeds, eid, master, tbparams):
	1300	"""
[e02cd14]	1301	Create the sub-topologies that are needed for experiment instantiation.
[895a133]	1302	"""
	1303	for tb in testbeds:
	1304	topo[tb] = top.clone()
	1305	to_delete = [ ]
[e02cd14]	1306	# XXX: copy in for loop to simplify
[895a133]	1307	for e in topo[tb].elements:
	1308	etb = e.get_attribute('testbed')
	1309	if etb and etb != tb:
	1310	for i in e.interface:
	1311	for s in i.subs:
	1312	try:
	1313	s.interfaces.remove(i)
	1314	except ValueError:
	1315	raise service_error(service_error.internal,
	1316	"Can't remove interface??")
	1317	to_delete.append(e)
	1318	for e in to_delete:
	1319	topo[tb].elements.remove(e)
	1320	topo[tb].make_indices()
	1321
	1322	for e in [ e for e in topo[tb].elements \
	1323	if isinstance(e,topdl.Computer)]:
	1324	if self.fedkit: self.add_kit(e, self.fedkit)
	1325
[13e3dd2]	1326	def new_portal_node(self, st, dt, tbparams, master, eid, myname, desthost,
[5b74b63]	1327	portal_type, iface_desc=(), conn_type="ssh", conn_attrs=[]):
[e02cd14]	1328	"""
	1329	Return a new internet portal node and a dict with the connectionInfo to
	1330	be attached.
	1331	"""
[d87778f]	1332	dproject = tbparams[dt].get('project', 'project')
	1333	ddomain = tbparams[dt].get('domain', ".example.com")
[13e3dd2]	1334	mdomain = tbparams[master].get('domain', '.example.com')
[e02cd14]	1335	mproject = tbparams[master].get('project', 'project')
[13e3dd2]	1336	muser = tbparams[master].get('user', 'root')
	1337	smbshare = tbparams[master].get('smbshare', 'USERS')
[e02cd14]	1338
[13e3dd2]	1339	if st == master or dt == master:
	1340	active = ("%s" % (st == master))
	1341	else:
[e02cd14]	1342	active = ("%s" % (st > dt))
[13e3dd2]	1343
[69692a9]	1344	ifaces = [ ]
	1345	for sub, attrs in iface_desc:
	1346	inf = topdl.Interface(
[5b74b63]	1347	name="inf%03d" % len(ifaces),
[69692a9]	1348	substrate=sub,
	1349	attribute=[
	1350	topdl.Attribute(
	1351	attribute=n,
	1352	value = v)
	1353	for n, v in attrs
[13e3dd2]	1354	]
[69692a9]	1355	)
	1356	ifaces.append(inf)
[e02cd14]	1357	info = {
[5b74b63]	1358	"type" : conn_type,
[e02cd14]	1359	"portal": myname,
[5b74b63]	1360	'peer': desthost,
[e02cd14]	1361	'fedAttr': [
	1362	{ 'attribute': 'masterdomain', 'value': mdomain},
	1363	{ 'attribute': 'masterexperiment', 'value':
	1364	"%s/%s" % (mproject, eid)},
[d87778f]	1365	{ 'attribute': 'active', 'value': active},
[e02cd14]	1366	# Move to SMB service description
	1367	{ 'attribute': 'masteruser', 'value': muser},
	1368	{ 'attribute': 'smbshare', 'value': smbshare},
	1369	],
	1370	}
[5b74b63]	1371
	1372	if conn_type == 'transit':
	1373	info['member'] = [ { 'element': myname, 'interface': 'inf000'} ]
	1374	if conn_attrs:
	1375	info['fedAttr'].extend(con_attrs)
	1376
[e02cd14]	1377	return (topdl.Computer(
[13e3dd2]	1378	name=myname,
	1379	attribute=[
	1380	topdl.Attribute(attribute=n,value=v)
	1381	for n, v in (\
	1382	('portal', 'true'),
	1383	('portal_type', portal_type),
[641bb66]	1384	)
[13e3dd2]	1385	],
	1386	interface=ifaces,
[e02cd14]	1387	), info)
[13e3dd2]	1388
	1389	def new_portal_substrate(self, st, dt, eid, tbparams):
	1390	ddomain = tbparams[dt].get('domain', ".example.com")
	1391	dproject = tbparams[dt].get('project', 'project')
	1392	tsubstrate = \
	1393	topdl.Substrate(name='%s-%s' % (st, dt),
	1394	attribute= [
	1395	topdl.Attribute(
	1396	attribute='portal',
	1397	value='true')
	1398	]
	1399	)
	1400	segment_element = topdl.Segment(
	1401	id= tbparams[dt]['allocID'],
	1402	type='emulab',
	1403	uri = self.tbmap.get(dt, None),
	1404	interface=[
	1405	topdl.Interface(
	1406	substrate=tsubstrate.name),
	1407	],
	1408	attribute = [
	1409	topdl.Attribute(attribute=n, value=v)
	1410	for n, v in (\
	1411	('domain', ddomain),
	1412	('experiment', "%s/%s" % \
	1413	(dproject, eid)),)
	1414	],
	1415	)
	1416
	1417	return (tsubstrate, segment_element)
	1418
[69692a9]	1419	def new_dragon_topo(self, idx, sub, topo, tbs, tbparams):
	1420	if sub.capacity is None:
	1421	raise service_error(service_error.internal,
	1422	"Cannot DRAGON split substrate w/o capacity")
	1423	segs = [ ]
	1424	substr = topdl.Substrate(name="dragon%d" % idx,
	1425	capacity=sub.capacity.clone(),
	1426	attribute=[ topdl.Attribute(attribute=n, value=v)
	1427	for n, v, in (\
	1428	('vlan', 'unassigned%d' % idx),)])
	1429	for tb in tbs.keys():
	1430	seg = topdl.Segment(
	1431	id = tbparams[tb]['allocID'],
	1432	type='emulab',
	1433	uri = self.tbmap.get(tb, None),
	1434	interface=[
	1435	topdl.Interface(
	1436	substrate=substr.name),
	1437	],
	1438	attribute=[ topdl.Attribute(
[ecf679e]	1439	attribute='dragon_endpoint',
	1440	value=tbparams[tb]['dragon']),
[69692a9]	1441	]
	1442	)
	1443	if tbparams[tb].has_key('vlans'):
	1444	seg.set_attribute('vlans', tbparams[tb]['vlans'])
	1445	segs.append(seg)
	1446
	1447	topo["dragon%d" %idx] = \
	1448	topdl.Topology(substrates=[substr], elements=segs,
	1449	attribute=[
	1450	topdl.Attribute(attribute="transit", value='true'),
	1451	topdl.Attribute(attribute="dynamic", value='true'),
	1452	topdl.Attribute(attribute="testbed", value='dragon'),
	1453	]
	1454	)
	1455
[5b74b63]	1456	def create_dragon_substrate(self, sub, topo, tbs, tbparams, master, eid,
	1457	connInfo):
[69692a9]	1458	"""
	1459	Add attribiutes to the various elements indicating that they are to be
[5b74b63]	1460	dragon connected and create a dragon segment in topo to be
[69692a9]	1461	instantiated.
	1462	"""
	1463
	1464	def get_substrate_from_topo(name, t):
	1465	for s in t.substrates:
	1466	if s.name == name: return s
	1467	else: return None
	1468
[5b74b63]	1469	# dn is the number of previously created dragon nets. This routine
	1470	# creates a net numbered by dn
[69692a9]	1471	dn = len([x for x in topo.keys() if x.startswith('dragon')])
[5b74b63]	1472	# Count the number of interfaces on this substrate in each testbed from
	1473	# the global topology
[69692a9]	1474	count = { }
[5b74b63]	1475	for e in [ i.element for i in sub.interfaces ]:
[69692a9]	1476	tb = e.get_attribute('testbed')
	1477	count[tb] = count.get(tb, 0) + 1
	1478
[5b74b63]	1479	# Set the attributes in the copies that will allow setup of dragon
	1480	# connections.
[69692a9]	1481	for tb in tbs.keys():
	1482	s = get_substrate_from_topo(sub.name, topo[tb])
	1483	if s:
[5b74b63]	1484	if not connInfo.has_key(tb):
	1485	connInfo[tb] = [ ]
	1486	info = {
	1487	'type': 'transit',
	1488	'member': [ {
	1489	'element': i.element.name[0],
	1490	'interface': i.name
	1491	} for i in s.interfaces ],
	1492	'fedAttr': [ {
	1493	'attribute': 'vlan_id',
	1494	'value': 'unassigned%d' % dn
	1495	} ]
	1496	}
	1497	connInfo[tb].append(info)
[69692a9]	1498	else:
	1499	raise service_error(service_error.internal,
	1500	"No substrate %s in testbed %s" % (sub.name, tb))
	1501
	1502	self.new_dragon_topo(dn, sub, topo, tbs, tbparams)
	1503
	1504	def insert_internet_portals(self, sub, topo, tbs, tbparams, master, eid,
[e02cd14]	1505	segment_substrate, portals, connInfo):
[69692a9]	1506	# More than one testbed is on this substrate. Insert
	1507	# some portals into the subtopologies. st == source testbed,
	1508	# dt == destination testbed.
	1509	for st in tbs.keys():
	1510	if not segment_substrate.has_key(st):
	1511	segment_substrate[st] = { }
	1512	if not portals.has_key(st):
	1513	portals[st] = { }
[e02cd14]	1514	if not connInfo.has_key(st):
	1515	connInfo[st] = [ ]
[69692a9]	1516	for dt in [ t for t in tbs.keys() if t != st]:
	1517	sproject = tbparams[st].get('project', 'project')
	1518	dproject = tbparams[dt].get('project', 'project')
	1519	mproject = tbparams[master].get('project', 'project')
	1520	sdomain = tbparams[st].get('domain', ".example.com")
	1521	ddomain = tbparams[dt].get('domain', ".example.com")
	1522	mdomain = tbparams[master].get('domain', '.example.com')
	1523	muser = tbparams[master].get('user', 'root')
	1524	smbshare = tbparams[master].get('smbshare', 'USERS')
	1525	aid = tbparams[dt]['allocID']['fedid']
	1526	if st == master or dt == master:
	1527	active = ("%s" % (st == master))
	1528	else:
	1529	active = ("%s" %(st > dt))
	1530	if not segment_substrate[st].has_key(dt):
	1531	# Put a substrate and a segment for the connected
	1532	# testbed in there.
	1533	tsubstrate, segment_element = \
	1534	self.new_portal_substrate(st, dt, eid, tbparams)
	1535	segment_substrate[st][dt] = tsubstrate
	1536	topo[st].substrates.append(tsubstrate)
	1537	topo[st].elements.append(segment_element)
	1538
	1539	new_portal = False
	1540	if portals[st].has_key(dt):
	1541	# There's a portal set up to go to this destination.
[e02cd14]	1542	# See if there's room to multiplex this connection on
[69692a9]	1543	# it. If so, add an interface to the portal; if not,
	1544	# set up to add a portal below.
	1545	# [This little festival of braces is just a pop of the
	1546	# last element in the list of portals between st and
	1547	# dt.]
	1548	portal = portals[st][dt][-1]
	1549	mux = len([ i for i in portal.interface \
	1550	if not i.get_attribute('portal')])
	1551	if mux == self.muxmax:
	1552	new_portal = True
	1553	portal_type = "experiment"
	1554	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
[5b74b63]	1555	desthost = "%stunnel%d.%s.%s%s" % (st,
	1556	len(portals[st][dt]), eid.lower(),
	1557	dproject.lower(), ddomain.lower())
[69692a9]	1558	else:
	1559	new_i = topdl.Interface(
[6e44258]	1560	substrate=sub.name,
[69692a9]	1561	attribute=[
	1562	topdl.Attribute(
	1563	attribute='ip4_address',
	1564	value=tbs[dt]
	1565	)
	1566	])
	1567	portal.interface.append(new_i)
	1568	else:
	1569	# First connection to this testbed, make an empty list
	1570	# and set up to add the new portal below
	1571	new_portal = True
	1572	portals[st][dt] = [ ]
	1573	myname = "%stunnel%d" % (dt, len(portals[st][dt]))
[5b74b63]	1574	desthost = "%stunnel%d.%s.%s%s" % (st.lower(),
	1575	len(portals[st][dt]), eid.lower(),
	1576	dproject.lower(), ddomain.lower())
[69692a9]	1577
	1578	if dt == master or st == master: portal_type = "both"
	1579	else: portal_type = "experiment"
	1580
	1581	if new_portal:
	1582	infs = (
	1583	(segment_substrate[st][dt].name,
	1584	(('portal', 'true'),)),
	1585	(sub.name,
	1586	(('ip4_address', tbs[dt]),))
	1587	)
[e02cd14]	1588	portal, info = self.new_portal_node(st, dt, tbparams,
[69692a9]	1589	master, eid, myname, desthost, portal_type,
	1590	infs)
	1591	if self.fedkit:
	1592	self.add_kit(portal, self.fedkit)
	1593	if self.gatewaykit:
	1594	self.add_kit(portal, self.gatewaykit)
	1595
	1596	topo[st].elements.append(portal)
	1597	portals[st][dt].append(portal)
[e02cd14]	1598	connInfo[st].append(info)
[69692a9]	1599
[e02cd14]	1600	def add_control_portal(self, st, dt, master, eid, topo, tbparams, connInfo):
[69692a9]	1601	# Add to the master testbed
	1602	tsubstrate, segment_element = \
	1603	self.new_portal_substrate(st, dt, eid, tbparams)
	1604	myname = "%stunnel" % dt
	1605	desthost = "%stunnel" % st
	1606
[e02cd14]	1607	portal, info = self.new_portal_node(st, dt, tbparams, master,
[69692a9]	1608	eid, myname, desthost, "control",
	1609	((tsubstrate.name,(('portal','true'),)),))
	1610	if self.fedkit:
	1611	self.add_kit(portal, self.fedkit)
	1612	if self.gatewaykit:
	1613	self.add_kit(portal, self.gatewaykit)
	1614
	1615	topo[st].substrates.append(tsubstrate)
	1616	topo[st].elements.append(segment_element)
	1617	topo[st].elements.append(portal)
[e02cd14]	1618	if not connInfo.has_key(st):
	1619	connInfo[st] = [ ]
	1620	connInfo[st].append(info)
[69692a9]	1621
[6e44258]	1622	def new_dragon_portal(self, st, dt, master, eid, myip, dip, idx,
[69692a9]	1623	substrate, tbparams):
	1624	# Add to the master testbed
	1625	myname = "%stunnel" % dt
	1626	desthost = "%s" % ip_addr(dip)
	1627
[5b74b63]	1628	portal, info = self.new_portal_node(st, dt, tbparams, master,
[69692a9]	1629	eid, myname, desthost, "control",
	1630	((substrate.name,(
	1631	('portal','true'),
[5b74b63]	1632	('ip4_address', "%s" % ip_addr(myip)),)),),
	1633	conn_type="transit",
	1634	conn_attrs = [ {
	1635	'attribute': 'vlan_id', 'value': 'unassigned%d' % idx
	1636	}])
[69692a9]	1637	if self.fedkit:
	1638	self.add_kit(portal, self.fedkit)
	1639	if self.gatewaykit:
	1640	self.add_kit(portal, self.gatewaykit)
	1641
[5b74b63]	1642	return portal, info
[69692a9]	1643
[e02cd14]	1644	def add_portals(self, top, topo, eid, master, tbparams, ip_allocator,
	1645	connInfo):
[895a133]	1646	"""
	1647	For each substrate in the main topology, find those that
	1648	have nodes on more than one testbed. Insert portal nodes
	1649	into the copies of those substrates on the sub topologies.
	1650	"""
[13e3dd2]	1651	segment_substrate = { }
	1652	portals = { }
[895a133]	1653	for s in top.substrates:
	1654	# tbs will contain an ip address on this subsrate that is in
	1655	# each testbed.
	1656	tbs = { }
	1657	for i in s.interfaces:
	1658	e = i.element
	1659	tb = e.get_attribute('testbed')
	1660	if tb and not tbs.has_key(tb):
	1661	for i in e.interface:
	1662	if s in i.subs:
	1663	tbs[tb]= i.get_attribute('ip4_address')
	1664	if len(tbs) < 2:
	1665	continue
	1666
[69692a9]	1667	# DRAGON will not create multi-site vlans yet
	1668	if len(tbs) == 2 and \
	1669	all([tbparams[x].has_key('dragon') for x in tbs]):
	1670	self.create_dragon_substrate(s, topo, tbs, tbparams,
[5b74b63]	1671	master, eid, connInfo)
[69692a9]	1672	else:
	1673	self.insert_internet_portals(s, topo, tbs, tbparams, master,
[e02cd14]	1674	eid, segment_substrate, portals, connInfo)
[13e3dd2]	1675
	1676	# Make sure that all the slaves have a control portal back to the
	1677	# master.
	1678	for tb in [ t for t in tbparams.keys() if t != master ]:
	1679	if len([e for e in topo[tb].elements \
	1680	if isinstance(e, topdl.Computer) and \
	1681	e.get_attribute('portal') and \
	1682	e.get_attribute('portal_type') == 'both']) == 0:
	1683
[69692a9]	1684	if tbparams[master].has_key('dragon') \
	1685	and tbparams[tb].has_key('dragon'):
	1686
	1687	idx = len([x for x in topo.keys() \
	1688	if x.startswith('dragon')])
	1689	dip, leng = ip_allocator.allocate(4)
	1690	dip += 1
[6e44258]	1691	mip = dip+1
[69692a9]	1692	csub = topdl.Substrate(
	1693	name="dragon-control-%s" % tb,
	1694	capacity=topdl.Capacity(100000.0, 'max'),
	1695	attribute=[
	1696	topdl.Attribute(
	1697	attribute='portal',
	1698	value='true'
	1699	)
	1700	]
	1701	)
	1702	seg = topdl.Segment(
	1703	id= tbparams[master]['allocID'],
	1704	type='emulab',
	1705	uri = self.tbmap.get(master, None),
	1706	interface=[
	1707	topdl.Interface(
	1708	substrate=csub.name),
	1709	],
	1710	attribute = [
	1711	topdl.Attribute(attribute=n, value=v)
	1712	for n, v in (\
	1713	('domain',
	1714	tbparams[master].get('domain',
	1715	".example.com")),
	1716	('experiment', "%s/%s" % \
	1717	(tbparams[master].get(
	1718	'project',
	1719	'project'),
	1720	eid)),)
	1721	],
	1722	)
[5b74b63]	1723	portal, info = self.new_dragon_portal(tb, master,
	1724	master, eid, dip, mip, idx, csub, tbparams)
[69692a9]	1725	topo[tb].substrates.append(csub)
[5b74b63]	1726	topo[tb].elements.append(portal)
[69692a9]	1727	topo[tb].elements.append(seg)
[5b74b63]	1728	if not connInfo.has_key(tb):
	1729	connInfo[tb] = [info]
	1730	else:
	1731	connInfo[tb].append(info)
[69692a9]	1732
	1733	mcsub = csub.clone()
	1734	seg = topdl.Segment(
	1735	id= tbparams[tb]['allocID'],
	1736	type='emulab',
	1737	uri = self.tbmap.get(tb, None),
	1738	interface=[
	1739	topdl.Interface(
	1740	substrate=csub.name),
	1741	],
	1742	attribute = [
	1743	topdl.Attribute(attribute=n, value=v)
	1744	for n, v in (\
	1745	('domain',
	1746	tbparams[tb].get('domain',
	1747	".example.com")),
	1748	('experiment', "%s/%s" % \
	1749	(tbparams[tb].get('project',
	1750	'project'),
	1751	eid)),)
	1752	],
	1753	)
[5b74b63]	1754	portal, info = self.new_dragon_portal(master, tb, master,
	1755	eid, mip, dip, idx, mcsub, tbparams)
[69692a9]	1756	topo[master].substrates.append(mcsub)
[5b74b63]	1757	topo[master].elements.append(portal)
[69692a9]	1758	topo[master].elements.append(seg)
[5b74b63]	1759	if not connInfo.has_key(master):
	1760	connInfo[master] = [info]
	1761	else:
	1762	connInfo[master].append(info)
[69692a9]	1763
	1764	self.create_dragon_substrate(csub, topo,
	1765	{tb: 1, master:1}, tbparams, master, eid)
	1766	else:
	1767	self.add_control_portal(master, tb, master, eid, topo,
[e02cd14]	1768	tbparams, connInfo)
[69692a9]	1769	self.add_control_portal(tb, master, master, eid, topo,
[e02cd14]	1770	tbparams, connInfo)
[13e3dd2]	1771
	1772	# Connect the portal nodes into the topologies and clear out
[895a133]	1773	# substrates that are not in the topologies
	1774	for tb in tbparams.keys():
	1775	topo[tb].incorporate_elements()
	1776	topo[tb].substrates = \
	1777	[s for s in topo[tb].substrates \
	1778	if len(s.interfaces) >0]
	1779
	1780	def wrangle_software(self, expid, top, topo, tbparams):
	1781	"""
	1782	Copy software out to the repository directory, allocate permissions and
	1783	rewrite the segment topologies to look for the software in local
	1784	places.
	1785	"""
	1786
	1787	# Copy the rpms and tarfiles to a distribution directory from
	1788	# which the federants can retrieve them
	1789	linkpath = "%s/software" % expid
	1790	softdir ="%s/%s" % ( self.repodir, linkpath)
	1791	softmap = { }
	1792	# These are in a list of tuples format (each kit). This comprehension
	1793	# unwraps them into a single list of tuples that initilaizes the set of
	1794	# tuples.
	1795	pkgs = set([ t for l in [self.fedkit, self.gatewaykit] \
	1796	for p, t in l ])
	1797	pkgs.update([x.location for e in top.elements \
	1798	for x in e.software])
	1799	try:
	1800	os.makedirs(softdir)
	1801	except IOError, e:
	1802	raise service_error(
	1803	"Cannot create software directory: %s" % e)
	1804	# The actual copying. Everything's converted into a url for copying.
	1805	for pkg in pkgs:
	1806	loc = pkg
	1807
	1808	scheme, host, path = urlparse(loc)[0:3]
	1809	dest = os.path.basename(path)
	1810	if not scheme:
	1811	if not loc.startswith('/'):
	1812	loc = "/%s" % loc
	1813	loc = "file://%s" %loc
	1814	try:
	1815	u = urlopen(loc)
	1816	except Exception, e:
	1817	raise service_error(service_error.req,
	1818	"Cannot open %s: %s" % (loc, e))
	1819	try:
	1820	f = open("%s/%s" % (softdir, dest) , "w")
	1821	self.log.debug("Writing %s/%s" % (softdir,dest) )
	1822	data = u.read(4096)
	1823	while data:
	1824	f.write(data)
	1825	data = u.read(4096)
	1826	f.close()
	1827	u.close()
	1828	except Exception, e:
	1829	raise service_error(service_error.internal,
	1830	"Could not copy %s: %s" % (loc, e))
	1831	path = re.sub("/tmp", "", linkpath)
	1832	# XXX
	1833	softmap[pkg] = \
[7183b48]	1834	"%s/%s/%s" %\
	1835	( self.repo_url, path, dest)
[895a133]	1836
	1837	# Allow the individual segments to access the software.
	1838	for tb in tbparams.keys():
	1839	self.auth.set_attribute(tbparams[tb]['allocID']['fedid'],
	1840	"/%s/%s" % ( path, dest))
	1841
	1842	# Convert the software locations in the segments into the local
	1843	# copies on this host
	1844	for soft in [ s for tb in topo.values() \
	1845	for e in tb.elements \
	1846	if getattr(e, 'software', False) \
	1847	for s in e.software ]:
	1848	if softmap.has_key(soft.location):
	1849	soft.location = softmap[soft.location]
	1850
	1851
[a3ad8bd]	1852	def new_experiment(self, req, fid):
	1853	"""
	1854	The external interface to empty initial experiment creation called from
	1855	the dispatcher.
	1856
	1857	Creates a working directory, splits the incoming description using the
	1858	splitter script and parses out the avrious subsections using the
	1859	lcasses above. Once each sub-experiment is created, use pooled threads
	1860	to instantiate them and start it all up.
	1861	"""
	1862	if not self.auth.check_attribute(fid, 'new'):
	1863	raise service_error(service_error.access, "New access denied")
	1864
	1865	try:
	1866	tmpdir = tempfile.mkdtemp(prefix="split-")
	1867	except IOError:
	1868	raise service_error(service_error.internal, "Cannot create tmp dir")
	1869
	1870	try:
	1871	access_user = self.accessdb[fid]
	1872	except KeyError:
	1873	raise service_error(service_error.internal,
	1874	"Access map and authorizer out of sync in " + \
[7183b48]	1875	"new_experiment for fedid %s" % fid)
[a3ad8bd]	1876
	1877	pid = "dummy"
	1878	gid = "dummy"
	1879
	1880	req = req.get('NewRequestBody', None)
	1881	if not req:
	1882	raise service_error(service_error.req,
	1883	"Bad request format (no NewRequestBody)")
	1884
	1885	# Generate an ID for the experiment (slice) and a certificate that the
	1886	# allocator can use to prove they own it. We'll ship it back through
	1887	# the encrypted connection.
	1888	(expid, expcert) = generate_fedid("test", dir=tmpdir, log=self.log)
	1889
	1890	#now we're done with the tmpdir, and it should be empty
	1891	if self.cleanup:
	1892	self.log.debug("[new_experiment]: removing %s" % tmpdir)
	1893	os.rmdir(tmpdir)
	1894	else:
	1895	self.log.debug("[new_experiment]: not removing %s" % tmpdir)
	1896
	1897	eid = self.create_experiment_state(fid, req, expid, expcert,
	1898	state='empty')
	1899
	1900	# Let users touch the state
	1901	self.auth.set_attribute(fid, expid)
	1902	self.auth.set_attribute(expid, expid)
	1903	# Override fedids can manipulate state as well
	1904	for o in self.overrides:
	1905	self.auth.set_attribute(o, expid)
	1906
	1907	rv = {
	1908	'experimentID': [
	1909	{'localname' : eid }, { 'fedid': copy.copy(expid) }
	1910	],
	1911	'experimentStatus': 'empty',
	1912	'experimentAccess': { 'X509' : expcert }
	1913	}
	1914
	1915	return rv
	1916
	1917
[e19b75c]	1918	def create_experiment(self, req, fid):
[db6b092]	1919	"""
	1920	The external interface to experiment creation called from the
	1921	dispatcher.
	1922
	1923	Creates a working directory, splits the incoming description using the
	1924	splitter script and parses out the avrious subsections using the
	1925	lcasses above. Once each sub-experiment is created, use pooled threads
	1926	to instantiate them and start it all up.
	1927	"""
[7183b48]	1928
	1929	req = req.get('CreateRequestBody', None)
	1930	if not req:
	1931	raise service_error(service_error.req,
	1932	"Bad request format (no CreateRequestBody)")
	1933
	1934	# Get the experiment access
	1935	exp = req.get('experimentID', None)
	1936	if exp:
	1937	if exp.has_key('fedid'):
	1938	key = exp['fedid']
	1939	expid = key
	1940	eid = None
	1941	elif exp.has_key('localname'):
	1942	key = exp['localname']
	1943	eid = key
	1944	expid = None
	1945	else:
	1946	raise service_error(service_error.req, "Unknown lookup type")
	1947	else:
	1948	raise service_error(service_error.req, "No request?")
	1949
	1950	self.check_experiment_access(fid, key)
[db6b092]	1951
	1952	try:
	1953	tmpdir = tempfile.mkdtemp(prefix="split-")
[895a133]	1954	os.mkdir(tmpdir+"/keys")
[db6b092]	1955	except IOError:
	1956	raise service_error(service_error.internal, "Cannot create tmp dir")
	1957
	1958	gw_pubkey_base = "fed.%s.pub" % self.ssh_type
	1959	gw_secretkey_base = "fed.%s" % self.ssh_type
	1960	gw_pubkey = tmpdir + "/keys/" + gw_pubkey_base
	1961	gw_secretkey = tmpdir + "/keys/" + gw_secretkey_base
	1962	tclfile = tmpdir + "/experiment.tcl"
	1963	tbparams = { }
	1964	try:
	1965	access_user = self.accessdb[fid]
	1966	except KeyError:
	1967	raise service_error(service_error.internal,
	1968	"Access map and authorizer out of sync in " + \
	1969	"create_experiment for fedid %s" % fid)
	1970
	1971	pid = "dummy"
	1972	gid = "dummy"
	1973
	1974	# The tcl parser needs to read a file so put the content into that file
	1975	descr=req.get('experimentdescription', None)
	1976	if descr:
	1977	file_content=descr.get('ns2description', None)
	1978	if file_content:
	1979	try:
	1980	f = open(tclfile, 'w')
	1981	f.write(file_content)
	1982	f.close()
	1983	except IOError:
	1984	raise service_error(service_error.internal,
	1985	"Cannot write temp experiment description")
	1986	else:
	1987	raise service_error(service_error.req,
	1988	"Only ns2descriptions supported")
	1989	else:
	1990	raise service_error(service_error.req, "No experiment description")
	1991
[7183b48]	1992	self.state_lock.acquire()
	1993	if self.state.has_key(key):
[4afcfc4]	1994	self.state[key]['experimentStatus'] = "starting"
[7183b48]	1995	for e in self.state[key].get('experimentID',[]):
	1996	if not expid and e.has_key('fedid'):
	1997	expid = e['fedid']
	1998	elif not eid and e.has_key('localname'):
	1999	eid = e['localname']
	2000	self.state_lock.release()
	2001
	2002	if not (eid and expid):
	2003	raise service_error(service_error.internal,
	2004	"Cannot find local experiment info!?")
[db6b092]	2005
	2006	try:
	2007	# This catches exceptions to clear the placeholder if necessary
	2008	try:
	2009	self.generate_ssh_keys(gw_secretkey, self.ssh_type)
	2010	except ValueError:
	2011	raise service_error(service_error.server_config,
	2012	"Bad key type (%s)" % self.ssh_type)
	2013
	2014	master = req.get('master', None)
	2015	if not master:
	2016	raise service_error(service_error.req,
	2017	"No master testbed label")
	2018	export_project = req.get('exportProject', None)
	2019	if not export_project:
	2020	raise service_error(service_error.req, "No export project")
[895a133]	2021
	2022	# Translate to topdl
[db6b092]	2023	if self.splitter_url:
[895a133]	2024	# XXX: need remote topdl translator
[db6b092]	2025	self.log.debug("Calling remote splitter at %s" % \
	2026	self.splitter_url)
	2027	split_data = self.remote_splitter(self.splitter_url,
	2028	file_content, master)
	2029	else:
	2030	tclcmd = [self.tclsh, self.tcl_splitter, '-t', '-x',
	2031	str(self.muxmax), '-m', master]
	2032
	2033	if self.fedkit:
	2034	tclcmd.append('-k')
	2035
	2036	if self.gatewaykit:
	2037	tclcmd.append('-K')
	2038
	2039	tclcmd.extend([pid, gid, eid, tclfile])
	2040
	2041	self.log.debug("running local splitter %s", " ".join(tclcmd))
	2042	# This is just fantastic. As a side effect the parser copies
	2043	# tb_compat.tcl into the current directory, so that directory
	2044	# must be writable by the fedd user. Doing this in the
	2045	# temporary subdir ensures this is the case.
[70caa72]	2046	tclparser = Popen(tclcmd, stdout=PIPE, close_fds=True,
[db6b092]	2047	cwd=tmpdir)
[866c983]	2048	split_data = tclparser.stdout
	2049
[cc8d8e9]	2050	top = topdl.topology_from_xml(file=split_data, top="experiment")
[895a133]	2051
[69692a9]	2052	hosts, ip_allocator = self.allocate_ips_to_topo(top)
[895a133]	2053	# Find the testbeds to look up
	2054	testbeds = set([ a.value for e in top.elements \
	2055	for a in e.attribute \
	2056	if a.attribute == 'testbed'] )
	2057
	2058	allocated = { } # Testbeds we can access
	2059	topo ={ } # Sub topologies
[e02cd14]	2060	connInfo = { } # Connection information
	2061	services = [ ]
[99eb8cf]	2062	self.get_access_to_testbeds(testbeds, access_user,
[e02cd14]	2063	export_project, master, allocated, tbparams, services)
[895a133]	2064	self.split_topology(top, topo, testbeds, eid, master, tbparams)
	2065
	2066	# Copy configuration files into the remote file store
[6c57fe9]	2067	# The config urlpath
	2068	configpath = "/%s/config" % expid
	2069	# The config file system location
	2070	configdir ="%s%s" % ( self.repodir, configpath)
	2071	try:
	2072	os.makedirs(configdir)
	2073	except IOError, e:
	2074	raise service_error(
	2075	"Cannot create config directory: %s" % e)
	2076	try:
	2077	f = open("%s/hosts" % configdir, "w")
	2078	f.write('\n'.join(hosts))
	2079	f.close()
	2080	except IOError, e:
	2081	raise service_error(service_error.internal,
	2082	"Cannot write hosts file: %s" % e)
	2083	try:
[40dd8c1]	2084	copy_file("%s" % gw_pubkey, "%s/%s" % \
[6c57fe9]	2085	(configdir, gw_pubkey_base))
[40dd8c1]	2086	copy_file("%s" % gw_secretkey, "%s/%s" % \
[6c57fe9]	2087	(configdir, gw_secretkey_base))
	2088	except IOError, e:
	2089	raise service_error(service_error.internal,
	2090	"Cannot copy keyfiles: %s" % e)
[cc8d8e9]	2091
[6c57fe9]	2092	# Allow the individual testbeds to access the configuration files.
	2093	for tb in tbparams.keys():
	2094	asignee = tbparams[tb]['allocID']['fedid']
	2095	for f in ("hosts", gw_secretkey_base, gw_pubkey_base):
	2096	self.auth.set_attribute(asignee, "%s/%s" % (configpath, f))
[cc8d8e9]	2097
[e02cd14]	2098	self.add_portals(top, topo, eid, master, tbparams, ip_allocator,
	2099	connInfo)
[69692a9]	2100	# Now get access to the dynamic testbeds
	2101	for k, t in topo.items():
	2102	if not t.get_attribute('dynamic'):
	2103	continue
	2104	tb = t.get_attribute('testbed')
	2105	if tb:
	2106	self.get_access(tb, None, user, tbparams, master,
[e02cd14]	2107	export_project, access_user, services)
[69692a9]	2108	tbparams[k] = tbparams[tb]
	2109	del tbparams[tb]
	2110	allocated[k] = 1
	2111	else:
	2112	raise service_error(service_error.internal,
	2113	"Dynamic allocation from no testbed!?")
	2114
[895a133]	2115	self.wrangle_software(expid, top, topo, tbparams)
[cc8d8e9]	2116
	2117	vtopo = topdl.topology_to_vtopo(top)
	2118	vis = self.genviz(vtopo)
[db6b092]	2119
[866c983]	2120	# save federant information
	2121	for k in allocated.keys():
[ecf679e]	2122	tbparams[k]['federant'] = {
	2123	'name': [ { 'localname' : eid} ],
	2124	'allocID' : tbparams[k]['allocID'],
	2125	'master' : k == master,
	2126	'uri': tbparams[k]['uri'],
[866c983]	2127	}
[69692a9]	2128	if tbparams[k].has_key('emulab'):
	2129	tbparams[k]['federant']['emulab'] = \
	2130	tbparams[k]['emulab']
[866c983]	2131
[db6b092]	2132	self.state_lock.acquire()
	2133	self.state[eid]['vtopo'] = vtopo
	2134	self.state[eid]['vis'] = vis
	2135	self.state[expid]['federant'] = \
	2136	[ tbparams[tb]['federant'] for tb in tbparams.keys() \
	2137	if tbparams[tb].has_key('federant') ]
[cc8d8e9]	2138	if self.state_filename:
	2139	self.write_state()
[db6b092]	2140	self.state_lock.release()
[866c983]	2141	except service_error, e:
	2142	# If something goes wrong in the parse (usually an access error)
	2143	# clear the placeholder state. From here on out the code delays
[db6b092]	2144	# exceptions. Failing at this point returns a fault to the remote
	2145	# caller.
[cc8d8e9]	2146
[866c983]	2147	self.state_lock.acquire()
	2148	del self.state[eid]
[bd3e314]	2149	del self.state[expid]
	2150	if self.state_filename: self.write_state()
[866c983]	2151	self.state_lock.release()
	2152	raise e
	2153
	2154
[db6b092]	2155	# Start the background swapper and return the starting state. From
	2156	# here on out, the state will stick around a while.
[866c983]	2157
[db6b092]	2158	# Let users touch the state
[bd3e314]	2159	self.auth.set_attribute(fid, expid)
	2160	self.auth.set_attribute(expid, expid)
[db6b092]	2161	# Override fedids can manipulate state as well
	2162	for o in self.overrides:
	2163	self.auth.set_attribute(o, expid)
	2164
	2165	# Create a logger that logs to the experiment's state object as well as
	2166	# to the main log file.
	2167	alloc_log = logging.getLogger('fedd.experiment_control.%s' % eid)
[f07fa49]	2168	alloc_collector = self.list_log(self.state[eid]['log'])
	2169	h = logging.StreamHandler(alloc_collector)
[db6b092]	2170	# XXX: there should be a global one of these rather than repeating the
	2171	# code.
	2172	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
	2173	'%d %b %y %H:%M:%S'))
	2174	alloc_log.addHandler(h)
	2175
[6c57fe9]	2176	attrs = [
	2177	{
	2178	'attribute': 'ssh_pubkey',
	2179	'value': '%s/%s/config/%s' % \
[7183b48]	2180	(self.repo_url, expid, gw_pubkey_base)
[6c57fe9]	2181	},
	2182	{
	2183	'attribute': 'ssh_secretkey',
	2184	'value': '%s/%s/config/%s' % \
[7183b48]	2185	(self.repo_url, expid, gw_secretkey_base)
[6c57fe9]	2186	},
	2187	{
	2188	'attribute': 'hosts',
	2189	'value': '%s/%s/config/hosts' % \
[7183b48]	2190	(self.repo_url, expid)
[6c57fe9]	2191	},
[ecca6eb]	2192	{
	2193	'attribute': 'experiment_name',
	2194	'value': eid,
	2195	},
[6c57fe9]	2196	]
	2197
[db6b092]	2198	# Start a thread to do the resource allocation
[e19b75c]	2199	t = Thread(target=self.allocate_resources,
[7183b48]	2200	args=(allocated, master, eid, expid, tbparams,
[e02cd14]	2201	topo, tmpdir, alloc_log, alloc_collector, attrs, connInfo,
	2202	services),
[db6b092]	2203	name=eid)
	2204	t.start()
	2205
	2206	rv = {
	2207	'experimentID': [
	2208	{'localname' : eid }, { 'fedid': copy.copy(expid) }
	2209	],
	2210	'experimentStatus': 'starting',
	2211	}
	2212
	2213	return rv
[9479343]	2214
	2215	def get_experiment_fedid(self, key):
	2216	"""
[db6b092]	2217	find the fedid associated with the localname key in the state database.
[9479343]	2218	"""
	2219
[db6b092]	2220	rv = None
	2221	self.state_lock.acquire()
	2222	if self.state.has_key(key):
	2223	if isinstance(self.state[key], dict):
	2224	try:
	2225	kl = [ f['fedid'] for f in \
	2226	self.state[key]['experimentID']\
	2227	if f.has_key('fedid') ]
	2228	except KeyError:
	2229	self.state_lock.release()
	2230	raise service_error(service_error.internal,
	2231	"No fedid for experiment %s when getting "+\
	2232	"fedid(!?)" % key)
	2233	if len(kl) == 1:
	2234	rv = kl[0]
	2235	else:
	2236	self.state_lock.release()
	2237	raise service_error(service_error.internal,
	2238	"multiple fedids for experiment %s when " +\
	2239	"getting fedid(!?)" % key)
	2240	else:
	2241	self.state_lock.release()
	2242	raise service_error(service_error.internal,
	2243	"Unexpected state for %s" % key)
	2244	self.state_lock.release()
	2245	return rv
[a97394b]	2246
[4064742]	2247	def check_experiment_access(self, fid, key):
[866c983]	2248	"""
	2249	Confirm that the fid has access to the experiment. Though a request
	2250	may be made in terms of a local name, the access attribute is always
	2251	the experiment's fedid.
	2252	"""
	2253	if not isinstance(key, fedid):
[db6b092]	2254	key = self.get_experiment_fedid(key)
[866c983]	2255
	2256	if self.auth.check_attribute(fid, key):
	2257	return True
	2258	else:
	2259	raise service_error(service_error.access, "Access Denied")
[4064742]	2260
	2261
[db6b092]	2262	def get_handler(self, path, fid):
[7183b48]	2263	self.log.info("Get handler %s %s" % (path, fid))
[6c57fe9]	2264	if self.auth.check_attribute(fid, path):
	2265	return ("%s/%s" % (self.repodir, path), "application/binary")
	2266	else:
	2267	return (None, None)
[987aaa1]	2268
	2269	def get_vtopo(self, req, fid):
[866c983]	2270	"""
	2271	Return the stored virtual topology for this experiment
	2272	"""
	2273	rv = None
[db6b092]	2274	state = None
[866c983]	2275
	2276	req = req.get('VtopoRequestBody', None)
	2277	if not req:
	2278	raise service_error(service_error.req,
	2279	"Bad request format (no VtopoRequestBody)")
	2280	exp = req.get('experiment', None)
	2281	if exp:
	2282	if exp.has_key('fedid'):
	2283	key = exp['fedid']
	2284	keytype = "fedid"
	2285	elif exp.has_key('localname'):
	2286	key = exp['localname']
	2287	keytype = "localname"
	2288	else:
	2289	raise service_error(service_error.req, "Unknown lookup type")
	2290	else:
	2291	raise service_error(service_error.req, "No request?")
	2292
	2293	self.check_experiment_access(fid, key)
	2294
	2295	self.state_lock.acquire()
	2296	if self.state.has_key(key):
[db6b092]	2297	if self.state[key].has_key('vtopo'):
	2298	rv = { 'experiment' : {keytype: key },\
	2299	'vtopo': self.state[key]['vtopo'],\
	2300	}
	2301	else:
	2302	state = self.state[key]['experimentStatus']
[866c983]	2303	self.state_lock.release()
	2304
	2305	if rv: return rv
[bd3e314]	2306	else:
[db6b092]	2307	if state:
	2308	raise service_error(service_error.partial,
	2309	"Not ready: %s" % state)
	2310	else:
	2311	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2312
	2313	def get_vis(self, req, fid):
[866c983]	2314	"""
	2315	Return the stored visualization for this experiment
	2316	"""
	2317	rv = None
[db6b092]	2318	state = None
[866c983]	2319
	2320	req = req.get('VisRequestBody', None)
	2321	if not req:
	2322	raise service_error(service_error.req,
	2323	"Bad request format (no VisRequestBody)")
	2324	exp = req.get('experiment', None)
	2325	if exp:
	2326	if exp.has_key('fedid'):
	2327	key = exp['fedid']
	2328	keytype = "fedid"
	2329	elif exp.has_key('localname'):
	2330	key = exp['localname']
	2331	keytype = "localname"
	2332	else:
	2333	raise service_error(service_error.req, "Unknown lookup type")
	2334	else:
	2335	raise service_error(service_error.req, "No request?")
	2336
	2337	self.check_experiment_access(fid, key)
	2338
	2339	self.state_lock.acquire()
	2340	if self.state.has_key(key):
[db6b092]	2341	if self.state[key].has_key('vis'):
	2342	rv = { 'experiment' : {keytype: key },\
	2343	'vis': self.state[key]['vis'],\
	2344	}
	2345	else:
	2346	state = self.state[key]['experimentStatus']
[866c983]	2347	self.state_lock.release()
	2348
	2349	if rv: return rv
[bd3e314]	2350	else:
[db6b092]	2351	if state:
	2352	raise service_error(service_error.partial,
	2353	"Not ready: %s" % state)
	2354	else:
	2355	raise service_error(service_error.req, "No such experiment")
[987aaa1]	2356
[65f3f29]	2357	def clean_info_response(self, rv):
[db6b092]	2358	"""
	2359	Remove the information in the experiment's state object that is not in
	2360	the info response.
	2361	"""
	2362	# Remove the owner info (should always be there, but...)
	2363	if rv.has_key('owner'): del rv['owner']
	2364
	2365	# Convert the log into the allocationLog parameter and remove the
	2366	# log entry (with defensive programming)
	2367	if rv.has_key('log'):
	2368	rv['allocationLog'] = "".join(rv['log'])
	2369	del rv['log']
	2370	else:
	2371	rv['allocationLog'] = ""
	2372
	2373	if rv['experimentStatus'] != 'active':
	2374	if rv.has_key('federant'): del rv['federant']
	2375	else:
[69692a9]	2376	# remove the allocationID and uri info from each federant
[db6b092]	2377	for f in rv.get('federant', []):
	2378	if f.has_key('allocID'): del f['allocID']
[69692a9]	2379	if f.has_key('uri'): del f['uri']
[db6b092]	2380	return rv
[65f3f29]	2381
[c52c48d]	2382	def get_info(self, req, fid):
[866c983]	2383	"""
	2384	Return all the stored info about this experiment
	2385	"""
	2386	rv = None
	2387
	2388	req = req.get('InfoRequestBody', None)
	2389	if not req:
	2390	raise service_error(service_error.req,
[65f3f29]	2391	"Bad request format (no InfoRequestBody)")
[866c983]	2392	exp = req.get('experiment', None)
	2393	if exp:
	2394	if exp.has_key('fedid'):
	2395	key = exp['fedid']
	2396	keytype = "fedid"
	2397	elif exp.has_key('localname'):
	2398	key = exp['localname']
	2399	keytype = "localname"
	2400	else:
	2401	raise service_error(service_error.req, "Unknown lookup type")
	2402	else:
	2403	raise service_error(service_error.req, "No request?")
	2404
	2405	self.check_experiment_access(fid, key)
	2406
	2407	# The state may be massaged by the service function that called
	2408	# get_info (e.g., encoded for XMLRPC transport) so send a copy of the
	2409	# state.
	2410	self.state_lock.acquire()
	2411	if self.state.has_key(key):
	2412	rv = copy.deepcopy(self.state[key])
	2413	self.state_lock.release()
	2414
[db6b092]	2415	if rv:
	2416	return self.clean_info_response(rv)
[bd3e314]	2417	else:
[db6b092]	2418	raise service_error(service_error.req, "No such experiment")
[7a8d667]	2419
[65f3f29]	2420	def get_multi_info(self, req, fid):
	2421	"""
	2422	Return all the stored info that this fedid can access
	2423	"""
[db6b092]	2424	rv = { 'info': [ ] }
[65f3f29]	2425
[db6b092]	2426	self.state_lock.acquire()
	2427	for key in [ k for k in self.state.keys() if isinstance(k, fedid)]:
[829246e]	2428	try:
	2429	self.check_experiment_access(fid, key)
	2430	except service_error, e:
	2431	if e.code == service_error.access:
	2432	continue
	2433	else:
	2434	self.state_lock.release()
	2435	raise e
[65f3f29]	2436
[db6b092]	2437	if self.state.has_key(key):
	2438	e = copy.deepcopy(self.state[key])
	2439	e = self.clean_info_response(e)
	2440	rv['info'].append(e)
[65f3f29]	2441	self.state_lock.release()
[db6b092]	2442	return rv
[65f3f29]	2443
[7a8d667]	2444	def terminate_experiment(self, req, fid):
[866c983]	2445	"""
	2446	Swap this experiment out on the federants and delete the shared
	2447	information
	2448	"""
	2449	tbparams = { }
	2450	req = req.get('TerminateRequestBody', None)
	2451	if not req:
	2452	raise service_error(service_error.req,
	2453	"Bad request format (no TerminateRequestBody)")
[db6b092]	2454	force = req.get('force', False)
[866c983]	2455	exp = req.get('experiment', None)
	2456	if exp:
	2457	if exp.has_key('fedid'):
	2458	key = exp['fedid']
	2459	keytype = "fedid"
	2460	elif exp.has_key('localname'):
	2461	key = exp['localname']
	2462	keytype = "localname"
	2463	else:
	2464	raise service_error(service_error.req, "Unknown lookup type")
	2465	else:
	2466	raise service_error(service_error.req, "No request?")
	2467
	2468	self.check_experiment_access(fid, key)
	2469
[db6b092]	2470	dealloc_list = [ ]
[46e4682]	2471
	2472
[5ae3857]	2473	# Create a logger that logs to the dealloc_list as well as to the main
	2474	# log file.
	2475	dealloc_log = logging.getLogger('fedd.experiment_control.%s' % key)
	2476	h = logging.StreamHandler(self.list_log(dealloc_list))
	2477	# XXX: there should be a global one of these rather than repeating the
	2478	# code.
	2479	h.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s",
	2480	'%d %b %y %H:%M:%S'))
	2481	dealloc_log.addHandler(h)
	2482
	2483	self.state_lock.acquire()
	2484	fed_exp = self.state.get(key, None)
	2485
	2486	if fed_exp:
	2487	# This branch of the conditional holds the lock to generate a
	2488	# consistent temporary tbparams variable to deallocate experiments.
	2489	# It releases the lock to do the deallocations and reacquires it to
	2490	# remove the experiment state when the termination is complete.
	2491
	2492	# First make sure that the experiment creation is complete.
	2493	status = fed_exp.get('experimentStatus', None)
	2494
	2495	if status:
	2496	if status in ('starting', 'terminating'):
	2497	if not force:
	2498	self.state_lock.release()
	2499	raise service_error(service_error.partial,
	2500	'Experiment still being created or destroyed')
	2501	else:
	2502	self.log.warning('Experiment in %s state ' % status + \
	2503	'being terminated by force.')
	2504	else:
	2505	# No status??? trouble
	2506	self.state_lock.release()
	2507	raise service_error(service_error.internal,
	2508	"Experiment has no status!?")
	2509
	2510	ids = []
	2511	# experimentID is a list of dicts that are self-describing
	2512	# identifiers. This finds all the fedids and localnames - the
	2513	# keys of self.state - and puts them into ids.
	2514	for id in fed_exp.get('experimentID', []):
	2515	if id.has_key('fedid'): ids.append(id['fedid'])
	2516	if id.has_key('localname'): ids.append(id['localname'])
	2517
[63a35b7]	2518	# Collect the allocation/segment ids into a dict keyed by the fedid
	2519	# of the allocation (or a monotonically increasing integer) that
	2520	# contains a tuple of uri, aid (which is a dict...)
	2521	for i, fed in enumerate(fed_exp.get('federant', [])):
[5ae3857]	2522	try:
[63a35b7]	2523	uri = fed['uri']
	2524	aid = fed['allocID']
	2525	k = fed['allocID'].get('fedid', i)
[5ae3857]	2526	except KeyError, e:
	2527	continue
[63a35b7]	2528	tbparams[k] = (uri, aid)
[5ae3857]	2529	fed_exp['experimentStatus'] = 'terminating'
	2530	if self.state_filename: self.write_state()
	2531	self.state_lock.release()
	2532
	2533	# Stop everyone. NB, wait_for_all waits until a thread starts and
	2534	# then completes, so we can't wait if nothing starts. So, no
	2535	# tbparams, no start.
	2536	if len(tbparams) > 0:
	2537	thread_pool = self.thread_pool(self.nthreads)
[63a35b7]	2538	for k in tbparams.keys():
[5ae3857]	2539	# Create and start a thread to stop the segment
	2540	thread_pool.wait_for_slot()
[63a35b7]	2541	uri, aid = tbparams[k]
[5ae3857]	2542	t = self.pooled_thread(\
[e19b75c]	2543	target=self.terminate_segment(log=dealloc_log,
[63a35b7]	2544	testbed=uri,
[5ae3857]	2545	cert_file=self.cert_file,
	2546	cert_pwd=self.cert_pwd,
	2547	trusted_certs=self.trusted_certs,
	2548	caller=self.call_TerminateSegment),
[63a35b7]	2549	args=(uri, aid), name=k,
[5ae3857]	2550	pdata=thread_pool, trace_file=self.trace_file)
	2551	t.start()
	2552	# Wait for completions
	2553	thread_pool.wait_for_all_done()
	2554
	2555	# release the allocations (failed experiments have done this
	2556	# already, and starting experiments may be in odd states, so we
	2557	# ignore errors releasing those allocations
	2558	try:
[63a35b7]	2559	for k in tbparams.keys():
[ecf679e]	2560	# This releases access by uri
[63a35b7]	2561	uri, aid = tbparams[k]
	2562	self.release_access(None, aid, uri=uri)
[5ae3857]	2563	except service_error, e:
	2564	if status != 'failed' and not force:
	2565	raise e
	2566
	2567	# Remove the terminated experiment
	2568	self.state_lock.acquire()
	2569	for id in ids:
	2570	if self.state.has_key(id): del self.state[id]
	2571
	2572	if self.state_filename: self.write_state()
	2573	self.state_lock.release()
	2574
	2575	return {
	2576	'experiment': exp ,
	2577	'deallocationLog': "".join(dealloc_list),
	2578	}
	2579	else:
	2580	# Don't forget to release the lock
	2581	self.state_lock.release()
	2582	raise service_error(service_error.req, "No saved state")

Note: See TracBrowser for help on using the repository browser.

Download in other formats: