Context Navigation

source: fedd/federation/protogeni_access.py @ 0c9393e

axis_examplecompt_changesinfo-ops

Last change on this file since 0c9393e was e83f2f2, checked in by Ted Faber <faber@…>, 14 years ago
Move proofs around. Lots of changes, including fault handling.
Property mode set to `100644`
File size: 48.2 KB

Rev	Line
[c119839]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4	import stat # for chmod constants
	5	import re
[dd3e38b]	6	import time
[c119839]	7	import string
	8	import copy
	9	import pickle
	10	import logging
	11	import subprocess
[42cd8a7]	12	import random
[3551ae1]	13	import traceback
[1b6cc95]	14	import xml.parsers.expat
[c119839]	15
[42cd8a7]	16	from threading import Thread, Timer, Lock
[c119839]	17
	18	from util import *
	19	from fedid import fedid, generate_fedid
[3cec20c]	20	from authorizer import authorizer, abac_authorizer
[c119839]	21	from service_error import service_error
	22	from remote_service import xmlrpc_handler, soap_handler, service_caller
	23
	24	import httplib
	25	import tempfile
	26	from urlparse import urlparse
	27
[3551ae1]	28	from access import access_base
[35a5879]	29	from legacy_access import legacy_access
[208797c]	30	from protogeni_proxy import protogeni_proxy
	31	from geniapi_proxy import geniapi_proxy
[3551ae1]	32
[c119839]	33	import topdl
	34	import list_log
	35
	36
	37	# Make log messages disappear if noone configures a fedd logger
	38	class nullHandler(logging.Handler):
	39	def emit(self, record): pass
	40
	41	fl = logging.getLogger("fedd.access")
	42	fl.addHandler(nullHandler())
	43
[35a5879]	44	class access(access_base, legacy_access):
[c119839]	45	"""
	46	The implementation of access control based on mapping users to projects.
	47
	48	Users can be mapped to existing projects or have projects created
	49	dynamically. This implements both direct requests and proxies.
	50	"""
	51
	52	def __init__(self, config=None, auth=None):
	53	"""
	54	Initializer. Pulls parameters out of the ConfigParser's access section.
	55	"""
	56
[3551ae1]	57	access_base.__init__(self, config, auth)
[c119839]	58
	59	self.domain = config.get("access", "domain")
	60	self.userconfdir = config.get("access","userconfdir")
	61	self.userconfcmd = config.get("access","userconfcmd")
	62	self.userconfurl = config.get("access","userconfurl")
[9b3627e]	63	self.federation_software = config.get("access", "federation_software")
	64	self.portal_software = config.get("access", "portal_software")
[c119839]	65	self.ssh_port = config.get("access","ssh_port") or "22"
	66	self.sshd = config.get("access","sshd")
	67	self.sshd_config = config.get("access", "sshd_config")
	68	self.access_type = config.get("access", "type")
	69	self.staging_dir = config.get("access", "staging_dir") or "/tmp"
	70	self.staging_host = config.get("access", "staging_host") \
	71	or "ops.emulab.net"
[a65a65a]	72	self.local_seer_software = config.get("access", "local_seer_software")
	73	self.local_seer_image = config.get("access", "local_seer_image")
	74	self.local_seer_start = config.get("access", "local_seer_start")
[9b3627e]	75
[a65a65a]	76	self.dragon_endpoint = config.get("access", "dragon")
	77	self.dragon_vlans = config.get("access", "dragon_vlans")
	78	self.deter_internal = config.get("access", "deter_internal")
	79
	80	self.tunnel_config = config.getboolean("access", "tunnel_config")
	81	self.portal_command = config.get("access", "portal_command")
	82	self.portal_image = config.get("access", "portal_image")
	83	self.portal_type = config.get("access", "portal_type") or "pc"
	84	self.portal_startcommand = config.get("access", "portal_startcommand")
	85	self.node_startcommand = config.get("access", "node_startcommand")
	86
[3551ae1]	87	self.federation_software = self.software_list(self.federation_software)
	88	self.portal_software = self.software_list(self.portal_software)
	89	self.local_seer_software = self.software_list(self.local_seer_software)
[c119839]	90
[310d419]	91	self.renewal_interval = config.get("access", "renewal") or (3 * 60 )
	92	self.renewal_interval = int(self.renewal_interval) * 60
[dd3e38b]	93
[c119839]	94	self.ch_url = config.get("access", "ch_url")
	95	self.sa_url = config.get("access", "sa_url")
	96	self.cm_url = config.get("access", "cm_url")
	97
	98	self.restricted = [ ]
	99
[3551ae1]	100	# read_state in the base_class
	101	self.state_lock.acquire()
	102	for a in ('allocation', 'projects', 'keys', 'types'):
	103	if a not in self.state:
	104	self.state[a] = { }
	105	self.allocation = self.state['allocation']
	106	self.projects = self.state['projects']
	107	self.keys = self.state['keys']
	108	self.types = self.state['types']
	109	self.state_lock.release()
[c119839]	110
	111
[3551ae1]	112	self.log = logging.getLogger("fedd.access")
	113	set_log_level(config, "access", self.log)
[c119839]	114
[3cec20c]	115	# authorization information
	116	self.auth_type = config.get('access', 'auth_type') \
	117	or 'legacy'
	118	self.auth_dir = config.get('access', 'auth_dir')
	119	accessdb = config.get("access", "accessdb")
	120	# initialize the authorization system
	121	if self.auth_type == 'legacy':
	122	self.access = { }
	123	if accessdb:
	124	self.legacy_read_access(accessdb, self.make_access_info)
	125	# Add the ownership attributes to the authorizer. Note that the
	126	# indices of the allocation dict are strings, but the attributes are
	127	# fedids, so there is a conversion.
	128	self.state_lock.acquire()
	129	for k in self.state.get('allocation', {}).keys():
	130	for o in self.state['allocation'][k].get('owners', []):
	131	self.auth.set_attribute(o, fedid(hexstr=k))
	132	self.auth.set_attribute(fedid(hexstr=k),fedid(hexstr=k))
[3551ae1]	133
[3cec20c]	134	self.state_lock.release()
	135	self.lookup_access = self.legacy_lookup_access_base
	136	elif self.auth_type == 'abac':
	137	self.auth = abac_authorizer(load=self.auth_dir)
	138	self.access = [ ]
	139	if accessdb:
	140	self.read_access(accessdb, self.make_access_info)
	141	else:
	142	raise service_error(service_error.internal,
	143	"Unknown auth_type: %s" % self.auth_type)
[208797c]	144	api = config.get("access", "api") or "protogeni"
	145	if api == "protogeni":
	146	self.api_proxy = protogeni_proxy
	147	elif api == "geniapi":
	148	self.api_proxy = geniapi_proxy
	149	else:
	150	self.log.debug("Unknown interface, using protogeni")
	151	self.api_proxy = protogeni_proxy
	152
[c119839]	153	self.call_SetValue = service_caller('SetValue')
	154	self.call_GetValue = service_caller('GetValue')
[3551ae1]	155	self.exports = {
	156	'local_seer_control': self.export_local_seer,
	157	'seer_master': self.export_seer_master,
	158	'hide_hosts': self.export_hide_hosts,
	159	}
	160
	161	if not self.local_seer_image or not self.local_seer_software or \
	162	not self.local_seer_start:
	163	if 'local_seer_control' in self.exports:
	164	del self.exports['local_seer_control']
	165
	166	if not self.local_seer_image or not self.local_seer_software or \
	167	not self.seer_master_start:
	168	if 'seer_master' in self.exports:
	169	del self.exports['seer_master']
[c119839]	170
[dd3e38b]	171	self.RenewSlices()
	172
[c119839]	173	self.soap_services = {\
	174	'RequestAccess': soap_handler("RequestAccess", self.RequestAccess),
	175	'ReleaseAccess': soap_handler("ReleaseAccess", self.ReleaseAccess),
	176	'StartSegment': soap_handler("StartSegment", self.StartSegment),
	177	'TerminateSegment': soap_handler("TerminateSegment",
	178	self.TerminateSegment),
	179	}
	180	self.xmlrpc_services = {\
	181	'RequestAccess': xmlrpc_handler('RequestAccess',
	182	self.RequestAccess),
	183	'ReleaseAccess': xmlrpc_handler('ReleaseAccess',
	184	self.ReleaseAccess),
	185	'StartSegment': xmlrpc_handler("StartSegment", self.StartSegment),
	186	'TerminateSegment': xmlrpc_handler('TerminateSegment',
	187	self.TerminateSegment),
	188	}
	189
[3551ae1]	190	@staticmethod
	191	def make_access_info(s):
	192	"""
	193	Split a string of the form (id, id, id, id) ito its constituent tuples
	194	and return them as a tuple. Use to import access info from the
	195	access_db.
	196	"""
[c119839]	197
[3551ae1]	198	ss = s.strip()
	199	if ss.startswith('(') and ss.endswith(')'):
	200	l = [ s.strip() for s in ss[1:-1].split(",")]
	201	if len(l) == 4:
	202	return tuple(l)
	203	else:
	204	raise self.parse_error(
	205	"Exactly 4 elements in access info required")
	206	else:
	207	raise self.parse_error("Expecting parenthezied values")
[c119839]	208
	209
	210	def get_handler(self, path, fid):
	211	self.log.info("Get handler %s %s" % (path, fid))
	212	if self.auth.check_attribute(fid, path) and self.userconfdir:
	213	return ("%s/%s" % (self.userconfdir, path), "application/binary")
	214	else:
	215	return (None, None)
	216
def build_access_response(self, alloc_id, services, proof):
    """
    Build the dictionary form of the access response.

    alloc_id is placed in the message as is, so it must already be in the
    form the caller wants in the reply.  The domain is always reported as
    a fedAttr; the dragon, deter_internal and vlans attributes are added
    only when the corresponding setting is true.  services, when true, is
    returned under 'service', and proof.to_dict() under 'proof'.
    """
    fed_attrs = [ { 'attribute': 'domain', 'value': self.domain } ]
    #XXX: ?? (the original questions the vlans entry)
    optional = (
        ('dragon', self.dragon_endpoint),
        ('deter_internal', self.deter_internal),
        ('vlans', self.dragon_vlans),
    )
    for attr_name, attr_value in optional:
        if attr_value:
            fed_attrs.append({'attribute': attr_name, 'value': attr_value})

    response = {
        'allocID': alloc_id,
        'fedAttr': fed_attrs,
        'proof': proof.to_dict(),
    }
    if services:
        response['service'] = services
    return response
	249
	250	def RequestAccess(self, req, fid):
	251	"""
[3551ae1]	252	Handle the access request.
[c119839]	253	"""
	254
	255	# The dance to get into the request body
	256	if req.has_key('RequestAccessRequestBody'):
	257	req = req['RequestAccessRequestBody']
	258	else:
	259	raise service_error(service_error.req, "No request!?")
	260
	261	if req.has_key('destinationTestbed'):
	262	dt = unpack_id(req['destinationTestbed'])
	263
[3551ae1]	264	# Request for this fedd
[e83f2f2]	265	found, match, owners, proof = self.lookup_access(req, fid)
[3551ae1]	266	services, svc_state = self.export_services(req.get('service',[]),
	267	None, None)
	268	# keep track of what's been added
	269	allocID, alloc_cert = generate_fedid(subj="alloc", log=self.log)
	270	aid = unicode(allocID)
[c119839]	271
[3551ae1]	272	self.state_lock.acquire()
	273	self.allocation[aid] = { }
	274	# The protoGENI certificate
	275	self.allocation[aid]['credentials'] = found
	276	# The list of owner FIDs
[3cec20c]	277	self.allocation[aid]['owners'] = owners
[c65b7e4]	278	self.allocation[aid]['auth'] = set()
	279	self.append_allocation_authorization(aid,
	280	((fid, allocID), (allocID, allocID)), state_attr='allocation')
[3551ae1]	281	self.write_state()
	282	self.state_lock.release()
	283
	284	try:
	285	f = open("%s/%s.pem" % (self.certdir, aid), "w")
	286	print >>f, alloc_cert
	287	f.close()
	288	except EnvironmentError, e:
	289	raise service_error(service_error.internal,
	290	"Can't open %s/%s : %s" % (self.certdir, aid, e))
[e83f2f2]	291	return self.build_access_response({ 'fedid': allocID }, None, proof)
[c119839]	292
	293
	294	def ReleaseAccess(self, req, fid):
	295	# The dance to get into the request body
	296	if req.has_key('ReleaseAccessRequestBody'):
	297	req = req['ReleaseAccessRequestBody']
	298	else:
	299	raise service_error(service_error.req, "No request!?")
	300
[3551ae1]	301	# Local request
	302	try:
	303	if req['allocID'].has_key('localname'):
	304	auth_attr = aid = req['allocID']['localname']
	305	elif req['allocID'].has_key('fedid'):
	306	aid = unicode(req['allocID']['fedid'])
	307	auth_attr = req['allocID']['fedid']
[c119839]	308	else:
	309	raise service_error(service_error.req,
[3551ae1]	310	"Only localnames and fedids are understood")
	311	except KeyError:
	312	raise service_error(service_error.req, "Badly formed request")
[c119839]	313
[3551ae1]	314	self.log.debug("[access] deallocation requested for %s", aid)
[e83f2f2]	315	access_ok , proof = self.auth.check_attribute(fid, auth_attr,
	316	with_proof=True)
	317	if not access_ok:
[3551ae1]	318	self.log.debug("[access] deallocation denied for %s", aid)
	319	raise service_error(service_error.access, "Access Denied")
[c119839]	320
[3551ae1]	321	self.state_lock.acquire()
	322	if self.allocation.has_key(aid):
	323	self.log.debug("Found allocation for %s" %aid)
[c65b7e4]	324	self.clear_allocation_authorization(aid, state_attr='allocation')
[3551ae1]	325	del self.allocation[aid]
	326	self.write_state()
	327	self.state_lock.release()
	328	# And remove the access cert
	329	cf = "%s/%s.pem" % (self.certdir, aid)
	330	self.log.debug("Removing %s" % cf)
	331	os.remove(cf)
[e83f2f2]	332	return { 'allocID': req['allocID'], 'proof': proof.to_dict() }
[3551ae1]	333	else:
	334	self.state_lock.release()
	335	raise service_error(service_error.req, "No such allocation")
[c119839]	336
def manifest_to_dict(self, manifest, ignore_debug=False):
    """
    Turn the manifest into a dict where each virtual node name (i.e. the
    topdl name) has an entry with the allocated machine in 'hostname', the
    interfaces in 'interfaces' (virtual interface id -> component id) and
    the MAC addresses in 'mac' (virtual interface id -> MAC).

    When self.create_debug is set and ignore_debug is false, no parsing is
    done and an empty dict is returned.  Malformed XML raises
    xml.parsers.expat.ExpatError.
    """
    if self.create_debug and not ignore_debug:
        self.log.debug("Returning null manifest dict")
        return { }

    # The class allows us to keep a little state - the dict under
    # construction and the current entry in that dict for the interface
    # element code.
    class manifest_parser:
        def __init__(self, log):
            self.d = { }
            self.current_key = None
            self.log = log

        # If the element is a node, create a dict entry for it.  If it's an
        # interface inside a node, add an entry in the interfaces dict with
        # the virtual name and component id.
        def start_element(self, name, attrs):
            if name == 'node':
                self.current_key = attrs.get('virtual_id', "")
                if self.current_key:
                    self.d[self.current_key] = {
                        'hostname': attrs.get('hostname', None),
                        'interfaces': { },
                        'mac': { },
                    }
            elif name == 'interface' and self.current_key:
                self.d[self.current_key]['interfaces']\
                        [attrs.get('virtual_id', '')] = \
                        attrs.get('component_id', None)
            elif name == 'interface_ref':
                # Collect mac address information from an interface_ref.
                # These appear after the node info has been parsed.
                nid = attrs.get('virtual_node_id', None)
                ifid = attrs.get('virtual_interface_id', None)
                mac = attrs.get('MAC', None)
                if nid in self.d:
                    self.d[nid]['mac'][ifid] = mac
                else:
                    # A reference to a node that was never recorded would
                    # otherwise abort the whole parse with a KeyError.
                    self.log.warning(
                            "interface_ref for unknown node %s ignored" % nid)

        # When a node is finished, clear current_key
        def end_element(self, name):
            if name == 'node':
                self.current_key = None

    mp = manifest_parser(self.log)
    p = xml.parsers.expat.ParserCreate()
    # These are bound to the parser object we just created
    p.StartElementHandler = mp.start_element
    p.EndElementHandler = mp.end_element

    p.Parse(manifest)
    return mp.d
	396
	397	def fake_manifest(self, topo):
[37ed9a5]	398	"""
	399	Fake the output of manifest_to_dict with a bunch of generic node an
	400	interface names, for debugging.
	401	"""
[42cd8a7]	402	node = { }
	403	for i, e in enumerate([ e for e in topo.elements \
	404	if isinstance(e, topdl.Computer)]):
	405	node[e.name] = {
	406	'hostname': "node%03d" % i,
	407	'interfaces': { }
	408	}
	409	for j, inf in enumerate(e.interface):
	410	node[e.name]['interfaces'][inf.name] = 'eth%d' % j
	411
	412	return node
	413
	414
	415	def generate_portal_configs(self, topo, pubkey_base,
	416	secretkey_base, tmpdir, leid, connInfo, services, nodes):
	417
	418	def conninfo_to_dict(key, info):
	419	"""
	420	Make a cpoy of the connection information about key, and flatten it
	421	into a single dict by parsing out any feddAttrs.
	422	"""
	423
	424	rv = None
	425	for i in info:
	426	if key == i.get('portal', "") or \
	427	key in [e.get('element', "") \
	428	for e in i.get('member', [])]:
	429	rv = i.copy()
	430	break
	431
	432	else:
	433	return rv
	434
	435	if 'fedAttr' in rv:
	436	for a in rv['fedAttr']:
	437	attr = a.get('attribute', "")
	438	val = a.get('value', "")
	439	if attr and attr not in rv:
	440	rv[attr] = val
	441	del rv['fedAttr']
	442	return rv
	443
	444	# XXX: un hardcode this
	445	def client_null(f, s):
	446	print >>f, "Service: %s" % s['name']
	447
	448	def client_seer_master(f, s):
	449	print >>f, 'PortalAlias: seer-master'
	450
	451	def client_smb(f, s):
	452	print >>f, "Service: %s" % s['name']
	453	smbshare = None
	454	smbuser = None
	455	smbproj = None
	456	for a in s.get('fedAttr', []):
	457	if a.get('attribute', '') == 'SMBSHARE':
	458	smbshare = a.get('value', None)
	459	elif a.get('attribute', '') == 'SMBUSER':
	460	smbuser = a.get('value', None)
	461	elif a.get('attribute', '') == 'SMBPROJ':
	462	smbproj = a.get('value', None)
	463
	464	if all((smbshare, smbuser, smbproj)):
	465	print >>f, "SMBshare: %s" % smbshare
	466	print >>f, "ProjectUser: %s" % smbuser
	467	print >>f, "ProjectName: %s" % smbproj
	468
	469	def client_hide_hosts(f, s):
	470	for a in s.get('fedAttr', [ ]):
	471	if a.get('attribute', "") == 'hosts':
	472	print >>f, 'Hide: %s' % a.get('value', "")
	473
	474	client_service_out = {
	475	'SMB': client_smb,
	476	'tmcd': client_null,
	477	'seer': client_null,
	478	'userconfig': client_null,
	479	'project_export': client_null,
	480	'seer_master': client_seer_master,
	481	'hide_hosts': client_hide_hosts,
	482	}
	483
	484	def client_seer_master_export(f, s):
	485	print >>f, "AddedNode: seer-master"
	486
	487	def client_seer_local_export(f, s):
	488	print >>f, "AddedNode: control"
	489
	490	client_export_service_out = {
	491	'seer_master': client_seer_master_export,
	492	'local_seer_control': client_seer_local_export,
	493	}
	494
	495	def server_port(f, s):
	496	p = urlparse(s.get('server', 'http://localhost'))
	497	print >>f, 'port: remote:%s:%s:%s' % (p.port, p.hostname, p.port)
	498
	499	def server_null(f,s): pass
	500
	501	def server_seer(f, s):
	502	print >>f, 'seer: true'
	503
	504	server_service_out = {
	505	'SMB': server_port,
	506	'tmcd': server_port,
	507	'userconfig': server_null,
	508	'project_export': server_null,
	509	'seer': server_seer,
	510	'seer_master': server_port,
	511	'hide_hosts': server_null,
	512	}
	513	# XXX: end un hardcode this
	514
	515
	516	seer_out = False
	517	client_out = False
[6fd2b29]	518	control_gw = None
[42cd8a7]	519	for e in [ e for e in topo.elements \
	520	if isinstance(e, topdl.Computer) and e.get_attribute('portal')]:
	521	myname = e.name
	522	type = e.get_attribute('portal_type')
	523
	524	info = conninfo_to_dict(myname, connInfo)
	525
	526	if not info:
	527	raise service_error(service_error.req,
	528	"No connectivity info for %s" % myname)
	529
	530	# Translate to physical name (ProtoGENI doesn't have DNS)
	531	physname = nodes.get(myname, { }).get('hostname', None)
	532	peer = info.get('peer', "")
	533	ldomain = self.domain
	534	ssh_port = info.get('ssh_port', 22)
	535
	536	# Collect this for the client.conf file
	537	if 'masterexperiment' in info:
	538	mproj, meid = info['masterexperiment'].split("/", 1)
	539
	540	active = info.get('active', 'False')
	541
	542	if type in ('control', 'both'):
	543	testbed = e.get_attribute('testbed')
	544	control_gw = myname
	545
	546	cfn = "%s/%s.gw.conf" % (tmpdir, myname.lower())
	547	tunnelconfig = self.tunnel_config
	548	try:
	549	f = open(cfn, "w")
	550	if active == 'True':
	551	print >>f, "active: True"
	552	print >>f, "ssh_port: %s" % ssh_port
	553	if type in ('control', 'both'):
	554	for s in [s for s in services \
	555	if s.get('name', "") in self.imports]:
	556	server_service_out[s['name']](f, s)
	557
	558	if tunnelconfig:
	559	print >>f, "tunnelip: %s" % tunnelconfig
	560	print >>f, "peer: %s" % peer.lower()
	561	print >>f, "ssh_pubkey: /usr/local/federation/etc/%s" % \
	562	pubkey_base
	563	print >>f, "ssh_privkey: /usr/local/federation/etc/%s" % \
	564	secretkey_base
	565	f.close()
	566	except EnvironmentError, e:
	567	raise service_error(service_error.internal,
	568	"Can't write protal config %s: %s" % (cfn, e))
	569
	570	# Done with portals, write the client config file.
	571	try:
	572	f = open("%s/client.conf" % tmpdir, "w")
	573	if control_gw:
	574	print >>f, "ControlGateway: %s" % physname.lower()
	575	for s in services:
	576	if s.get('name',"") in self.imports and \
	577	s.get('visibility','') == 'import':
	578	client_service_out[s['name']](f, s)
	579	if s.get('name', '') in self.exports and \
	580	s.get('visibility', '') == 'export' and \
	581	s['name'] in client_export_service_out:
	582	client_export_service_out[s['name']](f, s)
	583	# Seer uses this.
	584	if mproj and meid:
	585	print >>f, "ExperimentID: %s/%s" % (mproj, meid)
	586	f.close()
	587	except EnvironmentError, e:
	588	raise service_error(service_error.internal,
	589	"Cannot write client.conf: %s" %s)
	590
	591
	592
	593	def export_store_info(self, cf, nodes, ssh_port, connInfo):
	594	"""
	595	For the export requests in the connection info, install the peer names
	596	at the experiment controller via SetValue calls.
	597	"""
	598
	599	for c in connInfo:
	600	for p in [ p for p in c.get('parameter', []) \
	601	if p.get('type', '') == 'output']:
	602
	603	if p.get('name', '') == 'peer':
	604	k = p.get('key', None)
	605	surl = p.get('store', None)
	606	if surl and k and k.index('/') != -1:
	607	if self.create_debug:
	608	req = { 'name': k, 'value': 'debug' }
	609	self.call_SetValue(surl, req, cf)
	610	else:
	611	n = nodes.get(k[k.index('/')+1:], { })
	612	value = n.get('hostname', None)
	613	if value:
	614	req = { 'name': k, 'value': value }
	615	self.call_SetValue(surl, req, cf)
	616	else:
	617	self.log.error("No hostname for %s" % \
	618	k[k.index('/'):])
	619	else:
	620	self.log.error("Bad export request: %s" % p)
	621	elif p.get('name', '') == 'ssh_port':
	622	k = p.get('key', None)
	623	surl = p.get('store', None)
	624	if surl and k:
	625	req = { 'name': k, 'value': ssh_port }
	626	self.call_SetValue(surl, req, cf)
	627	else:
	628	self.log.error("Bad export request: %s" % p)
	629	else:
	630
	631	self.log.error("Unknown export parameter: %s" % \
	632	p.get('name'))
	633	continue
	634
def write_node_config_script(self, elem, node, user, pubkey,
        secretkey, stagingdir, tmpdir):
    """
    Write out the configuration script that is to run on the node
    represented by elem in the topology.  This is called once per node to
    configure.

    node is the manifest entry for elem ('hostname', 'interfaces', 'mac').
    The script is written to <tmpdir>/<hostname>.startup.  Returns the
    pair (script path, hostname), or (None, None) when node has no
    hostname.
    """
    # These little functions/functors just make things more readable.  Each
    # one encapsulates a small task of copying software files or installing
    # them.
    class stage_file_type:
        """
        Write code copying a file from the staging host to the host on
        which the script will run.
        """
        def __init__(self, user, host, stagingdir):
            self.user = user
            self.host = host
            self.stagingdir = stagingdir
            self.scp = "/usr/bin/scp -i .ssh/id_rsa -o " + \
                    "'ForwardX11 no' -o 'StrictHostKeyChecking no' "

        def __call__(self, script, fname, dest="."):
            # Names without a '/' are taken from the staging directory;
            # anything containing a '/' is used as given.
            if fname.find('/') == -1:
                fname = "%s/%s" % (self.stagingdir, fname)
            print >>script, "%s %s@%s:%s %s" % \
                    (self.scp, self.user, self.host, fname, dest)

    def install_tar(script, loc, base):
        """
        Print code to script to install a tarfile in loc.
        """
        tar = "/bin/tar"
        mkdir = "/bin/mkdir"

        print >>script, "%s -p %s" % (mkdir, loc)
        print >>script, "%s -C %s -xzf %s" % (tar, loc, base)

    def install_rpm(script, base):
        """
        Print code to script to install an rpm
        """
        rpm = "/bin/rpm"
        print >>script, "%s --install %s" % (rpm, base)

    ifconfig = "/sbin/ifconfig"
    findif = '/usr/local/etc/emulab/findif'
    stage_file = stage_file_type(user, self.staging_host, stagingdir)
    pname = node.get('hostname', None)
    fed_dir = "/usr/local/federation"
    fed_etc_dir = "%s/etc" % fed_dir
    fed_bin_dir = "%s/bin" % fed_dir
    fed_lib_dir = "%s/lib" % fed_dir

    if not pname:
        return None, None

    sfile = "%s/%s.startup" % (tmpdir, pname)
    script = open(sfile, "w")
    try:
        # Reset the interfaces to the ones in the topo file
        for i in [ i for i in elem.interface \
                if not i.get_attribute('portal')]:
            pinf = node.get('interfaces', { }).get(i.name, None) \
                    if 'interfaces' in node else None
            pmac = node.get('mac', { }).get(i.name, None) \
                    if 'mac' in node else None
            addr = i.get_attribute('ip4_address')
            netmask = i.get_attribute('ip4_netmask') or '255.255.255.0'
            # The interface names in manifests are not to be trusted, so
            # we find the interface to configure using the local node's
            # script to match mac address to interface name.
            if pinf and addr and pmac:
                print >>script, '# %s' % pinf
                print >>script, \
                        "%s `%s %s` %s netmask %s" % \
                        (ifconfig, findif, pmac, addr, netmask)
            else:
                self.log.error("Missing interface or address for %s" \
                        % i.name)

        for l, f in self.federation_software:
            base = os.path.basename(f)
            stage_file(script, base)
            if l: install_tar(script, l, base)
            else: install_rpm(script, base)

        for s in elem.software:
            s_base = s.location.rpartition('/')[2]
            stage_file(script, s_base)
            if s.install: install_tar(script, s.install, s_base)
            else: install_rpm(script, s_base)

        for f in ('hosts', pubkey, secretkey, 'client.conf',
                'userconf'):
            stage_file(script, f, fed_etc_dir)
        if self.sshd:
            stage_file(script, self.sshd, fed_bin_dir)
        if self.sshd_config:
            stage_file(script, self.sshd_config, fed_etc_dir)

        # Look in tmpdir to get the names.  They've all been copied
        # into the (remote) staging dir.  generate_portal_configs writes
        # the gateway file under the lowercased node name, so look for
        # (and stage) that name.
        gw_conf = "%s.gw.conf" % elem.name.lower()
        if os.access("%s/%s" % (tmpdir, gw_conf), os.R_OK):
            stage_file(script, gw_conf, fed_etc_dir)

        # Done with staging, remove the identity used to stage
        # (the removal is emitted commented out in the script)
        print >>script, "#/bin/rm .ssh/id_rsa"

        # Start commands
        if elem.get_attribute('portal') and self.portal_startcommand:
            # Install portal software
            for l, f in self.portal_software:
                base = os.path.basename(f)
                stage_file(script, base)
                if l: install_tar(script, l, base)
                else: install_rpm(script, base)

            # Portals never have a user-specified start command
            print >>script, self.portal_startcommand
        elif self.node_startcommand:
            # XXX: debug
            print >>script, "sudo perl -I%s %s/import_key.pl /users/%s/.ssh/authorized_keys /root/.ssh/authorized_keys" % (fed_lib_dir, fed_bin_dir, user)
            # XXX: debug
            if elem.get_attribute('startup'):
                print >>script, "%s \\$USER '%s'" % \
                        (self.node_startcommand,
                                elem.get_attribute('startup'))
            else:
                print >>script, self.node_startcommand
    finally:
        # Close the script even when generating it fails part way
        script.close()
    return sfile, pname
[42cd8a7]	772
[37ed9a5]	773
	774	def configure_nodes(self, segment_commands, topo, nodes, user,
	775	pubkey, secretkey, stagingdir, tmpdir):
	776	"""
	777	For each node in the topology, generate a script file that copies
	778	software onto it and installs it in the proper places and then runs the
	779	startup command (including the federation commands.
	780	"""
	781
	782
	783
	784	for e in [ e for e in topo.elements if isinstance(e, topdl.Computer)]:
	785	vname = e.name
	786	sfile, pname = self.write_node_config_script(e,
	787	nodes.get(vname, { }),
	788	user, pubkey, secretkey, stagingdir, tmpdir)
	789	if sfile:
	790	if not segment_commands.scp_file(sfile, user, pname):
[42cd8a7]	791	self.log.error("Could not copy script to %s" % pname)
	792	else:
	793	self.log.error("Unmapped node: %s" % vname)
	794
	795	def start_node(self, user, host, node, segment_commands):
[37ed9a5]	796	"""
	797	Copy an identity to a node for the configuration script to be able to
	798	import data and then run the startup script remotely.
	799	"""
[42cd8a7]	800	# Place an identity on the node so that the copying can succeed
[c2f92c5]	801	segment_commands.scp_file( segment_commands.ssh_privkey_file,
	802	user, node, ".ssh/id_rsa")
[42cd8a7]	803	segment_commands.ssh_cmd(user, node,
	804	"sudo /bin/sh ./%s.startup &" % node)
	805
	806	def start_nodes(self, user, host, nodes, segment_commands):
[37ed9a5]	807	"""
	808	Start a thread to initialize each node and wait for them to complete.
	809	Each thread runs start_node.
	810	"""
[42cd8a7]	811	threads = [ ]
	812	for n in nodes:
	813	t = Thread(target=self.start_node, args=(user, host, n,
	814	segment_commands))
	815	t.start()
	816	threads.append(t)
	817
	818	done = [not t.isAlive() for t in threads]
	819	while not all(done):
	820	self.log.info("Waiting for threads %s" % done)
	821	time.sleep(10)
	822	done = [not t.isAlive() for t in threads]
	823
def set_up_staging_filespace(self, segment_commands, user, host,
        stagingdir):
    """
    Set up the staging area on the staging machine.  To reduce the number
    of ssh commands, we compose a script and execute it remotely.

    The script removes and recreates stagingdir on host and then deletes
    itself.  Returns True when the script was copied and ran
    successfully, False otherwise.  The local temporary copy of the
    script is always removed.
    """

    self.log.info("[start_segment]: creating script file")
    try:
        sf, scriptname = tempfile.mkstemp()
        scriptfile = os.fdopen(sf, 'w')
    except EnvironmentError:
        return False

    scriptbase = os.path.basename(scriptname)

    try:
        # Script the filesystem changes
        try:
            scriptfile.write("/bin/rm -rf %s\n" % stagingdir)
            scriptfile.write("mkdir -p %s\n" % stagingdir)
            scriptfile.write("rm -f %s\n" % scriptbase)
        finally:
            scriptfile.close()

        # Move the script to the remote machine
        # XXX: could collide tempfile names on the remote host
        copied = segment_commands.scp_file(scriptname, user, host,
                scriptbase)
    finally:
        # The local copy is not needed whether or not the copy worked
        os.remove(scriptname)

    if not copied:
        return False

    # Execute the script (and the script's last line deletes it)
    if not segment_commands.ssh_cmd(user, host, "sh -x %s" % scriptbase):
        return False
    # Report success explicitly; falling off the end would return None,
    # which callers testing the result would read as failure.
    return True
	856
[37ed9a5]	857	def initialize_protogeni_context(self, segment_commands, certfile, certpw):
	858	"""
	859	Protogeni interactions take a context and a protogeni certificate.
	860	This establishes both for later calls and returns them.
	861	"""
	862	if os.access(certfile, os.R_OK):
	863	ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
	864	else:
	865	self.log.error("[start_segment]: Cannot read certfile: %s" % \
	866	certfile)
	867	return None, None
	868
[42cd8a7]	869	try:
[88dbe63]	870	gcred = segment_commands.slice_authority_call('GetCredential',
	871	{}, ctxt)
	872	except segment_commands.ProtoGENIError, e:
[42cd8a7]	873	raise service_error(service_error.federant,
	874	"ProtoGENI: %s" % e)
[37ed9a5]	875
	876	return ctxt, gcred
	877
	878	def get_free_slicename(self, segment_commands, user, gcred, ctxt):
	879	"""
	880	Find a usable slice name by trying random ones until there's no
	881	collision.
	882	"""
	883
	884	def random_slicename(user):
	885	"""
	886	Return a random slicename by appending 5 letters to the username.
	887	"""
	888	slicename = user
	889	for i in range(0,5):
	890	slicename += random.choice(string.ascii_letters)
	891	return slicename
	892
[42cd8a7]	893	while True:
	894	slicename = random_slicename(user)
	895	try:
	896	param = {
	897	'credential': gcred,
	898	'hrn': slicename,
	899	'type': 'Slice'
	900	}
[3cec20c]	901
	902	if not self.create_debug:
	903	segment_commands.slice_authority_call('Resolve', param,
	904	ctxt)
	905	else:
	906	raise segment_commands.ProtoGENIError(0,0,'Debug')
[42cd8a7]	907	except segment_commands.ProtoGENIError, e:
	908	print e
	909	break
	910
[37ed9a5]	911	return slicename
	912
	913	def allocate_slice(self, segment_commands, slicename, rspec, gcred, ctxt):
	914	"""
	915	Create the slice and allocate resources. If any of this stuff fails,
	916	the allocations will time out on PG in short order, so we just raise
	917	the service_error. Return the slice and sliver credentials as well as
	918	the manifest.
	919	"""
[42cd8a7]	920	try:
	921	param = {
	922	'credential': gcred,
	923	'hrn': slicename,
	924	'type': 'Slice'
	925	}
[88dbe63]	926	slice_cred = segment_commands.slice_authority_call('Register',
[37ed9a5]	927	param, ctxt)
[d49c11c]	928	# Resolve the slice to get the URN that PG has assigned it.
	929	param = {
	930	'credential': gcred,
	931	'type': 'Slice',
	932	'hrn': slicename
	933	}
[88dbe63]	934	data = segment_commands.slice_authority_call('Resolve', param,
[d49c11c]	935	ctxt)
	936	if 'urn' in data:
	937	slice_urn = data['urn']
	938	else:
	939	raise service_error(service_error.federant,
[3cec20c]	940	"No URN returned for slice %s" % slicename)
[4875e93]	941
	942	if 'creator_urn' in data:
	943	creator_urn = data['creator_urn']
	944	else:
	945	raise service_error(service_error.federant,
[3cec20c]	946	"No creator URN returned for slice %s" % slicename)
[42cd8a7]	947	# Populate the ssh keys (let PG format them)
	948	param = {
	949	'credential': gcred,
	950	}
[88dbe63]	951	keys = segment_commands.slice_authority_call('GetKeys', param,
[37ed9a5]	952	ctxt)
[d49c11c]	953	# Create a Sliver
[42cd8a7]	954	param = {
[d49c11c]	955	'credentials': [ slice_cred ],
	956	'rspec': rspec,
[4875e93]	957	'users': [ {
	958	'urn': creator_urn,
	959	'keys': keys,
	960	},
	961	],
[d49c11c]	962	'slice_urn': slice_urn,
[42cd8a7]	963	}
[208797c]	964	rv = segment_commands.component_manager_call(
[d49c11c]	965	'CreateSliver', param, ctxt)
[208797c]	966
	967	# the GENIAPI AM just hands back the manifest, bit the ProtoGENI
	968	# API hands back a sliver credential. This just finds the format
	969	# of choice.
	970	if isinstance(rv, tuple):
	971	manifest = rv[1]
	972	else:
	973	manifest = rv
[42cd8a7]	974	except segment_commands.ProtoGENIError, e:
	975	raise service_error(service_error.federant,
	976	"ProtoGENI: %s %s" % (e.code, e))
	977
[add53ea]	978	return (slice_urn, slice_cred, manifest, rspec)
[37ed9a5]	979
def wait_for_slice(self, segment_commands, slice_cred, slice_urn, ctxt,
        timeout=None, poll_interval=30):
    """
    Wait for the given slice to finish its startup. Return the final
    status.

    The component manager's SliverStatus call is polled until it reports
    'failed' or 'ready', and that status is returned.  If timeout (in
    seconds) is given and the deadline has passed when a poll still shows
    an unfinished sliver, the string 'timeout' is returned instead; with
    timeout=None the wait is unbounded.  poll_interval is the number of
    seconds slept between polls (30 by default, the value that used to be
    hard-coded).

    A ProtoGENIError from the component manager is raised as a federant
    service_error.
    """
    completed_states = ('failed', 'ready')
    deadline = None
    if timeout is not None:
        deadline = time.time() + timeout

    param = {
            'credentials': [ slice_cred ],
            'slice_urn': slice_urn,
            }
    try:
        while True:
            r = segment_commands.component_manager_call(
                    'SliverStatus', dict(param), ctxt)
            # GENIAPI uses geni_status as the key, so check for both.  A
            # reply with neither key counts as still changing.
            status = r.get('status', r.get('geni_status', 'changing'))
            if status in completed_states:
                return status
            # The deadline is only checked after a poll, so at least one
            # status request is always made.
            if deadline is not None and time.time() > deadline:
                return 'timeout'
            time.sleep(poll_interval)
    except segment_commands.ProtoGENIError as e:
        raise service_error(service_error.federant,
                "ProtoGENI: %s %s" % (e.code, e))
	1009
def delete_slice(self, segment_commands, slice_cred, slice_urn, ctxt):
    """
    Ask the component manager to delete the slice's resources.

    An error from the service is logged as a warning and otherwise
    ignored, because the soft state will go away anyway.
    """
    request = {
            'credentials': [ slice_cred, ],
            'slice_urn': slice_urn,
            }
    try:
        segment_commands.component_manager_call('DeleteSlice',
                request, ctxt)
    except segment_commands.ProtoGENIError as e:
        self.log.warn("ProtoGENI: %s" % e)
	1024
	1025
	1026
	1027	def start_segment(self, segment_commands, aid, user, rspec, pubkey,
	1028	secretkey, ename, stagingdir, tmpdir, certfile, certpw,
	1029	export_certfile, topo, connInfo, services, timeout=0):
	1030	"""
	1031	Start a sub-experiment on a federant.
	1032
	1033	Get the current state, modify or create as appropriate, ship data
	1034	and configs and start the experiment. There are small ordering
	1035	differences based on the initial state of the sub-experiment.
	1036	"""
	1037
	1038	# Local software dir
	1039	lsoftdir = "%s/software" % tmpdir
	1040	host = self.staging_host
	1041
	1042	ctxt, gcred = self.initialize_protogeni_context(segment_commands,
	1043	certfile, certpw)
	1044
[814b5e5]	1045	if not ctxt: return False, {}
[37ed9a5]	1046
	1047	self.set_up_staging_filespace(segment_commands, user, host, stagingdir)
	1048	slicename = self.get_free_slicename(segment_commands, user, gcred, ctxt)
	1049	self.log.info("Creating %s" % slicename)
[add53ea]	1050	slice_urn, slice_cred, manifest, rpsec = self.allocate_slice(
[37ed9a5]	1051	segment_commands, slicename, rspec, gcred, ctxt)
	1052
[42cd8a7]	1053	# With manifest in hand, we can export the portal node names.
	1054	if self.create_debug: nodes = self.fake_manifest(topo)
	1055	else: nodes = self.manifest_to_dict(manifest)
	1056
	1057	self.export_store_info(export_certfile, nodes, self.ssh_port,
	1058	connInfo)
	1059	self.generate_portal_configs(topo, pubkey, secretkey, tmpdir,
	1060	ename, connInfo, services, nodes)
	1061
	1062	# Copy software to the staging machine (done after generation to copy
	1063	# those, too)
	1064	for d in (tmpdir, lsoftdir):
	1065	if os.path.isdir(d):
	1066	for f in os.listdir(d):
	1067	if not os.path.isdir("%s/%s" % (d, f)):
	1068	if not segment_commands.scp_file("%s/%s" % (d, f),
	1069	user, host, "%s/%s" % (stagingdir, f)):
	1070	self.log.error("Scp failed")
[814b5e5]	1071	return False, {}
[42cd8a7]	1072
	1073
	1074	# Now we wait for the nodes to start on PG
[4875e93]	1075	status = self.wait_for_slice(segment_commands, slice_cred, slice_urn,
[d49c11c]	1076	ctxt, timeout=300)
[42cd8a7]	1077	if status == 'failed':
	1078	self.log.error('Sliver failed to start on ProtoGENI')
[d49c11c]	1079	self.delete_slice(segment_commands, slice_cred, slice_urn, ctxt)
[814b5e5]	1080	return False, {}
[c2f92c5]	1081	elif status == 'timeout':
	1082	self.log.error('Sliver failed to start on ProtoGENI (timeout)')
[d49c11c]	1083	self.delete_slice(segment_commands, slice_cred, slice_urn, ctxt)
[814b5e5]	1084	return False, {}
[42cd8a7]	1085	else:
[37ed9a5]	1086	# All good: save ProtoGENI info in shared state
[42cd8a7]	1087	self.state_lock.acquire()
[d49c11c]	1088	self.allocation[aid]['slice_urn'] = slice_urn
[42cd8a7]	1089	self.allocation[aid]['slice_name'] = slicename
	1090	self.allocation[aid]['slice_credential'] = slice_cred
	1091	self.allocation[aid]['manifest'] = manifest
[add53ea]	1092	self.allocation[aid]['rspec'] = rspec
[42cd8a7]	1093	self.allocation[aid]['certfile'] = certfile
	1094	self.allocation[aid]['certpw'] = certpw
	1095	self.write_state()
	1096	self.state_lock.release()
	1097
	1098	# Now we have configuration to do for ProtoGENI
[37ed9a5]	1099	self.configure_nodes(segment_commands, topo, nodes, user, pubkey,
	1100	secretkey, stagingdir, tmpdir)
[42cd8a7]	1101
	1102	self.start_nodes(user, self.staging_host,
	1103	[ n.get('hostname', None) for n in nodes.values()],
	1104	segment_commands)
	1105
	1106	# Everything has gone OK.
	1107	return True, dict([(k, n.get('hostname', None)) \
	1108	for k, n in nodes.items()])
	1109
def generate_rspec(self, topo, softdir, connInfo):
    """
    Produce the rspec for the local part of topo.

    Works on a clone of topo: Segment elements are dropped, every software
    location has its leading directory path replaced by softdir, the
    'startup' attribute is removed from computers whose start command is
    handled here, and portal interfaces are removed from computers.  The
    result is converted with topdl.topology_to_rspec using a filter that
    pins the disk image.  connInfo is not used.
    """

    def image_filter(e):
        # Force a useful image. Without picking this the nodes can get
        # different images and there is great pain.
        if not isinstance(e, topdl.Computer):
            return ""
        return ('<disk_image name="urn:publicid:IDN+emulab.net+'
                'image+emulab-ops//FEDORA10-STD" />')

    # Main line of generate
    t = topo.clone()

    # Start commands by node name.  ProtoGENI requires setup before the
    # federation commands run, so they are not put in the rspec.  Note
    # that this dict is only filled in here; it is not returned.
    starts = { }

    # Weed out the things we aren't going to instantiate: Segments and
    # portal interfaces, and rewrite software paths from the original
    # testbed to local testbed paths.  Iterate over a copy so elements
    # can be removed from t.elements inside the loop.
    for e in list(t.elements):
        if isinstance(e, topdl.Segment):
            t.elements.remove(e)

        # Fix software paths
        for s in getattr(e, 'software', []):
            s.location = re.sub("^.*/", softdir, s.location)

        if not isinstance(e, topdl.Computer):
            continue

        if e.get_attribute('portal') and self.portal_startcommand:
            # Portals never have a user-specified start command
            starts[e.name] = self.portal_startcommand
        elif self.node_startcommand:
            user_start = e.get_attribute('startup')
            if user_start:
                starts[e.name] = "%s \\$USER '%s'" % \
                        (self.node_startcommand, user_start)
                e.remove_attribute('startup')
            else:
                starts[e.name] = self.node_startcommand

        # Remove portal interfaces
        e.interface = [i for i in e.interface \
                if not i.get_attribute('portal')]

    t.substrates = [ s.clone() for s in t.substrates ]
    t.incorporate_elements()

    # Customize the rspec output to use the image we like, convert to
    # rspec and return it
    return topdl.topology_to_rspec(t, [ image_filter ])
	1165
def retrieve_software(self, topo, certfile, softdir):
    """
    Collect the software that nodes in the topology need loaded and stage
    it locally. This implies retrieving it from the experiment_controller
    and placing it into softdir. Certfile is used to prove that this node
    has access to that data (it's the allocation/segment fedid). Finally
    local portal and federation software is also copied to the same staging
    directory for simplicity - all software needed for experiment creation
    is in softdir.

    softdir is created here, so it must not exist yet (os.mkdir raises
    OSError otherwise).  A failed retrieval is raised as an internal
    service_error naming the location that failed.
    """
    # Each distinct location is fetched once, however many nodes use it.
    sw = set(s.location
            for e in topo.elements
            for s in getattr(e, 'software', []))

    os.mkdir(softdir)
    for s in sw:
        self.log.debug("Retrieving %s" % s)
        try:
            get_url(s, certfile, softdir)
        except Exception as e:
            # Exception rather than a bare except, so that interpreter
            # shutdown and keyboard interrupts are not turned into
            # retrieval errors.
            raise service_error(service_error.internal,
                    "Error retrieving %s: %s" % (s, e))

    # Copy local portal node software to the tempdir
    for s in (self.portal_software, self.federation_software):
        for l, f in s:
            copy_file(f, "%s/%s" % (softdir, os.path.basename(f)))
	1195
	1196
	1197	def initialize_experiment_info(self, attrs, aid, certfile, tmpdir):
	1198	"""
	1199	Gather common configuration files, retrieve or create an experiment
	1200	name and project name, and return the ssh_key filenames. Create an
	1201	allocation log bound to the state log variable as well.
	1202	"""
[c119839]	1203	configs = set(('hosts', 'ssh_pubkey', 'ssh_secretkey'))
[3551ae1]	1204	ename = None
	1205	pubkey_base = None
	1206	secretkey_base = None
	1207	alloc_log = None
	1208
	1209	for a in attrs:
	1210	if a['attribute'] in configs:
	1211	try:
	1212	self.log.debug("Retrieving %s" % a['value'])
	1213	get_url(a['value'], certfile, tmpdir)
	1214	except:
	1215	t, v, st = sys.exc_info()
	1216	raise service_error(service_error.internal,
	1217	"Error retrieving %s: %s" % (a.get('value', ""), v))
	1218	if a['attribute'] == 'ssh_pubkey':
	1219	pubkey_base = a['value'].rpartition('/')[2]
	1220	if a['attribute'] == 'ssh_secretkey':
	1221	secretkey_base = a['value'].rpartition('/')[2]
	1222	if a['attribute'] == 'experiment_name':
	1223	ename = a['value']
	1224
	1225	if not ename:
	1226	ename = ""
	1227	for i in range(0,5):
	1228	ename += random.choice(string.ascii_letters)
	1229	self.log.warn("No experiment name: picked one randomly: %s" \
	1230	% ename)
	1231
	1232	self.state_lock.acquire()
	1233	if self.allocation.has_key(aid):
	1234	cf, user, ssh_key, cpw = self.allocation[aid]['credentials']
	1235	self.allocation[aid]['experiment'] = ename
	1236	self.allocation[aid]['log'] = [ ]
	1237	# Create a logger that logs to the experiment's state object as
	1238	# well as to the main log file.
	1239	alloc_log = logging.getLogger('fedd.access.%s' % ename)
	1240	h = logging.StreamHandler(
	1241	list_log.list_log(self.allocation[aid]['log']))
	1242	# XXX: there should be a global one of these rather than
	1243	# repeating the code.
	1244	h.setFormatter(logging.Formatter(
	1245	"%(asctime)s %(name)s %(message)s",
	1246	'%d %b %y %H:%M:%S'))
	1247	alloc_log.addHandler(h)
	1248	self.write_state()
	1249	else:
	1250	self.log.error("No allocation for %s!?" % aid)
	1251	self.state_lock.release()
[c119839]	1252
[3551ae1]	1253	return (ename, pubkey_base, secretkey_base, cf, user, ssh_key,
	1254	cpw, alloc_log)
	1255
def finalize_experiment(self, topo, nodes, aid, alloc_id, proof):
    """
    Build the StartSegment response and remember it in the allocation.

    nodes maps topology names to the names assigned to them; each entry
    becomes one element of the response's 'embedding'.  The response also
    carries alloc_id, the accumulated allocation log, a copy of topo as
    a topdl description and the access proof.  It is stored under the
    allocation's 'started' key so a retried StartSegment can replay it,
    and a deep copy is returned.

    Raises KeyError if there is no allocation (or no log) for aid; the
    state lock is released in that case too.
    """
    # Copy the assigned names into the return topology
    rvtopo = topo.clone()
    embedding = [ { 'toponame': k, 'physname': [ n ], } \
            for k, n in nodes.items() ]

    # Read the log and store the response under the lock.  The finally
    # clause keeps a missing allocation or a failed state write from
    # leaving the lock held.
    self.state_lock.acquire()
    try:
        alloc = self.allocation[aid]
        logv = "".join(alloc['log'])
        # It's possible that the StartSegment call gets retried (!).
        # if the 'started' key is in the allocation, we'll return it rather
        # than redo the setup.
        alloc['started'] = {
                'allocID': alloc_id,
                'allocationLog': logv,
                'segmentdescription': {
                    'topdldescription': rvtopo.to_dict() },
                'embedding': embedding,
                'proof': proof.to_dict(),
                }
        retval = copy.deepcopy(alloc['started'])
        self.write_state()
    finally:
        self.state_lock.release()

    return retval
	1285
	1286	def StartSegment(self, req, fid):
[c119839]	1287	err = None # Any service_error generated after tmpdir is created
	1288	rv = None # Return value from segment creation
	1289
	1290	try:
	1291	req = req['StartSegmentRequestBody']
[3551ae1]	1292	topref = req['segmentdescription']['topdldescription']
[c119839]	1293	except KeyError:
	1294	raise service_error(service_error.req, "Badly formed request")
	1295
	1296	connInfo = req.get('connection', [])
	1297	services = req.get('service', [])
	1298	auth_attr = req['allocID']['fedid']
	1299	aid = "%s" % auth_attr
	1300	attrs = req.get('fedAttr', [])
[e83f2f2]	1301	access_ok, proof = self.auth.check_attribute(fid, auth_attr,
	1302	with_proof=True)
	1303	if not access_ok:
[c119839]	1304	raise service_error(service_error.access, "Access denied")
[cd06678]	1305	else:
	1306	# See if this is a replay of an earlier succeeded StartSegment -
	1307	# sometimes SSL kills 'em. If so, replay the response rather than
	1308	# redoing the allocation.
	1309	self.state_lock.acquire()
	1310	retval = self.allocation[aid].get('started', None)
	1311	self.state_lock.release()
	1312	if retval:
	1313	self.log.warning("Duplicate StartSegment for %s: " % aid + \
	1314	"replaying response")
	1315	return retval
[c119839]	1316
[3551ae1]	1317	if topref:
	1318	topo = topdl.Topology(**topref)
[c119839]	1319	else:
	1320	raise service_error(service_error.req,
	1321	"Request missing segmentdescription'")
	1322
	1323	certfile = "%s/%s.pem" % (self.certdir, auth_attr)
	1324	try:
	1325	tmpdir = tempfile.mkdtemp(prefix="access-")
	1326	softdir = "%s/software" % tmpdir
[d3c8759]	1327	except EnvironmentError:
[c119839]	1328	raise service_error(service_error.internal, "Cannot create tmp dir")
	1329
	1330	# Try block alllows us to clean up temporary files.
	1331	try:
[3551ae1]	1332	self.retrieve_software(topo, certfile, softdir)
	1333	self.configure_userconf(services, tmpdir)
	1334	ename, pubkey_base, secretkey_base, cf, user, ssh_key, \
	1335	cpw, alloc_log = self.initialize_experiment_info(attrs,
	1336	aid, certfile, tmpdir)
[c119839]	1337	self.import_store_info(certfile, connInfo)
	1338	rspec = self.generate_rspec(topo, "%s/%s/" \
[3551ae1]	1339	% (self.staging_dir, ename), connInfo)
[c119839]	1340
[208797c]	1341	segment_commands = self.api_proxy(keyfile=ssh_key,
[c119839]	1342	debug=self.create_debug, log=alloc_log,
	1343	ch_url = self.ch_url, sa_url=self.sa_url,
	1344	cm_url=self.cm_url)
[42cd8a7]	1345	rv, nodes = self.start_segment(segment_commands, aid, user, rspec,
	1346	pubkey_base,
	1347	secretkey_base, ename,
[c119839]	1348	"%s/%s" % (self.staging_dir, ename), tmpdir, cf, cpw,
[593e901]	1349	certfile, topo, connInfo, services)
[37ed9a5]	1350	except EnvironmentError, e:
[3551ae1]	1351	err = service_error(service_error.internal, "%s" % e)
[c119839]	1352	except service_error, e:
	1353	err = e
[3551ae1]	1354	except:
	1355	t, v, st = sys.exc_info()
	1356	err = service_error(service_error.internal, "%s: %s" % \
	1357	(v, traceback.extract_tb(st)))
[c119839]	1358
	1359	# Walk up tmpdir, deleting as we go
[3551ae1]	1360	if self.cleanup: self.remove_dirs(tmpdir)
	1361	else: self.log.debug("[StartSegment]: not removing %s" % tmpdir)
[c119839]	1362
	1363	if rv:
[e83f2f2]	1364	return self.finalize_experiment(topo, nodes, aid, req['allocID'],
	1365	proof)
[c119839]	1366	elif err:
	1367	raise service_error(service_error.federant,
	1368	"Swapin failed: %s" % err)
	1369	else:
	1370	raise service_error(service_error.federant, "Swapin failed")
	1371
def stop_segment(self, segment_commands, user, stagingdir, slice_cred,
        slice_urn, certfile, certpw):
    """
    Stop a sub experiment: remove its staging directory on the staging
    host (when stagingdir is given) and delete its ProtoGENI slice (when
    slice_cred is given).

    Returns True when those steps ran, False if self.ssh_cmd_timeout was
    raised along the way.
    """
    host = self.staging_host
    try:
        # Clean out tar files: we've gone over quota in the past
        if stagingdir:
            segment_commands.ssh_cmd(user, host, "rm -rf %s" % stagingdir)
        if slice_cred:
            self.log.error('Removing Sliver on ProtoGENI')
            ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
            self.delete_slice(segment_commands, slice_cred, slice_urn, ctxt)
    except self.ssh_cmd_timeout:
        return False
    return True
	1391
def TerminateSegment(self, req, fid):
    """
    Service entry point: tear down the segment named in req on behalf of
    fid.

    The request must carry a TerminateSegmentRequestBody with an
    allocID/fedid, and fid must be authorized for that allocation.  The
    allocation's stored credentials, slice and experiment name are used
    to remove the staging directory and delete the slice; if the
    allocation is unknown there is nothing to remove and the call still
    succeeds.

    Returns {'allocID': ..., 'proof': ...}.  Raises service_error for a
    malformed request or denied access.
    """
    try:
        req = req['TerminateSegmentRequestBody']
        auth_attr = req['allocID']['fedid']
    except KeyError:
        raise service_error(service_error.req, "Badly formed request")

    aid = "%s" % auth_attr
    access_ok, proof = self.auth.check_attribute(fid, auth_attr,
            with_proof=True)
    if not access_ok:
        raise service_error(service_error.access, "Access denied")

    cf, user, ssh_key, cpw = (None, None, None, None)
    slice_cred = None
    slice_urn = None
    ename = None

    # Snapshot what we need under the lock; the finally clause keeps an
    # allocation without stored credentials (KeyError) from leaving the
    # lock held.
    self.state_lock.acquire()
    try:
        if aid in self.allocation:
            alloc = self.allocation[aid]
            cf, user, ssh_key, cpw = alloc['credentials']
            slice_cred = alloc.get('slice_credential', None)
            slice_urn = alloc.get('slice_urn', None)
            ename = alloc.get('experiment', None)
    finally:
        self.state_lock.release()

    if ename:
        staging = "%s/%s" % ( self.staging_dir, ename)
    else:
        self.log.warn("Can't find experiment name for %s" % aid)
        staging = None

    segment_commands = self.api_proxy(keyfile=ssh_key,
            debug=self.create_debug, ch_url = self.ch_url,
            sa_url=self.sa_url, cm_url=self.cm_url)
    # The outcome of stop_segment is not reported to the caller.
    self.stop_segment(segment_commands, user, staging, slice_cred,
            slice_urn, cf, cpw)
    return { 'allocID': req['allocID'], 'proof': proof.to_dict() }
[dd3e38b]	1431
def renew_segment(self, segment_commands, name, scred, slice_urn, interval,
        certfile, certpw):
    """
    Linear code through the segment renewal calls.

    Asks the slice authority to extend the slice to interval seconds from
    now (GMT), fetches a fresh slice credential, then asks the component
    manager to renew the sliver with it.

    Returns the new slice credential, or None if any slice authority call
    fails (logged as an error).  A failure of the component manager
    renewal is only logged as a warning; the new credential is still
    returned.
    """
    ctxt = fedd_ssl_context(my_cert=certfile, password=certpw)
    sa_call = segment_commands.slice_authority_call

    try:
        expiration = time.strftime("%Y%m%dT%H:%M:%S",
                time.gmtime(time.time() + interval))
        cred = sa_call('GetCredential', {}, ctxt)

        sa_call('RenewSlice',
                {
                    'credential': scred,
                    'expiration': expiration
                }, ctxt)

        new_scred = sa_call('GetCredential',
                {
                    'credential': cred,
                    'urn': slice_urn,
                    'type': 'Slice',
                }, ctxt)
    except segment_commands.ProtoGENIError as e:
        self.log.error("Failed to extend slice %s: %s" % (name, e))
        return None

    try:
        segment_commands.component_manager_call('RenewSlice',
                {
                    'credentials': [new_scred,],
                    'slice_urn': slice_urn,
                }, ctxt)
    except segment_commands.ProtoGENIError as e:
        self.log.warn("Failed to renew sliver for %s: %s" % (name, e))

    return new_scred
	1471
	1472
def RenewSlices(self):
    """
    Renew the ProtoGENI slice of every allocation that has one, then
    schedule the next scan.

    Each allocation is examined under the state lock, but the renewal
    calls themselves are made without it.  An allocation whose credential
    file is unreadable is skipped with an error message.  A renewed slice
    gets its new credential stored and written to the state file.

    The next scan is scheduled with a Timer after renewal_interval/4
    seconds, and is scheduled even if this scan ends with an exception, so
    one bad pass does not stop renewals for good.
    """
    try:
        self.log.info("Scanning for slices to renew")
        self.state_lock.acquire()
        try:
            aids = list(self.allocation.keys())
        finally:
            self.state_lock.release()

        for aid in aids:
            self.state_lock.acquire()
            try:
                present = aid in self.allocation
                if present:
                    alloc = self.allocation[aid]
                    name = alloc.get('slice_name', None)
                    scred = alloc.get('slice_credential', None)
                    slice_urn = alloc.get('slice_urn', None)
                    cf, user, ssh_key, cpw = alloc['credentials']
            finally:
                self.state_lock.release()

            if not present:
                # The allocation was released after the key snapshot was
                # taken: there is nothing to renew, and no credentials to
                # check (values from a previous iteration must not be
                # reused for it).
                continue

            if not os.access(cf, os.R_OK):
                self.log.error(
                        "[RenewSlices] cred.file %s unreadable, ignoring" % cf)
                continue

            # There's a ProtoGENI slice associated with the segment; renew it.
            if name and scred and slice_urn:
                segment_commands = self.api_proxy(log=self.log,
                        debug=self.create_debug, keyfile=ssh_key,
                        cm_url = self.cm_url, sa_url = self.sa_url,
                        ch_url = self.ch_url)
                new_scred = self.renew_segment(segment_commands, name, scred,
                        slice_urn, self.renewal_interval, cf, cpw)
                if new_scred:
                    self.log.info("Slice %s renewed until %s GMT" % \
                            (name, time.asctime(time.gmtime(\
                                time.time()+self.renewal_interval))))
                    self.state_lock.acquire()
                    try:
                        # The allocation may have been released while the
                        # renewal was in flight.
                        if aid in self.allocation:
                            self.allocation[aid]['slice_credential'] = \
                                    new_scred
                            self.write_state()
                    finally:
                        self.state_lock.release()
                else:
                    self.log.info("Failed to renew slice %s " % name)
    finally:
        # Let's do this all again soon. (4 tries before the slices time out)
        t = Timer(self.renewal_interval/4, self.RenewSlices)
        t.start()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: