Context Navigation

source: fedd/federation/desktop_access.py @ f24fc8d

Last change on this file since f24fc8d was 5dbcc93, checked in by Ted Faber <faber@…>, 12 years ago
Last few tricks. Manage hosts, make whole script sudo
Property mode set to `100644`
File size: 20.2 KB

Rev	Line
[1819839]	1	#!/usr/local/bin/python
	2
	3	import os,sys
	4	import re
	5	import string
	6	import copy
	7	import pickle
	8	import logging
	9	import random
	10	import subprocess
	11
	12	from util import *
	13	from deter import fedid, generate_fedid
	14	from authorizer import authorizer, abac_authorizer
	15	from service_error import service_error
	16	from remote_service import xmlrpc_handler, soap_handler, service_caller
	17
	18	from deter import topdl
	19
	20	from access import access_base
	21
	22	# Make log messages disappear if noone configures a fedd logger. This is
	23	# something of an incantation, but basically it creates a logger object
	24	# registered to fedd.access if no other module above us has. It's an extra
	25	# belt for the suspenders.
	26	class nullHandler(logging.Handler):
	27	def emit(self, record): pass
	28
	29	fl = logging.getLogger("fedd.access")
	30	fl.addHandler(nullHandler())
	31
	32
	33	# The plug-in itself.
	34	class access(access_base):
	35	"""
	36	This is the desktop access plug-in for fedd. It handles the RequestAccess,
	37	ReleaseAccess, StartSegment and TerminateSegment requests and keeps internal
	38	state. It attaches this machine to one experiment at a time through an ssh
	39	tunnel, and makes use of the general routines in access.access_base.
	40
	41	Detailed comments are in the code.
	42	"""
	43	def __init__(self, config=None, auth=None):
	44	"""
	45	Initializer. Pulls parameters out of the ConfigParser's access
	46	section, and initializes simple internal state. This version reads a
	47	maximum integer to assign from the configuration file, while most other
	48	configuration entries are read by the base class.
	49
	50	An access database in the cannonical format is also read as well as a
	51	state database that is a hash of internal state. Routines to
	52	manipulate these are in the base class, but specializations appear
	53	here.
	54
	55	The access database maps users to a simple string.
	56	"""
	57
	58	# Calling the base initializer, which reads canonical configuration
	59	# information and initializes canonical members.
	60	access_base.__init__(self, config, auth)
	61	# Reading the maximum integer parameter from the configuration file
	62
	63	self.src_addr = config.get('access', 'interface_address')
	64	self.router = config.get('access', 'gateway')
	65	self.hostname = config.get('access', 'hostname')
	66	# Storage for ephemeral ssh keys and host files
	67	self.localdir = config.get('access', 'localdir')
	68	self.ssh_identity = None
	69
	70	# hostname is the name of the ssh endpoint for the other side. That
	71	# side needs it to set up routing tables. If hostname is not
	72	# available, but an IP address is, use that.
	73	if self.hostname is None:
	74	if self.src_addr is None:
	75	raise service_error(service_error.server_config,
	76	'Hostname or interface_address must be set in config')
	77	self.hostname = self.src_addr
	78
	79	self.ssh_port = config.get('access', 'ssh_port', '22')
	80
	81	# authorization information
	82	self.auth_type = config.get('access', 'auth_type') \
	83	or 'abac'
	84	self.auth_dir = config.get('access', 'auth_dir')
	85	accessdb = config.get("access", "accessdb")
	86	# initialize the authorization system. We make a call to
	87	# read the access database that maps from authorization information
	88	# into local information. The local information is parsed by the
	89	# translator above.
	90	if self.auth_type == 'abac':
	91	# Load the current authorization state
	92	self.auth = abac_authorizer(load=self.auth_dir)
	93	self.access = [ ]
	94	if accessdb:
	95	try:
	96	self.read_access(accessdb)
	97	except EnvironmentError, e:
	98	self.log.error("Cannot read %s: %s" % \
	99	(config.get("access", "accessdb"), e))
	100	raise e
	101	else:
	102	raise service_error(service_error.internal,
	103	"Unknown auth_type: %s" % self.auth_type)
	104
	105	# The superclass has read the state, but if this is the first run ever,
	106	# we must initialise the running flag. This plugin only supports one
	107	# connection, so StartSegment will fail when self.state['running'] is
	108	# true.
	109	self.state_lock.acquire()
	110	if 'running' not in self.state:
	111	self.state['running'] = False
	112	self.state_lock.release()
	113
	114	# These dictionaries register the plug-in's local routines for handline
	115	# these four messages with the server code above. There's a version
	116	# for SOAP and XMLRPC, depending on which interfaces the plugin
	117	# supports. There's rarely a technical reason not to support one or
	118	# the other - the plugin code almost never deals with the transport -
	119	# but if a plug-in writer wanted to disable XMLRPC, they could leave
	120	# the self.xmlrpc_services dictionary empty.
	121	self.soap_services = {\
	122	'RequestAccess': soap_handler("RequestAccess", self.RequestAccess),
	123	'ReleaseAccess': soap_handler("ReleaseAccess", self.ReleaseAccess),
	124	'StartSegment': soap_handler("StartSegment", self.StartSegment),
	125	'TerminateSegment': soap_handler("TerminateSegment",
	126	self.TerminateSegment),
	127	}
	128	self.xmlrpc_services = {\
	129	'RequestAccess': xmlrpc_handler('RequestAccess',
	130	self.RequestAccess),
	131	'ReleaseAccess': xmlrpc_handler('ReleaseAccess',
	132	self.ReleaseAccess),
	133	'StartSegment': xmlrpc_handler("StartSegment", self.StartSegment),
	134	'TerminateSegment': xmlrpc_handler('TerminateSegment',
	135	self.TerminateSegment),
	136	}
	137	self.call_SetValue = service_caller('SetValue', log=self.log)
	138	self.call_GetValue = service_caller('GetValue', log=self.log)
	139
[2dc99e3]	140	# ReleaseAccess come from the base class, this is a slightly modified
	141	# RequestAccess from the base that includes a fedAttr to force this side to
	142	# be active.
	143	def RequestAccess(self, req, fid):
	144	"""
	145	Handle an access request. Success here maps the requester into the
	146	local access control space and establishes state about that user keyed
	147	to a fedid. We also save a copy of the certificate underlying that
	148	fedid so this allocation can access configuration information and
	149	shared parameters on the experiment controller.
	150	"""
	151
	152	self.log.info("RequestAccess called by %s" % fid)
	153	# The dance to get into the request body
	154	if req.has_key('RequestAccessRequestBody'):
	155	req = req['RequestAccessRequestBody']
	156	else:
	157	raise service_error(service_error.req, "No request!?")
	158
	159	# Base class lookup routine. If this fails, it throws a service
	160	# exception denying access that triggers a fault response back to the
	161	# caller.
	162	found, owners, proof = self.lookup_access(req, fid)
	163	self.log.info(
	164	"[RequestAccess] Access granted local creds %s" % found)
	165	# Make a fedid for this allocation
	166	allocID, alloc_cert = generate_fedid(subj="alloc", log=self.log)
	167	aid = unicode(allocID)
	168
	169	# Store the data about this allocation:
	170	self.state_lock.acquire()
	171	self.state[aid] = { }
	172	self.state[aid]['user'] = found
	173	self.state[aid]['owners'] = owners
	174	self.state[aid]['auth'] = set()
	175	# Authorize the creating fedid and the principal representing the
	176	# allocation to manipulate it.
	177	self.append_allocation_authorization(aid,
	178	((fid, allocID), (allocID, allocID)))
	179	self.write_state()
	180	self.state_lock.release()
	181
	182	# Create a directory to stash the certificate in, ans stash it.
	183	try:
	184	f = open("%s/%s.pem" % (self.certdir, aid), "w")
	185	print >>f, alloc_cert
	186	f.close()
	187	except EnvironmentError, e:
	188	raise service_error(service_error.internal,
	189	"Can't open %s/%s : %s" % (self.certdir, aid, e))
	190	self.log.debug('[RequestAccess] Returning allocation ID: %s' % allocID)
	191	msg = {
	192	'allocID': { 'fedid': allocID },
	193	'fedAttr': [{ 'attribute': 'nat_portals', 'value': 'True' }],
	194	'proof': proof.to_dict()
	195	}
	196	return msg
[1819839]	197
	198	def validate_topology(self, top):
	199	'''
	200	Validate the topology. Desktops can only be single connections.
	201	Though the topology will include a portal and a node, the access
	202	controller will implement both on one node.
	203
	204	As more capabilities are added to the contoller the constraints here
	205	will relax.
	206	'''
	207
	208	comps = []
	209	for e in top.elements:
	210	if isinstance(e, topdl.Computer): comps.append(e)
	211	if len(comps) > 2:
	212	raise service_error(service_error.req,
	213	"Desktop only supports 1-node subexperiments")
	214
	215	portals = 0
	216	for c in comps:
	217	if c.get_attribute('portal') is not None:
	218	portals += 1
	219	continue
	220	if len(c.interface) > 1:
	221	raise service_error(service_error.req,
	222	"Desktop Node has more than one interface")
	223	i = c.interface[0]
	224	if len(i.subs) > 1:
	225	raise service_error(service_error.req,
	226	"Desktop Node has more than one substate on interface")
	227	sub = i.subs[0]
	228	for i in sub.interfaces:
	229	if i.element not in comps:
	230	raise service_error(service_error.req,
	231	"Desktop Node connected to non-portal")
	232
	233	if portals > 1:
	234	raise service_error(service_error.req,
	235	"Desktop segment has more than one portal")
	236	return True
	237
	238	def validate_connInfo(self, connInfo):
	239	if len(connInfo) != 1:
	240	raise service_error(service_error.req,
	241	"Desktop segment requests multiple connections")
	242	if connInfo[0]['type'] != 'ssh':
	243	raise service_error(service_error.req,
	244	"Desktop segment requires ssh connecton")
	245	return True
	246
	247	def export_store_info(self, certfile, connInfo):
	248	'''
	249	Tell the other portal node where to reach this desktop. The other side
	250	uses this information to set up routing, though the ssh_port is unused
	251	as the Desktop always initiates ssh connections.
	252	'''
	253	values = { 'peer': self.hostname, 'ssh_port': self.ssh_port }
	254	for c in connInfo:
	255	for p in c.get('parameter', []):
	256	if p.get('type','') == 'input': continue
	257	pname = p.get('name', '')
	258	key = p.get('key', '')
	259	surl = p.get('store', '')
	260	if pname not in values:
	261	self.log('Unknown export parameter: %s' % pname)
	262	continue
	263	val = values[pname]
	264	req = { 'name': key, 'value': val }
	265	self.log.debug('Setting %s (%s) to %s on %s' % \
	266	(pname, key, val, surl))
	267	self.call_SetValue(surl, req, certfile)
	268
	269	def set_route(self, dest, script, gw=None, src=None):
	270	if sys.platform.startswith('freebsd'):
	271	if src is not None and gw is not None:
	272	raise service_error(service_error.internal,
	273	'FreeBSD will not route based on src address')
	274	elif src is not None:
	275	raise service_error(service_error.internal,
	276	'FreeBSD will not route based on src address')
	277	elif gw is not None:
[5dbcc93]	278	print >>script, 'route add %s %s' % (dest, gw)
[1819839]	279	elif sys.platform.startswith('linux'):
	280	if src is not None and gw is not None:
[5dbcc93]	281	print >>script, 'ip route add %s via %s src %s' % \
[1819839]	282	(dest, gw, src)
	283	elif src is not None:
[5dbcc93]	284	print >>script, 'ip route add %s src %s' % \
[1819839]	285	(dest, src)
	286	elif gw is not None:
[5dbcc93]	287	print >>script, 'ip route add %s via %s' % (dest, gw)
[1819839]	288	else:
	289	raise service_error(service_error.internal,
	290	'Unknown platform %s' % sys.platform)
	291
	292	def unset_route(self, dest, script):
	293	rv = 0
	294	if sys.platform.startswith('freebsd'):
[5dbcc93]	295	print >>script, 'route delete %s' % dest
[1819839]	296	elif sys.platform.startswith('linux'):
[5dbcc93]	297	print >>script, 'ip route delete %s' % dest
[1819839]	298
[2dc99e3]	299	def find_a_peer(self, addr):
	300	'''
	301	Find another node in the experiment that's on our subnet. This is a
	302	hack to handle the problem that we really cannot require the desktop to
	303	dynamically route. Will be improved by distributing static routes.
	304	'''
	305
	306	peer = None
	307	hosts = os.path.join(self.localdir, 'hosts')
	308	p = addr.rfind('.')
	309	if p == -1:
	310	raise service_error(service_error.req, 'bad address in topology')
	311	prefix = addr[0:p]
	312	addr_re = re.compile('(%s.\\d+)' % prefix)
	313	try:
	314	f = open(hosts, 'r')
	315	for line in f:
	316	m = addr_re.search(line)
	317	if m is not None and m.group(1) != addr:
	318	peer = m.group(1)
	319	break
	320	else:
	321	raise service_error(service_error.req,
	322	'No other nodes in this subnet??')
	323	except EnvironmentError, e:
	324	raise service_error(service_error.internal,
	325	'Cannot open %s: %s' % (e.filename, e.strerror))
	326	return peer
	327
	328
[1819839]	329
	330
	331	def configure_desktop(self, top, connInfo):
	332	'''
	333	Build the connection. Establish routing to the peer if using a
	334	separate interface, wait until the other end confirms setup, establish
	335	the ssh layer-two tunnel (tap), assign the in-experiment IP address to
	336	the tunnel and establish routing to the experiment through the tap.
	337	'''
	338
	339
	340	# get the peer and ssh port from the portal and our IP from the other
	341	peer = None
	342	port = None
	343	my_addr = None
	344	for e in top.elements:
	345	if not isinstance(e, topdl.Computer): continue
	346	if e.get_attribute('portal') is None:
	347	# there should be one interface with one IPv4 address
	348	if len(e.interface) <1 :
	349	raise service_error(service_error.internal,
	350	'No interface on experiment node!?!?')
	351	my_addr = e.interface[0].get_attribute('ip4_address')
	352	else:
	353	for ci in connInfo:
	354	if ci.get('portal', '') != e.name: continue
	355	peer = ci.get('peer')
	356	port = '22'
	357	for a in ci.get('fedAttr', []):
	358	if a['attribute'] == 'ssh_port': port = a['value']
	359
	360	# XXX scan hosts for IP addresses and compose better routing entry
	361
	362	if not all([peer, port, my_addr]):
	363	raise service_error(service_error.req,
	364	'Cannot find all config parameters %s %s %s' % (peer, port, my_addr))
	365
[2dc99e3]	366	exp_peer = self.find_a_peer(my_addr)
	367
[1819839]	368	cscript = os.path.join(self.localdir, 'connect')
	369	dscript = os.path.join(self.localdir, 'disconnect')
[5dbcc93]	370	local_hosts = os.path.join(self.localdir, 'hosts')
[1819839]	371	try:
	372	f = open(cscript, 'w')
	373	print >>f, '#!/bin/sh'
	374	# This picks the outgoing interface to the experiment using the
	375	# routing system.
	376	self.set_route(peer, f, self.router, self.src_addr)
	377	# Wait until the other end reports that it is configured py placing
	378	# a file this end can access into its local file system. Try once
	379	# a minute.
[2dc99e3]	380	print >>f,'while ! /usr/bin/scp -o "StrictHostKeyChecking no" -i %s %s:/usr/local/federation/etc/prep_done /dev/null; do' % (self.ssh_identity, peer)
[1819839]	381	print >>f, 'sleep 60; done'
[5dbcc93]	382	print >>f, ('ssh -w 0:0 -p %s -o "Tunnel ethernet" ' + \
[2dc99e3]	383	'-o "StrictHostKeyChecking no" -i %s %s perl -I/usr/local/federation/lib /usr/local/federation/bin/setup_bridge.pl --tapno=0 --addr=%s &') % \
	384	(port, self.ssh_identity, peer, my_addr)
[1819839]	385	# This should give the tap a a chance to come up
	386	print >>f,'sleep 10'
[5dbcc93]	387	# Add experiment nodes to hosts
	388	print >>f, 'cp /etc/hosts /etc/hosts.DETER.fedd.hold'
	389	print >>f, 'echo "#--- BEGIN FEDD ADDITIONS ---" >> /etc/hosts'
	390	print >>f, 'cat %s >> /etc/hosts' % local_hosts
	391	print >>f, 'echo "#--- END FEDD ADDITIONS ---" >> /etc/hosts'
	392	# Assign tap address and route experiment connections through it.
	393	print >>f, 'ifconfig tap0 %s netmask 255.255.255.0 up' % \
[1819839]	394	my_addr
[2dc99e3]	395	self.set_route('10.0.0.0/8', f, exp_peer)
[1819839]	396	f.close()
	397	os.chmod(cscript, 0755)
	398	f = open(dscript, 'w')
	399	print >>f, '#!/bin/sh'
[5dbcc93]	400	print >>f, 'ifconfig tap0 destroy'
[1819839]	401	self.unset_route(peer, f)
	402	self.unset_route('10.0.0.0/8', f)
[5dbcc93]	403	print >>f, 'mv /etc/hosts.DETER.fedd.hold /etc/hosts'
[1819839]	404	f.close()
	405	os.chmod(dscript, 0755)
	406	except EnvironmentError, e:
	407	raise service_error(service_error.internal,
	408	'Cannot create connect %s: %s' % (e.filename, e.strerror))
[5dbcc93]	409	script_log = open('/tmp/connect.log', 'w')
	410	subprocess.Popen(['sudo', '/bin/sh', cscript], stdout=script_log, stderr=script_log)
[1819839]	411	return True
	412
	413	def StartSegment(self, req, fid):
	414	"""
	415	Start a segment. In this simple skeleton, this means to parse the
	416	request and assign an unassigned integer to it. We store the integer
	417	in the persistent state.
	418	"""
	419	try:
	420	req = req['StartSegmentRequestBody']
	421	# Get the request topology. If not present, a KeyError is thrown.
	422	topref = req['segmentdescription']['topdldescription']
	423	# The fedid of the allocation we're attaching resources to
	424	auth_attr = req['allocID']['fedid']
	425	except KeyError:
	426	raise service_error(service_error.req, "Badly formed request")
	427
	428	# String version of the allocation ID for keying
	429	aid = "%s" % auth_attr
	430	# Authorization check
	431	access_ok, proof = self.auth.check_attribute(fid, auth_attr,
	432	with_proof=True)
	433	if not access_ok:
	434	raise service_error(service_error.access, "Access denied",
	435	proof=proof)
	436	else:
	437	# See if this is a replay of an earlier succeeded StartSegment -
	438	# sometimes SSL kills 'em. If so, replay the response rather than
	439	# redoing the allocation.
	440	self.state_lock.acquire()
	441	# Test and set :-)
	442	running = self.state['running']
	443	self.state['running'] = True
	444	retval = self.state[aid].get('started', None)
	445	self.state_lock.release()
	446	if retval:
	447	self.log.warning(
	448	"[StartSegment] Duplicate StartSegment for %s: " \
	449	% aid + \
	450	"replaying response")
	451	return retval
	452	if running:
	453	self.log.debug('[StartSegment] already running')
	454	raise service_error(service_error.federant,
	455	'Desktop is already in an experiment')
	456
	457	certfile = "%s/%s.pem" % (self.certdir, aid)
	458
	459	# Convert the topology into topdl data structures. Again, the
	460	# skeletion doesn't do anything with it, but this is how one parses a
	461	# topology request.
	462	if topref: topo = topdl.Topology(**topref)
	463	else:
	464	raise service_error(service_error.req,
	465	"Request missing segmentdescription'")
	466
	467	err = None
	468	try:
	469	self.validate_topology(topo)
	470
	471	# The attributes of the request. The ones we care about are the ssh
	472	# keys to operate the tunnel.
	473	attrs = req.get('fedAttr', [])
	474	for a in attrs:
	475	# Save the hosts and ssh_privkeys to our local dir
	476	if a['attribute'] in ('hosts', 'ssh_secretkey'):
	477	self.log.debug('Getting %s from %s' % \
	478	(a['attribute'], a['value']))
	479	get_url(a['value'], certfile, self.localdir, log=self.log)
	480	base = os.path.basename(a['value'])
	481	if a['attribute'] == 'ssh_secretkey':
	482	self.ssh_identity = os.path.join(self.localdir, base)
	483	os.chmod(os.path.join(self.localdir, base), 0600)
	484	else:
	485	self.log.debug('Ignoring attribute %s' % a['attribute'])
	486
	487	# Gather connection information and exchange parameters.
	488	connInfo = req.get('connection', [])
	489	self.validate_connInfo(connInfo)
	490	self.export_store_info(certfile, connInfo)
	491	self.import_store_info(certfile, connInfo)
	492
	493	#build it
	494	self.configure_desktop(topo, connInfo)
	495	except service_error, e:
	496	err = e
	497
	498	# Save the information
	499	if err is None:
	500	# It's possible that the StartSegment call gets retried (!). if
	501	# the 'started' key is in the allocation, we'll return it rather
	502	# than redo the setup. The integer allocation was saved when we
	503	# made it.
	504	self.state_lock.acquire()
	505	self.state[aid]['started'] = {
	506	'allocID': req['allocID'],
	507	'allocationLog': "Allocatation complete",
	508	'segmentdescription': { 'topdldescription': topo.to_dict() },
	509	'proof': proof.to_dict(),
	510	}
	511	retval = copy.deepcopy(self.state[aid]['started'])
	512	self.write_state()
	513	self.state_lock.release()
	514	else:
	515	# Something bad happened - clear the "running" flag so we can try
	516	# again
	517	self.state_lock.acquire()
	518	self.state['running'] = False
	519	self.state_lock.release()
	520	raise err
	521
	522	return retval
	523
	524	def TerminateSegment(self, req, fid):
	525	"""
	526	Remove the resources associated with th eallocation and stop the music.
	527	In this example, this simply means removing the integer we allocated.
	528	"""
	529	# Gather the same access information as for Start Segment
	530	try:
	531	req = req['TerminateSegmentRequestBody']
	532	except KeyError:
	533	raise service_error(service_error.req, "Badly formed request")
	534
	535	auth_attr = req['allocID']['fedid']
	536	aid = "%s" % auth_attr
	537
	538	self.log.debug("Terminate request for %s" %aid)
	539	# Check authorization
	540	access_ok, proof = self.auth.check_attribute(fid, auth_attr,
	541	with_proof=True)
	542	if not access_ok:
	543	raise service_error(service_error.access, "Access denied",
	544	proof=proof)
	545	cscript = os.path.join(self.localdir, 'connect')
	546	dscript = os.path.join(self.localdir, 'disconnect')
[5dbcc93]	547	# Do the work of disconnecting
	548	if os.path.exists(dscript):
	549	self.log.debug('calling %s' % dscript)
	550	rv = subprocess.call(['sudo', '/bin/sh', dscript])
	551	if rv != 0:
	552	self.log.warning('%s had an error: %d' % (dscript, rv))
	553	else:
	554	self.log.warn('No disconnection script!?')
[1819839]	555
	556	try:
	557	for bfn in os.listdir(self.localdir):
	558	fn = os.path.join(self.localdir, bfn)
	559	self.log.debug('Removing %s' % fn)
	560	if os.path.exists(fn):
	561	os.remove(fn)
	562	except EnvironmentError, e:
	563	self.log.warn('Failed to remove %s: %s' % (e.filename, e.strerror))
	564
	565	self.ssh_identity = None
	566
	567	self.state_lock.acquire()
	568	self.state['running'] = False
	569	self.state_lock.release()
	570
	571	return { 'allocID': req['allocID'], 'proof': proof.to_dict() }

Note: See TracBrowser for help on using the repository browser.

Download in other formats: