source: fedd/federation/desktop_access.py @ b06744b

Last change on this file since b06744b was b06744b, checked in by Ted Faber <faber@…>, 10 years ago

DEV push

  • Property mode set to 100644
File size: 20.9 KB
Line 
1#!/usr/local/bin/python
2
3import os,sys
4import re
5import string
6import copy
7import pickle
8import logging
9import random
10import subprocess
11
12from util import *
13from deter import fedid, generate_fedid
14from authorizer import authorizer, abac_authorizer
15from service_error import service_error
16from remote_service import xmlrpc_handler, soap_handler, service_caller
17
18from deter import topdl
19
20from access import access_base
21
# Keep log messages from surfacing when nobody has configured a fedd logger.
# A handler that does nothing is attached to the "fedd.access" logger so that
# logger always has at least one handler, even if no module above this one
# installed a real one.  Belt and suspenders.
class nullHandler(logging.Handler):
    """Logging handler that discards every record handed to it."""

    def emit(self, record):
        """Drop the record."""
        return None


fl = logging.getLogger("fedd.access")
fl.addHandler(nullHandler())
31
32
33# The plug-in itself.
34class access(access_base):
35    """
    Desktop access plug-in for fedd.  It attaches this desktop to a federated
    experiment as a single node: it answers the RequestAccess, ReleaseAccess,
    StartSegment and TerminateSegment requests, keeps internal state using
    the general routines in access.access_base, and reaches the experiment's
    portal over an ssh tunnel.  Only one experiment can be attached at a time.

    Detailed comments appear in the code.
42    """
43    def __init__(self, config=None, auth=None):
44        """
45        Initializer.  Pulls parameters out of the ConfigParser's access
46        section, and initializes simple internal state.  This version reads a
47        maximum integer to assign from the configuration file, while most other
48        configuration entries  are read by the base class. 
49
50        An access database in the cannonical format is also read as well as a
51        state database that is a hash of internal state.  Routines to
52        manipulate these are in the base class, but specializations appear
53        here.
54
55        The access database maps users to a simple string.
56        """
57
58        # Calling the base initializer, which reads canonical configuration
59        # information and initializes canonical members.
60        access_base.__init__(self, config, auth)
61        # Reading the maximum integer parameter from the configuration file
62
63        self.src_addr = config.get('access', 'interface_address')
64        self.router = config.get('access', 'gateway')
65        self.hostname = config.get('access', 'hostname')
66        # Storage for ephemeral ssh keys and host files
67        self.localdir = config.get('access', 'localdir')
68        self.ssh_identity = None
69
70        # hostname is the name of the ssh endpoint for the other side.  That
71        # side needs it to set up routing tables.  If hostname is not
72        # available, but an IP address is, use that.
73        if self.hostname is None:
74            if  self.src_addr is None:
75                raise service_error(service_error.server_config,
76                        'Hostname or interface_address must be set in config')
77            self.hostname = self.src_addr
78       
79        self.ssh_port = config.get('access', 'ssh_port', '22')
80
81        # authorization information
82        self.auth_type = config.get('access', 'auth_type') \
83                or 'abac'
84        self.auth_dir = config.get('access', 'auth_dir')
85        accessdb = config.get("access", "accessdb")
86        # initialize the authorization system.  We make a call to
87        # read the access database that maps from authorization information
88        # into local information.  The local information is parsed by the
89        # translator above.
90        if self.auth_type == 'abac':
91            #  Load the current authorization state
92            self.auth = abac_authorizer(load=self.auth_dir)
93            self.access = [ ]
94            if accessdb:
95                try:
96                    self.read_access(accessdb)
97                except EnvironmentError, e:
98                    self.log.error("Cannot read %s: %s" % \
99                            (config.get("access", "accessdb"), e))
100                    raise e
101        else:
102            raise service_error(service_error.internal, 
103                    "Unknown auth_type: %s" % self.auth_type)
104
105        # The superclass has read the state, but if this is the first run ever,
106        # we must initialise the running flag.  This plugin only supports one
107        # connection, so StartSegment will fail when self.state['running'] is
108        # true.
109        self.state_lock.acquire()
110        if 'running' not in self.state:
111            self.state['running'] = False
112        self.state_lock.release()
113
114        # These dictionaries register the plug-in's local routines for handline
115        # these four messages with the server code above.  There's a version
116        # for SOAP and XMLRPC, depending on which interfaces the plugin
117        # supports.  There's rarely a technical reason not to support one or
118        # the other - the plugin code almost never deals with the transport -
119        # but if a plug-in writer wanted to disable XMLRPC, they could leave
120        # the self.xmlrpc_services dictionary empty.
121        self.soap_services = {\
122            'RequestAccess': soap_handler("RequestAccess", self.RequestAccess),
123            'ReleaseAccess': soap_handler("ReleaseAccess", self.ReleaseAccess),
124            'StartSegment': soap_handler("StartSegment", self.StartSegment),
125            'TerminateSegment': soap_handler("TerminateSegment", 
126                self.TerminateSegment),
127            }
128        self.xmlrpc_services =  {\
129            'RequestAccess': xmlrpc_handler('RequestAccess',
130                self.RequestAccess),
131            'ReleaseAccess': xmlrpc_handler('ReleaseAccess',
132                self.ReleaseAccess),
133            'StartSegment': xmlrpc_handler("StartSegment", self.StartSegment),
134            'TerminateSegment': xmlrpc_handler('TerminateSegment',
135                self.TerminateSegment),
136            }
137        self.call_SetValue = service_caller('SetValue', log=self.log)
138        self.call_GetValue = service_caller('GetValue', log=self.log)
139
    # ReleaseAccess comes from the base class.  This is a slightly modified
    # version of the base class's RequestAccess that adds a fedAttr
    # (nat_portals) to force this side to be active.
143    def RequestAccess(self, req, fid):
144        """
145        Handle an access request.  Success here maps the requester into the
146        local access control space and establishes state about that user keyed
147        to a fedid.  We also save a copy of the certificate underlying that
148        fedid so this allocation can access configuration information and
149        shared parameters on the experiment controller.
150        """
151
152        self.log.info("RequestAccess called by %s" % fid)
153        # The dance to get into the request body
154        if req.has_key('RequestAccessRequestBody'):
155            req = req['RequestAccessRequestBody']
156        else:
157            raise service_error(service_error.req, "No request!?")
158
159        # Base class lookup routine.  If this fails, it throws a service
160        # exception denying access that triggers a fault response back to the
161        # caller.
162        found,  owners, proof = self.lookup_access(req, fid)
163        self.log.info(
164                "[RequestAccess] Access granted local creds %s" % found)
165        # Make a fedid for this allocation
166        allocID, alloc_cert = generate_fedid(subj="alloc", log=self.log)
167        aid = unicode(allocID)
168
169        # Store the data about this allocation:
170        self.state_lock.acquire()
171        self.state[aid] = { }
172        self.state[aid]['user'] = found
173        self.state[aid]['owners'] = owners
174        self.state[aid]['auth'] = set()
175        # Authorize the creating fedid and the principal representing the
176        # allocation to manipulate it.
177        self.append_allocation_authorization(aid, 
178                ((fid, allocID), (allocID, allocID)))
179        self.write_state()
180        self.state_lock.release()
181
182        # Create a directory to stash the certificate in, ans stash it.
183        try:
184            f = open("%s/%s.pem" % (self.certdir, aid), "w")
185            print >>f, alloc_cert
186            f.close()
187        except EnvironmentError, e:
188            raise service_error(service_error.internal, 
189                    "Can't open %s/%s : %s" % (self.certdir, aid, e))
190        self.log.debug('[RequestAccess] Returning allocation ID: %s' % allocID)
191        msg = { 
192                'allocID': { 'fedid': allocID }, 
193                'fedAttr': [{ 'attribute': 'nat_portals', 'value': 'True' }],
194                'proof': proof.to_dict()
195                }
196        return msg
197
198    def validate_topology(self, top):
199        '''
200        Validate the topology.  Desktops can only be single connections.
201        Though the topology will include a portal and a node, the access
202        controller will implement both on one node.
203
204        As more capabilities are added to the contoller the constraints here
205        will relax.
206        '''
207
208        comps = []
209        for e in top.elements:
210            if isinstance(e, topdl.Computer): comps.append(e)
211        if len(comps) > 2: 
212            raise service_error(service_error.req,
213                    "Desktop only supports 1-node subexperiments")
214
215        portals = 0
216        for c in comps:
217            if c.get_attribute('portal') is not None: 
218                portals += 1
219                continue
220            if len(c.interface) > 1:
221                raise service_error(service_error.req,
222                        "Desktop Node has more than one interface")
223            i  = c.interface[0]
224            if len(i.subs) > 1: 
225                raise service_error(service_error.req,
226                        "Desktop Node has more than one substate on interface")
227            sub = i.subs[0]
228            for i in sub.interfaces:
229                if i.element not in comps:
230                    raise service_error(service_error.req,
231                            "Desktop Node connected to non-portal")
232
233        if portals > 1:
234            raise service_error(service_error.req,
235                    "Desktop segment has more than one portal")
236        return True
237
238    def validate_connInfo(self, connInfo):
239        if len(connInfo) != 1: 
240            raise service_error(service_error.req,
241                    "Desktop segment requests multiple connections")
242        if connInfo[0]['type'] != 'ssh':
243            raise service_error(service_error.req,
244                    "Desktop segment requires ssh connecton")
245        return True
246
247    def export_store_info(self, certfile, connInfo):
248        '''
249        Tell the other portal node where to reach this desktop.  The other side
250        uses this information to set up routing, though the ssh_port is unused
251        as the Desktop always initiates ssh connections.
252        '''
253        values = { 'peer': self.hostname, 'ssh_port': self.ssh_port }
254        for c in connInfo:
255            for p in c.get('parameter', []):
256                if p.get('type','') == 'input': continue
257                pname = p.get('name', '')
258                key = p.get('key', '')
259                surl = p.get('store', '')
260                if pname not in values:
261                    self.log('Unknown export parameter: %s'  % pname)
262                    continue
263                val = values[pname]
264                req = { 'name': key, 'value': val }
265                self.log.debug('Setting %s (%s) to %s on %s' % \
266                        (pname, key,  val, surl))
267                self.call_SetValue(surl, req, certfile)
268
269    def set_route(self, dest, script, gw=None, src=None):
270        if sys.platform.startswith('freebsd'):
271            if src is not None and gw is not None:
272                raise service_error(service_error.internal, 
273                        'FreeBSD will not route based on src address')
274            elif src is not None:
275                raise service_error(service_error.internal, 
276                        'FreeBSD will not route based on src address')
277            elif gw is not None:
278                print >>script, 'route add %s %s' % (dest, gw)
279        elif sys.platform.startswith('linux'):
280            if src is not None and gw is not None:
281                print >>script, 'ip route add %s via %s src %s' % \
282                        (dest, gw, src)
283            elif src is not None:
284                print >>script, 'ip route add %s src %s' % \
285                        (dest, src)
286            elif gw is not None:
287                print >>script, 'ip route add %s via %s' % (dest, gw)
288        else:
289            raise service_error(service_error.internal, 
290                    'Unknown platform %s' % sys.platform)
291
292    def unset_route(self, dest, script):
293        rv = 0
294        if sys.platform.startswith('freebsd'):
295            print >>script, 'route delete %s' % dest
296        elif sys.platform.startswith('linux'):
297            print >>script, 'ip route delete %s' % dest
298
299    def find_a_peer(self, addr): 
300        '''
301        Find another node in the experiment that's on our subnet.  This is a
302        hack to handle the problem that we really cannot require the desktop to
303        dynamically route.  Will be improved by distributing static routes.
304        '''
305
306        peer = None
307        hosts = os.path.join(self.localdir, 'hosts')
308        p = addr.rfind('.')
309        if p == -1:
310            raise service_error(service_error.req, 'bad address in topology')
311        prefix = addr[0:p]
312        addr_re = re.compile('(%s.\\d+)' % prefix)
313        try:
314            f = open(hosts, 'r')
315            for line in f:
316                m = addr_re.search(line)
317                if m is not None and m.group(1) != addr:
318                    peer = m.group(1)
319                    break
320            else:
321                raise service_error(service_error.req, 
322                        'No other nodes in this subnet??')
323        except EnvironmentError, e:
324            raise service_error(service_error.internal, 
325                    'Cannot open %s: %s' % (e.filename, e.strerror))
326        return peer
327
328
329
330
331    def configure_desktop(self, top, connInfo):
332        '''
333        Build the connection.  Establish routing to the peer if using a
334        separate interface, wait until the other end confirms setup, establish
335        the ssh layer-two tunnel (tap), assign the in-experiment IP address to
336        the tunnel and establish routing to the experiment through the tap.
337        '''
338
339
340        # get the peer and ssh port from the portal and our IP from the other
341        peer = None
342        port = None
343        my_addr = None
344        my_name = None
345        for e in top.elements:
346            if not isinstance(e, topdl.Computer): continue
347            if e.get_attribute('portal') is None: 
348                my_name = e.name
349                # there should be one interface with one IPv4 address
350                if len(e.interface) <1 :
351                    raise service_error(service_error.internal,
352                            'No interface on experiment node!?!?')
353                my_addr = e.interface[0].get_attribute('ip4_address')
354            else:
355                for ci in connInfo:
356                    if ci.get('portal', '') != e.name: continue
357                    peer = ci.get('peer')
358                    port = '22'
359                    for a in ci.get('fedAttr', []):
360                        if a['attribute'] == 'ssh_port': port = a['value']
361
362        # XXX scan hosts for IP addresses and compose better routing entry
363       
364        if not all([peer, port, my_addr]):
365            raise service_error(service_error.req, 
366                    'Cannot find all config parameters %s %s %s' % (peer, port, my_addr))
367
368        exp_peer = self.find_a_peer(my_addr)
369
370        cscript = os.path.join(self.localdir, 'connect')
371        dscript = os.path.join(self.localdir, 'disconnect')
372        local_hosts = os.path.join(self.localdir, 'hosts')
373        zebra_conf = os.path.join(self.localdir, 'zebra.conf')
374        ospfd_conf = os.path.join(self.localdir, 'ospfd.conf')
375        try:
376            f = open(cscript, 'w')
377            print >>f, '#!/bin/sh'
378            # This picks the outgoing interface to the experiment using the
379            # routing system.
380            self.set_route(peer, f, self.router, self.src_addr)
381            # Wait until the other end reports that it is configured py placing
382            # a file this end can access into its local file system.  Try once
383            # a minute.
384            print >>f,'while ! /usr/bin/scp -o "StrictHostKeyChecking no" -i %s %s:/usr/local/federation/etc/prep_done /dev/null; do' % (self.ssh_identity, peer)
385            print >>f, 'sleep 60; done'
386            print >>f, ('ssh -w 0:0 -p %s -o "Tunnel ethernet" ' + \
387                    '-o "StrictHostKeyChecking no" -i %s %s perl -I/usr/local/federation/lib /usr/local/federation/bin/setup_bridge.pl --tapno=0 --addr=%s &') % \
388                    (port, self.ssh_identity, peer, my_addr)
389            # This should give the tap a a chance to come up
390            print >>f,'sleep 10'
391            # Add experiment nodes to hosts
392            print >>f, 'cp /etc/hosts /etc/hosts.DETER.fedd.hold'
393            print >>f, 'echo "#--- BEGIN FEDD ADDITIONS ---" >> /etc/hosts'
394            print >>f, 'cat %s >> /etc/hosts' % local_hosts
395            print >>f, 'echo "#--- END FEDD ADDITIONS ---" >> /etc/hosts'
396            # Assign tap address and route experiment connections through it.
397            print >>f, 'ifconfig tap0 %s netmask 255.255.255.0 up' % \
398                    my_addr
399            # self.set_route('10.0.0.0/8', f, exp_peer)
400            print >>f, '/usr/local/sbin/zebra -d -i -f %s' % zebra_conf
401            print >>f, '/usr/local/sbin/ospfd -d -i -f %s' % ospfd_conf
402            f.close()
403            os.chmod(cscript, 0755)
404            f = open(dscript, 'w')
405            print >>f, '#!/bin/sh'
406            print >>f, 'ifconfig tap0 destroy'
407            self.unset_route(peer, f)
408            #self.unset_route('10.0.0.0/8', f)
409            print >>f, 'mv /etc/hosts.DETER.fedd.hold /etc/hosts'
410            print >>f, 'kill `cat /var/run/ospfd.pid`'
411            print >>f, 'kill `cat /var/run/zebra.pid`'
412            f.close()
413            os.chmod(dscript, 0755)
414            f = open(zebra_conf, 'w')
415            print >>f, 'hostname %s' % my_name
416            print >>f, 'interface tap0'
417            f.close()
418            os.chmod(zebra_conf, 0644)
419            f = open(ospfd_conf, 'w')
420            print >>f, 'hostname %s' % my_name
421            print >>f, 'router ospf'
422            print >>f, ' redistribute static'
423            print >>f, ' network %s/24 area 0.0.0.2' % my_addr
424        except EnvironmentError, e:
425            raise service_error(service_error.internal, 
426                    'Cannot create connect %s: %s' % (e.filename, e.strerror))
427        script_log = open('/tmp/connect.log', 'w')
428        subprocess.Popen(['sudo', '/bin/sh', cscript], stdout=script_log, stderr=script_log)
429        return True
430
431    def StartSegment(self, req, fid):
432        """
433        Start a segment.  In this simple skeleton, this means to parse the
434        request and assign an unassigned integer to it.  We store the integer
435        in the persistent state.
436        """
437        try:
438            req = req['StartSegmentRequestBody']
439            # Get the request topology.  If not present, a KeyError is thrown.
440            topref = req['segmentdescription']['topdldescription']
441            # The fedid of the allocation we're attaching resources to
442            auth_attr = req['allocID']['fedid']
443        except KeyError:
444            raise service_error(service_error.req, "Badly formed request")
445
446        # String version of the allocation ID for keying
447        aid = "%s" % auth_attr
448        # Authorization check
449        access_ok, proof = self.auth.check_attribute(fid, auth_attr, 
450                with_proof=True)
451        if not access_ok:
452            raise service_error(service_error.access, "Access denied", 
453                    proof=proof)
454        else:
455            # See if this is a replay of an earlier succeeded StartSegment -
456            # sometimes SSL kills 'em.  If so, replay the response rather than
457            # redoing the allocation.
458            self.state_lock.acquire()
459            # Test and set :-)
460            running = self.state['running']
461            self.state['running'] = True
462            retval = self.state[aid].get('started', None)
463            self.state_lock.release()
464            if retval:
465                self.log.warning(
466                        "[StartSegment] Duplicate StartSegment for %s: " \
467                                % aid + \
468                        "replaying response")
469                return retval
470            if running:
471                self.log.debug('[StartSegment] already running')
472                raise service_error(service_error.federant,
473                        'Desktop is already in an experiment')
474
475        certfile = "%s/%s.pem" % (self.certdir, aid)
476
477        # Convert the topology into topdl data structures.  Again, the
478        # skeletion doesn't do anything with it, but this is how one parses a
479        # topology request.
480        if topref: topo = topdl.Topology(**topref)
481        else:
482            raise service_error(service_error.req, 
483                    "Request missing segmentdescription'")
484
485        err = None
486        try:
487            self.validate_topology(topo)
488
489            # The attributes of the request.  The ones we care about are the ssh
490            # keys to operate the tunnel.
491            attrs = req.get('fedAttr', [])
492            for a in attrs:
493                # Save the hosts and ssh_privkeys to our local dir
494                if a['attribute'] in ('hosts', 'ssh_secretkey'):
495                    self.log.debug('Getting %s from %s' % \
496                            (a['attribute'], a['value']))
497                    get_url(a['value'], certfile, self.localdir, log=self.log)
498                    base = os.path.basename(a['value'])
499                    if a['attribute'] == 'ssh_secretkey':
500                        self.ssh_identity = os.path.join(self.localdir, base)
501                    os.chmod(os.path.join(self.localdir, base), 0600)
502                else:
503                    self.log.debug('Ignoring attribute %s' % a['attribute'])
504
505            # Gather connection information and exchange parameters.
506            connInfo = req.get('connection', [])
507            self.validate_connInfo(connInfo)
508            self.export_store_info(certfile, connInfo)
509            self.import_store_info(certfile, connInfo)
510
511            #build it
512            self.configure_desktop(topo, connInfo)
513        except service_error, e:
514            err = e
515
516        # Save the information
517        if err is None:
518            # It's possible that the StartSegment call gets retried (!).  if
519            # the 'started' key is in the allocation, we'll return it rather
520            # than redo the setup.  The integer allocation was saved when we
521            # made it.
522            self.state_lock.acquire()
523            self.state[aid]['started'] = { 
524                    'allocID': req['allocID'],
525                    'allocationLog': "Allocatation complete",
526                    'segmentdescription': { 'topdldescription': topo.to_dict() },
527                    'proof': proof.to_dict(),
528                    }
529            retval = copy.deepcopy(self.state[aid]['started'])
530            self.write_state()
531            self.state_lock.release()
532        else:
533            # Something bad happened - clear the "running" flag so we can try
534            # again
535            self.state_lock.acquire()
536            self.state['running'] = False
537            self.state_lock.release()
538            raise err
539
540        return retval
541
542    def TerminateSegment(self, req, fid):
543        """
544        Remove the resources associated with th eallocation and stop the music.
545        In this example, this simply means removing the integer we allocated.
546        """
547        # Gather the same access information as for Start Segment
548        try:
549            req = req['TerminateSegmentRequestBody']
550        except KeyError:
551            raise service_error(service_error.req, "Badly formed request")
552
553        auth_attr = req['allocID']['fedid']
554        aid = "%s" % auth_attr
555
556        self.log.debug("Terminate request for %s" %aid)
557        # Check authorization
558        access_ok, proof = self.auth.check_attribute(fid, auth_attr, 
559                with_proof=True)
560        if not access_ok:
561            raise service_error(service_error.access, "Access denied", 
562                    proof=proof)
563        cscript = os.path.join(self.localdir, 'connect')
564        dscript = os.path.join(self.localdir, 'disconnect')
565        # Do the work of disconnecting
566        if os.path.exists(dscript):
567            self.log.debug('calling %s' % dscript)
568            rv = subprocess.call(['sudo', '/bin/sh', dscript])
569            if rv != 0:
570                self.log.warning('%s had an error: %d' % (dscript, rv))
571        else:
572            self.log.warn('No disconnection script!?')
573
574        try:
575            for bfn in os.listdir(self.localdir):
576                fn = os.path.join(self.localdir, bfn)
577                self.log.debug('Removing %s' % fn)
578                if os.path.exists(fn):
579                    os.remove(fn)
580        except EnvironmentError, e:
581            self.log.warn('Failed to remove %s: %s' % (e.filename, e.strerror))
582
583        self.ssh_identity = None
584
585        self.state_lock.acquire()
586        self.state['running'] = False
587        self.state_lock.release()
588   
589        return { 'allocID': req['allocID'], 'proof': proof.to_dict() }
Note: See TracBrowser for help on using the repository browser.