Changeset 0dc62df


Ignore:
Timestamp:
Nov 8, 2011 6:41:55 PM (13 years ago)
Author:
Ted Faber <faber@…>
Branches:
compt_changes, info-ops, master
Children:
db974ed
Parents:
8cb269a
Message:

Significantly improve resilience to SSL failures. #35

Location:
fedd/federation
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • fedd/federation/experiment_control.py

    r8cb269a r0dc62df  
    8888    call_ReleaseAccess = service_caller('ReleaseAccess')
    8989    call_StartSegment = service_caller('StartSegment')
    90     call_TerminateSegment = service_caller('TerminateSegment')
     90    call_TerminateSegment = service_caller('TerminateSegment', log = logging.getLogger("Test"))
    9191    call_Ns2Topdl = service_caller('Ns2Topdl')
    9292
  • fedd/federation/remote_service.py

    r8cb269a r0dc62df  
    55from socket import error as socket_error
    66from socket import sslerror
     7from socket import SHUT_RDWR
    78
    89import M2Crypto.httpslib
     
    417418        return self.apply_to_tags(resp, self.decap_fedids)
    418419
     420    def hammer_port_shut(self, port):
     421        """
     422        If there is an SSL failure, the connection to the service often stays
     423        open and idle, confusing subsequent attempts to contact it (the symptom
     424        is a stuck connection).  This routine walks through the open port and
     425        hammers everything as closed as it can.  It is called in a couple of
     426        places in call_soap_service.  This is dirty code, walking the internals
     427        of a couple of data structures, but without it, the daemon can lock up.
     428        """
     429        try:
     430            if port is not None:
     431                binding = getattr(port, 'binding', None)
     432                if binding is not None: connection = getattr(binding, 'h', None)
     433                else: connection = None
     434
     435                if connection is not None:
     436                    csock = getattr(connection, 'sock', None)
     437                    if csock is not None:
     438                        csock.clear()
     439                        csock.close()
     440                        cssocket = getattr(csock, 'socket', None)
     441                        if cssocket is not None:
     442                            cssocket.shutdown(SHUT_RDWR)
     443                            cssocket.close()
     444                        del csock
     445                        connection.sock = None
     446                    httplib.HTTPConnection.close(connection)
     447                    if self.log:
     448                        self.log.debug("Closed connection with prejudice")
     449
     450                del port
     451                port = None
     452                if self.log:
     453                    self.log.debug("Annihilated port")
     454        except Exception, e:
     455            port = None
     456
     457        return port
     458
     459
    419460    def call_soap_service(self, url, req, cert_file=None, cert_pwd=None,
    420461            trusted_certs=None, context=None, tracefile=None):
     
    434475        ok = False
    435476        retries = 0
     477        port = None
    436478        while not ok and retries < self.max_retries:
    437479            try:
     480                # If this is a retry, close the request and annihilate the port
     481                port = self.hammer_port_shut(port)
     482
    438483                # Reconstruct the full request message
    439484                msg = self.request_message()
     
    487532                ok = True
    488533            except socket_error, e:
     534                self.hammer_port_shut(port)
    489535                raise service_error(service_error.connect,
    490536                        "Cannot connect to %s: %s" % (url, e[1]))
     
    510556                retries +=1
    511557            except ParseException, e:
     558                port = self.hammer_port_shut(port)
    512559                raise service_error(service_error.protocol,
    513560                        "Bad format message (XMLRPC??): %s" % e)
    514561            except FaultException, e:
     562                port = self.hammer_port_shut(port)
    515563                # If the method isn't implemented we get a FaultException
    516564                # without a detail (which would be a FeddFault).  If that's the
     
    535583
    536584        if retries >= self.max_retries and fail_exc and not ok:
     585            port = self.hammer_port_shut(port)
    537586            raise service_error(service_error.connect,
    538587                    "Too many failures: %s" % fail_exc)
  • fedd/federation/util.py

    r8cb269a r0dc62df  
    106106                    callback=fedd_ssl_verify_callback)
    107107
     108        # no session caching
     109        self.set_session_cache_mode(0)
     110
    108111class file_expanding_opts(OptionParser):
    109112    def expand_file(self, option, opt_str, v, p):
Note: See TracChangeset for help on using the changeset viewer.