Show
Ignore:
Timestamp:
02/29/08 09:45:35 (4 years ago)
Author:
Ted Faber <faber@…>
Children:
f64fa816aea8218515cc0900940696d700329c0a
Parents:
906c76398e933601fe2e59e503084b2b645df5db
git-committer:
Ted Faber <faber@isi.edu> / 2008-02-29T17:45:35Z+0000
Message:

Changes to improve reliability of routes coming up. Restructuring of
federate leaves the system in a state where we can safely restart the router
(specifically, talking to the local boss again) and we restart it in the
final topology. This introduces a lag in routing actually being established.
It may be useful to add a lag to the startcmd to take this into account.

Location:
fedkit
Files:
1 added
4 modified

Legend:

Unmodified
Added
Removed
  • fedkit/fed-tun.pl

    rfd7a59b r4abace9  
    265265           ($tunnel_mask ? " netmask $tunnel_mask" : "")); 
    266266    warn "configuration of tunnel interface failed" if ($?); 
    267          
    268     system("route add $opts{'peer'} $tunnel_router"); 
    269     warn "configuration routes via tunnel interface failed" if ($?); 
     267 
     268    # Sometimes the insertion of DNS names lags a bit.  Retry this 
     269    # configuration a few times to let DNS catch up.  Might want to really 
     270    # check the DNS name before we try this... 
     271    my $config_succeeded = 0; 
     272    my $tries = 0; 
     273    my $max_retries = 6; 
     274 
     275    do { 
     276        system("route add $opts{'peer'} $tunnel_router"); 
     277        if ( $? ) { 
     278            warn "configuration routes via tunnel interface failed"; 
     279            $tries++; 
     280            sleep(10); 
     281        } 
     282        else { $config_succeeded = 1; } 
     283    } until ( $config_succeeded || $tries > $max_retries ); 
    270284 
    271285    print "setup_tunnel_cfg done\n" if ($debug); 
  • fedkit/federate.sh

    rd53dda5 r4abace9  
    11#!/bin/sh 
    22 
    3 # Take the user and the user's startcmd as arguments 
     3# Set up a federated environment on a client.  Unmount all local file systems, 
     4# erase all local accounts and then bring in accounts and file systems from the 
     5# master testbed.  Once all that's done, restore client communication to the 
     6# local bossnode and execute any startup command passed in as arguments to this 
     7# script. 
    48 
     9 
     10# The file containing the DNS name of the current boss 
    511BOSSNODE="/usr/local/etc/emulab/bossnode" 
    6 # GATEWAY="detertunnel" 
    712 
     13# Ports that the master gateway will forward 
    814PORTS="139 7777" 
     15# network cat command (to listen for the gateway) 
    916NC="/usr/bin/nc" 
     17# Emulab rc scripts directory 
    1018RCDIR="/usr/local/etc/emulab/rc" 
    1119 
     20# Emulab rc script to reset accounts and federation script to start the smb 
     21# automount process. 
    1222RCACCT="rc.accounts" 
    1323SMBMOUNT="smbmount.pl" 
     
    2636SCRIPTDIR=`$EMUDIR/tmcc -b status | perl -ne '/ALLOCATED=([^\/]+)\/([^\s]+)/ && print "/proj/$1/exp/$2/tmp\n";'` 
    2737 
    28 # Die if things are weird 
     38# Die if Scripts are not where they should be.  NB, this is checked by the 
     39# bootstrapper, too, so a failure here is unlikely. 
    2940if [ -z "$SCRIPTDIR" ] ; then 
    3041    echo "Can't find federation scripts.  Tmcc status says:" 
     
    3344fi 
    3445 
    35 # Move the SMB mounting script out of NFS so we can call it after the umount 
    36 cp "$SCRIPTDIR/$SMBMOUNT" /tmp 
    37  
    3846# Copy the global hostnames file into /etc/hosts so this node can address 
    39 # others in the experiment on other testbeds. 
     47# others in the experiment on other testbeds.  The extra steps preserve the 
     48# localhost entry that's aliased to this host's name. 
    4049grep 127\\.0\\.0\\.1 /etc/hosts > /tmp/hosts 
    4150cat "$SCRIPTDIR/hosts" >> /tmp/hosts 
     
    6776 
    6877echo "Configuring TMCC." 
    69  
    70 $EMUDIR/sethostname 
    71  
    7278echo $GATEWAY > $BOSSNODE 
    7379 
     
    8591 
    8692# 
    87 # Copy and exec the smbmount perl script 
     93# Call the smbmount perl script, this invokes the automounter to mount the 
     94# needed smb filesystems. 
    8895# 
    89  
    9096echo "Mounting via SMB." 
    91  
    9297"/tmp/$SMBMOUNT" $SHARE $GATEWAY $SMBUSER  
    9398 
     99echo "Restoring old bossnode" 
     100# remove bossnode override 
     101/bin/rm -f $BOSSNODE 
     102# Also clear any cached data that might interfere with route construction. 
     103/bin/rm -f /var/emulab/boot/tmcc/ifconfig 
     104 
     105# I remain unclear why ospfd seems screwed up after the resetting of accounts 
     106# and mounts.  This seems to fix it, but it's not very satisfying.  At this 
     107# point the federated topology is complete, so any problems with routing in a 
     108# partial topology -e.g. failed or partial bridging - that might have confused 
     109# the router should be gone. 
     110/bin/pkill ospf 
     111/bin/pkill gated 
     112/usr/local/etc/emulab/rc/rc.route boot 
     113 
     114# Execute any command passed in as a startcmd.  This basically daisy chains the 
     115# startcmd. 
    94116if [ ! -z "$1" ]; then  
    95117        su -l "$1" -c "$2" 
  • fedkit/splitter.pl

    r22bb7f8 r4abace9  
    99use File::Copy; 
    1010 
    11 my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl"); 
     11my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.pl",  
     12    "make_hosts", "fed-tun.pl"); 
    1213my $local_script_dir = "."; 
    1314my($pid, $gid);                 # Process and group IDs for calling parse.tcl 
  • fedkit/testbeds.example

    r0404558 r4abace9  
    4949# These slave commands are appropriate for generic testbeds, which emulab and 
    5050# WAIL are.  The -H on sudo is crucial when calling federate.sh 
    51 SlaveNodeStartCmd: sudo -H /bin/sh FEDDIR/federate.sh >& /tmp/federate 
     51SlaveNodeStartCmd: sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate 
    5252SlaveConnectorStartCmd: sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log 
    5353 
    5454# These master commands are placeholders 
    55 MasterNodeStartCmd: sudo -H /bin/sh FEDDIR/federate.sh &> /tmp/mfederate  
     55MasterNodeStartCmd: sudo -H /bin/sh FEDDIR/fed_bootstrap &> /tmp/mfederate  
    5656MasterConnectorStartCmd: sudo -H /users/faber/fed-tun.pl >& /tmp/bridge.log 
    5757FileServer: fs 
     
    6666ConnectorType: pc 
    6767ConnectorImage: FBSD61-TUNNEL2 
    68 SlaveNodeStartCmd: sudo -H /bin/sh FEDDIR/federate.sh >& /tmp/federate 
     68SlaveNodeStartCmd: sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate 
    6969SlaveConnectorStartCmd: sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log 
    70 MasterNodeStartCmd: sudo -H /bin/sh FEDDIR/federate.sh &> /tmp/mfederate  
     70MasterNodeStartCmd: sudo -H /bin/sh FEDDIR/fed_bootstrap &> /tmp/mfederate  
    7171MasterConnectorStartCmd: sudo -H /users/faber/fed-tun.pl >& /tmp/bridge.log 
    7272FileServer: fs