#!/usr/bin/perl

use strict;

use Getopt::Std;
use IO::File;
use IO::Dir;
use IO::Pipe;
use File::Copy;

my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.FreeBSD.pl", 
    "smbmount.Linux.pl", "make_hosts", "fed-tun.pl", "fed_evrepeater",
    "rc.accounts.patch");
my $local_script_dir = ".";
my($pid, $gid);			# Process and group IDs for calling parse.tcl
my $splitter_config;		# Configuration file
my $debug;			# True if thecalled in debug mode
my $verbose;			# True for extra progress reports
my $startem;			# If true, start the sub-experiments
my $eid;			# Experiment ID
my $tcl;			# The experiment description (topology file)
my $master;			# Master testbed
my $tmpdir;			# tmp files
my $tb_config;			# testbed configurations
my $smb_share;			# Share to mount from the master
my $project_user;		# User to mount project dirs as
my $auth_proj;			# Local project for resource access
my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path & 
				# basename)
my($keytype);			# Type (DSA or RSA) of generated gateway keys
my $tcl_splitter;		# tcl program to split experiments
				# (changed during devel)
my $tclsh;			# tclsh to call directly (changed during devel)
my $fedd_client;		# Program to call for testbed access params
my $muxmax;			# Maximum number of links/lans over 1 gw pair
my @tarfiles;			# Tarfiles in use by this experiment
my @rpms;			# Rpms in use by this experiment
my $timeout;			# The timeout to use for experiment swap ins
my %opts;			# Parsed options

my $tbparams = {};		# Map of the per-testbed parameters from the
				# testbeds file.  It is a reference to a hash
				# of hashes (because it's passed around a bunch
				# and it's nicer to have one access pattern
				# throughout the script, in the main loop and
				# the subroutines).  That access is exemplified
				# by  $tbparams->{'deter'}->{'domain'} which is
				# the domain parameter of the DETER testbed.  
my $fail_soft;			# Do not swap failed sub-experiments out
my $max_children=1;		# Maximum number of simultaneous swap-ins

# Default commands for starting experiment and gateway nodes.  Testbeds can
# override these.  (The 'm' prefixed commands are for operating as the master
# testbed.)
my $def_expstart = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
my $def_gwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
my $def_mgwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
my $def_gwimage = "FBSD61-TUNNEL2";
my $def_gwtype = "pc";

# Parse the config file.  The format is a colon-separated parameter name
# followed by the value of that parameter to the end of the line.  This parses
# that format and puts the parameters into the referenced hash.  Parameter
# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
# failure (e.g. file open) and 1 on success.
sub parse_config {
    my($file, $href) = @_;
    my $fh = new IO::File($file);
	
    unless ($fh) {
	warn "Can't open $file: $!\n";
	return 0;
    }

    while (<$fh>) {
	next if /^\s*#/ || /^\s*$/;	# Skip comments & blanks
	chomp;
	/^([^:]+):\s*(.*)/ && do {
	    my $key = $1; 

	    $key =~ tr/A-Z/a-z/;
	    $href->{$key} = $2;
	    next;
	};
	warn "Unparasble line in $file: $_\n";
    }
    $fh->close();   # It will close when it goes out of scope, but...
    return 1;
}

# Parse an easier-to-read testbeds file (the original was comma-separated
# unadorned strings).  The format is a testbed scope as [testbed] followed by
# the colon-separated attribute-value pairs for the testbed.  Right now these
# go into a set of global hashes indexed by testbed, but that should probably
# change.  The file parameter is an open IO::Handle.  &parse_testbeds_filename
# opens the file and calls this.  Parse_testbeds can be used on pipes as well,
# e.g. fedd_client output.
sub parse_testbeds {
    my($fh, $tbparams) = @_;	    # Testbeds file and parameter hash
    my $tb;			    # Current testbed
    # Convert attribute in the file to tbparams hash key
    my %attr_to_hash = (
	"opsnode" => "host",
	"user" => "user",
	"domain" => "domain",
	"project" => "project",
	"connectortype" => "gwtype",
	"slavenodestartcmd" => "expstart",
	"slaveconnectorstartcmd" => "gwstart",
	"masternodestartcmd" => "mexpstart",
	"masterconnectorstartcmd" => "mgwstart",
	"connectorimage" => "gwimage",
	"fileserver" => "fs",
	"boss" => "boss",
	"eventserver" => "eventserver",
	"tunnelcfg" => "tun",
	"uri" => "uri",
	"access" => "access"
    );

    while (<$fh>) {
	next if /^\s*#/ || /^\s*$/;	# Skip comments & blanks
	print STDERR "testbeds: $_";
	chomp;
	/^\s*\[(.*)\]/ && do {
	    $tb = $1;
	    $tbparams->{$tb} = {} unless $tbparams->{$tb};
	    next;
	};

	/^([^:]+):\s*(.*)/ && do {
	    unless ($tb) {
		warn "Ignored attribute definition before testbed: $_\n ";
		next;
	    }
	    my $key = $1; 
	    $key =~ tr/A-Z/a-z/;
	    my $var = $attr_to_hash{$key};

	    if ($var) { $tbparams->{$tb}->{$var} = $2; }
	    else { warn "Unknown keyword $key\n"; }

	    next;
	};
	warn "Unparasble line: $_\n";
    }
    return 1;
}


# Open the given file name and parse the testbeds file it contains by calling
# &parse_testbeds.
sub parse_testbeds_filename {
    my($file, $tbparams) = @_;	    # Testbeds file and parameter hash
    my $fh = new IO::File($file);   # Testbeds filehandle

    if ($fh) {
	my $rv = &parse_testbeds($fh, $tbparams);
	$fh->close();   # It will close when it goes out of scope, but...
	$rv;
    }
    else {
	warn "Can't open $file: $!\n";
	return 0;
    }
}

# Generate SSH keys for use by the gateways.  The parameters are the type and
# the filename for the private key.  The pubkey will be stored in a filename
# with the same name as the private key but with .pub appended.  Type can be
# dsa or rsa.

sub generate_ssh_keys {
    my($type, $dest) = @_;

    $type =~ tr/A-Z/a-z/;
    return 0 if $type !~ /(rsa|dsa)/;
    system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
    return $@ ? 0 : 1;
}

# use scp to transfer a file, reporting true if successful and false otherwise.
# Parameters are the local file name, the ssh host destination (either hostname
# oe user@host), and an optional destination file name or directory.  If no
# destination is given, the file is transferred to the given user's home
# directory.  If only a machine is given in the ssh host destination, the
# current user is used.
sub scp_file {
    my($file, $user, $host, $dest) = @_;

    # XXX system with a relative pathname is sort of gross
    system("scp $file $user\@$host:$dest");
    if ($?) {
	warn "scp failed $?\n";
	return 0;
    }
    else { return 1; }
}

# use ssh to execute the given command on the machine (and as the user) in
# $where.  Parameters are the ssh destination directive ($where) and the
# command to execute, and a prefix to be placed on a message generated if the
# command fails.   On failure print a warning if a warning prefix was given and
# return false.  If timeout is given fork a process and set an alarm of that
# many seconds.  Timeouts also return 0;
sub ssh_cmd {
    my($user, $host, $cmd, $wname, $timeout) = @_;
    my $pid;		    # Child pid

    $timeout = 0 unless $timeout;   # Force default timeout

    if ( $pid = fork () ) {
	# Parent process
	# The eval acts as a signal catcher.  If the alarm goes off inside
	# the eval, the die will put "alarm\n" into $@, otherwise the
	# return value of the execution in the child process will be used.
	my $rv = eval {
	    local $SIG{'ALRM'} = sub{ die "alarm\n"; };
	    my $rv;

	    alarm $timeout;
	    $rv = waitpid($pid, 0);
	    alarm 0;
	    $rv;
	};

	# If the eval succeeded, $@ will be null and we can use $rv, which
	# is the return code from the subprocess.  If the eval timed out,
	# print a warning and assume the best.
	if ($@ eq "alarm\n" ) {
	    warn "$wname timed out - pid $pid still live\n";
	    return 1;
	}
	else {
	    return $rv;
	}
    }
    else {
	# Child process
	exec("ssh $user\@$host $cmd");
	exit 0;
    }
}

# Ship local copies of the federation scripts out to the given host.  If any of
# the script transfers fails, return 0.  The scripts to transfer are from the
# global @scripts and are found locally in $local_script_dir (another global).
sub ship_scripts {
    my($host, $user, $dest_dir) = @_;	    # Where, who, where remotely
    my $s;

    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
    for $s (@scripts) {
	&scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
	    return 0;
    }
    return 1;
}

# Ship per-testbed configuration generated by this script to the remote /proj
# directories on the remote testbeds
sub ship_configs {
    my($host, $user, $src_dir, $dest_dir) = @_;	    # Where, who, where remotely
    my($d, $f);

    $d = IO::Dir->new($src_dir) || return 0;

    # All directories under $tmpdir are 770 so we can delete them later.
    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
    while ( $f = $d->read()) {
	next if $f =~ /^\./;
	if ( -d "$src_dir/$f" ) {
	    &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
		return 0;
	}
	else {
	    &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
	}
    }
    return 1;
}

# Start a sub section of the experiment on a given testbed.  The testbed and
# the user to start the experiment as are pulled from the global per-testbed
# hash, passed in as $tbparams, as is the project name on the remote testbed.
# Parameters are the testbed and the experiment id.  Configuration files are
# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
# Then the current state of the experiment determined using expinfo.  From that
# state, the experiment is either created, modified or spapped in.  If
# everything succeeds, true is returned.  If the global verbose is set progress
# messages are printed.
sub start_segment {
    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID, 
					    # per-testbed parameters and remote
					    # swap-in timeout
    my $host =				    # Host name of remote ops (FQDN)
	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
					    # experiment under
    my $tclfile = "./$eid.$tb.tcl";	    # Local tcl file with the
					    # sub-experiment
    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
    my $state;				    # State of remote experiment
    my $status = new IO::Pipe;		    # The pipe to get status

    # Determine the status of the remote experiment
    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
	die "Can't ssh to $user\@$host:$!\n";
    # XXX: this is simple now.  Parsing may become more complex
    while (<$status>) {
	/State: (\w+)/ && ($state = $1);
	/No\s+such\s+experiment/ && ($state = "none");
    }
    $status->close();
    print "$tb: $state\n";

    # Copy the experiment definition data over 
    print "transferring subexperiment to $tb\n" if $verbose;
    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
    # Clear out any old experiment data; if not deleted, copies over it by
    # different users will fail.
    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
    # force the /bin/sh interpretation of the trailing * (which we need to keep
    # tmp around))  Again, this needs to be done more properly once we have a
    # non-ssh interface here.)
    print "clearing experiment subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
	return 0;
    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
	return 0;
    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
	return 0;
    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
	return 0;
    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
	return 0;
    # Remote experiment is active.  Modify it.
    if ($state eq "active") {
	print "Transferring federation support files to $tb\n" if $verbose;
	# First copy new scripts and hostinfo into the remote /proj
	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
	    return 0;
	&ship_scripts($host, $user, $proj_dir) || return 0;
	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;

	if ( -d "$tmpdir/tarfiles") {
	    &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
		return 0;
	}

	if ( -d "$tmpdir/rpms") {
	    &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
		return 0;
	}

	print "Modifying $eid in place on $tb\n" if $verbose;
	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
	    "$eid $tclfile", "modexp", $timeout) || return 0;
	return 1;
    }

    # Remote experiment is swapped out, modify it and swap it in.
    if ($state eq "swapped") {
	print "Transferring federation support files to $tb\n" if $verbose;
	# First copy new scripts and hostinfo into the remote /proj (because
	# the experiment exists, the directory tree should be there.
	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
	    return 0;
	&ship_scripts($host, $user, $proj_dir) || return 0;
	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
	if ( -d "$tmpdir/tarfiles") {
	    &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
		return 0;
	}

	if ( -d "$tmpdir/rpms") {
	    &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
		return 0;
	}

	print "Modifying $eid on $tb\n" if $verbose;
	&ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
	    "modexp") || return 0;
	print "Swapping $eid in on $tb\n" if $verbose;
	# Now start up
	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
	    "swapexp", $timeout) || return 0;
	return 1;
    }

    # No remote experiment.  Create one.  We do this in 2 steps so we can put
    # the configuration files and scripts into the new experiment directories.
    if ($state eq "none") {

	if ( -d "$tmpdir/tarfiles") {
	    # Tarfiles have to exist for the creation to work
	    print "copying tarfiles to $tb\n";
	    &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
		return 0;
	}

	if ( -d "$tmpdir/rpms") {
	    &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
		return 0;
	}

	print "Creating $eid on $tb\n" if $verbose;
	&ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
	    "$pid -e $eid $tclfile", "startexp") || return 0;
	# After startexp succeeds, the per-experiment directories exist on the
	# remote testbed.
	print "Transferring federation support files to $tb\n" if $verbose;
	# First copy new scripts and hostinfo into the remote /proj
	&scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
	    return 0;
	&ship_scripts($host, $user, $proj_dir) || return 0;
	&ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
	# Now start up
	print "Swapping $eid in on $tb\n" if $verbose;
	&ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
	    "swapexp", $timeout) || return 0;
	return 1;
    }

    # Every branch for a known state returns.  If execution gets here, the
    # state is unknown.
    warn "unknown state: $state\n";
    return 0;
}

# Swap out a sub-experiment - probably because another has failed.  Arguments
# are testbed and experiment.  Most of the control flow is similar to
# start_segment, though much simpler.
sub stop_segment {
    my($tb, $eid, $tbparams) = @_;	    # testbed, experiment ID and 
					    # per-testbed parameters
    my $user = $tbparams->{$tb}->{'user'};  # testbed user
    my $host =				    # Ops node
	$tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
    my $pid = $tbparams->{$tb}->{'project'};# testbed project

    print "Stopping $eid on $tb\n" if $verbose;
    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
	"swapexp (out)") || return 0;
    return 1;
}


$pid = $gid = "dummy";		    # Default project and group to pass to
				    # $tcl_splitter above.  These are total
				    # dummy arguments;  the splitter doesn't
				    # use them at all, but we supply them to
				    # keep our changes to the parser minimal.
# Argument processing.
getopts('Ft:c:p:f:ndvNP:', \%opts);
$splitter_config = $opts{'c'} || "./splitter.conf";
$debug = $opts{'d'};
$verbose = $opts{'v'} || $opts{'d'};

&parse_config("$splitter_config", \%opts) || 
    die "Cannot read config file $splitter_config: $!\n";

warn "-N does nothing now.  Only one testbeds format supported.\n"
    if $opts{'N'};
$fail_soft = $opts{'F'} || $opts{'failsoft'};
$startem = $opts{'n'} ? 0 : 1;		# If true, start the sub-experiments
$timeout = $opts{'t'} || $opts{'timeout'};
$eid = $opts{'experiment'};		# Experiment ID
$tcl = $opts{'f'} || shift;		# The experiment description
$master = $opts{'master'};		# Master testbed
$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
$tb_config = $opts{'testbeds'} || "./testbeds";	# testbed configurations
$local_script_dir = $opts{'scriptdir'};	# Local scripts
$muxmax  = $opts{'muxlimit'} || 3;	# Number of connections muxed on one
					# gateway

$max_children = $opts{'p'} || $opts{'maxchildren'} 
    if $opts{'p'} || $opts{'maxchildren'};

$smb_share = $opts{'smbshare'} ||	# Share to mount from the master
    die "Must give an SMB share\n";
$project_user = $opts{'smbuser'} ||	# User to mount project dirs as
    die "Must give an SMB user\n";
$auth_proj = $opts{'P'};

# tcl program to split experiments (changed during devel)
$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
# tclsh to call directly (changed during devel)
$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
# fedd_client to get testbed access parameters
$fedd_client = $opts{'feddclient'} || "fedd_client";

# Prefix to avoid collisions
$tmpdir .= "/split$$";

print "Temp files are in $tmpdir\n" if $verbose;
# Create a workspace
unless (-d "$tmpdir") {
    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
}

# If the keys are given, use them.  Otherwise create a set under $tmpdir

if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
    $gw_pubkey = $opts{'gatewaypubkey'};
    $gw_secretkey = $opts{'gatewaysecretkey'};
}
else {
    $keytype = $opts{'gatewaykeytype'} || "rsa";
    mkdir("$tmpdir/keys") || die "Can't create temoprary key dir: $!\n";
    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
    print "Generating $keytype keys\n" if $verbose;
    generate_ssh_keys($keytype, $gw_secretkey) || 
	die "Cannot generate kets:$@\n";
}
# Generate the basenames
($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;


# Validate scripts directory
for my $s (@scripts) {
    die "$local_script_dir/$s not in local script directory. Try -d\n"
	unless -r "$local_script_dir/$s";
}

die "Must supply file, master and experiment" unless $master && $tcl && $eid;

&parse_testbeds_filename($tb_config, $tbparams) ||
    die "Cannot testbed congfigurations from $tb_config: $!\n";

# Open a pipe to the splitter program and start it parsing the experiments
my $pipe = new IO::Pipe;
# NB no more -p call on parse call.
$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master  $pid $gid $eid $tcl") || 
    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";

# Parsing variables
my $ctb;			# Current testbed
my %allocated;			# If allocated{$tb} > 0, $tb is in use
my $destfile;			# File that the sub-experiment tcl file is
				# being written to, or "" if none.  Also used
				# for hostnames file.
my $desthandle;			# File handle for distfile
my $gateways;			# when gateway lists are being processed this
				# is the testbed whose gateways are being
				# gathered.
my $control_gateway;		# Control net gateway for the current testbed
my %active_end;			# If active_end{"a-b"} > 0 then a is the active
				# end of the a <-> b connector pair.

# Parse the splitter output.  This loop creates the sub experiments, gateway
# configurations and hostnames file
while (<$pipe>) {
    # Allbeds lists all the testbeds that this experiment accesses.  This code
    # acquires access to them and pulls in their access parameters from fedd.
    (/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do {
	next if /^#/;
	chomp;

	my $tb;		# Current testbed
	my @nodes;	# Current testbed node requests

	/([^|]+)\|?(.*)/ && do {
	    my $n;

	    ($tb , $n) = ($1, $2);
	    @nodes = split(/\|/, $n);
	};

	# If this testbed has not had its access parameters read from fedd, try
	# to read them, if we have a way to talk to fedd
	unless ($tbparams->{$tb}->{'access'} || !$fedd_client) { 
	    my $access_pipe = new IO::Pipe || 
		die "Can't open pipe to fedd:$!\n";
	    my $proj = $auth_proj ? " -p $auth_proj " : "";

	    print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'}
		. "\n") if $verbose;

	    my $cmd = "$fedd_client -t " . 
		$tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
		"-l $tb $proj" . (@nodes ? " -n " : " ") .
		join(" -n ", @nodes) . "| /usr/bin/tee fedd.$tb ";
	    print "$cmd\n" if $verbose;

	    $access_pipe->reader($cmd) || 
		    die "Can't exec fedd_client: $!\n";

	    &parse_testbeds($access_pipe, $tbparams) || 
		warn("Error reading fedd output: $!\n");
	    $access_pipe->close();
	}
	next;
    };

    # Start of a sub-experiment
    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
	$ctb = $1;

	# If we know the testbed, start collecting its sub experiment tcl
	# description.  If not, warn the user.
	if ($tbparams->{$ctb}->{'access'}) {
	    $allocated{$ctb}++;	# Keep track of the testbeds allocated

	    unless (-d "$tmpdir/$ctb") {
		mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
	    }
	    $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";

	    $desthandle = new IO::File(">$destfile") || 
		die "Cannot open $destfile:$!\n";
	}
	else{
	    warn "No such testbed $ctb\n";
	    $destfile = "";
	}
	next;
    };

    # End of that experiment
    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
	# Simple syntax check and close out this experiment's tcl description
	die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
	$desthandle->close() if $desthandle;
	$destfile = $ctb = "";
	next;
    };

    # Beginning of a gateway set
    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
	$gateways = $1;
	# If we've heard of this tb, create the config lines for it one at a
	# time.
	if ($allocated{$gateways}) {
	    # Just in case.  This directory should already have been created
	    # above.
	    unless (-d "$tmpdir/$gateways") {
		mkdir("$tmpdir/$gateways") || 
		    die "Can't create $tmpdir/$gateways: $!";
	    }
	}
	else {
	    warn "Gateways given (and ignored) for testbed not in use: " .
		"$gateways\n";
	    $gateways = 0;
	}
	next;
    };
    # End of the gateways section.  Output the client config for this testbed
    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
	die "Mismatched gateway markers ($1, $gateways)\n" 
	    unless !$gateways || $gateways == $1;

	if ($control_gateway ) {
	    # Client config
	    my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
	    my $master_project = $tbparams->{$master}->{'project'};
	    die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
	    print $cc "ControlGateway: $control_gateway\n";
	    print $cc "SMBShare: $smb_share\n";
	    print $cc "ProjectUser: $project_user\n";
	    print $cc "ProjectName: $master_project\n";
	    $cc->close();
	}
	else { warn "No control gateway for $gateways?\n"; }
	    
	$gateways = 0;
	next;
    };
    # Beginning of the hostnames list.  Collection is always in the hostnames
    # file.
    /^#\s+Begin\s+hostnames/ && do {
	$destfile = "$tmpdir/hostnames";
	$desthandle = new IO::File(">$destfile") || 
	    die "Can't open $destfile:$!\n";
	next;
    };
    # end of the hostnames list.
    /^#\s+End\s+hostnames/ && do {
	$desthandle->close();
	$destfile = "";
	next;
    };

    # Generate gateway configuration info, one file per line
    $gateways && do {
	chomp;
	my($dtb, $myname, $desthost, $type) = split(" ", $_);

	# Many of these are to simplify print statements
	my $sdomain =				# domain for the source
	    $tbparams->{$gateways}->{'domain'};
	my $ddomain =				# domain for the destination
	    $tbparams->{$dtb}->{'domain'};
	my $sproject =				# Project of the source
	    $tbparams->{$gateways}->{'project'};
	my $dproject =				# Project of the destination
	    $tbparams->{$dtb}->{'project'};
	my $fs =				# Master fs node (FQDN)
	    $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
	my $boss =				# Master boss node (FQDN)
	    $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
	my $event_server =			# Master event-server (FQDN)
	    $tbparams->{$master}->{'eventserver'} .  
	    $tbparams->{$master}->{'domain'};
	my $remote_event_server =		# Slave event-server (FQDN)
	    $tbparams->{$dtb}->{'eventserver'} .  
	    $tbparams->{$dtb}->{'domain'};
	my $remote_script_dir =			# Remote fed script location
	    "/proj/" . $dproject . "/exp/$eid/tmp";
	my $local_script_dir =			# Local fed script location
	    "/proj/" . $sproject . "/exp/$eid/tmp";
	my $active;				# Is this the active side of
						# the connector?
	my $tunnel_cfg =			# Use DETER's config stuff
	    $tbparams->{$gateways}->{'tun'} || "false";
				    

	$sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
	$ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";

	my $conf_file = "$myname$sdomain.gw.conf";
	my $remote_conf_file = "$desthost$ddomain.gw.conf";
	# translate to lower case so the `hostname` hack for specifying
	# configuration files works.
	$conf_file =~ tr/A-Z/a-z/;
	$remote_conf_file =~ tr/A-Z/a-z/;

	# If either end of this link is in the master side of the testbed, that
	# side is the active end. Otherwise the first testbed encountered in
	# the file will be the active end.  The $active_end variable keeps
	# track of those decisions
	if ( $dtb eq $master ) { $active = "false"; }
	elsif ($gateways eq $master ) { $active = "true"; }
	elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
	else { $active_end{"$gateways-$dtb"}++; $active = "true"; }

	# This is used to create the client configuration.
	$control_gateway = "$myname$sdomain"
	    if $type =~ /(control|both)/;

	# Write out the file
	my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
	    die "can't open $tmpdir/$gateways/$conf_file: $!\n";

	print $gwconfig "Active: $active\n";
	print $gwconfig "TunnelCfg: $tunnel_cfg\n";
	print $gwconfig "BossName: $boss\n";
	print $gwconfig "FsName: $fs\n";
	print $gwconfig "EventServerName: $event_server\n";
	print $gwconfig "RemoteEventServerName: $remote_event_server\n";
	print $gwconfig "Type: $type\n";
	print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
	print $gwconfig "EventRepeater: $local_script_dir/fed_evrepeater\n";
	print $gwconfig "RemoteExperiment: $dproject/$eid\n";
	print $gwconfig "LocalExperiment: $sproject/$eid\n";
	print $gwconfig "RemoteConfigFile: " . 
	    "$remote_script_dir/$remote_conf_file\n";
	print $gwconfig "Peer: $desthost$ddomain\n";
	print $gwconfig "Pubkeys: " . 
	    "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
	print $gwconfig "Privkeys: " .
	    "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
	$gwconfig->close();

	# This testbed has a gateway (most will) so make a copy of the keys it
	# needs in this testbed's subdirectory.  start_segment will transfer
	# them.
	unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
	    copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
		die "Can't copy pubkeys ($gw_pubkey to " . 
		    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
	}
	if ($active eq "true" ) {
	    unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
		copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
		    die "Can't copy secret keys ($gw_secretkey to " . 
			"$tmpdir/$gateways/$gw_secretkey_base): $!\n";
	    }
	}

	#done processing gateway entry, ready for next line
	next; 
    };
    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
	next if /^#/;
	chomp;
	push(@tarfiles, $_);
	next;
    };
    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
	next if /^#/;
	chomp;
	push(@rpms, $_);
	next;
    };

    next unless $destfile;  # Unidentified testbed, ignore config
    # local copies that can be used in the substitutions below
    my $gwtype = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
    my $gwimage = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart;
    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart;
    my $gwstart = $tbparams->{$ctb}->{'gwstart'} || $def_gwstart;
    my $expstart = $tbparams->{$ctb}->{'expstart'} || $def_expstart;
    my $project = $tbparams->{$ctb}->{'project'};

    # Substitute variables
    s/GWTYPE/$gwtype/g;
    s/GWIMAGE/$gwimage/g;
    if ($ctb eq $master ) {
	s/GWSTART/$mgwstart/g;
	s/EXPSTART/$mexpstart/g;
    }
    else {
	s/GWSTART/$gwstart/g;
	s/EXPSTART/$expstart/g;
    }
    # XXX: oh is this bad
    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
    s#PROJDIR#/proj/$project/#g;
    s#EID#$eid#g;
    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
    print $desthandle $_;
}
$pipe->close();
die "No nodes in master testbed ($master)\n" unless $allocated{$master};

# Copy tarfiles and rpms needed at remote sites to the staging directories.
# Start_segment will distribute them
for my $t  (@tarfiles) {
    die "tarfile '$t' unreadable: $!\n" unless -r $t;
    unless (-d "$tmpdir/tarfiles") {
	mkdir("$tmpdir/tarfiles") || 
	    die "Can't create $tmpdir/tarfiles:$!\n";
    }
    copy($t, "$tmpdir/tarfiles") || 
	die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
}

for my $r  (@rpms) {
    die "rpm '$r' unreadable: $!\n" unless -r $r;
    unless (-d "$tmpdir/rpms") {
	mkdir("$tmpdir/rpms") || 
	    die "Can't create $tmpdir/rpms:$!\n";
    }
    copy($r, "$tmpdir/rpms") || 
	die "Can't copy $r to  $tmpdir/rpms:$!\n";
}

exit(0) unless $startem;

my %started;		    # If $started{$tb} then $tb successfully started
my %child;		    # If $child{$pid} then a process with that pid is
			    # working on a starting a segment
my $nworking = 0;	    # Number of children working on swapin
my $pid;		    # Scratch variable for pids

# Start up the slave sub-experiments first
TESTBED:
for my $tb  (keys %allocated) {
    if ( $tb ne $master ) {
	while ( $nworking == $max_children ) {
	    print "Waiting for a child process to complete\n" if $verbose;
	    if (($pid = wait()) != -1 ) {
		# The $? >> 8 is the exit code of the subprocess, which is
		# non-zero if the &start_segment routine failed.
		my $exit_code = ($? >> 8);

		print "Child $pid completed exit code ($exit_code)\n"
		    if $verbose;
		$nworking--;
		$started{$child{$pid}}++ unless $exit_code;
		if ($child{$pid} ) { delete $child{$pid}; }
		else { warn "Reaped a pid we did not start?? ($pid)\n"; }
		last TESTBED if $exit_code;
	    }
	    else { warn "wait returned without reaping: $!\n"; }
	}
	if ( $pid = fork() ) {
	    # Parent process
	    $nworking ++;
	    $child{$pid} = $tb;
	    print "Started process $pid to start testbed $tb\n"
		if $verbose;
	}
	else {
	    # Child.  Note that we reverse the sense of the return code when it
	    # becomes an exit value.  Zero exit values indicate success.
	    exit(!&start_segment($tb, $eid, $tbparams, $timeout));
	}
    }
}

# Now wait for any still running processes.
while ( $nworking ) {
    print "Waiting for a child process to complete ($nworking running)\n" 
	if $verbose;
    if (($pid = wait()) != -1 ) {
	# The $? >> 8 is the exit code of the subprocess, which is
	# non-zero if the &start_segment routine failed.
	my $exit_code = ($? >> 8);

	print "Child $pid completed exit code ($exit_code)\n"
	    if $verbose;
	$nworking--;
	$started{$child{$pid}}++ unless $exit_code;
	if ($child{$pid} ) { delete $child{$pid}; }
	else { warn "Reaped a pid we did not start?? ($pid)\n"; }
    }
    else { warn "wait returned without reaping: $!\n"; }
}

# Now the master
if (&start_segment($master, $eid, $tbparams, $timeout)) { 
    $started{$master}++;
}

# If any testbed failed, swap the rest out.
if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
    print "Error starting experiment\n";
    exit(1);
}
print "Experiment started\n";
print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
system("rm -rf $tmpdir") unless $debug;
exit(0);    # set the exit value

=pod

=head1 NAME

B<splitter.pl>

=head1 SYNOPSIS

B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]

=head1 DESCRIPTION

B<splitter.pl> invokes the DETER experiment parser to split an annotated
experiment into multiple sub-experments and instantiates the sub-experiments on
their intended testbeds.  Annotation is accomplished using the
tb-set-node-testbed command, added to the parser.

Much of the script's behavior depends on the configuration file, specified with
the B<-c> flag and defaulting to F<./splitter.conf>.

The testbed labels supplied in the B<tb-set-node-testbed> command are
meaningful based on their presence in the testbeds file.  that file can be
specified in the configuration file using the B<Testbeds> directive, and
defaults to F<./testbeds>.  The syntax is described below.

Most of the intermediate files are staged in a sub-directory of a temporary
files directory and deleted at the end of the script.  Specifying the B<-d>
flag on the command line avoids the deletion for debbugging.  By default the
temporary files directory is directory is F</tmp> and can be reset in the
configuration file using the B<Tmpdir> directive.  Intermediate files are
stored under a subdirectory formed by adding the process ID of the splitter
process.  For example, if the temporary files directory is F</tmp> and the
B<splitter.pl> process ID is 2323, the temporary files will be stored in
F</tmp/split2323/>.

The expreriment is split out into one experiment description per testbed in the
temporary directory named as F<experiment.testbed.tcl> where the experiment is
the experiment ID given in the configuration file, and the testbed is the
tb-set-node-testbed parameter for the nodes in the file.

If the B<-n> option is absent the sub-experiments are then instantiated on
their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
Per-testbed parameters are set in the testbeds file.  Sub-experiments on
slave testbeds are instantiated in a random order, but the master testbed is
currently instantiated last.

Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
and the maximum number of simultaneous processes to start.

Scripts to start federation (the federation kit) are copied into the local
experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
taken from the directory given by the B<ScriptDir> directive in the
configuration file.

If B<-t> is given the parameter is treated as a parameter to B<Timeout> in 
F<splitter.conf>.

If any sub-experiment fails to instantiate, the other sub-exeriments are
swapped out.  B<-F> avoids this swap out, which can also be specified as 
B<SoftFail: true> in F<splitter.conf>

=head2 Configuration File

The configuration file is a simple set of colon-separated parameters and
values.  A configuration file must be present, either specified in the B<-c>
flag or the default F<./splitter.conf>.  All the parameter names are case
insensitive, but should not include any whitespace.  Parameter values may
include whitespace, but no newlines.

Possible parameters are:

=over 5

=item Experiment

The name of the experiment on the various testbeds

=item Master

The master testbed label from the testbeds file, described below.

=item Testbeds

The testbeds file described below, giving per-testbed parameters.  If this
directive is absent the testbeds file defaults to F<./testbeds>

=item ScriptDir

Location of the default federation scripts, i.e. the federation kit.

=item GatewayPubkey

=item GatewaySecretKey

The names of the files containing secret and public keys to use in setting up
tunnels between testbeds.  If given they are used, otherwise keys are generated.

=item GatewayKeyType

This controls the kind of SSH keys generated to configure the geatways.  If
given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>.  The parameter
is csase insensitive.

=item TmpDir

=item TempDir

The directory where temporary files are created.  These are synonyms, but
should both be specified, B<TmpDir> has priority.  If neither is specified,
F</tmp> is used.

=item SMBShare

The SMB share on the master testbed that will be exported to remote clients.

=item SMBUser

The experiment user to mount project directories as.  This user needs to be a
member of the exported experiment - that is one of the users in the project
containing this experiment on the master testbed.

=item Timeout

Value in seconds after which a swap-in operatioin will be considered a success.
Often long swap-ins will hang when there are partial failures.  This works
around this issue.  (This behavior can be requested on the command line by
specifying B<-t> I<secs>.)

=item FailSoft

If not set, failure of any sub experiment swaps the rest out.  Setting this to
any value avoids this swap out.  (This behavior can be requested on the command
line by specifying B<-F>.)

=item MuxLimit

The maximum bumber of links/lans carried by one gateway pair

=item Tclparse

The pathname to the experiment parsing program.  Only developers should set
this.

=item Tclsh

The pathname to the local oTcl shell.  Only developers should set
this.

=back

=head2 Testbeds file

The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
scoped attribute-value pairs where each attribute is specified on a separate
line of the configuration file.  Each testbed's parameters are preceeded by the
testbed label in brackets ([]) on a line by itself.  After that the parameters
are specified as parameter: value.  This is essentially the same format as the
configuration file.  Parameters are:

=over 4

=item User

The user under which to make requests to this testbed.  The user running
B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
testbed.

=item OpsNode

The host name of the testbed's ops node.  The user calling B<splitter.pl> must
be able to execute commands on this host via L<ssh(1)>.

=item Domain

The domain of nodes in this testbed (including the ops host).  This parameter
should always start with a period.

=item Project

The project under which to instantiate sub-experiments on this testbed.

=item ConnectorType

The node type for inter-testbed connector nodes on this testbed.

=item SlaveNodeStartCmd

The start command to run on experimental nodes when this testbed is used as a
slave.  In all the start commands the following string substitutions are made:

=over 10

=item FEDDIR

The local experiment's federation scripts directory.  Each local experiment
will have this replaced by the scripts directory on its local boss.

=item GWCONF 

The full pathname of the gateway configuration file.  As with FEDDIR, this is
on the local boss.

=item PROJDIR

The project directory on the local boss.

=item EID

The local experiment name.

=back

All startcmds specified in F<testbeds> undergo these expansions.

=item SlaveConnectorStartCmd

The start command to run on gateway nodes when this testbed is used as a slave.
The same string substitutions are made in this command as in SlaveNodeStartCmd.

=item MasterNodeStartCmd

The start command to run on experimental nodes when this testbed is used as a
master.  The same string substitutions are made in this command as in
SlaveNodeStartCmd.

=item MasterConnectorStartCmd

The start command to run on gateway nodes when this testbed is used as a
master.  The same string substitutions are made in this command as in
SlaveNodeStartCmd.

=item ConnectorImage

The disk image to be loaded on a gateway node on this testbed.

=item FileServer

The node in the master testbed from which filesystems are mounted.

=item Boss

The node in the master testbed that controls the testbed.

=item TunnelCfg

True if the connector needs to do DETER federation.  This parameter will
probably be removed.


=back

=head1 ENVIRONMENT

B<splitter.pl> does not directly make use of environment variables, but calls
out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
environment.

=head1 BUGS

A deprecated B<-N> flag was used to select testbeds file format.  Only one
format is supported now, and B<-N> generates a warning, but otherwise does not
affect B<splitter.pl>.

=head1 SEE ALSO

L<sh(1)>, L<ssh(1)> 

=cut