source: fedkit/splitter.pl @ dab4d56

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since dab4d56 was 7c3008e, checked in by Ted Faber <faber@…>, 17 years ago

checkpoint

  • Property mode set to 100644
File size: 46.3 KB
RevLine 
[1a8a08a]1#!/usr/bin/perl
2
[b814529]3use strict;
4
[1a8a08a]5use Getopt::Std;
[9c00d41]6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
[7c3008e]10use XML::Parser;
[1a8a08a]11
[f3691ff]12my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.FreeBSD.pl", 
13    "smbmount.Linux.pl", "make_hosts", "fed-tun.pl", "fed_evrepeater",
14    "rc.accounts.patch");
[527321c]15my $local_script_dir = ".";
[b814529]16my($pid, $gid);                 # Process and group IDs for calling parse.tcl
[527321c]17my $splitter_config;            # Configuration file
18my $debug;                      # True if thecalled in debug mode
19my $verbose;                    # True for extra progress reports
20my $startem;                    # If true, start the sub-experiments
21my $eid;                        # Experiment ID
22my $tcl;                        # The experiment description (topology file)
23my $master;                     # Master testbed
24my $tmpdir;                     # tmp files
25my $tb_config;                  # testbed configurations
26my $smb_share;                  # Share to mount from the master
27my $project_user;               # User to mount project dirs as
[2396559e]28my $auth_proj;                  # Local project for resource access
[b814529]29my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
30my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
31                                # basename)
[22bb7f8]32my($keytype);                   # Type (DSA or RSA) of generated gateway keys
[527321c]33my $tcl_splitter;               # tcl program to split experiments
[b814529]34                                # (changed during devel)
[527321c]35my $tclsh;                      # tclsh to call directly (changed during devel)
[2396559e]36my $fedd_client;                # Program to call for testbed access params
[e5fee75]37my $muxmax;                     # Maximum number of links/lans over 1 gw pair
[527321c]38my @tarfiles;                   # Tarfiles in use by this experiment
[e2a71ebe]39my @rpms;                       # Rpms in use by this experiment
[5479c80]40my $timeout;                    # The timeout to use for experiment swap ins
[527321c]41my %opts;                       # Parsed options
42
43my $tbparams = {};              # Map of the per-testbed parameters from the
44                                # testbeds file.  It is a reference to a hash
45                                # of hashes (because it's passed around a bunch
46                                # and it's nicer to have one access pattern
47                                # throughout the script, in the main loop and
48                                # the subroutines).  That access is exemplified
49                                # by  $tbparams->{'deter'}->{'domain'} which is
50                                # the domain parameter of the DETER testbed. 
[637adfa]51my $fail_soft;                  # Do not swap failed sub-experiments out
[e863014]52my $max_children=1;             # Maximum number of simultaneous swap-ins
[63f7c7e]53
[2396559e]54# Default commands for starting experiment and gateway nodes.  Testbeds can
55# override these.  (The 'm' prefixed commands are for operating as the master
56# testbed.)
57my $def_expstart = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
58my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
59my $def_gwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
60my $def_mgwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
61my $def_gwimage = "FBSD61-TUNNEL2";
62my $def_gwtype = "pc";
63
[9c00d41]64# Parse the config file.  The format is a colon-separated parameter name
65# followed by the value of that parameter to the end of the line.  This parses
66# that format and puts the parameters into the referenced hash.  Parameter
67# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
68# failure (e.g. file open) and 1 on success.
69sub parse_config {
70    my($file, $href) = @_;
[5f1cd41]71    my $fh = new IO::File($file);
[9c00d41]72       
73    unless ($fh) {
74        warn "Can't open $file: $!\n";
75        return 0;
76    }
77
78    while (<$fh>) {
79        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
80        chomp;
81        /^([^:]+):\s*(.*)/ && do {
[5f1cd41]82            my $key = $1; 
[9c00d41]83
84            $key =~ tr/A-Z/a-z/;
85            $href->{$key} = $2;
86            next;
87        };
88        warn "Unparasble line in $file: $_\n";
89    }
90    $fh->close();   # It will close when it goes out of scope, but...
91    return 1;
92}
93
[fe459d0]94# Parse an easier-to-read testbeds file (the original was comma-separated
95# unadorned strings).  The format is a testbed scope as [testbed] followed by
96# the colon-separated attribute-value pairs for the testbed.  Right now these
97# go into a set of global hashes indexed by testbed, but that should probably
[2396559e]98# change.  The file parameter is an open IO::Handle.  &parse_testbeds_filename
99# opens the file and calls this.  Parse_testbeds can be used on pipes as well,
100# e.g. fedd_client output.
[fe459d0]101sub parse_testbeds {
[2396559e]102    my($fh, $tbparams) = @_;        # Testbeds file and parameter hash
[527321c]103    my $tb;                         # Current testbed
[e2a71ebe]104    # Convert attribute in the file to tbparams hash key
[527321c]105    my %attr_to_hash = (
[fe459d0]106        "opsnode" => "host",
107        "user" => "user",
108        "domain" => "domain",
109        "project" => "project",
110        "connectortype" => "gwtype",
111        "slavenodestartcmd" => "expstart",
112        "slaveconnectorstartcmd" => "gwstart",
113        "masternodestartcmd" => "mexpstart",
114        "masterconnectorstartcmd" => "mgwstart",
115        "connectorimage" => "gwimage",
116        "fileserver" => "fs",
117        "boss" => "boss",
[f64fa81]118        "eventserver" => "eventserver",
[2396559e]119        "tunnelcfg" => "tun",
120        "uri" => "uri",
121        "access" => "access"
[fe459d0]122    );
123
124    while (<$fh>) {
125        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
[2396559e]126        print STDERR "testbeds: $_";
[fe459d0]127        chomp;
128        /^\s*\[(.*)\]/ && do {
129            $tb = $1;
[527321c]130            $tbparams->{$tb} = {} unless $tbparams->{$tb};
[fe459d0]131            next;
132        };
133
134        /^([^:]+):\s*(.*)/ && do {
135            unless ($tb) {
[2396559e]136                warn "Ignored attribute definition before testbed: $_\n ";
[fe459d0]137                next;
138            }
[527321c]139            my $key = $1; 
[fe459d0]140            $key =~ tr/A-Z/a-z/;
[527321c]141            my $var = $attr_to_hash{$key};
[fe459d0]142
[527321c]143            if ($var) { $tbparams->{$tb}->{$var} = $2; }
[2396559e]144            else { warn "Unknown keyword $key\n"; }
[fe459d0]145
146            next;
147        };
[2396559e]148        warn "Unparasble line: $_\n";
[fe459d0]149    }
150    return 1;
151}
152
[2396559e]153
154# Open the given file name and parse the testbeds file it contains by calling
155# &parse_testbeds.
156sub parse_testbeds_filename {
157    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
158    my $fh = new IO::File($file);   # Testbeds filehandle
159
160    if ($fh) {
161        my $rv = &parse_testbeds($fh, $tbparams);
162        $fh->close();   # It will close when it goes out of scope, but...
163        $rv;
164    }
165    else {
166        warn "Can't open $file: $!\n";
167        return 0;
168    }
169}
170
[22bb7f8]171# Generate SSH keys for use by the gateways.  The parameters are the type and
172# the filename for the private key.  The pubkey will be stored in a filename
173# with the same name as the private key but with .pub appended.  Type can be
174# dsa or rsa.
175
176sub generate_ssh_keys {
177    my($type, $dest) = @_;
178
179    $type =~ tr/A-Z/a-z/;
180    return 0 if $type !~ /(rsa|dsa)/;
181    system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
182    return $@ ? 0 : 1;
183}
[9c00d41]184
[1a8a08a]185# use scp to transfer a file, reporting true if successful and false otherwise.
186# Parameters are the local file name, the ssh host destination (either hostname
187# oe user@host), and an optional destination file name or directory.  If no
188# destination is given, the file is transferred to the given user's home
189# directory.  If only a machine is given in the ssh host destination, the
190# current user is used.
191sub scp_file {
[b68f597]192    my($file, $user, $host, $dest) = @_;
[1a8a08a]193
194    # XXX system with a relative pathname is sort of gross
[b68f597]195    system("scp $file $user\@$host:$dest");
[1a8a08a]196    if ($?) {
197        warn "scp failed $?\n";
198        return 0;
199    }
200    else { return 1; }
201}
202
203# use ssh to execute the given command on the machine (and as the user) in
204# $where.  Parameters are the ssh destination directive ($where) and the
205# command to execute, and a prefix to be placed on a message generated if the
206# command fails.   On failure print a warning if a warning prefix was given and
[5479c80]207# return false.  If timeout is given fork a process and set an alarm of that
208# many seconds.  Timeouts also return 0;
[1a8a08a]209sub ssh_cmd {
[5479c80]210    my($user, $host, $cmd, $wname, $timeout) = @_;
211    my $pid;                # Child pid
212
[33548e1]213    $timeout = 0 unless $timeout;   # Force default timeout
214
215    if ( $pid = fork () ) {
216        # Parent process
217        # The eval acts as a signal catcher.  If the alarm goes off inside
218        # the eval, the die will put "alarm\n" into $@, otherwise the
219        # return value of the execution in the child process will be used.
220        my $rv = eval {
221            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
222            my $rv;
223
224            alarm $timeout;
225            $rv = waitpid($pid, 0);
226            alarm 0;
227            $rv;
228        };
229
230        # If the eval succeeded, $@ will be null and we can use $rv, which
231        # is the return code from the subprocess.  If the eval timed out,
232        # print a warning and assume the best.
233        if ($@ eq "alarm\n" ) {
234            warn "$wname timed out - pid $pid still live\n";
235            return 1;
[5479c80]236        }
237        else {
[33548e1]238            return $rv;
[5479c80]239        }
240    }
241    else {
[33548e1]242        # Child process
243        exec("ssh $user\@$host $cmd");
244        exit 0;
[1a8a08a]245    }
246}
247
[63f7c7e]248# Ship local copies of the federation scripts out to the given host.  If any of
249# the script transfers fails, return 0.  The scripts to transfer are from the
250# global @scripts and are found locally in $local_script_dir (another global).
251sub ship_scripts {
252    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
[5f1cd41]253    my $s;
[63f7c7e]254
[2ef2c5b]255    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
[63f7c7e]256    for $s (@scripts) {
[b68f597]257        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
[63f7c7e]258            return 0;
259    }
260    return 1;
261}
262
[9c00d41]263# Ship per-testbed configuration generated by this script to the remote /proj
264# directories on the remote testbeds
265sub ship_configs {
266    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
267    my($d, $f);
268
269    $d = IO::Dir->new($src_dir) || return 0;
270
[3529a90]271    # All directories under $tmpdir are 770 so we can delete them later.
272    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
273    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
[9c00d41]274    while ( $f = $d->read()) {
275        next if $f =~ /^\./;
[2ef2c5b]276        if ( -d "$src_dir/$f" ) {
[3529a90]277            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
278                return 0;
[2ef2c5b]279        }
280        else {
[b68f597]281            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
[2ef2c5b]282        }
[9c00d41]283    }
284    return 1;
285}
286
[1a8a08a]287# Start a sub section of the experiment on a given testbed.  The testbed and
288# the user to start the experiment as are pulled from the global per-testbed
[527321c]289# hash, passed in as $tbparams, as is the project name on the remote testbed.
290# Parameters are the testbed and the experiment id.  Configuration files are
291# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
292# Then the current state of the experiment determined using expinfo.  From that
293# state, the experiment is either created, modified or spapped in.  If
294# everything succeeds, true is returned.  If the global verbose is set progress
295# messages are printed.
[1a8a08a]296sub start_segment {
[5479c80]297    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
298                                            # per-testbed parameters and remote
299                                            # swap-in timeout
[527321c]300    my $host =                              # Host name of remote ops (FQDN)
301        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
302    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
303    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
[1a8a08a]304                                            # experiment under
[527321c]305    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
[1a8a08a]306                                            # sub-experiment
[527321c]307    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
308    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
[e2a71ebe]309    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
[527321c]310    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
311    my $state;                              # State of remote experiment
312    my $status = new IO::Pipe;              # The pipe to get status
[1a8a08a]313
314    # Determine the status of the remote experiment
[9c00d41]315    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
[c23025e]316        die "Can't ssh to $user\@$host:$!\n";
[1a8a08a]317    # XXX: this is simple now.  Parsing may become more complex
[9c00d41]318    while (<$status>) {
[1a8a08a]319        /State: (\w+)/ && ($state = $1);
320        /No\s+such\s+experiment/ && ($state = "none");
321    }
[9c00d41]322    $status->close();
[1a8a08a]323    print "$tb: $state\n";
324
[3529a90]325    # Copy the experiment definition data over
[a835df7]326    print "transferring subexperiment to $tb\n" if $verbose;
[b68f597]327    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
[3529a90]328    # Clear out any old experiment data; if not deleted, copies over it by
329    # different users will fail.
[0e23fdb]330    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
331    # force the /bin/sh interpretation of the trailing * (which we need to keep
332    # tmp around))  Again, this needs to be done more properly once we have a
333    # non-ssh interface here.)
[a835df7]334    print "clearing experiment subdirs on $tb\n" if $verbose;
[0e23fdb]335    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
336        return 0;
[b68f597]337    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
338    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
339        return 0;
340    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
341    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
342        return 0;
[e2a71ebe]343    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
344    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
345        return 0;
346    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
347    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
348        return 0;
[1a8a08a]349    # Remote experiment is active.  Modify it.
350    if ($state eq "active") {
[a835df7]351        print "Transferring federation support files to $tb\n" if $verbose;
[63f7c7e]352        # First copy new scripts and hostinfo into the remote /proj
[b68f597]353        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
[c23025e]354            return 0;
[7c3008e]355        # Copy the virtual topology out as well
356        &scp_file("$tmpdir/vtopo.xml", $user, $host, "$proj_dir/vtopo.xml") ||
357            return 0;
358        &scp_file("$tmpdir/viz.xml", $user, $host, "$proj_dir/viz.xml") ||
359            return 0;
[63f7c7e]360        &ship_scripts($host, $user, $proj_dir) || return 0;
[9c00d41]361        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
[e2a71ebe]362
[b68f597]363        if ( -d "$tmpdir/tarfiles") {
364            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
365                return 0;
366        }
[a835df7]367
[e2a71ebe]368        if ( -d "$tmpdir/rpms") {
369            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
370                return 0;
371        }
372
[a835df7]373        print "Modifying $eid in place on $tb\n" if $verbose;
[1a8a08a]374        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
[5479c80]375            "$eid $tclfile", "modexp", $timeout) || return 0;
[1a8a08a]376        return 1;
377    }
378
379    # Remote experiment is swapped out, modify it and swap it in.
380    if ($state eq "swapped") {
[a835df7]381        print "Transferring federation support files to $tb\n" if $verbose;
[2ef2c5b]382        # First copy new scripts and hostinfo into the remote /proj (because
383        # the experiment exists, the directory tree should be there.
[b68f597]384        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
[838fb6a]385            return 0;
[7c3008e]386        # Copy the virtual topology out as well
387        &scp_file("$tmpdir/vtopo.xml", $user, $host, "$proj_dir/vtopo.xml") ||
388            return 0;
389        &scp_file("$tmpdir/viz.xml", $user, $host, "$proj_dir/viz.xml") ||
390            return 0;
391        &ship_scripts($host, $user, $proj_dir) || return 0;
[63f7c7e]392        &ship_scripts($host, $user, $proj_dir) || return 0;
[9c00d41]393        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
[b68f597]394        if ( -d "$tmpdir/tarfiles") {
395            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
396                return 0;
397        }
[a835df7]398
[e2a71ebe]399        if ( -d "$tmpdir/rpms") {
400            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
401                return 0;
402        }
403
[a835df7]404        print "Modifying $eid on $tb\n" if $verbose;
[2ef2c5b]405        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
406            "modexp") || return 0;
[a835df7]407        print "Swapping $eid in on $tb\n" if $verbose;
[63f7c7e]408        # Now start up
[1a8a08a]409        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
[5479c80]410            "swapexp", $timeout) || return 0;
[1a8a08a]411        return 1;
412    }
413
[63f7c7e]414    # No remote experiment.  Create one.  We do this in 2 steps so we can put
415    # the configuration files and scripts into the new experiment directories.
[1a8a08a]416    if ($state eq "none") {
[b68f597]417
418        if ( -d "$tmpdir/tarfiles") {
419            # Tarfiles have to exist for the creation to work
420            print "copying tarfiles to $tb\n";
421            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
422                return 0;
423        }
[e2a71ebe]424
425        if ( -d "$tmpdir/rpms") {
426            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
427                return 0;
428        }
429
[fe459d0]430        print "Creating $eid on $tb\n" if $verbose;
431        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
[1a8a08a]432            "$pid -e $eid $tclfile", "startexp") || return 0;
[e2a71ebe]433        # After startexp succeeds, the per-experiment directories exist on the
434        # remote testbed.
[a835df7]435        print "Transferring federation support files to $tb\n" if $verbose;
[63f7c7e]436        # First copy new scripts and hostinfo into the remote /proj
[b68f597]437        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
[838fb6a]438            return 0;
[7c3008e]439        # Copy the virtual topology out as well
440        &scp_file("$tmpdir/vtopo.xml", $user, $host, "$proj_dir/vtopo.xml") ||
441            return 0;
442        &scp_file("$tmpdir/viz.xml", $user, $host, "$proj_dir/viz.xml") ||
443            return 0;
444        &ship_scripts($host, $user, $proj_dir) || return 0;
[63f7c7e]445        &ship_scripts($host, $user, $proj_dir) || return 0;
[9c00d41]446        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
[63f7c7e]447        # Now start up
[a835df7]448        print "Swapping $eid in on $tb\n" if $verbose;
[63f7c7e]449        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
[5479c80]450            "swapexp", $timeout) || return 0;
[1a8a08a]451        return 1;
452    }
453
454    # Every branch for a known state returns.  If execution gets here, the
455    # state is unknown.
456    warn "unknown state: $state\n";
457    return 0;
458}
459
460# Swap out a sub-experiment - probably because another has failed.  Arguments
461# are testbed and experiment.  Most of the control flow is similar to
462# start_segment, though much simpler.
463sub stop_segment {
[527321c]464    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
465                                            # per-testbed parameters
466    my $user = $tbparams->{$tb}->{'user'};  # testbed user
467    my $host =                              # Ops node
468        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
469    my $pid = $tbparams->{$tb}->{'project'};# testbed project
[1a8a08a]470
[a835df7]471    print "Stopping $eid on $tb\n" if $verbose;
[1a8a08a]472    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
473        "swapexp (out)") || return 0;
474    return 1;
475}
476
[ad5639c]477# Fill tbparams with results from the fedd call.  The command is passed in and
478# a string with any relevant error conditions is returned.  undef is success.
479sub fedd_access_request{
480    my($cmd) = @_;
481    my($rv)=undef;
482
483    system("$cmd 2> /tmp/splitter.err.$$ > /tmp/splitter.$$" );
484
485    if ( ! $? ) {
486        &parse_testbeds_filename("/tmp/splitter.$$", $tbparams) ||
487            ($rv =  "Error reading fedd output: $!\n");
488    }
489    else {
490        my $f = new IO::File("/tmp/splitter.err.$$");
491        $rv =  "Fedd_client error:\n";
492        while (<$f>) { $rv .= $_; }
493        $f->close();
494    }
495    unlink("/tmp/splitter.$$", "/tmp/splitter.err.$$");
496    return $rv;
497}
[1a8a08a]498
[7c3008e]499# Generate visualization info from the topo file.
500sub genviz {
501    my($file, $outfile)= @_;
502    my %nodes;
503    my $chars;
504    my $in_node;
505    my $in_lan;
506    my $lan;
507    my %links;
508    my %lans;
509    my $rv;
510    my $dotfile = "/tmp/split$$.dot";
511    my $neato = "/usr/local/bin/neato";
512    my $g = new IO::File(">$dotfile") || return;
513    my $p = new IO::Pipe() || return;
514    my $out = new IO::File(">$outfile") || die "open $!\n";
515
516    sub start_element { 
517        my($expat, $element) = @_;
518        $in_node++ if $element eq "node";
519        if ( $element eq "lan" ) {
520            $in_lan++;
521            $lan = {};
522        }
523    }
524
525    sub end_element {
526        my($expat, $element) = @_;
527
528        $in_node = 0 if $element eq "node";
529        $nodes{$chars} = "node" if $in_node && $element eq "vname";
530        if ($in_lan) {
531            if ( $element ne "lan") {
532                $lan->{$element} = $chars if $element =~/(vname|vnode)/;
533            }
534            else {
535                $in_lan = 0;
536                my $vname = $lan->{'vname'};
537                if ( $links{$vname} && @{$links{$vname}} ==2 ) {
538                    # this link needs to be a lan
539                    $nodes{$vname} = "lan";
540                    $lans{$lan->{'vname'}} = ();
541                    foreach my $l (@{$links{$vname}}) {
542                        push(@{$lans{$vname}}, $l);
543                    }
544                    push(@{$lans{$vname}}, $lan->{'vnode'});
545                    delete $links{$vname};
546                    $lan={};
547                    return;
548                }
549                if ( $lans{$vname} && @{$lans{$vname}}) {
550                    push(@{$lans{$vname}}, $lan->{'vnode'});
551                    $lan = {};
552                    return;
553                }
554                $links{$vname} = () unless $links{$vname};
555                push(@{$links{$vname}}, $lan->{'vnode'});
556                $lan = {};
557                return;
558            }
559        }
560    }
561    sub found_chars { 
562        my($expat, $data) = @_;
563        $chars = $data;
564    }
565
566    my $parser = new XML::Parser(Handlers => { 
567            Start => \&start_element, 
568            End => \&end_element,
569            Char => \&found_chars
570        });
571
572    print "$file\n";
573    $parser->parsefile($file);
574
575    print $g "graph G {\n";
576    foreach my $n (keys %nodes) {
577        print $g "\t\"$n\"\n";
578    }
579    foreach my $l (keys %links) {
580        print $g "\t", join(" -- ", @{$links{$l}}), "\n";
581    }
582    foreach my $l (keys %lans) {
583        foreach my $n (@{$lans{$l}}) {
584            print $g "\t \"$n\" -- \"$l\"\n";
585        }
586    }
587    print $g "}\n";
588    $g->close();
589    $p->reader("$neato  -Gstart=rand -Gepsilon=0.005 -Gmaxiter=20000 " .
590        "-Gpack=true $dotfile");
591    print $out "<vis>\n";
592    while (<$p>) {
593        /^\s*"?([\w\-]+)"?\s+\[.*pos=\"(\d+),(\d+)\"/ && do {
594            my ($n, $x, $y) = ($1, $2, $3);
595
596            print $out "<node><name>$n</name><x>$x</x><y>$y</y><type>" . 
597                "$nodes{$n}</type></node>\n" if $nodes{$n};
598        };
599    }
600    print $out "</vis>\n";
601    $p->close();
602    unlink("$dotfile");
603}
604
[1a8a08a]605$pid = $gid = "dummy";              # Default project and group to pass to
606                                    # $tcl_splitter above.  These are total
607                                    # dummy arguments;  the splitter doesn't
608                                    # use them at all, but we supply them to
609                                    # keep our changes to the parser minimal.
610# Argument processing.
[2396559e]611getopts('Ft:c:p:f:ndvNP:', \%opts);
[9c00d41]612$splitter_config = $opts{'c'} || "./splitter.conf";
613$debug = $opts{'d'};
[a835df7]614$verbose = $opts{'v'} || $opts{'d'};
615
[9c00d41]616&parse_config("$splitter_config", \%opts) || 
[b814529]617    die "Cannot read config file $splitter_config: $!\n";
[9c00d41]618
[33548e1]619warn "-N does nothing now.  Only one testbeds format supported.\n"
620    if $opts{'N'};
[e863014]621$fail_soft = $opts{'F'} || $opts{'failsoft'};
[9c00d41]622$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
[5479c80]623$timeout = $opts{'t'} || $opts{'timeout'};
[9c00d41]624$eid = $opts{'experiment'};             # Experiment ID
625$tcl = $opts{'f'} || shift;             # The experiment description
626$master = $opts{'master'};              # Master testbed
627$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
628$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
629$local_script_dir = $opts{'scriptdir'}; # Local scripts
[e5fee75]630$muxmax  = $opts{'muxlimit'} || 3;      # Number of connections muxed on one
631                                        # gateway
[3c7da22]632
[e863014]633$max_children = $opts{'p'} || $opts{'maxchildren'} 
634    if $opts{'p'} || $opts{'maxchildren'};
635
[3c7da22]636$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
637    die "Must give an SMB share\n";
638$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
639    die "Must give an SMB user\n";
[f3691ff]640$auth_proj = $opts{'P'};
[3c7da22]641
[9c00d41]642# tcl program to split experiments (changed during devel)
643$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
644# tclsh to call directly (changed during devel)
645$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
[2396559e]646# fedd_client to get testbed access parameters
647$fedd_client = $opts{'feddclient'} || "fedd_client";
[9c00d41]648
649# Prefix to avoid collisions
[c23025e]650$tmpdir .= "/split$$";
651
[a835df7]652print "Temp files are in $tmpdir\n" if $verbose;
[9c00d41]653# Create a workspace
[c23025e]654unless (-d "$tmpdir") {
655    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
656}
657
[22bb7f8]658# If the keys are given, use them.  Otherwise create a set under $tmpdir
659
660if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
661    $gw_pubkey = $opts{'gatewaypubkey'};
662    $gw_secretkey = $opts{'gatewaysecretkey'};
663}
664else {
665    $keytype = $opts{'gatewaykeytype'} || "rsa";
666    mkdir("$tmpdir/keys") || die "Can't create temoprary key dir: $!\n";
667    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
668    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
669    print "Generating $keytype keys\n" if $verbose;
670    generate_ssh_keys($keytype, $gw_secretkey) || 
671        die "Cannot generate kets:$@\n";
672}
673# Generate the basenames
674($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
675($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
676
677
678
[9c00d41]679# Validate scripts directory
[b814529]680for my $s (@scripts) {
[63f7c7e]681    die "$local_script_dir/$s not in local script directory. Try -d\n"
682        unless -r "$local_script_dir/$s";
683}
[1a8a08a]684
685die "Must supply file, master and experiment" unless $master && $tcl && $eid;
686
[2396559e]687&parse_testbeds_filename($tb_config, $tbparams) ||
[33548e1]688    die "Cannot testbed congfigurations from $tb_config: $!\n";
[1a8a08a]689
690# Open a pipe to the splitter program and start it parsing the experiments
[5f1cd41]691my $pipe = new IO::Pipe;
[2ef2c5b]692# NB no more -p call on parse call.
[e5fee75]693$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master  $pid $gid $eid $tcl") || 
694    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
[1a8a08a]695
[b814529]696# Parsing variables
[527321c]697my $ctb;                        # Current testbed
698my %allocated;                  # If allocated{$tb} > 0, $tb is in use
699my $destfile;                   # File that the sub-experiment tcl file is
[b814529]700                                # being written to, or "" if none.  Also used
701                                # for hostnames file.
[5f1cd41]702my $desthandle;                 # File handle for distfile
[527321c]703my $gateways;                   # when gateway lists are being processed this
[b814529]704                                # is the testbed whose gateways are being
705                                # gathered.
[527321c]706my $control_gateway;            # Control net gateway for the current testbed
707my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
[b814529]708                                # end of the a <-> b connector pair.
[7c3008e]709my $vtopo;                      # IO::File for virtual topology info
[b814529]710
[9c00d41]711# Parse the splitter output.  This loop creates the sub experiments, gateway
712# configurations and hostnames file
713while (<$pipe>) {
[7c3008e]714    # Vtopo is virtual topology about the entire experiment. Copy it to the
715    # $tmpdir for distribution far and wide.
[6115f88]716    (/^#\s+Begin\s+Vtopo/../^#\s+End\s+Vtopo/) && do {
[7c3008e]717        /^#\s+Begin/ && do {
718            $vtopo = new IO::File(">$tmpdir/vtopo.xml");
719            warn "Can't open $tmpdir/vtopo.xml:$!\n" unless $vtopo;
720            next;
721        };
722        /^#\s+End/ && do {
723            $vtopo->close() if $vtopo;
724            undef $vtopo;
725            genviz("$tmpdir/vtopo.xml", "$tmpdir/viz.xml");
726            next;
727        };
728        print $vtopo $_  if $vtopo;
[6115f88]729        next;
730    };
[2396559e]731    # Allbeds lists all the testbeds that this experiment accesses.  This code
732    # acquires access to them and pulls in their access parameters from fedd.
733    (/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do {
734        next if /^#/;
735        chomp;
736
[387408b]737        my $tb;         # Current testbed
738        my @nodes;      # Current testbed node requests
739
[ad5639c]740        # The Allbeds line has the testbed name first separated by the node
741        # requirements of the testbeds.  A node requirement is separated form
742        # teh testbed name and other node requirements by a vertical bar (|).
743        # This pulls the testbed off the front (which must be present) and
744        # splits the node descriptors out by the vertical bar.  The first
745        # vertical bar (the one after the testbed) is removed by the intial
746        # regular expression to avoid a null entry in @nodes.  The node
747        # requests are of the form image:type:count and can be passed directly
748        # to fedd_client as parameters.
[387408b]749        /([^|]+)\|?(.*)/ && do {
[ad5639c]750            my $n;      # Scratch
[387408b]751
752            ($tb , $n) = ($1, $2);
753            @nodes = split(/\|/, $n);
754        };
755
[2396559e]756        # If this testbed has not had its access parameters read from fedd, try
757        # to read them, if we have a way to talk to fedd
[f3691ff]758        unless ($tbparams->{$tb}->{'access'} || !$fedd_client) { 
[2396559e]759            my $access_pipe = new IO::Pipe || 
760                die "Can't open pipe to fedd:$!\n";
[f3691ff]761            my $proj = $auth_proj ? " -p $auth_proj " : "";
[ad5639c]762            my @cmds;
763            my $rv;
[2396559e]764
765            print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'}
766                . "\n") if $verbose;
767
[ad5639c]768            # First access command, implicitly uses localhost fedd
769            push(@cmds,"$fedd_client -t " . 
770                $tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
771                "-l $tb $proj" . (@nodes ? " -n " : " ") .
772                join(" -n ", @nodes));
773            # Second try access command, implicitly directly contact testbed
774            push(@cmds,"$fedd_client -t " . 
775                $tbparams->{$tb}->{'uri'} .  " -u " .
[2396559e]776                $tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
[387408b]777                "-l $tb $proj" . (@nodes ? " -n " : " ") .
[ad5639c]778                join(" -n ", @nodes));
[bd6e48a]779            # Third try access command, implicitly directly contact testbed
780            # using only federated id.
781            push(@cmds,"$fedd_client -f -a -t " . 
782                $tbparams->{$tb}->{'uri'} .  " -u " .
783                $tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
784                "-l $tb $proj" . (@nodes ? " -n " : " ") .
785                join(" -n ", @nodes));
[2396559e]786
[ad5639c]787            foreach my $c (@cmds) {
788                print "$c\n" if $verbose;
789                $rv = &fedd_access_request($c);
790                warn($rv) if $rv;
[2396559e]791
[ad5639c]792                last if $rv eq undef;
793            }
794            die "Cannot get access to $tb\n"  if $rv;
[2396559e]795        }
796        next;
797    };
798
[1a8a08a]799    # Start of a sub-experiment
800    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
801        $ctb = $1;
802
803        # If we know the testbed, start collecting its sub experiment tcl
[2396559e]804        # description.  If not, warn the user.
805        if ($tbparams->{$ctb}->{'access'}) {
[1a8a08a]806            $allocated{$ctb}++; # Keep track of the testbeds allocated
[c23025e]807
808            unless (-d "$tmpdir/$ctb") {
809                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
810            }
811            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
[1a8a08a]812
[5f1cd41]813            $desthandle = new IO::File(">$destfile") || 
814                die "Cannot open $destfile:$!\n";
[1a8a08a]815        }
[2396559e]816        else{
[1a8a08a]817            warn "No such testbed $ctb\n";
818            $destfile = "";
819        }
820        next;
821    };
[c23025e]822
[1a8a08a]823    # End of that experiment
824    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
825        # Simple syntax check and close out this experiment's tcl description
826        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
[2396559e]827        $desthandle->close() if $desthandle;
[1a8a08a]828        $destfile = $ctb = "";
829        next;
830    };
[c23025e]831
832    # Beginning of a gateway set
833    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
834        $gateways = $1;
835        # If we've heard of this tb, create the config lines for it one at a
836        # time.
837        if ($allocated{$gateways}) {
838            # Just in case.  This directory should already have been created
839            # above.
840            unless (-d "$tmpdir/$gateways") {
841                mkdir("$tmpdir/$gateways") || 
842                    die "Can't create $tmpdir/$gateways: $!";
843            }
844        }
845        else {
846            warn "Gateways given (and ignored) for testbed not in use: " .
847                "$gateways\n";
848            $gateways = 0;
849        }
850        next;
851    };
[3c7da22]852    # End of the gateways section.  Output the client config for this testbed
[c23025e]853    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
854        die "Mismatched gateway markers ($1, $gateways)\n" 
855            unless !$gateways || $gateways == $1;
[3c7da22]856
[e4436a6]857        if ($control_gateway ) {
[fe459d0]858            # Client config
[527321c]859            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
[bc01820]860            my $master_project = $tbparams->{$master}->{'project'};
[fe459d0]861            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
862            print $cc "ControlGateway: $control_gateway\n";
863            print $cc "SMBShare: $smb_share\n";
864            print $cc "ProjectUser: $project_user\n";
[bc01820]865            print $cc "ProjectName: $master_project\n";
[fe459d0]866            $cc->close();
867        }
[5479c80]868        else { warn "No control gateway for $gateways?\n"; }
[fe459d0]869           
[c23025e]870        $gateways = 0;
871        next;
872    };
[1a8a08a]873    # Beginning of the hostnames list.  Collection is always in the hostnames
874    # file.
875    /^#\s+Begin\s+hostnames/ && do {
[c23025e]876        $destfile = "$tmpdir/hostnames";
[5f1cd41]877        $desthandle = new IO::File(">$destfile") || 
878            die "Can't open $destfile:$!\n";
[1a8a08a]879        next;
880    };
881    # end of the hostnames list.
882    /^#\s+End\s+hostnames/ && do {
[5f1cd41]883        $desthandle->close();
[1a8a08a]884        $destfile = "";
885        next;
886    };
887
[c23025e]888    # Generate gateway configuration info, one file per line
889    $gateways && do {
890        chomp;
891        my($dtb, $myname, $desthost, $type) = split(" ", $_);
[527321c]892
893        # Many of these are to simplify print statements
894        my $sdomain =                           # domain for the source
895            $tbparams->{$gateways}->{'domain'};
896        my $ddomain =                           # domain for the destination
897            $tbparams->{$dtb}->{'domain'};
[8034579]898        my $sproject =                          # Project of the source
[527321c]899            $tbparams->{$gateways}->{'project'};
[8034579]900        my $dproject =                          # Project of the destination
901            $tbparams->{$dtb}->{'project'};
[527321c]902        my $fs =                                # Master fs node (FQDN)
903            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
904        my $boss =                              # Master boss node (FQDN)
905            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
[f64fa81]906        my $event_server =                      # Master event-server (FQDN)
907            $tbparams->{$master}->{'eventserver'} . 
908            $tbparams->{$master}->{'domain'};
[33e3537]909        my $remote_event_server =               # Slave event-server (FQDN)
910            $tbparams->{$dtb}->{'eventserver'} . 
911            $tbparams->{$dtb}->{'domain'};
[527321c]912        my $remote_script_dir =                 # Remote fed script location
[8034579]913            "/proj/" . $dproject . "/exp/$eid/tmp";
914        my $local_script_dir =                  # Local fed script location
915            "/proj/" . $sproject . "/exp/$eid/tmp";
[527321c]916        my $active;                             # Is this the active side of
[b814529]917                                                # the connector?
[2396559e]918        my $tunnel_cfg =                        # Use DETER's config stuff
919            $tbparams->{$gateways}->{'tun'} || "false";
920                                   
[c23025e]921
[527321c]922        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
923        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
[4addf9d]924
[527321c]925        my $conf_file = "$myname$sdomain.gw.conf";
[8034579]926        my $remote_conf_file = "$desthost$ddomain.gw.conf";
[a835df7]927        # translate to lower case so the `hostname` hack for specifying
928        # configuration files works.
929        $conf_file =~ tr/A-Z/a-z/;
[8034579]930        $remote_conf_file =~ tr/A-Z/a-z/;
[a835df7]931
[c23025e]932        # If either end of this link is in the master side of the testbed, that
933        # side is the active end. Otherwise the first testbed encountered in
934        # the file will be the active end.  The $active_end variable keeps
935        # track of those decisions
936        if ( $dtb eq $master ) { $active = "false"; }
937        elsif ($gateways eq $master ) { $active = "true"; }
938        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
939        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
940
[3c7da22]941        # This is used to create the client configuration.
942        $control_gateway = "$myname$sdomain"
943            if $type =~ /(control|both)/;
944
[c23025e]945        # Write out the file
[5f1cd41]946        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
[a835df7]947            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
[321c0cb]948
[f70f9c8]949        print $gwconfig "Active: $active\n";
[2396559e]950        print $gwconfig "TunnelCfg: $tunnel_cfg\n";
[527321c]951        print $gwconfig "BossName: $boss\n";
952        print $gwconfig "FsName: $fs\n";
[f64fa81]953        print $gwconfig "EventServerName: $event_server\n";
[33e3537]954        print $gwconfig "RemoteEventServerName: $remote_event_server\n";
[f70f9c8]955        print $gwconfig "Type: $type\n";
[527321c]956        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
[8034579]957        print $gwconfig "EventRepeater: $local_script_dir/fed_evrepeater\n";
958        print $gwconfig "RemoteExperiment: $dproject/$eid\n";
959        print $gwconfig "LocalExperiment: $sproject/$eid\n";
960        print $gwconfig "RemoteConfigFile: " . 
961            "$remote_script_dir/$remote_conf_file\n";
[f70f9c8]962        print $gwconfig "Peer: $desthost$ddomain\n";
963        print $gwconfig "Pubkeys: " . 
[9c00d41]964            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
[f70f9c8]965        print $gwconfig "Privkeys: " .
[9c00d41]966            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
[f70f9c8]967        $gwconfig->close();
[c23025e]968
[9c00d41]969        # This testbed has a gateway (most will) so make a copy of the keys it
970        # needs in this testbed's subdirectory.  start_segment will transfer
971        # them.
972        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
973            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
974                die "Can't copy pubkeys ($gw_pubkey to " . 
975                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
976        }
977        if ($active eq "true" ) {
978            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
979                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
980                    die "Can't copy secret keys ($gw_secretkey to " . 
981                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
982            }
983        }
984
[c23025e]985        #done processing gateway entry, ready for next line
986        next; 
987    };
[2ef2c5b]988    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
989        next if /^#/;
990        chomp;
991        push(@tarfiles, $_);
992        next;
993    };
[e2a71ebe]994    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
995        next if /^#/;
996        chomp;
997        push(@rpms, $_);
998        next;
999    };
[c23025e]1000
[1a8a08a]1001    next unless $destfile;  # Unidentified testbed, ignore config
[527321c]1002    # local copies that can be used in the substitutions below
[2396559e]1003    my $gwtype = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
1004    my $gwimage = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
1005    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart;
1006    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart;
1007    my $gwstart = $tbparams->{$ctb}->{'gwstart'} || $def_gwstart;
1008    my $expstart = $tbparams->{$ctb}->{'expstart'} || $def_expstart;
[527321c]1009    my $project = $tbparams->{$ctb}->{'project'};
[1a8a08a]1010
1011    # Substitute variables
[527321c]1012    s/GWTYPE/$gwtype/g;
1013    s/GWIMAGE/$gwimage/g;
[1a8a08a]1014    if ($ctb eq $master ) {
[527321c]1015        s/GWSTART/$mgwstart/g;
1016        s/EXPSTART/$mexpstart/g;
[1a8a08a]1017    }
1018    else {
[527321c]1019        s/GWSTART/$gwstart/g;
1020        s/EXPSTART/$expstart/g;
[1a8a08a]1021    }
[4addf9d]1022    # XXX: oh is this bad
1023    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
[527321c]1024    s#PROJDIR#/proj/$project/#g;
[b68f597]1025    s#EID#$eid#g;
[527321c]1026    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
[5f1cd41]1027    print $desthandle $_;
[1a8a08a]1028}
[9c00d41]1029$pipe->close();
[1a8a08a]1030die "No nodes in master testbed ($master)\n" unless $allocated{$master};
1031
[e2a71ebe]1032# Copy tarfiles and rpms needed at remote sites to the staging directories.
1033# Start_segment will distribute them
[b814529]1034for my $t  (@tarfiles) {
[2ef2c5b]1035    die "tarfile '$t' unreadable: $!\n" unless -r $t;
[b68f597]1036    unless (-d "$tmpdir/tarfiles") {
1037        mkdir("$tmpdir/tarfiles") || 
1038            die "Can't create $tmpdir/tarfiles:$!\n";
[2ef2c5b]1039    }
[b68f597]1040    copy($t, "$tmpdir/tarfiles") || 
1041        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
[2ef2c5b]1042}
1043
[e2a71ebe]1044for my $r  (@rpms) {
1045    die "rpm '$r' unreadable: $!\n" unless -r $r;
1046    unless (-d "$tmpdir/rpms") {
1047        mkdir("$tmpdir/rpms") || 
1048            die "Can't create $tmpdir/rpms:$!\n";
1049    }
1050    copy($r, "$tmpdir/rpms") || 
1051        die "Can't copy $r to  $tmpdir/rpms:$!\n";
1052}
1053
[1a8a08a]1054exit(0) unless $startem;
1055
[527321c]1056my %started;                # If $started{$tb} then $tb successfully started
[e863014]1057my %child;                  # If $child{$pid} then a process with that pid is
1058                            # working on a starting a segment
1059my $nworking = 0;           # Number of children working on swapin
1060my $pid;                    # Scratch variable for pids
[b814529]1061
[1a8a08a]1062# Start up the slave sub-experiments first
1063TESTBED:
[b814529]1064for my $tb  (keys %allocated) {
[e863014]1065    if ( $tb ne $master ) {
1066        while ( $nworking == $max_children ) {
1067            print "Waiting for a child process to complete\n" if $verbose;
1068            if (($pid = wait()) != -1 ) {
1069                # The $? >> 8 is the exit code of the subprocess, which is
1070                # non-zero if the &start_segment routine failed.
1071                my $exit_code = ($? >> 8);
1072
1073                print "Child $pid completed exit code ($exit_code)\n"
1074                    if $verbose;
1075                $nworking--;
1076                $started{$child{$pid}}++ unless $exit_code;
1077                if ($child{$pid} ) { delete $child{$pid}; }
1078                else { warn "Reaped a pid we did not start?? ($pid)\n"; }
1079                last TESTBED if $exit_code;
1080            }
1081            else { warn "wait returned without reaping: $!\n"; }
1082        }
1083        if ( $pid = fork() ) {
1084            # Parent process
1085            $nworking ++;
1086            $child{$pid} = $tb;
1087            print "Started process $pid to start testbed $tb\n"
1088                if $verbose;
1089        }
1090        else {
1091            # Child.  Note that we reverse the sense of the return code when it
1092            # becomes an exit value.  Zero exit values indicate success.
1093            exit(!&start_segment($tb, $eid, $tbparams, $timeout));
1094        }
1095    }
1096}
1097
1098# Now wait for any still running processes.
1099while ( $nworking ) {
1100    print "Waiting for a child process to complete ($nworking running)\n" 
1101        if $verbose;
1102    if (($pid = wait()) != -1 ) {
1103        # The $? >> 8 is the exit code of the subprocess, which is
1104        # non-zero if the &start_segment routine failed.
1105        my $exit_code = ($? >> 8);
1106
1107        print "Child $pid completed exit code ($exit_code)\n"
1108            if $verbose;
1109        $nworking--;
1110        $started{$child{$pid}}++ unless $exit_code;
1111        if ($child{$pid} ) { delete $child{$pid}; }
1112        else { warn "Reaped a pid we did not start?? ($pid)\n"; }
[1a8a08a]1113    }
[e863014]1114    else { warn "wait returned without reaping: $!\n"; }
[1a8a08a]1115}
1116
1117# Now the master
[5479c80]1118if (&start_segment($master, $eid, $tbparams, $timeout)) { 
[1a8a08a]1119    $started{$master}++;
1120}
1121
1122# If any testbed failed, swap the rest out.
[637adfa]1123if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
[527321c]1124    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
[1a8a08a]1125    print "Error starting experiment\n";
1126    exit(1);
1127}
1128print "Experiment started\n";
[a835df7]1129print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
[9c00d41]1130system("rm -rf $tmpdir") unless $debug;
[1a8a08a]1131exit(0);    # set the exit value
1132
1133=pod
1134
1135=head1 NAME
1136
1137B<splitter.pl>
1138
1139=head1 SYNOPSIS
1140
[637adfa]1141B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
[e863014]1142    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
[1a8a08a]1143
1144=head1 DESCRIPTION
1145
1146B<splitter.pl> invokes the DETER experiment parser to split an annotated
1147experiment into multiple sub-experments and instantiates the sub-experiments on
1148their intended testbeds.  Annotation is accomplished using the
1149tb-set-node-testbed command, added to the parser.
1150
[5f1cd41]1151Much of the script's behavior depends on the configuration file, specified with
1152the B<-c> flag and defaulting to F<./splitter.conf>.
1153
1154The testbed labels supplied in the B<tb-set-node-testbed> command are
1155meaningful based on their presence in the testbeds file.  that file can be
1156specified in the configuration file using the B<Testbeds> directive, and
1157defaults to F<./testbeds>.  The syntax is described below.
[9c00d41]1158
1159Most of the intermediate files are staged in a sub-directory of a temporary
1160files directory and deleted at the end of the script.  Specifying the B<-d>
1161flag on the command line avoids the deletion for debbugging.  By default the
1162temporary files directory is directory is F</tmp> and can be reset in the
1163configuration file using the B<Tmpdir> directive.  Intermediate files are
1164stored under a subdirectory formed by adding the process ID of the splitter
1165process.  For example, if the temporary files directory is F</tmp> and the
1166B<splitter.pl> process ID is 2323, the temporary files will be stored in
1167F</tmp/split2323/>.
[1a8a08a]1168
1169The expreriment is split out into one experiment description per testbed in the
[9c00d41]1170temporary directory named as F<experiment.testbed.tcl> where the experiment is
1171the experiment ID given in the configuration file, and the testbed is the
1172tb-set-node-testbed parameter for the nodes in the file.
[1a8a08a]1173
1174If the B<-n> option is absent the sub-experiments are then instantiated on
1175their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
[9c00d41]1176Per-testbed parameters are set in the testbeds file.  Sub-experiments on
[1a8a08a]1177slave testbeds are instantiated in a random order, but the master testbed is
1178currently instantiated last.
1179
[e863014]1180Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
1181and the maximum number of simultaneous processes to start.
1182
[9c00d41]1183Scripts to start federation (the federation kit) are copied into the local
1184experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
1185taken from the directory given by the B<ScriptDir> directive in the
1186configuration file.
[63f7c7e]1187
[637adfa]1188If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
1189F<splitter.conf>.
1190
[1a8a08a]1191If any sub-experiment fails to instantiate, the other sub-exeriments are
[637adfa]1192swapped out.  B<-F> avoids this swap out, which can also be specified as
1193B<SoftFail: true> in F<splitter.conf>
[1a8a08a]1194
[9c00d41]1195=head2 Configuration File
1196
[5f1cd41]1197The configuration file is a simple set of colon-separated parameters and
1198values.  A configuration file must be present, either specified in the B<-c>
1199flag or the default F<./splitter.conf>.  All the parameter names are case
1200insensitive, but should not include any whitespace.  Parameter values may
1201include whitespace, but no newlines.
[9c00d41]1202
1203Possible parameters are:
1204
1205=over 5
1206
1207=item Experiment
1208
1209The name of the experiment on the various testbeds
1210
1211=item Master
1212
1213The master testbed label from the testbeds file, described below.
1214
1215=item Testbeds
1216
1217The testbeds file described below, giving per-testbed parameters.  If this
1218directive is absent the testbeds file defaults to F<./testbeds>
1219
1220=item ScriptDir
1221
1222Location of the default federation scripts, i.e. the federation kit.
1223
1224=item GatewayPubkey
1225
1226=item GatewaySecretKey
1227
1228The names of the files containing secret and public keys to use in setting up
[22bb7f8]1229tunnels between testbeds.  If given they are used, otherwise keys are generated.
1230
1231=item GatewayKeyType
1232
1233This controls the kind of SSH keys generated to configure the geatways.  If
1234given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>.  The parameter
1235is csase insensitive.
[9c00d41]1236
1237=item TmpDir
1238
1239=item TempDir
1240
1241The directory where temporary files are created.  These are synonyms, but
1242should both be specified, B<TmpDir> has priority.  If neither is specified,
1243F</tmp> is used.
1244
[3c7da22]1245=item SMBShare
1246
1247The SMB share on the master testbed that will be exported to remote clients.
1248
1249=item SMBUser
1250
1251The experiment user to mount project directories as.  This user needs to be a
1252member of the exported experiment - that is one of the users in the project
1253containing this experiment on the master testbed.
1254
[637adfa]1255=item Timeout
1256
1257Value in seconds after which a swap-in operatioin will be considered a success.
1258Often long swap-ins will hang when there are partial failures.  This works
[8412883]1259around this issue.  (This behavior can be requested on the command line by
1260specifying B<-t> I<secs>.)
[637adfa]1261
1262=item FailSoft
1263
[8412883]1264If not set, failure of any sub experiment swaps the rest out.  Setting this to
1265any value avoids this swap out.  (This behavior can be requested on the command
1266line by specifying B<-F>.)
[637adfa]1267
[e5fee75]1268=item MuxLimit
1269
1270The maximum bumber of links/lans carried by one gateway pair
1271
1272=item Tclparse
1273
1274The pathname to the experiment parsing program.  Only developers should set
1275this.
1276
1277=item Tclsh
1278
1279The pathname to the local oTcl shell.  Only developers should set
1280this.
1281
[9c00d41]1282=back
1283
1284=head2 Testbeds file
[1a8a08a]1285
[33548e1]1286The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
1287scoped attribute-value pairs where each attribute is specified on a separate
1288line of the configuration file.  Each testbed's parameters are preceeded by the
1289testbed label in brackets ([]) on a line by itself.  After that the parameters
1290are specified as parameter: value.  This is essentially the same format as the
1291configuration file.  Parameters are:
[5f1cd41]1292
1293=over 4
1294
1295=item User
1296
1297The user under which to make requests to this testbed.  The user running
1298B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1299testbed.
1300
1301=item OpsNode
1302
1303The host name of the testbed's ops node.  The user calling B<splitter.pl> must
1304be able to execute commands on this host via L<ssh(1)>.
1305
1306=item Domain
1307
1308The domain of nodes in this testbed (including the ops host).  This parameter
1309should always start with a period.
1310
1311=item Project
1312
1313The project under which to instantiate sub-experiments on this testbed.
1314
1315=item ConnectorType
1316
1317The node type for inter-testbed connector nodes on this testbed.
1318
1319=item SlaveNodeStartCmd
1320
1321The start command to run on experimental nodes when this testbed is used as a
[8412883]1322slave.  In all the start commands the following string substitutions are made:
1323
1324=over 10
1325
1326=item FEDDIR
1327
1328The local experiment's federation scripts directory.  Each local experiment
1329will have this replaced by the scripts directory on its local boss.
1330
1331=item GWCONF
1332
1333The full pathname of the gateway configuration file.  As with FEDDIR, this is
1334on the local boss.
1335
1336=item PROJDIR
1337
1338The project directory on the local boss.
1339
1340=item EID
1341
1342The local experiment name.
1343
1344=back
1345
1346All startcmds specified in F<testbeds> undergo these expansions.
[5f1cd41]1347
1348=item SlaveConnectorStartCmd
1349
1350The start command to run on gateway nodes when this testbed is used as a slave.
[8412883]1351The same string substitutions are made in this command as in SlaveNodeStartCmd.
[5f1cd41]1352
1353=item MasterNodeStartCmd
1354
1355The start command to run on experimental nodes when this testbed is used as a
1356master.  The same string substitutions are made in this command as in
[8412883]1357SlaveNodeStartCmd.
[5f1cd41]1358
1359=item MasterConnectorStartCmd
1360
1361The start command to run on gateway nodes when this testbed is used as a
1362master.  The same string substitutions are made in this command as in
[8412883]1363SlaveNodeStartCmd.
[5f1cd41]1364
1365=item ConnectorImage
1366
1367The disk image to be loaded on a gateway node on this testbed.
1368
1369=item FileServer
1370
1371The node in the master testbed from which filesystems are mounted.
1372
1373=item Boss
1374
1375The node in the master testbed that controls the testbed.
1376
1377=item TunnelCfg
1378
1379True if the connector needs to do DETER federation.  This parameter will
1380probably be removed.
1381
1382
1383=back
1384
[1a8a08a]1385=head1 ENVIRONMENT
1386
1387B<splitter.pl> does not directly make use of environment variables, but calls
1388out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1389environment.
1390
[8412883]1391=head1 BUGS
1392
1393A deprecated B<-N> flag was used to select testbeds file format.  Only one
1394format is supported now, and B<-N> generates a warning, but otherwise does not
1395affect B<splitter.pl>.
1396
[1a8a08a]1397=head1 SEE ALSO
1398
1399L<sh(1)>, L<ssh(1)>
1400
1401=cut
Note: See TracBrowser for help on using the repository browser.