source: fedkit/splitter.pl @ 8412883

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 8412883 was 8412883, checked in by Ted Faber <faber@…>, 17 years ago

Documentation of variable expansion in startcmds and some touch ups.

  • Property mode set to 100644
File size: 33.8 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my @tarfiles;                   # Tarfiles in use by this experiment
32my @rpms;                       # Rpms in use by this experiment
33my $timeout;                    # The timeout to use for experiment swap ins
34my %opts;                       # Parsed options
35
36my $tbparams = {};              # Map of the per-testbed parameters from the
37                                # testbeds file.  It is a reference to a hash
38                                # of hashes (because it's passed around a bunch
39                                # and it's nicer to have one access pattern
40                                # throughout the script, in the main loop and
41                                # the subroutines).  That access is exemplified
42                                # by  $tbparams->{'deter'}->{'domain'} which is
43                                # the domain parameter of the DETER testbed. 
44my $fail_soft;                  # Do not swap failed sub-experiments out
45
46# Parse the config file.  The format is a colon-separated parameter name
47# followed by the value of that parameter to the end of the line.  This parses
48# that format and puts the parameters into the referenced hash.  Parameter
49# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
50# failure (e.g. file open) and 1 on success.
51sub parse_config {
52    my($file, $href) = @_;
53    my $fh = new IO::File($file);
54       
55    unless ($fh) {
56        warn "Can't open $file: $!\n";
57        return 0;
58    }
59
60    while (<$fh>) {
61        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
62        chomp;
63        /^([^:]+):\s*(.*)/ && do {
64            my $key = $1; 
65
66            $key =~ tr/A-Z/a-z/;
67            $href->{$key} = $2;
68            next;
69        };
70        warn "Unparasble line in $file: $_\n";
71    }
72    $fh->close();   # It will close when it goes out of scope, but...
73    return 1;
74}
75
76# Parse an easier-to-read testbeds file (the original was comma-separated
77# unadorned strings).  The format is a testbed scope as [testbed] followed by
78# the colon-separated attribute-value pairs for the testbed.  Right now these
79# go into a set of global hashes indexed by testbed, but that should probably
80# change.
81
82sub parse_testbeds {
83    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
84    my $fh = new IO::File($file);  # Testbeds filehandle
85    my $tb;                         # Current testbed
86    # Convert attribute in the file to tbparams hash key
87    my %attr_to_hash = (
88        "opsnode" => "host",
89        "user" => "user",
90        "domain" => "domain",
91        "project" => "project",
92        "connectortype" => "gwtype",
93        "slavenodestartcmd" => "expstart",
94        "slaveconnectorstartcmd" => "gwstart",
95        "masternodestartcmd" => "mexpstart",
96        "masterconnectorstartcmd" => "mgwstart",
97        "connectorimage" => "gwimage",
98        "fileserver" => "fs",
99        "boss" => "boss",
100        "tunnelcfg" => "tun"
101    );
102
103
104    unless ($fh) {
105        warn "Can't open $file: $!\n";
106        return 0;
107    }
108
109    while (<$fh>) {
110        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
111        chomp;
112        /^\s*\[(.*)\]/ && do {
113            $tb = $1;
114            $tbparams->{$tb} = {} unless $tbparams->{$tb};
115            next;
116        };
117
118        /^([^:]+):\s*(.*)/ && do {
119            unless ($tb) {
120                warn "Ignored attribute definition before testbed " .
121                    "defined in $file: $_\n";
122                next;
123            }
124            my $key = $1; 
125            $key =~ tr/A-Z/a-z/;
126            my $var = $attr_to_hash{$key};
127
128            if ($var) { $tbparams->{$tb}->{$var} = $2; }
129            else { warn "Unknown keyword $key in $file\n"; }
130
131            next;
132        };
133        warn "Unparasble line in $file: $_\n";
134    }
135    $fh->close();   # It will close when it goes out of scope, but...
136    return 1;
137}
138
139
140# use scp to transfer a file, reporting true if successful and false otherwise.
141# Parameters are the local file name, the ssh host destination (either hostname
142# oe user@host), and an optional destination file name or directory.  If no
143# destination is given, the file is transferred to the given user's home
144# directory.  If only a machine is given in the ssh host destination, the
145# current user is used.
146sub scp_file {
147    my($file, $user, $host, $dest) = @_;
148
149    # XXX system with a relative pathname is sort of gross
150    system("scp $file $user\@$host:$dest");
151    if ($?) {
152        warn "scp failed $?\n";
153        return 0;
154    }
155    else { return 1; }
156}
157
158# use ssh to execute the given command on the machine (and as the user) in
159# $where.  Parameters are the ssh destination directive ($where) and the
160# command to execute, and a prefix to be placed on a message generated if the
161# command fails.   On failure print a warning if a warning prefix was given and
162# return false.  If timeout is given fork a process and set an alarm of that
163# many seconds.  Timeouts also return 0;
164sub ssh_cmd {
165    my($user, $host, $cmd, $wname, $timeout) = @_;
166    my $pid;                # Child pid
167
168    $timeout = 0 unless $timeout;   # Force default timeout
169
170    if ( $pid = fork () ) {
171        # Parent process
172        # The eval acts as a signal catcher.  If the alarm goes off inside
173        # the eval, the die will put "alarm\n" into $@, otherwise the
174        # return value of the execution in the child process will be used.
175        my $rv = eval {
176            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
177            my $rv;
178
179            alarm $timeout;
180            $rv = waitpid($pid, 0);
181            alarm 0;
182            $rv;
183        };
184
185        # If the eval succeeded, $@ will be null and we can use $rv, which
186        # is the return code from the subprocess.  If the eval timed out,
187        # print a warning and assume the best.
188        if ($@ eq "alarm\n" ) {
189            warn "$wname timed out - pid $pid still live\n";
190            return 1;
191        }
192        else {
193            return $rv;
194        }
195    }
196    else {
197        # Child process
198        exec("ssh $user\@$host $cmd");
199        exit 0;
200    }
201}
202
203# Ship local copies of the federation scripts out to the given host.  If any of
204# the script transfers fails, return 0.  The scripts to transfer are from the
205# global @scripts and are found locally in $local_script_dir (another global).
206sub ship_scripts {
207    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
208    my $s;
209
210    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
211    for $s (@scripts) {
212        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
213            return 0;
214    }
215    return 1;
216}
217
218# Ship per-testbed configuration generated by this script to the remote /proj
219# directories on the remote testbeds
220sub ship_configs {
221    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
222    my($d, $f);
223
224    $d = IO::Dir->new($src_dir) || return 0;
225
226    # All directories under $tmpdir are 770 so we can delete them later.
227    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
228    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
229    while ( $f = $d->read()) {
230        next if $f =~ /^\./;
231        if ( -d "$src_dir/$f" ) {
232            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
233                return 0;
234        }
235        else {
236            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
237        }
238    }
239    return 1;
240}
241
242# Start a sub section of the experiment on a given testbed.  The testbed and
243# the user to start the experiment as are pulled from the global per-testbed
244# hash, passed in as $tbparams, as is the project name on the remote testbed.
245# Parameters are the testbed and the experiment id.  Configuration files are
246# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
247# Then the current state of the experiment determined using expinfo.  From that
248# state, the experiment is either created, modified or spapped in.  If
249# everything succeeds, true is returned.  If the global verbose is set progress
250# messages are printed.
251sub start_segment {
252    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
253                                            # per-testbed parameters and remote
254                                            # swap-in timeout
255    my $host =                              # Host name of remote ops (FQDN)
256        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
257    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
258    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
259                                            # experiment under
260    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
261                                            # sub-experiment
262    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
263    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
264    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
265    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
266    my $state;                              # State of remote experiment
267    my $status = new IO::Pipe;              # The pipe to get status
268
269    # Determine the status of the remote experiment
270    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
271        die "Can't ssh to $user\@$host:$!\n";
272
273    # XXX: this is simple now.  Parsing may become more complex
274    while (<$status>) {
275        /State: (\w+)/ && ($state = $1);
276        /No\s+such\s+experiment/ && ($state = "none");
277    }
278    $status->close();
279    print "$tb: $state\n";
280
281    # Copy the experiment definition data over
282    print "transferring subexperiment to $tb\n" if $verbose;
283    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
284    # Clear out any old experiment data; if not deleted, copies over it by
285    # different users will fail.
286    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
287    # force the /bin/sh interpretation of the trailing * (which we need to keep
288    # tmp around))  Again, this needs to be done more properly once we have a
289    # non-ssh interface here.)
290    print "clearing experiment subdirs on $tb\n" if $verbose;
291    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
292        return 0;
293    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
294    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
295        return 0;
296    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
297    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
298        return 0;
299    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
300    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
301        return 0;
302    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
303    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
304        return 0;
305    # Remote experiment is active.  Modify it.
306    if ($state eq "active") {
307        print "Transferring federation support files to $tb\n" if $verbose;
308        # First copy new scripts and hostinfo into the remote /proj
309        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
310            return 0;
311        &ship_scripts($host, $user, $proj_dir) || return 0;
312        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
313
314        if ( -d "$tmpdir/tarfiles") {
315            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
316                return 0;
317        }
318
319        if ( -d "$tmpdir/rpms") {
320            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
321                return 0;
322        }
323
324        print "Modifying $eid in place on $tb\n" if $verbose;
325        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
326            "$eid $tclfile", "modexp", $timeout) || return 0;
327        return 1;
328    }
329
330    # Remote experiment is swapped out, modify it and swap it in.
331    if ($state eq "swapped") {
332        print "Transferring federation support files to $tb\n" if $verbose;
333        # First copy new scripts and hostinfo into the remote /proj (because
334        # the experiment exists, the directory tree should be there.
335        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
336            return 0;
337        &ship_scripts($host, $user, $proj_dir) || return 0;
338        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
339        if ( -d "$tmpdir/tarfiles") {
340            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
341                return 0;
342        }
343
344        if ( -d "$tmpdir/rpms") {
345            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
346                return 0;
347        }
348
349        print "Modifying $eid on $tb\n" if $verbose;
350        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
351            "modexp") || return 0;
352        print "Swapping $eid in on $tb\n" if $verbose;
353        # Now start up
354        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
355            "swapexp", $timeout) || return 0;
356        return 1;
357    }
358
359    # No remote experiment.  Create one.  We do this in 2 steps so we can put
360    # the configuration files and scripts into the new experiment directories.
361    if ($state eq "none") {
362
363        if ( -d "$tmpdir/tarfiles") {
364            # Tarfiles have to exist for the creation to work
365            print "copying tarfiles to $tb\n";
366            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
367                return 0;
368        }
369
370        if ( -d "$tmpdir/rpms") {
371            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
372                return 0;
373        }
374
375        print "Creating $eid on $tb\n" if $verbose;
376        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
377            "$pid -e $eid $tclfile", "startexp") || return 0;
378        # After startexp succeeds, the per-experiment directories exist on the
379        # remote testbed.
380        print "Transferring federation support files to $tb\n" if $verbose;
381        # First copy new scripts and hostinfo into the remote /proj
382        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
383            return 0;
384        &ship_scripts($host, $user, $proj_dir) || return 0;
385        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
386        # Now start up
387        print "Swapping $eid in on $tb\n" if $verbose;
388        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
389            "swapexp", $timeout) || return 0;
390        return 1;
391    }
392
393    # Every branch for a known state returns.  If execution gets here, the
394    # state is unknown.
395    warn "unknown state: $state\n";
396    return 0;
397}
398
399# Swap out a sub-experiment - probably because another has failed.  Arguments
400# are testbed and experiment.  Most of the control flow is similar to
401# start_segment, though much simpler.
402sub stop_segment {
403    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
404                                            # per-testbed parameters
405    my $user = $tbparams->{$tb}->{'user'};  # testbed user
406    my $host =                              # Ops node
407        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
408    my $pid = $tbparams->{$tb}->{'project'};# testbed project
409
410    print "Stopping $eid on $tb\n" if $verbose;
411    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
412        "swapexp (out)") || return 0;
413    return 1;
414}
415
416
417$pid = $gid = "dummy";              # Default project and group to pass to
418                                    # $tcl_splitter above.  These are total
419                                    # dummy arguments;  the splitter doesn't
420                                    # use them at all, but we supply them to
421                                    # keep our changes to the parser minimal.
422# Argument processing.
423getopts('Ft:c:f:ndvN', \%opts);
424$splitter_config = $opts{'c'} || "./splitter.conf";
425$debug = $opts{'d'};
426$verbose = $opts{'v'} || $opts{'d'};
427
428&parse_config("$splitter_config", \%opts) || 
429    die "Cannot read config file $splitter_config: $!\n";
430$fail_soft = $opts{'F'} || $opts{'failsoft'};
431
432warn "-N does nothing now.  Only one testbeds format supported.\n"
433    if $opts{'N'};
434$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
435$timeout = $opts{'t'} || $opts{'timeout'};
436$eid = $opts{'experiment'};             # Experiment ID
437$tcl = $opts{'f'} || shift;             # The experiment description
438$master = $opts{'master'};              # Master testbed
439$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
440$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
441$local_script_dir = $opts{'scriptdir'}; # Local scripts
442
443$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
444    die "Must give an SMB share\n";
445$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
446    die "Must give an SMB user\n";
447
448# For now specify these.  We may want to generate them later.
449$gw_pubkey = $opts{'gatewaypubkey'};
450($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
451$gw_secretkey = $opts{'gatewaysecretkey'};
452($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
453
454# tcl program to split experiments (changed during devel)
455$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
456# tclsh to call directly (changed during devel)
457$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
458
459# Prefix to avoid collisions
460$tmpdir .= "/split$$";
461
462print "Temp files are in $tmpdir\n" if $verbose;
463# Create a workspace
464unless (-d "$tmpdir") {
465    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
466}
467
468# Validate scripts directory
469for my $s (@scripts) {
470    die "$local_script_dir/$s not in local script directory. Try -d\n"
471        unless -r "$local_script_dir/$s";
472}
473
474die "Must supply file, master and experiment" unless $master && $tcl && $eid;
475
476&parse_testbeds($tb_config, $tbparams) ||
477    die "Cannot testbed congfigurations from $tb_config: $!\n";
478
479# Open a pipe to the splitter program and start it parsing the experiments
480my $pipe = new IO::Pipe;
481# NB no more -p call on parse call.
482$pipe->reader("$tclsh $tcl_splitter -s -m $master  $pid $gid $eid $tcl") || 
483    die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
484
485# Parsing variables
486my $ctb;                        # Current testbed
487my %allocated;                  # If allocated{$tb} > 0, $tb is in use
488my $destfile;                   # File that the sub-experiment tcl file is
489                                # being written to, or "" if none.  Also used
490                                # for hostnames file.
491my $desthandle;                 # File handle for distfile
492my $gateways;                   # when gateway lists are being processed this
493                                # is the testbed whose gateways are being
494                                # gathered.
495my $control_gateway;            # Control net gateway for the current testbed
496my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
497                                # end of the a <-> b connector pair.
498
499# Parse the splitter output.  This loop creates the sub experiments, gateway
500# configurations and hostnames file
501while (<$pipe>) {
502    # Start of a sub-experiment
503    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
504        $ctb = $1;
505
506        # If we know the testbed, start collecting its sub experiment tcl
507        # description.  If not, warn the caller and ignore the configuration of
508        # this testbed.
509        if ($tbparams->{$ctb}) {
510            $allocated{$ctb}++; # Keep track of the testbeds allocated
511
512            unless (-d "$tmpdir/$ctb") {
513                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
514            }
515            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
516
517            $desthandle = new IO::File(">$destfile") || 
518                die "Cannot open $destfile:$!\n";
519        }
520        else { 
521            warn "No such testbed $ctb\n";
522            $destfile = "";
523        }
524        next;
525    };
526
527    # End of that experiment
528    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
529        # Simple syntax check and close out this experiment's tcl description
530        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
531        $desthandle->close();
532        $destfile = $ctb = "";
533        next;
534    };
535
536    # Beginning of a gateway set
537    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
538        $gateways = $1;
539        # If we've heard of this tb, create the config lines for it one at a
540        # time.
541        if ($allocated{$gateways}) {
542            # Just in case.  This directory should already have been created
543            # above.
544            unless (-d "$tmpdir/$gateways") {
545                mkdir("$tmpdir/$gateways") || 
546                    die "Can't create $tmpdir/$gateways: $!";
547            }
548        }
549        else {
550            warn "Gateways given (and ignored) for testbed not in use: " .
551                "$gateways\n";
552            $gateways = 0;
553        }
554        next;
555    };
556    # End of the gateways section.  Output the client config for this testbed
557    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
558        die "Mismatched gateway markers ($1, $gateways)\n" 
559            unless !$gateways || $gateways == $1;
560
561        if ($control_gateway ) {
562            # Client config
563            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
564            my $master_project = $tbparams->{$master}->{'project'};
565            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
566            print $cc "ControlGateway: $control_gateway\n";
567            print $cc "SMBShare: $smb_share\n";
568            print $cc "ProjectUser: $project_user\n";
569            print $cc "ProjectName: $master_project\n";
570            $cc->close();
571        }
572        else { warn "No control gateway for $gateways?\n"; }
573           
574        $gateways = 0;
575        next;
576    };
577    # Beginning of the hostnames list.  Collection is always in the hostnames
578    # file.
579    /^#\s+Begin\s+hostnames/ && do {
580        $destfile = "$tmpdir/hostnames";
581        $desthandle = new IO::File(">$destfile") || 
582            die "Can't open $destfile:$!\n";
583        next;
584    };
585    # end of the hostnames list.
586    /^#\s+End\s+hostnames/ && do {
587        $desthandle->close();
588        $destfile = "";
589        next;
590    };
591
592    # Generate gateway configuration info, one file per line
593    $gateways && do {
594        chomp;
595        my($dtb, $myname, $desthost, $type) = split(" ", $_);
596
597        # Many of these are to simplify print statements
598        my $sdomain =                           # domain for the source
599            $tbparams->{$gateways}->{'domain'};
600        my $ddomain =                           # domain for the destination
601            $tbparams->{$dtb}->{'domain'};
602        my $sproject =                          # Project of the destination
603            $tbparams->{$gateways}->{'project'};
604        my $fs =                                # Master fs node (FQDN)
605            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
606        my $boss =                              # Master boss node (FQDN)
607            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
608        my $remote_script_dir =                 # Remote fed script location
609            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
610        my $active;                             # Is this the active side of
611                                                # the connector?
612
613        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
614        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
615
616        my $conf_file = "$myname$sdomain.gw.conf";
617        # translate to lower case so the `hostname` hack for specifying
618        # configuration files works.
619        $conf_file =~ tr/A-Z/a-z/;
620
621        # If either end of this link is in the master side of the testbed, that
622        # side is the active end. Otherwise the first testbed encountered in
623        # the file will be the active end.  The $active_end variable keeps
624        # track of those decisions
625        if ( $dtb eq $master ) { $active = "false"; }
626        elsif ($gateways eq $master ) { $active = "true"; }
627        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
628        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
629
630        # This is used to create the client configuration.
631        $control_gateway = "$myname$sdomain"
632            if $type =~ /(control|both)/;
633
634        # Write out the file
635        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
636            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
637
638        print $gwconfig "Active: $active\n";
639        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
640        print $gwconfig "BossName: $boss\n";
641        print $gwconfig "FsName: $fs\n";
642        print $gwconfig "Type: $type\n";
643        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
644        print $gwconfig "Peer: $desthost$ddomain\n";
645        print $gwconfig "Pubkeys: " . 
646            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
647        print $gwconfig "Privkeys: " .
648            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
649        $gwconfig->close();
650
651        # This testbed has a gateway (most will) so make a copy of the keys it
652        # needs in this testbed's subdirectory.  start_segment will transfer
653        # them.
654        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
655            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
656                die "Can't copy pubkeys ($gw_pubkey to " . 
657                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
658        }
659        if ($active eq "true" ) {
660            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
661                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
662                    die "Can't copy secret keys ($gw_secretkey to " . 
663                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
664            }
665        }
666
667        #done processing gateway entry, ready for next line
668        next; 
669    };
670    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
671        next if /^#/;
672        chomp;
673        push(@tarfiles, $_);
674        next;
675    };
676    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
677        next if /^#/;
678        chomp;
679        push(@rpms, $_);
680        next;
681    };
682
683    next unless $destfile;  # Unidentified testbed, ignore config
684    # local copies that can be used in the substitutions below
685    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
686    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
687    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
688    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
689    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
690    my $expstart = $tbparams->{$ctb}->{'expstart'};
691    my $project = $tbparams->{$ctb}->{'project'};
692
693    # Substitute variables
694    s/GWTYPE/$gwtype/g;
695    s/GWIMAGE/$gwimage/g;
696    if ($ctb eq $master ) {
697        s/GWSTART/$mgwstart/g;
698        s/EXPSTART/$mexpstart/g;
699    }
700    else {
701        s/GWSTART/$gwstart/g;
702        s/EXPSTART/$expstart/g;
703    }
704    # XXX: oh is this bad
705    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
706    s#PROJDIR#/proj/$project/#g;
707    s#EID#$eid#g;
708    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
709    print $desthandle $_;
710}
711$pipe->close();
712die "No nodes in master testbed ($master)\n" unless $allocated{$master};
713
714# Copy tarfiles and rpms needed at remote sites to the staging directories.
715# Start_segment will distribute them
716for my $t  (@tarfiles) {
717    die "tarfile '$t' unreadable: $!\n" unless -r $t;
718    unless (-d "$tmpdir/tarfiles") {
719        mkdir("$tmpdir/tarfiles") || 
720            die "Can't create $tmpdir/tarfiles:$!\n";
721    }
722    copy($t, "$tmpdir/tarfiles") || 
723        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
724}
725
726for my $r  (@rpms) {
727    die "rpm '$r' unreadable: $!\n" unless -r $r;
728    unless (-d "$tmpdir/rpms") {
729        mkdir("$tmpdir/rpms") || 
730            die "Can't create $tmpdir/rpms:$!\n";
731    }
732    copy($r, "$tmpdir/rpms") || 
733        die "Can't copy $r to  $tmpdir/rpms:$!\n";
734}
735
736exit(0) unless $startem;
737
738my %started;                # If $started{$tb} then $tb successfully started
739
740# Start up the slave sub-experiments first
741TESTBED:
742for my $tb  (keys %allocated) {
743    if ($tb ne $master) {
744        if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
745        else { last TESTBED; }
746    }
747}
748
749# Now the master
750if (&start_segment($master, $eid, $tbparams, $timeout)) { 
751    $started{$master}++;
752}
753
754# If any testbed failed, swap the rest out.
755if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
756    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
757    print "Error starting experiment\n";
758    exit(1);
759}
760print "Experiment started\n";
761print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
762system("rm -rf $tmpdir") unless $debug;
763exit(0);    # set the exit value
764
765=pod
766
767=head1 NAME
768
769B<splitter.pl>
770
771=head1 SYNOPSIS
772
773B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
774    [B<-f> F<experiment_tcl>] [F<experiment_tcl>]
775
776=head1 DESCRIPTION
777
778B<splitter.pl> invokes the DETER experiment parser to split an annotated
779experiment into multiple sub-experments and instantiates the sub-experiments on
780their intended testbeds.  Annotation is accomplished using the
781tb-set-node-testbed command, added to the parser.
782
783Much of the script's behavior depends on the configuration file, specified with
784the B<-c> flag and defaulting to F<./splitter.conf>.
785
786The testbed labels supplied in the B<tb-set-node-testbed> command are
787meaningful based on their presence in the testbeds file.  that file can be
788specified in the configuration file using the B<Testbeds> directive, and
789defaults to F<./testbeds>.  The syntax is described below.
790
791Most of the intermediate files are staged in a sub-directory of a temporary
792files directory and deleted at the end of the script.  Specifying the B<-d>
793flag on the command line avoids the deletion for debbugging.  By default the
794temporary files directory is directory is F</tmp> and can be reset in the
795configuration file using the B<Tmpdir> directive.  Intermediate files are
796stored under a subdirectory formed by adding the process ID of the splitter
797process.  For example, if the temporary files directory is F</tmp> and the
798B<splitter.pl> process ID is 2323, the temporary files will be stored in
799F</tmp/split2323/>.
800
801The expreriment is split out into one experiment description per testbed in the
802temporary directory named as F<experiment.testbed.tcl> where the experiment is
803the experiment ID given in the configuration file, and the testbed is the
804tb-set-node-testbed parameter for the nodes in the file.
805
806If the B<-n> option is absent the sub-experiments are then instantiated on
807their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
808Per-testbed parameters are set in the testbeds file.  Sub-experiments on
809slave testbeds are instantiated in a random order, but the master testbed is
810currently instantiated last.
811
812Scripts to start federation (the federation kit) are copied into the local
813experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
814taken from the directory given by the B<ScriptDir> directive in the
815configuration file.
816
817If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
818F<splitter.conf>.
819
820If any sub-experiment fails to instantiate, the other sub-exeriments are
821swapped out.  B<-F> avoids this swap out, which can also be specified as
822B<SoftFail: true> in F<splitter.conf>
823
824=head2 Configuration File
825
826The configuration file is a simple set of colon-separated parameters and
827values.  A configuration file must be present, either specified in the B<-c>
828flag or the default F<./splitter.conf>.  All the parameter names are case
829insensitive, but should not include any whitespace.  Parameter values may
830include whitespace, but no newlines.
831
832Possible parameters are:
833
834=over 5
835
836=item Experiment
837
838The name of the experiment on the various testbeds
839
840=item Master
841
842The master testbed label from the testbeds file, described below.
843
844=item Testbeds
845
846The testbeds file described below, giving per-testbed parameters.  If this
847directive is absent the testbeds file defaults to F<./testbeds>
848
849=item ScriptDir
850
851Location of the default federation scripts, i.e. the federation kit.
852
853=item GatewayPubkey
854
855=item GatewaySecretKey
856
857The names of the files containing secret and public keys to use in setting up
858tunnels between testbeds.  These will eventually be automatically generated.
859
860=item TmpDir
861
862=item TempDir
863
864The directory where temporary files are created.  These are synonyms, but
865should both be specified, B<TmpDir> has priority.  If neither is specified,
866F</tmp> is used.
867
868=item SMBShare
869
870The SMB share on the master testbed that will be exported to remote clients.
871
872=item SMBUser
873
874The experiment user to mount project directories as.  This user needs to be a
875member of the exported experiment - that is one of the users in the project
876containing this experiment on the master testbed.
877
878=item Tclparse
879
880The pathname to the experiment parsing program.  Only developers should set
881this.
882
883=item Tclsh
884
885The pathname to the local oTcl shell.  Only developers should set
886this.
887
888=item Timeout
889
890Value in seconds after which a swap-in operatioin will be considered a success.
891Often long swap-ins will hang when there are partial failures.  This works
892around this issue.  (This behavior can be requested on the command line by
893specifying B<-t> I<secs>.)
894
895=item FailSoft
896
897If not set, failure of any sub experiment swaps the rest out.  Setting this to
898any value avoids this swap out.  (This behavior can be requested on the command
899line by specifying B<-F>.)
900
901=back
902
903=head2 Testbeds file
904
905The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
906scoped attribute-value pairs where each attribute is specified on a separate
907line of the configuration file.  Each testbed's parameters are preceeded by the
908testbed label in brackets ([]) on a line by itself.  After that the parameters
909are specified as parameter: value.  This is essentially the same format as the
910configuration file.  Parameters are:
911
912=over 4
913
914=item User
915
916The user under which to make requests to this testbed.  The user running
917B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
918testbed.
919
920=item OpsNode
921
922The host name of the testbed's ops node.  The user calling B<splitter.pl> must
923be able to execute commands on this host via L<ssh(1)>.
924
925=item Domain
926
927The domain of nodes in this testbed (including the ops host).  This parameter
928should always start with a period.
929
930=item Project
931
932The project under which to instantiate sub-experiments on this testbed.
933
934=item ConnectorType
935
936The node type for inter-testbed connector nodes on this testbed.
937
938=item SlaveNodeStartCmd
939
940The start command to run on experimental nodes when this testbed is used as a
941slave.  In all the start commands the following string substitutions are made:
942
943=over 10
944
945=item FEDDIR
946
947The local experiment's federation scripts directory.  Each local experiment
948will have this replaced by the scripts directory on its local boss.
949
950=item GWCONF
951
952The full pathname of the gateway configuration file.  As with FEDDIR, this is
953on the local boss.
954
955=item PROJDIR
956
957The project directory on the local boss.
958
959=item EID
960
961The local experiment name.
962
963=back
964
965All startcmds specified in F<testbeds> undergo these expansions.
966
967=item SlaveConnectorStartCmd
968
969The start command to run on gateway nodes when this testbed is used as a slave.
970The same string substitutions are made in this command as in SlaveNodeStartCmd.
971
972=item MasterNodeStartCmd
973
974The start command to run on experimental nodes when this testbed is used as a
975master.  The same string substitutions are made in this command as in
976SlaveNodeStartCmd.
977
978=item MasterConnectorStartCmd
979
980The start command to run on gateway nodes when this testbed is used as a
981master.  The same string substitutions are made in this command as in
982SlaveNodeStartCmd.
983
984=item ConnectorImage
985
986The disk image to be loaded on a gateway node on this testbed.
987
988=item FileServer
989
990The node in the master testbed from which filesystems are mounted.
991
992=item Boss
993
994The node in the master testbed that controls the testbed.
995
996=item TunnelCfg
997
998True if the connector needs to do DETER federation.  This parameter will
999probably be removed.
1000
1001
1002=back
1003
1004=head1 ENVIRONMENT
1005
1006B<splitter.pl> does not directly make use of environment variables, but calls
1007out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1008environment.
1009
1010=head1 BUGS
1011
1012A deprecated B<-N> flag was used to select testbeds file format.  Only one
1013format is supported now, and B<-N> generates a warning, but otherwise does not
1014affect B<splitter.pl>.
1015
1016=head1 SEE ALSO
1017
1018L<sh(1)>, L<ssh(1)>
1019
1020=cut
Note: See TracBrowser for help on using the repository browser.