source: fedkit/splitter.pl @ 637adfa

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 637adfa was 637adfa, checked in by Ted Faber <faber@…>, 17 years ago

docs, fail soft feature

  • Property mode set to 100644
File size: 35.3 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my @tarfiles;                   # Tarfiles in use by this experiment
32my $timeout;                    # The timeout to use for experiment swap ins
33my %opts;                       # Parsed options
34
35my $tbparams = {};              # Map of the per-testbed parameters from the
36                                # testbeds file.  It is a reference to a hash
37                                # of hashes (because it's passed around a bunch
38                                # and it's nicer to have one access pattern
39                                # throughout the script, in the main loop and
40                                # the subroutines).  That access is exemplified
41                                # by  $tbparams->{'deter'}->{'domain'} which is
42                                # the domain parameter of the DETER testbed. 
43my $fail_soft;                  # Do not swap failed sub-experiments out
44
45# Parse the config file.  The format is a colon-separated parameter name
46# followed by the value of that parameter to the end of the line.  This parses
47# that format and puts the parameters into the referenced hash.  Parameter
48# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
49# failure (e.g. file open) and 1 on success.
50sub parse_config {
51    my($file, $href) = @_;
52    my $fh = new IO::File($file);
53       
54    unless ($fh) {
55        warn "Can't open $file: $!\n";
56        return 0;
57    }
58
59    while (<$fh>) {
60        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
61        chomp;
62        /^([^:]+):\s*(.*)/ && do {
63            my $key = $1; 
64
65            $key =~ tr/A-Z/a-z/;
66            $href->{$key} = $2;
67            next;
68        };
69        warn "Unparasble line in $file: $_\n";
70    }
71    $fh->close();   # It will close when it goes out of scope, but...
72    return 1;
73}
74
75# Parse an easier-to-read testbeds file (the original was comma-separated
76# unadorned strings).  The format is a testbed scope as [testbed] followed by
77# the colon-separated attribute-value pairs for the testbed.  Right now these
78# go into a set of global hashes indexed by testbed, but that should probably
79# change.
80
81sub parse_testbeds {
82    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
83    my $fh = new IO::File($file);  # Testbeds filehandle
84    my $tb;                         # Current testbed
85    # Convert attribute in the file to global variable name.  XXX: Again, this
86    # needs to be a 2-level hash
87    my %attr_to_hash = (
88        "opsnode" => "host",
89        "user" => "user",
90        "domain" => "domain",
91        "project" => "project",
92        "connectortype" => "gwtype",
93        "slavenodestartcmd" => "expstart",
94        "slaveconnectorstartcmd" => "gwstart",
95        "masternodestartcmd" => "mexpstart",
96        "masterconnectorstartcmd" => "mgwstart",
97        "connectorimage" => "gwimage",
98        "fileserver" => "fs",
99        "boss" => "boss",
100        "tunnelcfg" => "tun"
101    );
102
103
104    unless ($fh) {
105        warn "Can't open $file: $!\n";
106        return 0;
107    }
108
109    while (<$fh>) {
110        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
111        chomp;
112        /^\s*\[(.*)\]/ && do {
113            $tb = $1;
114            $tbparams->{$tb} = {} unless $tbparams->{$tb};
115            next;
116        };
117
118        /^([^:]+):\s*(.*)/ && do {
119            unless ($tb) {
120                warn "Ignored attribute definition before testbed " .
121                    "defined in $file: $_\n";
122                next;
123            }
124            my $key = $1; 
125            $key =~ tr/A-Z/a-z/;
126            my $var = $attr_to_hash{$key};
127
128            if ($var) { $tbparams->{$tb}->{$var} = $2; }
129            else { warn "Unknown keyword $key in $file\n"; }
130
131            next;
132        };
133        warn "Unparasble line in $file: $_\n";
134    }
135    $fh->close();   # It will close when it goes out of scope, but...
136    return 1;
137}
138
139
140# use scp to transfer a file, reporting true if successful and false otherwise.
141# Parameters are the local file name, the ssh host destination (either hostname
142# oe user@host), and an optional destination file name or directory.  If no
143# destination is given, the file is transferred to the given user's home
144# directory.  If only a machine is given in the ssh host destination, the
145# current user is used.
146sub scp_file {
147    my($file, $user, $host, $dest) = @_;
148
149    # XXX system with a relative pathname is sort of gross
150    system("scp $file $user\@$host:$dest");
151    if ($?) {
152        warn "scp failed $?\n";
153        return 0;
154    }
155    else { return 1; }
156}
157
158# use ssh to execute the given command on the machine (and as the user) in
159# $where.  Parameters are the ssh destination directive ($where) and the
160# command to execute, and a prefix to be placed on a message generated if the
161# command fails.   On failure print a warning if a warning prefix was given and
162# return false.  If timeout is given fork a process and set an alarm of that
163# many seconds.  Timeouts also return 0;
164sub ssh_cmd {
165    my($user, $host, $cmd, $wname, $timeout) = @_;
166    my $pid;                # Child pid
167
168    if ( $timeout ) {
169        if ( $pid = fork () ) {
170            # Parent process
171            # The eval acts as a signal catcher.  If the alarm goes off inside
172            # the eval, the die will put "alarm\n" into $@, otherwise the
173            # return value of the execution in the child process will be used.
174            my $rv = eval {
175                local $SIG{'ALRM'} = sub{ die "alarm\n"; };
176                my $rv;
177
178                alarm $timeout;
179                $rv = waitpid($pid, 0);
180                alarm 0;
181                $rv;
182            };
183
184            # If the eval succeeded, $@ will be null and we can use $rv, which
185            # is the return code from the subprocess.  If the eval timed out,
186            # print a warning and assume the best.
187            if ($@ eq "alarm\n" ) {
188                warn "$wname timed out - pid $pid still live\n";
189                return 1;
190            }
191            else {
192                return $rv;
193            }
194        }
195        else {
196            # Child process
197            # XXX system with a relative pathname is sort of gross
198            exec("ssh $user\@$host $cmd");
199            exit 0;
200        }
201    }
202    else {
203        # No timeout, do all the work here.
204        # XXX system with a relative pathname is sort of gross
205        system ("ssh $user\@$host $cmd");
206        if ($?) {
207            warn "$wname failed $?\n" if $wname;
208            return 0;
209        }
210        else { return 1; }
211    }
212}
213
214# Ship local copies of the federation scripts out to the given host.  If any of
215# the script transfers fails, return 0.  The scripts to transfer are from the
216# global @scripts and are found locally in $local_script_dir (another global).
217sub ship_scripts {
218    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
219    my $s;
220
221    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
222    for $s (@scripts) {
223        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
224            return 0;
225    }
226    return 1;
227}
228
229# Ship per-testbed configuration generated by this script to the remote /proj
230# directories on the remote testbeds
231sub ship_configs {
232    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
233    my($d, $f);
234
235    $d = IO::Dir->new($src_dir) || return 0;
236
237    # All directories under $tmpdir are 770 so we can delete them later.
238    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
239    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
240    while ( $f = $d->read()) {
241        next if $f =~ /^\./;
242        if ( -d "$src_dir/$f" ) {
243            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
244                return 0;
245        }
246        else {
247            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
248        }
249    }
250    return 1;
251}
252
253# Start a sub section of the experiment on a given testbed.  The testbed and
254# the user to start the experiment as are pulled from the global per-testbed
255# hash, passed in as $tbparams, as is the project name on the remote testbed.
256# Parameters are the testbed and the experiment id.  Configuration files are
257# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
258# Then the current state of the experiment determined using expinfo.  From that
259# state, the experiment is either created, modified or spapped in.  If
260# everything succeeds, true is returned.  If the global verbose is set progress
261# messages are printed.
262sub start_segment {
263    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
264                                            # per-testbed parameters and remote
265                                            # swap-in timeout
266    my $host =                              # Host name of remote ops (FQDN)
267        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
268    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
269    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
270                                            # experiment under
271    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
272                                            # sub-experiment
273    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
274    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
275    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
276    my $state;                              # State of remote experiment
277    my $status = new IO::Pipe;              # The pipe to get status
278
279    # Determine the status of the remote experiment
280    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
281        die "Can't ssh to $user\@$host:$!\n";
282
283    # XXX: this is simple now.  Parsing may become more complex
284    while (<$status>) {
285        /State: (\w+)/ && ($state = $1);
286        /No\s+such\s+experiment/ && ($state = "none");
287    }
288    $status->close();
289    print "$tb: $state\n";
290
291    # Copy the experiment definition data over
292    print "transferring subexperiment to $tb\n" if $verbose;
293    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
294    # Clear out any old experiment data; if not deleted, copies over it by
295    # different users will fail.
296    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
297    # force the /bin/sh interpretation of the trailing * (which we need to keep
298    # tmp around))  Again, this needs to be done more properly once we have a
299    # non-ssh interface here.)
300    print "clearing experiment subdirs on $tb\n" if $verbose;
301    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
302        return 0;
303    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
304    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
305        return 0;
306    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
307    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
308        return 0;
309    # Remote experiment is active.  Modify it.
310    if ($state eq "active") {
311        print "Transferring federation support files to $tb\n" if $verbose;
312        # First copy new scripts and hostinfo into the remote /proj
313        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
314            return 0;
315        &ship_scripts($host, $user, $proj_dir) || return 0;
316        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
317        if ( -d "$tmpdir/tarfiles") {
318            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
319                return 0;
320        }
321
322        print "Modifying $eid in place on $tb\n" if $verbose;
323        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
324            "$eid $tclfile", "modexp", $timeout) || return 0;
325        return 1;
326    }
327
328    # Remote experiment is swapped out, modify it and swap it in.
329    if ($state eq "swapped") {
330        print "Transferring federation support files to $tb\n" if $verbose;
331        # First copy new scripts and hostinfo into the remote /proj (because
332        # the experiment exists, the directory tree should be there.
333        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
334            return 0;
335        &ship_scripts($host, $user, $proj_dir) || return 0;
336        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
337        if ( -d "$tmpdir/tarfiles") {
338            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
339                return 0;
340        }
341
342        print "Modifying $eid on $tb\n" if $verbose;
343        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
344            "modexp") || return 0;
345        print "Swapping $eid in on $tb\n" if $verbose;
346        # Now start up
347        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
348            "swapexp", $timeout) || return 0;
349        return 1;
350    }
351
352    # No remote experiment.  Create one.  We do this in 2 steps so we can put
353    # the configuration files and scripts into the new experiment directories.
354    if ($state eq "none") {
355
356        if ( -d "$tmpdir/tarfiles") {
357            # Tarfiles have to exist for the creation to work
358            print "copying tarfiles to $tb\n";
359            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
360                return 0;
361        }
362        print "Creating $eid on $tb\n" if $verbose;
363        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
364            "$pid -e $eid $tclfile", "startexp") || return 0;
365        print "Transferring federation support files to $tb\n" if $verbose;
366        # First copy new scripts and hostinfo into the remote /proj
367        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
368            return 0;
369        &ship_scripts($host, $user, $proj_dir) || return 0;
370        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
371        # Now start up
372        print "Swapping $eid in on $tb\n" if $verbose;
373        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
374            "swapexp", $timeout) || return 0;
375        return 1;
376    }
377
378    # Every branch for a known state returns.  If execution gets here, the
379    # state is unknown.
380    warn "unknown state: $state\n";
381    return 0;
382}
383
384# Swap out a sub-experiment - probably because another has failed.  Arguments
385# are testbed and experiment.  Most of the control flow is similar to
386# start_segment, though much simpler.
387sub stop_segment {
388    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
389                                            # per-testbed parameters
390    my $user = $tbparams->{$tb}->{'user'};  # testbed user
391    my $host =                              # Ops node
392        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
393    my $pid = $tbparams->{$tb}->{'project'};# testbed project
394
395    print "Stopping $eid on $tb\n" if $verbose;
396    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
397        "swapexp (out)") || return 0;
398    return 1;
399}
400
401
402$pid = $gid = "dummy";              # Default project and group to pass to
403                                    # $tcl_splitter above.  These are total
404                                    # dummy arguments;  the splitter doesn't
405                                    # use them at all, but we supply them to
406                                    # keep our changes to the parser minimal.
407# Argument processing.
408getopts('Ft:c:f:ndvN', \%opts);
409$splitter_config = $opts{'c'} || "./splitter.conf";
410$debug = $opts{'d'};
411$verbose = $opts{'v'} || $opts{'d'};
412
413&parse_config("$splitter_config", \%opts) || 
414    die "Cannot read config file $splitter_config: $!\n";
415$fail_soft = $opts{'F'} || $opts{'failsoft'};
416
417$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
418$timeout = $opts{'t'} || $opts{'timeout'};
419$eid = $opts{'experiment'};             # Experiment ID
420$tcl = $opts{'f'} || shift;             # The experiment description
421$master = $opts{'master'};              # Master testbed
422$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
423$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
424$local_script_dir = $opts{'scriptdir'}; # Local scripts
425
426$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
427    die "Must give an SMB share\n";
428$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
429    die "Must give an SMB user\n";
430
431# For now specify these.  We may want to generate them later.
432$gw_pubkey = $opts{'gatewaypubkey'};
433($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
434$gw_secretkey = $opts{'gatewaysecretkey'};
435($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
436
437# tcl program to split experiments (changed during devel)
438$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
439# tclsh to call directly (changed during devel)
440$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
441
442# Prefix to avoid collisions
443$tmpdir .= "/split$$";
444
445print "Temp files are in $tmpdir\n" if $verbose;
446# Create a workspace
447unless (-d "$tmpdir") {
448    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
449}
450
451# Validate scripts directory
452for my $s (@scripts) {
453    die "$local_script_dir/$s not in local script directory. Try -d\n"
454        unless -r "$local_script_dir/$s";
455}
456
457die "Must supply file, master and experiment" unless $master && $tcl && $eid;
458
459if ($opts{'N'} ) {
460    &parse_testbeds($tb_config, $tbparams) ||
461        die "Cannot testbed congfigurations from $tb_config: $!\n";
462}
463else {
464    # Read a hash of per-testbed parameters from the local configurations.
465    my $conf = new IO::File($tb_config) || 
466        die "can't read testbed configutions from $tb_config: $!\n";
467    while (<$conf>) {
468        next if /^#/;
469        chomp;
470        my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
471            $tun) = split(":", $_);
472        $tbparams->{$tb}->{'host'} = $h;
473        $tbparams->{$tb}->{'user'} = $u;
474        $tbparams->{$tb}->{'domain'} = $d;
475        $tbparams->{$tb}->{'project'} = $p;
476        $tbparams->{$tb}->{'gwtype'} = $t;
477        $tbparams->{$tb}->{'expstart'} = $es;
478        $tbparams->{$tb}->{'gwstart'} = $gs;
479        $tbparams->{$tb}->{'mexpstart'} = $mes;
480        $tbparams->{$tb}->{'mgwstart'} = $mgs;
481        $tbparams->{$tb}->{'gwimage'} = $i;
482        $tbparams->{$tb}->{'fs'} = $fs;
483        $tbparams->{$tb}->{'boss'} = $boss;
484        $tbparams->{$tb}->{'tun'} = $tun;
485
486        # Make sure the domain starts with a period
487        $tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
488            unless $tbparams->{$tb}->{'domain'} =~ /^\./;
489    }
490    $conf->close();
491}
492
493# Open a pipe to the splitter program and start it parsing the experiments
494my $pipe = new IO::Pipe;
495# NB no more -p call on parse call.
496$pipe->reader("$tclsh $tcl_splitter -s -m $master  $pid $gid $eid $tcl") || 
497    die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
498
499# Parsing variables
500my $ctb;                        # Current testbed
501my %allocated;                  # If allocated{$tb} > 0, $tb is in use
502my $destfile;                   # File that the sub-experiment tcl file is
503                                # being written to, or "" if none.  Also used
504                                # for hostnames file.
505my $desthandle;                 # File handle for distfile
506my $gateways;                   # when gateway lists are being processed this
507                                # is the testbed whose gateways are being
508                                # gathered.
509my $control_gateway;            # Control net gateway for the current testbed
510my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
511                                # end of the a <-> b connector pair.
512
513# Parse the splitter output.  This loop creates the sub experiments, gateway
514# configurations and hostnames file
515while (<$pipe>) {
516    # Start of a sub-experiment
517    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
518        $ctb = $1;
519
520        # If we know the testbed, start collecting its sub experiment tcl
521        # description.  If not, warn the caller and ignore the configuration of
522        # this testbed.
523        if ($tbparams->{$ctb}) {
524            $allocated{$ctb}++; # Keep track of the testbeds allocated
525
526            unless (-d "$tmpdir/$ctb") {
527                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
528            }
529            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
530
531            $desthandle = new IO::File(">$destfile") || 
532                die "Cannot open $destfile:$!\n";
533        }
534        else { 
535            warn "No such testbed $ctb\n";
536            $destfile = "";
537        }
538        next;
539    };
540
541    # End of that experiment
542    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
543        # Simple syntax check and close out this experiment's tcl description
544        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
545        $desthandle->close();
546        $destfile = $ctb = "";
547        next;
548    };
549
550    # Beginning of a gateway set
551    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
552        $gateways = $1;
553        # If we've heard of this tb, create the config lines for it one at a
554        # time.
555        if ($allocated{$gateways}) {
556            # Just in case.  This directory should already have been created
557            # above.
558            unless (-d "$tmpdir/$gateways") {
559                mkdir("$tmpdir/$gateways") || 
560                    die "Can't create $tmpdir/$gateways: $!";
561            }
562        }
563        else {
564            warn "Gateways given (and ignored) for testbed not in use: " .
565                "$gateways\n";
566            $gateways = 0;
567        }
568        next;
569    };
570    # End of the gateways section.  Output the client config for this testbed
571    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
572        die "Mismatched gateway markers ($1, $gateways)\n" 
573            unless !$gateways || $gateways == $1;
574
575        if ($control_gateway ) {
576            # Client config
577            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
578            my $master_project = $tbparams->{$master}->{'project'};
579            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
580            print $cc "ControlGateway: $control_gateway\n";
581            print $cc "SMBShare: $smb_share\n";
582            print $cc "ProjectUser: $project_user\n";
583            print $cc "ProjectName: $master_project\n";
584            $cc->close();
585        }
586        else { warn "No control gateway for $gateways?\n"; }
587           
588        $gateways = 0;
589        next;
590    };
591    # Beginning of the hostnames list.  Collection is always in the hostnames
592    # file.
593    /^#\s+Begin\s+hostnames/ && do {
594        $destfile = "$tmpdir/hostnames";
595        $desthandle = new IO::File(">$destfile") || 
596            die "Can't open $destfile:$!\n";
597        next;
598    };
599    # end of the hostnames list.
600    /^#\s+End\s+hostnames/ && do {
601        $desthandle->close();
602        $destfile = "";
603        next;
604    };
605
606    # Generate gateway configuration info, one file per line
607    $gateways && do {
608        chomp;
609        my($dtb, $myname, $desthost, $type) = split(" ", $_);
610
611        # Many of these are to simplify print statements
612        my $sdomain =                           # domain for the source
613            $tbparams->{$gateways}->{'domain'};
614        my $ddomain =                           # domain for the destination
615            $tbparams->{$dtb}->{'domain'};
616        my $sproject =                          # Project of the destination
617            $tbparams->{$gateways}->{'project'};
618        my $fs =                                # Master fs node (FQDN)
619            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
620        my $boss =                              # Master boss node (FQDN)
621            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
622        my $remote_script_dir =                 # Remote fed script location
623            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
624        my $active;                             # Is this the active side of
625                                                # the connector?
626
627        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
628        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
629
630        my $conf_file = "$myname$sdomain.gw.conf";
631        # translate to lower case so the `hostname` hack for specifying
632        # configuration files works.
633        $conf_file =~ tr/A-Z/a-z/;
634
635        # If either end of this link is in the master side of the testbed, that
636        # side is the active end. Otherwise the first testbed encountered in
637        # the file will be the active end.  The $active_end variable keeps
638        # track of those decisions
639        if ( $dtb eq $master ) { $active = "false"; }
640        elsif ($gateways eq $master ) { $active = "true"; }
641        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
642        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
643
644        # This is used to create the client configuration.
645        $control_gateway = "$myname$sdomain"
646            if $type =~ /(control|both)/;
647
648        # Write out the file
649        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
650            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
651
652        print $gwconfig "Active: $active\n";
653        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
654        print $gwconfig "BossName: $boss\n";
655        print $gwconfig "FsName: $fs\n";
656        print $gwconfig "Type: $type\n";
657        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
658        print $gwconfig "Peer: $desthost$ddomain\n";
659        print $gwconfig "Pubkeys: " . 
660            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
661        print $gwconfig "Privkeys: " .
662            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
663        $gwconfig->close();
664
665        # This testbed has a gateway (most will) so make a copy of the keys it
666        # needs in this testbed's subdirectory.  start_segment will transfer
667        # them.
668        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
669            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
670                die "Can't copy pubkeys ($gw_pubkey to " . 
671                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
672        }
673        if ($active eq "true" ) {
674            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
675                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
676                    die "Can't copy secret keys ($gw_secretkey to " . 
677                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
678            }
679        }
680
681        #done processing gateway entry, ready for next line
682        next; 
683    };
684    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
685        next if /^#/;
686        chomp;
687        push(@tarfiles, $_);
688        next;
689    };
690
691    next unless $destfile;  # Unidentified testbed, ignore config
692    # local copies that can be used in the substitutions below
693    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
694    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
695    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
696    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
697    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
698    my $expstart = $tbparams->{$ctb}->{'expstart'};
699    my $project = $tbparams->{$ctb}->{'project'};
700
701    # Substitute variables
702    s/GWTYPE/$gwtype/g;
703    s/GWIMAGE/$gwimage/g;
704    if ($ctb eq $master ) {
705        s/GWSTART/$mgwstart/g;
706        s/EXPSTART/$mexpstart/g;
707    }
708    else {
709        s/GWSTART/$gwstart/g;
710        s/EXPSTART/$expstart/g;
711    }
712    # XXX: oh is this bad
713    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
714    s#PROJDIR#/proj/$project/#g;
715    s#EID#$eid#g;
716    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
717    print $desthandle $_;
718}
719$pipe->close();
720die "No nodes in master testbed ($master)\n" unless $allocated{$master};
721
722for my $t  (@tarfiles) {
723    die "tarfile '$t' unreadable: $!\n" unless -r $t;
724    unless (-d "$tmpdir/tarfiles") {
725        mkdir("$tmpdir/tarfiles") || 
726            die "Can't create $tmpdir/tarfiles:$!\n";
727    }
728    copy($t, "$tmpdir/tarfiles") || 
729        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
730}
731
732exit(0) unless $startem;
733
734my %started;                # If $started{$tb} then $tb successfully started
735
736# Start up the slave sub-experiments first
737TESTBED:
738for my $tb  (keys %allocated) {
739    if ($tb ne $master) {
740        if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
741        else { last TESTBED; }
742    }
743}
744
745# Now the master
746if (&start_segment($master, $eid, $tbparams, $timeout)) { 
747    $started{$master}++;
748}
749
750# If any testbed failed, swap the rest out.
751if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
752    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
753    print "Error starting experiment\n";
754    exit(1);
755}
756print "Experiment started\n";
757print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
758system("rm -rf $tmpdir") unless $debug;
759exit(0);    # set the exit value
760
761=pod
762
763=head1 NAME
764
765B<splitter.pl>
766
767=head1 SYNOPSIS
768
769B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
770    [B<-f> F<experiment_tcl>] [F<experiment_tcl>]
771
772=head1 DESCRIPTION
773
774B<splitter.pl> invokes the DETER experiment parser to split an annotated
775experiment into multiple sub-experments and instantiates the sub-experiments on
776their intended testbeds.  Annotation is accomplished using the
777tb-set-node-testbed command, added to the parser.
778
779Much of the script's behavior depends on the configuration file, specified with
780the B<-c> flag and defaulting to F<./splitter.conf>.
781
782The testbed labels supplied in the B<tb-set-node-testbed> command are
783meaningful based on their presence in the testbeds file.  that file can be
784specified in the configuration file using the B<Testbeds> directive, and
785defaults to F<./testbeds>.  The syntax is described below.
786
787Most of the intermediate files are staged in a sub-directory of a temporary
788files directory and deleted at the end of the script.  Specifying the B<-d>
789flag on the command line avoids the deletion for debbugging.  By default the
790temporary files directory is directory is F</tmp> and can be reset in the
791configuration file using the B<Tmpdir> directive.  Intermediate files are
792stored under a subdirectory formed by adding the process ID of the splitter
793process.  For example, if the temporary files directory is F</tmp> and the
794B<splitter.pl> process ID is 2323, the temporary files will be stored in
795F</tmp/split2323/>.
796
797The expreriment is split out into one experiment description per testbed in the
798temporary directory named as F<experiment.testbed.tcl> where the experiment is
799the experiment ID given in the configuration file, and the testbed is the
800tb-set-node-testbed parameter for the nodes in the file.
801
802If the B<-n> option is absent the sub-experiments are then instantiated on
803their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
804Per-testbed parameters are set in the testbeds file.  Sub-experiments on
805slave testbeds are instantiated in a random order, but the master testbed is
806currently instantiated last.
807
808Scripts to start federation (the federation kit) are copied into the local
809experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
810taken from the directory given by the B<ScriptDir> directive in the
811configuration file.
812
813If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
814F<splitter.conf>.
815
816If any sub-experiment fails to instantiate, the other sub-exeriments are
817swapped out.  B<-F> avoids this swap out, which can also be specified as
818B<SoftFail: true> in F<splitter.conf>
819
820=head2 Configuration File
821
822The configuration file is a simple set of colon-separated parameters and
823values.  A configuration file must be present, either specified in the B<-c>
824flag or the default F<./splitter.conf>.  All the parameter names are case
825insensitive, but should not include any whitespace.  Parameter values may
826include whitespace, but no newlines.
827
828Possible parameters are:
829
830=over 5
831
832=item Experiment
833
834The name of the experiment on the various testbeds
835
836=item Master
837
838The master testbed label from the testbeds file, described below.
839
840=item Testbeds
841
842The testbeds file described below, giving per-testbed parameters.  If this
843directive is absent the testbeds file defaults to F<./testbeds>
844
845=item ScriptDir
846
847Location of the default federation scripts, i.e. the federation kit.
848
849=item GatewayPubkey
850
851=item GatewaySecretKey
852
853The names of the files containing secret and public keys to use in setting up
854tunnels between testbeds.  These will eventually be automatically generated.
855
856=item TmpDir
857
858=item TempDir
859
860The directory where temporary files are created.  These are synonyms, but
861should both be specified, B<TmpDir> has priority.  If neither is specified,
862F</tmp> is used.
863
864=item SMBShare
865
866The SMB share on the master testbed that will be exported to remote clients.
867
868=item SMBUser
869
870The experiment user to mount project directories as.  This user needs to be a
871member of the exported experiment - that is one of the users in the project
872containing this experiment on the master testbed.
873
874=item Tclparse
875
876The pathname to the experiment parsing program.  Only developers should set
877this.
878
879=item Tclsh
880
881The pathname to the local oTcl shell.  Only developers should set
882this.
883
884=item Timeout
885
886Value in seconds after which a swap-in operatioin will be considered a success.
887Often long swap-ins will hang when there are partial failures.  This works
888around this issue.
889
890=item FailSoft
891
892If not set, failure of any sub experiment swaps the rest out.  Setting this to any value avoids this swap out.  (Also B<-F>.)
893
894=back
895
896=head2 Testbeds file
897
898The configuration file (F<./testbeds> unless overridden by B<-c>) is a
899colon-separated set of parameters keyed by testbed name.  The fields, in order,
900are:
901
902=over 5
903
904=item name
905
906The testbed to which this line of parameters applies.
907
908=item user
909
910The user under which to make requests to this testbed.  The user running
911B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
912testbed.
913
914=item host
915
916The host name of the testbed's ops node.  The user calling B<splitter.pl> must
917be able to execute commands on this host via L<ssh(1)>.
918
919=item domain
920
921The domain of nodes in this testbed (including the ops host).  This parameter
922should always start with a period.
923
924=item project
925
926The project under which to instantiate sub-experiments on this testbed.
927
928=item gateway type
929
930The node type for inter-testbed gateway nodes on this testbed.
931
932=item experiment start (slave)
933
934The start command to run on experimental nodes when this testbed is used as a
935slave.  In all the start commands the string FEDDIR will be replaced by the
936local experiment's federation scripts directory and the string GWCONF replaced
937by the gatway configuration file.
938
939=item gateway start (slave)
940
941The start command to run on gateway nodes when this testbed is used as a slave.
942The same string substitutions are made in this command as in experiment start.
943
944=item experiment start (master)
945
946The start command to run on experimental nodes when this testbed is used as a
947master.  The same string substitutions are made in this command as in
948experiment start.
949
950=item gateway start (master)
951
952The start command to run on gateway nodes when this testbed is used as a
953master.  The same string substitutions are made in this command as in
954experiment start.
955
956=item gateway image
957
958The disk image to be loaded on a gateway node on this testbed.
959
960=item filesystem node
961
962The node in the master testbed from which filesystems are mounted.
963
964=item boss node
965
966The node in the master testbed that controls the testbed.
967
968=item tunnel configuration
969
970True if the connector needs to do DETER federation.  This parameter will
971probably be removed.
972
973=back
974
975The parsing of the testbeds is extremely simple.  Colons separate each
976field and there is n provision for escaping them at this time.
977
978=head2 New Testbeds File Format
979
980The testbeds file has ben revamped to make it more human readable.  The
981parameters are now named and specified on separate lines of the configuration
982file.  Each testbed's parameters are preceeded by the testbed label in brackets
983([]) on a line by itself.  After that the parameters are specified as
984parameter: value.  This is essentially the same format as the configuration
985file.  Parameters are:
986
987=over 4
988
989=item User
990
991The user under which to make requests to this testbed.  The user running
992B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
993testbed.
994
995=item OpsNode
996
997The host name of the testbed's ops node.  The user calling B<splitter.pl> must
998be able to execute commands on this host via L<ssh(1)>.
999
1000=item Domain
1001
1002The domain of nodes in this testbed (including the ops host).  This parameter
1003should always start with a period.
1004
1005=item Project
1006
1007The project under which to instantiate sub-experiments on this testbed.
1008
1009=item ConnectorType
1010
1011The node type for inter-testbed connector nodes on this testbed.
1012
1013=item SlaveNodeStartCmd
1014
1015The start command to run on experimental nodes when this testbed is used as a
1016slave.  In all the start commands the string FEDDIR will be replaced by the
1017local experiment's federation scripts directory and the string GWCONF replaced
1018by the gatway configuration file.
1019
1020=item SlaveConnectorStartCmd
1021
1022The start command to run on gateway nodes when this testbed is used as a slave.
1023The same string substitutions are made in this command as in experiment start.
1024
1025=item MasterNodeStartCmd
1026
1027The start command to run on experimental nodes when this testbed is used as a
1028master.  The same string substitutions are made in this command as in
1029experiment start.
1030
1031=item MasterConnectorStartCmd
1032
1033The start command to run on gateway nodes when this testbed is used as a
1034master.  The same string substitutions are made in this command as in
1035experiment start.
1036
1037=item ConnectorImage
1038
1039The disk image to be loaded on a gateway node on this testbed.
1040
1041=item FileServer
1042
1043The node in the master testbed from which filesystems are mounted.
1044
1045=item Boss
1046
1047The node in the master testbed that controls the testbed.
1048
1049=item TunnelCfg
1050
1051True if the connector needs to do DETER federation.  This parameter will
1052probably be removed.
1053
1054
1055=back
1056
1057
1058=head1 ENVIRONMENT
1059
1060B<splitter.pl> does not directly make use of environment variables, but calls
1061out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1062environment.
1063
1064=head1 SEE ALSO
1065
1066L<sh(1)>, L<ssh(1)>
1067
1068=cut
Note: See TracBrowser for help on using the repository browser.