source: fedkit/splitter.pl @ 33548e1

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 33548e1 was 33548e1, checked in by Ted Faber <faber@…>, 17 years ago

Remove -N : all testbeds files use the "new" format now
Update docs for new textbeds format
Neaten up the timeout code in ssh_cmd

  • Property mode set to 100644
File size: 33.1 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my @tarfiles;                   # Tarfiles in use by this experiment
32my @rpms;                       # Rpms in use by this experiment
33my $timeout;                    # The timeout to use for experiment swap ins
34my %opts;                       # Parsed options
35
36my $tbparams = {};              # Map of the per-testbed parameters from the
37                                # testbeds file.  It is a reference to a hash
38                                # of hashes (because it's passed around a bunch
39                                # and it's nicer to have one access pattern
40                                # throughout the script, in the main loop and
41                                # the subroutines).  That access is exemplified
42                                # by  $tbparams->{'deter'}->{'domain'} which is
43                                # the domain parameter of the DETER testbed. 
44my $fail_soft;                  # Do not swap failed sub-experiments out
45
46# Parse the config file.  The format is a colon-separated parameter name
47# followed by the value of that parameter to the end of the line.  This parses
48# that format and puts the parameters into the referenced hash.  Parameter
49# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
50# failure (e.g. file open) and 1 on success.
51sub parse_config {
52    my($file, $href) = @_;
53    my $fh = new IO::File($file);
54       
55    unless ($fh) {
56        warn "Can't open $file: $!\n";
57        return 0;
58    }
59
60    while (<$fh>) {
61        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
62        chomp;
63        /^([^:]+):\s*(.*)/ && do {
64            my $key = $1; 
65
66            $key =~ tr/A-Z/a-z/;
67            $href->{$key} = $2;
68            next;
69        };
70        warn "Unparasble line in $file: $_\n";
71    }
72    $fh->close();   # It will close when it goes out of scope, but...
73    return 1;
74}
75
76# Parse an easier-to-read testbeds file (the original was comma-separated
77# unadorned strings).  The format is a testbed scope as [testbed] followed by
78# the colon-separated attribute-value pairs for the testbed.  Right now these
79# go into a set of global hashes indexed by testbed, but that should probably
80# change.
81
82sub parse_testbeds {
83    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
84    my $fh = new IO::File($file);  # Testbeds filehandle
85    my $tb;                         # Current testbed
86    # Convert attribute in the file to tbparams hash key
87    my %attr_to_hash = (
88        "opsnode" => "host",
89        "user" => "user",
90        "domain" => "domain",
91        "project" => "project",
92        "connectortype" => "gwtype",
93        "slavenodestartcmd" => "expstart",
94        "slaveconnectorstartcmd" => "gwstart",
95        "masternodestartcmd" => "mexpstart",
96        "masterconnectorstartcmd" => "mgwstart",
97        "connectorimage" => "gwimage",
98        "fileserver" => "fs",
99        "boss" => "boss",
100        "tunnelcfg" => "tun"
101    );
102
103
104    unless ($fh) {
105        warn "Can't open $file: $!\n";
106        return 0;
107    }
108
109    while (<$fh>) {
110        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
111        chomp;
112        /^\s*\[(.*)\]/ && do {
113            $tb = $1;
114            $tbparams->{$tb} = {} unless $tbparams->{$tb};
115            next;
116        };
117
118        /^([^:]+):\s*(.*)/ && do {
119            unless ($tb) {
120                warn "Ignored attribute definition before testbed " .
121                    "defined in $file: $_\n";
122                next;
123            }
124            my $key = $1; 
125            $key =~ tr/A-Z/a-z/;
126            my $var = $attr_to_hash{$key};
127
128            if ($var) { $tbparams->{$tb}->{$var} = $2; }
129            else { warn "Unknown keyword $key in $file\n"; }
130
131            next;
132        };
133        warn "Unparasble line in $file: $_\n";
134    }
135    $fh->close();   # It will close when it goes out of scope, but...
136    return 1;
137}
138
139
140# use scp to transfer a file, reporting true if successful and false otherwise.
141# Parameters are the local file name, the ssh host destination (either hostname
142# oe user@host), and an optional destination file name or directory.  If no
143# destination is given, the file is transferred to the given user's home
144# directory.  If only a machine is given in the ssh host destination, the
145# current user is used.
146sub scp_file {
147    my($file, $user, $host, $dest) = @_;
148
149    # XXX system with a relative pathname is sort of gross
150    system("scp $file $user\@$host:$dest");
151    if ($?) {
152        warn "scp failed $?\n";
153        return 0;
154    }
155    else { return 1; }
156}
157
158# use ssh to execute the given command on the machine (and as the user) in
159# $where.  Parameters are the ssh destination directive ($where) and the
160# command to execute, and a prefix to be placed on a message generated if the
161# command fails.   On failure print a warning if a warning prefix was given and
162# return false.  If timeout is given fork a process and set an alarm of that
163# many seconds.  Timeouts also return 0;
164sub ssh_cmd {
165    my($user, $host, $cmd, $wname, $timeout) = @_;
166    my $pid;                # Child pid
167
168    $timeout = 0 unless $timeout;   # Force default timeout
169
170    if ( $pid = fork () ) {
171        # Parent process
172        # The eval acts as a signal catcher.  If the alarm goes off inside
173        # the eval, the die will put "alarm\n" into $@, otherwise the
174        # return value of the execution in the child process will be used.
175        my $rv = eval {
176            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
177            my $rv;
178
179            alarm $timeout;
180            $rv = waitpid($pid, 0);
181            alarm 0;
182            $rv;
183        };
184
185        # If the eval succeeded, $@ will be null and we can use $rv, which
186        # is the return code from the subprocess.  If the eval timed out,
187        # print a warning and assume the best.
188        if ($@ eq "alarm\n" ) {
189            warn "$wname timed out - pid $pid still live\n";
190            return 1;
191        }
192        else {
193            return $rv;
194        }
195    }
196    else {
197        # Child process
198        exec("ssh $user\@$host $cmd");
199        exit 0;
200    }
201}
202
203# Ship local copies of the federation scripts out to the given host.  If any of
204# the script transfers fails, return 0.  The scripts to transfer are from the
205# global @scripts and are found locally in $local_script_dir (another global).
206sub ship_scripts {
207    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
208    my $s;
209
210    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
211    for $s (@scripts) {
212        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
213            return 0;
214    }
215    return 1;
216}
217
218# Ship per-testbed configuration generated by this script to the remote /proj
219# directories on the remote testbeds
220sub ship_configs {
221    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
222    my($d, $f);
223
224    $d = IO::Dir->new($src_dir) || return 0;
225
226    # All directories under $tmpdir are 770 so we can delete them later.
227    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
228    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
229    while ( $f = $d->read()) {
230        next if $f =~ /^\./;
231        if ( -d "$src_dir/$f" ) {
232            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
233                return 0;
234        }
235        else {
236            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
237        }
238    }
239    return 1;
240}
241
242# Start a sub section of the experiment on a given testbed.  The testbed and
243# the user to start the experiment as are pulled from the global per-testbed
244# hash, passed in as $tbparams, as is the project name on the remote testbed.
245# Parameters are the testbed and the experiment id.  Configuration files are
246# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
247# Then the current state of the experiment determined using expinfo.  From that
248# state, the experiment is either created, modified or spapped in.  If
249# everything succeeds, true is returned.  If the global verbose is set progress
250# messages are printed.
251sub start_segment {
252    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
253                                            # per-testbed parameters and remote
254                                            # swap-in timeout
255    my $host =                              # Host name of remote ops (FQDN)
256        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
257    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
258    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
259                                            # experiment under
260    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
261                                            # sub-experiment
262    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
263    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
264    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
265    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
266    my $state;                              # State of remote experiment
267    my $status = new IO::Pipe;              # The pipe to get status
268
269    # Determine the status of the remote experiment
270    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
271        die "Can't ssh to $user\@$host:$!\n";
272
273    # XXX: this is simple now.  Parsing may become more complex
274    while (<$status>) {
275        /State: (\w+)/ && ($state = $1);
276        /No\s+such\s+experiment/ && ($state = "none");
277    }
278    $status->close();
279    print "$tb: $state\n";
280
281    # Copy the experiment definition data over
282    print "transferring subexperiment to $tb\n" if $verbose;
283    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
284    # Clear out any old experiment data; if not deleted, copies over it by
285    # different users will fail.
286    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
287    # force the /bin/sh interpretation of the trailing * (which we need to keep
288    # tmp around))  Again, this needs to be done more properly once we have a
289    # non-ssh interface here.)
290    print "clearing experiment subdirs on $tb\n" if $verbose;
291    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
292        return 0;
293    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
294    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
295        return 0;
296    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
297    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
298        return 0;
299    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
300    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
301        return 0;
302    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
303    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
304        return 0;
305    # Remote experiment is active.  Modify it.
306    if ($state eq "active") {
307        print "Transferring federation support files to $tb\n" if $verbose;
308        # First copy new scripts and hostinfo into the remote /proj
309        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
310            return 0;
311        &ship_scripts($host, $user, $proj_dir) || return 0;
312        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
313
314        if ( -d "$tmpdir/tarfiles") {
315            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
316                return 0;
317        }
318
319        if ( -d "$tmpdir/rpms") {
320            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
321                return 0;
322        }
323
324        print "Modifying $eid in place on $tb\n" if $verbose;
325        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
326            "$eid $tclfile", "modexp", $timeout) || return 0;
327        return 1;
328    }
329
330    # Remote experiment is swapped out, modify it and swap it in.
331    if ($state eq "swapped") {
332        print "Transferring federation support files to $tb\n" if $verbose;
333        # First copy new scripts and hostinfo into the remote /proj (because
334        # the experiment exists, the directory tree should be there.
335        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
336            return 0;
337        &ship_scripts($host, $user, $proj_dir) || return 0;
338        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
339        if ( -d "$tmpdir/tarfiles") {
340            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
341                return 0;
342        }
343
344        if ( -d "$tmpdir/rpms") {
345            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
346                return 0;
347        }
348
349        print "Modifying $eid on $tb\n" if $verbose;
350        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
351            "modexp") || return 0;
352        print "Swapping $eid in on $tb\n" if $verbose;
353        # Now start up
354        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
355            "swapexp", $timeout) || return 0;
356        return 1;
357    }
358
359    # No remote experiment.  Create one.  We do this in 2 steps so we can put
360    # the configuration files and scripts into the new experiment directories.
361    if ($state eq "none") {
362
363        if ( -d "$tmpdir/tarfiles") {
364            # Tarfiles have to exist for the creation to work
365            print "copying tarfiles to $tb\n";
366            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
367                return 0;
368        }
369
370        if ( -d "$tmpdir/rpms") {
371            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
372                return 0;
373        }
374
375        print "Creating $eid on $tb\n" if $verbose;
376        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
377            "$pid -e $eid $tclfile", "startexp") || return 0;
378        # After startexp succeeds, the per-experiment directories exist on the
379        # remote testbed.
380        print "Transferring federation support files to $tb\n" if $verbose;
381        # First copy new scripts and hostinfo into the remote /proj
382        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
383            return 0;
384        &ship_scripts($host, $user, $proj_dir) || return 0;
385        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
386        # Now start up
387        print "Swapping $eid in on $tb\n" if $verbose;
388        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
389            "swapexp", $timeout) || return 0;
390        return 1;
391    }
392
393    # Every branch for a known state returns.  If execution gets here, the
394    # state is unknown.
395    warn "unknown state: $state\n";
396    return 0;
397}
398
399# Swap out a sub-experiment - probably because another has failed.  Arguments
400# are testbed and experiment.  Most of the control flow is similar to
401# start_segment, though much simpler.
402sub stop_segment {
403    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
404                                            # per-testbed parameters
405    my $user = $tbparams->{$tb}->{'user'};  # testbed user
406    my $host =                              # Ops node
407        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
408    my $pid = $tbparams->{$tb}->{'project'};# testbed project
409
410    print "Stopping $eid on $tb\n" if $verbose;
411    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
412        "swapexp (out)") || return 0;
413    return 1;
414}
415
416
417$pid = $gid = "dummy";              # Default project and group to pass to
418                                    # $tcl_splitter above.  These are total
419                                    # dummy arguments;  the splitter doesn't
420                                    # use them at all, but we supply them to
421                                    # keep our changes to the parser minimal.
422# Argument processing.
423getopts('Ft:c:f:ndvN', \%opts);
424$splitter_config = $opts{'c'} || "./splitter.conf";
425$debug = $opts{'d'};
426$verbose = $opts{'v'} || $opts{'d'};
427
428&parse_config("$splitter_config", \%opts) || 
429    die "Cannot read config file $splitter_config: $!\n";
430$fail_soft = $opts{'F'} || $opts{'failsoft'};
431
432warn "-N does nothing now.  Only one testbeds format supported.\n"
433    if $opts{'N'};
434$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
435$timeout = $opts{'t'} || $opts{'timeout'};
436$eid = $opts{'experiment'};             # Experiment ID
437$tcl = $opts{'f'} || shift;             # The experiment description
438$master = $opts{'master'};              # Master testbed
439$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
440$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
441$local_script_dir = $opts{'scriptdir'}; # Local scripts
442
443$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
444    die "Must give an SMB share\n";
445$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
446    die "Must give an SMB user\n";
447
448# For now specify these.  We may want to generate them later.
449$gw_pubkey = $opts{'gatewaypubkey'};
450($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
451$gw_secretkey = $opts{'gatewaysecretkey'};
452($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
453
454# tcl program to split experiments (changed during devel)
455$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
456# tclsh to call directly (changed during devel)
457$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
458
459# Prefix to avoid collisions
460$tmpdir .= "/split$$";
461
462print "Temp files are in $tmpdir\n" if $verbose;
463# Create a workspace
464unless (-d "$tmpdir") {
465    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
466}
467
468# Validate scripts directory
469for my $s (@scripts) {
470    die "$local_script_dir/$s not in local script directory. Try -d\n"
471        unless -r "$local_script_dir/$s";
472}
473
474die "Must supply file, master and experiment" unless $master && $tcl && $eid;
475
476&parse_testbeds($tb_config, $tbparams) ||
477    die "Cannot testbed congfigurations from $tb_config: $!\n";
478
479# Open a pipe to the splitter program and start it parsing the experiments
480my $pipe = new IO::Pipe;
481# NB no more -p call on parse call.
482$pipe->reader("$tclsh $tcl_splitter -s -m $master  $pid $gid $eid $tcl") || 
483    die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
484
485# Parsing variables
486my $ctb;                        # Current testbed
487my %allocated;                  # If allocated{$tb} > 0, $tb is in use
488my $destfile;                   # File that the sub-experiment tcl file is
489                                # being written to, or "" if none.  Also used
490                                # for hostnames file.
491my $desthandle;                 # File handle for distfile
492my $gateways;                   # when gateway lists are being processed this
493                                # is the testbed whose gateways are being
494                                # gathered.
495my $control_gateway;            # Control net gateway for the current testbed
496my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
497                                # end of the a <-> b connector pair.
498
499# Parse the splitter output.  This loop creates the sub experiments, gateway
500# configurations and hostnames file
501while (<$pipe>) {
502    # Start of a sub-experiment
503    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
504        $ctb = $1;
505
506        # If we know the testbed, start collecting its sub experiment tcl
507        # description.  If not, warn the caller and ignore the configuration of
508        # this testbed.
509        if ($tbparams->{$ctb}) {
510            $allocated{$ctb}++; # Keep track of the testbeds allocated
511
512            unless (-d "$tmpdir/$ctb") {
513                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
514            }
515            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
516
517            $desthandle = new IO::File(">$destfile") || 
518                die "Cannot open $destfile:$!\n";
519        }
520        else { 
521            warn "No such testbed $ctb\n";
522            $destfile = "";
523        }
524        next;
525    };
526
527    # End of that experiment
528    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
529        # Simple syntax check and close out this experiment's tcl description
530        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
531        $desthandle->close();
532        $destfile = $ctb = "";
533        next;
534    };
535
536    # Beginning of a gateway set
537    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
538        $gateways = $1;
539        # If we've heard of this tb, create the config lines for it one at a
540        # time.
541        if ($allocated{$gateways}) {
542            # Just in case.  This directory should already have been created
543            # above.
544            unless (-d "$tmpdir/$gateways") {
545                mkdir("$tmpdir/$gateways") || 
546                    die "Can't create $tmpdir/$gateways: $!";
547            }
548        }
549        else {
550            warn "Gateways given (and ignored) for testbed not in use: " .
551                "$gateways\n";
552            $gateways = 0;
553        }
554        next;
555    };
556    # End of the gateways section.  Output the client config for this testbed
557    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
558        die "Mismatched gateway markers ($1, $gateways)\n" 
559            unless !$gateways || $gateways == $1;
560
561        if ($control_gateway ) {
562            # Client config
563            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
564            my $master_project = $tbparams->{$master}->{'project'};
565            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
566            print $cc "ControlGateway: $control_gateway\n";
567            print $cc "SMBShare: $smb_share\n";
568            print $cc "ProjectUser: $project_user\n";
569            print $cc "ProjectName: $master_project\n";
570            $cc->close();
571        }
572        else { warn "No control gateway for $gateways?\n"; }
573           
574        $gateways = 0;
575        next;
576    };
577    # Beginning of the hostnames list.  Collection is always in the hostnames
578    # file.
579    /^#\s+Begin\s+hostnames/ && do {
580        $destfile = "$tmpdir/hostnames";
581        $desthandle = new IO::File(">$destfile") || 
582            die "Can't open $destfile:$!\n";
583        next;
584    };
585    # end of the hostnames list.
586    /^#\s+End\s+hostnames/ && do {
587        $desthandle->close();
588        $destfile = "";
589        next;
590    };
591
592    # Generate gateway configuration info, one file per line
593    $gateways && do {
594        chomp;
595        my($dtb, $myname, $desthost, $type) = split(" ", $_);
596
597        # Many of these are to simplify print statements
598        my $sdomain =                           # domain for the source
599            $tbparams->{$gateways}->{'domain'};
600        my $ddomain =                           # domain for the destination
601            $tbparams->{$dtb}->{'domain'};
602        my $sproject =                          # Project of the destination
603            $tbparams->{$gateways}->{'project'};
604        my $fs =                                # Master fs node (FQDN)
605            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
606        my $boss =                              # Master boss node (FQDN)
607            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
608        my $remote_script_dir =                 # Remote fed script location
609            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
610        my $active;                             # Is this the active side of
611                                                # the connector?
612
613        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
614        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
615
616        my $conf_file = "$myname$sdomain.gw.conf";
617        # translate to lower case so the `hostname` hack for specifying
618        # configuration files works.
619        $conf_file =~ tr/A-Z/a-z/;
620
621        # If either end of this link is in the master side of the testbed, that
622        # side is the active end. Otherwise the first testbed encountered in
623        # the file will be the active end.  The $active_end variable keeps
624        # track of those decisions
625        if ( $dtb eq $master ) { $active = "false"; }
626        elsif ($gateways eq $master ) { $active = "true"; }
627        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
628        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
629
630        # This is used to create the client configuration.
631        $control_gateway = "$myname$sdomain"
632            if $type =~ /(control|both)/;
633
634        # Write out the file
635        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
636            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
637
638        print $gwconfig "Active: $active\n";
639        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
640        print $gwconfig "BossName: $boss\n";
641        print $gwconfig "FsName: $fs\n";
642        print $gwconfig "Type: $type\n";
643        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
644        print $gwconfig "Peer: $desthost$ddomain\n";
645        print $gwconfig "Pubkeys: " . 
646            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
647        print $gwconfig "Privkeys: " .
648            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
649        $gwconfig->close();
650
651        # This testbed has a gateway (most will) so make a copy of the keys it
652        # needs in this testbed's subdirectory.  start_segment will transfer
653        # them.
654        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
655            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
656                die "Can't copy pubkeys ($gw_pubkey to " . 
657                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
658        }
659        if ($active eq "true" ) {
660            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
661                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
662                    die "Can't copy secret keys ($gw_secretkey to " . 
663                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
664            }
665        }
666
667        #done processing gateway entry, ready for next line
668        next; 
669    };
670    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
671        next if /^#/;
672        chomp;
673        push(@tarfiles, $_);
674        next;
675    };
676    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
677        next if /^#/;
678        chomp;
679        push(@rpms, $_);
680        next;
681    };
682
683    next unless $destfile;  # Unidentified testbed, ignore config
684    # local copies that can be used in the substitutions below
685    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
686    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
687    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
688    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
689    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
690    my $expstart = $tbparams->{$ctb}->{'expstart'};
691    my $project = $tbparams->{$ctb}->{'project'};
692
693    # Substitute variables
694    s/GWTYPE/$gwtype/g;
695    s/GWIMAGE/$gwimage/g;
696    if ($ctb eq $master ) {
697        s/GWSTART/$mgwstart/g;
698        s/EXPSTART/$mexpstart/g;
699    }
700    else {
701        s/GWSTART/$gwstart/g;
702        s/EXPSTART/$expstart/g;
703    }
704    # XXX: oh is this bad
705    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
706    s#PROJDIR#/proj/$project/#g;
707    s#EID#$eid#g;
708    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
709    print $desthandle $_;
710}
711$pipe->close();
712die "No nodes in master testbed ($master)\n" unless $allocated{$master};
713
714# Copy tarfiles and rpms needed at remote sites to the staging directories.
715# Start_segment will distribute them
716for my $t  (@tarfiles) {
717    die "tarfile '$t' unreadable: $!\n" unless -r $t;
718    unless (-d "$tmpdir/tarfiles") {
719        mkdir("$tmpdir/tarfiles") || 
720            die "Can't create $tmpdir/tarfiles:$!\n";
721    }
722    copy($t, "$tmpdir/tarfiles") || 
723        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
724}
725
726for my $r  (@rpms) {
727    die "rpm '$r' unreadable: $!\n" unless -r $r;
728    unless (-d "$tmpdir/rpms") {
729        mkdir("$tmpdir/rpms") || 
730            die "Can't create $tmpdir/rpms:$!\n";
731    }
732    copy($r, "$tmpdir/rpms") || 
733        die "Can't copy $r to  $tmpdir/rpms:$!\n";
734}
735
736exit(0) unless $startem;
737
738my %started;                # If $started{$tb} then $tb successfully started
739
740# Start up the slave sub-experiments first
741TESTBED:
742for my $tb  (keys %allocated) {
743    if ($tb ne $master) {
744        if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
745        else { last TESTBED; }
746    }
747}
748
749# Now the master
750if (&start_segment($master, $eid, $tbparams, $timeout)) { 
751    $started{$master}++;
752}
753
754# If any testbed failed, swap the rest out.
755if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
756    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
757    print "Error starting experiment\n";
758    exit(1);
759}
760print "Experiment started\n";
761print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
762system("rm -rf $tmpdir") unless $debug;
763exit(0);    # set the exit value
764
765=pod
766
767=head1 NAME
768
769B<splitter.pl>
770
771=head1 SYNOPSIS
772
773B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
774    [B<-f> F<experiment_tcl>] [F<experiment_tcl>]
775
776=head1 DESCRIPTION
777
778B<splitter.pl> invokes the DETER experiment parser to split an annotated
779experiment into multiple sub-experments and instantiates the sub-experiments on
780their intended testbeds.  Annotation is accomplished using the
781tb-set-node-testbed command, added to the parser.
782
783Much of the script's behavior depends on the configuration file, specified with
784the B<-c> flag and defaulting to F<./splitter.conf>.
785
786The testbed labels supplied in the B<tb-set-node-testbed> command are
787meaningful based on their presence in the testbeds file.  that file can be
788specified in the configuration file using the B<Testbeds> directive, and
789defaults to F<./testbeds>.  The syntax is described below.
790
791Most of the intermediate files are staged in a sub-directory of a temporary
792files directory and deleted at the end of the script.  Specifying the B<-d>
793flag on the command line avoids the deletion for debbugging.  By default the
794temporary files directory is directory is F</tmp> and can be reset in the
795configuration file using the B<Tmpdir> directive.  Intermediate files are
796stored under a subdirectory formed by adding the process ID of the splitter
797process.  For example, if the temporary files directory is F</tmp> and the
798B<splitter.pl> process ID is 2323, the temporary files will be stored in
799F</tmp/split2323/>.
800
801The expreriment is split out into one experiment description per testbed in the
802temporary directory named as F<experiment.testbed.tcl> where the experiment is
803the experiment ID given in the configuration file, and the testbed is the
804tb-set-node-testbed parameter for the nodes in the file.
805
806If the B<-n> option is absent the sub-experiments are then instantiated on
807their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
808Per-testbed parameters are set in the testbeds file.  Sub-experiments on
809slave testbeds are instantiated in a random order, but the master testbed is
810currently instantiated last.
811
812Scripts to start federation (the federation kit) are copied into the local
813experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
814taken from the directory given by the B<ScriptDir> directive in the
815configuration file.
816
817If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
818F<splitter.conf>.
819
820If any sub-experiment fails to instantiate, the other sub-exeriments are
821swapped out.  B<-F> avoids this swap out, which can also be specified as
822B<SoftFail: true> in F<splitter.conf>
823
824=head2 Configuration File
825
826The configuration file is a simple set of colon-separated parameters and
827values.  A configuration file must be present, either specified in the B<-c>
828flag or the default F<./splitter.conf>.  All the parameter names are case
829insensitive, but should not include any whitespace.  Parameter values may
830include whitespace, but no newlines.
831
832Possible parameters are:
833
834=over 5
835
836=item Experiment
837
838The name of the experiment on the various testbeds
839
840=item Master
841
842The master testbed label from the testbeds file, described below.
843
844=item Testbeds
845
846The testbeds file described below, giving per-testbed parameters.  If this
847directive is absent the testbeds file defaults to F<./testbeds>
848
849=item ScriptDir
850
851Location of the default federation scripts, i.e. the federation kit.
852
853=item GatewayPubkey
854
855=item GatewaySecretKey
856
857The names of the files containing secret and public keys to use in setting up
858tunnels between testbeds.  These will eventually be automatically generated.
859
860=item TmpDir
861
862=item TempDir
863
864The directory where temporary files are created.  These are synonyms, but
865should both be specified, B<TmpDir> has priority.  If neither is specified,
866F</tmp> is used.
867
868=item SMBShare
869
870The SMB share on the master testbed that will be exported to remote clients.
871
872=item SMBUser
873
874The experiment user to mount project directories as.  This user needs to be a
875member of the exported experiment - that is one of the users in the project
876containing this experiment on the master testbed.
877
878=item Tclparse
879
880The pathname to the experiment parsing program.  Only developers should set
881this.
882
883=item Tclsh
884
885The pathname to the local oTcl shell.  Only developers should set
886this.
887
888=item Timeout
889
890Value in seconds after which a swap-in operatioin will be considered a success.
891Often long swap-ins will hang when there are partial failures.  This works
892around this issue.
893
894=item FailSoft
895
896If not set, failure of any sub experiment swaps the rest out.  Setting this to any value avoids this swap out.  (Also B<-F>.)
897
898=back
899
900=head2 Testbeds file
901
902The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
903scoped attribute-value pairs where each attribute is specified on a separate
904line of the configuration file.  Each testbed's parameters are preceeded by the
905testbed label in brackets ([]) on a line by itself.  After that the parameters
906are specified as parameter: value.  This is essentially the same format as the
907configuration file.  Parameters are:
908
909=over 4
910
911=item User
912
913The user under which to make requests to this testbed.  The user running
914B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
915testbed.
916
917=item OpsNode
918
919The host name of the testbed's ops node.  The user calling B<splitter.pl> must
920be able to execute commands on this host via L<ssh(1)>.
921
922=item Domain
923
924The domain of nodes in this testbed (including the ops host).  This parameter
925should always start with a period.
926
927=item Project
928
929The project under which to instantiate sub-experiments on this testbed.
930
931=item ConnectorType
932
933The node type for inter-testbed connector nodes on this testbed.
934
935=item SlaveNodeStartCmd
936
937The start command to run on experimental nodes when this testbed is used as a
938slave.  In all the start commands the string FEDDIR will be replaced by the
939local experiment's federation scripts directory and the string GWCONF replaced
940by the gatway configuration file.
941
942=item SlaveConnectorStartCmd
943
944The start command to run on gateway nodes when this testbed is used as a slave.
945The same string substitutions are made in this command as in experiment start.
946
947=item MasterNodeStartCmd
948
949The start command to run on experimental nodes when this testbed is used as a
950master.  The same string substitutions are made in this command as in
951experiment start.
952
953=item MasterConnectorStartCmd
954
955The start command to run on gateway nodes when this testbed is used as a
956master.  The same string substitutions are made in this command as in
957experiment start.
958
959=item ConnectorImage
960
961The disk image to be loaded on a gateway node on this testbed.
962
963=item FileServer
964
965The node in the master testbed from which filesystems are mounted.
966
967=item Boss
968
969The node in the master testbed that controls the testbed.
970
971=item TunnelCfg
972
973True if the connector needs to do DETER federation.  This parameter will
974probably be removed.
975
976
977=back
978
979
980=head1 ENVIRONMENT
981
982B<splitter.pl> does not directly make use of environment variables, but calls
983out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
984environment.
985
986=head1 SEE ALSO
987
988L<sh(1)>, L<ssh(1)>
989
990=cut
Note: See TracBrowser for help on using the repository browser.