source: fedkit/splitter.pl @ 5479c80

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 5479c80 was 5479c80, checked in by Ted Faber <faber@…>, 17 years ago

Timeout critical sshes in setting up experiments. The commands don't seem to
terminate when partial swap-ins are accomplished, or perhaps after long
swap-ins. This provides a way to move on.

  • Property mode set to 100644
File size: 34.6 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my @tarfiles;                   # Tarfiles in use by this experiment
32my $timeout;                    # The timeout to use for experiment swap ins
33my %opts;                       # Parsed options
34
35my $tbparams = {};              # Map of the per-testbed parameters from the
36                                # testbeds file.  It is a reference to a hash
37                                # of hashes (because it's passed around a bunch
38                                # and it's nicer to have one access pattern
39                                # throughout the script, in the main loop and
40                                # the subroutines).  That access is exemplified
41                                # by  $tbparams->{'deter'}->{'domain'} which is
42                                # the domain parameter of the DETER testbed. 
43
44# Parse the config file.  The format is a colon-separated parameter name
45# followed by the value of that parameter to the end of the line.  This parses
46# that format and puts the parameters into the referenced hash.  Parameter
47# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
48# failure (e.g. file open) and 1 on success.
49sub parse_config {
50    my($file, $href) = @_;
51    my $fh = new IO::File($file);
52       
53    unless ($fh) {
54        warn "Can't open $file: $!\n";
55        return 0;
56    }
57
58    while (<$fh>) {
59        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
60        chomp;
61        /^([^:]+):\s*(.*)/ && do {
62            my $key = $1; 
63
64            $key =~ tr/A-Z/a-z/;
65            $href->{$key} = $2;
66            next;
67        };
68        warn "Unparasble line in $file: $_\n";
69    }
70    $fh->close();   # It will close when it goes out of scope, but...
71    return 1;
72}
73
74# Parse an easier-to-read testbeds file (the original was comma-separated
75# unadorned strings).  The format is a testbed scope as [testbed] followed by
76# the colon-separated attribute-value pairs for the testbed.  Right now these
77# go into a set of global hashes indexed by testbed, but that should probably
78# change.
79
80sub parse_testbeds {
81    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
82    my $fh = new IO::File($file);  # Testbeds filehandle
83    my $tb;                         # Current testbed
84    # Convert attribute in the file to global variable name.  XXX: Again, this
85    # needs to be a 2-level hash
86    my %attr_to_hash = (
87        "opsnode" => "host",
88        "user" => "user",
89        "domain" => "domain",
90        "project" => "project",
91        "connectortype" => "gwtype",
92        "slavenodestartcmd" => "expstart",
93        "slaveconnectorstartcmd" => "gwstart",
94        "masternodestartcmd" => "mexpstart",
95        "masterconnectorstartcmd" => "mgwstart",
96        "connectorimage" => "gwimage",
97        "fileserver" => "fs",
98        "boss" => "boss",
99        "tunnelcfg" => "tun"
100    );
101
102
103    unless ($fh) {
104        warn "Can't open $file: $!\n";
105        return 0;
106    }
107
108    while (<$fh>) {
109        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
110        chomp;
111        /^\s*\[(.*)\]/ && do {
112            $tb = $1;
113            $tbparams->{$tb} = {} unless $tbparams->{$tb};
114            next;
115        };
116
117        /^([^:]+):\s*(.*)/ && do {
118            unless ($tb) {
119                warn "Ignored attribute definition before testbed " .
120                    "defined in $file: $_\n";
121                next;
122            }
123            my $key = $1; 
124            $key =~ tr/A-Z/a-z/;
125            my $var = $attr_to_hash{$key};
126
127            if ($var) { $tbparams->{$tb}->{$var} = $2; }
128            else { warn "Unknown keyword $key in $file\n"; }
129
130            next;
131        };
132        warn "Unparasble line in $file: $_\n";
133    }
134    $fh->close();   # It will close when it goes out of scope, but...
135    return 1;
136}
137
138
139# use scp to transfer a file, reporting true if successful and false otherwise.
140# Parameters are the local file name, the ssh host destination (either hostname
141# oe user@host), and an optional destination file name or directory.  If no
142# destination is given, the file is transferred to the given user's home
143# directory.  If only a machine is given in the ssh host destination, the
144# current user is used.
145sub scp_file {
146    my($file, $user, $host, $dest) = @_;
147
148    # XXX system with a relative pathname is sort of gross
149    system("scp $file $user\@$host:$dest");
150    if ($?) {
151        warn "scp failed $?\n";
152        return 0;
153    }
154    else { return 1; }
155}
156
157# use ssh to execute the given command on the machine (and as the user) in
158# $where.  Parameters are the ssh destination directive ($where) and the
159# command to execute, and a prefix to be placed on a message generated if the
160# command fails.   On failure print a warning if a warning prefix was given and
161# return false.  If timeout is given fork a process and set an alarm of that
162# many seconds.  Timeouts also return 0;
163sub ssh_cmd {
164    my($user, $host, $cmd, $wname, $timeout) = @_;
165    my $pid;                # Child pid
166
167    if ( $timeout ) {
168        if ( $pid = fork () ) {
169            # Parent process
170            # The eval acts as a signal catcher.  If the alarm goes off inside
171            # the eval, the die will put "alarm\n" into $@, otherwise the
172            # return value of the execution in the child process will be used.
173            my $rv = eval {
174                local $SIG{'ALRM'} = sub{ die "alarm\n"; };
175                my $rv;
176
177                alarm $timeout;
178                $rv = waitpid($pid, 0);
179                alarm 0;
180                $rv;
181            };
182
183            # If the eval succeeded, $@ will be null and we can use $rv, which
184            # is the return code from the subprocess.  If the eval timed out,
185            # print a warning and assume the best.
186            if ($@ eq "alarm\n" ) {
187                warn "$wname timed out - pid $pid still live\n";
188                return 1;
189            }
190            else {
191                return $rv;
192            }
193        }
194        else {
195            # Child process
196            # XXX system with a relative pathname is sort of gross
197            exec("ssh $user\@$host $cmd");
198            exit 0;
199        }
200    }
201    else {
202        # No timeout, do all the work here.
203        # XXX system with a relative pathname is sort of gross
204        system ("ssh $user\@$host $cmd");
205        if ($?) {
206            warn "$wname failed $?\n" if $wname;
207            return 0;
208        }
209        else { return 1; }
210    }
211}
212
213# Ship local copies of the federation scripts out to the given host.  If any of
214# the script transfers fails, return 0.  The scripts to transfer are from the
215# global @scripts and are found locally in $local_script_dir (another global).
216sub ship_scripts {
217    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
218    my $s;
219
220    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
221    for $s (@scripts) {
222        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
223            return 0;
224    }
225    return 1;
226}
227
228# Ship per-testbed configuration generated by this script to the remote /proj
229# directories on the remote testbeds
230sub ship_configs {
231    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
232    my($d, $f);
233
234    $d = IO::Dir->new($src_dir) || return 0;
235
236    # All directories under $tmpdir are 770 so we can delete them later.
237    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
238    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
239    while ( $f = $d->read()) {
240        next if $f =~ /^\./;
241        if ( -d "$src_dir/$f" ) {
242            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
243                return 0;
244        }
245        else {
246            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
247        }
248    }
249    return 1;
250}
251
252# Start a sub section of the experiment on a given testbed.  The testbed and
253# the user to start the experiment as are pulled from the global per-testbed
254# hash, passed in as $tbparams, as is the project name on the remote testbed.
255# Parameters are the testbed and the experiment id.  Configuration files are
256# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
257# Then the current state of the experiment determined using expinfo.  From that
258# state, the experiment is either created, modified or spapped in.  If
259# everything succeeds, true is returned.  If the global verbose is set progress
260# messages are printed.
261sub start_segment {
262    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
263                                            # per-testbed parameters and remote
264                                            # swap-in timeout
265    my $host =                              # Host name of remote ops (FQDN)
266        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
267    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
268    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
269                                            # experiment under
270    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
271                                            # sub-experiment
272    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
273    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
274    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
275    my $state;                              # State of remote experiment
276    my $status = new IO::Pipe;              # The pipe to get status
277
278    # Determine the status of the remote experiment
279    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
280        die "Can't ssh to $user\@$host:$!\n";
281
282    # XXX: this is simple now.  Parsing may become more complex
283    while (<$status>) {
284        /State: (\w+)/ && ($state = $1);
285        /No\s+such\s+experiment/ && ($state = "none");
286    }
287    $status->close();
288    print "$tb: $state\n";
289
290    # Copy the experiment definition data over
291    print "transferring subexperiment to $tb\n" if $verbose;
292    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
293    # Clear out any old experiment data; if not deleted, copies over it by
294    # different users will fail.
295    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
296    # force the /bin/sh interpretation of the trailing * (which we need to keep
297    # tmp around))  Again, this needs to be done more properly once we have a
298    # non-ssh interface here.)
299    print "clearing experiment subdirs on $tb\n" if $verbose;
300    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
301        return 0;
302    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
303    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
304        return 0;
305    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
306    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
307        return 0;
308    # Remote experiment is active.  Modify it.
309    if ($state eq "active") {
310        print "Transferring federation support files to $tb\n" if $verbose;
311        # First copy new scripts and hostinfo into the remote /proj
312        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
313            return 0;
314        &ship_scripts($host, $user, $proj_dir) || return 0;
315        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
316        if ( -d "$tmpdir/tarfiles") {
317            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
318                return 0;
319        }
320
321        print "Modifying $eid in place on $tb\n" if $verbose;
322        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
323            "$eid $tclfile", "modexp", $timeout) || return 0;
324        return 1;
325    }
326
327    # Remote experiment is swapped out, modify it and swap it in.
328    if ($state eq "swapped") {
329        print "Transferring federation support files to $tb\n" if $verbose;
330        # First copy new scripts and hostinfo into the remote /proj (because
331        # the experiment exists, the directory tree should be there.
332        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
333            return 0;
334        &ship_scripts($host, $user, $proj_dir) || return 0;
335        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
336        if ( -d "$tmpdir/tarfiles") {
337            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
338                return 0;
339        }
340
341        print "Modifying $eid on $tb\n" if $verbose;
342        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
343            "modexp") || return 0;
344        print "Swapping $eid in on $tb\n" if $verbose;
345        # Now start up
346        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
347            "swapexp", $timeout) || return 0;
348        return 1;
349    }
350
351    # No remote experiment.  Create one.  We do this in 2 steps so we can put
352    # the configuration files and scripts into the new experiment directories.
353    if ($state eq "none") {
354
355        if ( -d "$tmpdir/tarfiles") {
356            # Tarfiles have to exist for the creation to work
357            print "copying tarfiles to $tb\n";
358            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
359                return 0;
360        }
361        print "Creating $eid on $tb\n" if $verbose;
362        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
363            "$pid -e $eid $tclfile", "startexp") || return 0;
364        print "Transferring federation support files to $tb\n" if $verbose;
365        # First copy new scripts and hostinfo into the remote /proj
366        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
367            return 0;
368        &ship_scripts($host, $user, $proj_dir) || return 0;
369        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
370        # Now start up
371        print "Swapping $eid in on $tb\n" if $verbose;
372        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
373            "swapexp", $timeout) || return 0;
374        return 1;
375    }
376
377    # Every branch for a known state returns.  If execution gets here, the
378    # state is unknown.
379    warn "unknown state: $state\n";
380    return 0;
381}
382
383# Swap out a sub-experiment - probably because another has failed.  Arguments
384# are testbed and experiment.  Most of the control flow is similar to
385# start_segment, though much simpler.
386sub stop_segment {
387    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
388                                            # per-testbed parameters
389    my $user = $tbparams->{$tb}->{'user'};  # testbed user
390    my $host =                              # Ops node
391        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
392    my $pid = $tbparams->{$tb}->{'project'};# testbed project
393
394    print "Stopping $eid on $tb\n" if $verbose;
395    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
396        "swapexp (out)") || return 0;
397    return 1;
398}
399
400
401$pid = $gid = "dummy";              # Default project and group to pass to
402                                    # $tcl_splitter above.  These are total
403                                    # dummy arguments;  the splitter doesn't
404                                    # use them at all, but we supply them to
405                                    # keep our changes to the parser minimal.
406# Argument processing.
407getopts('t:c:f:ndvN', \%opts);
408$splitter_config = $opts{'c'} || "./splitter.conf";
409$debug = $opts{'d'};
410$verbose = $opts{'v'} || $opts{'d'};
411
412&parse_config("$splitter_config", \%opts) || 
413    die "Cannot read config file $splitter_config: $!\n";
414
415
416$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
417$timeout = $opts{'t'} || $opts{'timeout'};
418$eid = $opts{'experiment'};             # Experiment ID
419$tcl = $opts{'f'} || shift;             # The experiment description
420$master = $opts{'master'};              # Master testbed
421$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
422$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
423$local_script_dir = $opts{'scriptdir'}; # Local scripts
424
425$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
426    die "Must give an SMB share\n";
427$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
428    die "Must give an SMB user\n";
429
430# For now specify these.  We may want to generate them later.
431$gw_pubkey = $opts{'gatewaypubkey'};
432($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
433$gw_secretkey = $opts{'gatewaysecretkey'};
434($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
435
436# tcl program to split experiments (changed during devel)
437$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
438# tclsh to call directly (changed during devel)
439$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
440
441# Prefix to avoid collisions
442$tmpdir .= "/split$$";
443
444print "Temp files are in $tmpdir\n" if $verbose;
445# Create a workspace
446unless (-d "$tmpdir") {
447    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
448}
449
450# Validate scripts directory
451for my $s (@scripts) {
452    die "$local_script_dir/$s not in local script directory. Try -d\n"
453        unless -r "$local_script_dir/$s";
454}
455
456die "Must supply file, master and experiment" unless $master && $tcl && $eid;
457
458if ($opts{'N'} ) {
459    &parse_testbeds($tb_config, $tbparams) ||
460        die "Cannot testbed congfigurations from $tb_config: $!\n";
461}
462else {
463    # Read a hash of per-testbed parameters from the local configurations.
464    my $conf = new IO::File($tb_config) || 
465        die "can't read testbed configutions from $tb_config: $!\n";
466    while (<$conf>) {
467        next if /^#/;
468        chomp;
469        my($tb, $h, $d, $u, $p, $es, $gs, $mes, $mgs, $t, $i, $fs, $boss,
470            $tun) = split(":", $_);
471        $tbparams->{$tb}->{'host'} = $h;
472        $tbparams->{$tb}->{'user'} = $u;
473        $tbparams->{$tb}->{'domain'} = $d;
474        $tbparams->{$tb}->{'project'} = $p;
475        $tbparams->{$tb}->{'gwtype'} = $t;
476        $tbparams->{$tb}->{'expstart'} = $es;
477        $tbparams->{$tb}->{'gwstart'} = $gs;
478        $tbparams->{$tb}->{'mexpstart'} = $mes;
479        $tbparams->{$tb}->{'mgwstart'} = $mgs;
480        $tbparams->{$tb}->{'gwimage'} = $i;
481        $tbparams->{$tb}->{'fs'} = $fs;
482        $tbparams->{$tb}->{'boss'} = $boss;
483        $tbparams->{$tb}->{'tun'} = $tun;
484
485        # Make sure the domain starts with a period
486        $tbparams->{$tb}->{'domain'} = "." . $tbparams->{$tb}->{'domain'}
487            unless $tbparams->{$tb}->{'domain'} =~ /^\./;
488    }
489    $conf->close();
490}
491
492# Open a pipe to the splitter program and start it parsing the experiments
493my $pipe = new IO::Pipe;
494# NB no more -p call on parse call.
495$pipe->reader("$tclsh $tcl_splitter -s -m $master  $pid $gid $eid $tcl") || 
496    die "Cannot execute $tclsh $tcl_splitter -s -m $master $pid $gid $eid $tcl:$!\n";
497
498# Parsing variables
499my $ctb;                        # Current testbed
500my %allocated;                  # If allocated{$tb} > 0, $tb is in use
501my $destfile;                   # File that the sub-experiment tcl file is
502                                # being written to, or "" if none.  Also used
503                                # for hostnames file.
504my $desthandle;                 # File handle for distfile
505my $gateways;                   # when gateway lists are being processed this
506                                # is the testbed whose gateways are being
507                                # gathered.
508my $control_gateway;            # Control net gateway for the current testbed
509my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
510                                # end of the a <-> b connector pair.
511
512# Parse the splitter output.  This loop creates the sub experiments, gateway
513# configurations and hostnames file
514while (<$pipe>) {
515    # Start of a sub-experiment
516    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
517        $ctb = $1;
518
519        # If we know the testbed, start collecting its sub experiment tcl
520        # description.  If not, warn the caller and ignore the configuration of
521        # this testbed.
522        if ($tbparams->{$ctb}) {
523            $allocated{$ctb}++; # Keep track of the testbeds allocated
524
525            unless (-d "$tmpdir/$ctb") {
526                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
527            }
528            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
529
530            $desthandle = new IO::File(">$destfile") || 
531                die "Cannot open $destfile:$!\n";
532        }
533        else { 
534            warn "No such testbed $ctb\n";
535            $destfile = "";
536        }
537        next;
538    };
539
540    # End of that experiment
541    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
542        # Simple syntax check and close out this experiment's tcl description
543        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
544        $desthandle->close();
545        $destfile = $ctb = "";
546        next;
547    };
548
549    # Beginning of a gateway set
550    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
551        $gateways = $1;
552        # If we've heard of this tb, create the config lines for it one at a
553        # time.
554        if ($allocated{$gateways}) {
555            # Just in case.  This directory should already have been created
556            # above.
557            unless (-d "$tmpdir/$gateways") {
558                mkdir("$tmpdir/$gateways") || 
559                    die "Can't create $tmpdir/$gateways: $!";
560            }
561        }
562        else {
563            warn "Gateways given (and ignored) for testbed not in use: " .
564                "$gateways\n";
565            $gateways = 0;
566        }
567        next;
568    };
569    # End of the gateways section.  Output the client config for this testbed
570    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
571        die "Mismatched gateway markers ($1, $gateways)\n" 
572            unless !$gateways || $gateways == $1;
573
574        if ($control_gateway ) {
575            # Client config
576            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
577            my $master_project = $tbparams->{$master}->{'project'};
578            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
579            print $cc "ControlGateway: $control_gateway\n";
580            print $cc "SMBShare: $smb_share\n";
581            print $cc "ProjectUser: $project_user\n";
582            print $cc "ProjectName: $master_project\n";
583            $cc->close();
584        }
585        else { warn "No control gateway for $gateways?\n"; }
586           
587        $gateways = 0;
588        next;
589    };
590    # Beginning of the hostnames list.  Collection is always in the hostnames
591    # file.
592    /^#\s+Begin\s+hostnames/ && do {
593        $destfile = "$tmpdir/hostnames";
594        $desthandle = new IO::File(">$destfile") || 
595            die "Can't open $destfile:$!\n";
596        next;
597    };
598    # end of the hostnames list.
599    /^#\s+End\s+hostnames/ && do {
600        $desthandle->close();
601        $destfile = "";
602        next;
603    };
604
605    # Generate gateway configuration info, one file per line
606    $gateways && do {
607        chomp;
608        my($dtb, $myname, $desthost, $type) = split(" ", $_);
609
610        # Many of these are to simplify print statements
611        my $sdomain =                           # domain for the source
612            $tbparams->{$gateways}->{'domain'};
613        my $ddomain =                           # domain for the destination
614            $tbparams->{$dtb}->{'domain'};
615        my $sproject =                          # Project of the destination
616            $tbparams->{$gateways}->{'project'};
617        my $fs =                                # Master fs node (FQDN)
618            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
619        my $boss =                              # Master boss node (FQDN)
620            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
621        my $remote_script_dir =                 # Remote fed script location
622            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
623        my $active;                             # Is this the active side of
624                                                # the connector?
625
626        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
627        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
628
629        my $conf_file = "$myname$sdomain.gw.conf";
630        # translate to lower case so the `hostname` hack for specifying
631        # configuration files works.
632        $conf_file =~ tr/A-Z/a-z/;
633
634        # If either end of this link is in the master side of the testbed, that
635        # side is the active end. Otherwise the first testbed encountered in
636        # the file will be the active end.  The $active_end variable keeps
637        # track of those decisions
638        if ( $dtb eq $master ) { $active = "false"; }
639        elsif ($gateways eq $master ) { $active = "true"; }
640        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
641        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
642
643        # This is used to create the client configuration.
644        $control_gateway = "$myname$sdomain"
645            if $type =~ /(control|both)/;
646
647        # Write out the file
648        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
649            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
650
651        print $gwconfig "Active: $active\n";
652        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
653        print $gwconfig "BossName: $boss\n";
654        print $gwconfig "FsName: $fs\n";
655        print $gwconfig "Type: $type\n";
656        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
657        print $gwconfig "Peer: $desthost$ddomain\n";
658        print $gwconfig "Pubkeys: " . 
659            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
660        print $gwconfig "Privkeys: " .
661            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
662        $gwconfig->close();
663
664        # This testbed has a gateway (most will) so make a copy of the keys it
665        # needs in this testbed's subdirectory.  start_segment will transfer
666        # them.
667        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
668            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
669                die "Can't copy pubkeys ($gw_pubkey to " . 
670                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
671        }
672        if ($active eq "true" ) {
673            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
674                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
675                    die "Can't copy secret keys ($gw_secretkey to " . 
676                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
677            }
678        }
679
680        #done processing gateway entry, ready for next line
681        next; 
682    };
683    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
684        next if /^#/;
685        chomp;
686        push(@tarfiles, $_);
687        next;
688    };
689
690    next unless $destfile;  # Unidentified testbed, ignore config
691    # local copies that can be used in the substitutions below
692    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
693    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
694    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
695    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
696    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
697    my $expstart = $tbparams->{$ctb}->{'expstart'};
698    my $project = $tbparams->{$ctb}->{'project'};
699
700    # Substitute variables
701    s/GWTYPE/$gwtype/g;
702    s/GWIMAGE/$gwimage/g;
703    if ($ctb eq $master ) {
704        s/GWSTART/$mgwstart/g;
705        s/EXPSTART/$mexpstart/g;
706    }
707    else {
708        s/GWSTART/$gwstart/g;
709        s/EXPSTART/$expstart/g;
710    }
711    # XXX: oh is this bad
712    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
713    s#PROJDIR#/proj/$project/#g;
714    s#EID#$eid#g;
715    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
716    print $desthandle $_;
717}
718$pipe->close();
719die "No nodes in master testbed ($master)\n" unless $allocated{$master};
720
721for my $t  (@tarfiles) {
722    die "tarfile '$t' unreadable: $!\n" unless -r $t;
723    unless (-d "$tmpdir/tarfiles") {
724        mkdir("$tmpdir/tarfiles") || 
725            die "Can't create $tmpdir/tarfiles:$!\n";
726    }
727    copy($t, "$tmpdir/tarfiles") || 
728        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
729}
730
731exit(0) unless $startem;
732
733my %started;                # If $started{$tb} then $tb successfully started
734
735# Start up the slave sub-experiments first
736TESTBED:
737for my $tb  (keys %allocated) {
738    if ($tb ne $master) {
739        if (&start_segment($tb, $eid, $tbparams, $timeout)) { $started{$tb}++; }
740        else { last TESTBED; }
741    }
742}
743
744# Now the master
745if (&start_segment($master, $eid, $tbparams, $timeout)) { 
746    $started{$master}++;
747}
748
749# If any testbed failed, swap the rest out.
750if ( scalar(keys %started) != scalar(keys %allocated)) {
751    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
752    print "Error starting experiment\n";
753    exit(1);
754}
755print "Experiment started\n";
756print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
757system("rm -rf $tmpdir") unless $debug;
758exit(0);    # set the exit value
759
760=pod
761
762=head1 NAME
763
764B<splitter.pl>
765
766=head1 SYNOPSIS
767
768B<splitter.pl> [B<-nd>] [B<-c> F<config_file>] [B<-f> F<experiment_tcl>]
769    [F<experiment_tcl>]
770
771=head1 DESCRIPTION
772
773B<splitter.pl> invokes the DETER experiment parser to split an annotated
774experiment into multiple sub-experments and instantiates the sub-experiments on
775their intended testbeds.  Annotation is accomplished using the
776tb-set-node-testbed command, added to the parser.
777
778Much of the script's behavior depends on the configuration file, specified with
779the B<-c> flag and defaulting to F<./splitter.conf>.
780
781The testbed labels supplied in the B<tb-set-node-testbed> command are
782meaningful based on their presence in the testbeds file.  that file can be
783specified in the configuration file using the B<Testbeds> directive, and
784defaults to F<./testbeds>.  The syntax is described below.
785
786Most of the intermediate files are staged in a sub-directory of a temporary
787files directory and deleted at the end of the script.  Specifying the B<-d>
788flag on the command line avoids the deletion for debbugging.  By default the
789temporary files directory is directory is F</tmp> and can be reset in the
790configuration file using the B<Tmpdir> directive.  Intermediate files are
791stored under a subdirectory formed by adding the process ID of the splitter
792process.  For example, if the temporary files directory is F</tmp> and the
793B<splitter.pl> process ID is 2323, the temporary files will be stored in
794F</tmp/split2323/>.
795
796The expreriment is split out into one experiment description per testbed in the
797temporary directory named as F<experiment.testbed.tcl> where the experiment is
798the experiment ID given in the configuration file, and the testbed is the
799tb-set-node-testbed parameter for the nodes in the file.
800
801If the B<-n> option is absent the sub-experiments are then instantiated on
802their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
803Per-testbed parameters are set in the testbeds file.  Sub-experiments on
804slave testbeds are instantiated in a random order, but the master testbed is
805currently instantiated last.
806
807Scripts to start federation (the federation kit) are copied into the local
808experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
809taken from the directory given by the B<ScriptDir> directive in the
810configuration file.
811
812If any sub-experiment fails to instantiate, the other sub-exeriments are
813swapped out.
814
815=head2 Configuration File
816
817The configuration file is a simple set of colon-separated parameters and
818values.  A configuration file must be present, either specified in the B<-c>
819flag or the default F<./splitter.conf>.  All the parameter names are case
820insensitive, but should not include any whitespace.  Parameter values may
821include whitespace, but no newlines.
822
823Possible parameters are:
824
825=over 5
826
827=item Experiment
828
829The name of the experiment on the various testbeds
830
831=item Master
832
833The master testbed label from the testbeds file, described below.
834
835=item Testbeds
836
837The testbeds file described below, giving per-testbed parameters.  If this
838directive is absent the testbeds file defaults to F<./testbeds>
839
840=item ScriptDir
841
842Location of the default federation scripts, i.e. the federation kit.
843
844=item GatewayPubkey
845
846=item GatewaySecretKey
847
848The names of the files containing secret and public keys to use in setting up
849tunnels between testbeds.  These will eventually be automatically generated.
850
851=item TmpDir
852
853=item TempDir
854
855The directory where temporary files are created.  These are synonyms, but
856should both be specified, B<TmpDir> has priority.  If neither is specified,
857F</tmp> is used.
858
859=item SMBShare
860
861The SMB share on the master testbed that will be exported to remote clients.
862
863=item SMBUser
864
865The experiment user to mount project directories as.  This user needs to be a
866member of the exported experiment - that is one of the users in the project
867containing this experiment on the master testbed.
868
869=item Tclparse
870
871The pathname to the experiment parsing program.  Only developers should set
872this.
873
874=item Tclsh
875
876The pathname to the local oTcl shell.  Only developers should set
877this.
878
879=back
880
881=head2 Testbeds file
882
883The configuration file (F<./testbeds> unless overridden by B<-c>) is a
884colon-separated set of parameters keyed by testbed name.  The fields, in order,
885are:
886
887=over 5
888
889=item name
890
891The testbed to which this line of parameters applies.
892
893=item user
894
895The user under which to make requests to this testbed.  The user running
896B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
897testbed.
898
899=item host
900
901The host name of the testbed's ops node.  The user calling B<splitter.pl> must
902be able to execute commands on this host via L<ssh(1)>.
903
904=item domain
905
906The domain of nodes in this testbed (including the ops host).  This parameter
907should always start with a period.
908
909=item project
910
911The project under which to instantiate sub-experiments on this testbed.
912
913=item gateway type
914
915The node type for inter-testbed gateway nodes on this testbed.
916
917=item experiment start (slave)
918
919The start command to run on experimental nodes when this testbed is used as a
920slave.  In all the start commands the string FEDDIR will be replaced by the
921local experiment's federation scripts directory and the string GWCONF replaced
922by the gatway configuration file.
923
924=item gateway start (slave)
925
926The start command to run on gateway nodes when this testbed is used as a slave.
927The same string substitutions are made in this command as in experiment start.
928
929=item experiment start (master)
930
931The start command to run on experimental nodes when this testbed is used as a
932master.  The same string substitutions are made in this command as in
933experiment start.
934
935=item gateway start (master)
936
937The start command to run on gateway nodes when this testbed is used as a
938master.  The same string substitutions are made in this command as in
939experiment start.
940
941=item gateway image
942
943The disk image to be loaded on a gateway node on this testbed.
944
945=item filesystem node
946
947The node in the master testbed from which filesystems are mounted.
948
949=item boss node
950
951The node in the master testbed that controls the testbed.
952
953=item tunnel configuration
954
955True if the connector needs to do DETER federation.  This parameter will
956probably be removed.
957
958=back
959
960The parsing of the testbeds is extremely simple.  Colons separate each
961field and there is n provision for escaping them at this time.
962
963=head2 New Testbeds File Format
964
965The testbeds file has ben revamped to make it more human readable.  The
966parameters are now named and specified on separate lines of the configuration
967file.  Each testbed's parameters are preceeded by the testbed label in brackets
968([]) on a line by itself.  After that the parameters are specified as
969parameter: value.  This is essentially the same format as the configuration
970file.  Parameters are:
971
972=over 4
973
974=item User
975
976The user under which to make requests to this testbed.  The user running
977B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
978testbed.
979
980=item OpsNode
981
982The host name of the testbed's ops node.  The user calling B<splitter.pl> must
983be able to execute commands on this host via L<ssh(1)>.
984
985=item Domain
986
987The domain of nodes in this testbed (including the ops host).  This parameter
988should always start with a period.
989
990=item Project
991
992The project under which to instantiate sub-experiments on this testbed.
993
994=item ConnectorType
995
996The node type for inter-testbed connector nodes on this testbed.
997
998=item SlaveNodeStartCmd
999
1000The start command to run on experimental nodes when this testbed is used as a
1001slave.  In all the start commands the string FEDDIR will be replaced by the
1002local experiment's federation scripts directory and the string GWCONF replaced
1003by the gatway configuration file.
1004
1005=item SlaveConnectorStartCmd
1006
1007The start command to run on gateway nodes when this testbed is used as a slave.
1008The same string substitutions are made in this command as in experiment start.
1009
1010=item MasterNodeStartCmd
1011
1012The start command to run on experimental nodes when this testbed is used as a
1013master.  The same string substitutions are made in this command as in
1014experiment start.
1015
1016=item MasterConnectorStartCmd
1017
1018The start command to run on gateway nodes when this testbed is used as a
1019master.  The same string substitutions are made in this command as in
1020experiment start.
1021
1022=item ConnectorImage
1023
1024The disk image to be loaded on a gateway node on this testbed.
1025
1026=item FileServer
1027
1028The node in the master testbed from which filesystems are mounted.
1029
1030=item Boss
1031
1032The node in the master testbed that controls the testbed.
1033
1034=item TunnelCfg
1035
1036True if the connector needs to do DETER federation.  This parameter will
1037probably be removed.
1038
1039
1040=back
1041
1042
1043=head1 ENVIRONMENT
1044
1045B<splitter.pl> does not directly make use of environment variables, but calls
1046out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1047environment.
1048
1049=head1 SEE ALSO
1050
1051L<sh(1)>, L<ssh(1)>
1052
1053=cut
Note: See TracBrowser for help on using the repository browser.