source: fedkit/splitter.pl @ e5fee75

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since e5fee75 was e5fee75, checked in by Ted Faber <faber@…>, 17 years ago

multiplexed gateway support

  • Property mode set to 100644
File size: 36.0 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("federate.sh", "smbmount.pl", "make_hosts", "fed-tun.pl");
12my $local_script_dir = ".";
13my($pid, $gid);                 # Process and group IDs for calling parse.tcl
14my $splitter_config;            # Configuration file
15my $debug;                      # True if thecalled in debug mode
16my $verbose;                    # True for extra progress reports
17my $startem;                    # If true, start the sub-experiments
18my $eid;                        # Experiment ID
19my $tcl;                        # The experiment description (topology file)
20my $master;                     # Master testbed
21my $tmpdir;                     # tmp files
22my $tb_config;                  # testbed configurations
23my $smb_share;                  # Share to mount from the master
24my $project_user;               # User to mount project dirs as
25my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
26my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
27                                # basename)
28my $tcl_splitter;               # tcl program to split experiments
29                                # (changed during devel)
30my $tclsh;                      # tclsh to call directly (changed during devel)
31my $muxmax;                     # Maximum number of links/lans over 1 gw pair
32my @tarfiles;                   # Tarfiles in use by this experiment
33my @rpms;                       # Rpms in use by this experiment
34my $timeout;                    # The timeout to use for experiment swap ins
35my %opts;                       # Parsed options
36
37my $tbparams = {};              # Map of the per-testbed parameters from the
38                                # testbeds file.  It is a reference to a hash
39                                # of hashes (because it's passed around a bunch
40                                # and it's nicer to have one access pattern
41                                # throughout the script, in the main loop and
42                                # the subroutines).  That access is exemplified
43                                # by  $tbparams->{'deter'}->{'domain'} which is
44                                # the domain parameter of the DETER testbed. 
45my $fail_soft;                  # Do not swap failed sub-experiments out
46my $max_children=1;             # Maximum number of simultaneous swap-ins
47
48# Parse the config file.  The format is a colon-separated parameter name
49# followed by the value of that parameter to the end of the line.  This parses
50# that format and puts the parameters into the referenced hash.  Parameter
51# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
52# failure (e.g. file open) and 1 on success.
53sub parse_config {
54    my($file, $href) = @_;
55    my $fh = new IO::File($file);
56       
57    unless ($fh) {
58        warn "Can't open $file: $!\n";
59        return 0;
60    }
61
62    while (<$fh>) {
63        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
64        chomp;
65        /^([^:]+):\s*(.*)/ && do {
66            my $key = $1; 
67
68            $key =~ tr/A-Z/a-z/;
69            $href->{$key} = $2;
70            next;
71        };
72        warn "Unparasble line in $file: $_\n";
73    }
74    $fh->close();   # It will close when it goes out of scope, but...
75    return 1;
76}
77
78# Parse an easier-to-read testbeds file (the original was comma-separated
79# unadorned strings).  The format is a testbed scope as [testbed] followed by
80# the colon-separated attribute-value pairs for the testbed.  Right now these
81# go into a set of global hashes indexed by testbed, but that should probably
82# change.
83
84sub parse_testbeds {
85    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
86    my $fh = new IO::File($file);  # Testbeds filehandle
87    my $tb;                         # Current testbed
88    # Convert attribute in the file to tbparams hash key
89    my %attr_to_hash = (
90        "opsnode" => "host",
91        "user" => "user",
92        "domain" => "domain",
93        "project" => "project",
94        "connectortype" => "gwtype",
95        "slavenodestartcmd" => "expstart",
96        "slaveconnectorstartcmd" => "gwstart",
97        "masternodestartcmd" => "mexpstart",
98        "masterconnectorstartcmd" => "mgwstart",
99        "connectorimage" => "gwimage",
100        "fileserver" => "fs",
101        "boss" => "boss",
102        "tunnelcfg" => "tun"
103    );
104
105
106    unless ($fh) {
107        warn "Can't open $file: $!\n";
108        return 0;
109    }
110
111    while (<$fh>) {
112        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
113        chomp;
114        /^\s*\[(.*)\]/ && do {
115            $tb = $1;
116            $tbparams->{$tb} = {} unless $tbparams->{$tb};
117            next;
118        };
119
120        /^([^:]+):\s*(.*)/ && do {
121            unless ($tb) {
122                warn "Ignored attribute definition before testbed " .
123                    "defined in $file: $_\n";
124                next;
125            }
126            my $key = $1; 
127            $key =~ tr/A-Z/a-z/;
128            my $var = $attr_to_hash{$key};
129
130            if ($var) { $tbparams->{$tb}->{$var} = $2; }
131            else { warn "Unknown keyword $key in $file\n"; }
132
133            next;
134        };
135        warn "Unparasble line in $file: $_\n";
136    }
137    $fh->close();   # It will close when it goes out of scope, but...
138    return 1;
139}
140
141
142# use scp to transfer a file, reporting true if successful and false otherwise.
143# Parameters are the local file name, the ssh host destination (either hostname
144# oe user@host), and an optional destination file name or directory.  If no
145# destination is given, the file is transferred to the given user's home
146# directory.  If only a machine is given in the ssh host destination, the
147# current user is used.
148sub scp_file {
149    my($file, $user, $host, $dest) = @_;
150
151    # XXX system with a relative pathname is sort of gross
152    system("scp $file $user\@$host:$dest");
153    if ($?) {
154        warn "scp failed $?\n";
155        return 0;
156    }
157    else { return 1; }
158}
159
160# use ssh to execute the given command on the machine (and as the user) in
161# $where.  Parameters are the ssh destination directive ($where) and the
162# command to execute, and a prefix to be placed on a message generated if the
163# command fails.   On failure print a warning if a warning prefix was given and
164# return false.  If timeout is given fork a process and set an alarm of that
165# many seconds.  Timeouts also return 0;
166sub ssh_cmd {
167    my($user, $host, $cmd, $wname, $timeout) = @_;
168    my $pid;                # Child pid
169
170    $timeout = 0 unless $timeout;   # Force default timeout
171
172    if ( $pid = fork () ) {
173        # Parent process
174        # The eval acts as a signal catcher.  If the alarm goes off inside
175        # the eval, the die will put "alarm\n" into $@, otherwise the
176        # return value of the execution in the child process will be used.
177        my $rv = eval {
178            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
179            my $rv;
180
181            alarm $timeout;
182            $rv = waitpid($pid, 0);
183            alarm 0;
184            $rv;
185        };
186
187        # If the eval succeeded, $@ will be null and we can use $rv, which
188        # is the return code from the subprocess.  If the eval timed out,
189        # print a warning and assume the best.
190        if ($@ eq "alarm\n" ) {
191            warn "$wname timed out - pid $pid still live\n";
192            return 1;
193        }
194        else {
195            return $rv;
196        }
197    }
198    else {
199        # Child process
200        exec("ssh $user\@$host $cmd");
201        exit 0;
202    }
203}
204
205# Ship local copies of the federation scripts out to the given host.  If any of
206# the script transfers fails, return 0.  The scripts to transfer are from the
207# global @scripts and are found locally in $local_script_dir (another global).
208sub ship_scripts {
209    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
210    my $s;
211
212    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
213    for $s (@scripts) {
214        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
215            return 0;
216    }
217    return 1;
218}
219
220# Ship per-testbed configuration generated by this script to the remote /proj
221# directories on the remote testbeds
222sub ship_configs {
223    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
224    my($d, $f);
225
226    $d = IO::Dir->new($src_dir) || return 0;
227
228    # All directories under $tmpdir are 770 so we can delete them later.
229    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
230    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
231    while ( $f = $d->read()) {
232        next if $f =~ /^\./;
233        if ( -d "$src_dir/$f" ) {
234            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
235                return 0;
236        }
237        else {
238            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
239        }
240    }
241    return 1;
242}
243
244# Start a sub section of the experiment on a given testbed.  The testbed and
245# the user to start the experiment as are pulled from the global per-testbed
246# hash, passed in as $tbparams, as is the project name on the remote testbed.
247# Parameters are the testbed and the experiment id.  Configuration files are
248# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
249# Then the current state of the experiment determined using expinfo.  From that
250# state, the experiment is either created, modified or spapped in.  If
251# everything succeeds, true is returned.  If the global verbose is set progress
252# messages are printed.
253sub start_segment {
254    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
255                                            # per-testbed parameters and remote
256                                            # swap-in timeout
257    my $host =                              # Host name of remote ops (FQDN)
258        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
259    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
260    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
261                                            # experiment under
262    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
263                                            # sub-experiment
264    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
265    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
266    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
267    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
268    my $state;                              # State of remote experiment
269    my $status = new IO::Pipe;              # The pipe to get status
270
271    # Determine the status of the remote experiment
272    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
273        die "Can't ssh to $user\@$host:$!\n";
274
275    # XXX: this is simple now.  Parsing may become more complex
276    while (<$status>) {
277        /State: (\w+)/ && ($state = $1);
278        /No\s+such\s+experiment/ && ($state = "none");
279    }
280    $status->close();
281    print "$tb: $state\n";
282
283    # Copy the experiment definition data over
284    print "transferring subexperiment to $tb\n" if $verbose;
285    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
286    # Clear out any old experiment data; if not deleted, copies over it by
287    # different users will fail.
288    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
289    # force the /bin/sh interpretation of the trailing * (which we need to keep
290    # tmp around))  Again, this needs to be done more properly once we have a
291    # non-ssh interface here.)
292    print "clearing experiment subdirs on $tb\n" if $verbose;
293    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
294        return 0;
295    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
296    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
297        return 0;
298    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
299    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
300        return 0;
301    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
302    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
303        return 0;
304    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
305    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
306        return 0;
307    # Remote experiment is active.  Modify it.
308    if ($state eq "active") {
309        print "Transferring federation support files to $tb\n" if $verbose;
310        # First copy new scripts and hostinfo into the remote /proj
311        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
312            return 0;
313        &ship_scripts($host, $user, $proj_dir) || return 0;
314        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
315
316        if ( -d "$tmpdir/tarfiles") {
317            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
318                return 0;
319        }
320
321        if ( -d "$tmpdir/rpms") {
322            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
323                return 0;
324        }
325
326        print "Modifying $eid in place on $tb\n" if $verbose;
327        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
328            "$eid $tclfile", "modexp", $timeout) || return 0;
329        return 1;
330    }
331
332    # Remote experiment is swapped out, modify it and swap it in.
333    if ($state eq "swapped") {
334        print "Transferring federation support files to $tb\n" if $verbose;
335        # First copy new scripts and hostinfo into the remote /proj (because
336        # the experiment exists, the directory tree should be there.
337        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
338            return 0;
339        &ship_scripts($host, $user, $proj_dir) || return 0;
340        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
341        if ( -d "$tmpdir/tarfiles") {
342            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
343                return 0;
344        }
345
346        if ( -d "$tmpdir/rpms") {
347            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
348                return 0;
349        }
350
351        print "Modifying $eid on $tb\n" if $verbose;
352        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
353            "modexp") || return 0;
354        print "Swapping $eid in on $tb\n" if $verbose;
355        # Now start up
356        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
357            "swapexp", $timeout) || return 0;
358        return 1;
359    }
360
361    # No remote experiment.  Create one.  We do this in 2 steps so we can put
362    # the configuration files and scripts into the new experiment directories.
363    if ($state eq "none") {
364
365        if ( -d "$tmpdir/tarfiles") {
366            # Tarfiles have to exist for the creation to work
367            print "copying tarfiles to $tb\n";
368            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
369                return 0;
370        }
371
372        if ( -d "$tmpdir/rpms") {
373            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
374                return 0;
375        }
376
377        print "Creating $eid on $tb\n" if $verbose;
378        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
379            "$pid -e $eid $tclfile", "startexp") || return 0;
380        # After startexp succeeds, the per-experiment directories exist on the
381        # remote testbed.
382        print "Transferring federation support files to $tb\n" if $verbose;
383        # First copy new scripts and hostinfo into the remote /proj
384        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
385            return 0;
386        &ship_scripts($host, $user, $proj_dir) || return 0;
387        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
388        # Now start up
389        print "Swapping $eid in on $tb\n" if $verbose;
390        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
391            "swapexp", $timeout) || return 0;
392        return 1;
393    }
394
395    # Every branch for a known state returns.  If execution gets here, the
396    # state is unknown.
397    warn "unknown state: $state\n";
398    return 0;
399}
400
401# Swap out a sub-experiment - probably because another has failed.  Arguments
402# are testbed and experiment.  Most of the control flow is similar to
403# start_segment, though much simpler.
404sub stop_segment {
405    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
406                                            # per-testbed parameters
407    my $user = $tbparams->{$tb}->{'user'};  # testbed user
408    my $host =                              # Ops node
409        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
410    my $pid = $tbparams->{$tb}->{'project'};# testbed project
411
412    print "Stopping $eid on $tb\n" if $verbose;
413    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
414        "swapexp (out)") || return 0;
415    return 1;
416}
417
418
419$pid = $gid = "dummy";              # Default project and group to pass to
420                                    # $tcl_splitter above.  These are total
421                                    # dummy arguments;  the splitter doesn't
422                                    # use them at all, but we supply them to
423                                    # keep our changes to the parser minimal.
424# Argument processing.
425getopts('Ft:c:p:f:ndvN', \%opts);
426$splitter_config = $opts{'c'} || "./splitter.conf";
427$debug = $opts{'d'};
428$verbose = $opts{'v'} || $opts{'d'};
429
430&parse_config("$splitter_config", \%opts) || 
431    die "Cannot read config file $splitter_config: $!\n";
432
433warn "-N does nothing now.  Only one testbeds format supported.\n"
434    if $opts{'N'};
435$fail_soft = $opts{'F'} || $opts{'failsoft'};
436$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
437$timeout = $opts{'t'} || $opts{'timeout'};
438$eid = $opts{'experiment'};             # Experiment ID
439$tcl = $opts{'f'} || shift;             # The experiment description
440$master = $opts{'master'};              # Master testbed
441$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
442$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
443$local_script_dir = $opts{'scriptdir'}; # Local scripts
444$muxmax  = $opts{'muxlimit'} || 3;      # Number of connections muxed on one
445                                        # gateway
446
447$max_children = $opts{'p'} || $opts{'maxchildren'} 
448    if $opts{'p'} || $opts{'maxchildren'};
449
450$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
451    die "Must give an SMB share\n";
452$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
453    die "Must give an SMB user\n";
454
455# For now specify these.  We may want to generate them later.
456$gw_pubkey = $opts{'gatewaypubkey'};
457($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
458$gw_secretkey = $opts{'gatewaysecretkey'};
459($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
460
461# tcl program to split experiments (changed during devel)
462$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
463# tclsh to call directly (changed during devel)
464$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
465
466# Prefix to avoid collisions
467$tmpdir .= "/split$$";
468
469print "Temp files are in $tmpdir\n" if $verbose;
470# Create a workspace
471unless (-d "$tmpdir") {
472    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
473}
474
475# Validate scripts directory
476for my $s (@scripts) {
477    die "$local_script_dir/$s not in local script directory. Try -d\n"
478        unless -r "$local_script_dir/$s";
479}
480
481die "Must supply file, master and experiment" unless $master && $tcl && $eid;
482
483&parse_testbeds($tb_config, $tbparams) ||
484    die "Cannot testbed congfigurations from $tb_config: $!\n";
485
486# Open a pipe to the splitter program and start it parsing the experiments
487my $pipe = new IO::Pipe;
488# NB no more -p call on parse call.
489$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master  $pid $gid $eid $tcl") || 
490    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
491
492# Parsing variables
493my $ctb;                        # Current testbed
494my %allocated;                  # If allocated{$tb} > 0, $tb is in use
495my $destfile;                   # File that the sub-experiment tcl file is
496                                # being written to, or "" if none.  Also used
497                                # for hostnames file.
498my $desthandle;                 # File handle for distfile
499my $gateways;                   # when gateway lists are being processed this
500                                # is the testbed whose gateways are being
501                                # gathered.
502my $control_gateway;            # Control net gateway for the current testbed
503my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
504                                # end of the a <-> b connector pair.
505
506# Parse the splitter output.  This loop creates the sub experiments, gateway
507# configurations and hostnames file
508while (<$pipe>) {
509    # Start of a sub-experiment
510    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
511        $ctb = $1;
512
513        # If we know the testbed, start collecting its sub experiment tcl
514        # description.  If not, warn the caller and ignore the configuration of
515        # this testbed.
516        if ($tbparams->{$ctb}) {
517            $allocated{$ctb}++; # Keep track of the testbeds allocated
518
519            unless (-d "$tmpdir/$ctb") {
520                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
521            }
522            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
523
524            $desthandle = new IO::File(">$destfile") || 
525                die "Cannot open $destfile:$!\n";
526        }
527        else { 
528            warn "No such testbed $ctb\n";
529            $destfile = "";
530        }
531        next;
532    };
533
534    # End of that experiment
535    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
536        # Simple syntax check and close out this experiment's tcl description
537        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
538        $desthandle->close();
539        $destfile = $ctb = "";
540        next;
541    };
542
543    # Beginning of a gateway set
544    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
545        $gateways = $1;
546        # If we've heard of this tb, create the config lines for it one at a
547        # time.
548        if ($allocated{$gateways}) {
549            # Just in case.  This directory should already have been created
550            # above.
551            unless (-d "$tmpdir/$gateways") {
552                mkdir("$tmpdir/$gateways") || 
553                    die "Can't create $tmpdir/$gateways: $!";
554            }
555        }
556        else {
557            warn "Gateways given (and ignored) for testbed not in use: " .
558                "$gateways\n";
559            $gateways = 0;
560        }
561        next;
562    };
563    # End of the gateways section.  Output the client config for this testbed
564    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
565        die "Mismatched gateway markers ($1, $gateways)\n" 
566            unless !$gateways || $gateways == $1;
567
568        if ($control_gateway ) {
569            # Client config
570            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
571            my $master_project = $tbparams->{$master}->{'project'};
572            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
573            print $cc "ControlGateway: $control_gateway\n";
574            print $cc "SMBShare: $smb_share\n";
575            print $cc "ProjectUser: $project_user\n";
576            print $cc "ProjectName: $master_project\n";
577            $cc->close();
578        }
579        else { warn "No control gateway for $gateways?\n"; }
580           
581        $gateways = 0;
582        next;
583    };
584    # Beginning of the hostnames list.  Collection is always in the hostnames
585    # file.
586    /^#\s+Begin\s+hostnames/ && do {
587        $destfile = "$tmpdir/hostnames";
588        $desthandle = new IO::File(">$destfile") || 
589            die "Can't open $destfile:$!\n";
590        next;
591    };
592    # end of the hostnames list.
593    /^#\s+End\s+hostnames/ && do {
594        $desthandle->close();
595        $destfile = "";
596        next;
597    };
598
599    # Generate gateway configuration info, one file per line
600    $gateways && do {
601        chomp;
602        my($dtb, $myname, $desthost, $type) = split(" ", $_);
603
604        # Many of these are to simplify print statements
605        my $sdomain =                           # domain for the source
606            $tbparams->{$gateways}->{'domain'};
607        my $ddomain =                           # domain for the destination
608            $tbparams->{$dtb}->{'domain'};
609        my $sproject =                          # Project of the destination
610            $tbparams->{$gateways}->{'project'};
611        my $fs =                                # Master fs node (FQDN)
612            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
613        my $boss =                              # Master boss node (FQDN)
614            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
615        my $remote_script_dir =                 # Remote fed script location
616            "/proj/" . $tbparams->{$dtb}->{'project'} . "/exp/$eid/tmp";
617        my $active;                             # Is this the active side of
618                                                # the connector?
619
620        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
621        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
622
623        my $conf_file = "$myname$sdomain.gw.conf";
624        # translate to lower case so the `hostname` hack for specifying
625        # configuration files works.
626        $conf_file =~ tr/A-Z/a-z/;
627
628        # If either end of this link is in the master side of the testbed, that
629        # side is the active end. Otherwise the first testbed encountered in
630        # the file will be the active end.  The $active_end variable keeps
631        # track of those decisions
632        if ( $dtb eq $master ) { $active = "false"; }
633        elsif ($gateways eq $master ) { $active = "true"; }
634        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
635        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
636
637        # This is used to create the client configuration.
638        $control_gateway = "$myname$sdomain"
639            if $type =~ /(control|both)/;
640
641        # Write out the file
642        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
643            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
644
645        print $gwconfig "Active: $active\n";
646        print $gwconfig "TunnelCfg: " . $tbparams->{$gateways}->{'tun'} . "\n";
647        print $gwconfig "BossName: $boss\n";
648        print $gwconfig "FsName: $fs\n";
649        print $gwconfig "Type: $type\n";
650        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
651        print $gwconfig "Peer: $desthost$ddomain\n";
652        print $gwconfig "Pubkeys: " . 
653            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
654        print $gwconfig "Privkeys: " .
655            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
656        $gwconfig->close();
657
658        # This testbed has a gateway (most will) so make a copy of the keys it
659        # needs in this testbed's subdirectory.  start_segment will transfer
660        # them.
661        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
662            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
663                die "Can't copy pubkeys ($gw_pubkey to " . 
664                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
665        }
666        if ($active eq "true" ) {
667            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
668                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
669                    die "Can't copy secret keys ($gw_secretkey to " . 
670                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
671            }
672        }
673
674        #done processing gateway entry, ready for next line
675        next; 
676    };
677    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
678        next if /^#/;
679        chomp;
680        push(@tarfiles, $_);
681        next;
682    };
683    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
684        next if /^#/;
685        chomp;
686        push(@rpms, $_);
687        next;
688    };
689
690    next unless $destfile;  # Unidentified testbed, ignore config
691    # local copies that can be used in the substitutions below
692    my $gwtype = $tbparams->{$ctb}->{'gwtype'};
693    my $gwimage = $tbparams->{$ctb}->{'gwimage'};
694    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'};
695    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'};
696    my $gwstart = $tbparams->{$ctb}->{'gwstart'};
697    my $expstart = $tbparams->{$ctb}->{'expstart'};
698    my $project = $tbparams->{$ctb}->{'project'};
699
700    # Substitute variables
701    s/GWTYPE/$gwtype/g;
702    s/GWIMAGE/$gwimage/g;
703    if ($ctb eq $master ) {
704        s/GWSTART/$mgwstart/g;
705        s/EXPSTART/$mexpstart/g;
706    }
707    else {
708        s/GWSTART/$gwstart/g;
709        s/EXPSTART/$expstart/g;
710    }
711    # XXX: oh is this bad
712    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
713    s#PROJDIR#/proj/$project/#g;
714    s#EID#$eid#g;
715    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
716    print $desthandle $_;
717}
718$pipe->close();
719die "No nodes in master testbed ($master)\n" unless $allocated{$master};
720
721# Copy tarfiles and rpms needed at remote sites to the staging directories.
722# Start_segment will distribute them
723for my $t  (@tarfiles) {
724    die "tarfile '$t' unreadable: $!\n" unless -r $t;
725    unless (-d "$tmpdir/tarfiles") {
726        mkdir("$tmpdir/tarfiles") || 
727            die "Can't create $tmpdir/tarfiles:$!\n";
728    }
729    copy($t, "$tmpdir/tarfiles") || 
730        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
731}
732
733for my $r  (@rpms) {
734    die "rpm '$r' unreadable: $!\n" unless -r $r;
735    unless (-d "$tmpdir/rpms") {
736        mkdir("$tmpdir/rpms") || 
737            die "Can't create $tmpdir/rpms:$!\n";
738    }
739    copy($r, "$tmpdir/rpms") || 
740        die "Can't copy $r to  $tmpdir/rpms:$!\n";
741}
742
743exit(0) unless $startem;
744
745my %started;                # If $started{$tb} then $tb successfully started
746my %child;                  # If $child{$pid} then a process with that pid is
747                            # working on a starting a segment
748my $nworking = 0;           # Number of children working on swapin
749my $pid;                    # Scratch variable for pids
750
751# Start up the slave sub-experiments first
752TESTBED:
753for my $tb  (keys %allocated) {
754    if ( $tb ne $master ) {
755        while ( $nworking == $max_children ) {
756            print "Waiting for a child process to complete\n" if $verbose;
757            if (($pid = wait()) != -1 ) {
758                # The $? >> 8 is the exit code of the subprocess, which is
759                # non-zero if the &start_segment routine failed.
760                my $exit_code = ($? >> 8);
761
762                print "Child $pid completed exit code ($exit_code)\n"
763                    if $verbose;
764                $nworking--;
765                $started{$child{$pid}}++ unless $exit_code;
766                if ($child{$pid} ) { delete $child{$pid}; }
767                else { warn "Reaped a pid we did not start?? ($pid)\n"; }
768                last TESTBED if $exit_code;
769            }
770            else { warn "wait returned without reaping: $!\n"; }
771        }
772        if ( $pid = fork() ) {
773            # Parent process
774            $nworking ++;
775            $child{$pid} = $tb;
776            print "Started process $pid to start testbed $tb\n"
777                if $verbose;
778        }
779        else {
780            # Child.  Note that we reverse the sense of the return code when it
781            # becomes an exit value.  Zero exit values indicate success.
782            exit(!&start_segment($tb, $eid, $tbparams, $timeout));
783        }
784    }
785}
786
787# Now wait for any still running processes.
788while ( $nworking ) {
789    print "Waiting for a child process to complete ($nworking running)\n" 
790        if $verbose;
791    if (($pid = wait()) != -1 ) {
792        # The $? >> 8 is the exit code of the subprocess, which is
793        # non-zero if the &start_segment routine failed.
794        my $exit_code = ($? >> 8);
795
796        print "Child $pid completed exit code ($exit_code)\n"
797            if $verbose;
798        $nworking--;
799        $started{$child{$pid}}++ unless $exit_code;
800        if ($child{$pid} ) { delete $child{$pid}; }
801        else { warn "Reaped a pid we did not start?? ($pid)\n"; }
802    }
803    else { warn "wait returned without reaping: $!\n"; }
804}
805
806# Now the master
807if (&start_segment($master, $eid, $tbparams, $timeout)) { 
808    $started{$master}++;
809}
810
811# If any testbed failed, swap the rest out.
812if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
813    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
814    print "Error starting experiment\n";
815    exit(1);
816}
817print "Experiment started\n";
818print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
819system("rm -rf $tmpdir") unless $debug;
820exit(0);    # set the exit value
821
822=pod
823
824=head1 NAME
825
826B<splitter.pl>
827
828=head1 SYNOPSIS
829
830B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
831    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
832
833=head1 DESCRIPTION
834
835B<splitter.pl> invokes the DETER experiment parser to split an annotated
836experiment into multiple sub-experments and instantiates the sub-experiments on
837their intended testbeds.  Annotation is accomplished using the
838tb-set-node-testbed command, added to the parser.
839
840Much of the script's behavior depends on the configuration file, specified with
841the B<-c> flag and defaulting to F<./splitter.conf>.
842
843The testbed labels supplied in the B<tb-set-node-testbed> command are
844meaningful based on their presence in the testbeds file.  that file can be
845specified in the configuration file using the B<Testbeds> directive, and
846defaults to F<./testbeds>.  The syntax is described below.
847
848Most of the intermediate files are staged in a sub-directory of a temporary
849files directory and deleted at the end of the script.  Specifying the B<-d>
850flag on the command line avoids the deletion for debbugging.  By default the
851temporary files directory is directory is F</tmp> and can be reset in the
852configuration file using the B<Tmpdir> directive.  Intermediate files are
853stored under a subdirectory formed by adding the process ID of the splitter
854process.  For example, if the temporary files directory is F</tmp> and the
855B<splitter.pl> process ID is 2323, the temporary files will be stored in
856F</tmp/split2323/>.
857
858The expreriment is split out into one experiment description per testbed in the
859temporary directory named as F<experiment.testbed.tcl> where the experiment is
860the experiment ID given in the configuration file, and the testbed is the
861tb-set-node-testbed parameter for the nodes in the file.
862
863If the B<-n> option is absent the sub-experiments are then instantiated on
864their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
865Per-testbed parameters are set in the testbeds file.  Sub-experiments on
866slave testbeds are instantiated in a random order, but the master testbed is
867currently instantiated last.
868
869Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
870and the maximum number of simultaneous processes to start.
871
872Scripts to start federation (the federation kit) are copied into the local
873experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
874taken from the directory given by the B<ScriptDir> directive in the
875configuration file.
876
877If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
878F<splitter.conf>.
879
880If any sub-experiment fails to instantiate, the other sub-exeriments are
881swapped out.  B<-F> avoids this swap out, which can also be specified as
882B<SoftFail: true> in F<splitter.conf>
883
884=head2 Configuration File
885
886The configuration file is a simple set of colon-separated parameters and
887values.  A configuration file must be present, either specified in the B<-c>
888flag or the default F<./splitter.conf>.  All the parameter names are case
889insensitive, but should not include any whitespace.  Parameter values may
890include whitespace, but no newlines.
891
892Possible parameters are:
893
894=over 5
895
896=item Experiment
897
898The name of the experiment on the various testbeds
899
900=item Master
901
902The master testbed label from the testbeds file, described below.
903
904=item Testbeds
905
906The testbeds file described below, giving per-testbed parameters.  If this
907directive is absent the testbeds file defaults to F<./testbeds>
908
909=item ScriptDir
910
911Location of the default federation scripts, i.e. the federation kit.
912
913=item GatewayPubkey
914
915=item GatewaySecretKey
916
917The names of the files containing secret and public keys to use in setting up
918tunnels between testbeds.  These will eventually be automatically generated.
919
920=item TmpDir
921
922=item TempDir
923
924The directory where temporary files are created.  These are synonyms, but
925should both be specified, B<TmpDir> has priority.  If neither is specified,
926F</tmp> is used.
927
928=item SMBShare
929
930The SMB share on the master testbed that will be exported to remote clients.
931
932=item SMBUser
933
934The experiment user to mount project directories as.  This user needs to be a
935member of the exported experiment - that is one of the users in the project
936containing this experiment on the master testbed.
937
938=item Timeout
939
940Value in seconds after which a swap-in operatioin will be considered a success.
941Often long swap-ins will hang when there are partial failures.  This works
942around this issue.  (This behavior can be requested on the command line by
943specifying B<-t> I<secs>.)
944
945=item FailSoft
946
947If not set, failure of any sub experiment swaps the rest out.  Setting this to
948any value avoids this swap out.  (This behavior can be requested on the command
949line by specifying B<-F>.)
950
951=item MuxLimit
952
953The maximum bumber of links/lans carried by one gateway pair
954
955=item Tclparse
956
957The pathname to the experiment parsing program.  Only developers should set
958this.
959
960=item Tclsh
961
962The pathname to the local oTcl shell.  Only developers should set
963this.
964
965=back
966
967=head2 Testbeds file
968
969The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
970scoped attribute-value pairs where each attribute is specified on a separate
971line of the configuration file.  Each testbed's parameters are preceeded by the
972testbed label in brackets ([]) on a line by itself.  After that the parameters
973are specified as parameter: value.  This is essentially the same format as the
974configuration file.  Parameters are:
975
976=over 4
977
978=item User
979
980The user under which to make requests to this testbed.  The user running
981B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
982testbed.
983
984=item OpsNode
985
986The host name of the testbed's ops node.  The user calling B<splitter.pl> must
987be able to execute commands on this host via L<ssh(1)>.
988
989=item Domain
990
991The domain of nodes in this testbed (including the ops host).  This parameter
992should always start with a period.
993
994=item Project
995
996The project under which to instantiate sub-experiments on this testbed.
997
998=item ConnectorType
999
1000The node type for inter-testbed connector nodes on this testbed.
1001
1002=item SlaveNodeStartCmd
1003
1004The start command to run on experimental nodes when this testbed is used as a
1005slave.  In all the start commands the following string substitutions are made:
1006
1007=over 10
1008
1009=item FEDDIR
1010
1011The local experiment's federation scripts directory.  Each local experiment
1012will have this replaced by the scripts directory on its local boss.
1013
1014=item GWCONF
1015
1016The full pathname of the gateway configuration file.  As with FEDDIR, this is
1017on the local boss.
1018
1019=item PROJDIR
1020
1021The project directory on the local boss.
1022
1023=item EID
1024
1025The local experiment name.
1026
1027=back
1028
1029All startcmds specified in F<testbeds> undergo these expansions.
1030
1031=item SlaveConnectorStartCmd
1032
1033The start command to run on gateway nodes when this testbed is used as a slave.
1034The same string substitutions are made in this command as in SlaveNodeStartCmd.
1035
1036=item MasterNodeStartCmd
1037
1038The start command to run on experimental nodes when this testbed is used as a
1039master.  The same string substitutions are made in this command as in
1040SlaveNodeStartCmd.
1041
1042=item MasterConnectorStartCmd
1043
1044The start command to run on gateway nodes when this testbed is used as a
1045master.  The same string substitutions are made in this command as in
1046SlaveNodeStartCmd.
1047
1048=item ConnectorImage
1049
1050The disk image to be loaded on a gateway node on this testbed.
1051
1052=item FileServer
1053
1054The node in the master testbed from which filesystems are mounted.
1055
1056=item Boss
1057
1058The node in the master testbed that controls the testbed.
1059
1060=item TunnelCfg
1061
1062True if the connector needs to do DETER federation.  This parameter will
1063probably be removed.
1064
1065
1066=back
1067
1068=head1 ENVIRONMENT
1069
1070B<splitter.pl> does not directly make use of environment variables, but calls
1071out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1072environment.
1073
1074=head1 BUGS
1075
1076A deprecated B<-N> flag was used to select testbeds file format.  Only one
1077format is supported now, and B<-N> generates a warning, but otherwise does not
1078affect B<splitter.pl>.
1079
1080=head1 SEE ALSO
1081
1082L<sh(1)>, L<ssh(1)>
1083
1084=cut
Note: See TracBrowser for help on using the repository browser.