source: fedkit/splitter.pl @ f3691ff

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since f3691ff was f3691ff, checked in by Ted Faber <faber@…>, 16 years ago

checkpoint includes some ucb and some FC6 code

  • Property mode set to 100644
File size: 40.6 KB
Line 
1#!/usr/bin/perl
2
3use strict;
4
5use Getopt::Std;
6use IO::File;
7use IO::Dir;
8use IO::Pipe;
9use File::Copy;
10
11my @scripts = ("fed_bootstrap", "federate.sh", "smbmount.FreeBSD.pl", 
12    "smbmount.Linux.pl", "make_hosts", "fed-tun.pl", "fed_evrepeater",
13    "rc.accounts.patch");
14my $local_script_dir = ".";
15my($pid, $gid);                 # Process and group IDs for calling parse.tcl
16my $splitter_config;            # Configuration file
17my $debug;                      # True if thecalled in debug mode
18my $verbose;                    # True for extra progress reports
19my $startem;                    # If true, start the sub-experiments
20my $eid;                        # Experiment ID
21my $tcl;                        # The experiment description (topology file)
22my $master;                     # Master testbed
23my $tmpdir;                     # tmp files
24my $tb_config;                  # testbed configurations
25my $smb_share;                  # Share to mount from the master
26my $project_user;               # User to mount project dirs as
27my $auth_proj;                  # Local project for resource access
28my($gw_pubkey, $gw_pubkey_base);# Connector pubkey (full path & basename)
29my($gw_secretkey, $gw_secretkey_base);# Connector secret key (full path &
30                                # basename)
31my($keytype);                   # Type (DSA or RSA) of generated gateway keys
32my $tcl_splitter;               # tcl program to split experiments
33                                # (changed during devel)
34my $tclsh;                      # tclsh to call directly (changed during devel)
35my $fedd_client;                # Program to call for testbed access params
36my $muxmax;                     # Maximum number of links/lans over 1 gw pair
37my @tarfiles;                   # Tarfiles in use by this experiment
38my @rpms;                       # Rpms in use by this experiment
39my $timeout;                    # The timeout to use for experiment swap ins
40my %opts;                       # Parsed options
41
42my $tbparams = {};              # Map of the per-testbed parameters from the
43                                # testbeds file.  It is a reference to a hash
44                                # of hashes (because it's passed around a bunch
45                                # and it's nicer to have one access pattern
46                                # throughout the script, in the main loop and
47                                # the subroutines).  That access is exemplified
48                                # by  $tbparams->{'deter'}->{'domain'} which is
49                                # the domain parameter of the DETER testbed. 
50my $fail_soft;                  # Do not swap failed sub-experiments out
51my $max_children=1;             # Maximum number of simultaneous swap-ins
52
53# Default commands for starting experiment and gateway nodes.  Testbeds can
54# override these.  (The 'm' prefixed commands are for operating as the master
55# testbed.)
56my $def_expstart = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
57my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
58my $def_gwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
59my $def_mgwstart = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
60my $def_gwimage = "FBSD61-TUNNEL2";
61my $def_gwtype = "pc";
62
63# Parse the config file.  The format is a colon-separated parameter name
64# followed by the value of that parameter to the end of the line.  This parses
65# that format and puts the parameters into the referenced hash.  Parameter
66# names are mapped to lower case, parameter values are unchanged.  Returns 0 on
67# failure (e.g. file open) and 1 on success.
68sub parse_config {
69    my($file, $href) = @_;
70    my $fh = new IO::File($file);
71       
72    unless ($fh) {
73        warn "Can't open $file: $!\n";
74        return 0;
75    }
76
77    while (<$fh>) {
78        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
79        chomp;
80        /^([^:]+):\s*(.*)/ && do {
81            my $key = $1; 
82
83            $key =~ tr/A-Z/a-z/;
84            $href->{$key} = $2;
85            next;
86        };
87        warn "Unparasble line in $file: $_\n";
88    }
89    $fh->close();   # It will close when it goes out of scope, but...
90    return 1;
91}
92
93# Parse an easier-to-read testbeds file (the original was comma-separated
94# unadorned strings).  The format is a testbed scope as [testbed] followed by
95# the colon-separated attribute-value pairs for the testbed.  Right now these
96# go into a set of global hashes indexed by testbed, but that should probably
97# change.  The file parameter is an open IO::Handle.  &parse_testbeds_filename
98# opens the file and calls this.  Parse_testbeds can be used on pipes as well,
99# e.g. fedd_client output.
100sub parse_testbeds {
101    my($fh, $tbparams) = @_;        # Testbeds file and parameter hash
102    my $tb;                         # Current testbed
103    # Convert attribute in the file to tbparams hash key
104    my %attr_to_hash = (
105        "opsnode" => "host",
106        "user" => "user",
107        "domain" => "domain",
108        "project" => "project",
109        "connectortype" => "gwtype",
110        "slavenodestartcmd" => "expstart",
111        "slaveconnectorstartcmd" => "gwstart",
112        "masternodestartcmd" => "mexpstart",
113        "masterconnectorstartcmd" => "mgwstart",
114        "connectorimage" => "gwimage",
115        "fileserver" => "fs",
116        "boss" => "boss",
117        "eventserver" => "eventserver",
118        "tunnelcfg" => "tun",
119        "uri" => "uri",
120        "access" => "access"
121    );
122
123    while (<$fh>) {
124        next if /^\s*#/ || /^\s*$/;     # Skip comments & blanks
125        print STDERR "testbeds: $_";
126        chomp;
127        /^\s*\[(.*)\]/ && do {
128            $tb = $1;
129            $tbparams->{$tb} = {} unless $tbparams->{$tb};
130            next;
131        };
132
133        /^([^:]+):\s*(.*)/ && do {
134            unless ($tb) {
135                warn "Ignored attribute definition before testbed: $_\n ";
136                next;
137            }
138            my $key = $1; 
139            $key =~ tr/A-Z/a-z/;
140            my $var = $attr_to_hash{$key};
141
142            if ($var) { $tbparams->{$tb}->{$var} = $2; }
143            else { warn "Unknown keyword $key\n"; }
144
145            next;
146        };
147        warn "Unparasble line: $_\n";
148    }
149    return 1;
150}
151
152
153# Open the given file name and parse the testbeds file it contains by calling
154# &parse_testbeds.
155sub parse_testbeds_filename {
156    my($file, $tbparams) = @_;      # Testbeds file and parameter hash
157    my $fh = new IO::File($file);   # Testbeds filehandle
158
159    if ($fh) {
160        my $rv = &parse_testbeds($fh, $tbparams);
161        $fh->close();   # It will close when it goes out of scope, but...
162        $rv;
163    }
164    else {
165        warn "Can't open $file: $!\n";
166        return 0;
167    }
168}
169
170# Generate SSH keys for use by the gateways.  The parameters are the type and
171# the filename for the private key.  The pubkey will be stored in a filename
172# with the same name as the private key but with .pub appended.  Type can be
173# dsa or rsa.
174
175sub generate_ssh_keys {
176    my($type, $dest) = @_;
177
178    $type =~ tr/A-Z/a-z/;
179    return 0 if $type !~ /(rsa|dsa)/;
180    system("/usr/bin/ssh-keygen -t $type -N \"\" -f $dest");
181    return $@ ? 0 : 1;
182}
183
184# use scp to transfer a file, reporting true if successful and false otherwise.
185# Parameters are the local file name, the ssh host destination (either hostname
186# oe user@host), and an optional destination file name or directory.  If no
187# destination is given, the file is transferred to the given user's home
188# directory.  If only a machine is given in the ssh host destination, the
189# current user is used.
190sub scp_file {
191    my($file, $user, $host, $dest) = @_;
192
193    # XXX system with a relative pathname is sort of gross
194    system("scp $file $user\@$host:$dest");
195    if ($?) {
196        warn "scp failed $?\n";
197        return 0;
198    }
199    else { return 1; }
200}
201
202# use ssh to execute the given command on the machine (and as the user) in
203# $where.  Parameters are the ssh destination directive ($where) and the
204# command to execute, and a prefix to be placed on a message generated if the
205# command fails.   On failure print a warning if a warning prefix was given and
206# return false.  If timeout is given fork a process and set an alarm of that
207# many seconds.  Timeouts also return 0;
208sub ssh_cmd {
209    my($user, $host, $cmd, $wname, $timeout) = @_;
210    my $pid;                # Child pid
211
212    $timeout = 0 unless $timeout;   # Force default timeout
213
214    if ( $pid = fork () ) {
215        # Parent process
216        # The eval acts as a signal catcher.  If the alarm goes off inside
217        # the eval, the die will put "alarm\n" into $@, otherwise the
218        # return value of the execution in the child process will be used.
219        my $rv = eval {
220            local $SIG{'ALRM'} = sub{ die "alarm\n"; };
221            my $rv;
222
223            alarm $timeout;
224            $rv = waitpid($pid, 0);
225            alarm 0;
226            $rv;
227        };
228
229        # If the eval succeeded, $@ will be null and we can use $rv, which
230        # is the return code from the subprocess.  If the eval timed out,
231        # print a warning and assume the best.
232        if ($@ eq "alarm\n" ) {
233            warn "$wname timed out - pid $pid still live\n";
234            return 1;
235        }
236        else {
237            return $rv;
238        }
239    }
240    else {
241        # Child process
242        exec("ssh $user\@$host $cmd");
243        exit 0;
244    }
245}
246
247# Ship local copies of the federation scripts out to the given host.  If any of
248# the script transfers fails, return 0.  The scripts to transfer are from the
249# global @scripts and are found locally in $local_script_dir (another global).
250sub ship_scripts {
251    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
252    my $s;
253
254    &ssh_cmd($user, $host, "mkdir -p $dest_dir");
255    for $s (@scripts) {
256        &scp_file("$local_script_dir/$s", $user, $host, $dest_dir) || 
257            return 0;
258    }
259    return 1;
260}
261
262# Ship per-testbed configuration generated by this script to the remote /proj
263# directories on the remote testbeds
264sub ship_configs {
265    my($host, $user, $src_dir, $dest_dir) = @_;     # Where, who, where remotely
266    my($d, $f);
267
268    $d = IO::Dir->new($src_dir) || return 0;
269
270    # All directories under $tmpdir are 770 so we can delete them later.
271    &ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
272    &ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;
273    while ( $f = $d->read()) {
274        next if $f =~ /^\./;
275        if ( -d "$src_dir/$f" ) {
276            &ship_configs($host, $user, "$src_dir/$f", "$dest_dir/$f") || 
277                return 0;
278        }
279        else {
280            &scp_file("$src_dir/$f", $user, $host, $dest_dir) || return 0;
281        }
282    }
283    return 1;
284}
285
286# Start a sub section of the experiment on a given testbed.  The testbed and
287# the user to start the experiment as are pulled from the global per-testbed
288# hash, passed in as $tbparams, as is the project name on the remote testbed.
289# Parameters are the testbed and the experiment id.  Configuration files are
290# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
291# Then the current state of the experiment determined using expinfo.  From that
292# state, the experiment is either created, modified or spapped in.  If
293# everything succeeds, true is returned.  If the global verbose is set progress
294# messages are printed.
295sub start_segment {
296    my($tb, $eid, $tbparams, $timeout) = @_;# testbed, experiment ID,
297                                            # per-testbed parameters and remote
298                                            # swap-in timeout
299    my $host =                              # Host name of remote ops (FQDN)
300        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
301    my $user = $tbparams->{$tb}->{'user'};  # user to pass to ssh
302    my $pid = $tbparams->{$tb}->{'project'};# remote project to start the
303                                            # experiment under
304    my $tclfile = "./$eid.$tb.tcl";         # Local tcl file with the
305                                            # sub-experiment
306    my $proj_dir = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
307    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
308    my $rpms_dir = "/proj/$pid/rpms/$eid";  # Where to stash rpms
309    my $to_hostname = "$proj_dir/hosts";    # remote hostnames file
310    my $state;                              # State of remote experiment
311    my $status = new IO::Pipe;              # The pipe to get status
312
313    # Determine the status of the remote experiment
314    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") || 
315        die "Can't ssh to $user\@$host:$!\n";
316    # XXX: this is simple now.  Parsing may become more complex
317    while (<$status>) {
318        /State: (\w+)/ && ($state = $1);
319        /No\s+such\s+experiment/ && ($state = "none");
320    }
321    $status->close();
322    print "$tb: $state\n";
323
324    # Copy the experiment definition data over
325    print "transferring subexperiment to $tb\n" if $verbose;
326    &scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
327    # Clear out any old experiment data; if not deleted, copies over it by
328    # different users will fail.
329    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
330    # force the /bin/sh interpretation of the trailing * (which we need to keep
331    # tmp around))  Again, this needs to be done more properly once we have a
332    # non-ssh interface here.)
333    print "clearing experiment subdirs on $tb\n" if $verbose;
334    &ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") || 
335        return 0;
336    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
337    &ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") || 
338        return 0;
339    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
340    &ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") || 
341        return 0;
342    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
343    &ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") || 
344        return 0;
345    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
346    &ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") || 
347        return 0;
348    # Remote experiment is active.  Modify it.
349    if ($state eq "active") {
350        print "Transferring federation support files to $tb\n" if $verbose;
351        # First copy new scripts and hostinfo into the remote /proj
352        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
353            return 0;
354        &ship_scripts($host, $user, $proj_dir) || return 0;
355        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
356
357        if ( -d "$tmpdir/tarfiles") {
358            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
359                return 0;
360        }
361
362        if ( -d "$tmpdir/rpms") {
363            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
364                return 0;
365        }
366
367        print "Modifying $eid in place on $tb\n" if $verbose;
368        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
369            "$eid $tclfile", "modexp", $timeout) || return 0;
370        return 1;
371    }
372
373    # Remote experiment is swapped out, modify it and swap it in.
374    if ($state eq "swapped") {
375        print "Transferring federation support files to $tb\n" if $verbose;
376        # First copy new scripts and hostinfo into the remote /proj (because
377        # the experiment exists, the directory tree should be there.
378        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
379            return 0;
380        &ship_scripts($host, $user, $proj_dir) || return 0;
381        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
382        if ( -d "$tmpdir/tarfiles") {
383            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
384                return 0;
385        }
386
387        if ( -d "$tmpdir/rpms") {
388            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
389                return 0;
390        }
391
392        print "Modifying $eid on $tb\n" if $verbose;
393        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
394            "modexp") || return 0;
395        print "Swapping $eid in on $tb\n" if $verbose;
396        # Now start up
397        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
398            "swapexp", $timeout) || return 0;
399        return 1;
400    }
401
402    # No remote experiment.  Create one.  We do this in 2 steps so we can put
403    # the configuration files and scripts into the new experiment directories.
404    if ($state eq "none") {
405
406        if ( -d "$tmpdir/tarfiles") {
407            # Tarfiles have to exist for the creation to work
408            print "copying tarfiles to $tb\n";
409            &ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) || 
410                return 0;
411        }
412
413        if ( -d "$tmpdir/rpms") {
414            &ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) || 
415                return 0;
416        }
417
418        print "Creating $eid on $tb\n" if $verbose;
419        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " . 
420            "$pid -e $eid $tclfile", "startexp") || return 0;
421        # After startexp succeeds, the per-experiment directories exist on the
422        # remote testbed.
423        print "Transferring federation support files to $tb\n" if $verbose;
424        # First copy new scripts and hostinfo into the remote /proj
425        &scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
426            return 0;
427        &ship_scripts($host, $user, $proj_dir) || return 0;
428        &ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
429        # Now start up
430        print "Swapping $eid in on $tb\n" if $verbose;
431        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
432            "swapexp", $timeout) || return 0;
433        return 1;
434    }
435
436    # Every branch for a known state returns.  If execution gets here, the
437    # state is unknown.
438    warn "unknown state: $state\n";
439    return 0;
440}
441
442# Swap out a sub-experiment - probably because another has failed.  Arguments
443# are testbed and experiment.  Most of the control flow is similar to
444# start_segment, though much simpler.
445sub stop_segment {
446    my($tb, $eid, $tbparams) = @_;          # testbed, experiment ID and
447                                            # per-testbed parameters
448    my $user = $tbparams->{$tb}->{'user'};  # testbed user
449    my $host =                              # Ops node
450        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
451    my $pid = $tbparams->{$tb}->{'project'};# testbed project
452
453    print "Stopping $eid on $tb\n" if $verbose;
454    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
455        "swapexp (out)") || return 0;
456    return 1;
457}
458
459
460$pid = $gid = "dummy";              # Default project and group to pass to
461                                    # $tcl_splitter above.  These are total
462                                    # dummy arguments;  the splitter doesn't
463                                    # use them at all, but we supply them to
464                                    # keep our changes to the parser minimal.
465# Argument processing.
466getopts('Ft:c:p:f:ndvNP:', \%opts);
467$splitter_config = $opts{'c'} || "./splitter.conf";
468$debug = $opts{'d'};
469$verbose = $opts{'v'} || $opts{'d'};
470
471&parse_config("$splitter_config", \%opts) || 
472    die "Cannot read config file $splitter_config: $!\n";
473
474warn "-N does nothing now.  Only one testbeds format supported.\n"
475    if $opts{'N'};
476$fail_soft = $opts{'F'} || $opts{'failsoft'};
477$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
478$timeout = $opts{'t'} || $opts{'timeout'};
479$eid = $opts{'experiment'};             # Experiment ID
480$tcl = $opts{'f'} || shift;             # The experiment description
481$master = $opts{'master'};              # Master testbed
482$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
483$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
484$local_script_dir = $opts{'scriptdir'}; # Local scripts
485$muxmax  = $opts{'muxlimit'} || 3;      # Number of connections muxed on one
486                                        # gateway
487
488$max_children = $opts{'p'} || $opts{'maxchildren'} 
489    if $opts{'p'} || $opts{'maxchildren'};
490
491$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
492    die "Must give an SMB share\n";
493$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
494    die "Must give an SMB user\n";
495$auth_proj = $opts{'P'};
496
497# tcl program to split experiments (changed during devel)
498$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
499# tclsh to call directly (changed during devel)
500$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
501# fedd_client to get testbed access parameters
502$fedd_client = $opts{'feddclient'} || "fedd_client";
503
504# Prefix to avoid collisions
505$tmpdir .= "/split$$";
506
507print "Temp files are in $tmpdir\n" if $verbose;
508# Create a workspace
509unless (-d "$tmpdir") {
510    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
511}
512
513# If the keys are given, use them.  Otherwise create a set under $tmpdir
514
515if ( $opts{'gatewatpubkey'} && $opts{'gatewaysecretkey'}) {
516    $gw_pubkey = $opts{'gatewaypubkey'};
517    $gw_secretkey = $opts{'gatewaysecretkey'};
518}
519else {
520    $keytype = $opts{'gatewaykeytype'} || "rsa";
521    mkdir("$tmpdir/keys") || die "Can't create temoprary key dir: $!\n";
522    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
523    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
524    print "Generating $keytype keys\n" if $verbose;
525    generate_ssh_keys($keytype, $gw_secretkey) || 
526        die "Cannot generate kets:$@\n";
527}
528# Generate the basenames
529($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
530($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;
531
532
533
534# Validate scripts directory
535for my $s (@scripts) {
536    die "$local_script_dir/$s not in local script directory. Try -d\n"
537        unless -r "$local_script_dir/$s";
538}
539
540die "Must supply file, master and experiment" unless $master && $tcl && $eid;
541
542&parse_testbeds_filename($tb_config, $tbparams) ||
543    die "Cannot testbed congfigurations from $tb_config: $!\n";
544
545# Open a pipe to the splitter program and start it parsing the experiments
546my $pipe = new IO::Pipe;
547# NB no more -p call on parse call.
548$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master  $pid $gid $eid $tcl") || 
549    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";
550
551# Parsing variables
552my $ctb;                        # Current testbed
553my %allocated;                  # If allocated{$tb} > 0, $tb is in use
554my $destfile;                   # File that the sub-experiment tcl file is
555                                # being written to, or "" if none.  Also used
556                                # for hostnames file.
557my $desthandle;                 # File handle for distfile
558my $gateways;                   # when gateway lists are being processed this
559                                # is the testbed whose gateways are being
560                                # gathered.
561my $control_gateway;            # Control net gateway for the current testbed
562my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
563                                # end of the a <-> b connector pair.
564
565# Parse the splitter output.  This loop creates the sub experiments, gateway
566# configurations and hostnames file
567while (<$pipe>) {
568    # Allbeds lists all the testbeds that this experiment accesses.  This code
569    # acquires access to them and pulls in their access parameters from fedd.
570    (/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do {
571        next if /^#/;
572        chomp;
573
574        my $tb = $_;            # Current testbed
575        # If this testbed has not had its access parameters read from fedd, try
576        # to read them, if we have a way to talk to fedd
577        unless ($tbparams->{$tb}->{'access'} || !$fedd_client) { 
578            my $access_pipe = new IO::Pipe || 
579                die "Can't open pipe to fedd:$!\n";
580            my $proj = $auth_proj ? " -p $auth_proj " : "";
581
582            print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'}
583                . "\n") if $verbose;
584
585            my $cmd = "$fedd_client -t " . 
586                $tbparams->{$tb}->{'uri'} .  " -T $ENV{HOME}/cacert.pem ".
587                "-l $tb $proj | /usr/bin/tee fedd.$tb ";
588            print "$cmd\n" if $verbose;
589
590            $access_pipe->reader($cmd) || 
591                    die "Can't exec fedd_client: $!\n";
592
593            &parse_testbeds($access_pipe, $tbparams) || 
594                warn("Error reading fedd output: $!\n");
595            $access_pipe->close();
596        }
597        next;
598    };
599
600    # Start of a sub-experiment
601    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
602        $ctb = $1;
603
604        # If we know the testbed, start collecting its sub experiment tcl
605        # description.  If not, warn the user.
606        if ($tbparams->{$ctb}->{'access'}) {
607            $allocated{$ctb}++; # Keep track of the testbeds allocated
608
609            unless (-d "$tmpdir/$ctb") {
610                mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!";
611            }
612            $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl";
613
614            $desthandle = new IO::File(">$destfile") || 
615                die "Cannot open $destfile:$!\n";
616        }
617        else{
618            warn "No such testbed $ctb\n";
619            $destfile = "";
620        }
621        next;
622    };
623
624    # End of that experiment
625    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
626        # Simple syntax check and close out this experiment's tcl description
627        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
628        $desthandle->close() if $desthandle;
629        $destfile = $ctb = "";
630        next;
631    };
632
633    # Beginning of a gateway set
634    /^#\s+Begin\s+gateways\s+\((\w+)\)/ && do {
635        $gateways = $1;
636        # If we've heard of this tb, create the config lines for it one at a
637        # time.
638        if ($allocated{$gateways}) {
639            # Just in case.  This directory should already have been created
640            # above.
641            unless (-d "$tmpdir/$gateways") {
642                mkdir("$tmpdir/$gateways") || 
643                    die "Can't create $tmpdir/$gateways: $!";
644            }
645        }
646        else {
647            warn "Gateways given (and ignored) for testbed not in use: " .
648                "$gateways\n";
649            $gateways = 0;
650        }
651        next;
652    };
653    # End of the gateways section.  Output the client config for this testbed
654    /^#\s+End\s+gateways\s+\((\w+)\)/ && do {
655        die "Mismatched gateway markers ($1, $gateways)\n" 
656            unless !$gateways || $gateways == $1;
657
658        if ($control_gateway ) {
659            # Client config
660            my $cc = new IO::File(">$tmpdir/$gateways/client.conf");
661            my $master_project = $tbparams->{$master}->{'project'};
662            die "Can't open $tmpdir/$gateways/client.conf: $!\n" unless $cc;
663            print $cc "ControlGateway: $control_gateway\n";
664            print $cc "SMBShare: $smb_share\n";
665            print $cc "ProjectUser: $project_user\n";
666            print $cc "ProjectName: $master_project\n";
667            $cc->close();
668        }
669        else { warn "No control gateway for $gateways?\n"; }
670           
671        $gateways = 0;
672        next;
673    };
674    # Beginning of the hostnames list.  Collection is always in the hostnames
675    # file.
676    /^#\s+Begin\s+hostnames/ && do {
677        $destfile = "$tmpdir/hostnames";
678        $desthandle = new IO::File(">$destfile") || 
679            die "Can't open $destfile:$!\n";
680        next;
681    };
682    # end of the hostnames list.
683    /^#\s+End\s+hostnames/ && do {
684        $desthandle->close();
685        $destfile = "";
686        next;
687    };
688
689    # Generate gateway configuration info, one file per line
690    $gateways && do {
691        chomp;
692        my($dtb, $myname, $desthost, $type) = split(" ", $_);
693
694        # Many of these are to simplify print statements
695        my $sdomain =                           # domain for the source
696            $tbparams->{$gateways}->{'domain'};
697        my $ddomain =                           # domain for the destination
698            $tbparams->{$dtb}->{'domain'};
699        my $sproject =                          # Project of the source
700            $tbparams->{$gateways}->{'project'};
701        my $dproject =                          # Project of the destination
702            $tbparams->{$dtb}->{'project'};
703        my $fs =                                # Master fs node (FQDN)
704            $tbparams->{$master}->{'fs'} .  $tbparams->{$master}->{'domain'};
705        my $boss =                              # Master boss node (FQDN)
706            $tbparams->{$master}->{'boss'} .  $tbparams->{$master}->{'domain'};
707        my $event_server =                      # Master event-server (FQDN)
708            $tbparams->{$master}->{'eventserver'} . 
709            $tbparams->{$master}->{'domain'};
710        my $remote_event_server =               # Slave event-server (FQDN)
711            $tbparams->{$dtb}->{'eventserver'} . 
712            $tbparams->{$dtb}->{'domain'};
713        my $remote_script_dir =                 # Remote fed script location
714            "/proj/" . $dproject . "/exp/$eid/tmp";
715        my $local_script_dir =                  # Local fed script location
716            "/proj/" . $sproject . "/exp/$eid/tmp";
717        my $active;                             # Is this the active side of
718                                                # the connector?
719        my $tunnel_cfg =                        # Use DETER's config stuff
720            $tbparams->{$gateways}->{'tun'} || "false";
721                                   
722
723        $sdomain = ".$eid." . $tbparams->{$gateways}->{'project'} . "$sdomain";
724        $ddomain = ".$eid." . $tbparams->{$dtb}->{'project'} . "$ddomain";
725
726        my $conf_file = "$myname$sdomain.gw.conf";
727        my $remote_conf_file = "$desthost$ddomain.gw.conf";
728        # translate to lower case so the `hostname` hack for specifying
729        # configuration files works.
730        $conf_file =~ tr/A-Z/a-z/;
731        $remote_conf_file =~ tr/A-Z/a-z/;
732
733        # If either end of this link is in the master side of the testbed, that
734        # side is the active end. Otherwise the first testbed encountered in
735        # the file will be the active end.  The $active_end variable keeps
736        # track of those decisions
737        if ( $dtb eq $master ) { $active = "false"; }
738        elsif ($gateways eq $master ) { $active = "true"; }
739        elsif ( $active_end{"$dtb-$gateways"} ) { $active="false"; }
740        else { $active_end{"$gateways-$dtb"}++; $active = "true"; }
741
742        # This is used to create the client configuration.
743        $control_gateway = "$myname$sdomain"
744            if $type =~ /(control|both)/;
745
746        # Write out the file
747        my $gwconfig = new IO::File(">$tmpdir/$gateways/$conf_file")|| 
748            die "can't open $tmpdir/$gateways/$conf_file: $!\n";
749
750        print $gwconfig "Active: $active\n";
751        print $gwconfig "TunnelCfg: $tunnel_cfg\n";
752        print $gwconfig "BossName: $boss\n";
753        print $gwconfig "FsName: $fs\n";
754        print $gwconfig "EventServerName: $event_server\n";
755        print $gwconfig "RemoteEventServerName: $remote_event_server\n";
756        print $gwconfig "Type: $type\n";
757        print $gwconfig "RemoteScriptDir: $remote_script_dir\n";
758        print $gwconfig "EventRepeater: $local_script_dir/fed_evrepeater\n";
759        print $gwconfig "RemoteExperiment: $dproject/$eid\n";
760        print $gwconfig "LocalExperiment: $sproject/$eid\n";
761        print $gwconfig "RemoteConfigFile: " . 
762            "$remote_script_dir/$remote_conf_file\n";
763        print $gwconfig "Peer: $desthost$ddomain\n";
764        print $gwconfig "Pubkeys: " . 
765            "/proj/$sproject/exp/$eid/tmp/$gw_pubkey_base\n";
766        print $gwconfig "Privkeys: " .
767            "/proj/$sproject/exp/$eid/tmp/$gw_secretkey_base\n";
768        $gwconfig->close();
769
770        # This testbed has a gateway (most will) so make a copy of the keys it
771        # needs in this testbed's subdirectory.  start_segment will transfer
772        # them.
773        unless (-r "$tmpdir/$gateways/$gw_pubkey_base" ) {
774            copy($gw_pubkey, "$tmpdir/$gateways/$gw_pubkey_base") ||
775                die "Can't copy pubkeys ($gw_pubkey to " . 
776                    "$tmpdir/$gateways/$gw_pubkey_base): $!\n";
777        }
778        if ($active eq "true" ) {
779            unless (-r "$tmpdir/$gateways/$gw_secretkey_base" ) {
780                copy($gw_secretkey, "$tmpdir/$gateways/$gw_secretkey_base") ||
781                    die "Can't copy secret keys ($gw_secretkey to " . 
782                        "$tmpdir/$gateways/$gw_secretkey_base): $!\n";
783            }
784        }
785
786        #done processing gateway entry, ready for next line
787        next; 
788    };
789    (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) && do {
790        next if /^#/;
791        chomp;
792        push(@tarfiles, $_);
793        next;
794    };
795    (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) && do {
796        next if /^#/;
797        chomp;
798        push(@rpms, $_);
799        next;
800    };
801
802    next unless $destfile;  # Unidentified testbed, ignore config
803    # local copies that can be used in the substitutions below
804    my $gwtype = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
805    my $gwimage = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
806    my $mgwstart = $tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart;
807    my $mexpstart = $tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart;
808    my $gwstart = $tbparams->{$ctb}->{'gwstart'} || $def_gwstart;
809    my $expstart = $tbparams->{$ctb}->{'expstart'} || $def_expstart;
810    my $project = $tbparams->{$ctb}->{'project'};
811
812    # Substitute variables
813    s/GWTYPE/$gwtype/g;
814    s/GWIMAGE/$gwimage/g;
815    if ($ctb eq $master ) {
816        s/GWSTART/$mgwstart/g;
817        s/EXPSTART/$mexpstart/g;
818    }
819    else {
820        s/GWSTART/$gwstart/g;
821        s/EXPSTART/$expstart/g;
822    }
823    # XXX: oh is this bad
824    s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
825    s#PROJDIR#/proj/$project/#g;
826    s#EID#$eid#g;
827    s#FEDDIR#/proj/$project/exp/$eid/tmp/#g;
828    print $desthandle $_;
829}
830$pipe->close();
831die "No nodes in master testbed ($master)\n" unless $allocated{$master};
832
833# Copy tarfiles and rpms needed at remote sites to the staging directories.
834# Start_segment will distribute them
835for my $t  (@tarfiles) {
836    die "tarfile '$t' unreadable: $!\n" unless -r $t;
837    unless (-d "$tmpdir/tarfiles") {
838        mkdir("$tmpdir/tarfiles") || 
839            die "Can't create $tmpdir/tarfiles:$!\n";
840    }
841    copy($t, "$tmpdir/tarfiles") || 
842        die "Can't copy $t to  $tmpdir/tarfiles:$!\n";
843}
844
845for my $r  (@rpms) {
846    die "rpm '$r' unreadable: $!\n" unless -r $r;
847    unless (-d "$tmpdir/rpms") {
848        mkdir("$tmpdir/rpms") || 
849            die "Can't create $tmpdir/rpms:$!\n";
850    }
851    copy($r, "$tmpdir/rpms") || 
852        die "Can't copy $r to  $tmpdir/rpms:$!\n";
853}
854
855exit(0) unless $startem;
856
857my %started;                # If $started{$tb} then $tb successfully started
858my %child;                  # If $child{$pid} then a process with that pid is
859                            # working on a starting a segment
860my $nworking = 0;           # Number of children working on swapin
861my $pid;                    # Scratch variable for pids
862
863# Start up the slave sub-experiments first
864TESTBED:
865for my $tb  (keys %allocated) {
866    if ( $tb ne $master ) {
867        while ( $nworking == $max_children ) {
868            print "Waiting for a child process to complete\n" if $verbose;
869            if (($pid = wait()) != -1 ) {
870                # The $? >> 8 is the exit code of the subprocess, which is
871                # non-zero if the &start_segment routine failed.
872                my $exit_code = ($? >> 8);
873
874                print "Child $pid completed exit code ($exit_code)\n"
875                    if $verbose;
876                $nworking--;
877                $started{$child{$pid}}++ unless $exit_code;
878                if ($child{$pid} ) { delete $child{$pid}; }
879                else { warn "Reaped a pid we did not start?? ($pid)\n"; }
880                last TESTBED if $exit_code;
881            }
882            else { warn "wait returned without reaping: $!\n"; }
883        }
884        if ( $pid = fork() ) {
885            # Parent process
886            $nworking ++;
887            $child{$pid} = $tb;
888            print "Started process $pid to start testbed $tb\n"
889                if $verbose;
890        }
891        else {
892            # Child.  Note that we reverse the sense of the return code when it
893            # becomes an exit value.  Zero exit values indicate success.
894            exit(!&start_segment($tb, $eid, $tbparams, $timeout));
895        }
896    }
897}
898
899# Now wait for any still running processes.
900while ( $nworking ) {
901    print "Waiting for a child process to complete ($nworking running)\n" 
902        if $verbose;
903    if (($pid = wait()) != -1 ) {
904        # The $? >> 8 is the exit code of the subprocess, which is
905        # non-zero if the &start_segment routine failed.
906        my $exit_code = ($? >> 8);
907
908        print "Child $pid completed exit code ($exit_code)\n"
909            if $verbose;
910        $nworking--;
911        $started{$child{$pid}}++ unless $exit_code;
912        if ($child{$pid} ) { delete $child{$pid}; }
913        else { warn "Reaped a pid we did not start?? ($pid)\n"; }
914    }
915    else { warn "wait returned without reaping: $!\n"; }
916}
917
918# Now the master
919if (&start_segment($master, $eid, $tbparams, $timeout)) { 
920    $started{$master}++;
921}
922
923# If any testbed failed, swap the rest out.
924if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
925    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
926    print "Error starting experiment\n";
927    exit(1);
928}
929print "Experiment started\n";
930print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
931system("rm -rf $tmpdir") unless $debug;
932exit(0);    # set the exit value
933
934=pod
935
936=head1 NAME
937
938B<splitter.pl>
939
940=head1 SYNOPSIS
941
942B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>]
943    [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>]
944
945=head1 DESCRIPTION
946
947B<splitter.pl> invokes the DETER experiment parser to split an annotated
948experiment into multiple sub-experments and instantiates the sub-experiments on
949their intended testbeds.  Annotation is accomplished using the
950tb-set-node-testbed command, added to the parser.
951
952Much of the script's behavior depends on the configuration file, specified with
953the B<-c> flag and defaulting to F<./splitter.conf>.
954
955The testbed labels supplied in the B<tb-set-node-testbed> command are
956meaningful based on their presence in the testbeds file.  that file can be
957specified in the configuration file using the B<Testbeds> directive, and
958defaults to F<./testbeds>.  The syntax is described below.
959
960Most of the intermediate files are staged in a sub-directory of a temporary
961files directory and deleted at the end of the script.  Specifying the B<-d>
962flag on the command line avoids the deletion for debbugging.  By default the
963temporary files directory is directory is F</tmp> and can be reset in the
964configuration file using the B<Tmpdir> directive.  Intermediate files are
965stored under a subdirectory formed by adding the process ID of the splitter
966process.  For example, if the temporary files directory is F</tmp> and the
967B<splitter.pl> process ID is 2323, the temporary files will be stored in
968F</tmp/split2323/>.
969
970The expreriment is split out into one experiment description per testbed in the
971temporary directory named as F<experiment.testbed.tcl> where the experiment is
972the experiment ID given in the configuration file, and the testbed is the
973tb-set-node-testbed parameter for the nodes in the file.
974
975If the B<-n> option is absent the sub-experiments are then instantiated on
976their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
977Per-testbed parameters are set in the testbeds file.  Sub-experiments on
978slave testbeds are instantiated in a random order, but the master testbed is
979currently instantiated last.
980
981Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter
982and the maximum number of simultaneous processes to start.
983
984Scripts to start federation (the federation kit) are copied into the local
985experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>.  These are
986taken from the directory given by the B<ScriptDir> directive in the
987configuration file.
988
989If B<-t> is given the parameter is treated as a parameter to B<Timeout> in
990F<splitter.conf>.
991
992If any sub-experiment fails to instantiate, the other sub-exeriments are
993swapped out.  B<-F> avoids this swap out, which can also be specified as
994B<SoftFail: true> in F<splitter.conf>
995
996=head2 Configuration File
997
998The configuration file is a simple set of colon-separated parameters and
999values.  A configuration file must be present, either specified in the B<-c>
1000flag or the default F<./splitter.conf>.  All the parameter names are case
1001insensitive, but should not include any whitespace.  Parameter values may
1002include whitespace, but no newlines.
1003
1004Possible parameters are:
1005
1006=over 5
1007
1008=item Experiment
1009
1010The name of the experiment on the various testbeds
1011
1012=item Master
1013
1014The master testbed label from the testbeds file, described below.
1015
1016=item Testbeds
1017
1018The testbeds file described below, giving per-testbed parameters.  If this
1019directive is absent the testbeds file defaults to F<./testbeds>
1020
1021=item ScriptDir
1022
1023Location of the default federation scripts, i.e. the federation kit.
1024
1025=item GatewayPubkey
1026
1027=item GatewaySecretKey
1028
1029The names of the files containing secret and public keys to use in setting up
1030tunnels between testbeds.  If given they are used, otherwise keys are generated.
1031
1032=item GatewayKeyType
1033
1034This controls the kind of SSH keys generated to configure the geatways.  If
1035given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>.  The parameter
1036is csase insensitive.
1037
1038=item TmpDir
1039
1040=item TempDir
1041
1042The directory where temporary files are created.  These are synonyms, but
1043should both be specified, B<TmpDir> has priority.  If neither is specified,
1044F</tmp> is used.
1045
1046=item SMBShare
1047
1048The SMB share on the master testbed that will be exported to remote clients.
1049
1050=item SMBUser
1051
1052The experiment user to mount project directories as.  This user needs to be a
1053member of the exported experiment - that is one of the users in the project
1054containing this experiment on the master testbed.
1055
1056=item Timeout
1057
1058Value in seconds after which a swap-in operatioin will be considered a success.
1059Often long swap-ins will hang when there are partial failures.  This works
1060around this issue.  (This behavior can be requested on the command line by
1061specifying B<-t> I<secs>.)
1062
1063=item FailSoft
1064
1065If not set, failure of any sub experiment swaps the rest out.  Setting this to
1066any value avoids this swap out.  (This behavior can be requested on the command
1067line by specifying B<-F>.)
1068
1069=item MuxLimit
1070
1071The maximum bumber of links/lans carried by one gateway pair
1072
1073=item Tclparse
1074
1075The pathname to the experiment parsing program.  Only developers should set
1076this.
1077
1078=item Tclsh
1079
1080The pathname to the local oTcl shell.  Only developers should set
1081this.
1082
1083=back
1084
1085=head2 Testbeds file
1086
1087The configuration file (F<./testbeds> unless overridden by B<-c>) is a file of
1088scoped attribute-value pairs where each attribute is specified on a separate
1089line of the configuration file.  Each testbed's parameters are preceeded by the
1090testbed label in brackets ([]) on a line by itself.  After that the parameters
1091are specified as parameter: value.  This is essentially the same format as the
1092configuration file.  Parameters are:
1093
1094=over 4
1095
1096=item User
1097
1098The user under which to make requests to this testbed.  The user running
1099B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
1100testbed.
1101
1102=item OpsNode
1103
1104The host name of the testbed's ops node.  The user calling B<splitter.pl> must
1105be able to execute commands on this host via L<ssh(1)>.
1106
1107=item Domain
1108
1109The domain of nodes in this testbed (including the ops host).  This parameter
1110should always start with a period.
1111
1112=item Project
1113
1114The project under which to instantiate sub-experiments on this testbed.
1115
1116=item ConnectorType
1117
1118The node type for inter-testbed connector nodes on this testbed.
1119
1120=item SlaveNodeStartCmd
1121
1122The start command to run on experimental nodes when this testbed is used as a
1123slave.  In all the start commands the following string substitutions are made:
1124
1125=over 10
1126
1127=item FEDDIR
1128
1129The local experiment's federation scripts directory.  Each local experiment
1130will have this replaced by the scripts directory on its local boss.
1131
1132=item GWCONF
1133
1134The full pathname of the gateway configuration file.  As with FEDDIR, this is
1135on the local boss.
1136
1137=item PROJDIR
1138
1139The project directory on the local boss.
1140
1141=item EID
1142
1143The local experiment name.
1144
1145=back
1146
1147All startcmds specified in F<testbeds> undergo these expansions.
1148
1149=item SlaveConnectorStartCmd
1150
1151The start command to run on gateway nodes when this testbed is used as a slave.
1152The same string substitutions are made in this command as in SlaveNodeStartCmd.
1153
1154=item MasterNodeStartCmd
1155
1156The start command to run on experimental nodes when this testbed is used as a
1157master.  The same string substitutions are made in this command as in
1158SlaveNodeStartCmd.
1159
1160=item MasterConnectorStartCmd
1161
1162The start command to run on gateway nodes when this testbed is used as a
1163master.  The same string substitutions are made in this command as in
1164SlaveNodeStartCmd.
1165
1166=item ConnectorImage
1167
1168The disk image to be loaded on a gateway node on this testbed.
1169
1170=item FileServer
1171
1172The node in the master testbed from which filesystems are mounted.
1173
1174=item Boss
1175
1176The node in the master testbed that controls the testbed.
1177
1178=item TunnelCfg
1179
1180True if the connector needs to do DETER federation.  This parameter will
1181probably be removed.
1182
1183
1184=back
1185
1186=head1 ENVIRONMENT
1187
1188B<splitter.pl> does not directly make use of environment variables, but calls
1189out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
1190environment.
1191
1192=head1 BUGS
1193
1194A deprecated B<-N> flag was used to select testbeds file format.  Only one
1195format is supported now, and B<-N> generates a warning, but otherwise does not
1196affect B<splitter.pl>.
1197
1198=head1 SEE ALSO
1199
1200L<sh(1)>, L<ssh(1)>
1201
1202=cut
Note: See TracBrowser for help on using the repository browser.