source: fedkit/splitter.pl @ 63f7c7e

axis_examplecompt_changesinfo-opsversion-1.30version-2.00version-3.01version-3.02
Last change on this file since 63f7c7e was 63f7c7e, checked in by Ted Faber <faber@…>, 17 years ago

add hosts fixup

  • Property mode set to 100644
File size: 12.4 KB
Line 
1#!/usr/bin/perl
2
3use Getopt::Std;
4
5@scripts = ("federate.sh", "smbmount.pl");
6$local_script_dir = ".";
7
8# use scp to transfer a file, reporting true if successful and false otherwise.
9# Parameters are the local file name, the ssh host destination (either hostname
10# oe user@host), and an optional destination file name or directory.  If no
11# destination is given, the file is transferred to the given user's home
12# directory.  If only a machine is given in the ssh host destination, the
13# current user is used.
14sub scp_file {
15    my($file, $where, $dest) = @_;
16
17    # XXX system with a relative pathname is sort of gross
18    system("scp $file $where:$dest");
19    if ($?) {
20        warn "scp failed $?\n";
21        return 0;
22    }
23    else { return 1; }
24}
25
26# use ssh to execute the given command on the machine (and as the user) in
27# $where.  Parameters are the ssh destination directive ($where) and the
28# command to execute, and a prefix to be placed on a message generated if the
29# command fails.   On failure print a warning if a warning prefix was given and
30# return false.
31sub ssh_cmd {
32    my($user, $host, $cmd, $wname) = @_;
33
34    # XXX system with a relative pathname is sort of gross
35    if ( $host ne "localhost") {
36        system ("ssh $user\@$host $cmd");
37    }
38    else {
39        system ("$cmd");
40    }
41    if ($?) {
42        warn "$wname failed $?\n" if $wname;
43        return 0;
44    }
45    else { return 1; }
46}
47
48# Ship local copies of the federation scripts out to the given host.  If any of
49# the script transfers fails, return 0.  The scripts to transfer are from the
50# global @scripts and are found locally in $local_script_dir (another global).
51sub ship_scripts {
52    my($host, $user, $dest_dir) = @_;       # Where, who, where remotely
53    my($s);
54
55    for $s (@scripts) {
56        &scp_file("$local_script_dir/$s", "$user\@$host", $dest_dir) || 
57            return 0;
58    }
59    return 1;
60}
61
62
63
64# Start a sub section of the experiment on a given testbed.  The testbed and
65# the user to start the experiment as are pulled from the global per-testbed
66# hash, as is the project name on the remote testbed.  Parameters are the
67# testbed and the experiment id.  Configuration files are scp-ed over to the
68# target testbed.  Then the current state of the experiment determined using
69# expinfo.  From that state, the experiment is either created, modified or
70# spapped in.  If everything succeeds, true is returned.
71sub start_segment {
72    my($tb, $eid) = @_;                     # testbed and experiment ID
73    my($host) = $host{$tb};                 # Host name of remote users
74    my($user) = $user{$tb};                 # user to pass to ssh
75    my($pid) = $project{$tb};               # remote project to start the
76                                            # experiment under
77    my($tclfile) = "./$eid.$tb.tcl";        # Local tcl file with the
78                                            # sub-experiment
79    my($proj_dir) = "/proj/$pid/exp/$eid/tmp";  # Where to stash federation stuff
80    my($to_hostname) = "$proj_dir/hosts";   # remote hostnames file
81
82    # Determine the status of the remote experiment
83    if ( $host ne "localhost") {
84        open(STATUS, "ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid|") || 
85            die "Can't ssh to $user\@$host:$!\n";
86    }
87    else {
88        open(STATUS, "/usr/testbed/bin/expinfo $pid $eid|") || 
89            die "Can't call expinfo locally";
90    }
91    # XXX: this is simple now.  Parsing may become more complex
92    while (<STATUS>) {
93        /State: (\w+)/ && ($state = $1);
94        /No\s+such\s+experiment/ && ($state = "none");
95    }
96    close(STATUS);
97    print "$tb: $state\n";
98
99    # Copy the experiment definition data over (unless the host is local)
100    if ( $host ne "localhost") {
101        &scp_file($tclfile, "$user\@$host") || return 0;
102    }
103
104    # Remote experiment is active.  Modify it.
105    if ($state eq "active") {
106        # First copy new scripts and hostinfo into the remote /proj
107        &scp_file("./hostnames", "$user\@$host", $to_hostname) || return 0;
108        &ship_scripts($host, $user, $proj_dir) || return 0;
109        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " . 
110            "$eid $tclfile", "modexp") || return 0;
111        return 1;
112    }
113
114    # Remote experiment is swapped out, modify it and swap it in.
115    if ($state eq "swapped") {
116        &ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile", 
117            "modexp") || return 0;
118        # First copy new scripts and hostinfo into the remote /proj
119        &scp_file("./hostnames", "$user\@$host", $to_hostname) || return 0;
120        &ship_scripts($host, $user, $proj_dir) || return 0;
121        # Now start up
122        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
123            "swapexp") || return 0;
124        return 1;
125    }
126
127    # No remote experiment.  Create one.  We do this in 2 steps so we can put
128    # the configuration files and scripts into the new experiment directories.
129    if ($state eq "none") {
130        &ssh_cmd($user, $host, "/usr/testbed/bin/startexp -f -w -p " . 
131            "$pid -e $eid $tclfile", "startexp") || return 0;
132        # First copy new scripts and hostinfo into the remote /proj
133        &scp_file("./hostnames", "$user\@$host", $to_hostname) || return 0;
134        &ship_scripts($host, $user, $proj_dir) || return 0;
135        # Now start up
136        &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in", 
137            "swapexp") || return 0;
138        return 1;
139    }
140
141    # Every branch for a known state returns.  If execution gets here, the
142    # state is unknown.
143    warn "unknown state: $state\n";
144    return 0;
145}
146
147# Swap out a sub-experiment - probably because another has failed.  Arguments
148# are testbed and experiment.  Most of the control flow is similar to
149# start_segment, though much simpler.
150sub stop_segment {
151    my($tb, $eid) = @_;
152    my($user) = "$user{$tb}";
153    my($host) = "$host{$tb}";
154    my($pid) = $project{$tb};
155
156    &ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid out", 
157        "swapexp (out)") || return 0;
158    return 1;
159}
160
161$tcl_splitter = "/usr/testbed/lib/ns2ir/parse.tcl";         # tcl program to split experiments
162$tclsh = "/usr/local/bin/otclsh";   # tclsh to call directly
163
164$pid = $gid = "dummy";              # Default project and group to pass to
165                                    # $tcl_splitter above.  These are total
166                                    # dummy arguments;  the splitter doesn't
167                                    # use them at all, but we supply them to
168                                    # keep our changes to the parser minimal.
169
170# Argument processing.
171getopts('d:c:m:e:f:n', \%opts);
172
173$eid = $opts{'e'};                  # Experiment ID
174$tcl = $opts{'f'} || shift;         # The experiment description
175$master = $opts{'m'};               # Master testbed
176$startem = $opts{'n'} ? 0 : 1;      # If true, start the sub-experiments
177$config = $opts{'c'} || "./testbeds";
178$local_script_dir = $opts{'d'};     # Local scripts
179
180for $s (@scripts) {
181    die "$local_script_dir/$s not in local script directory. Try -d\n"
182        unless -r "$local_script_dir/$s";
183}
184
185die "Must supply file, master and experiment" unless $master && $tcl && $eid;
186
187# Read a hash of per-testbed parameters from the local configurations.
188open(CONF, $config) || die "can't read testbed configutions from $config: $!\n";
189while (<CONF>) {
190    next if /^#/;
191    chomp;
192    ($tb, $h, $u, $p, $es, $gs, $mes, $mgs, $t, $i) = split(":", $_);
193    $host{$tb} = $h;
194    $user{$tb} = $u;
195    $project{$tb} = $p;
196    $gwtype{$tb} = $t;
197    $expstart{$tb} = $es;
198    $gwstart{$tb} = $gs;
199    $mexpstart{$tb} = $mes;
200    $mgwstart{$tb} = $mgs;
201    $gwimage{$tb} = $i;
202}
203close(CONF);
204
205# Open a pipe to the splitter program and start it parsing the experiments
206open(PIPE, "$tclsh $tcl_splitter -s -p $pid $gid $eid $tcl|") || 
207    die "Cannot execute $tclsh $tcl_splitter -s -p $pid $gid $eid $tcl:$!\n";
208
209# Parse the splitter output.
210while (<PIPE>) {
211    # Start of a sub-experiment
212    /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do {
213        $ctb = $1;
214
215        # If we know the testbed, start collecting its sub experiment tcl
216        # description.  If not, warn the caller and ignore the configuration of
217        # this testbed.
218        if ($host{$ctb}) {
219            $allocated{$ctb}++; # Keep track of the testbeds allocated
220            $destfile = "./$eid.$ctb.tcl";
221
222            open(FILE, ">$destfile") || die "Cannot open $destfile:$!\n";
223        }
224        else { 
225            warn "No such testbed $ctb\n";
226            $destfile = "";
227        }
228        next;
229    };
230    # End of that experiment
231    /^#\s+End\s+Testbed\s+\((\w+)\)/ && do {
232        # Simple syntax check and close out this experiment's tcl description
233        die "Mismatched testbed markers ($1, $ctb)\n" unless ($1 eq $ctb);
234        close(FILE);
235        $destfile = $ctb = "";
236        next;
237    };
238    # Beginning of the hostnames list.  Collection is always in the hostnames
239    # file.
240    /^#\s+Begin\s+hostnames/ && do {
241        $destfile = "./hostnames";
242        open(FILE, ">$destfile") || die "Can't open $destfile:$!\n";
243        next;
244    };
245    # end of the hostnames list.
246    /^#\s+End\s+hostnames/ && do {
247        close(FILE);
248        $destfile = "";
249        next;
250    };
251
252    next unless $destfile;  # Unidentified testbed, ignore config
253
254    # Substitute variables
255    s/GWTYPE/$gwtype{$ctb}/g;
256    s/GWIMAGE/$gwimage{$ctb}/g;
257    if ($ctb eq $master ) {
258        s/GWSTART/$mgwstart{$ctb}/g;
259        s/EXPSTART/$mexpstart{$ctb}/g;
260    }
261    else {
262        s/GWSTART/$gwstart{$ctb}/g;
263        s/EXPSTART/$expstart{$ctb}/g;
264    }
265    print FILE;
266}
267close(PIPE);
268die "No nodes in master testbed ($master)\n" unless $allocated{$master};
269
270exit(0) unless $startem;
271
272# Start up the slave sub-experiments first
273TESTBED:
274for $tb (keys %allocated) {
275    if ($tb ne $master) {
276        if (&start_segment($tb, $eid)) { $started{$tb}++; }
277        else { last TESTBED; }
278    }
279}
280
281# Now the master
282if (&start_segment($master, $eid)) { 
283    $started{$master}++;
284}
285
286# If any testbed failed, swap the rest out.
287if ( scalar(keys %started) != scalar(keys %allocated)) {
288    for $tb (keys %started) { &stop_segment($tb, $eid); }
289    print "Error starting experiment\n";
290    exit(1);
291}
292print "Experiment started\n";
293exit(0);    # set the exit value
294
295=pod
296
297=head1 NAME
298
299B<splitter.pl>
300
301=head1 SYNOPSIS
302
303B<splitter.pl> B<-e> I<experiment> B<-m> I<master_testbed> [B<-n>]
304    [B<-d> F<script_dir>] [B<-c> F<config_file>] [B<-f> F<experiment_tcl>]
305    [F<experiment_tcl>]
306
307=head1 DESCRIPTION
308
309B<splitter.pl> invokes the DETER experiment parser to split an annotated
310experiment into multiple sub-experments and instantiates the sub-experiments on
311their intended testbeds.  Annotation is accomplished using the
312tb-set-node-testbed command, added to the parser.
313
314The testbed labels are meaningful based on their presence in the testbeds file.
315that file can be specified with the B<-c> option, and defaults to
316F<./testbeds>.  The syntax is described below.
317
318The expreriment is split out into one experiment description per testbed in the
319current directory named as F<experiment.testbed.tcl> where the experiment is
320the argument to B<-e> and the testbed is the tb-set-node-testbed parameter for
321the nodes in the file.
322
323If the B<-n> option is absent the sub-experiments are then instantiated on
324their testbeds.  (Here B<-n> is analogous to its use in L<make(1)>).
325Per-testbed parameters are set in the configuration file.  Sub-experiments on
326slave testbeds are instantiated in a random order, but the master testbed is
327currently instantiated last.
328
329Scripts to start federation are copied into the local experiment's tmp file -
330e.g., F</proj/DETER/exp/simple-split/tmp>.  These are taken from the directory
331given by the B<-d> option.
332
333If any sub-experiment fails to instantiate, the other sub-exeriments are
334swapped out.
335
336=head2 Configuration file
337
338The configuration file (F<./testbeds> unless overridden by B<-c>) is a
339colon-separated set of parameters keyed by testbed name.  The fields, in order,
340are:
341
342=over 5
343
344=item name
345
346The testbed to which this line of parameters applies.
347
348=item user
349
350The user under which to make requests to this testbed.  The user running
351B<splitter.pl> must be able to authenicate as this user under L<ssh(1)> to this
352testbed.
353
354=item project
355
356The project under which to instantiate sub-experiments on this testbed.
357
358=item gateway type
359
360The node type for inter-testbed gateway nodes on this testbed.
361
362=item experiment start (slave)
363
364The start command to run on experimental nodes when this testbed is used as a
365slave.
366
367=item gateway start (slave)
368
369The start command to run on gateway nodes when this testbed is used as a
370slave.
371
372=item experiment start (master)
373
374The start command to run on experimental nodes when this testbed is used as a
375master.
376
377=item gateway start (master)
378
379The start command to run on gateway nodes when this testbed is used as a
380master.
381
382=item gateway image
383
384The disk image to be loaded on a gateway node on this testbed.
385
386=back
387
388The parsing of the configuration is extremely simple.  Colons separate each
389field and there is n provision for escaping them at this time.
390
391=head1 ENVIRONMENT
392
393B<splitter.pl> does not directly make use of environment variables, but calls
394out to L<ssh(1)> and (indirectly) to L<sh(1)>, which may be influenced by the
395environment.
396
397=head1 SEE ALSO
398
399L<sh(1)>, L<ssh(1)>
400
401=cut
Note: See TracBrowser for help on using the repository browser.