1 | #!/usr/bin/perl |
---|
2 | |
---|
3 | use strict; |
---|
4 | |
---|
5 | use Getopt::Std; |
---|
6 | use IO::File; |
---|
7 | use IO::Dir; |
---|
8 | use IO::Pipe; |
---|
9 | use File::Copy; |
---|
10 | |
---|
# Federation support scripts that are shipped to each remote testbed.  The
# local copies are looked up in $local_script_dir.
my @scripts = qw(
    fed_bootstrap
    federate.sh
    smbmount.FreeBSD.pl
    smbmount.Linux.pl
    make_hosts
    fed-tun.pl
    fed_evrepeater
    rc.accounts.patch
);
my $local_script_dir = ".";     # Directory holding the local @scripts
my $pid;                        # Project ID handed to the tcl splitter
my $gid;                        # Group ID handed to the tcl splitter
my $splitter_config;            # Configuration file
my $debug;                      # True if called in debug mode
my $verbose;                    # True for extra progress reports
my $startem;                    # If true, start the sub-experiments
my $eid;                        # Experiment ID
my $tcl;                        # The experiment description (topology file)
my $master;                     # Master testbed
my $tmpdir;                     # tmp files
my $tb_config;                  # testbed configurations
my $smb_share;                  # Share to mount from the master
my $project_user;               # User to mount project dirs as
my $auth_proj;                  # Local project for resource access
my $gw_pubkey;                  # Connector pubkey (full path)
my $gw_pubkey_base;             # Connector pubkey (basename)
my $gw_secretkey;               # Connector secret key (full path)
my $gw_secretkey_base;          # Connector secret key (basename)
my $keytype;                    # Type (DSA or RSA) of generated gateway keys
my $tcl_splitter;               # tcl program to split experiments
                                # (changed during devel)
my $tclsh;                      # tclsh to call directly (changed during devel)
my $fedd_client;                # Program to call for testbed access params
my $muxmax;                     # Maximum number of links/lans over 1 gw pair
my @tarfiles;                   # Tarfiles in use by this experiment
my @rpms;                       # Rpms in use by this experiment
my $timeout;                    # The timeout to use for experiment swap ins
my %opts;                       # Parsed options

# Map of the per-testbed parameters from the testbeds file.  It is a
# reference to a hash of hashes (because it's passed around a bunch and it's
# nicer to have one access pattern throughout the script, in the main loop
# and the subroutines).  That access is exemplified by
# $tbparams->{'deter'}->{'domain'} which is the domain parameter of the DETER
# testbed.
my $tbparams = {};
my $fail_soft;                  # Do not swap failed sub-experiments out
my $max_children = 1;           # Maximum number of simultaneous swap-ins

# Default commands for starting experiment and gateway nodes.  Testbeds can
# override these.  (The 'm' prefixed commands are for operating as the master
# testbed.)
my $def_expstart  = "sudo -H /bin/sh FEDDIR/fed_bootstrap >& /tmp/federate";
my $def_mexpstart = "sudo -H FEDDIR/make_hosts FEDDIR/hosts";
my $def_gwstart   = "sudo -H FEDDIR/fed-tun.pl -f GWCONF>& /tmp/bridge.log";
my $def_mgwstart  = "sudo -H FEDDIR/fed-tun.pl -f GWCONF >& /tmp/bridge.log";
my $def_gwimage   = "FBSD61-TUNNEL2";
my $def_gwtype    = "pc";
---|
62 | |
---|
# Parse the config file.  The format is a colon-separated parameter name
# followed by the value of that parameter to the end of the line.  This parses
# that format and puts the parameters into the referenced hash.  Parameter
# names are mapped to lower case and have surrounding whitespace removed (so
# "Master : deter" and "master: deter" set the same key); parameter values are
# unchanged.  Comment lines (first non-blank character is #) and blank lines
# are skipped; any other line that cannot be parsed produces a warning.
#
# Parameters: $file - name of the file to read
#             $href - reference to the hash that receives the parameters
# Returns 0 on failure (e.g. file open) and 1 on success.
sub parse_config {
    my($file, $href) = @_;
    my $fh = IO::File->new($file);

    unless ($fh) {
        warn "Can't open $file: $!\n";
        return 0;
    }

    # A lexical line variable is used so the caller's $_ is left alone.
    while (my $line = <$fh>) {
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;  # Skip comments & blanks
        chomp $line;
        if ($line =~ /^([^:]+):\s*(.*)/) {
            my($key, $value) = (lc $1, $2);

            # Whitespace around the name is never significant.
            $key =~ s/^\s+//;
            $key =~ s/\s+$//;
            $href->{$key} = $value;
            next;
        }
        warn "Unparasble line in $file: $line\n";
    }
    $fh->close();   # It will close when it goes out of scope, but...
    return 1;
}
---|
92 | |
---|
# Parse an easier-to-read testbeds file (the original was comma-separated
# unadorned strings).  The format is a testbed scope as [testbed] followed by
# the colon-separated attribute-value pairs for the testbed.  The values are
# stored in the hash of hashes referenced by $tbparams, indexed first by
# testbed name and then by the internal parameter name that %attr_to_hash
# maps the file's attribute name to.  Attribute names are case-insensitive
# and surrounding whitespace is ignored.  Unknown attributes, attributes that
# appear before any [testbed] line and unparsable lines produce warnings and
# are skipped.
#
# The file parameter is an open IO::Handle.  &parse_testbeds_filename opens
# the file and calls this.  Parse_testbeds can be used on pipes as well, e.g.
# fedd_client output.  Always returns 1.
sub parse_testbeds {
    my($fh, $tbparams) = @_;    # Testbeds file and parameter hash
    my $tb;                     # Current testbed
    # Convert attribute in the file to tbparams hash key
    my %attr_to_hash = (
        "opsnode"                 => "host",
        "user"                    => "user",
        "domain"                  => "domain",
        "project"                 => "project",
        "connectortype"           => "gwtype",
        "slavenodestartcmd"       => "expstart",
        "slaveconnectorstartcmd"  => "gwstart",
        "masternodestartcmd"      => "mexpstart",
        "masterconnectorstartcmd" => "mgwstart",
        "connectorimage"          => "gwimage",
        "fileserver"              => "fs",
        "boss"                    => "boss",
        "eventserver"             => "eventserver",
        "tunnelcfg"               => "tun",
        "uri"                     => "uri",
        "access"                  => "access"
    );

    # A lexical line variable is used because callers may themselves be in
    # the middle of a loop over $_.
    while (my $line = <$fh>) {
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;  # Skip comments & blanks
        print STDERR "testbeds: $line";
        chomp $line;

        # [testbed] opens a new scope
        if ($line =~ /^\s*\[(.*)\]/) {
            $tb = $1;
            $tbparams->{$tb} = {} unless $tbparams->{$tb};
            next;
        }

        # attribute: value
        if ($line =~ /^([^:]+):\s*(.*)/) {
            my($key, $value) = (lc $1, $2);

            # Test for "no testbed yet" by length so that a testbed whose
            # name is "0" is still usable.
            unless (defined $tb && length $tb) {
                warn "Ignored attribute definition before testbed: $line\n";
                next;
            }
            $key =~ s/^\s+//;
            $key =~ s/\s+$//;

            my $var = $attr_to_hash{$key};

            if ($var) { $tbparams->{$tb}->{$var} = $value; }
            else { warn "Unknown keyword $key\n"; }

            next;
        }
        warn "Unparasble line: $line\n";
    }
    return 1;
}
---|
151 | |
---|
152 | |
---|
# Convenience wrapper around &parse_testbeds: open the named testbeds file,
# parse it into the hash referenced by $tbparams and close it again.  Returns
# whatever &parse_testbeds returns, or 0 (after a warning) if the file cannot
# be opened.
sub parse_testbeds_filename {
    my($file, $tbparams) = @_;          # Testbeds file and parameter hash
    my $handle = IO::File->new($file);  # Testbeds filehandle

    unless ($handle) {
        warn "Can't open $file: $!\n";
        return 0;
    }

    my $parsed = parse_testbeds($handle, $tbparams);

    $handle->close();   # It will close when it goes out of scope, but...
    return $parsed;
}
---|
169 | |
---|
# Generate SSH keys for use by the gateways.  The parameters are the type and
# the filename for the private key.  The pubkey will be stored in a filename
# with the same name as the private key but with .pub appended.  Type can be
# dsa or rsa (case-insensitive); anything else is rejected without running
# ssh-keygen.  The key is created with an empty passphrase.
#
# Returns 1 if ssh-keygen ran and exited with status 0, otherwise 0.
sub generate_ssh_keys {
    my($type, $dest) = @_;

    $type = lc(defined $type ? $type : "");
    # Anchored so that only exactly "rsa" or "dsa" gets through.
    return 0 unless $type =~ /\A(?:rsa|dsa)\z/;

    # List form: no shell is involved, so $dest is passed through verbatim.
    system("/usr/bin/ssh-keygen", "-t", $type, "-N", "", "-f", $dest);

    # system() reports the child's status in $? ($@ is only set by eval).
    # $? is -1 when the program could not be started at all.
    return $? == 0 ? 1 : 0;
}
---|
183 | |
---|
# Use scp to transfer a file, reporting true if successful and false
# otherwise.  Parameters are the local file name, the remote user, the remote
# host and an optional destination file name or directory on that host.  If
# no destination is given, the remote side of the copy is "user@host:" with
# nothing after the colon.  On failure a warning containing the raw value of
# $? is printed.
sub scp_file {
    my($file, $user, $host, $dest) = @_;

    $dest = "" unless defined $dest;

    # XXX system with a relative pathname is sort of gross
    # List form keeps the local shell away from the file name (spaces and
    # metacharacters in $file are passed to scp untouched).
    system("scp", $file, "$user\@$host:$dest");
    if ($?) {
        warn "scp failed $?\n";
        return 0;
    }
    return 1;
}
---|
201 | |
---|
# Use ssh to execute the given command on a remote machine.  Parameters are
# the remote user, the remote host, the command to execute, an optional
# prefix ($wname) for warning messages and an optional timeout in seconds.
#
# The command always runs in a forked child.  If a timeout is given, an alarm
# of that many seconds is set while waiting for the child.
#
# Return value:
#   - false (0) if the child could not be forked, or if the child was reaped
#     and ssh exited with a non-zero status.  A warning is printed in these
#     cases if a warning prefix was given.
#   - true if the child exited with status 0 (the value is the child's pid,
#     as returned by waitpid).
#   - true (1) if the wait timed out.  A warning is printed and the best is
#     assumed; the child is left running.
#   - waitpid's own return value if the child could not be reaped by this
#     call, since its status is then unknown.
sub ssh_cmd {
    my($user, $host, $cmd, $wname, $timeout) = @_;
    my $pid;                    # Child pid
    my $status;                 # Child's wait status ($?)

    $timeout = 0 unless $timeout;   # Force default timeout

    $pid = fork();
    unless (defined $pid) {
        # fork failed.  There is no child, so do NOT fall into the exec
        # below, which would replace this process with ssh.
        warn "$wname: cannot fork: $!\n" if $wname;
        return 0;
    }

    if ($pid == 0) {
        # Child process.  The single-string form is kept on purpose: callers
        # build $cmd with shell quoting in it.
        exec("ssh $user\@$host $cmd");
        # exec only returns if it failed; make that visible to the parent.
        exit 255;
    }

    # Parent process
    # The eval acts as a signal catcher.  If the alarm goes off inside the
    # eval, the die will put "alarm\n" into $@, otherwise the value of the
    # eval is the return value of waitpid.
    my $reaped = eval {
        local $SIG{'ALRM'} = sub{ die "alarm\n"; };
        my $rv;

        alarm $timeout;
        $rv = waitpid($pid, 0);
        $status = $?;           # Save before anything else can change it
        alarm 0;
        $rv;
    };

    # If the eval timed out, print a warning and assume the best.
    if ($@ eq "alarm\n" ) {
        warn "$wname timed out - pid $pid still live\n";
        return 1;
    }

    # Only trust $status if this call really collected our child.
    return $reaped unless defined $reaped && $reaped == $pid;

    if ($status != 0) {
        warn "$wname failed: exit status " . ($status >> 8) .
            ", signal " . ($status & 127) . "\n" if $wname;
        return 0;
    }
    return $reaped;
}
---|
246 | |
---|
# Copy the federation scripts to $dest_dir on the given host as the given
# user.  The names come from the global @scripts and the local copies are
# read from the global $local_script_dir.  The destination directory is
# created first (the result of that step is not checked).  Returns 0 as soon
# as one transfer fails and 1 if every script was copied.
sub ship_scripts {
    my($host, $user, $dest_dir) = @_;   # Where, who, where remotely

    ssh_cmd($user, $host, "mkdir -p $dest_dir");
    foreach my $script (@scripts) {
        my $local_copy = "$local_script_dir/$script";

        return 0 unless scp_file($local_copy, $user, $host, $dest_dir);
    }
    return 1;
}
---|
261 | |
---|
# Ship per-testbed configuration generated by this script to the remote /proj
# directories on the remote testbeds.  Everything under $src_dir is copied to
# $dest_dir on $host as $user: plain files with &scp_file, subdirectories by
# recursion.  Entries whose names start with a dot are skipped.  The remote
# directory is created and set to mode 770 before anything is copied.
#
# Returns 1 on success and 0 as soon as any step fails (including not being
# able to read $src_dir).
sub ship_configs {
    my($host, $user, $src_dir, $dest_dir) = @_; # Where, who, from, to
    my $dir = IO::Dir->new($src_dir);

    return 0 unless $dir;

    # All directories under $tmpdir are 770 so we can delete them later.
    ssh_cmd($user, $host, "mkdir -p $dest_dir") || return 0;
    ssh_cmd($user, $host, "chmod 770 $dest_dir") || return 0;

    # Test with defined(): an entry named "0" is false but is still a file
    # that has to be shipped, and must not end the loop.
    while (defined(my $entry = $dir->read())) {
        next if $entry =~ /^\./;
        if ( -d "$src_dir/$entry" ) {
            ship_configs($host, $user, "$src_dir/$entry",
                "$dest_dir/$entry") || return 0;
        }
        else {
            scp_file("$src_dir/$entry", $user, $host, $dest_dir) || return 0;
        }
    }
    return 1;
}
---|
285 | |
---|
# Start a sub section of the experiment on a given testbed.  The ops host,
# the user to start the experiment as and the project name on the remote
# testbed are pulled from the per-testbed hash passed in as $tbparams.
# Parameters are the testbed, the experiment id, the per-testbed parameters
# and the timeout (seconds) for the remote swap-in.  Configuration files are
# scp-ed over to the target testbed from the global $tmpdir/$tb directory.
#
# First the current state of the remote experiment is determined using
# expinfo.  If that state is not one this routine knows how to handle, it
# warns and returns 0 before anything on the remote testbed is touched.
# Otherwise the experiment is modified in place ("active"), modified and
# swapped in ("swapped") or created and swapped in ("none").  If everything
# succeeds, true is returned.  If the global $verbose is set progress
# messages are printed.
sub start_segment {
    my($tb, $eid, $tbparams, $timeout) = @_;    # testbed, experiment ID,
                                                # per-testbed parameters and
                                                # remote swap-in timeout
    my $host =                          # Host name of remote ops (FQDN)
        $tbparams->{$tb}->{'host'} . $tbparams->{$tb}->{'domain'};
    my $user = $tbparams->{$tb}->{'user'};      # user to pass to ssh
    my $pid = $tbparams->{$tb}->{'project'};    # remote project to start the
                                                # experiment under
    my $tclfile = "./$eid.$tb.tcl";     # Local tcl file with the
                                        # sub-experiment
    my $proj_dir = "/proj/$pid/exp/$eid/tmp";   # Where to stash federation
                                                # stuff
    my $tarfiles_dir = "/proj/$pid/tarfiles/$eid";  # Where to stash tarfiles
    my $rpms_dir = "/proj/$pid/rpms/$eid";      # Where to stash rpms
    my $to_hostname = "$proj_dir/hosts";        # remote hostnames file
    my $state;                          # State of remote experiment
    my $status = new IO::Pipe;          # The pipe to get status

    # Copy the hostnames file, the federation scripts and the per-testbed
    # configuration into the remote experiment directory.
    my $ship_support = sub {
        print "Transferring federation support files to $tb\n" if $verbose;
        scp_file("$tmpdir/hostnames", $user, $host, $to_hostname) ||
            return 0;
        ship_scripts($host, $user, $proj_dir) || return 0;
        ship_configs($host, $user, "$tmpdir/$tb", $proj_dir) || return 0;
        return 1;
    };

    # Copy the tarfiles and rpms directories, when they exist locally.  If
    # $announce is true the tarfile copy is reported even without $verbose.
    my $ship_packages = sub {
        my($announce) = @_;

        if ( -d "$tmpdir/tarfiles") {
            print "copying tarfiles to $tb\n" if $announce;
            ship_configs($host, $user, "$tmpdir/tarfiles", $tarfiles_dir) ||
                return 0;
        }
        if ( -d "$tmpdir/rpms") {
            ship_configs($host, $user, "$tmpdir/rpms", $rpms_dir) ||
                return 0;
        }
        return 1;
    };

    # Determine the status of the remote experiment
    $status->reader("ssh $user\@$host /usr/testbed/bin/expinfo $pid $eid") ||
        die "Can't ssh to $user\@$host:$!\n";
    # XXX: this is simple now.  Parsing may become more complex
    while (my $line = <$status>) {
        $line =~ /State: (\w+)/ && ($state = $1);
        $line =~ /No\s+such\s+experiment/ && ($state = "none");
    }
    $status->close();
    $state = "unknown" unless defined $state;   # expinfo output not understood
    print "$tb: $state\n";

    # Refuse to go on before any remote data is removed: nothing below knows
    # what to do with an experiment in any other state.
    unless ($state eq "active" || $state eq "swapped" || $state eq "none") {
        warn "unknown state: $state\n";
        return 0;
    }

    # Copy the experiment definition data over
    print "transferring subexperiment to $tb\n" if $verbose;
    scp_file("$tmpdir/$tb/$tclfile", $user, $host) || return 0;
    # Clear out any old experiment data; if not deleted, copies over it by
    # different users will fail.
    # (O /bin/csh, how evil thou art.  The -c and the escaped single quotes
    # force the /bin/sh interpretation of the trailing * (which we need to
    # keep tmp around))  Again, this needs to be done more properly once we
    # have a non-ssh interface here.)
    print "clearing experiment subdirs on $tb\n" if $verbose;
    ssh_cmd($user, $host, "/bin/sh -c \\'/bin/rm -rf $proj_dir/*\\'") ||
        return 0;
    print "clearing experiment tarfiles subdirs on $tb\n" if $verbose;
    ssh_cmd($user, $host, "/bin/rm -rf $tarfiles_dir/") ||
        return 0;
    print "creating tarfiles subdir $tarfiles_dir on $tb\n" if $verbose;
    ssh_cmd($user, $host, "mkdir -p $tarfiles_dir", "create tarfiles") ||
        return 0;
    print "clearing experiment rpms subdirs on $tb\n" if $verbose;
    ssh_cmd($user, $host, "/bin/rm -rf $rpms_dir/") ||
        return 0;
    print "creating rpms subdir $rpms_dir on $tb\n" if $verbose;
    ssh_cmd($user, $host, "mkdir -p $rpms_dir", "create rpms") ||
        return 0;

    if ($state eq "active") {
        # Remote experiment is active.  Modify it.
        # First copy new scripts and hostinfo into the remote /proj
        $ship_support->() || return 0;
        $ship_packages->(0) || return 0;

        print "Modifying $eid in place on $tb\n" if $verbose;
        ssh_cmd($user, $host, "/usr/testbed/bin/modexp -r -s -w $pid " .
            "$eid $tclfile", "modexp", $timeout) || return 0;
        return 1;
    }
    elsif ($state eq "swapped") {
        # Remote experiment is swapped out, modify it and swap it in.
        # First copy new scripts and hostinfo into the remote /proj (because
        # the experiment exists, the directory tree should be there).
        $ship_support->() || return 0;
        $ship_packages->(0) || return 0;

        print "Modifying $eid on $tb\n" if $verbose;
        ssh_cmd($user, $host, "/usr/testbed/bin/modexp -w $pid $eid $tclfile",
            "modexp") || return 0;
        print "Swapping $eid in on $tb\n" if $verbose;
        # Now start up
        ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
            "swapexp", $timeout) || return 0;
        return 1;
    }

    # $state is "none": no remote experiment.  Create one.  We do this in 2
    # steps so we can put the configuration files and scripts into the new
    # experiment directories.

    # Tarfiles have to exist for the creation to work
    $ship_packages->(1) || return 0;

    print "Creating $eid on $tb\n" if $verbose;
    ssh_cmd($user, $host, "/usr/testbed/bin/startexp -i -f -w -p " .
        "$pid -e $eid $tclfile", "startexp") || return 0;
    # After startexp succeeds, the per-experiment directories exist on the
    # remote testbed.
    # First copy new scripts and hostinfo into the remote /proj
    $ship_support->() || return 0;
    # Now start up
    print "Swapping $eid in on $tb\n" if $verbose;
    ssh_cmd($user, $host, "/usr/testbed/bin/swapexp -w $pid $eid in",
        "swapexp", $timeout) || return 0;
    return 1;
}
---|
441 | |
---|
# Swap a sub-experiment out - probably because another one has failed.
# Arguments are the testbed, the experiment ID and the per-testbed parameter
# hash, from which the user, ops host and project are taken.  Returns 1 if
# the remote swapexp command reported success and 0 otherwise.  A progress
# message is printed if the global $verbose is set.
sub stop_segment {
    my($tb, $eid, $tbparams) = @_;      # testbed, experiment ID and
                                        # per-testbed parameters
    my $user = $tbparams->{$tb}->{'user'};      # testbed user
    my $host = join("",                         # Ops node
        $tbparams->{$tb}->{'host'}, $tbparams->{$tb}->{'domain'});
    my $pid = $tbparams->{$tb}->{'project'};    # testbed project
    my $swap_out = "/usr/testbed/bin/swapexp -w $pid $eid out";

    print "Stopping $eid on $tb\n" if $verbose;
    return 0 unless ssh_cmd($user, $host, $swap_out, "swapexp (out)");
    return 1;
}
---|
458 | |
---|
# Fill the global $tbparams with results from the fedd call.  The command is
# passed in and run through the shell with its standard output and standard
# error captured in two temporary files.  If the command exits with status 0
# its output is parsed with &parse_testbeds_filename; otherwise its error
# output is collected into the returned message.  A string with any relevant
# error conditions is returned.  undef is success.
#
# The temporary files are created with File::Temp (unpredictable names, not
# /tmp/splitter.<pid>) and are removed when this routine returns.
sub fedd_access_request{
    my($cmd) = @_;
    my($rv)=undef;

    require File::Temp;

    # File::Temp->new dies on failure; turn that into an error string.
    my($out, $err) = eval {
        (File::Temp->new(TEMPLATE => "splitter.XXXXXX", TMPDIR => 1),
         File::Temp->new(TEMPLATE => "splitter.err.XXXXXX", TMPDIR => 1));
    };
    return "Cannot create temporary files: $@\n"
        unless defined $out && defined $err;

    my $outname = $out->filename;
    my $errname = $err->filename;

    system("$cmd 2> '$errname' > '$outname'");

    if ( ! $? ) {
        parse_testbeds_filename($outname, $tbparams) ||
            ($rv = "Error reading fedd output: $!\n");
    }
    else {
        $rv = "Fedd_client error:\n";
        # The error file may be unreadable; report what we can.
        if (my $f = IO::File->new($errname)) {
            while (my $line = <$f>) { $rv .= $line; }
            $f->close();
        }
    }
    # $out and $err unlink their files when they go out of scope here.
    return $rv;
}
---|
480 | |
---|
# Default project and group to pass to $tcl_splitter.  These are total dummy
# arguments; the splitter doesn't use them at all, but we supply them to keep
# our changes to the parser minimal.
$pid = $gid = "dummy";

# Argument processing.  Command line flags are parsed into %opts first; then
# the configuration file's parameters are added to the same hash, so the
# lookups below read single-letter flags and config file keys alike.
getopts('Ft:c:p:f:ndvNP:', \%opts);
$splitter_config = $opts{'c'} || "./splitter.conf";
$debug = $opts{'d'};
$verbose = $opts{'v'} || $opts{'d'};

&parse_config("$splitter_config", \%opts) ||
    die "Cannot read config file $splitter_config: $!\n";

warn "-N does nothing now. Only one testbeds format supported.\n"
    if $opts{'N'};
$fail_soft = $opts{'F'} || $opts{'failsoft'};
$startem = $opts{'n'} ? 0 : 1;          # If true, start the sub-experiments
$timeout = $opts{'t'} || $opts{'timeout'};
$eid = $opts{'experiment'};             # Experiment ID
$tcl = $opts{'f'} || shift;             # The experiment description
$master = $opts{'master'};              # Master testbed
$tmpdir = $opts{'tmpdir'} || $opts{'tempdir'}|| "/tmp"; # tmp files
$tb_config = $opts{'testbeds'} || "./testbeds"; # testbed configurations
# Local scripts.  Fall back to the current directory when the config file
# does not name one, rather than leaving the directory undefined.
$local_script_dir = $opts{'scriptdir'} || ".";
$muxmax = $opts{'muxlimit'} || 3;       # Number of connections muxed on one
                                        # gateway

$max_children = $opts{'p'} || $opts{'maxchildren'}
    if $opts{'p'} || $opts{'maxchildren'};

$smb_share = $opts{'smbshare'} ||       # Share to mount from the master
    die "Must give an SMB share\n";
$project_user = $opts{'smbuser'} ||     # User to mount project dirs as
    die "Must give an SMB user\n";
$auth_proj = $opts{'P'};

# tcl program to split experiments (changed during devel)
$tcl_splitter = $opts{'tclparse'} || "/usr/testbed/lib/ns2ir/parse.tcl";
# tclsh to call directly (changed during devel)
$tclsh = $opts{'tclsh'} || "/usr/local/bin/otclsh";
# fedd_client to get testbed access parameters
$fedd_client = $opts{'feddclient'} || "fedd_client";
---|
524 | |
---|
# Prefix to avoid collisions
$tmpdir .= "/split$$";

print "Temp files are in $tmpdir\n" if $verbose;
# Create a workspace
unless (-d "$tmpdir") {
    mkdir("$tmpdir") || die "Can't create $tmpdir: $!";
}

# If the keys are given, use them.  Otherwise create a set under $tmpdir.
# (Both 'gatewaypubkey' and 'gatewaysecretkey' have to be configured for the
# given keys to be used.)
if ( $opts{'gatewaypubkey'} && $opts{'gatewaysecretkey'}) {
    $gw_pubkey = $opts{'gatewaypubkey'};
    $gw_secretkey = $opts{'gatewaysecretkey'};
}
else {
    $keytype = $opts{'gatewaykeytype'} || "rsa";
    mkdir("$tmpdir/keys") || die "Can't create temoprary key dir: $!\n";
    $gw_pubkey = "$tmpdir/keys/fed.$keytype.pub";
    $gw_secretkey = "$tmpdir/keys/fed.$keytype";
    print "Generating $keytype keys\n" if $verbose;
    generate_ssh_keys($keytype, $gw_secretkey) ||
        die "Cannot generate kets:$@\n";
}
# Generate the basenames
($gw_pubkey_base = $gw_pubkey) =~ s#.*/##;
($gw_secretkey_base = $gw_secretkey) =~ s#.*/##;



# Validate scripts directory
for my $s (@scripts) {
    die "$local_script_dir/$s not in local script directory. Try -d\n"
        unless -r "$local_script_dir/$s";
}

die "Must supply file, master and experiment" unless $master && $tcl && $eid;

&parse_testbeds_filename($tb_config, $tbparams) ||
    die "Cannot testbed congfigurations from $tb_config: $!\n";

# Open a pipe to the splitter program and start it parsing the experiments
my $pipe = new IO::Pipe;
# NB no more -p call on parse call.
$pipe->reader("$tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl") ||
    die "Cannot execute $tclsh $tcl_splitter -s -x $muxmax -m $master $pid $gid $eid $tcl:$!\n";

# Parsing variables
my $ctb;                        # Current testbed
my %allocated;                  # If allocated{$tb} > 0, $tb is in use
my $destfile;                   # File that the sub-experiment tcl file is
                                # being written to, or "" if none.  Also used
                                # for hostnames file.
my $desthandle;                 # File handle for destfile
my $gateways;                   # when gateway lists are being processed this
                                # is the testbed whose gateways are being
                                # gathered.
my $control_gateway;            # Control net gateway for the current testbed
my %active_end;                 # If active_end{"a-b"} > 0 then a is the active
                                # end of the a <-> b connector pair.
---|
585 | |
---|
586 | # Parse the splitter output. This loop creates the sub experiments, gateway |
---|
587 | # configurations and hostnames file |
---|
588 | while (<$pipe>) { |
---|
589 | # Vtopo is virtual topology about the entire experiment. Right now ignore |
---|
590 | # it. We'll pass it to SEER soon. |
---|
591 | (/^#\s+Begin\s+Vtopo/../^#\s+End\s+Vtopo/) && do { |
---|
592 | next; |
---|
593 | }; |
---|
594 | # Allbeds lists all the testbeds that this experiment accesses. This code |
---|
595 | # acquires access to them and pulls in their access parameters from fedd. |
---|
596 | (/^#\s+Begin\s+Allbeds/../^#\s+End\s+Allbeds/) && do { |
---|
597 | next if /^#/; |
---|
598 | chomp; |
---|
599 | |
---|
600 | my $tb; # Current testbed |
---|
601 | my @nodes; # Current testbed node requests |
---|
602 | |
---|
        # Each Allbeds line starts with the testbed name, followed by the
        # node requirements of that testbed.  A node requirement is separated
        # from the testbed name and from other node requirements by a
        # vertical bar (|).  This pulls the testbed off the front (which must
        # be present) and splits the node descriptors out by the vertical
        # bar.  The first vertical bar (the one after the testbed) is removed
        # by the initial regular expression to avoid a null entry in @nodes.
        # The node requests are of the form image:type:count and can be
        # passed directly to fedd_client as parameters.
---|
612 | /([^|]+)\|?(.*)/ && do { |
---|
613 | my $n; # Scratch |
---|
614 | |
---|
615 | ($tb , $n) = ($1, $2); |
---|
616 | @nodes = split(/\|/, $n); |
---|
617 | }; |
---|
618 | |
---|
619 | # If this testbed has not had its access parameters read from fedd, try |
---|
620 | # to read them, if we have a way to talk to fedd |
---|
621 | unless ($tbparams->{$tb}->{'access'} || !$fedd_client) { |
---|
622 | my $access_pipe = new IO::Pipe || |
---|
623 | die "Can't open pipe to fedd:$!\n"; |
---|
624 | my $proj = $auth_proj ? " -p $auth_proj " : ""; |
---|
625 | my @cmds; |
---|
626 | my $rv; |
---|
627 | |
---|
628 | print("Checking access to $tb using " . $tbparams->{$tb}->{'uri'} |
---|
629 | . "\n") if $verbose; |
---|
630 | |
---|
631 | # First access command, implicitly uses localhost fedd |
---|
632 | push(@cmds,"$fedd_client -t " . |
---|
633 | $tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ". |
---|
634 | "-l $tb $proj" . (@nodes ? " -n " : " ") . |
---|
635 | join(" -n ", @nodes)); |
---|
636 | # Second try access command, implicitly directly contact testbed |
---|
637 | push(@cmds,"$fedd_client -t " . |
---|
638 | $tbparams->{$tb}->{'uri'} . " -u " . |
---|
639 | $tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ". |
---|
640 | "-l $tb $proj" . (@nodes ? " -n " : " ") . |
---|
641 | join(" -n ", @nodes)); |
---|
642 | # Third try access command, implicitly directly contact testbed |
---|
643 | # using only federated id. |
---|
644 | push(@cmds,"$fedd_client -f -a -t " . |
---|
645 | $tbparams->{$tb}->{'uri'} . " -u " . |
---|
646 | $tbparams->{$tb}->{'uri'} . " -T $ENV{HOME}/cacert.pem ". |
---|
647 | "-l $tb $proj" . (@nodes ? " -n " : " ") . |
---|
648 | join(" -n ", @nodes)); |
---|
649 | |
---|
650 | foreach my $c (@cmds) { |
---|
651 | print "$c\n" if $verbose; |
---|
652 | $rv = &fedd_access_request($c); |
---|
653 | warn($rv) if $rv; |
---|
654 | |
---|
655 | last if $rv eq undef; |
---|
656 | } |
---|
657 | die "Cannot get access to $tb\n" if $rv; |
---|
658 | } |
---|
659 | next; |
---|
660 | }; |
---|
661 | |
---|
662 | # Start of a sub-experiment |
---|
663 | /^#\s+Begin\s+Testbed\s+\((\w+)\)/ && do { |
---|
664 | $ctb = $1; |
---|
665 | |
---|
666 | # If we know the testbed, start collecting its sub experiment tcl |
---|
667 | # description. If not, warn the user. |
---|
668 | if ($tbparams->{$ctb}->{'access'}) { |
---|
669 | $allocated{$ctb}++; # Keep track of the testbeds allocated |
---|
670 | |
---|
671 | unless (-d "$tmpdir/$ctb") { |
---|
672 | mkdir("$tmpdir/$ctb") || die "Can't create $tmpdir/$ctb: $!"; |
---|
673 | } |
---|
674 | $destfile = "$tmpdir/$ctb/$eid.$ctb.tcl"; |
---|
675 | |
---|
676 | $desthandle = new IO::File(">$destfile") || |
---|
677 | die "Cannot open $destfile:$!\n"; |
---|
678 | } |
---|
679 | else{ |
---|
680 | warn "No such testbed $ctb\n"; |
---|
681 | $destfile = ""; |
---|
682 | } |
---|
683 | next; |
---|
684 | }; |
---|
685 | |
---|
# A "# End Testbed (name)" marker finishes the current sub-experiment.
if (/^#\s+End\s+Testbed\s+\((\w+)\)/) {
    # Simple syntax check: the closing marker has to name the testbed that
    # was opened.
    if ($1 ne $ctb) {
        die "Mismatched testbed markers ($1, $ctb)\n";
    }

    # Close out this experiment's tcl description.
    if ($desthandle) {
        $desthandle->close();
    }

    # Nothing is being collected until the next Begin marker.
    $ctb = "";
    $destfile = $ctb;
    next;
}
---|
694 | |
---|
# A "# Begin gateways (name)" marker opens the list of gateway entries for
# one testbed.
if (/^#\s+Begin\s+gateways\s+\((\w+)\)/) {
    $gateways = $1;

    if (!$allocated{$gateways}) {
        # No sub-experiment was collected for this testbed, so its gateway
        # entries are ignored.  A false $gateways switches off the
        # per-entry processing.
        warn "Gateways given (and ignored) for testbed not in use: " .
            "$gateways\n";
        $gateways = 0;
        next;
    }

    # Just in case.  The per-testbed directory should already have been
    # created when the testbed's sub-experiment began.
    my $gw_dir = "$tmpdir/$gateways";
    if (!-d $gw_dir) {
        mkdir($gw_dir) ||
            die "Can't create $tmpdir/$gateways: $!";
    }
    next;
}
---|
# End of the gateways section.  For a testbed that is in use, output the
# client config, which names the control gateway recorded while the
# section's entries were processed.
/^#\s+End\s+gateways\s+\((\w+)\)/ && do {
    # $gateways is false when the section belonged to a testbed that is not
    # in use (or no section was open).  Nothing was generated for it, so
    # there is no directory to write into and nothing to check.
    next unless $gateways;

    # Testbed names are strings and must be compared with eq; a numeric ==
    # treats any two non-numeric names as equal and never reports a
    # mismatch.
    die "Mismatched gateway markers ($1, $gateways)\n"
        unless $gateways eq $1;

    if ($control_gateway) {
        # Client config
        my $conf_path = "$tmpdir/$gateways/client.conf";
        my $master_project = $tbparams->{$master}->{'project'};
        my $cc = IO::File->new(">$conf_path") ||
            die "Can't open $tmpdir/$gateways/client.conf: $!\n";

        print $cc "ControlGateway: $control_gateway\n";
        print $cc "SMBShare: $smb_share\n";
        print $cc "ProjectUser: $project_user\n";
        print $cc "ProjectName: $master_project\n";
        $cc->close();
    }
    else { warn "No control gateway for $gateways?\n"; }

    # NOTE(review): $control_gateway is not cleared here, so a later section
    # without a control/both entry would reuse this section's value --
    # confirm whether that is intended.
    $gateways = 0;
    next;
};
---|
# Beginning of the hostnames list.  Collection is always in the hostnames
# file under $tmpdir.
/^#\s+Begin\s+hostnames/ && do {
    $destfile = "$tmpdir/hostnames";
    $desthandle = IO::File->new(">$destfile") ||
        die "Can't open $destfile:$!\n";
    next;
};
# End of the hostnames list.
/^#\s+End\s+hostnames/ && do {
    # Guarded like the End Testbed handler, so an End marker with no
    # matching Begin does not call close() on an undefined handle.
    $desthandle->close() if $desthandle;
    $destfile = "";
    next;
};
---|
750 | |
---|
# Generate gateway configuration info, one file per line.  While a gateways
# section is open ($gateways holds the source testbed's name) each line
# describes one gateway node: it yields a <host>.gw.conf file in the source
# testbed's directory under $tmpdir, and the connector keys are copied
# there too.
$gateways && do {
    chomp;
    # Fields: destination testbed, this gateway's host name, the peer
    # gateway's host name, and the connection type.
    my($dtb, $myname, $desthost, $type) = split(" ", $_);

    # A blank or truncated line cannot describe a gateway.  Skip it rather
    # than write a configuration file built from undefined fields.
    unless (defined $type) {
        warn "Ignoring malformed gateway entry for $gateways: $_\n" if /\S/;
        next;
    }

    # Many of these are to simplify print statements
    my $sproject =                      # Project of the source
        $tbparams->{$gateways}->{'project'};
    my $dproject =                      # Project of the destination
        $tbparams->{$dtb}->{'project'};
    my $master_domain =                 # Domain of the master testbed
        $tbparams->{$master}->{'domain'};
    my $fs =                            # Master fs node (FQDN)
        $tbparams->{$master}->{'fs'} . $master_domain;
    my $boss =                          # Master boss node (FQDN)
        $tbparams->{$master}->{'boss'} . $master_domain;
    my $event_server =                  # Master event-server (FQDN)
        $tbparams->{$master}->{'eventserver'} . $master_domain;
    my $remote_event_server =           # Slave event-server (FQDN)
        $tbparams->{$dtb}->{'eventserver'} . $tbparams->{$dtb}->{'domain'};
    my $remote_script_dir =             # Remote fed script location
        "/proj/$dproject/exp/$eid/tmp";
    my $src_script_dir =                # Local fed script location
        "/proj/$sproject/exp/$eid/tmp";
    my $tunnel_cfg =                    # Use DETER's config stuff
        $tbparams->{$gateways}->{'tun'} || "false";
    my $gw_dir = "$tmpdir/$gateways";   # Where this testbed's files go
    my $active;                         # Is this the active side of the
                                        # connector?

    # Experiment-qualified domains for the source and the destination:
    # .<eid>.<project><testbed domain>
    my $sdomain = ".$eid.$sproject" . $tbparams->{$gateways}->{'domain'};
    my $ddomain = ".$eid.$dproject" . $tbparams->{$dtb}->{'domain'};

    my $conf_file = "$myname$sdomain.gw.conf";
    my $remote_conf_file = "$desthost$ddomain.gw.conf";
    # translate to lower case so the `hostname` hack for specifying
    # configuration files works.
    $conf_file =~ tr/A-Z/a-z/;
    $remote_conf_file =~ tr/A-Z/a-z/;

    # If either end of this link is in the master side of the testbed, that
    # side is the active end.  Otherwise the first testbed encountered in
    # the file will be the active end.  The %active_end hash keeps track of
    # those decisions, keyed by "<active testbed>-<passive testbed>".
    if ( $dtb eq $master ) { $active = "false"; }
    elsif ( $gateways eq $master ) { $active = "true"; }
    elsif ( $active_end{"$dtb-$gateways"} ) { $active = "false"; }
    else { $active_end{"$gateways-$dtb"}++; $active = "true"; }

    # This is used to create the client configuration.
    $control_gateway = "$myname$sdomain"
        if $type =~ /(?:control|both)/;

    # Write out the file
    my $gwconfig = IO::File->new(">$gw_dir/$conf_file") ||
        die "can't open $tmpdir/$gateways/$conf_file: $!\n";

    print $gwconfig
        "Active: $active\n",
        "TunnelCfg: $tunnel_cfg\n",
        "BossName: $boss\n",
        "FsName: $fs\n",
        "EventServerName: $event_server\n",
        "RemoteEventServerName: $remote_event_server\n",
        "Type: $type\n",
        "RemoteScriptDir: $remote_script_dir\n",
        "EventRepeater: $src_script_dir/fed_evrepeater\n",
        "RemoteExperiment: $dproject/$eid\n",
        "LocalExperiment: $sproject/$eid\n",
        "RemoteConfigFile: $remote_script_dir/$remote_conf_file\n",
        "Peer: $desthost$ddomain\n",
        "Pubkeys: $src_script_dir/$gw_pubkey_base\n",
        "Privkeys: $src_script_dir/$gw_secretkey_base\n";
    # Buffered write errors only surface at close, so check it.
    $gwconfig->close() ||
        die "Error writing $tmpdir/$gateways/$conf_file: $!\n";

    # This testbed has a gateway (most will) so make a copy of the keys it
    # needs in this testbed's subdirectory.  start_segment will transfer
    # them.
    my $pubkey_copy = "$gw_dir/$gw_pubkey_base";
    unless (-r $pubkey_copy) {
        copy($gw_pubkey, $pubkey_copy) ||
            die "Can't copy pubkeys ($gw_pubkey to $pubkey_copy): $!\n";
    }
    # Only the active side of a connector gets the secret key.
    if ($active eq "true") {
        my $secretkey_copy = "$gw_dir/$gw_secretkey_base";
        unless (-r $secretkey_copy) {
            copy($gw_secretkey, $secretkey_copy) ||
                die "Can't copy secret keys ($gw_secretkey to " .
                    "$secretkey_copy): $!\n";
        }
    }

    # done processing gateway entry, ready for next line
    next;
};
---|
# Lines from the "# Begin tarfiles" marker through the "# End tarfiles"
# marker list the tarfiles in use; remember each one.
if (/^#\s+Begin\s+tarfiles/../^#\s+End\s+tarfiles/) {
    # Comment lines in the range (the markers included) name no file.
    unless (/^#/) {
        chomp;
        push(@tarfiles, $_);
    }
    next;
}
# The rpms section is laid out the same way.
if (/^#\s+Begin\s+rpms/../^#\s+End\s+rpms/) {
    unless (/^#/) {
        chomp;
        push(@rpms, $_);
    }
    next;
}
---|
863 | |
---|
# Anything that reaches this point is a line to be collected.  It is
# dropped when no destination file is set (unidentified testbed).
next unless $destfile;

# Values used in the substitutions below: the current testbed's own
# settings where present, the defaults otherwise.  The master testbed uses
# its master start commands, every other testbed the ordinary ones.
my $on_master = ($ctb eq $master);
my $node_type = $tbparams->{$ctb}->{'gwtype'} || $def_gwtype;
my $node_image = $tbparams->{$ctb}->{'gwimage'} || $def_gwimage;
my $gw_cmd = $on_master
    ? ($tbparams->{$ctb}->{'mgwstart'} || $def_mgwstart)
    : ($tbparams->{$ctb}->{'gwstart'} || $def_gwstart);
my $exp_cmd = $on_master
    ? ($tbparams->{$ctb}->{'mexpstart'} || $def_mexpstart)
    : ($tbparams->{$ctb}->{'expstart'} || $def_expstart);
my $proj_name = $tbparams->{$ctb}->{'project'};

# Substitute variables.  The order matters: the start commands are expanded
# first, and GWCONF expands to text containing FEDDIR, so FEDDIR has to be
# replaced last.
s/GWTYPE/$node_type/g;
s/GWIMAGE/$node_image/g;
s/GWSTART/$gw_cmd/g;
s/EXPSTART/$exp_cmd/g;
# XXX: oh is this bad
s#GWCONF#FEDDIR\`hostname\`.gw.conf#g;
s#PROJDIR#/proj/$proj_name/#g;
s#EID#$eid#g;
s#FEDDIR#/proj/$proj_name/exp/$eid/tmp/#g;
print $desthandle $_;
---|
891 | } |
---|
# All of the input has been processed; the master testbed must have been
# given at least one sub-experiment.
$pipe->close();
die "No nodes in master testbed ($master)\n" unless $allocated{$master};

# Copy tarfiles and rpms needed at remote sites to the staging directories.
# Start_segment will distribute them.  Each entry gives the word used in
# error messages, the staging subdirectory, and the files to copy; the
# tarfiles are handled before the rpms.
for my $stage (["tarfile", "tarfiles", \@tarfiles], ["rpm", "rpms", \@rpms]) {
    my($label, $subdir, $files) = @$stage;
    my $stage_dir = "$tmpdir/$subdir";

    for my $file (@$files) {
        if (!-r $file) {
            die "$label '$file' unreadable: $!\n";
        }
        # The staging directory is only created once a file needs it.
        if (!-d $stage_dir) {
            mkdir($stage_dir) ||
                die "Can't create $stage_dir:$!\n";
        }
        copy($file, $stage_dir) ||
            die "Can't copy $file to $stage_dir:$!\n";
    }
}

# Stop here unless the sub-experiments are to be started.
exit(0) unless $startem;
---|
918 | |
---|
my %started;            # If $started{$tb} then $tb successfully started
my %child;              # $child{$pid} is the testbed that the child
                        # process with that pid is starting
my $nworking = 0;       # Number of children working on swapin

# Reap one child process and record the outcome.  Returns 1 if the child
# started its testbed, 0 if it failed, and undef if there was nothing to
# account for (no children left, or a pid that is not in %child).
my $reap_child = sub {
    my $kid = wait();

    if ($kid == -1) {
        # There are no children left even though some were still counted
        # as running.  Reset the bookkeeping so callers stop waiting; the
        # testbeds involved are never marked as started.
        warn "wait returned without reaping: $!\n";
        %child = ();
        $nworking = 0;
        return;
    }

    # $? is the whole wait status.  $? >> 8 is the exit code of the
    # subprocess, which is non-zero if the &start_segment routine failed;
    # the low bits are non-zero if the child was killed by a signal, which
    # must not be mistaken for success.
    my $status = $?;
    my $exit_code = ($status >> 8);

    print "Child $kid completed exit code ($exit_code)\n" if $verbose;

    my $tb = delete $child{$kid};
    unless (defined $tb) {
        # Not one of the swap-in children: leave the counters alone.
        warn "Reaped a pid we did not start?? ($kid)\n";
        return;
    }
    $nworking--;
    return 0 if $status;
    $started{$tb}++;
    return 1;
};

# Start up the slave sub-experiments first
TESTBED:
for my $tb (keys %allocated) {
    next TESTBED if $tb eq $master;

    # Wait for a free slot while the maximum number of children is busy.
    while ( $nworking > 0 && $nworking >= $max_children ) {
        print "Waiting for a child process to complete\n" if $verbose;
        my $ok = $reap_child->();
        # Do not start any more testbeds once one has failed.
        last TESTBED if defined($ok) && !$ok;
    }

    my $kid = fork();
    unless (defined $kid) {
        # fork failed; this is still the parent.  Stop launching, as for a
        # failed child, so the failure is dealt with after the running
        # children finish.
        warn "Cannot fork to start testbed $tb: $!\n";
        last TESTBED;
    }
    if ($kid) {
        # Parent process
        $nworking++;
        $child{$kid} = $tb;
        print "Started process $kid to start testbed $tb\n"
            if $verbose;
    }
    else {
        # Child.  Note that we reverse the sense of the return code when it
        # becomes an exit value.  Zero exit values indicate success.
        exit(!&start_segment($tb, $eid, $tbparams, $timeout));
    }
}

# Now wait for any still running processes.
while ( $nworking ) {
    print "Waiting for a child process to complete ($nworking running)\n"
        if $verbose;
    $reap_child->();
}
---|
979 | |
---|
# Now the master
if (&start_segment($master, $eid, $tbparams, $timeout)) {
    $started{$master}++;
}

# If any testbed failed, swap the rest out.  Every allocated testbed should
# have an entry in %started by now; with $fail_soft set the ones that did
# start are left running instead.
if ( !$fail_soft && scalar(keys %started) != scalar(keys %allocated)) {
    for my $tb (keys %started) { &stop_segment($tb, $eid, $tbparams); }
    print "Error starting experiment\n";
    exit(1);
}
print "Experiment started\n";
print "Deleting $tmpdir (-d to leave them in place)\n" if $verbose && !$debug;
unless ($debug) {
    # The list form of system() runs rm directly, so the directory name is
    # passed as a single argument and is never interpreted by a shell.
    system("rm", "-rf", $tmpdir) == 0
        or warn "Could not remove $tmpdir\n";
}
exit(0);    # set the exit value
---|
995 | |
---|
996 | =pod |
---|
997 | |
---|
998 | =head1 NAME |
---|
999 | |
---|
1000 | B<splitter.pl> |
---|
1001 | |
---|
1002 | =head1 SYNOPSIS |
---|
1003 | |
---|
1004 | B<splitter.pl> [B<-ndF>] [B<-t> I<secs>] [B<-c> F<config_file>] |
---|
1005 | [B<-f> F<experiment_tcl>] [B<-p> I<max_procs>] [F<experiment_tcl>] |
---|
1006 | |
---|
1007 | =head1 DESCRIPTION |
---|
1008 | |
---|
1009 | B<splitter.pl> invokes the DETER experiment parser to split an annotated |
---|
1010 | experiment into multiple sub-experiments and instantiates the sub-experiments on |
---|
1011 | their intended testbeds. Annotation is accomplished using the |
---|
1012 | tb-set-node-testbed command, added to the parser. |
---|
1013 | |
---|
1014 | Much of the script's behavior depends on the configuration file, specified with |
---|
1015 | the B<-c> flag and defaulting to F<./splitter.conf>. |
---|
1016 | |
---|
1017 | The testbed labels supplied in the B<tb-set-node-testbed> command are |
---|
1018 | meaningful based on their presence in the testbeds file. That file can be |
---|
1019 | specified in the configuration file using the B<Testbeds> directive, and |
---|
1020 | defaults to F<./testbeds>. The syntax is described below. |
---|
1021 | |
---|
1022 | Most of the intermediate files are staged in a sub-directory of a temporary |
---|
1023 | files directory and deleted at the end of the script. Specifying the B<-d> |
---|
1024 | flag on the command line avoids the deletion for debugging. By default the |
---|
1025 | temporary files directory is F</tmp> and can be reset in the |
---|
1026 | configuration file using the B<Tmpdir> directive. Intermediate files are |
---|
1027 | stored under a subdirectory formed by adding the process ID of the splitter |
---|
1028 | process. For example, if the temporary files directory is F</tmp> and the |
---|
1029 | B<splitter.pl> process ID is 2323, the temporary files will be stored in |
---|
1030 | F</tmp/split2323/>. |
---|
1031 | |
---|
1032 | The experiment is split out into one experiment description per testbed in the |
---|
1033 | temporary directory named as F<experiment.testbed.tcl> where the experiment is |
---|
1034 | the experiment ID given in the configuration file, and the testbed is the |
---|
1035 | tb-set-node-testbed parameter for the nodes in the file. |
---|
1036 | |
---|
1037 | If the B<-n> option is absent the sub-experiments are then instantiated on |
---|
1038 | their testbeds. (Here B<-n> is analogous to its use in L<make(1)>). |
---|
1039 | Per-testbed parameters are set in the testbeds file. Sub-experiments on |
---|
1040 | slave testbeds are instantiated in a random order, but the master testbed is |
---|
1041 | currently instantiated last. |
---|
1042 | |
---|
1043 | Slave testbeds can be swapped in in parallel by specifying the B<-p> parameter |
---|
1044 | and the maximum number of simultaneous processes to start. |
---|
1045 | |
---|
1046 | Scripts to start federation (the federation kit) are copied into the local |
---|
1047 | experiment's tmp file - e.g., F</proj/DETER/exp/simple-split/tmp>. These are |
---|
1048 | taken from the directory given by the B<ScriptDir> directive in the |
---|
1049 | configuration file. |
---|
1050 | |
---|
1051 | If B<-t> is given the parameter is treated as a parameter to B<Timeout> in |
---|
1052 | F<splitter.conf>. |
---|
1053 | |
---|
1054 | If any sub-experiment fails to instantiate, the other sub-experiments are |
---|
1055 | swapped out. B<-F> avoids this swap out, which can also be specified as |
---|
1056 | B<FailSoft: true> in F<splitter.conf>. |
---|
1057 | |
---|
1058 | =head2 Configuration File |
---|
1059 | |
---|
1060 | The configuration file is a simple set of colon-separated parameters and |
---|
1061 | values. A configuration file must be present, either specified in the B<-c> |
---|
1062 | flag or the default F<./splitter.conf>. All the parameter names are case |
---|
1063 | insensitive, but should not include any whitespace. Parameter values may |
---|
1064 | include whitespace, but no newlines. |
---|
1065 | |
---|
1066 | Possible parameters are: |
---|
1067 | |
---|
1068 | =over 5 |
---|
1069 | |
---|
1070 | =item Experiment |
---|
1071 | |
---|
1072 | The name of the experiment on the various testbeds |
---|
1073 | |
---|
1074 | =item Master |
---|
1075 | |
---|
1076 | The master testbed label from the testbeds file, described below. |
---|
1077 | |
---|
1078 | =item Testbeds |
---|
1079 | |
---|
1080 | The testbeds file described below, giving per-testbed parameters. If this |
---|
1081 | directive is absent the testbeds file defaults to F<./testbeds> |
---|
1082 | |
---|
1083 | =item ScriptDir |
---|
1084 | |
---|
1085 | Location of the default federation scripts, i.e. the federation kit. |
---|
1086 | |
---|
1087 | =item GatewayPubkey |
---|
1088 | |
---|
1089 | =item GatewaySecretKey |
---|
1090 | |
---|
1091 | The names of the files containing secret and public keys to use in setting up |
---|
1092 | tunnels between testbeds. If given they are used, otherwise keys are generated. |
---|
1093 | |
---|
1094 | =item GatewayKeyType |
---|
1095 | |
---|
1096 | This controls the kind of SSH keys generated to configure the gateways. If |
---|
1097 | given this must be B<dsa> or B<rsa>, and it defaults to B<rsa>. The parameter |
---|
1098 | is case insensitive. |
---|
1099 | |
---|
1100 | =item TmpDir |
---|
1101 | |
---|
1102 | =item TempDir |
---|
1103 | |
---|
1104 | The directory where temporary files are created. These are synonyms; if |
---|
1105 | both are specified, B<TmpDir> has priority. If neither is specified, |
---|
1106 | F</tmp> is used. |
---|
1107 | |
---|
1108 | =item SMBShare |
---|
1109 | |
---|
1110 | The SMB share on the master testbed that will be exported to remote clients. |
---|
1111 | |
---|
1112 | =item SMBUser |
---|
1113 | |
---|
1114 | The experiment user to mount project directories as. This user needs to be a |
---|
1115 | member of the exported experiment - that is one of the users in the project |
---|
1116 | containing this experiment on the master testbed. |
---|
1117 | |
---|
1118 | =item Timeout |
---|
1119 | |
---|
1120 | Value in seconds after which a swap-in operation will be considered a success. |
---|
1121 | Often long swap-ins will hang when there are partial failures. This works |
---|
1122 | around this issue. (This behavior can be requested on the command line by |
---|
1123 | specifying B<-t> I<secs>.) |
---|
1124 | |
---|
1125 | =item FailSoft |
---|
1126 | |
---|
1127 | If not set, failure of any sub experiment swaps the rest out. Setting this to |
---|
1128 | any value avoids this swap out. (This behavior can be requested on the command |
---|
1129 | line by specifying B<-F>.) |
---|
1130 | |
---|
1131 | =item MuxLimit |
---|
1132 | |
---|
1133 | The maximum number of links/lans carried by one gateway pair. |
---|
1134 | |
---|
1135 | =item Tclparse |
---|
1136 | |
---|
1137 | The pathname to the experiment parsing program. Only developers should set |
---|
1138 | this. |
---|
1139 | |
---|
1140 | =item Tclsh |
---|
1141 | |
---|
1142 | The pathname to the local oTcl shell. Only developers should set |
---|
1143 | this. |
---|
1144 | |
---|
1145 | =back |
---|
1146 | |
---|
1147 | =head2 Testbeds file |
---|
1148 | |
---|
1149 | The testbeds file (F<./testbeds> unless overridden by the B<Testbeds> |
---|
1150 | directive) is a file of scoped attribute-value pairs where each attribute is |
---|
1151 | specified on a separate line. Each testbed's parameters are preceded by the |
---|
1152 | testbed label in brackets ([]) on a line by itself. After that the parameters |
---|
1153 | are specified as parameter: value. This is essentially the same format as the |
---|
1154 | configuration file. Parameters are: |
---|
1155 | |
---|
1156 | =over 4 |
---|
1157 | |
---|
1158 | =item User |
---|
1159 | |
---|
1160 | The user under which to make requests to this testbed. The user running |
---|
1161 | B<splitter.pl> must be able to authenticate as this user under L<ssh(1)> to this |
---|
1162 | testbed. |
---|
1163 | |
---|
1164 | =item OpsNode |
---|
1165 | |
---|
1166 | The host name of the testbed's ops node. The user calling B<splitter.pl> must |
---|
1167 | be able to execute commands on this host via L<ssh(1)>. |
---|
1168 | |
---|
1169 | =item Domain |
---|
1170 | |
---|
1171 | The domain of nodes in this testbed (including the ops host). This parameter |
---|
1172 | should always start with a period. |
---|
1173 | |
---|
1174 | =item Project |
---|
1175 | |
---|
1176 | The project under which to instantiate sub-experiments on this testbed. |
---|
1177 | |
---|
1178 | =item ConnectorType |
---|
1179 | |
---|
1180 | The node type for inter-testbed connector nodes on this testbed. |
---|
1181 | |
---|
1182 | =item SlaveNodeStartCmd |
---|
1183 | |
---|
1184 | The start command to run on experimental nodes when this testbed is used as a |
---|
1185 | slave. In all the start commands the following string substitutions are made: |
---|
1186 | |
---|
1187 | =over 10 |
---|
1188 | |
---|
1189 | =item FEDDIR |
---|
1190 | |
---|
1191 | The local experiment's federation scripts directory. Each local experiment |
---|
1192 | will have this replaced by the scripts directory on its local boss. |
---|
1193 | |
---|
1194 | =item GWCONF |
---|
1195 | |
---|
1196 | The full pathname of the gateway configuration file. As with FEDDIR, this is |
---|
1197 | on the local boss. |
---|
1198 | |
---|
1199 | =item PROJDIR |
---|
1200 | |
---|
1201 | The project directory on the local boss. |
---|
1202 | |
---|
1203 | =item EID |
---|
1204 | |
---|
1205 | The local experiment name. |
---|
1206 | |
---|
1207 | =back |
---|
1208 | |
---|
1209 | All startcmds specified in F<testbeds> undergo these expansions. |
---|
1210 | |
---|
1211 | =item SlaveConnectorStartCmd |
---|
1212 | |
---|
1213 | The start command to run on gateway nodes when this testbed is used as a slave. |
---|
1214 | The same string substitutions are made in this command as in SlaveNodeStartCmd. |
---|
1215 | |
---|
1216 | =item MasterNodeStartCmd |
---|
1217 | |
---|
1218 | The start command to run on experimental nodes when this testbed is used as a |
---|
1219 | master. The same string substitutions are made in this command as in |
---|
1220 | SlaveNodeStartCmd. |
---|
1221 | |
---|
1222 | =item MasterConnectorStartCmd |
---|
1223 | |
---|
1224 | The start command to run on gateway nodes when this testbed is used as a |
---|
1225 | master. The same string substitutions are made in this command as in |
---|
1226 | SlaveNodeStartCmd. |
---|
1227 | |
---|
1228 | =item ConnectorImage |
---|
1229 | |
---|
1230 | The disk image to be loaded on a gateway node on this testbed. |
---|
1231 | |
---|
1232 | =item FileServer |
---|
1233 | |
---|
1234 | The node in the master testbed from which filesystems are mounted. |
---|
1235 | |
---|
1236 | =item Boss |
---|
1237 | |
---|
1238 | The node in the master testbed that controls the testbed. |
---|
1239 | |
---|
1240 | =item TunnelCfg |
---|
1241 | |
---|
1242 | True if the connector needs to do DETER federation. This parameter will |
---|
1243 | probably be removed. |
---|
1244 | |
---|
1245 | |
---|
1246 | =back |
---|
1247 | |
---|
1248 | =head1 ENVIRONMENT |
---|
1249 | |
---|
1250 | B<splitter.pl> reads C<HOME> to locate the F<cacert.pem> file it passes to the |
---|
1251 | fedd client. It also calls out to L<ssh(1)> and (indirectly) to L<sh(1)>, |
---|
1252 | which may be influenced by the environment. |
---|
1253 | |
---|
1254 | =head1 BUGS |
---|
1255 | |
---|
1256 | A deprecated B<-N> flag was used to select testbeds file format. Only one |
---|
1257 | format is supported now, and B<-N> generates a warning, but otherwise does not |
---|
1258 | affect B<splitter.pl>. |
---|
1259 | |
---|
1260 | =head1 SEE ALSO |
---|
1261 | |
---|
1262 | L<sh(1)>, L<ssh(1)> |
---|
1263 | |
---|
1264 | =cut |
---|