- Timestamp:
- May 30, 2010 10:46:00 AM (14 years ago)
- Branches:
- axis_example, compt_changes, info-ops, master, version-3.01, version-3.02
- Children:
- 60961f5
- Parents:
- 42cd8a7
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
fedd/federation/protogeni_access.py
r42cd8a7 r37ed9a5 366 366 raise service_error(service_error.req, "No such allocation") 367 367 368 # Turn the manifest into a dict were each virtual nodename (i.e. the topdl369 # name) has an entry with the allocated machine in hostname and the370 # interfaces in 'interfaces'. I love having XML parser code lying around.371 368 def manifest_to_dict(self, manifest, ignore_debug=False): 369 """ 370 Turn the manifest into a dict were each virtual nodename (i.e. the 371 topdl name) has an entry with the allocated machine in hostname and the 372 interfaces in 'interfaces'. I love having XML parser code lying 373 around. 374 """ 372 375 if self.create_debug and not ignore_debug: 373 376 self.log.debug("Returning null manifest dict") … … 416 419 417 420 def fake_manifest(self, topo): 421 """ 422 Fake the output of manifest_to_dict with a bunch of generic node an 423 interface names, for debugging. 424 """ 418 425 node = { } 419 426 for i, e in enumerate([ e for e in topo.elements \ … … 648 655 continue 649 656 650 def configure_nodes(self, segment_commands, topo, nodes, user, pubkey, secretkey, 651 stagingdir, tmpdir): 652 653 # These little functions/functors just make things more readable 657 def write_node_config_script(self, elem, node, user, pubkey, 658 secretkey, stagingdir, tmpdir): 659 """ 660 Write out the configuration script that is to run on the node 661 represented by elem in the topology. This is called 662 once per node to configure. 663 """ 664 # These little functions/functors just make things more readable. Each 665 # one encapsulates a small task of copying software files or installing 666 # them. 654 667 class stage_file_type: 668 """ 669 Write code copying file sfrom the staging host to the host on which 670 this will run. 671 """ 655 672 def __init__(self, user, host, stagingdir): 656 673 self.user = user … … 668 685 669 686 def install_tar(script, loc, base): 687 """ 688 Print code to script to install a tarfile in loc. 689 """ 670 690 tar = "/bin/tar" 671 691 mkdir="/bin/mkdir" … … 675 695 676 696 def install_rpm(script, base): 697 """ 698 Print code to script to install an rpm 699 """ 677 700 rpm = "/bin/rpm" 678 701 print >>script, "%s --install %s" % (rpm, base) 679 702 703 ifconfig = "/sbin/ifconfig" 704 stage_file = stage_file_type(user, self.staging_host, stagingdir) 705 pname = node.get('hostname', None) 680 706 fed_dir = "/usr/local/federation" 681 707 fed_etc_dir = "%s/etc" % fed_dir … … 683 709 fed_lib_dir = "%s/lib" % fed_dir 684 710 685 ifconfig = "/sbin/ifconfig" 686 687 stage_file = stage_file_type(user, self.staging_host, stagingdir) 688 689 for e in [ e for e in topo.elements if isinstance(e, topdl.Computer)]: 690 vname = e.name 691 node = nodes.get(vname, { }) 692 pname = node.get('hostname', None) 693 if pname: 694 script = open("%s/%s.startup" %(tmpdir, pname), "w") 695 # Reset the interfaces to the ones in the topo file 696 for i in [ i for i in e.interface \ 697 if not i.get_attribute('portal')]: 698 pinf = node['interfaces'].get(i.name, None) 699 addr = i.get_attribute('ip4_address') 700 netmask = i.get_attribute('ip4_netmask') or '255.255.255.0' 701 if pinf and addr: 702 print >>script, \ 703 "%s %s %s netmask %s" % \ 704 (ifconfig, pinf, addr, netmask) 705 else: 706 self.log.error("Missing interface or address for %s" \ 707 % i.name) 708 709 for l, f in self.federation_software: 711 if pname: 712 sfile = "%s/%s.startup" % (tmpdir, pname) 713 script = open(sfile, "w") 714 # Reset the interfaces to the ones in the topo file 715 for i in [ i for i in elem.interface \ 716 if not i.get_attribute('portal')]: 717 pinf = node['interfaces'].get(i.name, None) 718 addr = i.get_attribute('ip4_address') 719 netmask = i.get_attribute('ip4_netmask') or '255.255.255.0' 720 if pinf and addr: 721 print >>script, \ 722 "%s %s %s netmask %s" % \ 723 (ifconfig, pinf, addr, netmask) 724 else: 725 self.log.error("Missing interface or address for %s" \ 726 % i.name) 727 728 for l, f in self.federation_software: 729 base = os.path.basename(f) 730 stage_file(script, base) 731 if l: install_tar(script, l, base) 732 else: install_rpm(script, base) 733 734 for s in elem.software: 735 s_base = s.location.rpartition('/')[2] 736 stage_file(script, s_base) 737 if s.install: install_tar(script, s.install, s_base) 738 else: install_rpm(script, s_base) 739 740 for f in ('hosts', pubkey, secretkey, 'client.conf', 741 'userconf'): 742 stage_file(script, f, fed_etc_dir) 743 if self.sshd: 744 stage_file(script, self.sshd, fed_bin_dir) 745 if self.sshd_config: 746 stage_file(script, self.sshd_config, fed_etc_dir) 747 748 # Look in tmpdir to get the names. They've all been copied 749 # into the (remote) staging dir 750 if os.access("%s/%s.gw.conf" % (tmpdir, elem.name), os.R_OK): 751 stage_file(script, "%s.gw.conf" % elem.name, fed_etc_dir) 752 753 # Hackery dackery dock: the ProtoGENI python is really ancient. 754 # A modern version (though packaged for Mandrake (remember 755 # Mandrake? good times, good times)) should be in the 756 # federation_software list, but we need to move rename is for 757 # SEER. 758 print >>script, "rm /usr/bin/python" 759 print >>script, "ln /usr/bin/python2.4 /usr/bin/python" 760 # Back to less hacky stuff 761 762 # Start commands 763 if elem.get_attribute('portal') and self.portal_startcommand: 764 # Install portal software 765 for l, f in self.portal_software: 710 766 base = os.path.basename(f) 711 767 stage_file(script, base) … … 713 769 else: install_rpm(script, base) 714 770 715 for s in e.software: 716 s_base = s.location.rpartition('/')[2] 717 stage_file(script, s_base) 718 if s.install: install_tar(script, s.install, s_base) 719 else: install_rpm(script, s_base) 720 721 for f in ('hosts', pubkey, secretkey, 'client.conf', 722 'userconf'): 723 stage_file(script, f, fed_etc_dir) 724 if self.sshd: 725 stage_file(script, self.sshd, fed_bin_dir) 726 if self.sshd_config: 727 stage_file(script, self.sshd_config, fed_etc_dir) 728 729 # Look in tmpdir to get the names. They've all been copied 730 # into the (remote) staging dir 731 if os.access("%s/%s.gw.conf" % (tmpdir, vname), os.R_OK): 732 stage_file(script, "%s.gw.conf" % vname, fed_etc_dir) 733 734 # Hackery dackery dock: the ProtoGENI python is really ancient. 735 # A modern version (though packaged for Mandrake (remember 736 # Mandrake? good times, good times)) should be in the 737 # federation_software list, but we need to move rename is for 738 # SEER. 739 print >>script, "rm /usr/bin/python" 740 print >>script, "ln /usr/bin/python2.4 /usr/bin/python" 741 # Back to less hacky stuff 742 743 # Start commands 744 if e.get_attribute('portal') and self.portal_startcommand: 745 # Install portal software 746 for l, f in self.portal_software: 747 base = os.path.basename(f) 748 stage_file(script, base) 749 if l: install_tar(script, l, base) 750 else: install_rpm(script, base) 751 752 # Portals never have a user-specified start command 753 print >>script, self.portal_startcommand 754 elif self.node_startcommand: 755 # XXX: debug 756 print >>script, "sudo perl -I%s %simport_key.pl /users/%s/.ssh/authorized_keys /root/.ssh/authorized_keys" % (fed_lib_dir, fed_bin_dir, user) 757 # XXX: debug 758 if e.get_attribute('startup'): 759 print >>script, "%s \\$USER '%s'" % \ 760 (self.node_startcommand, e.get_attribute('startup')) 761 else: 762 print >>script, self.node_startcommand 763 script.close() 764 if not segment_commands.scp_file("%s/%s.startup" % (tmpdir, pname), 765 user, pname): 771 # Portals never have a user-specified start command 772 print >>script, self.portal_startcommand 773 elif self.node_startcommand: 774 # XXX: debug 775 print >>script, "sudo perl -I%s %simport_key.pl /users/%s/.ssh/authorized_keys /root/.ssh/authorized_keys" % (fed_lib_dir, fed_bin_dir, user) 776 # XXX: debug 777 if elem.get_attribute('startup'): 778 print >>script, "%s \\$USER '%s'" % \ 779 (self.node_startcommand, 780 elem.get_attribute('startup')) 781 else: 782 print >>script, self.node_startcommand 783 script.close() 784 return sfile, pname 785 else: 786 return None, None 787 788 789 def configure_nodes(self, segment_commands, topo, nodes, user, 790 pubkey, secretkey, stagingdir, tmpdir): 791 """ 792 For each node in the topology, generate a script file that copies 793 software onto it and installs it in the proper places and then runs the 794 startup command (including the federation commands. 795 """ 796 797 798 799 for e in [ e for e in topo.elements if isinstance(e, topdl.Computer)]: 800 vname = e.name 801 sfile, pname = self.write_node_config_script(e, 802 nodes.get(vname, { }), 803 user, pubkey, secretkey, stagingdir, tmpdir) 804 if sfile: 805 if not segment_commands.scp_file(sfile, user, pname): 766 806 self.log.error("Could not copy script to %s" % pname) 767 807 else: … … 769 809 770 810 def start_node(self, user, host, node, segment_commands): 811 """ 812 Copy an identity to a node for the configuration script to be able to 813 import data and then run the startup script remotely. 814 """ 771 815 # Place an identity on the node so that the copying can succeed 772 816 segment_commands.ssh_cmd(user, host, "scp .ssh/id_rsa %s:.ssh" % node) … … 775 819 776 820 def start_nodes(self, user, host, nodes, segment_commands): 821 """ 822 Start a thread to initialize each node and wait for them to complete. 823 Each thread runs start_node. 824 """ 777 825 threads = [ ] 778 826 for n in nodes: … … 788 836 done = [not t.isAlive() for t in threads] 789 837 790 791 792 793 def start_segment(self, segment_commands, aid, user, rspec, pubkey, 794 secretkey, ename, stagingdir, tmpdir, certfile, certpw, 795 export_certfile, topo, connInfo, services, timeout=0): 796 """ 797 Start a sub-experiment on a federant. 798 799 Get the current state, modify or create as appropriate, ship data 800 and configs and start the experiment. There are small ordering 801 differences based on the initial state of the sub-experiment. 802 """ 803 804 def random_slicename(user): 805 slicename = user 806 for i in range(0,5): 807 slicename += random.choice(string.ascii_letters) 808 return slicename 809 810 host = self.staging_host 811 if not os.access(certfile, os.R_OK): 812 self.log.error("[start_segment]: Cannot read certfile: %s" % \ 813 certfile) 814 return False 815 ctxt = fedd_ssl_context(my_cert=certfile, password=certpw) 816 # Local software dir 817 lsoftdir = "%s/software" % tmpdir 818 819 # Open up a temporary file to contain a script for setting up the 820 # filespace for the new experiment. 838 def set_up_staging_filespace(self, segment_commands, user, host, 839 stagingdir): 840 """ 841 Set up teh staging area on the staging machine. To reduce the number 842 of ssh commands, we compose a script and execute it remotely. 843 """ 844 821 845 self.log.info("[start_segment]: creating script file") 822 846 try: … … 845 869 return False 846 870 871 def initialize_protogeni_context(self, segment_commands, certfile, certpw): 872 """ 873 Protogeni interactions take a context and a protogeni certificate. 874 This establishes both for later calls and returns them. 875 """ 876 if os.access(certfile, os.R_OK): 877 ctxt = fedd_ssl_context(my_cert=certfile, password=certpw) 878 else: 879 self.log.error("[start_segment]: Cannot read certfile: %s" % \ 880 certfile) 881 return None, None 882 847 883 try: 848 884 gcred = segment_commands.pg_call(self.sa_url, … … 851 887 raise service_error(service_error.federant, 852 888 "ProtoGENI: %s" % e) 853 # Find a slicename not in use 854 slicename = "fabereGpgL" 889 890 return ctxt, gcred 891 892 def get_free_slicename(self, segment_commands, user, gcred, ctxt): 893 """ 894 Find a usable slice name by trying random ones until there's no 895 collision. 896 """ 897 898 def random_slicename(user): 899 """ 900 Return a random slicename by appending 5 letters to the username. 901 """ 902 slicename = user 903 for i in range(0,5): 904 slicename += random.choice(string.ascii_letters) 905 return slicename 906 855 907 while True: 856 908 slicename = random_slicename(user) … … 866 918 break 867 919 868 self.log.info("Creating %s" % slicename) 869 f = open("./rspec", "w") 870 print >>f, "%s" % rspec 871 f.close() 872 # Create the slice and allocate resources. If any of this stuff fails, 873 # the allocations will time out on PG in short order, so we just raise 874 # the service_error. 920 return slicename 921 922 def allocate_slice(self, segment_commands, slicename, rspec, gcred, ctxt): 923 """ 924 Create the slice and allocate resources. If any of this stuff fails, 925 the allocations will time out on PG in short order, so we just raise 926 the service_error. Return the slice and sliver credentials as well as 927 the manifest. 928 """ 875 929 try: 876 930 param = { … … 879 933 'type': 'Slice' 880 934 } 881 slice_cred = segment_commands.pg_call(self.sa_url, 'Register', param, ctxt) 935 slice_cred = segment_commands.pg_call(self.sa_url, 'Register', 936 param, ctxt) 882 937 f = open("./slice_cred", "w") 883 938 print >>f, slice_cred … … 887 942 'credential': gcred, 888 943 } 889 keys = segment_commands.pg_call(self.sa_url, 'GetKeys', param, ctxt) 944 keys = segment_commands.pg_call(self.sa_url, 'GetKeys', param, 945 ctxt) 890 946 # Grab and redeem a ticket 891 947 param = { … … 893 949 'rspec': rspec, 894 950 } 895 ticket = segment_commands.pg_call(self.cm_url, 'GetTicket', param, ctxt) 951 ticket = segment_commands.pg_call(self.cm_url, 'GetTicket', param, 952 ctxt) 896 953 f = open("./ticket", "w") 897 954 print >>f, ticket … … 919 976 "ProtoGENI: %s %s" % (e.code, e)) 920 977 978 return (slice_cred, sliver_cred, manifest) 979 980 def wait_for_slice(self, segment_commands, slice_cred, ctxt): 981 """ 982 Wait for the given slice to finish its startup. Return the final 983 status. 984 """ 985 status = 'notready' 986 try: 987 while status == 'notready': 988 param = { 989 'credential': slice_cred 990 } 991 r = segment_commands.pg_call(self.cm_url, 992 'SliceStatus', param, ctxt) 993 status = r.get('status', 'notready') 994 if status == 'notready': 995 time.sleep(30) 996 except segment_commands.ProtoGENIError, e: 997 raise service_error(service_error.federant, 998 "ProtoGENI: %s %s" % (e.code, e)) 999 1000 return status 1001 1002 def delete_slice(self, segment_commands, slice_cred, ctxt): 1003 """ 1004 Delete the slice resources. An error from the service is ignores, 1005 because the soft state will go away anyway. 1006 """ 1007 try: 1008 param = { 'credential': slice_cred } 1009 segment_commands.pg_call(self.cm_url, 'DeleteSliver', 1010 param, ctxt) 1011 except segment_commands.ProtoGENIError, e: 1012 self.log.warn("ProtoGENI: %s" % e) 1013 1014 1015 1016 def start_segment(self, segment_commands, aid, user, rspec, pubkey, 1017 secretkey, ename, stagingdir, tmpdir, certfile, certpw, 1018 export_certfile, topo, connInfo, services, timeout=0): 1019 """ 1020 Start a sub-experiment on a federant. 1021 1022 Get the current state, modify or create as appropriate, ship data 1023 and configs and start the experiment. There are small ordering 1024 differences based on the initial state of the sub-experiment. 1025 """ 1026 1027 # Local software dir 1028 lsoftdir = "%s/software" % tmpdir 1029 host = self.staging_host 1030 1031 ctxt, gcred = self.initialize_protogeni_context(segment_commands, 1032 certfile, certpw) 1033 1034 if not ctxt: return False 1035 1036 self.set_up_staging_filespace(segment_commands, user, host, stagingdir) 1037 slicename = self.get_free_slicename(segment_commands, user, gcred, ctxt) 1038 self.log.info("Creating %s" % slicename) 1039 slice_cred, sliver_cred, manifest = self.allocate_slice( 1040 segment_commands, slicename, rspec, gcred, ctxt) 1041 921 1042 # With manifest in hand, we can export the portal node names. 922 1043 if self.create_debug: nodes = self.fake_manifest(topo) … … 941 1062 942 1063 # Now we wait for the nodes to start on PG 943 status = 'notready' 944 try: 945 while status == 'notready': 946 param = { 947 'credential': slice_cred 948 } 949 r = segment_commands.pg_call(self.cm_url, 'SliceStatus', param, ctxt) 950 print r 951 status = r.get('status', 'notready') 952 if status == 'notready': 953 time.sleep(30) 954 except segment_commands.ProtoGENIError, e: 955 raise service_error(service_error.federant, 956 "ProtoGENI: %s %s" % (e.code, e)) 957 1064 status = self.wait_for_slice(segment_commands, slice_cred, ctxt) 958 1065 if status == 'failed': 959 1066 self.log.error('Sliver failed to start on ProtoGENI') 960 try: 961 param = { 962 'credential': slice_cred 963 } 964 segment_commands.pg_call(self.cm_url, 'DeleteSliver', param, ctxt) 965 except segment_commands.ProtoGENIError, e: 966 raise service_error(service_error.federant, 967 "ProtoGENI: %s" % e) 1067 self.delete_slice(segment_commands, slice_cred, ctxt) 968 1068 return False 969 1069 else: 1070 # All good: save ProtoGENI info in shared state 970 1071 self.state_lock.acquire() 971 1072 self.allocation[aid]['slice_name'] = slicename … … 979 1080 980 1081 # Now we have configuration to do for ProtoGENI 981 self.configure_nodes(segment_commands, topo, nodes, user, pubkey, secretkey,982 s tagingdir, tmpdir)1082 self.configure_nodes(segment_commands, topo, nodes, user, pubkey, 1083 secretkey, stagingdir, tmpdir) 983 1084 984 1085 self.start_nodes(user, self.staging_host, … … 1229 1330 "%s/%s" % (self.staging_dir, ename), tmpdir, cf, cpw, 1230 1331 certfile, topo, connInfo, services) 1231 except EnvironmentError :1332 except EnvironmentError, e: 1232 1333 err = service_error(service_error.internal, "%s" % e) 1233 1334 except service_error, e: … … 1264 1365 self.log.error('Removing Sliver on ProtoGENI') 1265 1366 ctxt = fedd_ssl_context(my_cert=certfile, password=certpw) 1266 try: 1267 param = { 1268 'credential': slice_cred 1269 } 1270 segment_commands.pg_call(self.cm_url, 'DeleteSlice', 1271 param, ctxt) 1272 except segment_commands.ProtoGENIError, e: 1273 raise service_error(service_error.federant, 1274 "ProtoGENI: %s" % e) 1367 self.delete_slice(segment_commands, slice_cred, ctxt) 1275 1368 return True 1276 1369 except self.ssh_cmd_timeout: … … 1315 1408 def renew_segment(self, segment_commands, name, scred, interval, 1316 1409 certfile, certpw): 1410 """ 1411 Linear code through the segment renewal calls. 1412 """ 1317 1413 ctxt = fedd_ssl_context(my_cert=certfile, password=certpw) 1318 1414 try: 1319 1415 expiration = time.strftime("%Y%m%dT%H:%M:%S", 1320 1416 time.gmtime(time.time() + interval)) 1321 cred = segment_commands.pg_call(self.sa_url, 'GetCredential', {}, ctxt) 1417 cred = segment_commands.pg_call(self.sa_url, 'GetCredential', 1418 {}, ctxt) 1322 1419 1323 1420 param = { … … 1331 1428 'type': 'Slice', 1332 1429 } 1333 slice = segment_commands.pg_call(self.sa_url, 'Resolve', param, ctxt) 1430 slice = segment_commands.pg_call(self.sa_url, 'Resolve', 1431 param, ctxt) 1334 1432 uuid = slice.get('uuid', None) 1335 1433 if uuid == None: … … 1342 1440 'type': 'Slice', 1343 1441 } 1344 new_scred = segment_commands.pg_call(self.sa_url, 'GetCredential', param, ctxt) 1442 new_scred = segment_commands.pg_call(self.sa_url, 'GetCredential', 1443 param, ctxt) 1345 1444 f = open('./new_slice_cred', 'w') 1346 1445 print >>f, new_scred
Note: See TracChangeset
for help on using the changeset viewer.