z-hypervisor/zhypervisor/daemon.py

228 lines
7.9 KiB
Python

import os
import json
import signal
import logging
import argparse
from time import sleep
from concurrent.futures import ThreadPoolExecutor
from threading import Thread
from zhypervisor.logging import setup_logging
from zhypervisor.machine import MachineSpec
from zhypervisor.api.api import ZApi
from pprint import pprint
class ZHypervisorDaemon(object):
def __init__(self, config):
"""
Z Hypervisor main thread. Roles:
- Load and start machines and API on init
- Cleanup on shutdown
- Committing changes to machines to disk
- Primary interface to modify machines
"""
self.config = config # JSON config listing, mainly, datastore paths
self.datastores = {} # Mapping of datastore name -> objects
self.machines = {} # Mapping of machine name -> objects
self.running = True
# Set up datastores and use the default datastore for "State" storage
self.init_datastores()
self.state = ZConfig(self.datastores["default"])
# start API
self.api = ZApi(self)
# Set up shutdown signal handlers
signal.signal(signal.SIGINT, self.signal_handler) # ctrl-c
signal.signal(signal.SIGTERM, self.signal_handler) # sigterm
def init_datastores(self):
"""
Per datastore in the config, create a ZDataStore object
"""
for name, info in self.config["datastores"].items():
self.datastores[name] = ZDataStore(name, info["path"], info.get("init", False))
def init_machines(self):
"""
Per machine in the on-disk state, create a machine object
"""
for machine_info in self.state.get_machines():
machine_id = machine_info["machine_id"]
self.add_machine(machine_id, machine_info["machine_type"], machine_info["spec"])
def add_machine(self, machine_id, machine_type, machine_spec, write=False):
"""
Create or update a machine.
:param machine_id: alphanumeric id of machine to modify/create
:param machine_type: runnable type e.g. "q"
:param machine_spec: dictionary of machine options - see example/ubuntu.json
:param write: commit machinge changes to on-disk state
"""
# Find / create the machine
if machine_id in self.machines:
machine = self.machines[machine_id]
machine.options = machine_spec["options"]
machine.properties = machine_spec["properties"]
else:
machine = MachineSpec(self, machine_id, machine_type, machine_spec)
self.machines[machine_id] = machine
# Update if necessary
if write:
self.state.write_machine(machine_id, machine_type, machine_spec)
# Launch if machine is an autostarted machine
if machine.options.get("autostart", False) and machine.machine.get_status() == "stopped":
machine.start()
def signal_handler(self, signum, frame):
"""
Handle signals sent to the daemon. On any, exit.
"""
logging.critical("Got signal {}".format(signum))
self.stop()
def run(self):
"""
Main loop of the daemon. Sets up & starts machines, runs api, and waits.
"""
self.init_machines()
self.api.run()
def stop(self):
"""
SHut down the hypervisor. Stop the API then shut down machines
"""
self.running = False
self.api.stop()
with ThreadPoolExecutor(10) as pool:
for machine_id in self.machines.keys():
pool.submit(self.forceful_stop, machine_id)
# Sequential shutdown code below is easier to debug
# for machine_id in self.machines.keys():
# self.forceful_stop(machine_id)
def forceful_stop(self, machine_id, timeout=10): # make this timeout longer?
"""
Gracefully stop a machine by asking it nicely, waiting some time, then forcefully killing it.
"""
machine_spec = self.machines[machine_id]
nice_stop = Thread(target=machine_spec.stop)
nice_stop.start()
nice_stop.join(timeout)
if nice_stop.is_alive():
logging.error("%s did not respond in %s seconds, killing", machine_id, timeout)
machine_spec.machine.kill_machine()
def remove_machine(self, machine_id):
"""
Remove a stopped machine from the system
"""
assert self.machines[machine_id].machine.get_status() == "stopped"
self.state.remove_machine(machine_id)
del self.machines[machine_id]
class ZDataStore(object):
"""
Helper module representing a data storage location somewhere on disk
"""
def __init__(self, name, root_path, init_ok=False):
self.name = name
self.root_path = root_path
os.makedirs(self.root_path, exist_ok=True)
try:
metainfo_path = self.get_filepath(".datastore.json")
assert os.path.exists(metainfo_path), "Datastore missing or not initialized! " \
"File not found: {}".format(metainfo_path)
except:
if init_ok:
with open(metainfo_path, "w") as f:
json.dump({}, f)
else:
raise
logging.info("Initialized datastore %s at %s", name, self.root_path)
def get_filepath(self, *paths):
return os.path.join(self.root_path, *paths)
class ZConfig(object):
"""
The Z Hypervisor daemon's interface to the on-disk config
"""
def __init__(self, datastore):
self.datastore = datastore
self.machine_data_dir = self.datastore.get_filepath("machines")
for d in [self.machine_data_dir]:
os.makedirs(d, exist_ok=True)
def get_machines(self):
"""
Return config of all machines on disk
"""
machines = []
logging.info("Looking for machines in {}".format(self.machine_data_dir))
for mach_name in os.listdir(self.machine_data_dir):
with open(os.path.join(self.machine_data_dir, mach_name), "r") as f:
machines.append(json.load(f))
return machines
def write_machine(self, machine_id, machine_type, machine_spec):
"""
Write a machine's config to the disk. Params similar to elsewhere.
"""
with open(os.path.join(self.machine_data_dir, "{}.json".format(machine_id)), "w") as f:
json.dump({"machine_id": machine_id,
"machine_type": machine_type,
"spec": machine_spec}, f, indent=4)
def write_machine_o(self, machine_obj):
"""
Similar to write_machine, but accepts a MachineSpec object
"""
self.write_machine(machine_obj.machine_id, machine_obj.machine_type, machine_obj.serialize())
def remove_machine(self, machine_id):
"""
Remove a machine from the on disk state
"""
json_path = os.path.join(self.machine_data_dir, "{}.json".format(machine_id))
os.unlink(json_path)
def main():
setup_logging()
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", default="/etc/zd.json", help="Config file path")
args = parser.parse_args()
if not os.path.exists(args.config):
logging.warning("Config does not exist, attempting to write default config")
with open(args.config, "w") as f:
json.dump({"nodename": "examplenode",
"access": [("root", "toor", 0)],
"state": "/opt/datastore/state/",
"datastores": {
"default": {
"path": "/opt/z/datastore/machines/"
}
}}, f, indent=4)
return
with open(args.config) as f:
config = json.load(f)
z = ZHypervisorDaemon(config)
z.run()
print("Z has been shut down")