From 3a5f4ace3b6e7accc7f8ad24c2e7824fb2984610 Mon Sep 17 00:00:00 2001 From: dave Date: Mon, 26 Dec 2016 17:14:28 -0800 Subject: [PATCH] Add basic api --- example/banutoo.json | 4 +- example/banutoo2.json | 4 +- setup.py | 2 +- zhypervisor/api/api.py | 239 ++++++++++++++++++++++++++++++++ zhypervisor/clients/qmachine.py | 29 +++- zhypervisor/daemon.py | 139 ++++++++++++++++--- zhypervisor/machine.py | 26 +++- zhypervisor/tools/ifup.py | 4 + zhypervisor/util.py | 6 + 9 files changed, 420 insertions(+), 33 deletions(-) create mode 100644 zhypervisor/api/api.py diff --git a/example/banutoo.json b/example/banutoo.json index bff9d44..a6f6c8a 100644 --- a/example/banutoo.json +++ b/example/banutoo.json @@ -1,6 +1,6 @@ { - "id": "banutoo", - "type": "q", + "machine_id": "banutoo", + "machine_type": "q", "spec": { "options": { "autostart": true, diff --git a/example/banutoo2.json b/example/banutoo2.json index 3026945..a4b53da 100644 --- a/example/banutoo2.json +++ b/example/banutoo2.json @@ -1,6 +1,6 @@ { - "id": "banutoo2", - "type": "q", + "machine_id": "banutoo2", + "machine_type": "q", "spec": { "options": { "autostart": true, diff --git a/setup.py b/setup.py index 4a4dfea..50a7e62 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup(name='zhypervisor', url='http://gitlab.xmopx.net/dave/zhypervisor', author='dpedu', author_email='dave@davepedu.com', - packages=['zhypervisor', 'zhypervisor.clients', 'zhypervisor.tools'], + packages=['zhypervisor', 'zhypervisor.clients', 'zhypervisor.api', 'zhypervisor.tools'], entry_points={'console_scripts': ['zd = zhypervisor.daemon:main', 'zd_ifup = zhypervisor.tools.ifup:main']}, zip_safe=False) diff --git a/zhypervisor/api/api.py b/zhypervisor/api/api.py new file mode 100644 index 0000000..3d6b8e3 --- /dev/null +++ b/zhypervisor/api/api.py @@ -0,0 +1,239 @@ +import cherrypy +import logging +import json +import subprocess +from threading import Thread + + +class Mountable(object): + """ + Macro for encapsulating a component's config and methods into one object. + :param conf: cherrypy config dict for use when mounting this component + """ + def __init__(self, conf=None): + self.conf = conf if conf else {'/': {}} + + def mount(self, path): + """ + Mount this component into the cherrypy tree + :param path: where to mount it e.g. /v1 + :return: self + """ + cherrypy.tree.mount(self, path, self.conf) + return self + + +class ZApi(object): + def __init__(self, master): + """ + Main component of the API service. Inits and assembles the various classes. Provides .run() and .stop() to + control it. + :param master: parent BastionController reference. + """ + self.master = master + self.app_v1 = ZApiV1(self).mount('/api/v1') + # self.app_root = BSApiRoot(self).mount('/api') + # self.ui = Mountable(conf={'/': { + # 'tools.staticdir.on': True, + # 'tools.staticdir.dir': os.getcwd() + '/ui/build', # TODO don't hardcode + # 'tools.staticdir.index': 'index.html'}}).mount('/ui') + + cherrypy.config.update({ + 'sessionFilter.on': True, + 'tools.sessions.on': True, + 'tools.sessions.locking': 'explicit', + 'tools.sessions.timeout': 525600, + 'request.show_tracebacks': True, + 'server.socket_port': 3000, # TODO configurable port + 'server.thread_pool': 25, + 'server.socket_host': '0.0.0.0', + 'server.show_tracebacks': True, + 'server.socket_timeout': 5, + 'log.screen': False, + 'engine.autoreload.on': False + }) + + def run(self): + cherrypy.engine.start() + cherrypy.engine.block() + logging.info("API has shut down") + + def stop(self): + cherrypy.engine.exit() + logging.info("API shutting down...") + + +class ZApiV1(Mountable): + """ + Provides the /v1/ api. + """ + def __init__(self, root): + super().__init__(conf={ + "/machine": {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}, + # "/task": {'request.dispatch': cherrypy.dispatch.MethodDispatcher()}, # @TODO this conf belongs in the child + # "/logs": { + # 'tools.staticdir.on': True, + # 'tools.staticdir.dir': root.master.log_path, + # 'tools.staticdir.content_types': {'log': 'text/plain'} + # } + }) + self.root = root + self.machine = ZApiMachines(self.root) + # self.task = BSApiTask(self.root) + # self.control = BSApiControl(self.root) + # self.socket = ApiWebsockets(self.root) + + @cherrypy.expose + def index(self): + yield "It works!" + + @cherrypy.expose + def create_disk(self, datastore, name, size, fmt): + """ + WORKAROUND for creating qemu disks + TODO replace me + """ + assert fmt in ["qcow2", "raw"], "Disk format is invalid" + assert name.endswith(".bin"), "Disk must be named .bin" + + disk_path = self.root.master.datastores[datastore].get_filepath(name) + img_args = ["qemu-img", "create", "-f", fmt, disk_path, "{}M".format(int(size))] + logging.info("Creating disk with: %s", str(img_args)) + subprocess.check_call(img_args) + return name + + +@cherrypy.popargs("machine_id") +class ZApiMachineStop(object): + """ + Endpoint to stop running machines + """ + exposed = True + + def __init__(self, root): + self.root = root + + @cherrypy.tools.json_out() + def GET(self, machine_id): + """ + If the machine exists, stop it gracefully. This happens asynchronously. + """ + assert machine_id in self.root.master.machines + Thread(target=lambda: self.root.master.forceful_stop(machine_id)).start() + return machine_id + + +@cherrypy.popargs("machine_id") +class ZApiMachineStart(object): + """ + Endpoint to start stopped machines + """ + exposed = True + + def __init__(self, root): + self.root = root + + @cherrypy.tools.json_out() + def GET(self, machine_id=None): + """ + Start the machine + """ + self.root.master.machines[machine_id].start() + return machine_id + + +@cherrypy.popargs("machine_id") +class ZApiMachineRestart(object): + """ + Endpoint to restart machines + """ + exposed = True + + def __init__(self, root): + self.root = root + + @cherrypy.tools.json_out() + def GET(self, machine_id=None): + """ + Start the machine + TODO can we not repeat this from Stop/Start? + """ + assert machine_id in self.root.master.machines + self.root.master.forceful_stop(machine_id) + self.root.master.machines[machine_id].start() + return machine_id + + +@cherrypy.popargs("machine_id") +class ZApiMachines(): + """ + Endpoint for managing machines + """ + + exposed = True + + def __init__(self, root): + """ + Endpoint to modify machines. PUT and DELETE require the machine not be running, which can be managed with the + stop and start methods below + """ + self.root = root + self.stop = ZApiMachineStop(self.root) + self.start = ZApiMachineStart(self.root) + self.restart = ZApiMachineRestart(self.root) + + @cherrypy.tools.json_out() + def GET(self, machine_id=None, action=None, summary=False): + """ + Get a list of all machines or specific one if passed + :param task_id: task to retrieve + """ + summary = summary in [True, 'True', 'true', 'yes', '1', 1] + + machines = {} + for _machine_id, machine_spec in self.root.master.machines.items(): + machine = {"machine_id": _machine_id, + "_status": machine_spec.machine.get_status()} + if not summary: + machine.update({"machine_type": machine_spec.machine_type, + "spec": machine_spec.serialize()}) + + machines[_machine_id] = machine + if machine_id is not None: + try: + return [machines[machine_id]] + except KeyError: + raise cherrypy.HTTPError(status=404) + else: + return list(machines.values()) + + @cherrypy.tools.json_out() + def PUT(self, machine_id, machine_type, machine_spec): + """ + Create a new machine or update an existing machine + :param machine_id: id of machine to create or modify + :param machine_type: set machine type (currently, only "q") + 'param machine_spec: json dictionary describing the machine. see the 'spec' key of example/banutoo.json + """ + + assert machine_id not in self.root.master.machines or \ + self.root.master.machines[machine_id].machine.get_status() == "stopped", \ + "Machine must be stopped to modify" + + machine_spec = json.loads(machine_spec) + self.root.master.add_machine(machine_id, machine_type, machine_spec, write=True) + return machine_id + + def DELETE(self, machine_id): + """ + Delete a machine. Raises 404 if no machine exists. Raises error if machine is not stopped + :param machine_id: ID of machine to remove + """ + try: + assert self.root.master.machines[machine_id].machine.get_status() == "stopped", \ + "Machine must be stopped to delete" + except KeyError: + raise cherrypy.HTTPError(status=404) + + self.root.master.remove_machine(machine_id) + return machine_id diff --git a/zhypervisor/clients/qmachine.py b/zhypervisor/clients/qmachine.py index a301cce..4896f0e 100644 --- a/zhypervisor/clients/qmachine.py +++ b/zhypervisor/clients/qmachine.py @@ -17,7 +17,17 @@ class QMachine(Machine): self.block_respawns = False # TODO validate specs + def get_status(self): + """ + Return string "stopped" or "running" depending on machine status + @TODO machine status consts + """ + return "stopped" if self.proc is None else "running" + def start_machine(self): + """ + If needed, launch the machine. + """ if self.proc: raise Exception("Machine already running!") else: @@ -29,6 +39,9 @@ class QMachine(Machine): Thread(target=self.wait_on_exit, args=[self.proc]).start() def wait_on_exit(self, proc): + """ + Listener used by above start_machine to restart the machine if the machine exits + """ proc.wait() logging.info("qemu process has exited") self.proc = None @@ -36,20 +49,30 @@ class QMachine(Machine): self.start_machine() def stop_machine(self): + """ + Send the powerdown signal to the running machine + """ if self.proc: - logging.info("stopping machine...") + logging.info("stopping machine %s", self.spec.machine_id) self.proc.stdin.write(b"system_powerdown\n") self.proc.stdin.flush() self.proc.wait() self.proc = None def kill_machine(self): + """ + Forcefully kill the running machine + """ + print("Terminating {}".format(self.proc)) if self.proc: self.proc.terminate() self.proc.wait() self.proc = None def get_args(self, tap): + """ + Assemble the full argv array that will be executed for this machine + """ argv = ['qemu-system-x86_64'] argv += self.get_args_system() argv += self.get_args_drives() @@ -78,7 +101,7 @@ class QMachine(Machine): def get_args_network(self, tap_name): """ - Hard-coded for now + Return network related qemu args """ args = [] for iface in self.spec.properties.get("netifaces"): @@ -87,7 +110,7 @@ class QMachine(Machine): if iface_type == "tap": if "ifname" not in iface: iface["ifname"] = tap_name - iface["script"] = "/root/zhypervisor/testenv/bin/zd_ifup" # TODO fixme + iface["script"] = "/root/zhypervisor/testenv/bin/zd_ifup" # TODO don't hard code iface["downscript"] = "no" args.append("-net") diff --git a/zhypervisor/daemon.py b/zhypervisor/daemon.py index 65a6cec..a0879f8 100644 --- a/zhypervisor/daemon.py +++ b/zhypervisor/daemon.py @@ -6,65 +6,133 @@ import logging import argparse from time import sleep from concurrent.futures import ThreadPoolExecutor +from threading import Thread from zhypervisor.logging import setup_logging from zhypervisor.machine import MachineSpec +from zhypervisor.api.api import ZApi from pprint import pprint class ZHypervisorDaemon(object): def __init__(self, config): - self.config = config - self.datastores = {} - self.machines = {} + """ + Z Hypervisor main thread. Roles: + - Load and start machines and API on init + - Cleanup on shutdown + - Committing changes to machines to disk + - Primary interface to modify machines + """ + self.config = config # JSON config listing, mainly, datastore paths + self.datastores = {} # Mapping of datastore name -> objects + self.machines = {} # Mapping of machine name -> objects self.running = True + # Set up datastores and use the default datastore for "State" storage self.init_datastores() self.state = ZConfig(self.datastores["default"]) + # start API + self.api = ZApi(self) + + # Set up shutdown signal handlers signal.signal(signal.SIGINT, self.signal_handler) # ctrl-c signal.signal(signal.SIGTERM, self.signal_handler) # sigterm def init_datastores(self): + """ + Per datastore in the config, create a ZDataStore object + """ for name, info in self.config["datastores"].items(): self.datastores[name] = ZDataStore(name, info["path"], info.get("init", False)) def init_machines(self): + """ + Per machine in the on-disk state, create a machine object + """ for machine_info in self.state.get_machines(): - machine_id = machine_info["id"] - self.add_machine(machine_id, machine_info["type"], machine_info["spec"]) + machine_id = machine_info["machine_id"] + self.add_machine(machine_id, machine_info["machine_type"], machine_info["spec"]) - def add_machine(self, machine_id, machine_type, machine_spec): - machine = MachineSpec(self, machine_id, machine_type, machine_spec) - self.machines[machine_id] = machine - if machine.options.get("autostart", False): + def add_machine(self, machine_id, machine_type, machine_spec, write=False): + """ + Create or update a machine. + :param machine_id: alphanumeric id of machine to modify/create + :param machine_type: runnable type e.g. "q" + :param machine_spec: dictionary of machine options - see example/ubuntu.json + :param write: commit machinge changes to on-disk state + """ + # Find / create the machine + if machine_id in self.machines: + machine = self.machines[machine_id] + machine.options = machine_spec["options"] + machine.properties = machine_spec["properties"] + else: + machine = MachineSpec(self, machine_id, machine_type, machine_spec) + self.machines[machine_id] = machine + + # Update if necessary + if write: + self.state.write_machine(machine_id, machine_type, machine_spec) + + # Launch if machine is an autostarted machine + if machine.options.get("autostart", False) and machine.machine.get_status() == "stopped": machine.start() def signal_handler(self, signum, frame): + """ + Handle signals sent to the daemon. On any, exit. + """ logging.critical("Got signal {}".format(signum)) self.stop() def run(self): - # launch machines + """ + Main loop of the daemon. Sets up & starts machines, runs api, and waits. + """ self.init_machines() - - # start API - # TODO - - # Wait? - while self.running: - sleep(1) + self.api.run() def stop(self): + """ + SHut down the hypervisor. Stop the API then shut down machines + """ self.running = False - + self.api.stop() with ThreadPoolExecutor(10) as pool: - for machine_id, machine in self.machines.items(): - pool.submit(machine.stop) + for machine_id in self.machines.keys(): + pool.submit(self.forceful_stop, machine_id) + # Sequential shutdown code below is easier to debug + # for machine_id in self.machines.keys(): + # self.forceful_stop(machine_id) + + def forceful_stop(self, machine_id, timeout=10): # make this timeout longer? + """ + Gracefully stop a machine by asking it nicely, waiting some time, then forcefully killing it. + """ + machine_spec = self.machines[machine_id] + nice_stop = Thread(target=machine_spec.stop) + nice_stop.start() + nice_stop.join(timeout) + + if nice_stop.is_alive(): + logging.error("%s did not respond in %s seconds, killing", machine_id, timeout) + machine_spec.machine.kill_machine() + + def remove_machine(self, machine_id): + """ + Remove a stopped machine from the system + """ + assert self.machines[machine_id].machine.get_status() == "stopped" + self.state.remove_machine(machine_id) + del self.machines[machine_id] class ZDataStore(object): + """ + Helper module representing a data storage location somewhere on disk + """ def __init__(self, name, root_path, init_ok=False): self.name = name self.root_path = root_path @@ -86,6 +154,9 @@ class ZDataStore(object): class ZConfig(object): + """ + The Z Hypervisor daemon's interface to the on-disk config + """ def __init__(self, datastore): self.datastore = datastore @@ -95,6 +166,9 @@ class ZConfig(object): os.makedirs(d, exist_ok=True) def get_machines(self): + """ + Return config of all machines on disk + """ machines = [] logging.info("Looking for machines in {}".format(self.machine_data_dir)) for mach_name in os.listdir(self.machine_data_dir): @@ -102,6 +176,28 @@ class ZConfig(object): machines.append(json.load(f)) return machines + def write_machine(self, machine_id, machine_type, machine_spec): + """ + Write a machine's config to the disk. Params similar to elsewhere. + """ + with open(os.path.join(self.machine_data_dir, "{}.json".format(machine_id)), "w") as f: + json.dump({"machine_id": machine_id, + "machine_type": machine_type, + "spec": machine_spec}, f, indent=4) + + def write_machine_o(self, machine_obj): + """ + Similar to write_machine, but accepts a MachineSpec object + """ + self.write_machine(machine_obj.machine_id, machine_obj.machine_type, machine_obj.serialize()) + + def remove_machine(self, machine_id): + """ + Remove a machine from the on disk state + """ + json_path = os.path.join(self.machine_data_dir, "{}.json".format(machine_id)) + os.unlink(json_path) + def main(): setup_logging() @@ -118,7 +214,7 @@ def main(): "state": "/opt/datastore/state/", "datastores": { "default": { - "path": "/opt/datastore/machines/" + "path": "/opt/z/datastore/machines/" } }}, f, indent=4) return @@ -128,3 +224,4 @@ def main(): z = ZHypervisorDaemon(config) z.run() + print("Z has been shut down") diff --git a/zhypervisor/machine.py b/zhypervisor/machine.py index 1c98c06..9851310 100644 --- a/zhypervisor/machine.py +++ b/zhypervisor/machine.py @@ -4,26 +4,44 @@ from zhypervisor.clients.qmachine import QMachine class MachineSpec(object): + """ + Represents a machine we may control + """ def __init__(self, master, machine_id, machine_type, spec): + """ + Initialize options and properties of the machine. More importantly, initialize the self.machine object which + should be a subclass of zhypervisor.util.Machine. + """ logging.info("Initting machine %s", machine_id) self.master = master self.machine_id = machine_id self.machine_type = machine_type - self.options = {} # hypervisor-level stuff like Autostart - self.properties = {} # machine level stuff like processor count + self.options = spec["options"] + self.properties = spec["properties"] # TODO replace if/else with better system if machine_type == "q": self.machine = QMachine(self) - self.options = spec["options"] - self.properties = spec["properties"] else: raise Exception("Unknown machine type: {}".format(machine_type)) def start(self): + """ + Start this machine (pass-through) + """ self.machine.start_machine() def stop(self): + """ + Stop this machine + """ self.machine.block_respawns = True self.machine.stop_machine() + + def serialize(self): + """ + Return a serializable form of this machine's specs + """ + return {"options": self.options, + "properties": self.properties} diff --git a/zhypervisor/tools/ifup.py b/zhypervisor/tools/ifup.py index 38652fc..0034458 100644 --- a/zhypervisor/tools/ifup.py +++ b/zhypervisor/tools/ifup.py @@ -8,6 +8,10 @@ from zhypervisor.logging import setup_logging def main(): + """ + Helper script for dealing with QEMU network interfaces. When QEMU starts, it calls this script passing an interface + name when the virtual machine has been started with it. This needs to enable the interface. + """ setup_logging() _, tap_name = sys.argv logging.info("Enabling interface %s...", tap_name) diff --git a/zhypervisor/util.py b/zhypervisor/util.py index e1e183d..367c542 100644 --- a/zhypervisor/util.py +++ b/zhypervisor/util.py @@ -52,6 +52,12 @@ class Machine(object): """ raise NotImplemented() + def get_status(self): + """ + Get the machine's status (return one of "running" or "stopped") + """ + raise NotImplemented() + def get_datastore_path(self, datastore_name, *paths): """ Resolve the filesystem path for a path in the given datastore