CRS/CRIU: added CRIU as new CRS component
author Adrian Reber <adrian.reber@hs-esslingen.de>
Fri, 7 Feb 2014 20:56:19 +0000 (21:56 +0100)
committer Adrian Reber <adrian.reber@hs-esslingen.de>
Fri, 7 Feb 2014 21:24:27 +0000 (22:24 +0100)
To be able to checkpoint/restart using criu (criu.org) a new
CRS component is added which is based on criu. This first commit
provides the minimal set of functions and configure script options
to enable --with-criu and link against libcriu.so.
No actual checkpoint/restart functionality is yet implemented.
This is only the framework which needs to be filled with the
actual functionality.

opal/mca/crs/criu/Makefile.am [new file with mode: 0644]
opal/mca/crs/criu/configure.m4 [new file with mode: 0644]
opal/mca/crs/criu/crs_criu.h [new file with mode: 0644]
opal/mca/crs/criu/crs_criu_component.c [new file with mode: 0644]
opal/mca/crs/criu/crs_criu_module.c [new file with mode: 0644]

diff --git a/opal/mca/crs/criu/Makefile.am b/opal/mca/crs/criu/Makefile.am
new file mode 100644 (file)
index 0000000..06a9181
--- /dev/null
@@ -0,0 +1,49 @@
+#
+# Copyright (c) 2004-2007 The Trustees of Indiana University.
+#                         All rights reserved.
+# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+#                         All rights reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
+#
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+CFLAGS = $(crs_criu_CFLAGS)
+AM_CPPFLAGS = $(crs_criu_CPPFLAGS)
+# Files that make up the component; both library flavors below use them
+sources = \
+        crs_criu.h \
+        crs_criu_component.c \
+        crs_criu_module.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if MCA_BUILD_opal_crs_criu_DSO
+component_noinst  =
+component_install = mca_crs_criu.la
+else
+component_noinst  = libmca_crs_criu.la
+component_install =
+endif
+# DSO flavor: a loadable module that is installed into $(mcacomponentdir)
+mcacomponentdir = $(ompilibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_crs_criu_la_SOURCES  = $(sources)
+mca_crs_criu_la_LDFLAGS  = -module -avoid-version $(crs_criu_LDFLAGS)
+mca_crs_criu_la_LIBADD = $(crs_criu_LIBS)
+# Static flavor: a libtool library that is built but not installed
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_crs_criu_la_SOURCES = $(sources)
+libmca_crs_criu_la_LDFLAGS = -module -avoid-version $(crs_criu_LDFLAGS)
+libmca_crs_criu_la_LIBADD  = $(crs_criu_LIBS)
diff --git a/opal/mca/crs/criu/configure.m4 b/opal/mca/crs/criu/configure.m4
new file mode 100644 (file)
index 0000000..f20a07c
--- /dev/null
@@ -0,0 +1,105 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2010 The Trustees of Indiana University.
+#                         All rights reserved.
+# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+#                         All rights reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2006 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2009-2010 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
+# Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
+#
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_opal_crs_criu_CONFIG([action-if-found], [action-if-not-found])
+# -----------------------------------------------------------
+AC_DEFUN([MCA_opal_crs_criu_CONFIG],[
+    AC_CONFIG_FILES([opal/mca/crs/criu/Makefile])
+
+    AC_ARG_WITH([criu],
+                [AS_HELP_STRING([--with-criu(=DIR)],
+                                [Path to CRIU Installation])])
+    OMPI_CHECK_WITHDIR([criu], [$with_criu], [include/criu/criu.h])
+    AC_ARG_WITH([criu-libdir],
+                [AS_HELP_STRING([--with-criu-libdir=DIR],
+                                [Search for CRIU libraries in DIR])])
+    OMPI_CHECK_WITHDIR([criu-libdir], [$with_criu_libdir], [libcriu.*])
+
+    check_crs_criu_good="no"
+
+    # If we do not want FT, don't compile this component
+    #
+    # If we wanted CRIU, but did not specify the FT option,
+    # error out with a warning for the user
+    AS_IF([test "$opal_want_ft_cr" = "0"],
+          [$2
+           check_crs_criu_good="no"
+           AS_IF([test ! -z "$with_criu" -a "$with_criu" != "no"],
+                 [AC_MSG_WARN([CRIU support requested, but FT support not requested. You need to specify the --with-ft=cr configure option.])
+                  AC_MSG_ERROR([Aborting.])])
+          ],
+          [check_crs_criu_good="yes"])
+
+    # If we do not want CRIU, then do not compile it
+    AS_IF([test "$with_criu" = "no" -o "$check_crs_criu_good" = "no"],
+          [$2
+           check_crs_criu_good="no"],
+          [check_crs_criu_good="yes"])
+
+    # Defaults
+    check_crs_criu_dir_msg="compiler default"
+    check_crs_criu_libdir_msg="linker default"
+    check_crs_criu_dir=""
+    check_crs_criu_libdir=""
+
+    # Determine the search paths for the headers and libraries
+    AS_IF([test "$check_crs_criu_good" != "yes"], [$2],
+          [AS_IF([test ! -z "$with_criu" -a "$with_criu" != "yes"],
+                 [check_crs_criu_dir="$with_criu"
+                  check_crs_criu_dir_msg="$with_criu (from --with-criu)"])
+           AS_IF([test ! -z "$with_criu_libdir" -a "$with_criu_libdir" != "yes"],
+                 [check_crs_criu_libdir="$with_criu_libdir"
+                  check_crs_criu_libdir_msg="$with_criu_libdir (from --with-criu-libdir)"])
+          ])
+
+    AS_IF([test "$check_crs_criu_good" != "yes"], [$2],
+          [AC_MSG_CHECKING([for CRIU dir])
+           AC_MSG_RESULT([$check_crs_criu_dir_msg])
+           AC_MSG_CHECKING([for CRIU library dir])
+           AC_MSG_RESULT([$check_crs_criu_libdir_msg])
+           OMPI_CHECK_PACKAGE([crs_criu_check],
+                              [criu/criu.h],
+                              [criu],
+                              [criu_init_opts],
+                              [],
+                              [$check_crs_criu_dir],
+                              [$check_crs_criu_libdir],
+                              [check_crs_criu_good="yes"],
+                              [check_crs_criu_good="no"])
+          ])
+
+    crs_criu_CFLAGS="$CFLAGS $crs_criu_check_CFLAGS"
+    crs_criu_CPPFLAGS="$CPPFLAGS $crs_criu_check_CPPFLAGS"
+    crs_criu_LDFLAGS="$LDFLAGS $crs_criu_check_LDFLAGS"
+    crs_criu_LIBS="$LIBS $crs_criu_check_LIBS"
+    # On success export the flags and run action-if-found; otherwise run action-if-not-found
+    AS_IF([test "$check_crs_criu_good" = "yes"],
+          [ AC_SUBST([crs_criu_CFLAGS])
+            AC_SUBST([crs_criu_CPPFLAGS])
+            AC_SUBST([crs_criu_LDFLAGS])
+            AC_SUBST([crs_criu_LIBS])
+            $1],
+          [AS_IF([test ! -z "$with_criu" -a "$with_criu" != "no"],
+                 [AC_MSG_WARN([CRIU support requested but not found.  Perhaps you need to specify the location of the CRIU libraries.])
+                  AC_MSG_ERROR([Aborting.])])
+           $2])
+
+])dnl
diff --git a/opal/mca/crs/criu/crs_criu.h b/opal/mca/crs/criu/crs_criu.h
new file mode 100644 (file)
index 0000000..d5591ee
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2004-2009 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * CRIU CRS component - support checkpoint/restart using CRIU
+ */
+
+#ifndef MCA_CRS_CRIU_EXPORT_H
+#define MCA_CRS_CRIU_EXPORT_H
+
+#include "opal_config.h"
+
+
+#include "opal/mca/mca.h"
+#include "opal/mca/crs/crs.h"
+#include "opal/mca/base/base.h"
+
+#include <criu/criu.h>
+
+BEGIN_C_DECLS
+
+/* Local Component structures */
+struct opal_crs_criu_component_t {
+    /** Base CRS component */
+    opal_crs_base_component_t super;
+};
+typedef struct opal_crs_criu_component_t opal_crs_criu_component_t;
+/* The single component instance, defined in crs_criu_component.c */
+OPAL_MODULE_DECLSPEC extern opal_crs_criu_component_t mca_crs_criu_component;
+/* Component query hook: hands back the module table and the priority */
+int opal_crs_criu_component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Module functions (the entries of the module table in crs_criu_module.c)
+ */
+int opal_crs_criu_module_init(void);
+int opal_crs_criu_module_finalize(void);
+int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *snapshot,
+                             opal_crs_base_ckpt_options_t *options,
+                             opal_crs_state_type_t *state);
+/* Restart entry point; currently a stub that only logs the call */
+int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot,
+                          bool spawn_child, pid_t *child_pid);
+/* Checkpoint disable/enable hooks; currently stubs that only log the call */
+int opal_crs_criu_disable_checkpoint(void);
+int opal_crs_criu_enable_checkpoint(void);
+/* Prelaunch hook; currently a stub that leaves all arguments untouched */
+int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir, char **app,
+                            char **cwd, char ***argv, char ***env);
+/* Thread registration hook; currently a stub that only logs the call */
+int opal_crs_criu_reg_thread(void);
+
+
+END_C_DECLS
+
+#endif /* MCA_CRS_CRIU_EXPORT_H */
diff --git a/opal/mca/crs/criu/crs_criu_component.c b/opal/mca/crs/criu/crs_criu_component.c
new file mode 100644 (file)
index 0000000..20080fc
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2004-2009 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include "opal/util/output.h"
+
+#include "opal/constants.h"
+#include "opal/mca/crs/crs.h"
+#include "opal/mca/crs/base/base.h"
+#include "crs_criu.h"
+
+/* Local functionality */
+static int crs_criu_register(void);
+static int crs_criu_open(void);
+static int crs_criu_close(void);
+
+/*
+ * Instantiate the public component struct: it carries our MCA meta
+ * information and pointers to the component-level entry points
+ */
+opal_crs_criu_component_t mca_crs_criu_component = {
+    /* First do the base component stuff */
+    {
+        /* Handle the general mca_component_t struct containing
+         *  meta information about the component itself
+         */
+        {
+            OPAL_CRS_BASE_VERSION_2_0_0,
+
+            /* Component name and version */
+            "criu",
+            OPAL_MAJOR_VERSION,
+            OPAL_MINOR_VERSION,
+            OPAL_RELEASE_VERSION,
+
+            /* Component open, close, query and register functions */
+            crs_criu_open,
+            crs_criu_close,
+            opal_crs_criu_component_query,
+            crs_criu_register
+        },
+        {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        },
+
+        /* Verbosity level (reset and registered in crs_criu_register) */
+        0,
+        /* opal_output handle; stays -1 until crs_criu_open assigns one */
+        -1
+    }
+};
+
+static int crs_criu_register(void)
+{
+    mca_base_component_t *base = &mca_crs_criu_component.super.base_version;
+    int rc;
+
+    /* Register "priority" (default 10), backed by super.priority */
+    mca_crs_criu_component.super.priority = 10;
+    rc = mca_base_component_var_register(base, "priority",
+                                         "Priority of the CRS criu component (default: 10)",
+                                         MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                         OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
+                                         &mca_crs_criu_component.super.priority);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Register "verbose" (default 0); 0 makes crs_criu_open reuse the framework output */
+    mca_crs_criu_component.super.verbose = 0;
+    rc = mca_base_component_var_register(base, "verbose",
+                                         "Verbose level for the CRS criu component",
+                                         MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                         OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
+                                         &mca_crs_criu_component.super.verbose);
+    return (rc < 0) ? rc : OPAL_SUCCESS;
+}
+
+static int crs_criu_open(void)
+{
+    opal_crs_base_component_t *self = &mca_crs_criu_component.super;
+
+    /*
+     * Choose the output channel: a non-zero component verbosity gets its
+     * own stream at that level, otherwise the CRS framework's channel is
+     * shared.
+     */
+    if (0 == self->verbose) {
+        self->output_handle = opal_crs_base_framework.framework_output;
+    } else {
+        self->output_handle = opal_output_open(NULL);
+        opal_output_set_verbosity(self->output_handle,
+                                  self->verbose);
+    }
+
+    /* Trace the call (level 10) and the effective settings (level 20) */
+    opal_output_verbose(10, self->output_handle,
+                        "crs:criu: open()");
+    opal_output_verbose(20, self->output_handle,
+                        "crs:criu: open: priority = %d", self->priority);
+    opal_output_verbose(20, self->output_handle,
+                        "crs:criu: open: verbosity = %d", self->verbose);
+
+    return OPAL_SUCCESS;
+}
+
+static int crs_criu_close(void)
+{
+    /* Nothing is torn down here; the call is only traced */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: close()");
+
+    return OPAL_SUCCESS;
+}
diff --git a/opal/mca/crs/criu/crs_criu_module.c b/opal/mca/crs/criu/crs_criu_module.c
new file mode 100644 (file)
index 0000000..50f3a11
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2004-2010 The Trustees of Indiana University.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ *                         All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007      Evergrid, Inc. All rights reserved.
+ * Copyright (c) 2011      Oak Ridge National Labs.  All rights reserved.
+ * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include "opal/util/show_help.h"
+#include "opal/util/output.h"
+#include "opal/util/argv.h"
+#include "opal/constants.h"
+
+#include "opal/mca/base/mca_base_var.h"
+
+#include "opal/mca/crs/crs.h"
+#include "opal/mca/crs/base/base.h"
+
+#include "crs_criu.h"
+
+/* CRIU module: function table returned by opal_crs_criu_component_query() */
+static opal_crs_base_module_t criu_module = {
+    /* Initialization Function */
+    opal_crs_criu_module_init,
+    /* Finalization Function */
+    opal_crs_criu_module_finalize,
+
+    /* Checkpoint interface */
+    opal_crs_criu_checkpoint,
+
+    /* Restart interface */
+    opal_crs_criu_restart,
+
+    /* Disable checkpoints */
+    opal_crs_criu_disable_checkpoint,
+    /* Enable checkpoints */
+    opal_crs_criu_enable_checkpoint,
+
+    /* Prelaunch */
+    opal_crs_criu_prelaunch,
+
+    /* Register Thread */
+    opal_crs_criu_reg_thread
+};
+
+/* CRIU snapshot class: the base CRS snapshot plus a context file name */
+OBJ_CLASS_DECLARATION(opal_crs_criu_snapshot_t);
+
+struct opal_crs_criu_snapshot_t {
+    /* Base CRS snapshot type */
+    opal_crs_base_snapshot_t super;
+    char *context_filename;   /* NULL after construction; freed by the destructor */
+};
+typedef struct opal_crs_criu_snapshot_t opal_crs_criu_snapshot_t;
+/* Constructor and destructor wired into the class instance below */
+void opal_crs_criu_construct(opal_crs_criu_snapshot_t *obj);
+void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *obj);
+
+OBJ_CLASS_INSTANCE(opal_crs_criu_snapshot_t,
+                   opal_crs_base_snapshot_t,
+                   opal_crs_criu_construct,
+                   opal_crs_criu_destruct);
+
+void opal_crs_criu_construct(opal_crs_criu_snapshot_t *snapshot)
+{
+    snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name);
+    snapshot->context_filename = NULL;   /* no context file yet */
+}
+
+void opal_crs_criu_destruct(opal_crs_criu_snapshot_t *snapshot)
+{
+    /* free(NULL) is a no-op, so no guard is needed; only the field this
+     * class adds is released here. */
+    free(snapshot->context_filename);
+    snapshot->context_filename = NULL;
+}
+
+int opal_crs_criu_component_query(mca_base_module_t **module, int *priority)
+{
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: component_query()");
+
+    /* Always succeeds: hand back the module table and the current priority */
+    *module = (mca_base_module_t *)&criu_module;
+    *priority = mca_crs_criu_component.super.priority;
+
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_module_init(void)
+{
+    /* No module state to set up; the call is only traced */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: module_init()");
+
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_module_finalize(void)
+{
+    /* No module state to tear down; the call is only traced */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: module_finalize()");
+
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot,
+                             opal_crs_base_ckpt_options_t *options,
+                             opal_crs_state_type_t *state)
+{
+    /* Framework only so far: tag the snapshot and initialise libcriu's options */
+    opal_crs_criu_snapshot_t *snapshot = (opal_crs_criu_snapshot_t *)base_snapshot;
+    int ret;
+
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle,
+                        "crs:criu: checkpoint(%d, ---)", (int)pid);
+
+    /* The constructor already strdup()ed a name: release it before re-tagging */
+    free(snapshot->super.component_name);
+    snapshot->super.component_name = strdup(mca_crs_criu_component.super.base_version.mca_component_name);
+
+    ret = criu_init_opts();
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle,
+                        "crs:criu: criu_init_opts() returned %d", ret);
+    return (0 > ret) ? OPAL_ERROR : OPAL_SUCCESS;   /* do not hide a libcriu failure */
+}
+
+int opal_crs_criu_restart(opal_crs_base_snapshot_t *snapshot,
+                          bool spawn_child, pid_t *child_pid)
+{
+    /* Stub: traces the call; the arguments are not used yet */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: %s", __func__);
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_disable_checkpoint(void)
+{
+    /* Stub: traces the call and reports success */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: %s", __func__);
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_enable_checkpoint(void)
+{
+    /* Stub: traces the call and reports success */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: %s", __func__);
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_prelaunch(int32_t rank, char *base_snapshot_dir,
+                            char **app, char **cwd, char ***argv,
+                            char ***env)
+{
+    /* Stub: traces the call; none of the arguments are read or modified */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: %s", __func__);
+    return OPAL_SUCCESS;
+}
+
+int opal_crs_criu_reg_thread(void)
+{
+    /* Stub: traces the call and reports success */
+    opal_output_verbose(10, mca_crs_criu_component.super.output_handle, "crs:criu: %s", __func__);
+    return OPAL_SUCCESS;
+}