# Source code for kingpin.actors.group

"""
:mod:`kingpin.actors.group`
^^^^^^^^^^^^^^^^^^^^^^^^^^^

Group a series of other `BaseActor` into either synchronous
or asynchronous stages.
"""

import asyncio
import logging

from kingpin import utils as kp_utils
from kingpin.actors import base, exceptions, utils
from kingpin.constants import REQUIRED

# Module-level logger named after this module. The actor classes defined
# below emit their messages through their own ``self.log`` attribute.
log = logging.getLogger(__name__)

__author__ = "Matt Wise <matt@nextdoor.com>"



class BaseGroupActor(base.BaseActor):
    """Group together a series of other `kingpin.actors.base.BaseActor` objects

    This class builds the sub-actors and implements ``_execute()``;
    sub-classes are expected to provide ``_run_actions()``, which decides
    how the pre-built actors in ``self._actions`` are run.

    :acts:
      [ <list of `kingpin.actors.base.BaseActor` objects to execute> ]

    """

    # By default, group actors have no timeout. We rely on the individual
    # actors to expire on their own. This is, of course, overrideable in the
    # JSON.
    default_timeout = None

    all_options = {
        "contexts": ((str, list), [], "List of contextual hashes."),
        "acts": (list, REQUIRED, "Array of actor definitions."),
    }

    # Override the BaseActor strict_init_context setting. Since there may be
    # nested-groups that have their own context tokens, we do not require
    # that all of the {KEY}'s inside of the self._options dict are filled in
    # the moment that this actor is instantiated.
    strict_init_context = False

    # Do not remove the escape sequence from escaped tokens. This will be
    # done later by another actor. Otherwise we risk removing the escapes and
    # failing because the token isn't found by a sub actor.
    remove_escape_sequence = False

    def __init__(self, *args, **kwargs):
        """Initializes all of the sub actors.

        By actually initializing all of the Actors supplied to us during the
        __init__, we effectively do a full instantiation of every Actor defined
        in the supplied JSON all at once and upfront long before we try to
        execute any code. This greatly increases our chances of catching JSON
        errors because every single object is pre-initialized before we ever
        begin executing any of our steps.

        *Note about init_tokens:*
          The group.BaseActor and misc.Macro actors support the concept of
          externally supplied data (usually os.environ) being used as available
          tokens for %TOKEN% parsing when reading JSON/YAML scripts. By passing
          this data between these actors, we are able to allow nested token
          passing.

          See `Token-replacement <basicuse.html#token-replacement>` for more
          info.

        Args:
            *args: Passed through unchanged to the parent constructor.
            **kwargs: Passed through unchanged to the parent constructor.
        """
        super().__init__(*args, **kwargs)

        # Pre-initialize all of our actions!
        self._actions = self._build_actions()

    def get_orgchart(self, parent=""):
        """Generate an orgchart for all the `acts` specified.

        Starts from the parent class' orgchart for this actor and then
        appends the orgchart of every sub-actor, each one requested with
        this group's ``id()`` (as a string) as its parent.

        Args:
            parent: Identifier of this group's own parent, forwarded to the
                parent class' ``get_orgchart()``.

        Returns:
            The parent class' result concatenated with the sub-actors'
            results.
        """
        ret = super().get_orgchart(parent=parent)
        group_id = str(id(self))
        for act in self._actions:
            ret = ret + act.get_orgchart(parent=group_id)

        return ret

    def _build_actions(self):
        """Builds either a single set of actions, or multiple sets.

        If no 'contexts' were passed in, then we simply build the actors that
        are defined in the 'acts' option for the group.

        If any 'contexts' were passed in, then this method will create as many
        groups of actions as there are in the list of contexts. For each dict
        in the 'contexts' list, a new group of actors is created with that
        information.

        .. note::

            Because groups may contain nested group actors, any options passed
            into this actor's 'init_context' are also passed into the actors
            that we're instantiating.

        Returns:
            A flat list of actor objects, in context order and then in
            'acts' order.
        """
        contexts = self.option("contexts")
        if not contexts:
            return self._build_action_group(self._init_context)

        # 'contexts' is declared as (str, list) in all_options, so only these
        # two shapes are handled here.
        #
        # If the data passed into the 'contexts' is a list of dicts, we take it
        # as is and do nothing to it.
        if isinstance(contexts, list):
            context_data = contexts
        # If the data passed in is a string, it must be a pointer to a file
        # with contexts in it. We read that file, and we parse it for any
        # missing tokens. We use the "init tokens" that made it into this actor
        # as available token substitutions.
        elif isinstance(contexts, str):
            context_data = kp_utils.load_json_with_tokens(contexts, self._init_tokens)
            # We expect the type here to be List[Dict[str,Union[str,int,bool,float,...]]]
            assert isinstance(
                context_data, list
            ), f"Expected list but got {type(context_data)}"

        actions = []
        for context in context_data:
            # Keys from the specific context win over the inherited ones.
            combined_context = {**self._init_context, **context}
            self.log.debug(f"Inherited context {self._init_context}")
            self.log.debug(f"Specified context {context}")
            self.log.debug(f"Building acts with parameters: {combined_context}")
            actions.extend(self._build_action_group(context=combined_context))

        return actions

    def _build_action_group(self, context=None):
        """Build up all of the actors we need to execute.

        Builds a list of actors to execute and returns the list. The list can
        then either be yielded as a whole (for an async operation), or
        individually (for a synchronous operation).

        Args:
            context: Dict of contextual tokens handed to every actor as its
                ``init_context``. ``None`` is treated as an empty dict.

        Returns:
            A list of references to <actor objects>.
        """
        if context is None:
            context = {}

        acts = self.option("acts")
        self.log.debug(f"Building {len(acts)} actors")

        actions = []
        for act in acts:
            # Build a fresh config for each actor instead of writing into the
            # definition from the 'acts' option: those dicts are reused for
            # every context this group is built with.
            config = {
                **act,
                "init_context": context.copy(),
                "init_tokens": self._init_tokens.copy(),
            }
            actor = utils.get_actor(config, dry=self._dry)
            actions.append(actor)
            self.log.debug(f"Actor {actor} built")
        return actions

    def _get_exc_type(self, exc_list):
        """Return Unrecoverable exception if at least one is in exc_list.

        Takes in a list of exceptions, and returns either a
        RecoverableActorFailure or an UnrecoverableActorFailure based on the
        exceptions that were passed in.

        Args:
            exc_list: List of Exception objects

        Returns:
            RecoverableActorFailure or UnrecoverableActorFailure (the class,
            not an instance). An empty list yields RecoverableActorFailure.
        """
        if any(
            isinstance(exc, exceptions.UnrecoverableActorFailure) for exc in exc_list
        ):
            return exceptions.UnrecoverableActorFailure
        return exceptions.RecoverableActorFailure

    async def _execute(self):
        """Executes the actions configured, and returns.

        .. note::

            Expects the sub-class to implement ``self._run_actions()``.

        If an actor execution fails in ``_run_actions()``, then that exception
        is raised up the stack.
        """
        self.log.info(f"Beginning {len(self._actions)} actions")
        await self._run_actions()




class Sync(BaseGroupActor):
    """Execute a series of `kingpin.actors.base.BaseActor` synchronously.

    Groups together a series of Actors and executes them synchronously
    in the order that they were defined.

    **Options**

    :acts:
        An array of individual Actor definitions.

    :contexts:

        This variable can be one of two formats:

        * A list of dictionaries with *contextual tokens* to pass into the
          actors at instantiation time. If the list has more than one element,
          then every actor defined in ``acts`` will be instantiated once for
          each item in the ``contexts`` list.
        * A string that points to a file with a list of contexts, just like the
          above dictionary format.


    **Timeouts**

    Timeouts are disabled specifically in this actor. The sub-actors can still
    raise their own `kingpin.actors.exceptions.ActorTimedOut` exceptions, but
    since the group actors run an arbitrary number of sub actors, we have
    chosen to not have this actor specifically raise its own
    `kingpin.actors.exceptions.ActorTimedOut` exception unless the user sets
    the ``timeout`` setting.

    **Examples**

    Creates two arrays ... but sleeps 60 seconds between the two, then
    does not sleep at all after the last one:

    .. code-block:: json

        {
            "desc": "Clone, then sleep ... then clone, then sleep shorter...",
            "actor": "group.Sync",
            "options": {
                "contexts": [
                    { "ARRAY": "First", "SLEEP": "60" },
                    { "ARRAY": "Second", "SLEEP": "0" }
                ],
                "acts": [
                    {
                        "desc": "do something",
                        "actor": "server_array.Clone",
                        "options": {
                            "source": "template",
                            "dest": "{ARRAY}"
                        }
                    },
                    {
                        "desc": "sleep",
                        "actor": "misc.Sleep",
                        "options": {
                            "sleep": "{SLEEP}"
                        }
                    }
                ]
            }
        }

    Alternatively if no `contexts` are needed you can use the `array` syntax.

    .. code-block:: json

        [
            {
                "actor": "server_array.Clone",
                "options": {
                    "source": "template",
                    "dest": "%ARRAY%"
                }
            },
            {
                "actor": "misc.Sleep",
                "options": { "sleep": 30 }
            }
        ]

    **Dry Mode**

    Passes on the Dry mode setting to the acts that are called. Does **not**
    stop execution when one of the acts fails. Instead Group actor will finish
    all acts with warnings, and raise an error at the end of execution.

    This provides the user with an insight to all the errors that are possible
    to encounter, rather than abort and quit on the first one.

    **Failure**

    In the event that an act fails, this actor will return the failure
    immediately. Because the acts are executed in-order of definition, the
    failure will prevent any further acts from executing.

    The behavior is different in the dry run (read above.)
    """

    async def _run_actions(self):
        """Synchronously executes all of the Actor.execute() methods.

        If any one actor fails, we prevent execution of the rest of the actors.
        During a dry run - all acts are executed, and a warning is displayed.

        Only `exceptions.ActorException` failures are handled here; any other
        exception propagates as-is.

        Raises:
            In dry run - after every act has been attempted, a single
            Recoverable/UnrecoverableActorFailure summarizing the failures
            (Unrecoverable if at least one failure was).
            In real run - the first of the exceptions, re-raised unchanged.
        """
        errors = []

        for act in self._actions:
            self.log.debug(f'Beginning "{act._desc}"..')
            try:
                await act.execute()
            except exceptions.ActorException as e:
                if not self._dry:
                    self.log.error(
                        f"Aborting sequential execution because "
                        f'"{act._desc}" failed'
                    )
                    raise

                # Dry run: remember the failure and keep going so the user
                # sees every problem in one pass.
                self.log.error(f"{act._desc} failed: {e}")
                self.log.warning("Continuing since this is a dry run.")
                errors.append(e)

        if errors:
            ExcType = self._get_exc_type(errors)
            raise ExcType(
                f'Exceptions raised by {len(errors)} of {len(self._actions)} actors in "{self._desc}".'
            )




class Async(BaseGroupActor):
    """Execute several `kingpin.actors.base.BaseActor` objects asynchronously.

    Groups together a series of Actors and executes them asynchronously -
    waiting until all of them finish before returning.

    **Options**

    :concurrency:
        Max number of concurrent executions. This will fire off N executions
        in parallel, and continue with the remainder as soon as the first
        execution is done. This is faster than creating N Sync executions.
        The default of ``0`` means no limit.

    :acts:
        An array of individual Actor definitions.

    :contexts:

        This variable can be one of two formats:

        * A list of dictionaries with *contextual tokens* to pass into the
          actors at instantiation time. If the list has more than one element,
          then every actor defined in ``acts`` will be instantiated once for
          each item in the ``contexts`` list.
        * A string that points to a file with a list of contexts, just like the
          above dictionary format.

    **Timeouts**

    Timeouts are disabled specifically in this actor. The sub-actors can still
    raise their own `kingpin.actors.exceptions.ActorTimedOut` exceptions, but
    since the group actors run an arbitrary number of sub actors, we have
    chosen to not have this actor specifically raise its own
    `kingpin.actors.exceptions.ActorTimedOut` exception unless the user sets
    the ``timeout`` setting.

    **Examples**

    Clone two arrays quickly.

    .. code-block:: json

        {
            "desc": "Clone two arrays",
            "actor": "group.Async",
            "options": {
                "contexts": [
                    { "ARRAY": "NewArray1" },
                    { "ARRAY": "NewArray2" }
                ],
                "acts": [
                    {
                        "desc": "do something",
                        "actor": "server_array.Clone",
                        "options": {
                            "source": "template",
                            "dest": "{ARRAY}"
                        }
                    }
                ]
            }
        }

    **Dry Mode**

    Passes on the Dry mode setting to the sub-actors that are called.

    **Failure**

    In the event that one or more ``acts`` fail in this group, the entire group
    acts will return a failure to Kingpin. Because multiple actors are
    executing all at the same time, all of these actors will be allowed to
    finish before the failure is returned.
    """

    all_options = {
        "concurrency": (int, 0, "Max number of concurrent executions."),
        "contexts": ((str, list), [], "List of contextual hashes."),
        "acts": (list, REQUIRED, "Array of actor definitions."),
    }

    async def _run_actions(self):
        """Asynchronously executes all of the Actor.execute() methods.

        Every actor's ``execute()`` is scheduled as a task. When the
        ``concurrency`` option is set, scheduling pauses whenever that many
        tasks are in flight and resumes as soon as one of them finishes.

        Failures are not inspected while scheduling; once everything has been
        scheduled each task is awaited in order and its
        `exceptions.ActorException` (if any) is collected.

        Raises:
            RecoverableActorFailure or UnrecoverableActorFailure: a single
            summary exception if one or more actors raised an
            `exceptions.ActorException` (Unrecoverable if at least one
            failure was). Any other exception type propagates as-is from the
            first task that raised it.
        """
        limit = self.option("concurrency")
        if limit:
            self.log.info(f"Concurrency set to {limit}")

        tasks = []
        in_flight = set()

        for act in self._actions:
            task = asyncio.ensure_future(act.execute())
            tasks.append(task)

            if not limit:
                # No concurrency limit - continue the loop without checks.
                continue

            in_flight.add(task)
            if len(in_flight) < limit:
                # We can queue more tasks, continue the loop to add one more.
                continue

            self.log.debug("Concurrency saturated. Waiting...")
            # Sleep until at least one in-flight task completes rather than
            # polling the event loop. asyncio.wait() never raises the tasks'
            # own exceptions; those surface when the tasks are awaited below.
            # The ``in_flight`` guard keeps asyncio.wait() from being handed
            # an empty set (which it rejects) for a non-positive limit.
            while in_flight and len(in_flight) >= limit:
                _, in_flight = await asyncio.wait(
                    in_flight, return_when=asyncio.FIRST_COMPLETED
                )

            self.log.debug(
                f"Concurrency desaturated: {len(in_flight)}<{limit}. Continuing."
            )

        # Now that we've fired them off, we walk through them one-by-one and
        # check on their status. If they've raised an exception, we catch it
        # and log it into a list for further processing.
        errors = []
        for t in tasks:
            try:
                await t
            except exceptions.ActorException as e:
                errors.append(e)

        # Now, if there are exceptions in the list, we generate the appropriate
        # exception type (recoverable vs unrecoverable), and raise it up the
        # stack. The individual exceptions are swallowed here, but that's OK
        # because the BaseActor for each of the acts that failed has already
        # handled printing out the log message with the failure.
        if errors:
            ExcType = self._get_exc_type(errors)
            raise ExcType(
                f'Exceptions raised by {len(errors)} of {len(self._actions)} actors in "{self._desc}".'
            )