##############################################################################
# adaptiveMD: A Python Framework to Run Adaptive Molecular Dynamics (MD)
# Simulations on HPC Resources
# Copyright 2017 FU Berlin and the Authors
#
# Authors: Jan-Hendrik Prinz
# Contributors:
#
# `adaptiveMD` is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with adaptiveMD. If not, see <http://www.gnu.org/licenses/>.
##############################################################################
"""
Implementation of a single instance stand-alone worker to execute tasks
The main idea is that you want tasks that you created in your project to be
executed somehow. There can be several ways to do that and this _worker_
approach will allow you to run a single instance worker somewhere on your
HPC which will search your project for unfinished tasks that can be run,
assign one of these tasks to itself and execute it. Once finished, it will
continue with the next task
The worker consists of two parts:
1. the single worker scheduler that will interpret a task, convert it to
a bash script and run it
2. the worker job instance that runs a loop in the background, checking the DB
for new tasks and submitting these to the scheduler for execution
"""
import os
import socket
import subprocess
import time
import sys
import random
import signal
import ctypes
import re
import shutil
from fcntl import fcntl, F_GETFL, F_SETFL
from mongodb import StorableMixin, SyncVariable, create_to_dict, \
ObjectSyncVariable
from scheduler import Scheduler
from reducer import StrFilterParser, WorkerParser, BashParser, PrefixParser
from logentry import LogEntry
from util import DT
from adaptivemd import Transfer
import pymongo.errors
try:
    # works on linux
    # load the C standard library so `prctl` can be called later; used to
    # make task subprocesses receive SIGTERM when this worker process dies
    libc = ctypes.CDLL("libc.so.6")
except OSError:
    # non-linux platform (or libc not found): disable the death-signal feature
    libc = None
class WorkerScheduler(Scheduler):
    def __init__(self, resource, verbose=False):
        """
        A single instance worker scheduler to interpret `Task` objects

        Parameters
        ----------
        resource : `Resource`
            the resource this scheduler should use.
        verbose : bool
            if True the worker will report lots of stuff
        """
        super(WorkerScheduler, self).__init__(resource)
        # handle to the currently running `subprocess.Popen` (None if idle)
        self._current_sub = None
        # name of the per-task worker directory of the running task
        self._current_unit_dir = None
        # the `Task` currently being executed (None if idle)
        self.current_task = None
        self.home_path = os.path.expanduser('~')
        # uuids of tasks already finished by this scheduler
        self._done_tasks = set()
        # if True a task's stdout/stderr is stored as `LogEntry` in the DB
        self._save_log_to_db = True
        self.verbose = verbose
        # if True the generated bash script aborts on the first failing command
        self._fail_after_each_command = True
        # if True the per-task worker directory is removed after success
        self._cleanup_successful = True
        # accumulated stdout/stderr captured from the running subprocess
        self._std = {}
@property
def path(self):
return self.resource.shared_path.replace('$HOME', self.home_path)
@property
def staging_area_location(self):
return 'sandbox:///workers/staging_area'
[docs] def task_to_script(self, task):
"""
Convert a task to an executable bash script
Parameters
----------
task : `Task`
the `Task` instance to be converted
Returns
-------
list of str
a list of bash commands
"""
# create a task that wraps errands from resource and scheduler
wrapped_task = task >> self.wrapper >> self.project.resource.wrapper
# call the reducer that interpretes task actions
reducer = StrFilterParser() >> PrefixParser() >> WorkerParser() >> BashParser()
script = reducer(self, wrapped_task.script)
if self._fail_after_each_command:
# the bash script exits if ANY command fails not just the last one
script = ['set -e'] + script
return script
[docs] def submit(self, submission):
"""
Submit a `Task` or a `Trajectory`
Parameters
----------
submission : (list of) `Task` or `Trajectory`
Returns
-------
list of `Task`
the list of tasks actually executed after looking at all objects
"""
tasks = self._to_tasks(submission)
if tasks:
for task in tasks:
self.tasks[task.__uuid__] = task
return tasks
@property
def current_task_dir(self):
"""
Return the current path to the worker directory
Returns
-------
str or None
the path or None if no task is executed at the time
"""
if self._current_unit_dir is not None:
return self.path + '/workers/' + self._current_unit_dir
else:
return None
    def _start_job(self, task):
        """
        Start execution of a task

        Creates a fresh per-task worker directory, writes the task's bash
        script to ``running.sh`` and launches it in a subprocess whose
        stdout/stderr are switched to non-blocking reads so they can be
        captured incrementally by `_advance_std`.

        Parameters
        ----------
        task : `Task`
            the task to be executed
        """
        self._current_unit_dir = 'worker.%s' % hex(task.__uuid__)
        script_location = self.current_task_dir
        if os.path.exists(script_location):
            print 'removing existing folder', script_location
            # the folder already exists, probably a failed previous attempt
            # a restart needs a clean folder so remove it now
            shutil.rmtree(script_location)
        # create a fresh folder
        os.makedirs(script_location)
        # and set the current directory
        os.chdir(script_location)
        task.fire('submit', self)
        # NOTE(review): `task_to_script` wraps its argument with the scheduler
        # and resource wrappers again, so the wrappers appear to be applied
        # twice here -- confirm whether this double wrapping is intentional
        script = self.task_to_script(task >> self.wrapper >> self.resource.wrapper)
        # write the script
        with open(script_location + '/running.sh', 'w') as f:
            f.write('\n'.join(script))
        task.state = 'running'
        task.fire(task.state, self)
        if libc is not None:
            # on linux, ask the kernel to deliver SIGTERM to the child when
            # this parent process dies, so tasks do not outlive the worker
            def set_pdeathsig(sig=signal.SIGTERM):
                def death_fnc():
                    return libc.prctl(1, sig)
                return death_fnc
            preexec_fn = set_pdeathsig(signal.SIGTERM)
        else:
            preexec_fn = None
        self._current_sub = subprocess.Popen(
            ['/bin/bash', script_location + '/running.sh'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            preexec_fn=preexec_fn, shell=False)
        # this is a special hack that allows to read from stdout and stderr
        # without a blocking `.read`, let's hope this works
        flags = fcntl(self._current_sub.stdout, F_GETFL)  # get current p.stdout flags
        fcntl(self._current_sub.stdout, F_SETFL, flags | os.O_NONBLOCK)
        flags = fcntl(self._current_sub.stderr, F_GETFL)  # get current p.stderr flags
        fcntl(self._current_sub.stderr, F_SETFL, flags | os.O_NONBLOCK)
        # prepare std catching
        self._start_std()
[docs] def stop_current(self):
"""
Stop execution of the current task immediately
Returns
-------
bool
if True the current task was cancelled, False if there
was no task running
"""
if self._current_sub is not None:
task = self.current_task
self._current_sub.kill()
del self.tasks[task.__uuid__]
self._final_std()
self.current_task = None
return True
else:
return False
def _start_std(self):
self._std = {
'stdout': '',
'stderr': ''
}
def _advance_std(self):
"""
Advance the stdout and stderr for some bytes, save it and redirect
"""
for s in ['stdout', 'stderr']:
try:
new_std = os.read(getattr(self._current_sub, s).fileno(), 1024)
self._std[s] += new_std
if self.verbose:
# send to stdout, stderr
std = getattr(sys, s)
std.write(new_std)
std.flush()
except OSError:
pass
    def _final_std(self):
        """
        Finish capturing of stdout and stderr

        Drains the remaining output with ``.communicate()``, optionally
        mirrors it, and (if enabled) stores the complete stdout/stderr as
        `LogEntry` objects in the DB, attached to the current task.
        """
        task = self.current_task
        try:
            out, err = self._current_sub.communicate()
            if self.verbose:
                sys.stderr.write(err)
                sys.stdout.write(out)
            # save full message
            stdout = self._std['stdout'] + out
            stderr = self._std['stderr'] + err
            if self._save_log_to_db:
                log_err = LogEntry(
                    'worker',
                    'stderr from running task',
                    stderr
                )
                log_out = LogEntry(
                    'worker',
                    'stdout from running task',
                    stdout
                )
                self.project.logs.add(log_err)
                self.project.logs.add(log_out)
                task.stdout = log_out
                task.stderr = log_err
        except ValueError:
            # `.communicate()` raises ValueError when the process was already
            # finalized (e.g. after `.kill()`) -- nothing left to capture
            pass
    def advance(self):
        """
        Advance checking if tasks are completed or failed

        Needs to be called in regular intervals. Usually by the main
        worker instance. If idle, the next queued task is started; otherwise
        the running subprocess is polled and, once it exits, the task is
        marked `success` or `fail`, its output files are checked, and the
        per-task worker directory is optionally cleaned up.
        """
        if self.current_task is None:
            if len(self.tasks) > 0:
                # idle and tasks queued: pick any task and start it
                t = next(self.tasks.itervalues())
                self.current_task = t
                self._start_job(t)
        else:
            task = self.current_task
            # get current outputs
            return_code = self._current_sub.poll()
            # update current stdout and stderr by 1024 bytes
            self._advance_std()
            if return_code is not None:
                # finish std catching
                self._final_std()
                if return_code == 0:
                    # success
                    all_files_present = True
                    # see first if we have all claimed files for worker output staging transfer
                    for f in task.targets:
                        if isinstance(f, Transfer):
                            if not os.path.exists(self.replace_prefix(f.source.url)):
                                log = LogEntry(
                                    'worker',
                                    'execution error',
                                    'failed to create file before staging %s' % f.source.short,
                                    objs={'file': f, 'task': task}
                                )
                                self.project.logs.add(log)
                                all_files_present = False
                    if all_files_present:
                        try:
                            task.fire('success', self)
                            task.state = 'success'
                            print 'task succeeded'
                            if self._cleanup_successful:
                                print 'removing worker dir'
                                # go to an existing folder before we delete
                                os.chdir(self.path)
                                script_location = self.current_task_dir
                                if script_location is not None:
                                    shutil.rmtree(script_location)
                        except IOError:
                            task.state = 'fail'
                    else:
                        # a declared output file is missing: treat as failure
                        task.state = 'fail'
                else:
                    # failed
                    log = LogEntry(
                        'worker',
                        'task failed',
                        'see log files',
                        objs={'task': task}
                    )
                    self.project.logs.add(log)
                    # NOTE(review): 'failed' is immediately overwritten with
                    # 'fail' a few lines below; the intermediate value causes
                    # an extra DB sync and is inconsistent with the 'fail'
                    # state used everywhere else -- confirm and remove
                    task.state = 'failed'
                    try:
                        task.fire('fail', self)
                    except IOError:
                        pass
                    task.state = 'fail'
                del self.tasks[task.__uuid__]
                self._done_tasks.add(task.__uuid__)
                # reset to idle so the next call can start another task
                self._initialize_current()
[docs] def release_queued_tasks(self):
"""
Release captured tasks scheduled for execution (if not started yet)
You can prefetch tasks (although not recommended for single workers)
and this releases not started jobs back to the queue
"""
for t in list(self.tasks.values()):
if t.state == 'queued':
t.state = 'created'
t.worker = None
del self.tasks[t.__uuid__]
def _initialize_current(self):
self._current_sub = None
self._current_unit_dir = None
self.current_task = None
    def enter(self, project=None):
        """
        Attach this scheduler to a project and prepare for task execution

        Parameters
        ----------
        project : `Project` or None
            the project to attach to; if None the previously assigned
            project is used
        """
        self.change_state('booting')
        if project is not None:
            self.project = project
        # register this scheduler with the project for later cleanup
        self.project.schedulers.add(self)
        # create main folders. make sure we can save project files
        self.stage_project()
        self.stage_generators()
        self.change_state('running')
[docs] def stage_project(self):
"""
Create paths necessary for the current project
"""
paths = [
self.path + '/projects/',
self.path + '/projects/' + self.project.name,
self.path + '/projects/' + self.project.name + '/trajs',
self.path + '/projects/' + self.project.name + '/models'
]
self._create_dirs(paths)
paths = [
self.path + '/workers',
self.path + '/workers/staging_area'
# self.path + '/workers/staging_area/trajs'
]
self._create_dirs(paths)
@staticmethod
def _create_dirs(paths):
for p in paths:
try:
os.makedirs(p)
except OSError:
pass
def stage_generators(self):
os.chdir(self.path + '/workers/staging_area/')
reducer = StrFilterParser() >> PrefixParser() >> WorkerParser() >> BashParser()
retries = 10
while retries > 0:
try:
# todo: add staging that does some file copying as well
for g in self.generators:
reducer(self, g.stage_in)
retries = 0
except OSError:
time.sleep(random.random())
retries -= 1
    def replace_prefix(self, path):
        """
        Rewrite a location URL so it is usable on the worker (remote) side

        Parameters
        ----------
        path : str
            the location string to be rewritten

        Returns
        -------
        str
            the path with all known prefixes replaced
        """
        # on a worker all runs on the remote side, so if we talk about file://
        # locations we actually want them to work, once they are transferred.
        # There are only two ways this is supported (yet). Either the file is
        # in the DB, then we do not care about the file location. The other
        # case is, if the task generates it on the file side and then
        # transfers it. The trick we use is to just create the file directly
        # on the remote side and do the link as usual. This requires to alter
        # a file:// path to be on the remote side.
        # replace any occurrence of `file://a/b/c/d/something` with
        # `worker://_file_something`
        path = re.sub(r"(file://[^ ]*/)([^ /]*)", r"worker://_file_\2", path)
        # call the default replacements
        path = super(WorkerScheduler, self).replace_prefix(path)
        return path
def shut_down(self, wait_to_finish=True):
self.change_state('releaseunfinished')
self.release_queued_tasks()
if wait_to_finish:
self.change_state('waitcurrent')
curr = time.time()
max_wait = 15
while len(self.tasks) > 0 and time.time() - curr < max_wait:
self.advance()
time.sleep(2.0)
# kill the current job
self.change_state('shuttingdown')
if self.current_task:
if True:
self.current_task.state = 'created'
else:
self.current_task.state = 'cancelled'
self.stop_current()
self.change_state('down')
class Worker(StorableMixin):
    """
    A Worker instance that will submit tasks from the DB to a scheduler
    """
    # DB-synchronized attributes this class can be searched by
    _find_by = ['state', 'n_tasks', 'seen', 'verbose', 'prefetch', 'current']

    # current worker/scheduler state (mirrored from the scheduler)
    state = SyncVariable('state')
    # number of tasks currently held by the scheduler
    n_tasks = SyncVariable('n_tasks')
    # timestamp of the last heartbeat written to the DB
    seen = SyncVariable('seen')
    # if True task output is mirrored to the worker's own stdout/stderr
    verbose = SyncVariable('verbose')
    # number of tasks to capture in advance
    prefetch = SyncVariable('prefetch')
    # remote-control command to be picked up by the running worker
    command = SyncVariable('command')
    # the task currently being executed (stored in the `tasks` store)
    current = ObjectSyncVariable('current', 'tasks')
    def __init__(self, walltime=None, generators=None, sleep=None,
                 heartbeat=None, prefetch=1, verbose=False):
        """
        Parameters
        ----------
        walltime : float or None
            seconds after which the worker shuts itself down (None: no limit)
        generators : list of str or None
            if given, only run tasks whose generator has one of these names
        sleep : float
            seconds to pause between two iterations of the main loop
        heartbeat : float
            interval in seconds at which `seen` is refreshed in the DB
        prefetch : int
            number of tasks to capture in advance
        verbose : bool
            if True task output is mirrored to the worker's stdout/stderr
        """
        super(Worker, self).__init__()
        self.hostname = socket.gethostname()
        self.cwd = os.getcwd()
        # time of the last heartbeat
        self.seen = time.time()
        self.walltime = walltime
        self.generators = generators
        self.sleep = sleep
        self.heartbeat = heartbeat
        self.prefetch = prefetch
        # seconds to wait before reconnecting after a lost DB connection
        self.reconnect_time = 10
        self._scheduler = None
        self._project = None
        self.command = None
        self.verbose = verbose
        self.current = None
        # last value written to `current`, used to avoid redundant DB syncs
        self._last_current = None
        self.pid = os.getpid()
to_dict = create_to_dict([
'walltime', 'generators', 'sleep', 'heartbeat', 'hostname',
'cwd', 'seen', 'prefetch', 'pid'
])
@classmethod
def from_dict(cls, dct):
obj = super(Worker, cls).from_dict(dct)
obj.hostname = dct['hostname']
obj.cwd = dct['cwd']
obj.seen = dct['seen']
obj.pid = dct['pid']
return obj
def create(self, project):
scheduler = WorkerScheduler(project.resource, self.verbose)
scheduler._state_cb = self._state_cb
self._scheduler = scheduler
self._project = project
scheduler.enter(project)
    def _state_cb(self, scheduler):
        # callback fired by the scheduler: mirror its state into the DB
        self.state = scheduler.state
    @property
    def scheduler(self):
        """
        Returns
        -------
        `WorkerScheduler`
            the currently used scheduler to execute tasks
        """
        return self._scheduler
    @property
    def project(self):
        """
        Returns
        -------
        `Project`
            the currently used project
        """
        return self._project
_running_states = ['running', 'waitandshutdown']
_accepting_states = ['running']
    def _stop_current(self, mode):
        """
        Stop the currently running task and set its state to `mode`

        Uses an atomic DB test-and-set ('running' -> 'stopping') so that only
        one party ends up cancelling the task.

        Parameters
        ----------
        mode : str
            the state assigned to the stopped task (e.g. 'halted',
            'cancelled')
        """
        sc = self.scheduler
        task = sc.current_task
        if task:
            # atomically claim the task: only proceed if it was 'running'
            attempt = self.project.storage.tasks.modify_test_one(
                lambda x: x == task, 'state', 'running', 'stopping')
            if attempt is not None:
                if sc.stop_current():
                    # success, so mark the task as cancelled
                    task.state = mode
                    task.worker = None
                    print 'stopped a task [%s] from generator `%s` and set to `%s`' % (
                        task.__class__.__name__,
                        task.generator.name if task.generator else '---',
                        task.state)
                else:
                    # seems in the meantime the task has finished (success/fail)
                    pass
    def execute(self, command):
        """
        Send a single command to the (possibly remote) worker for execution

        Note that the worker is registered in the DB but running on your
        HPC. Just loading it does not allow you to call functions like
        `.shutdown` directly -- those would only act on your local instance.
        All you can do is use `execute`, which stores the command in the DB;
        once the real running worker has executed it, the command is cleared
        from the DB.

        Parameters
        ----------
        command : str
            the command to be executed
        """
        self.command = command
    def run(self):
        """
        Start the worker to execute tasks until it is shut down

        Runs the main loop: advance the scheduler, fetch new tasks from the
        DB (atomically claiming them 'created' -> 'queued'), handle remote
        commands, write heartbeats, enforce the walltime and transparently
        reconnect after DB connection failures.
        """
        scheduler = self._scheduler
        project = self._project
        last = time.time()
        last_n_tasks = 0
        self.seen = last

        def task_test(x):
            # a task is runnable if it is ready and -- when generator names
            # were requested -- comes from one of those generators
            return x.ready and (not self.generators or (
                hasattr(x.generator, 'name') and x.generator.name
                in self.generators))

        print 'up and running ...'
        try:
            reconnect = True
            while reconnect:
                reconnect = False
                try:
                    if len(scheduler.tasks) > 0:
                        # must have been a DB connection problem, attempt reconnection
                        print 'attempt reconnection'
                        self._project.reconnect()
                        print 'remove all pending tasks'
                        # remove all pending tasks as much as possible
                        for t in list(scheduler.tasks.values()):
                            if t is not scheduler.current_task:
                                if t.worker == self:
                                    t.state = 'created'
                                    t.worker = None
                                del scheduler.tasks[t.__uuid__]
                        # see, if we can salvage the currently running task
                        # unless it has been cancelled and is running with another worker
                        t = scheduler.current_task
                        if t.worker == self and t.state == 'running':
                            print 'continuing current task'
                            # seems like the task is still ours to finish
                            pass
                        else:
                            print 'current task has been captured. releasing.'
                            scheduler.stop_current()
                    # the main worker loop
                    while scheduler.state != 'down':
                        state = self.state
                        # check the state of the worker
                        if state in self._running_states:
                            scheduler.advance()
                            if scheduler.is_idle:
                                # claim up to `prefetch` runnable tasks via an
                                # atomic DB state change 'created' -> 'queued'
                                for _ in range(self.prefetch):
                                    tasklist = scheduler(
                                        project.storage.tasks.modify_test_one(
                                            task_test, 'state', 'created', 'queued'))
                                    for task in tasklist:
                                        task.worker = self
                                        print 'queued a task [%s] from generator `%s`' % (
                                            task.__class__.__name__,
                                            task.generator.name if task.generator else '---')
                                self.n_tasks = len(scheduler.tasks)
                        # handle commands
                        # todo: Place all commands in a separate store and consume ?!?
                        # NOTE(review): 'kill' uses `if` instead of `elif`,
                        # breaking the chain; harmless since `command` cannot
                        # equal two values at once, but inconsistent
                        command = self.command
                        if command == 'shutdown':
                            # someone wants us to shutdown
                            scheduler.shut_down()
                        if command == 'kill':
                            # someone wants us to shutdown immediately. No waiting
                            scheduler.shut_down(False)
                        elif command == 'release':
                            scheduler.release_queued_tasks()
                        elif command == 'halt':
                            self._stop_current('halted')
                        elif command == 'cancel':
                            self._stop_current('cancelled')
                        elif command and command.startswith('!'):
                            # any command starting with `!` is executed as a
                            # shell command on the worker; its output is logged
                            result = subprocess.check_output(command[1:].split(' '))
                            project.logs.add(
                                LogEntry(
                                    'command', 'called `%s` on worker' % command[1:], result
                                )
                            )
                        if command:
                            # clear the consumed command from the DB
                            self.command = None
                        if time.time() - last > self.heartbeat:
                            # heartbeat
                            last = time.time()
                            self.seen = last
                        time.sleep(self.sleep)
                        if self.walltime and time.time() - self.__time__ > self.walltime:
                            # we have reached the set walltime and will shutdown
                            print 'hit walltime of %s' % DT(self.walltime).length
                            scheduler.shut_down()
                        if scheduler.current_task is not self._last_current:
                            # publish the new current task to the DB
                            self.current = scheduler.current_task
                            self._last_current = self.current
                        n_tasks = len(scheduler.tasks)
                        if n_tasks != last_n_tasks:
                            self.n_tasks = n_tasks
                            last_n_tasks = n_tasks
                except (pymongo.errors.ConnectionFailure, pymongo.errors.AutoReconnect) as e:
                    print 'pymongo connection error', e
                    print 'try reconnection after %d seconds' % self.reconnect_time
                    # lost connection to DB, try to reconnect after some time
                    time.sleep(self.reconnect_time)
                    reconnect = True
        except KeyboardInterrupt:
            # manual interrupt: shut the scheduler down cleanly
            scheduler.shut_down()
            pass
[docs] def shutdown(self, gracefully=True):
"""
Shut down the worker
Parameters
----------
gracefully : bool
if True the worker is allowed some time to finish running tasks
"""
self._scheduler.shut_down(gracefully)