Source code for aup.EE.Resource.GPUResourceManager

"""
..
  Copyright (c) 2018 LG Electronics Inc.
  SPDX-License-Identifier: GPL-3.0-or-later

aup.EE.Resource.GPUResourceManager
==================================

Resource manager for Single GPU machine.

It supports:

1. Multiple Cards
2. Multiple jobs running on a shared card (no control over GPU resource limit)

APIs
----
"""
import json
import logging
import os

from .CPUResourceManager import CPUResourceManager
from ...utils import check_missing_key, load_default_env, DEFAULT_AUPTIMIZER_PATH

logger = logging.getLogger(__name__)


def _load_gpu_mapping(auppath=DEFAULT_AUPTIMIZER_PATH):
    """
    loads GPU mapping based on environment and config
    """
    config = load_default_env(auppath=auppath)
    check_missing_key(config, "gpu_mapping", "Missing gpu_mapping parameter in aup config file", log=logger)
    # need to reverse the type for GPU mapping
    d = json.loads(config["gpu_mapping"])
    return {int(k): str(d[k]) for k in d}


[docs]class GPUResourceManager(CPUResourceManager): def __init__(self, connector, n_parallel, auppath=DEFAULT_AUPTIMIZER_PATH, *args, **kwargs): super(GPUResourceManager, self).__init__(connector, n_parallel, *args, **kwargs) self.mapping = _load_gpu_mapping(auppath=auppath)
[docs] def run(self, job, rid, exp_config, call_back_func, **kwargs): logger.debug("Job %d started on GPU %s", job.jid, self.mapping[rid]) env = os.environ env["CUDA_VISIBLE_DEVICES"] = self.mapping[rid] super(GPUResourceManager, self).run(job, rid, exp_config, call_back_func, env=env)