#!/usr/bin/env python3
"""
Resource manager for VASCA
"""
import inspect
import os
from pprint import pprint
import dotenv
import yaml
# Absolute directory of this module file (made absolute via os.path.abspath).
CLASS_DIR = os.path.dirname(os.path.abspath(__file__))
"""Path relative to the resource manager module directory"""
# Parent of CLASS_DIR; the ".." component is appended verbatim and is
# not normalized away.
PACKAGE_DIR = "/".join((CLASS_DIR, ".."))
"""Path to the root directory of VASCA"""
[docs]
class ResourceManager:
"""
Manages access to external resources.
Resources like catalog files or large metadata files may be stored on
remote storage systems such as DESY Sync & Share (SAS) or LUSTRE.
Users may synchronize these resources onto their local systems
to perform software analysis on the data. To aid collaborative development,
the `ResourceManager` provides an interface to find the locations of resources
on the user's local system.
Parameters
----------
verbose : bool, optional
Enable verbose printout to stdout (default is False for no printout).
Attributes
----------
flags : dict
Dictionary holding all flags.
metadata : dict
Dictionary holding the parsed contents of the YAML-config files
located at `CLASS_DIR/resource_metadata`. Additional info may be added,
e.g. the set status of the env vars and the path they are pointing to.
Notes
-----
The implementation requires a set of environment variables. The full list
of variables is defined in `CLASS_DIR/resource_metadata/resource_envs.yml`.
Users need to make sure these variables are known to the system. This can be
achieved in two ways. Either the user manually sets each variable via
the shell config file (`.bashrc`, `.zshrc`, etc.) or the instance of
`ResourceManager` automatically sets the variables if the config file at
`PACKAGE_DIR/.env` is found.
"""
def __init__(self, verbose=False):
    """
    Set up flags, load the resource metadata and prepare the environment.

    Parameters
    ----------
    verbose : bool, optional
        Enable verbose printout to stdout (default is False for no printout).
    """
    # all runtime switches live in one dictionary
    self.flags = {"verbose": verbose}

    # metadata must be available before the env vars can be verified,
    # because `_load_env` reads the list of required variables from it
    self.metadata = self._load_metadata()
    self._load_env()
[docs]
def __enter__(self):
    """Enter the runtime context and hand back this very instance."""
    return self
[docs]
def __exit__(self, exc_type, exc_value, traceback):
    """
    Leave the runtime context.

    No cleanup is performed at the moment. The method returns None, so an
    exception raised inside the ``with`` body is not suppressed.
    """
    return None
[docs]
def _load_env(self, config_path=PACKAGE_DIR + "/.env", overwrite=False):
    """
    Verify and set the required environment variables (env vars).

    The required variables are the keys of ``self.metadata["envs"]``.
    Their status is refreshed via `_log_env_status` and then one of the
    following cases applies:

    - All variables set, config file found: values are compared to the
      config file and a warning is printed for every mismatch.
    - All variables set, no config file: nothing to do.
    - Variables missing, no config file: an exception is raised.
    - Variables missing, config file found: the missing variables are
      taken from the config file, followed by a mismatch check.

    Parameters
    ----------
    config_path : str, optional
        Path to the dotenv-style config file
        (default is ``PACKAGE_DIR + "/.env"``).
    overwrite : bool, optional
        If True and at least one required variable is unset, the config
        file is loaded as a whole via `dotenv.load_dotenv` instead of
        setting only the missing variables one by one (default is False).

    Raises
    ------
    Exception
        If required variables are unset and cannot be obtained from the
        config file (file missing, file empty, or no usable value for a
        missing variable).
    """
    # Resolved once here so that the nested helper below reports this
    # method's name in its messages and not its own.
    method_name = inspect.currentframe().f_code.co_name
    envs = self.metadata["envs"]

    def warn_on_mismatch(config):
        """Print a warning per env var that differs from `config`; return True if any did."""
        found_mismatch = False
        for var in envs:
            # A variable the config file does not list has nothing to be
            # compared against (it may legitimately come from the shell).
            if var not in config:
                continue
            if os.environ.get(var) != config[var]:
                found_mismatch = True
                print(
                    str.format(
                        "WARNING(ResourceManager.{}): "
                        "Mismatch for environent variable '{}' "
                        "between the version that is set: \n{}\n"
                        "and the version listet in conf file: \n{}",
                        method_name,
                        var,
                        os.environ.get(var),
                        config[var],
                    )
                )
        return found_mismatch

    # log if env vars are already set
    self._log_env_status()
    all_set = all(envs[var]["set"] for var in envs)

    # read the env var config file, if there is one
    found_config = os.path.isfile(config_path)
    env_config = dict(dotenv.dotenv_values(config_path)) if found_config else {}

    # Everything was set beforehand: nominal functionality is assumed and
    # loading of env vars is not attempted.
    if all_set:
        if not found_config:
            if self.flags["verbose"]:
                print(
                    str.format(
                        "ResourceManager.{}: All env vars were previously set. "
                        "Missing env var config file.",
                        method_name,
                    )
                )
            return
        if not warn_on_mismatch(env_config) and self.flags["verbose"]:
            print(
                str.format(
                    "ResourceManager.{}: All env vars were previously set. "
                    "No mismatch with env var config file occured",
                    method_name,
                )
            )
        return

    # From here on at least one required env var is unset.
    missing_env_var_list = [var for var in envs if not envs[var]["set"]]
    unable_message = str.format(
        "Unable to set environemt variables {}. "
        "Solve it by adding them to the config file at '{}' "
        "or setting the variables manually in your shell config.",
        missing_env_var_list,
        config_path,
    )

    # nothing to fall back on without a config file
    if not found_config:
        raise Exception(unable_message)

    if overwrite:
        # NOTE(review): load_dotenv is called without `override=True`, so
        # variables that are already set keep their value -- confirm that
        # this is the intended meaning of `overwrite`.
        dotenv.load_dotenv(config_path)
    else:
        # raise error if env var config is empty
        if not env_config:
            raise Exception(unable_message)
        for var in missing_env_var_list:
            # A key without a value is parsed as None by dotenv and cannot
            # be exported, so it is treated like an absent key.
            value = env_config.get(var)
            if value is None:
                raise Exception(
                    "Unable to set environemt variable '{}'.".format(var)
                )
            os.environ[var] = value

    self._log_env_status()
    # Variables that were set beforehand may still differ from the config.
    warn_on_mismatch(env_config)
[docs]
def _log_env_status(self):
    """
    Record for every required env var whether it is set and what it holds.

    For each key of ``self.metadata["envs"]`` the entries ``"set"`` (bool)
    and ``"path"`` (value of the variable, None if unset) are updated.
    With the verbose flag enabled the set status is printed as well.
    """
    for name, info in self.metadata["envs"].items():
        value = os.environ.get(name)
        info["set"] = value is not None
        if self.flags["verbose"]:
            print("'{}' is set '{}'".format(name, info["set"]))
        info["path"] = value
[docs]
def _check_resource_catalog(self):
    """
    Placeholder for a consistency check of the resource_catalog.yml file.

    Not implemented yet; the method currently does nothing.
    Checks that are planned:

    - duplicate resource IDs/names for a given storage
    - whether the cataloged files really exist
    """
    return None
[docs]
def _check_resoruce_env_info(self):
    """
    Placeholder for a consistency check of the env var metadata.

    Not implemented yet; the method currently does nothing.
    Planned check: only unique env_var-storage pairings exist.
    """
    # NOTE: the method name contains a typo ("resoruce"); it is kept as is
    # so that existing callers continue to work.
    return None
[docs]
def get_path(self, resource, storage):
    """
    Returns the local path to a given resource at the corresponding storage system.

    The path is constructed from the local path to the synced remote directory
    (value of the environment variable assigned to `storage`) and the
    resource path relative to that directory. Both parts are concatenated
    verbatim, without inserting a path separator.

    Parameters
    ----------
    resource : str
        Name of the resource for which the path is requested.
    storage : str
        Name of the storage system where `resource` is saved at.

    Returns
    -------
    path : str
        Full local path to `resource`

    Raises
    ------
    KeyError
        When either one of resource or storage names is not
        listed in the resource catalog.
    ValueError
        When `resource` is not cataloged for `storage`, when no
        environment variable is assigned to `storage`, when that
        variable is not set, or when the resource could not be found
        at the inferred path.

    Notes
    -----
    This method requires specific environment variables to be declared
    on the users system during runtime.
    The variable names are listed in `CLASS_DIR/resource_metadata/resource_envs.yml`.

    Examples
    --------
    >>> rm = ResourceManager()  # doctest: +SKIP
    >>> rm.get_path(resource="gal_visits_list", storage="sas_cloud")  # doctest: +SKIP
    '/local/path/to/resource.file'
    """
    catalog = self.metadata["catalog"]

    # validate storage name
    if storage not in catalog:
        raise KeyError(
            "Unknown storage system '{}'. Select one from {}.".format(
                storage, list(catalog)
            )
        )

    # validate resource name against the resources of all storage systems
    known_names = [
        entry["name"] for entries in catalog.values() for entry in entries.values()
    ]
    if resource not in known_names:
        # verbose listing, only needed for the error message:
        # [<resource name>(<storage>:<resource ID>)]
        known_verbose = [
            "{}({}:{})".format(entry["name"], strg, res_id)
            for strg, entries in catalog.items()
            for res_id, entry in entries.items()
        ]
        raise KeyError(
            "Unknown resource '{}'. Select one from {}.".format(
                resource, known_verbose
            )
        )

    # Look the resource up at the requested storage. There is no early
    # exit, so the last matching catalog item wins.
    matched_entry = None
    for entry in catalog[storage].values():
        if resource == entry["name"]:
            matched_entry = entry
    if matched_entry is None:
        raise ValueError(
            "No matching resource found for '{}' at storage system '{}'".format(
                resource, storage
            )
        )
    resource_type = matched_entry["type"]
    resource_description = matched_entry["description"]

    # identify the environment variable assigned to the storage (last match wins)
    env_var = None
    for var, info in self.metadata["envs"].items():
        if storage == info["storage"]:
            env_var = var
    if env_var is None:
        raise ValueError(
            "No matching environment variable found "
            "for resource '{}' at storage '{}'. "
            "Verify metadata files for consistency.".format(resource, storage)
        )

    # An unset variable would otherwise surface as an opaque TypeError
    # from the string concatenation below.
    storage_root = os.environ.get(env_var)
    if storage_root is None:
        raise ValueError(
            "Environment variable '{}' required for storage '{}' "
            "is not set.".format(env_var, storage)
        )

    # construct path
    path = storage_root + matched_entry["path"]

    # validate path; resource types other than file/directory are not checked
    if resource_type == "file" and not os.path.isfile(path):
        raise ValueError("Not a file at '{}'".format(path))
    if resource_type == "directory" and not os.path.isdir(path):
        raise ValueError("Not a directory at '{}'".format(path))

    # success message if verbose
    if self.flags["verbose"]:
        print(
            "Found a {} at '{}'. Resource description: {}".format(
                resource_type, path, resource_description
            )
        )
    return path