# Source code for utopya.project_registry

"""Implementation of the utopya project registry"""

import logging
import os
from typing import Any, Dict, List, Optional

from pydantic import DirectoryPath, FilePath

from ._yaml_registry import BaseSchema, RegistryEntry, YAMLRegistry
from .cfg import PROJECT_INFO_FILE_SEARCH_PATHS, UTOPYA_CFG_SUBDIRS
from .tools import load_yml, recursive_update

# Module-level logger. This module calls ``log.remark``, ``log.note``,
# ``log.caution`` and ``log.success``, which are not methods of the standard
# library ``logging.Logger`` -- presumably these custom levels are registered
# elsewhere in the package before the functions below run (TODO confirm).
log = logging.getLogger(__name__)

# -- Schema definition --------------------------------------------------------
# .. Sub-schemas ..............................................................


class ProjectPaths(BaseSchema):
    """Schema for the ``paths`` field of a project registry entry.

    Only ``base_dir`` is mandatory; every other path may be omitted, in which
    case it is stored as ``None``. The ``DirectoryPath`` / ``FilePath``
    annotations are pydantic path types.
    """

    # The project's base directory (required).
    base_dir: DirectoryPath

    # Location of the project info file. ``ProjectRegistry.register`` fills
    # this in with the info file it loaded the project information from.
    project_info: Optional[FilePath] = None

    # Optional directories associated with the project.
    models_dir: Optional[DirectoryPath] = None
    py_tests_dir: Optional[DirectoryPath] = None
    py_plots_dir: Optional[DirectoryPath] = None

    # Optional project-level configuration files.
    mv_project_cfg: Optional[FilePath] = None
    project_base_plots: Optional[FilePath] = None
# TODO What about the BatchTaskManager configuration?
class ProjectMetadata(BaseSchema):
    """Schema for the ``metadata`` field of a project registry entry.

    All entries are optional and default to ``None``.
    """

    # Identification and description
    version: Optional[str] = None
    long_name: Optional[str] = None
    description: Optional[str] = None
    long_description: Optional[str] = None

    # Legal and contact information
    license: Optional[str] = None
    authors: Optional[List[str]] = None
    email: Optional[str] = None
    website: Optional[str] = None

    # Technical information
    utopya_compatibility: Optional[str] = None
    language: Optional[str] = None
    requirements: Optional[List[str]] = None

    # Arbitrary further entries, keyed by string
    misc: Optional[Dict[str, Any]] = None
class ProjectSettings(BaseSchema):
    """Schema for the ``settings`` field of a project registry entry.

    Both flags are optional tri-state values: ``None`` means "not given".
    """

    preload_project_py_plots: Optional[bool] = None
    """Whether to preload the project-level plot module (``py_plots_dir``)
    after initialization of the
    :py:class:`~utopya.eval.plotmanager.PlotManager`.
    If not given, will load the module.
    """

    preload_framework_py_plots: Optional[bool] = None
    """Whether to preload the framework-level plot module (``py_plots_dir``)
    after initialization of the
    :py:class:`~utopya.eval.plotmanager.PlotManager`.
    If not given, will load the module.
    """
# .............................................................................
class ProjectSchema(BaseSchema):
    """Data model describing a single entry of the project registry.

    ``project_name``, ``paths`` and ``metadata`` have no default and hence
    need to be supplied; everything else is optional.
    """

    # Name of this project and, optionally, of the framework project it
    # builds upon (looked up in the registry by
    # ``Project.framework_project``).
    project_name: str
    framework_name: Optional[str] = None

    # Nested sub-schemas
    paths: ProjectPaths
    metadata: ProjectMetadata
    # NOTE(review): the default is a plain empty dict; whether it gets
    #               coerced into a ``ProjectSettings`` instance depends on how
    #               ``BaseSchema`` is configured -- confirm.
    settings: ProjectSettings = {}

    # Run configuration format
    run_cfg_format: str = "yaml"

    # Config set search locations
    cfg_set_abs_search_dirs: Optional[List[str]] = None
    cfg_set_model_source_subdirs: Optional[List[str]] = None

    # Custom python modules, given as mapping from name to directory
    custom_py_modules: Optional[Dict[str, DirectoryPath]] = None

    output_files: Optional[dict] = None
    # TODO Needs sub-schema

    debug_level_updates: Optional[Dict[str, dict]] = None
    # TODO Implement
# -- Project ------------------------------------------------------------------
class Project(RegistryEntry):
    """A registry entry that describes a project"""

    SCHEMA: type = ProjectSchema

    @property
    def framework_project(self) -> Optional["Project"]:
        """If a framework project is defined, retrieve it from the registry.

        Returns:
            Optional[Project]: The registry entry named by
                ``framework_name`` or None if no framework name is set.
        """
        if not self.framework_name:
            return None

        # Imported here rather than at module level: the registry object is
        # looked up via the package namespace at call time.
        from . import PROJECTS

        return PROJECTS[self.framework_name]

    def get_git_info(self, *, include_patch_info: bool = False) -> dict:
        """Returns information about the state of this project's git
        repository using the ``python-git-info`` package.

        If no git information is retrievable, e.g. because the project's
        ``base_dir`` does not contain a git repository, will still return a
        dict but with the ``have_git_repo`` entry set to False. Otherwise,
        the git information will be in the ``latest_commit`` entry.

        The returned dict always contains the following keys:

        - ``project_name`` and ``project_base_dir``
        - ``have_git_repo``: whether git information could be retrieved
        - ``latest_commit``: the retrieved git information or None
        - ``dirty``: ``"unknown"`` unless patch information was retrieved
          successfully, in which case it is a bool
        - ``git_status``: list of ``[status, path]`` pairs, one for each
          line of ``git status --short``; empty if not retrieved
        - ``git_diff``: output of ``git diff -p``; empty if not retrieved

        Args:
            include_patch_info (bool, optional): If True, will attempt
                subprocess calls to ``git`` and store patch information in
                the ``git_status`` and ``git_diff`` entries. In that case,
                the ``dirty`` entry will denote whether ``git diff --quiet``
                reported changes.

        Returns:
            dict: A dict containing information about the associated git
                repository.
        """
        import subprocess

        import gitinfo  # PyPI package `python-git-info`

        base_dir = str(self.paths.base_dir)
        sp_kws = dict(cwd=base_dir, capture_output=True, text=True)

        d = dict(
            project_name=self.project_name,
            project_base_dir=base_dir,
            have_git_repo=False,
            latest_commit=None,
            dirty="unknown",
            git_status=[],
            git_diff="",
        )

        # Get git information (without requiring git)
        info = gitinfo.get_git_info(base_dir)
        if not info:
            return d

        d["have_git_repo"] = True
        d["latest_commit"] = info

        if not include_patch_info:
            return d

        # Attempt subprocess git calls to find out more.
        # Make sure these commands work in the first place. This is a
        # deliberate best-effort: any failure is logged, not raised.
        try:
            git_status = subprocess.run(
                ["git", "status", "--short"], **sp_kws, check=True
            )

        except Exception as exc:
            log.caution(
                "Failed retrieving git patch information for "
                f"{self.project_name} because git invocation via a python "
                f"subprocess failed:\n\n{type(exc).__name__}: {exc}"
            )
            return d

        # git command works, can store information
        # git status: each line is a status code followed by the path. The
        # status code may start with a space (e.g. " M path"), so split on
        # arbitrary whitespace *per line*; splitting on the first space would
        # yield an empty status for such lines.
        d["git_status"] = [
            line.split(None, 1)
            for line in git_status.stdout.splitlines()
            if line.strip()
        ]

        # git diff
        git_diff_p = subprocess.run(["git", "diff", "-p"], **sp_kws)
        d["git_diff"] = git_diff_p.stdout.strip()

        # ... whether the repo has uncommitted changes; a non-zero exit code
        # of `git diff --quiet` is interpreted as "dirty"
        git_diff = subprocess.run(["git", "diff", "--quiet"], **sp_kws)
        d["dirty"] = git_diff.returncode != 0

        return d
# -- ProjectRegistry ----------------------------------------------------------
class ProjectRegistry(YAMLRegistry):
    """The project registry"""

    def __init__(self, registry_dir: Optional[str] = None):
        """Initializes the project registry, loading available entries from
        the registry directory in the utopya config directory.

        This also creates the registry directory, if not created yet.

        Args:
            registry_dir (str, optional): A custom registry directory. If
                not given, uses ``UTOPYA_CFG_SUBDIRS["projects"]``.
        """
        if registry_dir is None:
            registry_dir = UTOPYA_CFG_SUBDIRS["projects"]

        # ``exist_ok`` avoids the race between checking for the directory and
        # creating it, e.g. if several processes start up at the same time.
        os.makedirs(registry_dir, exist_ok=True)

        super().__init__(Project, registry_dir=registry_dir)

    def register(
        self,
        *,
        base_dir: str,
        info_file: str = None,
        custom_project_name: str = None,
        require_matching_names: bool = None,
        exists_action: str = "raise",
    ) -> Project:
        """Register or update information of a project.

        Args:
            base_dir (str): Project base directory
            info_file (str, optional): Path to info file which contains
                further path information and metadata (may be relative to
                base directory). If not given, will use some defaults to
                search for it.
            custom_project_name (str, optional): Custom project name,
                overwrites the one given in the info file
            require_matching_names (bool, optional): If set, will require
                that the custom project name is equal to the one given in
                the project info file. This allows checking that the file
                content does not diverge from some outside state.
            exists_action (str, optional): Action to take upon existing
                project; passed on to ``add_entry``.

        Returns:
            Project: Project information for the new or validated project

        Raises:
            ValueError: If no info file was given and none was found at the
                default search locations, or if ``require_matching_names``
                is set and the custom project name differs from the one in
                the info file.
        """
        log.info("Commencing project registration ...")

        # Parse base directory and info file; if not given, search for it
        base_dir = os.path.abspath(os.path.expanduser(base_dir))
        log.remark("Project base directory:\n %s", base_dir)

        if info_file:
            info_file = os.path.realpath(
                os.path.join(base_dir, os.path.expanduser(info_file))
            )
            log.remark("Have project info file available:\n %s", info_file)

        else:
            for _relpath in PROJECT_INFO_FILE_SEARCH_PATHS:
                _abspath = os.path.join(base_dir, _relpath)
                if os.path.isfile(_abspath):
                    info_file = _abspath
                    log.remark("Found project info file:\n %s", info_file)
                    break
            else:
                _locs = "\n".join(
                    f" - {p}" for p in PROJECT_INFO_FILE_SEARCH_PATHS
                )
                raise ValueError(
                    "Missing project info file!\nEither explicitly supply a "
                    "path or add one at the following search locations "
                    f"relative to the base directory:\n{_locs}"
                )

        # Load and parse the info file; an empty file is treated as empty dict
        log.note("Loading project information from file ...")
        project_info = dict(paths=dict(base_dir=base_dir))
        _project_info = load_yml(info_file) or {}

        # Warn if base_dir was given in info file; the base directory given
        # explicitly should always take precedence.
        # A ``paths`` entry that is present but empty (None) is tolerated.
        if (_project_info.get("paths") or {}).get("base_dir"):
            log.caution(
                "The project info file contains a `paths.base_dir` entry, "
                "which will be ignored!"
            )

        project_info = recursive_update(_project_info, project_info)
        project_info["paths"]["project_info"] = info_file

        # Regularize paths, evaluating relative ones towards the base
        # directory. Entries that are explicitly unset (None) are left as
        # they are; joining them would fail. Only existing keys are
        # re-assigned, so the dict size does not change during iteration.
        paths = project_info["paths"]
        for path_name, path in paths.items():
            if path_name in ("base_dir", "project_info") or path is None:
                continue

            paths[path_name] = os.path.realpath(os.path.join(base_dir, path))

        # May want to use a custom project name
        if custom_project_name:
            # The info file need not contain a project name at all, thus
            # using the looked-up value (possibly None) for the message.
            _project_name = project_info.get("project_name")
            if require_matching_names and custom_project_name != _project_name:
                raise ValueError(
                    "The custom project name '{}' does not match the name "
                    "given in the project info file, '{}'! "
                    "Either ensure that the names match exactly or unset the "
                    "`require_matching_names` flag.".format(
                        custom_project_name, _project_name
                    )
                )

            project_info["project_name"] = custom_project_name
            log.remark("Using a custom project name: %s", custom_project_name)
            log.remark(
                "Note that this creates a mismatch between the project "
                "info file and the registered project!"
            )

        # Now, create a registry entry from it
        log.note(
            "Preparing to add entry (exists_action: %s) ...", exists_action
        )
        project_name = project_info["project_name"]
        self.add_entry(
            project_name, exists_action=exists_action, **project_info
        )

        log.success(
            "Successfully stored or updated '%s' project information.",
            project_name,
        )
        log.remark(
            "NOTE: Changes to the project info file are *not* automatically "
            "tracked!\nTo update project information, repeat this procedure."
        )

        # Return the entry as it is now stored in the registry
        return self[project_name]
# ----------------------------------------------------------------------------- PROJECTS = ProjectRegistry() """The package-wide project registry"""