Source code for hive.utils.loggers

import abc
import copy
import os
from typing import List

import torch
import wandb

from hive.utils.registry import Registrable, registry
from hive.utils.schedule import ConstantSchedule, Schedule, get_schedule
from hive.utils.utils import Chomp, create_folder


class Logger(abc.ABC, Registrable):
    """Abstract class for logging in hive."""

    def __init__(self, timescales=None):
        """Constructor for base Logger class. Every Logger must call this
        constructor in its own constructor.

        Args:
            timescales (str | list(str)): The different timescales at which the
                logger needs to log. If only logging at one timescale, it is
                acceptable to only pass a string.
        """
        if timescales is None:
            self._timescales = []
        elif isinstance(timescales, str):
            self._timescales = [timescales]
        elif isinstance(timescales, list):
            self._timescales = timescales
        else:
            raise ValueError("Need string or list of strings for timescales")

    def register_timescale(self, timescale):
        """Register a new timescale with the logger.

        Args:
            timescale (str): Timescale to register.
        """
        self._timescales.append(timescale)

    @abc.abstractmethod
    def log_config(self, config):
        """Log the config.

        Args:
            config (dict): Config parameters.
        """
        pass

    @abc.abstractmethod
    def log_scalar(self, name, value, prefix):
        """Log a scalar variable.

        Args:
            name (str): Name of the metric to be logged.
            value (float): Value to be logged.
            prefix (str): Prefix to append to the metric name.
        """
        pass

    @abc.abstractmethod
    def log_metrics(self, metrics, prefix):
        """Log a dictionary of values.

        Args:
            metrics (dict): Dictionary of metrics to be logged.
            prefix (str): Prefix to append to each metric name.
        """
        pass

    @abc.abstractmethod
    def save(self, dir_name):
        """Saves the current state of the log files.

        Args:
            dir_name (str): Name of the directory to save the log files to.
        """
        pass

    @abc.abstractmethod
    def load(self, dir_name):
        """Loads the log files from the given directory.

        Args:
            dir_name (str): Name of the directory to load the log files from.
        """
        pass

    @classmethod
    def type_name(cls):
        return "logger"
class ScheduledLogger(Logger):
    """Abstract class that manages a schedule for logging.

    The update_step method should be called for each step in the loop to update
    the logger's schedule. The should_log method can be used to check whether the
    logger should log anything.

    This schedule is not strictly enforced! It is still possible to log something
    even if should_log returns False. These methods are only provided for
    convenience.
    """

    def __init__(self, timescales=None, logger_schedules=None):
        """Any timescales not assigned a schedule in logger_schedules are
        assigned a ConstantSchedule(True).

        Args:
            timescales (str | list[str]): The different timescales at which the
                logger needs to log. If only logging at one timescale, it is
                acceptable to only pass a string.
            logger_schedules (Schedule | list | dict): Schedules used to keep
                track of when to log. If a single schedule is given, it is copied
                for each timescale. If a list of schedules is given, the schedules
                are matched up in order with the list of timescales provided. If a
                dictionary is given, the keys should be the timescales and the
                values should be the schedules.
        """
        super().__init__(timescales)
        if logger_schedules is None:
            logger_schedules = ConstantSchedule(True)
        if isinstance(logger_schedules, dict):
            self._logger_schedules = logger_schedules
        elif isinstance(logger_schedules, list):
            self._logger_schedules = {
                self._timescales[idx]: logger_schedules[idx]
                for idx in range(min(len(logger_schedules), len(self._timescales)))
            }
        elif isinstance(logger_schedules, Schedule):
            self._logger_schedules = {
                timescale: copy.deepcopy(logger_schedules)
                for timescale in self._timescales
            }
        else:
            raise ValueError(
                "logger_schedule must be a dict, list of Schedules, or Schedule object"
            )
        for timescale, schedule in self._logger_schedules.items():
            if isinstance(schedule, dict):
                self._logger_schedules[timescale] = get_schedule(
                    schedule["name"], schedule["kwargs"]
                )
        for timescale in self._timescales:
            if timescale not in self._logger_schedules:
                self._logger_schedules[timescale] = ConstantSchedule(True)
        self._steps = {timescale: 0 for timescale in self._timescales}

    def register_timescale(self, timescale, schedule=None):
        """Register a new timescale.

        Args:
            timescale (str): Timescale to register.
            schedule (Schedule): Schedule to use for this timescale.
        """
        super().register_timescale(timescale)
        if schedule is None:
            schedule = ConstantSchedule(True)
        self._logger_schedules[timescale] = schedule
        self._steps[timescale] = 0

    def update_step(self, timescale):
        """Update the step and schedule for a given timescale.

        Args:
            timescale (str): A registered timescale.
        """
        self._steps[timescale] += 1
        self._logger_schedules[timescale].update()
        return self.should_log(timescale)

    def should_log(self, timescale):
        """Check whether the logger should log for a given timescale.

        Args:
            timescale (str): A registered timescale.
        """
        return self._logger_schedules[timescale].get_value()

    def save(self, dir_name):
        logger_state = Chomp()
        logger_state.timescales = self._timescales
        logger_state.schedules = self._logger_schedules
        logger_state.steps = self._steps
        logger_state.save(os.path.join(dir_name, "logger_state.p"))

    def load(self, dir_name):
        logger_state = Chomp()
        logger_state.load(os.path.join(dir_name, "logger_state.p"))
        self._timescales = logger_state.timescales
        self._logger_schedules = logger_state.schedules
        self._steps = logger_state.steps
class NullLogger(ScheduledLogger):
    """A null logger that does not log anything.

    Used if you don't want to log anything, but still want to use parts of the
    framework that ask for a logger.
    """

    def __init__(self, timescales=None, logger_schedules=None):
        super().__init__(timescales, logger_schedules)

    def log_config(self, config):
        pass

    def log_scalar(self, name, value, prefix):
        pass

    def log_metrics(self, metrics, prefix):
        pass

    def save(self, dir_name):
        pass

    def load(self, dir_name):
        pass
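# Illustrative sketch (not part of the original module): how ScheduledLogger's
# schedule bookkeeping behaves, shown with NullLogger as the simplest concrete
# subclass. The helper name `_null_logger_schedule_sketch`, the timescale names,
# and ConstantSchedule(False) are assumptions for this example only; the
# function is defined here but never called.
def _null_logger_schedule_sketch():
    # A dict maps timescales to schedules; any timescale without an entry falls
    # back to ConstantSchedule(True). A single Schedule would instead be
    # deep-copied for every timescale.
    logger = NullLogger(
        timescales=["train", "eval"],
        logger_schedules={"train": ConstantSchedule(False)},
    )
    assert logger.should_log("eval")        # defaulted to ConstantSchedule(True)
    assert not logger.update_step("train")  # ConstantSchedule(False) never logs
    logger.register_timescale("debug")      # gets ConstantSchedule(True) by default
    assert logger.should_log("debug")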
class WandbLogger(ScheduledLogger):
    """A Wandb logger.

    This logger can be used to log to wandb. It assumes that wandb is configured
    locally on your system. Multiple timescales/loggers can be implemented by
    instantiating multiple loggers with different logger_names. These should still
    have the same project and run names.

    Check the wandb documentation for more details on the parameters.
    """

    def __init__(
        self,
        timescales=None,
        logger_schedules=None,
        project=None,
        name=None,
        dir=None,
        mode=None,
        id=None,
        resume=None,
        start_method=None,
        **kwargs,
    ):
        """
        Args:
            timescales (str | list[str]): The different timescales at which the
                logger needs to log. If only logging at one timescale, it is
                acceptable to only pass a string.
            logger_schedules (Schedule | list | dict): Schedules used to keep
                track of when to log. If a single schedule is given, it is copied
                for each timescale. If a list of schedules is given, the schedules
                are matched up in order with the list of timescales provided. If a
                dictionary is given, the keys should be the timescales and the
                values should be the schedules.
            project (str): Name of the project. Wandb's dashboard groups all runs
                with the same project name together.
            name (str): Name of the run. Used to identify the run on the wandb
                dashboard.
            dir (str): Local directory where wandb saves logs.
            mode (str): The mode of logging. Can be "online", "offline", or
                "disabled". In offline mode, wandb writes all data to disk for
                later syncing to a server. In disabled mode, all calls to the
                wandb API become no-ops, while core functionality is maintained.
            id (str, optional): A unique ID for this run, used for resuming. It
                must be unique in the project, and if you delete a run you can't
                reuse the ID.
            resume (bool | str, optional): Sets the resuming behavior. The options
                are the same as in wandb's documentation.
            start_method (str): The start method to use for wandb's process. See
                https://docs.wandb.ai/guides/track/launch#init-start-error.
            **kwargs: Any other arguments are passed to wandb's init method as
                keyword arguments. Note that these arguments can't be overridden
                from the command line.
        """
        super().__init__(timescales, logger_schedules)
        settings = None
        if start_method is not None:
            settings = wandb.Settings(start_method=start_method)
        wandb.init(
            project=project,
            name=name,
            dir=dir,
            mode=mode,
            id=id,
            resume=resume,
            settings=settings,
            **kwargs,
        )

    def log_config(self, config):
        # Convert list parameters to a nested dictionary
        for k, v in config.items():
            if isinstance(v, list):
                config[k] = {}
                for idx, param in enumerate(v):
                    config[k][idx] = param
        wandb.config.update(config)

    def log_scalar(self, name, value, prefix):
        metrics = {f"{prefix}/{name}": value}
        metrics.update(
            {
                f"{timescale}_step": self._steps[timescale]
                for timescale in self._timescales
            }
        )
        wandb.log(metrics)

    def log_metrics(self, metrics, prefix):
        metrics = {f"{prefix}/{name}": value for (name, value) in metrics.items()}
        metrics.update(
            {
                f"{timescale}_step": self._steps[timescale]
                for timescale in self._timescales
            }
        )
        wandb.log(metrics)
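# Illustrative sketch (not part of the original module): constructing a
# WandbLogger. The project name, run name, and "offline" mode are placeholder
# values; `_wandb_logger_sketch` is a hypothetical helper that is defined but
# never called, so no wandb run is started when this module is imported.
def _wandb_logger_sketch():
    logger = WandbLogger(
        timescales=["train", "eval"],
        project="my_project",  # hypothetical project name
        name="run_0",          # hypothetical run name
        mode="offline",        # write locally; sync to the server later
    )
    if logger.update_step("train"):
        # Logged under "agent/loss", together with a "<timescale>_step"
        # counter for every registered timescale.
        logger.log_scalar("loss", 0.25, prefix="agent")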
class ChompLogger(ScheduledLogger):
    """This logger uses the Chomp data structure to store all logged values which
    are then directly saved to disk.
    """

    def __init__(self, timescales=None, logger_schedules=None):
        super().__init__(timescales, logger_schedules)
        self._log_data = Chomp()

    def log_config(self, config):
        self._log_data["config"] = config

    def log_scalar(self, name, value, prefix):
        metric_name = f"{prefix}/{name}"
        if metric_name not in self._log_data:
            self._log_data[metric_name] = [[], []]
        if isinstance(value, torch.Tensor):
            self._log_data[metric_name][0].append(value.item())
        else:
            self._log_data[metric_name][0].append(value)
        self._log_data[metric_name][1].append(
            {timescale: self._steps[timescale] for timescale in self._timescales}
        )

    def log_metrics(self, metrics, prefix):
        for name in metrics:
            metric_name = f"{prefix}/{name}"
            if metric_name not in self._log_data:
                self._log_data[metric_name] = [[], []]
            if isinstance(metrics[name], torch.Tensor):
                self._log_data[metric_name][0].append(metrics[name].item())
            else:
                self._log_data[metric_name][0].append(metrics[name])
            self._log_data[metric_name][1].append(
                {timescale: self._steps[timescale] for timescale in self._timescales}
            )

    def save(self, dir_name):
        super().save(dir_name)
        self._log_data.save(os.path.join(dir_name, "log_data.p"))

    def load(self, dir_name):
        super().load(dir_name)
        self._log_data.load(os.path.join(dir_name, "log_data.p"))
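# Illustrative sketch (not part of the original module): logging with
# ChompLogger and persisting the results. The output directory and metric
# values are made up; `_chomp_logger_sketch` is defined but never called.
def _chomp_logger_sketch():
    logger = ChompLogger(timescales="train")
    for _ in range(3):
        if logger.update_step("train"):
            logger.log_metrics({"loss": 0.1, "reward": 1.0}, prefix="agent")
    # Each metric is stored as a pair of lists [values, step_dicts], e.g.
    # logger._log_data["agent/loss"] holds [0.1, 0.1, 0.1] and
    # [{"train": 1}, {"train": 2}, {"train": 3}].
    create_folder("./logs")  # hypothetical output directory
    logger.save("./logs")    # writes logger_state.p and log_data.p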
class CompositeLogger(Logger):
    """This Logger aggregates multiple loggers together.

    This logger is for convenience and allows for logging using multiple loggers
    without having to keep track of several loggers. When timescales are updated,
    this logger updates the timescale for each one of its component loggers. When
    logging, logs to each of its component loggers as long as the logger is not a
    ScheduledLogger that should not be logging for the timescale.
    """

    def __init__(self, logger_list: List[Logger]):
        super().__init__([])
        self._logger_list = logger_list

    def register_timescale(self, timescale, schedule=None):
        for logger in self._logger_list:
            if isinstance(logger, ScheduledLogger):
                logger.register_timescale(timescale, schedule)
            else:
                logger.register_timescale(timescale)

    def log_config(self, config):
        for logger in self._logger_list:
            logger.log_config(config)

    def log_scalar(self, name, value, prefix):
        for logger in self._logger_list:
            logger.log_scalar(name, value, prefix)

    def log_metrics(self, metrics, prefix):
        for logger in self._logger_list:
            logger.log_metrics(metrics, prefix=prefix)

    def update_step(self, timescale):
        """Update the step and schedule for a given timescale for every
        ScheduledLogger.

        Args:
            timescale (str): A registered timescale.
        """
        for logger in self._logger_list:
            if isinstance(logger, ScheduledLogger):
                logger.update_step(timescale)
        return self.should_log(timescale)

    def should_log(self, timescale):
        """Check if you should log for a given timescale. If any logger in the
        list is scheduled to log, returns True.

        Args:
            timescale (str): A registered timescale.
        """
        for logger in self._logger_list:
            if not isinstance(logger, ScheduledLogger) or logger.should_log(timescale):
                return True
        return False

    def save(self, dir_name):
        for idx, logger in enumerate(self._logger_list):
            save_dir = os.path.join(dir_name, f"logger_{idx}")
            create_folder(save_dir)
            logger.save(save_dir)

    def load(self, dir_name):
        for idx, logger in enumerate(self._logger_list):
            load_dir = os.path.join(dir_name, f"logger_{idx}")
            logger.load(load_dir)
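# Illustrative sketch (not part of the original module): aggregating several
# loggers behind a single CompositeLogger. `_composite_logger_sketch` is a
# hypothetical helper that is defined but never called.
def _composite_logger_sketch():
    logger = CompositeLogger([ChompLogger(), NullLogger()])
    # Timescales are registered on every component logger; ScheduledLoggers
    # also receive the (optional) schedule.
    logger.register_timescale("train", ConstantSchedule(True))
    if logger.update_step("train"):
        # Each component logger receives the same call.
        logger.log_scalar("loss", 0.5, prefix="agent")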
registry.register_all(
    Logger,
    {
        "NullLogger": NullLogger,
        "WandbLogger": WandbLogger,
        "ChompLogger": ChompLogger,
        "CompositeLogger": CompositeLogger,
    },
)

get_logger = getattr(registry, f"get_{Logger.type_name()}")