# Source code for openoa.schema.metadata

from __future__ import annotations

import re
import json
import warnings
import itertools
from copy import deepcopy
from string import digits
from pathlib import Path

import yaml
import attrs
import numpy as np
import pandas as pd
from attrs import field, define
from tabulate import tabulate

from openoa.logging import logging, logged_method_call


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Installs a module-import-time filter in the global ``warnings`` registry using the "once"
# action for ``DeprecationWarning`` (e.g., the one raised by ``convert_frequency`` below).
warnings.filterwarnings("once", category=DeprecationWarning)


# *************************************************************************
# Define the analysis requirements for ease of findability and modification
# *************************************************************************


# Datetime frequency checks
# Each tuple lists the offset codes accepted for a given minimum resolution. They are used as
# the "freq" entries of ``ANALYSIS_REQUIREMENTS``, and ``_at_least_monthly`` doubles as the full
# set of valid (non-deprecated) codes checked in ``convert_frequency``.
_at_least_monthly = ("MS", "ME", "W", "D", "h", "min", "s", "ms", "us", "ns")
_at_least_daily = ("D", "h", "min", "s", "ms", "us", "ns")
_at_least_hourly = ("h", "min", "s", "ms", "us", "ns")
# Maps each deprecated offset code (key) to the code that ``convert_frequency`` substitutes for
# it (value).
deprecated_offset_map = {
    "M": "ME",
    "H": "h",
    "T": "min",
    "S": "s",
    "L": "ms",
    "U": "us",
    "N": "ns",
}

# Minimum data requirements keyed by analysis type. Each analysis maps a data category
# ("scada", "meter", "tower", "curtail", "reanalysis", or "asset") to:
#   - "columns": the column names that must be present for that category
#   - "freq": the tuple of acceptable offset codes for that category's timestamps; the "asset"
#     category always uses an empty tuple
# This table is read by ``determine_analysis_requirements``.
ANALYSIS_REQUIREMENTS = {
    "MonteCarloAEP": {
        "meter": {
            "columns": ["MMTR_SupWh"],
            "freq": _at_least_monthly,
        },
        "curtail": {
            "columns": ["IAVL_DnWh", "IAVL_ExtPwrDnWh"],
            "freq": _at_least_monthly,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_AirDen"],
            "freq": _at_least_monthly,
        },
    },
    "MonteCarloAEP-temp": {
        "meter": {
            "columns": ["MMTR_SupWh"],
            "freq": _at_least_monthly,
        },
        "curtail": {
            "columns": ["IAVL_DnWh", "IAVL_ExtPwrDnWh"],
            "freq": _at_least_monthly,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_AirDen", "WMETR_EnvTmp"],
            "freq": _at_least_monthly,
        },
    },
    "MonteCarloAEP-wd": {
        "meter": {
            "columns": ["MMTR_SupWh"],
            "freq": _at_least_monthly,
        },
        "curtail": {
            "columns": ["IAVL_DnWh", "IAVL_ExtPwrDnWh"],
            "freq": _at_least_monthly,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_AirDen", "WMETR_HorWdSpdU", "WMETR_HorWdSpdV"],
            "freq": _at_least_monthly,
        },
    },
    "MonteCarloAEP-temp-wd": {
        "meter": {
            "columns": ["MMTR_SupWh"],
            "freq": _at_least_monthly,
        },
        "curtail": {
            "columns": ["IAVL_DnWh", "IAVL_ExtPwrDnWh"],
            "freq": _at_least_monthly,
        },
        "reanalysis": {
            "columns": [
                "WMETR_HorWdSpd",
                "WMETR_AirDen",
                "WMETR_EnvTmp",
                "WMETR_HorWdSpdU",
                "WMETR_HorWdSpdV",
            ],
            "freq": _at_least_monthly,
        },
    },
    "TurbineLongTermGrossEnergy": {
        "scada": {
            "columns": ["asset_id", "WMET_HorWdSpd", "WTUR_W"],
            "freq": _at_least_daily,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_HorWdDir", "WMETR_AirDen"],
            "freq": _at_least_daily,
        },
        "asset": {
            "columns": ["rated_power"],
            "freq": (),
        },
    },
    "ElectricalLosses": {
        "scada": {
            "columns": ["asset_id", "WTUR_W"],
            "freq": _at_least_daily,
        },
        "meter": {
            "columns": ["MMTR_SupWh"],
            "freq": _at_least_monthly,
        },
    },
    "WakeLosses-scada": {
        "scada": {
            "columns": ["asset_id", "WMET_HorWdSpd", "WTUR_W", "WMET_HorWdDir"],
            "freq": _at_least_hourly,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_HorWdDir"],
            "freq": _at_least_hourly,
        },
        "asset": {
            "columns": ["latitude", "longitude", "rated_power"],
            "freq": (),
        },
    },
    "WakeLosses-tower": {
        "scada": {
            "columns": ["asset_id", "WMET_HorWdSpd", "WTUR_W"],
            "freq": _at_least_hourly,
        },
        "tower": {
            "columns": ["asset_id", "WMET_HorWdSpd", "WMET_HorWdDir"],
            "freq": _at_least_hourly,
        },
        "reanalysis": {
            "columns": ["WMETR_HorWdSpd", "WMETR_HorWdDir"],
            "freq": _at_least_hourly,
        },
        "asset": {
            "columns": ["latitude", "longitude", "rated_power"],
            "freq": (),
        },
    },
    "StaticYawMisalignment": {
        "scada": {
            "columns": [
                "asset_id",
                "WMET_HorWdSpd",
                "WTUR_W",
                "WMET_HorWdDirRel",
                "WROT_BlPthAngVal",
            ],
            "freq": _at_least_hourly,
        },
        "asset": {
            "columns": ["rated_power"],
            "freq": (),
        },
    },
}


# Translation table that deletes every decimal digit from a string; ``convert_frequency`` uses
# it to isolate the alphabetic offset code from inputs such as "10min".
remove_digits = str.maketrans("", "", digits)


@logged_method_call
def convert_frequency(offset: str) -> str:
    """Normalize a Pandas offset string, replacing deprecated offset codes with their successors.

    Note:
        When Pandas fully deprecates the usage of "M", "H", "T", "S", "L", "U", and "N", we will
        follow shortly thereafter

    Args:
        offset (str): The alphanumeric offset string: an optional leading integer followed by one
            of "MS", "ME", "W", "D", "h", "min", "s", "ms", "us", "ns", "M", "H", "T", "S", "L",
            "U", or "N".

    Raises:
        ValueError: Raised if digits appear anywhere other than the front of :py:attr:`offset`.
        ValueError: Raised if the offset code is neither a supported nor a deprecated code.

    Returns:
        str: The leading digits (if any) followed by the non-deprecated offset code.
    """
    # Split the input into its first run of digits and the digit-free remainder
    first_number = re.search(r"\d+", offset)
    multiplier = first_number.group(0) if first_number is not None else ""
    code = offset.translate(remove_digits)

    # Reassembling the two pieces only reproduces the input when all digits were leading
    if f"{multiplier}{code}" != offset:
        raise ValueError(
            f"Offset strings must have leading digits only, input form: '{offset}' is invalid"
        )

    if code in deprecated_offset_map:
        # Deprecated codes are still accepted, but are swapped for their replacement
        warnings.warn(
            f"Pandas 3.0 will deprecated the following codes, please use the following mapping {deprecated_offset_map}",
            DeprecationWarning,
        )
        code = deprecated_offset_map[code]
    elif code not in _at_least_monthly:
        raise ValueError(
            f"The offset string identifier: '{code}' is invalid. Use one of: {_at_least_monthly}"
        )

    return f"{multiplier}{code}"


def determine_analysis_requirements(
    which: str, analysis_type: str | list[str]
) -> dict | tuple[dict, dict]:
    """Collects the column and/or timestamp frequency requirements, per data category (such as
    SCADA), that satisfy every requested analysis type.

    Args:
        which (str): One of "columns", "frequency", or "both".
        analysis_type (str | list[str]): The analysis type(s), each of which must be a key of
            ``ANALYSIS_REQUIREMENTS``.

    Raises:
        KeyError: Raised if any analysis type is not a key of ``ANALYSIS_REQUIREMENTS``.
        ValueError: Raised if :py:attr:`which` is not one of "columns", "frequency", or "both".

    Returns:
        dict | tuple[dict, dict]: For "columns", a dictionary of each data category and the union
            of its required columns; for "frequency", a dictionary of each data category and the
            offset codes accepted by all requested analyses; for "both", the tuple of
            (columns, frequency).
    """
    names = [analysis_type] if isinstance(analysis_type, str) else analysis_type
    selected = {name: ANALYSIS_REQUIREMENTS[name] for name in names}

    if which in ("columns", "both"):
        # Union of the required columns across analyses; unused categories are left out
        column_requirements = {}
        for category in ("scada", "meter", "tower", "curtail", "reanalysis", "asset"):
            needed = set()
            for spec in selected.values():
                needed.update(spec.get(category, {}).get("columns", []))
            if needed:
                column_requirements[category] = needed

    if which in ("frequency", "both"):
        # Only the offset codes acceptable to every analysis using a category are retained
        frequency_requirements = {}
        for spec in selected.values():
            for category, details in spec.items():
                allowed = details["freq"]
                if category in frequency_requirements:
                    previous = frequency_requirements[category]
                    frequency_requirements[category] = previous.intersection(allowed)
                else:
                    frequency_requirements[category] = set(allowed)

    if which == "both":
        return column_requirements, frequency_requirements
    if which == "columns":
        return column_requirements
    if which == "frequency":
        return frequency_requirements
    raise ValueError("`which` must be one of 'columns', 'frequency', or 'both'.")


@define(auto_attribs=True)
class FromDictMixin:
    """A mixin that lets an `attrs`-defined class be created from a dictionary containing more
    keys than the class accepts; keys that are not initialization parameters are ignored rather
    than causing an error.

    Raises
    ------
    AttributeError
        Raised if the required class inputs are not provided.
    """

    @classmethod
    @logged_method_call
    def from_dict(cls, data: dict):
        """Creates an instance of the `attrs`-defined class from a data dictionary.
        Args:
            data (dict): The data dictionary to be mapped.
        Raises:
            AttributeError: Raised if an initialization parameter without a default value is
                missing from :py:attr:`data`.
        Returns:
            (cls): An intialized object of the `attrs`-defined class (`cls`).
        """
        # Only attributes that are part of ``__init__`` can be passed through
        init_attributes = [a for a in cls.__attrs_attrs__ if a.init]  # type: ignore
        init_names = [a.name for a in init_attributes]

        ignored = [key for key in data if key not in init_names]
        logger.info(f"No matches for provided kwarg inputs: {ignored}")
        kwargs = {name: data[name] for name in init_names if name in data}

        # An initialization attribute with no default must be supplied by the caller
        nothing_type = type(attrs.NOTHING)
        required = {a.name for a in init_attributes if isinstance(a.default, nothing_type)}
        undefined = sorted(required.difference(kwargs))
        if undefined:
            raise AttributeError(
                f"The class defintion for {cls.__name__} is missing the following inputs: {undefined}"
            )
        return cls(**kwargs)  # type: ignore


@define(auto_attribs=True)
class ResetValuesMixin:
    """
    A MixinClass that provides the methods to reset initialized or default values for analysis
    parameters.

    The consuming class is expected to be `attrs`-defined and to provide a ``run_parameters``
    attribute listing the names of the parameters that may be reset.
    """

    @logged_method_call
    def set_values(self, value_dict: dict):
        """Resets the parameters to the values provided in :py:attr:`value_dict`.

        Args:
            value_dict (dict): The parameter names (keys) and their values (values) as a dictionary.
        """
        for name, value in value_dict.items():
            logger.debug(f"{name} being set back to {value}")
            # Assign through ``object`` directly, bypassing any ``__setattr__`` defined by the
            # class itself
            object.__setattr__(self, name, value)

    @logged_method_call
    def reset_defaults(self, which: str | list[str] | tuple[str] | None = None):
        """Reset all or a subset of the analysis parameters back to their defaults.

        Args:
            which (str | list[str] | tuple[str] | None): The parameter(s) to reset back to their
                defaults. If None, then all run parameters are reset. Defaults to None.

        Raises:
            ValueError: Raised if any of :py:attr:`which` are not included in ``self.run_parameters``.
        """
        logger.info("Resetting run parameters back to the class defaults")
        # Define the analysis class run parameters
        valid = self.run_parameters

        if which is None:
            # If None, set to all run parameterizations
            which = valid
        elif isinstance(which, str):
            # A single name must be wrapped, otherwise it is treated as a sequence of characters
            which = [which]

        # Check that all values of which are valid
        if invalid := set(which).difference(valid):
            raise ValueError(f"Invalid arguments provided to reset_defaults: {invalid}")

        # Reset the selected values back to their defaults. ``attrs.fields`` only accepts a
        # class, so the instance's type is inspected.
        reset_dict = {}
        for attribute in attrs.fields(type(self)):
            if attribute.name not in which:
                continue
            default = attribute.default
            if isinstance(default, attrs.Factory):
                # Factory defaults hold a callable that produces the value, not the value itself
                default = default.factory(self) if default.takes_self else default.factory()
            reset_dict[attribute.name] = default
        self.set_values(reset_dict)


def _make_single_repr(name: str, meta_class) -> str:
    """Builds the printable summary of a single metadata object.

    Args:
        name (str): The class name used for the banner. "ReanalysisMetaData" suppresses the
            banner and "AssetMetaData" suppresses the frequency section.
        meta_class: The metadata object providing ``col_map``, ``dtypes``, ``units``, and, unless
            :py:attr:`name` is "AssetMetaData", ``frequency``.

    Returns:
        str: The banner, frequency, and grid-formatted table of column names, expected types, and
            expected units, joined by newlines.
    """
    column_names = pd.DataFrame.from_dict(
        meta_class.col_map, orient="index", columns=["Column Name"]
    )
    # Reduce "<class 'float'>" style type representations to the bare type name
    type_names = {
        key: str(dtype).replace("<class '", "").replace("'>", "")
        for key, dtype in meta_class.dtypes.items()
    }
    expected_types = pd.DataFrame.from_dict(
        type_names, orient="index", columns=["Expected Type"]
    )
    expected_units = pd.DataFrame.from_dict(
        meta_class.units, orient="index", columns=["Expected Units"]
    )
    summary = pd.concat([column_names, expected_types, expected_units], axis=1)

    rule = "-" * len(name)
    lines = [] if name == "ReanalysisMetaData" else [rule, name, rule + "\n"]

    if name != "AssetMetaData":
        lines.extend(["frequency\n--------", meta_class.frequency])

    lines.append("\nMetadata Summary\n----------------")
    lines.append(tabulate(summary, headers=summary.columns, tablefmt="grid"))
    return "\n".join(lines)


def _make_combined_repr(cls: PlantMetaData) -> str:
    """Builds the printable summary of all the metadata objects held by a plant metadata object.

    Args:
        cls (PlantMetaData): The object providing the ``scada``, ``meter``, ``tower``,
            ``status``, ``curtail``, and ``asset`` metadata, and a ``reanalysis`` dictionary of
            product names and their metadata.

    Returns:
        str: The string form of each metadata object, separated by blank lines, ending with a
            "ReanalysisMetaData" section that lists each reanalysis product by name.
    """
    title = "ReanalysisMetaData"
    rule = "-" * len(title)
    reanalysis_lines = [rule, title, rule + "\n"]
    for product, meta in cls.reanalysis.items():
        reanalysis_lines.extend((f"\n{product}:\n", f"{meta}"))

    parts = (cls.scada, cls.meter, cls.tower, cls.status, cls.curtail, cls.asset)
    sections = [f"{part}" for part in parts]
    sections.append("\n".join(reanalysis_lines))

    # Collapse the doubled blank lines introduced when joining the sections
    return "\n\n".join(sections).replace("\n\n\n", "\n\n")


# ***************************************
# Define the meta data validation classes
# ***************************************


@define(auto_attribs=True)
class SCADAMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about the SCADA data, that will contribute to a larger
    plant metadata schema/routine.

    Args:
        time (str): The datetime stamp for the SCADA data, by default "time". This data should be
            of type: ``np.datetime64[ns]``, or able to be converted to a pandas DatetimeIndex.
            Additional columns describing the datetime stamps are: `frequency`
        asset_id (str): The turbine identifier column in the SCADA data, by default "asset_id".
            This data should be of type: ``str``.
        WTUR_W (str): The power produced, in kW, column in the SCADA data, by default "WTUR_W".
            This data should be of type: ``float``.
        WMET_HorWdSpd (str): The measured windspeed, in m/s, column in the SCADA data, by default
            "WMET_HorWdSpd". This data should be of type: ``float``.
        WMET_HorWdDir (str): The measured wind direction, in degrees, column in the SCADA data, by
            default "WMET_HorWdDir". This data should be of type: ``float``.
        WMET_HorWdDirRel (str): The measured wind direction relative to the nacelle orientation
            (i.e., the wind vane measurement), in degrees, column in the SCADA data, by default
            "WMET_HorWdDirRel". This data should be of type: ``float``.
        WTUR_TurSt (str): The status code column in the SCADA data, by default "WTUR_TurSt". This
            data should be of type: ``str``.
        WROT_BlPthAngVal (str): The pitch, in degrees, column in the SCADA data, by default
            "WROT_BlPthAngVal". This data should be of type: ``float``.
        WMET_EnvTmp (str): The temperature column in the SCADA data, by default "WMET_EnvTmp".
            This data should be of type: ``float``.
        frequency (str): The frequency of `time` in the SCADA data, by default "10min". The input
            should align with the `Pandas frequency offset aliases`_.

    .. _Pandas frequency offset aliases:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

    """

    # DataFrame columns
    time: str = field(default="time")
    asset_id: str = field(default="asset_id")
    WTUR_W: str = field(default="WTUR_W")
    WMET_HorWdSpd: str = field(default="WMET_HorWdSpd")
    WMET_HorWdDir: str = field(default="WMET_HorWdDir")
    WMET_HorWdDirRel: str = field(default="WMET_HorWdDirRel")
    WTUR_TurSt: str = field(default="WTUR_TurSt")
    WROT_BlPthAngVal: str = field(default="WROT_BlPthAngVal")
    WMET_EnvTmp: str = field(default="WMET_EnvTmp")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="scada", init=False)
    WTUR_SupWh: str = field(default="WTUR_SupWh", init=False)  # calculated in PlantData
    col_map: dict = field(init=False)
    col_map_reversed: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            asset_id=str,
            WTUR_W=float,
            WMET_HorWdSpd=float,
            WMET_HorWdDir=float,
            WMET_HorWdDirRel=float,
            WTUR_TurSt=str,
            WROT_BlPthAngVal=float,
            WMET_EnvTmp=float,
            WTUR_SupWh=float,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            asset_id=None,
            WTUR_W="kW",
            WMET_HorWdSpd="m/s",
            WMET_HorWdDir="deg",
            WMET_HorWdDirRel="deg",
            WTUR_TurSt=None,
            WROT_BlPthAngVal="deg",
            WMET_EnvTmp="C",
            WTUR_SupWh="kWh",
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name, and the reverse
        self.col_map = dict(
            time=self.time,
            asset_id=self.asset_id,
            WTUR_W=self.WTUR_W,
            WMET_HorWdSpd=self.WMET_HorWdSpd,
            WMET_HorWdDir=self.WMET_HorWdDir,
            WMET_HorWdDirRel=self.WMET_HorWdDirRel,
            WTUR_TurSt=self.WTUR_TurSt,
            WROT_BlPthAngVal=self.WROT_BlPthAngVal,
            WMET_EnvTmp=self.WMET_EnvTmp,
            WTUR_SupWh=self.WTUR_SupWh,
        )
        self.col_map_reversed = {v: k for k, v in self.col_map.items()}

    def __repr__(self):
        return _make_single_repr("SCADAMetaData", self)
@define(auto_attribs=True)
class MeterMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about energy meter data, that will contribute to a larger
    plant metadata schema/routine.

    Args:
        time (str): The datetime stamp for the meter data, by default "time". This data should
            be of type: ``np.datetime64[ns]``, or able to be converted to a pandas DatetimeIndex.
            Additional columns describing the datetime stamps are: :py:attr:`frequency`
        MMTR_SupWh (str): The energy produced, in kWh, column in the meter data, by default
            "MMTR_SupWh". This data should be of type: ``float``.
        frequency (str): The frequency of `time` in the meter data, by default "10min". The input
            should align with the `Pandas frequency offset aliases`_.

    .. _Pandas frequency offset aliases:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

    """

    # DataFrame columns
    time: str = field(default="time")
    MMTR_SupWh: str = field(default="MMTR_SupWh")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="meter", init=False)
    col_map: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            MMTR_SupWh=float,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            MMTR_SupWh="kWh",
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name
        self.col_map = dict(
            time=self.time,
            MMTR_SupWh=self.MMTR_SupWh,
        )

    def __repr__(self):
        return _make_single_repr("MeterMetaData", self)
@define(auto_attribs=True)
class TowerMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about meteorological tower (met tower) data, that will contribute
    to a larger plant metadata schema/routine.

    Args:
        time (str): The datetime stamp for the met tower data, by default "time". This data should
            be of type: ``np.datetime64[ns]``, or able to be converted to a pandas DatetimeIndex.
            Additional columns describing the datetime stamps are: `frequency`
        asset_id (str): The met tower identifier column in the met tower data, by default
            "asset_id". This data should be of type: ``str``.
        WMET_HorWdSpd (str): The measured windspeed, in m/s, column in the met tower data, by
            default "WMET_HorWdSpd". This data should be of type: ``float``.
        WMET_HorWdDir (str): The measured wind direction, in degrees, column in the met tower
            data, by default "WMET_HorWdDir". This data should be of type: ``float``.
        WMET_EnvTmp (str): The temperature column in the met tower data, by default
            "WMET_EnvTmp". This data should be of type: ``float``.
        frequency (str): The frequency of `time` in the met tower data, by default "10min". The
            input should align with the `Pandas frequency offset aliases`_.

    .. _Pandas frequency offset aliases:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

    """

    # DataFrame columns
    time: str = field(default="time")
    asset_id: str = field(default="asset_id")
    WMET_HorWdSpd: str = field(default="WMET_HorWdSpd")
    WMET_HorWdDir: str = field(default="WMET_HorWdDir")
    WMET_EnvTmp: str = field(default="WMET_EnvTmp")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="tower", init=False)
    col_map: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            asset_id=str,
            WMET_HorWdSpd=float,
            WMET_HorWdDir=float,
            WMET_EnvTmp=float,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            asset_id=None,
            WMET_HorWdSpd="m/s",
            WMET_HorWdDir="deg",
            WMET_EnvTmp="C",
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name
        self.col_map = dict(
            time=self.time,
            asset_id=self.asset_id,
            WMET_HorWdSpd=self.WMET_HorWdSpd,
            WMET_HorWdDir=self.WMET_HorWdDir,
            WMET_EnvTmp=self.WMET_EnvTmp,
        )

    def __repr__(self):
        return _make_single_repr("TowerMetaData", self)
@define(auto_attribs=True)
class StatusMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about the turbine status log data, that will contribute to a
    larger plant metadata schema/routine.

    Args:
        time (str): The datetime stamp for the status data, by default "time". This data should
            be of type: ``np.datetime64[ns]``, or able to be converted to a pandas DatetimeIndex.
            Additional columns describing the datetime stamps are: `frequency`
        asset_id (str): The turbine identifier column in the status data, by default "asset_id".
            This data should be of type: ``str``.
        status_id (str): The status code identifier column in the status data, by default
            "status_id". This data should be of type: ``np.int64``.
        status_code (str): The status code column in the status data, by default "status_code".
            This data should be of type: ``np.int64``.
        status_text (str): The status text description column in the status data, by default
            "status_text". This data should be of type: ``str``.
        frequency (str): The frequency of `time` in the status data, by default "10min". The input
            should align with the `Pandas frequency offset aliases`_.

    .. _Pandas frequency offset aliases:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

    """

    # DataFrame columns
    time: str = field(default="time")
    asset_id: str = field(default="asset_id")
    status_id: str = field(default="status_id")
    status_code: str = field(default="status_code")
    status_text: str = field(default="status_text")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="status", init=False)
    col_map: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            asset_id=str,
            status_id=np.int64,
            status_code=np.int64,
            status_text=str,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            asset_id=None,
            status_id=None,
            status_code=None,
            status_text=None,
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name
        self.col_map = dict(
            time=self.time,
            asset_id=self.asset_id,
            status_id=self.status_id,
            status_code=self.status_code,
            status_text=self.status_text,
        )

    def __repr__(self):
        return _make_single_repr("StatusMetaData", self)
@define(auto_attribs=True)
class CurtailMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about the plant curtailment data, that will contribute to a
    larger plant metadata schema/routine.

    Args:
        time (str): The datetime stamp for the curtailment data, by default "time". This data
            should be of type: ``np.datetime64[ns]``, or able to be converted to a pandas
            DatetimeIndex. Additional columns describing the datetime stamps are: `frequency`
        IAVL_ExtPwrDnWh (str): The curtailment, in kWh, column in the curtailment data, by default
            "IAVL_ExtPwrDnWh". This data should be of type: ``float``.
        IAVL_DnWh (str): The availability, in kWh, column in the curtailment data, by default
            "IAVL_DnWh". This data should be of type: ``float``.
        frequency (str): The frequency of `time` in the curtailment data, by default "10min". The
            input should align with the `Pandas frequency offset aliases`_.

    .. _Pandas frequency offset aliases:
        https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

    """

    # DataFrame columns
    time: str = field(default="time")
    IAVL_ExtPwrDnWh: str = field(default="IAVL_ExtPwrDnWh")
    IAVL_DnWh: str = field(default="IAVL_DnWh")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="curtail", init=False)
    col_map: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            IAVL_ExtPwrDnWh=float,
            IAVL_DnWh=float,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            IAVL_ExtPwrDnWh="kWh",
            IAVL_DnWh="kWh",
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name
        self.col_map = dict(
            time=self.time,
            IAVL_ExtPwrDnWh=self.IAVL_ExtPwrDnWh,
            IAVL_DnWh=self.IAVL_DnWh,
        )

    def __repr__(self):
        return _make_single_repr("CurtailMetaData", self)
@define(auto_attribs=True)
class AssetMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic to create the necessary column mappings and other validation
    components, or other data about the site's asset metadata, that will contribute to a
    larger plant metadata schema/routine.

    Args:
        asset_id (str): The asset identifier column in the asset metadata, by default "asset_id"
            This data should be of type: ``str``.
        latitude (str): The asset's latitudinal position, in WGS84, column in the asset metadata,
            by default "latitude". This data should be of type: ``float``.
        longitude (str): The asset's longitudinal position, in WGS84, column in the asset
            metadata, by default "longitude". This data should be of type: ``float``.
        rated_power (str): The asset's rated power, in kW, column in the asset metadata, by
            default "rated_power". This data should be of type: ``float``.
        hub_height (str): The asset's hub height, in m, column in the asset metadata, by default
            "hub_height". This data should be of type: ``float``.
        rotor_diameter (str): The asset's rotor diameter, in m, column in the asset metadata, by
            default "rotor_diameter". This data should be of type: ``float``.
        elevation (str): The asset's elevation above sea level, in m, column in the asset
            metadata, by default "elevation". This data should be of type: ``float``.
        type (str): The type of asset column in the asset metadata, by default "type". This data
            should be of type: ``str``.
    """

    # DataFrame columns
    asset_id: str = field(default="asset_id")
    latitude: str = field(default="latitude")
    longitude: str = field(default="longitude")
    rated_power: str = field(default="rated_power")
    hub_height: str = field(default="hub_height")
    rotor_diameter: str = field(default="rotor_diameter")
    elevation: str = field(default="elevation")
    type: str = field(default="type")

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="asset", init=False)
    col_map: dict = field(init=False)
    # A factory is used so that each instance receives its own dictionary
    dtypes: dict = field(
        factory=lambda: dict(
            asset_id=str,
            latitude=float,
            longitude=float,
            rated_power=float,
            hub_height=float,
            rotor_diameter=float,
            elevation=float,
            type=str,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            asset_id=None,
            latitude="WGS84",
            longitude="WGS84",
            rated_power="kW",
            hub_height="m",
            rotor_diameter="m",
            elevation="m",
            type=None,
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        # Map each internal column name to the user-provided column name
        self.col_map = dict(
            asset_id=self.asset_id,
            latitude=self.latitude,
            longitude=self.longitude,
            rated_power=self.rated_power,
            hub_height=self.hub_height,
            rotor_diameter=self.rotor_diameter,
            elevation=self.elevation,
            type=self.type,
        )

    def __repr__(self):
        return _make_single_repr("AssetMetaData", self)
def convert_reanalysis(
    value: "dict[str, dict | ReanalysisMetaData]",
) -> "dict[str, ReanalysisMetaData]":
    """Convert a mapping of reanalysis product names into ``ReanalysisMetaData`` objects.

    Used as the attrs converter for ``PlantMetaData.reanalysis``.

    Args:
        value (dict[str, dict | ReanalysisMetaData]): The reanalysis product name (keys) and
            either a dictionary of ``ReanalysisMetaData`` settings or an already constructed
            ``ReanalysisMetaData`` object (values).

    Returns:
        dict[str, ReanalysisMetaData]: A new dictionary with the same keys, where each
        dictionary value has been passed through ``ReanalysisMetaData.from_dict``.
    """
    # Already-built objects are passed through untouched so the converter is idempotent,
    # i.e., it can safely be re-run on its own output.
    return {
        product: (
            settings
            if isinstance(settings, ReanalysisMetaData)
            else ReanalysisMetaData.from_dict(settings)
        )
        for product, settings in value.items()
    }
@define(auto_attribs=True)
class ReanalysisMetaData(FromDictMixin):  # noqa: F821
    """A metadata schematic for each of the reanalysis products to be used for operational
    analyses to create the necessary column mappings and other validation components, or
    other data about the site's reanalysis data, that will contribute to a larger plant
    metadata schema/routine.

    Each user-settable column attribute holds the *name of the column* in the user's
    reanalysis data that corresponds to the OpenOA field of the same name.

    Args:
        time (str): The datetime stamp for the reanalysis data, by default "time". This data
            should be of type: `np.datetime64[ns]`, or able to be converted to a pandas
            DatetimeIndex. Additional columns describing the datetime stamps are: `frequency`
        WMETR_HorWdSpd (:obj:`str`): The reanalysis non-directional windspeed data column
            name, in m/s, by default "WMETR_HorWdSpd".
        WMETR_HorWdSpdU (:obj:`str`): The reanalysis u-direction windspeed data column name,
            in m/s, by default "WMETR_HorWdSpdU".
        WMETR_HorWdSpdV (:obj:`str`): The reanalysis v-directional windspeed data column
            name, in m/s, by default "WMETR_HorWdSpdV".
        WMETR_HorWdDir (:obj:`str`): The reanalysis windspeed horizontal direction data
            column name, in degrees, by default "WMETR_HorWdDir".
        WMETR_EnvTmp (:obj:`str`): The temperature data column name in the reanalysis data,
            in degrees Kelvin, by default "WMETR_EnvTmp".
        WMETR_AirDen (:obj:`str`): The air density reanalysis data column name, in kg/m^3, by
            default "WMETR_AirDen".
        WMETR_EnvPres (:obj:`str`): The surface air pressure reanalysis data column name, in
            Pa, by default "surface_pressure".
        frequency (:obj:`str`): The frequency of the timestamps in the :py:attr:`time`
            column, by default "10min".
    """

    time: str = field(default="time")
    WMETR_HorWdSpd: str = field(default="WMETR_HorWdSpd")
    WMETR_HorWdSpdU: str = field(default="WMETR_HorWdSpdU")
    WMETR_HorWdSpdV: str = field(default="WMETR_HorWdSpdV")
    WMETR_HorWdDir: str = field(default="WMETR_HorWdDir")
    WMETR_EnvTmp: str = field(default="WMETR_EnvTmp")
    WMETR_AirDen: str = field(default="WMETR_AirDen")
    # NOTE: unlike every other column, the default is not the field's own name. It is kept
    # as-is so that existing data relying on the default column name continue to load.
    WMETR_EnvPres: str = field(default="surface_pressure")

    # Data about the columns
    frequency: str = field(default="10min", converter=convert_frequency)

    # Parameterizations that should not be changed
    # Prescribed mappings, datatypes, and units for in-code reference.
    name: str = field(default="reanalysis", init=False)
    # Populated in ``__attrs_post_init__`` from the column-name attributes above.
    col_map: dict = field(init=False)
    # ``factory`` (rather than ``default``) gives every instance its own dictionary, so
    # mutating one instance's mapping cannot leak into the others.
    dtypes: dict = field(
        factory=lambda: dict(
            time=np.datetime64,
            WMETR_HorWdSpd=float,
            WMETR_HorWdSpdU=float,
            WMETR_HorWdSpdV=float,
            WMETR_HorWdDir=float,
            WMETR_EnvTmp=float,
            WMETR_AirDen=float,
            WMETR_EnvPres=float,
        ),
        init=False,  # don't allow for user input
    )
    units: dict = field(
        factory=lambda: dict(
            time="datetime64[ns]",
            WMETR_HorWdSpd="m/s",
            WMETR_HorWdSpdU="m/s",
            WMETR_HorWdSpdV="m/s",
            WMETR_HorWdDir="deg",
            WMETR_EnvTmp="K",
            WMETR_AirDen="kg/m^3",
            WMETR_EnvPres="Pa",
        ),
        init=False,  # don't allow for user input
    )

    def __attrs_post_init__(self) -> None:
        """Build ``col_map``: OpenOA field name -> user-provided column name."""
        self.col_map = dict(
            time=self.time,
            WMETR_HorWdSpd=self.WMETR_HorWdSpd,
            WMETR_HorWdSpdU=self.WMETR_HorWdSpdU,
            WMETR_HorWdSpdV=self.WMETR_HorWdSpdV,
            WMETR_HorWdDir=self.WMETR_HorWdDir,
            WMETR_EnvTmp=self.WMETR_EnvTmp,
            WMETR_AirDen=self.WMETR_AirDen,
            WMETR_EnvPres=self.WMETR_EnvPres,
        )

    def __repr__(self):
        return _make_single_repr("ReanalysisMetaData", self)
@define(auto_attribs=True)
class PlantMetaData(FromDictMixin):  # noqa: F821
    """Composes the metadata/validation requirements from each of the individual data
    types that can compose a `PlantData` object.

    Args:
        latitude (`float`): The wind power plant's center point latitude.
        longitude (`float`): The wind power plant's center point longitude.
        reference_system (:obj:`str`, optional): Used to define the coordinate reference
            system (CRS). Defaults to the European Petroleum Survey Group (EPSG) code 4326 to
            be used with the World Geodetic System reference system, WGS 84.
        utm_zone (:obj:`int`, optional): UTM zone. If set to None (default), then calculated
            from the longitude.
        reference_longitude (:obj:`float`, optional): Reference longitude for calculating the
            UTM zone. If None (default), then taken as the average longitude of all assets
            when the geometry is parsed.
        capacity (`float`): The capacity of the plant in MW
        scada (`SCADAMetaData`): A dictionary containing the ``SCADAMetaData`` column mapping
            and frequency parameters. See ``SCADAMetaData`` for more details.
        meter (`MeterMetaData`): A dictionary containing the ``MeterMetaData`` column mapping
            and frequency parameters. See ``MeterMetaData`` for more details.
        tower (`TowerMetaData`): A dictionary containing the ``TowerMetaData`` column mapping
            and frequency parameters. See ``TowerMetaData`` for more details.
        status (`StatusMetaData`): A dictionary containing the ``StatusMetaData`` column
            mapping parameters. See ``StatusMetaData`` for more details.
        curtail (`CurtailMetaData`): A dictionary containing the ``CurtailMetaData`` column
            mapping and frequency parameters. See ``CurtailMetaData`` for more details.
        asset (`AssetMetaData`): A dictionary containing the ``AssetMetaData`` column mapping
            parameters. See ``AssetMetaData`` for more details.
        reanalysis (`dict[str, ReanalysisMetaData]`): A dictionary containing the reanalysis
            type (as keys, such as "era5" or "merra2") and ``ReanalysisMetaData`` column
            mapping and frequency parameters for each type of reanalysis data provided. See
            ``ReanalysisMetaData`` for more details.
    """

    latitude: float = field(default=0, converter=float)
    longitude: float = field(default=0, converter=float)
    reference_system: str = field(default="epsg:4326")
    reference_longitude: float | None = field(default=None)
    utm_zone: int | None = field(default=None)
    capacity: float = field(default=0, converter=float)
    # ``factory`` is used for the dictionary defaults so that no mutable object is shared
    # between instances; the converter is still applied to the factory's result.
    scada: SCADAMetaData = field(factory=dict, converter=SCADAMetaData.from_dict)
    meter: MeterMetaData = field(factory=dict, converter=MeterMetaData.from_dict)
    tower: TowerMetaData = field(factory=dict, converter=TowerMetaData.from_dict)
    status: StatusMetaData = field(factory=dict, converter=StatusMetaData.from_dict)
    curtail: CurtailMetaData = field(factory=dict, converter=CurtailMetaData.from_dict)
    asset: AssetMetaData = field(factory=dict, converter=AssetMetaData.from_dict)
    reanalysis: dict[str, ReanalysisMetaData] = field(
        factory=lambda: {"product": {}}, converter=convert_reanalysis  # noqa: F821
    )  # noqa: F821

    @property
    def column_map(self) -> dict[str, dict]:
        """Provides the column mapping for all of the available data types with
        the name of each data type as the key and the dictionary mapping as the values.
        The "reanalysis" entry is itself keyed by the reanalysis product name.
        """
        return dict(
            scada=self.scada.col_map,
            meter=self.meter.col_map,
            tower=self.tower.col_map,
            status=self.status.col_map,
            asset=self.asset.col_map,
            curtail=self.curtail.col_map,
            # Yields an empty dictionary when no reanalysis products are defined
            reanalysis={k: v.col_map for k, v in self.reanalysis.items()},
        )

    @property
    def dtype_map(self) -> dict[str, dict]:
        """Provides the column dtype matching for all of the available data types with
        the name of each data type as the keys, and the column dtype mapping as values.
        The "reanalysis" entry is itself keyed by the reanalysis product name.
        """
        return dict(
            scada=self.scada.dtypes,
            meter=self.meter.dtypes,
            tower=self.tower.dtypes,
            status=self.status.dtypes,
            asset=self.asset.dtypes,
            curtail=self.curtail.dtypes,
            # Yields an empty dictionary when no reanalysis products are defined
            reanalysis={k: v.dtypes for k, v in self.reanalysis.items()},
        )

    @property
    def coordinates(self) -> tuple[float, float]:
        """Returns the latitude, longitude pair for the wind power plant.

        Returns:
            tuple[float, float]: The (latitude, longitude) pair
        """
        return self.latitude, self.longitude

    @classmethod
    def from_json(cls, metadata_file: str | Path) -> PlantMetaData:
        """Loads the metadata from a JSON file.

        Args:
            metadata_file (`str | Path`): The full path and file name of the JSON file.

        Raises:
            FileExistsError: Raised if the file doesn't exist at the provided location.

        Returns:
            PlantMetaData
        """
        metadata_file = Path(metadata_file).resolve()
        if not metadata_file.is_file():
            # NOTE: FileExistsError (not FileNotFoundError) is retained so that existing
            # callers catching this exception type continue to work.
            raise FileExistsError(f"Input JSON file: {metadata_file} is an invalid input.")

        # Explicit encoding so parsing doesn't depend on the platform's locale default
        with open(metadata_file, encoding="utf-8") as f:
            return cls.from_dict(json.load(f))

    @classmethod
    def from_yaml(cls, metadata_file: str | Path) -> PlantMetaData:
        """Loads the metadata from a YAML file with a PyYAML encoding.

        Args:
            metadata_file (`str | Path`): The full path and file name of the YAML file.

        Raises:
            FileExistsError: Raised if the file doesn't exist at the provided location.

        Returns:
            PlantMetaData
        """
        metadata_file = Path(metadata_file).resolve()
        if not metadata_file.is_file():
            # NOTE: FileExistsError (not FileNotFoundError) is retained so that existing
            # callers catching this exception type continue to work.
            raise FileExistsError(f"Input YAML file: {metadata_file} is an invalid input.")

        # Explicit encoding so parsing doesn't depend on the platform's locale default
        with open(metadata_file, encoding="utf-8") as f:
            return cls.from_dict(yaml.safe_load(f))

    @classmethod
    def load(cls, data: str | Path | dict | PlantMetaData) -> PlantMetaData:
        """Loads the metadata from either a dictionary or file such as a JSON or YAML file.

        Args:
            data (`str | Path | dict | PlantMetaData`): Either a pre-loaded dictionary, the
                full path and file name of the JSON or YAML file, or an existing
                ``PlantMetaData`` object, which is returned unchanged.

        Raises:
            ValueError: Raised if the file name doesn't reflect a JSON or YAML encoding.
            ValueError: Raised if the data provided isn't of the correct data type.

        Returns:
            PlantMetaData
        """
        if isinstance(data, PlantMetaData):
            return data

        if isinstance(data, str):
            data = Path(data).resolve()

        if isinstance(data, Path):
            # Compare case-insensitively so that, e.g., ".JSON" or ".YML" are also accepted
            suffix = data.suffix.lower()
            if suffix == ".json":
                return cls.from_json(data)
            if suffix in (".yaml", ".yml"):
                return cls.from_yaml(data)
            raise ValueError("Bad input file extension, must be one of: .json, .yml, or .yaml")

        if isinstance(data, dict):
            return cls.from_dict(data)

        raise ValueError("PlantMetaData can only be loaded from str, Path, or dict objects.")

    def frequency_requirements(self, analysis_types: list[str | None]) -> dict[str, set[str]]:
        """Creates a frequency requirements dictionary for each data type with the name
        as the key and a set of valid frequency fields as the values.

        When a data type is required by more than one of the requested analyses, its valid
        frequencies are the intersection of each analysis' requirement.

        Args:
            analysis_types (list[str | None]): The analyses the data is intended to be used
                for, which will determine what data need to be checked. If "all" is
                included, every analysis in ``ANALYSIS_REQUIREMENTS`` is considered, and
                ``None`` entries are ignored.

        Raises:
            KeyError: Raised if an analysis type is not a key of ``ANALYSIS_REQUIREMENTS``.

        Returns:
            dict[str, set[str]]: The dictionary of data type name and valid frequencies
                for the datetime stamps.
        """
        if "all" in analysis_types:
            requirements = ANALYSIS_REQUIREMENTS  # only read from, so no copy is required
        else:
            requirements = {
                key: ANALYSIS_REQUIREMENTS[key] for key in analysis_types if key is not None
            }

        frequency: dict[str, set[str]] = {}
        for data_requirements in requirements.values():
            for name, requirement in data_requirements.items():
                valid = set(requirement["freq"])
                # First occurrence seeds the set; later occurrences can only narrow it
                frequency[name] = frequency[name] & valid if name in frequency else valid
        return frequency

    def __repr__(self):
        return _make_combined_repr(self)