Source code for openoa.analysis.electrical_losses

# This class defines the key analytical routines for calculating electrical losses for
# a wind plant using operational data. Electrical loss is calculated per month and on
# an average annual basis by comparing the monthly energy production reported by the
# turbines with the energy recorded by the revenue meter.

from __future__ import annotations

import datetime
from copy import deepcopy

import attrs
import numpy as np
import pandas as pd
import numpy.typing as npt
import matplotlib.pyplot as plt
from tqdm import tqdm
from attrs import field, define

import openoa.utils.timeseries as ts
from openoa.plant import PlantData
from openoa.schema import FromDictMixin, ResetValuesMixin
from openoa.logging import logging, logged_method_call
from openoa.utils.plot import set_styling
from openoa.analysis._analysis_validators import validate_UQ_input, validate_half_closed_0_1_right


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Called once at import time; presumably applies the shared OpenOA matplotlib styling
# (defined in openoa.utils.plot) -- confirm there.
set_styling()

# Alias for float64 NumPy arrays, used in the type annotations below.
NDArrayFloat = npt.NDArray[np.float64]

# Time conversion constants used to compute the expected number of samples per day/month.
MINUTES_PER_HOUR = 60
HOURS_PER_DAY = 24


@define(auto_attribs=True)
class ElectricalLosses(FromDictMixin, ResetValuesMixin):
    """
    A serial implementation of calculating the average monthly and annual electrical losses at
    a wind power plant, and the associated uncertainty. Energy output from the turbine SCADA
    meter and the wind plant revenue meter are used to estimate electrical losses.

    First, the daily sums of turbine and revenue meter energy are calculated over the plant's
    period of record where all turbines and the revenue meter contain every considered timestep.
    Electrical losses are then calculated as the difference between the total turbine energy
    production and the meter production over those concurrent days.

    For uncertainty quantification, a Monte Carlo (MC) approach is used to sample the revenue
    meter data and SCADA data with a default 0.5% imposed uncertainty, alongside a sampled
    filtering parameter. The uncertainty in estimated electrical losses is quantified as the
    standard deviation of the distribution of losses obtained from the MC sampling.

    If the revenue meter data is not provided on a daily or sub-daily basis (e.g. monthly), the
    sum of daily turbine energy is corrected for any missing reported energy data from the
    turbines based on the ratio of expected number of data points per day to the actual data
    points available. The daily corrected sum of turbine energy is then summed on a monthly
    basis. Electrical loss is then the difference between the total corrected turbine energy
    production and meter production over those concurrent months.

    Args:
        plant(:obj:`PlantData`): A :py:attr:`openoa.plant.PlantData` object that has been
            validated with at least :py:attr:`openoa.plant.PlantData.analysis_type` =
            "ElectricalLosses".
        UQ(:obj:`bool`): Indicator to perform (True) or not (False) uncertainty quantification.
        num_sim(:obj:`int`): Number of Monte Carlo simulations to perform.
        uncertainty_meter(:obj:`float`): Uncertainty imposed on the revenue meter data (for
            :py:attr:`UQ` = True case).
        uncertainty_scada(:obj:`float`): Uncertainty imposed on the scada data (for
            :py:attr:`UQ` = True case).
        uncertainty_correction_threshold(:obj:`tuple` | `float`): Data availability thresholds,
            in the range of (0, 1], under which months should be eliminated. If :py:attr:`UQ` =
            True, then a 2-element tuple containing an upper and lower bound for a randomly
            selected value should be given, otherwise, a scalar value should be provided.
    """

    plant: PlantData = field(converter=deepcopy, validator=attrs.validators.instance_of(PlantData))
    UQ: bool = field(default=False, validator=attrs.validators.instance_of(bool))
    num_sim: int = field(default=20000, converter=int)
    uncertainty_meter: float = field(default=0.005, validator=validate_half_closed_0_1_right)
    uncertainty_scada: float = field(default=0.005, validator=validate_half_closed_0_1_right)
    uncertainty_correction_threshold: NDArrayFloat | tuple[float, float] | float = field(
        default=(0.9, 0.995), validator=(validate_UQ_input, validate_half_closed_0_1_right)
    )

    # Internally created attributes need to be given a type before usage
    # NOTE: ``monthly_meter`` is always (re)computed from the meter frequency in
    # ``__attrs_post_init__``; the default below is only a placeholder.
    monthly_meter: bool = field(default=False, init=False)
    inputs: pd.DataFrame = field(init=False)
    electrical_losses: NDArrayFloat = field(init=False)
    scada_sum: pd.DataFrame = field(init=False)
    scada_daily: pd.DataFrame = field(init=False)
    scada_full_count: pd.DataFrame = field(init=False)
    meter_daily: pd.DataFrame = field(init=False)
    combined_energy: pd.DataFrame = field(init=False)
    # Scalar totals from the most recent simulation (summed column values scaled by the
    # sampled data fraction).
    total_turbine_energy: float = field(init=False)
    total_meter_energy: float = field(init=False)
    run_parameters: list[str] = field(
        init=False,
        default=[
            "UQ",
            "num_sim",
            "uncertainty_meter",
            "uncertainty_scada",
            "uncertainty_correction_threshold",
        ],
    )

    @logged_method_call
    def __attrs_post_init__(self):
        """
        Initialize logging and post-initialization setup steps.

        Ensures the plant data are validated for this analysis, forces a single simulation
        when no uncertainty quantification is requested, and pre-processes the SCADA data and
        (for sub-monthly meter data) the meter data.
        """
        if not {"ElectricalLosses", "all"}.intersection(self.plant.analysis_type):
            self.plant.analysis_type.append("ElectricalLosses")

        # Ensure the data are up to spec before continuing with initialization
        self.plant.validate()

        logger.info("Initializing Electrical Losses Object")

        # Check that selected UQ is allowed and reset num_sim if no UQ
        if self.UQ:
            logger.info("Note: uncertainty quantification will be performed in the calculation")
        else:
            logger.info("Note: uncertainty quantification will NOT be performed in the calculation")
            self.num_sim = 1  # override in case of bad user input

        # Process the SCADA and meter data appropriately
        self.process_scada()

        # The flag must be True for monthly meter data; otherwise the monthly branch of
        # ``calculate_electrical_losses`` is unreachable and ``meter_daily`` (only created
        # by ``process_meter``) would be read without ever having been set.
        self.monthly_meter = self.plant.metadata.meter.frequency in ("MS", "ME", "1MS")
        if not self.monthly_meter:
            self.process_meter()

    @logged_method_call
    def run(
        self,
        num_sim: int | None = None,
        uncertainty_meter: NDArrayFloat | float | None = None,
        uncertainty_scada: NDArrayFloat | float | None = None,
        uncertainty_correction_threshold: NDArrayFloat | tuple[float, float] | float | None = None,
    ):
        """
        Run the electrical losses calculation.

        .. note:: If None is provided to any of the inputs, then the last used input value will
            be used for the analysis, and if no prior values were set, then this is the model's
            defaults.

        Args:
            num_sim(:obj:`int`): Number of Monte Carlo simulations to perform.
            uncertainty_meter(:obj:`float`): Uncertainty imposed on the revenue meter data (for
                :py:attr:`UQ` = True case).
            uncertainty_scada(:obj:`float`): Uncertainty imposed on the scada data (for
                :py:attr:`UQ` = True case).
            uncertainty_correction_threshold(:obj:`tuple` | `float`): Data availability
                thresholds, in the range of (0, 1], under which months should be eliminated. If
                :py:attr:`UQ` = True, then a 2-element tuple containing an upper and lower bound
                for a randomly selected value should be given, otherwise, a scalar value should
                be provided.
        """
        # Previous values of every overridden parameter, restored once the run is finished
        initial_parameters = {}

        overrides = {
            "uncertainty_meter": uncertainty_meter,
            "uncertainty_scada": uncertainty_scada,
            "uncertainty_correction_threshold": uncertainty_correction_threshold,
        }

        try:
            if num_sim is not None:
                if self.UQ:
                    initial_parameters["num_sim"] = self.num_sim
                    self.num_sim = num_sim
                elif num_sim > 1:
                    logger.info(
                        "`num_sim` can NOT be greater than 1 when `UQ=False`, value has not been set."
                    )

            for name, value in overrides.items():
                if value is not None:
                    initial_parameters[name] = getattr(self, name)
                    setattr(self, name, value)

            # Setup Monte Carlo approach, and calculate the electrical losses
            self.setup_inputs()
            self.calculate_electrical_losses()
        finally:
            # Reset the class arguments back to the initialized values, even when the
            # calculation (or one of the overrides) raised, so a failed run does not leave
            # temporary settings behind.
            self.set_values(initial_parameters)

    @logged_method_call
    def setup_inputs(self):
        """
        Create and populate the data frame defining the simulation parameters.
        This data frame is stored as self.inputs.

        With :py:attr:`UQ` = True one row is drawn per simulation: normally distributed meter
        and SCADA data fractions centered on 1, and a correction threshold sampled uniformly
        on the decimal grid of the provided bounds (upper bound excluded). Otherwise a single
        row with unit fractions and the scalar threshold is created.
        """
        from decimal import Decimal

        if self.UQ:
            # Number of decimal places needed to represent both bounds exactly as integers.
            # ``Decimal`` handles integer bounds (e.g. 1) and scientific notation, which a
            # plain split on "." does not.
            n_decimal = max(
                max(0, -Decimal(str(el)).as_tuple().exponent)
                for el in self.uncertainty_correction_threshold
            )
            integer_multiplier = 10**n_decimal

            # Round before converting so floating point noise (e.g. 994.9999999) cannot
            # shift a bound by one grid step.
            low = int(round(self.uncertainty_correction_threshold[0] * integer_multiplier))
            high = int(round(self.uncertainty_correction_threshold[1] * integer_multiplier))

            inputs = {
                "meter_data_fraction": np.random.normal(1, self.uncertainty_meter, self.num_sim),
                "scada_data_fraction": np.random.normal(1, self.uncertainty_scada, self.num_sim),
                "correction_threshold": np.random.randint(low, high, self.num_sim)
                / integer_multiplier,
            }
            self.inputs = pd.DataFrame(inputs)
        else:
            inputs = {
                "meter_data_fraction": 1,
                "scada_data_fraction": 1,
                "correction_threshold": self.uncertainty_correction_threshold,
            }
            self.inputs = pd.DataFrame(inputs, index=[0])

        self.electrical_losses = np.empty([self.num_sim, 1])

    @logged_method_call
    def process_scada(self):
        """
        Calculate daily sum of turbine energy only for days when all turbines are reporting
        at all time steps.

        Populates ``scada_sum`` (plant-level energy and record count per timestamp),
        ``scada_daily`` (daily energy, record count, availability-corrected energy and
        availability fraction) and ``scada_full_count`` (the fully reported days only).
        """
        logger.info("Processing SCADA data")

        scada_df = self.plant.scada.copy()

        # Sum up SCADA data power and energy and count number of entries
        ix_time = self.plant.scada.index.get_level_values("time")
        energy_by_time = scada_df.groupby(ix_time)["WTUR_SupWh"]
        self.scada_sum = energy_by_time.sum().to_frame()
        self.scada_sum["count"] = energy_by_time.count()

        # Calculate daily sum of all turbine energy production and count number of entries
        self.scada_daily = self.scada_sum.resample("D")["WTUR_SupWh"].sum().to_frame()
        self.scada_daily["count"] = self.scada_sum.resample("D")["count"].sum()

        # Specify expected count provided all turbines reporting
        expected_count = (
            HOURS_PER_DAY
            * MINUTES_PER_HOUR
            / (ts.offset_to_seconds(self.plant.metadata.scada.frequency) / 60)
            * self.plant.n_turbines
        )

        # Correct sum of turbine energy for cases with missing reported data
        self.scada_daily["corrected_energy"] = (
            self.scada_daily["WTUR_SupWh"] * expected_count / self.scada_daily["count"]
        )
        self.scada_daily["percent"] = self.scada_daily["count"] / expected_count

        # Store daily SCADA data where all turbines reporting for every time step during the day
        self.scada_full_count = self.scada_daily.loc[self.scada_daily["count"] == expected_count]

    @logged_method_call
    def process_meter(self):
        """
        Calculate daily sum of meter energy only for days when meter data is reporting at all
        time steps.

        Populates ``meter_daily`` with the daily sums and record counts of the fully reported
        days.
        """
        logger.info("Processing meter data")

        meter_df = self.plant.meter.copy()

        # Sum up meter data to daily
        self.meter_daily = meter_df.resample("D").sum()
        self.meter_daily["count"] = meter_df.resample("D")["MMTR_SupWh"].count()

        # Specify expected count provided all timestamps reporting. This must be based on
        # the meter's own reporting frequency: using the SCADA frequency would discard every
        # day whenever the two data sets are sampled at different rates.
        expected_count = (
            HOURS_PER_DAY
            * MINUTES_PER_HOUR
            / (ts.offset_to_seconds(self.plant.metadata.meter.frequency) / 60)
        )

        # Keep only data with the meter reporting for every time step during the day
        self.meter_daily = self.meter_daily[self.meter_daily["count"] == expected_count]

    def _monthly_scada_energy(self) -> pd.DataFrame:
        """Sum the corrected daily turbine energy to monthly values with their availability.

        Returns:
            pd.DataFrame: Monthly frame with the corrected energy (``WTUR_SupWh``), the
            reported record ``count``, the ``expected_count_monthly`` and their ratio
            ``percent``.
        """
        scada_monthly = self.scada_daily.resample("MS")["corrected_energy"].sum().to_frame()
        scada_monthly.columns = ["WTUR_SupWh"]

        # Determine availability for each month represented
        scada_monthly["count"] = self.scada_sum.resample("MS")["count"].sum()
        minutes_per_step = ts.offset_to_seconds(self.plant.metadata.scada.frequency) / 60
        scada_monthly["expected_count_monthly"] = (
            scada_monthly.index.daysinmonth
            * HOURS_PER_DAY
            * MINUTES_PER_HOUR
            / minutes_per_step
            * self.plant.n_turbines
        )
        scada_monthly["percent"] = scada_monthly["count"] / scada_monthly["expected_count_monthly"]
        return scada_monthly

    @logged_method_call
    def calculate_electrical_losses(self):
        """
        Apply Monte Carlo approach to calculate electrical losses and their uncertainty based
        on the difference in the sum of turbine and metered energy over the compiled days.

        Results are written to ``electrical_losses`` (one row per simulation);
        ``combined_energy``, ``total_turbine_energy`` and ``total_meter_energy`` hold the
        values of the last simulation.
        """
        logger.info("Calculating electrical losses")

        # Everything that does not depend on the sampled inputs is computed once, outside of
        # the simulation loop.
        if self.monthly_meter:
            meter_df = self.plant.meter.copy()
            scada_monthly = self._monthly_scada_energy()
        else:
            # Note 'self.scada_full_count' only contains full reported data
            daily_energy = self.meter_daily.join(
                self.scada_full_count, lsuffix="_meter", rsuffix="_scada"
            ).dropna()
            daily_sum = daily_energy.sum(axis=0)

        # Loop through number of simulations, calculate losses each time, store results
        for n in tqdm(np.arange(self.num_sim)):
            _run = self.inputs.loc[n]

            if self.monthly_meter:
                # Filter out months in which there was less than x% of total running
                # (all turbines at all timesteps)
                available = scada_monthly.loc[
                    scada_monthly["percent"] >= _run.correction_threshold, :
                ]
                # Drop non-concurrent timestamps and total over the concurrent period
                self.combined_energy = meter_df.join(
                    available, lsuffix="_meter", rsuffix="_scada"
                ).dropna()
                merge_sum = self.combined_energy.sum(axis=0)
            else:
                self.combined_energy = daily_energy
                merge_sum = daily_sum

            # Calculate electrical loss from difference of sum of turbine and meter energy
            self.total_turbine_energy = merge_sum["WTUR_SupWh"] * _run.scada_data_fraction
            self.total_meter_energy = merge_sum["MMTR_SupWh"] * _run.meter_data_fraction

            self.electrical_losses[n] = 1 - self.total_meter_energy / self.total_turbine_energy

    def plot_monthly_losses(
        self,
        xlim: tuple[datetime.datetime | None, datetime.datetime | None] = (None, None),
        ylim: tuple[float | None, float | None] = (None, None),
        return_fig: bool = False,
        figure_kwargs: dict | None = None,
        legend_kwargs: dict | None = None,
        plot_kwargs: dict | None = None,
    ) -> None | tuple[plt.Figure, plt.Axes]:
        """Plots the monthly timeseries of electrical losses as a percent.

        Args:
            xlim(:obj: `tuple[float, float]`, optional): A tuple of the x-axis (min, max) values.
                Defaults to (None, None).
            ylim(:obj: `tuple[float, float]`, optional): A tuple of the y-axis (min, max) values.
                Defaults to (None, None).
            return_fig(:obj:`bool`, optional): Set to True to return the figure and axes objects,
                otherwise set to False. Defaults to False.
            figure_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be
                passed to ``plt.figure()``. Defaults to None (only ``dpi=200`` is applied).
            legend_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be
                passed to ``ax.legend()``. Defaults to None.
            plot_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be
                passed to ``ax.plot()``. Defaults to None.

        Returns:
            None | tuple[plt.Figure, plt.Axes]: If :py:attr:`return_fig`, then return the figure
                and axes objects in addition to showing the plot.
        """
        # Build fresh dictionaries so neither a shared default nor the caller's own
        # dictionary is mutated; a user-provided "dpi" still takes precedence.
        figure_kwargs = {"dpi": 200, **(figure_kwargs or {})}
        legend_kwargs = {} if legend_kwargs is None else legend_kwargs
        plot_kwargs = {} if plot_kwargs is None else plot_kwargs

        fig = plt.figure(**figure_kwargs)
        ax = fig.add_subplot(111)

        monthly_energy = self.combined_energy.resample("MS").sum()

        # Daily (sub-monthly meter) results carry the availability-corrected turbine energy
        # as "corrected_energy"; for monthly meter data the corrected energy is stored under
        # "WTUR_SupWh" instead.
        if "corrected_energy" in monthly_energy.columns:
            turbine_energy = monthly_energy["corrected_energy"]
        else:
            turbine_energy = monthly_energy["WTUR_SupWh"]

        losses = (turbine_energy - monthly_energy["MMTR_SupWh"]) / turbine_energy

        mean = losses.mean()
        std = losses.std()
        ax.plot(
            losses * 100,
            label=f"Electrical Losses\n$\\mu$={mean:.2%}, $\\sigma$={std:.2%}",  # noqa: W605
            **plot_kwargs,
        )

        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

        ax.legend(**legend_kwargs)
        ax.set_xlabel("Period of Record")
        ax.set_ylabel("Electrical Losses (%)")

        fig.tight_layout()
        plt.show()
        if return_fig:
            return fig, ax
# Mirror the defaults declared on the ``ElectricalLosses`` attrs fields so the factory
# function below always stays in sync with the class definition.
__defaults_UQ = attrs.fields(ElectricalLosses).UQ.default
__defaults_num_sim = attrs.fields(ElectricalLosses).num_sim.default
__defaults_uncertainty_correction_threshold = attrs.fields(
    ElectricalLosses
).uncertainty_correction_threshold.default
__defaults_uncertainty_meter = attrs.fields(ElectricalLosses).uncertainty_meter.default
__defaults_uncertainty_scada = attrs.fields(ElectricalLosses).uncertainty_scada.default


def create_ElectricalLosses(
    project: PlantData,
    UQ: bool = __defaults_UQ,
    num_sim: int = __defaults_num_sim,
    uncertainty_correction_threshold: NDArrayFloat
    | tuple[float, float]
    | float = __defaults_uncertainty_correction_threshold,
    uncertainty_meter: NDArrayFloat | tuple[float, float] | float = __defaults_uncertainty_meter,
    uncertainty_scada: NDArrayFloat | tuple[float, float] | float = __defaults_uncertainty_scada,
) -> ElectricalLosses:
    # Functional-style constructor: ``project`` is handed over as ``plant`` and every
    # other argument is forwarded under its own name.
    analysis_settings = {
        "UQ": UQ,
        "num_sim": num_sim,
        "uncertainty_meter": uncertainty_meter,
        "uncertainty_scada": uncertainty_scada,
        "uncertainty_correction_threshold": uncertainty_correction_threshold,
    }
    return ElectricalLosses(plant=project, **analysis_settings)


# The factory shares the class documentation.
create_ElectricalLosses.__doc__ = ElectricalLosses.__doc__