Source code for openoa.analysis.turbine_long_term_gross_energy

"""
This class defines key analytical routines for performing a gap analysis on
EYA-estimated annual energy production (AEP) and that from operational data. Categories
considered are availability, electrical losses, and long-term gross energy. The main
output is a 'waterfall' plot linking the EYA-estimated and operational-estiamted AEP values.
"""

from __future__ import annotations

import random
from copy import deepcopy
from typing import Callable

import attrs
import numpy as np
import pandas as pd
import numpy.typing as npt
import matplotlib.pyplot as plt
from tqdm import tqdm
from attrs import field, define
from matplotlib.ticker import StrMethodFormatter

from openoa.plant import PlantData, convert_to_list
from openoa.utils import plot, filters, imputing
from openoa.utils import timeseries as ts
from openoa.utils import met_data_processing as met
from openoa.schema import FromDictMixin, ResetValuesMixin
from openoa.logging import logging, logged_method_call
from openoa.utils.power_curve import functions
from openoa.analysis._analysis_validators import (
    validate_UQ_input,
    validate_half_closed_0_1_right,
    validate_reanalysis_selections,
)


logger = logging.getLogger(__name__)
plot.set_styling()

NDArrayFloat = npt.NDArray[np.float64]

MINUTES_PER_HOUR = 60
HOURS_PER_DAY = 24


[docs] @define(auto_attribs=True) class TurbineLongTermGrossEnergy(FromDictMixin, ResetValuesMixin): """ Calculates long-term gross energy for each turbine in a wind farm using methods implemented in the utils subpackage for data processing and analysis. The method proceeds as follows: 1. Filter turbine data for normal operation 2. Calculate daily means of wind speed, wind direction, and air density from reanalysis products 3. Calculate daily sums of energy from each turbine 4. Fit daily data (features are atmospheric variables, response is turbine power) using a generalized additive model (GAM) 5. Apply model results to long-term atmospheric varaibles to calculate long term gross energy for each turbine A Monte Carlo approach is implemented to obtain distribution of results, from which uncertainty can be quantified for the long-term gross energy estimate. A pandas DataFrame of long-term gross energy values is produced, containing each turbine in the wind farm. Note that this gross energy metric does not back out losses associated with waking or turbine performance. Rather, gross energy in this context is what turbine would have produced under normal operation (i.e. excluding downtime and underperformance). Required schema of PlantData: - _scada_freq - reanalysis products with columns ['time', 'WMETR_HorWdSpdU', 'WMETR_HorWdSpdV', 'WMETR_HorWdSpd', 'WMETR_AirDen'] - scada with columns: ['time', 'asset_id', 'WMET_HorWdSpd', 'WTUR_W', 'WTUR_SupWh'] Args: UQ(:obj:`bool`): Indicator to perform (True) or not (False) uncertainty quantification. num_sim(:obj:`int`): Number of simulations to run when `UQ` is True, otherwise set to 1. Defaults to 20000. uncertainty_scada(:obj:`float`): Uuncertainty imposed to the SCADA data when :py:attr:`UQ` is True only Defaults to 0.005. reanalysis_products(obj:`list[str]`) : List of reanalysis products to use for Monte Carlo sampling. Defaults to None, which pulls all the products contained in :py:attr:`plant.reanalysis`. wind_bin_threshold(:obj:`tuple`): The filter threshold for each vertical bin, expressed as number of standard deviations from the median in each bin. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this threshold, otherwise a single value should be used. Defaults to (1.0, 3.0) max_power_filter(:obj:`tuple`): Maximum power threshold, in the range (0, 1], to which the bin filter should be applied. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this filter, otherwise a single value should be used. Defaults to (0.8, 0.9). correction_threshold(:obj:`tuple`): The threshold, in the range of (0, 1], above which daily scada energy data should be corrected. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this threshold, otherwise a single value should be used. Defaults to (0.85, 0.95) """ plant: PlantData = field(converter=deepcopy, validator=attrs.validators.instance_of(PlantData)) UQ: bool = field(default=True, converter=bool) num_sim: int = field(default=20000, converter=int) reanalysis_products: list[str] = field( default=None, converter=convert_to_list, validator=( attrs.validators.deep_iterable( iterable_validator=attrs.validators.instance_of(list), member_validator=attrs.validators.instance_of((str, type(None))), ), validate_reanalysis_selections, ), ) uncertainty_scada: float = field(default=0.005, converter=float) wind_bin_threshold: NDArrayFloat = field(default=(1.0, 3.0), validator=validate_UQ_input) max_power_filter: NDArrayFloat = field( default=(0.8, 0.9), validator=(validate_UQ_input, validate_half_closed_0_1_right) ) correction_threshold: NDArrayFloat = field( default=(0.85, 0.95), validator=(validate_UQ_input, validate_half_closed_0_1_right) ) # Internally created attributes need to be given a type before usage por_start: pd.Timestamp = field(init=False) por_end: pd.Timestamp = field(init=False) turbine_ids: np.ndarray = field(init=False) scada: pd.DataFrame = field(init=False) scada_dict: dict = field(factory=dict, init=False) daily_reanal_dict: dict = field(factory=dict, init=False) model_dict: dict = field(factory=dict, init=False) model_results: dict = field(factory=dict, init=False) scada_daily_valid: pd.DataFrame = field(default=pd.DataFrame(), init=False) reanalysis_memo: dict[str, pd.DataFrame] = field(factory=dict, init=False) daily_reanalysis: dict[str, pd.DataFrame] = field(factory=dict, init=False) _run: pd.DataFrame = field(init=False) _inputs: pd.DataFrame = field(init=False) scada_valid: pd.DataFrame = field(init=False) turbine_model_dict: dict[str, pd.DataFrame] = field(factory=dict, init=False) _model_results: dict[str, Callable] = field(factory=dict, init=False) turb_lt_gross: pd.DataFrame = field(default=pd.DataFrame(), init=False) summary_results: pd.DataFrame = field(init=False) plant_gross: dict[int, pd.DataFrame] = field(factory=dict, init=False) run_parameters: list[str] = field( init=False, default=[ "num_sim", "reanalysis_products", "uncertainty_scada", "wind_bin_threshold", "max_power_filter", "correction_threshold", ], ) @logged_method_call def __attrs_post_init__(self): """ Runs any non-automated setup steps for the analysis class. """ if {"TurbineLongTermGrossEnergy", "all"}.intersection(self.plant.analysis_type) == set(): self.plant.analysis_type.append("TurbineLongTermGrossEnergy") # Ensure the data are up to spec before continuing with initialization self.plant.validate() logger.info("Initializing TurbineLongTermGrossEnergy Object") # Check that selected UQ is allowed if self.UQ: logger.info("Note: uncertainty quantification will be performed in the calculation") else: logger.info("Note: uncertainty quantification will NOT be performed in the calculation") self.num_sim = 1 self.turbine_ids = self.plant.turbine_ids # Get start and end of POR days in SCADA self.por_start = self.plant.scada.index.get_level_values("time").min() self.por_end = self.plant.scada.index.get_level_values("time").max() # Initially sort the different turbine data into dictionary entries logger.info("Processing SCADA data into dictionaries by turbine (this can take a while)") self.sort_scada_by_turbine()
[docs] @logged_method_call def run( self, num_sim: int | None = None, reanalysis_products: list[str] | None = None, uncertainty_scada: float | None = None, wind_bin_threshold: float | tuple[float, float] | None = None, max_power_filter: float | tuple[float, float] | None = None, correction_threshold: float | tuple[float, float] | None = None, ) -> None: """ Pre-process the run-specific data settings for each simulation, then fit and apply the model for each simualtion. .. note:: If None is provided to any of the inputs, then the last used input value will be used for the analysis, and if no prior values were set, then this is the model's defaults. Args: num_sim(:obj:`int`): Number of simulations to run when `UQ` is True, otherwise set to 1. Defaults to 20000. uncertainty_scada(:obj:`float`): Uuncertainty imposed to the SCADA data when :py:attr:`UQ` is True only Defaults to 0.005. reanalysis_products(obj:`list[str]`) : List of reanalysis products to use for Monte Carlo sampling. Defaults to None, which pulls all the products contained in :py:attr:`plant.reanalysis`. wind_bin_threshold(:obj:`tuple`): The filter threshold for each vertical bin, expressed as number of standard deviations from the median in each bin. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this threshold, otherwise a single value should be used. Defaults to (1.0, 3.0) max_power_filter(:obj:`tuple`): Maximum power threshold, in the range (0, 1], to which the bin filter should be applied. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this filter, otherwise a single value should be used. Defaults to (0.8, 0.9). correction_threshold(:obj:`tuple`): The threshold, in the range of (0, 1], above which daily scada energy data should be corrected. When :py:attr:`UQ` is True, then this should be a tuple of the lower and upper limits of this threshold, otherwise a single value should be used. Defaults to (0.85, 0.95) """ initial_parameters = {} if num_sim is not None: if self.UQ: self.num_sim = num_sim elif num_sim > 1: logger.info( "`num_sim` can NOT be greater than 1 when `UQ=False`, value has not been set." ) if reanalysis_products is not None: initial_parameters["reanalysis_products"] = self.reanalysis_products self.reanalysis_products = reanalysis_products if uncertainty_scada is not None: initial_parameters["uncertainty_scada"] = self.uncertainty_scada self.uncertainty_scada = uncertainty_scada if wind_bin_threshold is not None: initial_parameters["wind_bin_threshold"] = self.wind_bin_threshold self.wind_bin_threshold = wind_bin_threshold if max_power_filter is not None: initial_parameters["max_power_filter"] = self.max_power_filter self.max_power_filter = max_power_filter if correction_threshold is not None: initial_parameters["correction_threshold"] = self.correction_threshold self.correction_threshold = correction_threshold self.setup_inputs() logger.info("Running the long term gross energy analysis") # Loop through number of simulations, store TIE results for i in tqdm(np.arange(self.num_sim)): self._run = self._inputs.loc[i] self.filter_turbine_data() # Filter turbine data self.setup_daily_reanalysis_data() # Setup daily reanalysis products self.filter_sum_impute_scada() # Setup daily scada data self.setupturbine_model_dict() # Setup daily data to be fit using the GAM self.fit_model() # Fit daily turbine energy to atmospheric data self.apply_model(i) # Apply fitting result to long-term reanalysis data # Log the completion of the run logger.info("Run completed") # Reset the class arguments back to the initialized values self.set_values(initial_parameters)
[docs] def setup_inputs(self) -> None: """ Create and populate the data frame defining the simulation parameters. This data frame is stored as self._inputs """ if self.UQ: reanal_list = list( np.repeat(self.reanalysis_products, self.num_sim) ) # Create extra long list of renanalysis product names to sample from inputs = { "reanalysis_product": np.asarray(random.sample(reanal_list, self.num_sim)), "scada_data_fraction": np.random.normal(1, self.uncertainty_scada, self.num_sim), "wind_bin_thresh": np.random.randint( self.wind_bin_threshold[0] * 100, self.wind_bin_threshold[1] * 100, self.num_sim, ) / 100.0, "max_power_filter": np.random.randint( self.max_power_filter[0] * 100, self.max_power_filter[1] * 100, self.num_sim, ) / 100.0, "correction_threshold": np.random.randint( self.correction_threshold[0] * 100, self.correction_threshold[1] * 100, self.num_sim, ) / 100.0, } self.plant_gross = np.empty([self.num_sim, 1]) if not self.UQ: inputs = { "reanalysis_product": self.reanalysis_products, "scada_data_fraction": 1, "wind_bin_thresh": self.wind_bin_threshold, "max_power_filter": self.max_power_filter, "correction_threshold": self.correction_threshold, } self.plant_gross = np.empty([len(self.reanalysis_products), 1]) self.num_sim = len(self.reanalysis_products) self._inputs = pd.DataFrame(inputs)
[docs] def sort_scada_by_turbine(self) -> None: """ Sorts the SCADA DataFrame by the asset_id and timestamp index columns, respectively. """ df = self.plant.scada.copy() dic = self.scada_dict # Loop through turbine IDs for t in self.turbine_ids: # Store relevant variables in dictionary dic[t] = df.loc[df.index.get_level_values("asset_id") == t].reindex( columns=["WMET_HorWdSpd", "WTUR_W", "WTUR_SupWh"] ) dic[t].sort_index(inplace=True)
[docs] @logged_method_call def filter_turbine_data(self) -> None: """ Apply a set of filtering algorithms to the turbine wind speed vs power curve to flag data not representative of normal turbine operation Performs the following manipulations: 1. Drops any scada rows that don't have any windspeed or energy data 2. Flags windspeed values outside the range [0, 40] 3. Flags windspeed values that have stayed the same for at least 3 straight readings 4. Flags power values less than 2% of turbine capacity when wind speed above cut-in 5. Flags windspeed and power values that don't mutually coincide within a reasonable range 6. Combine the flags using an "or" combination to be a new column in scada: "flag_final" """ dic = self.scada_dict # Loop through turbines for t in self.turbine_ids: scada_df = self.scada_dict[t] turbine_capacity = self.plant.asset.loc[t, "rated_power"] max_bin = ( self._run.max_power_filter * turbine_capacity ) # Set maximum range for using bin-filter scada_df.dropna( subset=["WMET_HorWdSpd", "WTUR_SupWh"], inplace=True ) # Drop any data where scada wind speed or energy is NaN scada_df = scada_df.assign( flag_neg=filters.range_flag(scada_df.WTUR_W, lower=0, upper=scada_df.WTUR_W.max()), flag_range=filters.range_flag(scada_df.WMET_HorWdSpd, lower=0, upper=40), flag_frozen=filters.unresponsive_flag(scada_df.WMET_HorWdSpd, threshold=3), flag_window=filters.window_range_flag( window_col=dic[t].loc[:, "WMET_HorWdSpd"], window_start=5.0, window_end=40, value_col=dic[t].loc[:, "WTUR_W"], value_min=0.02 * turbine_capacity, value_max=1.2 * turbine_capacity, ), flag_bin=filters.bin_filter( bin_col=dic[t].loc[:, "WTUR_W"], value_col=dic[t].loc[:, "WMET_HorWdSpd"], bin_width=0.06 * turbine_capacity, threshold=self._run.wind_bin_thresh, center_type="median", bin_min=np.round(0.01 * turbine_capacity), bin_max=np.round(max_bin), threshold_type="std", direction="all", ), ) # Create a 'final' flag which is true if any of the previous flags are true self.scada_dict[t].loc[:, "flag_final"] = ( scada_df.flag_range | scada_df.flag_window | scada_df.flag_bin | scada_df.flag_frozen )
[docs] @logged_method_call def setup_daily_reanalysis_data(self) -> None: """ Process reanalysis data to daily means for later use in the GAM model. """ # Memoize the function so you don't have to recompute the same reanalysis product twice if (df_daily := self.reanalysis_memo.get(self._run.reanalysis_product, None)) is not None: self.daily_reanalysis = df_daily.copy() return # Capture the runs reanalysis data set and ensure the U/V components exist reanalysis_df = self.plant.reanalysis[self._run.reanalysis_product] if len({"WMETR_HorWdSpdU", "WMETR_HorWdSpdV"}.intersection(reanalysis_df.columns)) < 2: ( reanalysis_df["WMETR_HorWdSpdU"], reanalysis_df["WMETR_HorWdSpdV"], ) = met.compute_u_v_components("WMETR_HorWdSpd", "WMETR_HorWdDir", reanalysis_df) # Resample at a daily resolution and recalculate daily average wind direction df_daily = reanalysis_df.groupby([pd.Grouper(freq="D", level="time")])[ ["WMETR_HorWdSpdU", "WMETR_HorWdSpdV", "WMETR_HorWdSpd", "WMETR_AirDen"] ].mean() wd = met.compute_wind_direction(u="WMETR_HorWdSpdU", v="WMETR_HorWdSpdV", data=df_daily) df_daily = df_daily.assign(WMETR_HorWdDir=wd.values) self.daily_reanalysis = df_daily # Store the results for re-use self.reanalysis_memo[self._run.reanalysis_product] = df_daily
[docs] @logged_method_call def filter_sum_impute_scada(self) -> None: """ Filter SCADA data for unflagged data, gather SCADA energy data into daily sums, and correct daily summed energy based on amount of missing data and a threshold limit. Finally impute missing data for each turbine based on reported energy data from other highly correlated turbines. threshold """ scada = self.scada_dict expected_count = ( HOURS_PER_DAY * MINUTES_PER_HOUR / (ts.offset_to_seconds(self.plant.metadata.scada.frequency) / 60) ) num_thres = self._run.correction_threshold * expected_count # Allowable reported timesteps self.scada_valid = pd.DataFrame() # Loop through turbines for t in self.turbine_ids: scada_filt = scada[t].loc[~scada[t]["flag_final"]] # Filter for valid data # Calculate daily energy sum scada_daily = ( scada_filt.groupby([pd.Grouper(freq="D", level="time")])["WTUR_SupWh"] .sum() .to_frame() ) # Count number of entries in sum scada_daily["data_count"] = ( scada_filt.groupby([pd.Grouper(freq="D", level="time")])["WTUR_SupWh"] .count() .to_frame() ) scada_daily["percent_nan"] = ( scada_filt.groupby([pd.Grouper(freq="D", level="time")])["WTUR_SupWh"] .apply(ts.percent_nan) .to_frame() ) # Correct energy for missing data scada_daily["energy_corrected"] = ( scada_daily["WTUR_SupWh"] * expected_count / scada_daily["data_count"] ) # Discard daily sums if less than 140 data counts (90% reported data) scada_daily = scada_daily.loc[scada_daily["data_count"] >= num_thres] # Create temporary data frame that is gap filled and to be used for imputing temp_df = pd.DataFrame( index=pd.date_range(self.por_start, self.por_end, freq="D", name="time") ) temp_df["energy_corrected"] = scada_daily["energy_corrected"] temp_df["percent_nan"] = scada_daily["percent_nan"] temp_df["asset_id"] = np.repeat(t, temp_df.shape[0]) temp_df["day"] = temp_df.index # Append turbine data into single data frame for imputing self.scada_valid = pd.concat([self.scada_valid, temp_df], axis=0) # Reset index after all turbines has been combined self.scada_valid = self.scada_valid.set_index("asset_id", append=True) # Impute missing days for each turbine - provides progress bar self.scada_valid["energy_imputed"] = imputing.impute_all_assets_by_correlation( self.scada_valid, impute_col="energy_corrected", reference_col="energy_corrected", ) # Drop data that could not be imputed self.scada_valid.dropna(subset=["energy_imputed"], inplace=True)
[docs] @logged_method_call def setupturbine_model_dict(self) -> None: """Setup daily atmospheric variable averages and daily energy sums by turbine.""" reanalysis = self.daily_reanalysis for t in self.turbine_ids: self.turbine_model_dict[t] = ( self.scada_valid.loc[self.scada_valid.index.get_level_values("asset_id") == t] .set_index("day") .join(reanalysis) .dropna(subset=["energy_imputed", "WMETR_HorWdSpd"]) )
[docs] @logged_method_call def fit_model(self) -> None: """Fit the daily turbine energy sum and atmospheric variable averages using a GAM model using wind speed, wind direction, and air density. """ mod_dict = self.turbine_model_dict mod_results = self._model_results for t in self.turbine_ids: # Loop throuh turbines df = mod_dict[t] # Add Monte-Carlo sampled uncertainty to SCADA data df["energy_imputed"] = df["energy_imputed"] * self._run.scada_data_fraction # Consider wind speed, wind direction, and air density as features mod_results[t] = functions.gam_3param( windspeed_col="WMETR_HorWdSpd", wind_direction_col="WMETR_HorWdDir", air_density_col="WMETR_AirDen", power_col="energy_imputed", data=df, ) self._model_results = mod_results
[docs] @logged_method_call def apply_model(self, i: int) -> None: """ Apply the model to the reanalysis data to calculate long-term gross energy for each turbine. Args: i(:obj:`int`): The Monte Carlo iteration number. """ turb_gross = self.turb_lt_gross mod_results = self._model_results # Create a data frame to store final results self.summary_results = pd.DataFrame( index=self.reanalysis_products, columns=self.turbine_ids ) daily_reanalysis = self.daily_reanalysis turb_gross = pd.DataFrame(index=daily_reanalysis.index) # Loop through the turbines and apply the GAM to the reanalysis data for t in self.turbine_ids: # Loop through turbines turb_gross.loc[:, t] = mod_results[t]( daily_reanalysis["WMETR_HorWdSpd"], daily_reanalysis["WMETR_HorWdDir"], daily_reanalysis["WMETR_AirDen"], ) turb_gross[turb_gross < 0] = 0 # Calculate monthly sums of energy from long-term estimate turb_mo = turb_gross.resample("MS").sum() # Get average sum by calendar month turb_mo_avg = turb_mo.groupby(turb_mo.index.month).mean() # Store sum of turbine gross energy self.plant_gross[i] = turb_mo_avg.sum(axis=1).sum(axis=0) self.turb_lt_gross = turb_gross
[docs] def plot_filtered_power_curves( self, turbines: list[str] | None = None, flag_labels: tuple[str, str] = None, max_cols: int = 3, xlim: tuple[float, float] = (None, None), ylim: tuple[float, float] = (None, None), legend: bool = False, return_fig: bool = False, figure_kwargs: dict = {}, legend_kwargs: dict = {}, plot_kwargs: dict = {}, ): """Plot the raw and flagged power curve data. Args: turbines(:obj:`list[str]`, optional): The list of turbines to be plot, if not all of the keys in :py:attr:`data`. flag_labels (:obj:`tuple[str, str]`, optional): The labels to give to the scatter points, where the first entryis the flagged points, and the second entry correpsponds to the standard power curve. Defaults to None. max_cols(:obj:`int`, optional): The maximum number of columns in the plot. Defaults to 3. xlim(:obj:`tuple[float, float]`, optional): A tuple of the x-axis (min, max) values. Defaults to (None, None). ylim(:obj:`tuple[float, float]`, optional): A tuple of the y-axis (min, max) values. Defaults to (None, None). legend(:obj:`bool`, optional): Set to True to place a legend in the figure, otherwise set to False. Defaults to False. return_fig(:obj:`bool`, optional): Set to True to return the figure and axes objects, otherwise set to False. Defaults to False. figure_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``plt.figure()``. Defaults to {}. plot_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``ax.scatter()``. Defaults to {}. legend_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``ax.legend()``. Defaults to {}. Returns: None | tuple[matplotlib.pyplot.Figure, matplotlib.pyplot.Axes]: If `return_fig` is True, then the figure and axes objects are returned for further tinkering/saving. """ return plot.plot_power_curves( data=self.scada_dict, windspeed_col="WMET_HorWdSpd", power_col="WTUR_W", flag_col="flag_final", turbines=turbines, flag_labels=flag_labels, max_cols=max_cols, xlim=xlim, ylim=ylim, legend=legend, return_fig=return_fig, figure_kwargs=figure_kwargs, legend_kwargs=legend_kwargs, plot_kwargs=plot_kwargs, )
[docs] def plot_daily_fitting_result( self, turbines: list[str] | None = None, flag_labels: tuple[str, str, str] = ("Modeled", "Imputed", "Input"), max_cols: int = 3, xlim: tuple[float, float] = (None, None), ylim: tuple[float, float] = (None, None), legend: bool = False, return_fig: bool = False, figure_kwargs: dict = {}, legend_kwargs: dict = {}, plot_kwargs: dict = {}, ): """Plot the raw, imputed, and modeled power curve data. Args: turbines(:obj:`list[str]`, optional): The list of turbines to be plot, if not all of the keys in :py:attr:`data`. labels (:obj:`tuple[str, str]`, optional): The labels to give to the scatter points, corresponding to the modeled, imputed, and input data, respectively. Defaults to ("Modeled", "Imputed", "Input"). max_cols(:obj:`int`, optional): The maximum number of columns in the plot. Defaults to 3. xlim(:obj:`tuple[float, float]`, optional): A tuple of the x-axis (min, max) values. Defaults to (None, None). ylim(:obj:`tuple[float, float]`, optional): A tuple of the y-axis (min, max) values. Defaults to (None, None). legend(:obj:`bool`, optional): Set to True to place a legend in the figure, otherwise set to False. Defaults to False. return_fig(:obj:`bool`, optional): Set to True to return the figure and axes objects, otherwise set to False. Defaults to False. figure_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``plt.figure()``. Defaults to {}. plot_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``ax.scatter()``. Defaults to {}. legend_kwargs(:obj:`dict`, optional): Additional keyword arguments that should be passed to ``ax.legend()``. Defaults to {}. Returns: None | tuple[matplotlib.pyplot.Figure, matplotlib.pyplot.Axes]: If :py:attr`return_fig` is True, then the figure and axes objects are returned for further tinkering/saving. """ turbines = list(self.turbine_model_dict.keys()) if turbines is None else turbines num_cols = len(turbines) num_rows = int(np.ceil(num_cols / max_cols)) if flag_labels is None: flag_labels = ("Modeled", "Imputed", "Input") figure_kwargs.setdefault("dpi", 200) figure_kwargs.setdefault("figsize", (15, num_rows * 5)) fig, axes_list = plt.subplots(num_rows, max_cols, **figure_kwargs) ws_daily = self.daily_reanalysis["WMETR_HorWdSpd"] for i, (t, ax) in enumerate(zip(turbines, axes_list.flatten())): df = self.turbine_model_dict[t] df_imputed = df.loc[df["energy_corrected"] != df["energy_imputed"]] ax.scatter( ws_daily, self.turb_lt_gross[t], label=flag_labels[0], alpha=0.2, color="tab:blue", **plot_kwargs, ) ax.scatter( df["WMETR_HorWdSpd"], df["energy_imputed"], label=flag_labels[2], alpha=0.6, color="tab:green", **plot_kwargs, ) ax.scatter( df_imputed["WMETR_HorWdSpd"], df_imputed["energy_imputed"], label=flag_labels[1], alpha=0.6, color="tab:orange", **plot_kwargs, ) ax.set_title(t) ax.set_xlim(xlim) ax.set_ylim(ylim) ax.yaxis.set_major_formatter(StrMethodFormatter("{x:,.0f}")) if legend: ax.legend(**legend_kwargs) if i % max_cols == 0: ax.set_ylabel("Power (kW)") if i in range(max_cols * (num_rows - 1), num_cols): ax.set_xlabel("Wind Speed (m/s)") num_axes = axes_list.size if i < num_axes - 1: for j in range(i + 1, num_axes): fig.delaxes(axes_list.flatten()[j]) fig.tight_layout() plt.show() if return_fig: return fig, ax
__defaults_UQ = TurbineLongTermGrossEnergy.__attrs_attrs__.UQ.default __defaults_num_sim = TurbineLongTermGrossEnergy.__attrs_attrs__.num_sim.default __defaults_reanalysis_products = ( TurbineLongTermGrossEnergy.__attrs_attrs__.reanalysis_products.default ) __defaults_uncertainty_scada = TurbineLongTermGrossEnergy.__attrs_attrs__.uncertainty_scada.default __defaults_wind_bin_threshold = ( TurbineLongTermGrossEnergy.__attrs_attrs__.wind_bin_threshold.default ) __defaults_max_power_filter = TurbineLongTermGrossEnergy.__attrs_attrs__.max_power_filter.default __defaults_correction_threshold = ( TurbineLongTermGrossEnergy.__attrs_attrs__.correction_threshold.default ) def create_TurbineLongTermGrossEnergy( project: PlantData, UQ: bool = __defaults_UQ, num_sim: int = __defaults_num_sim, reanalysis_products=__defaults_reanalysis_products, uncertainty_scada: float = __defaults_uncertainty_scada, wind_bin_threshold: NDArrayFloat = __defaults_wind_bin_threshold, max_power_filter: NDArrayFloat = __defaults_max_power_filter, correction_threshold: NDArrayFloat = __defaults_correction_threshold, ) -> TurbineLongTermGrossEnergy: return TurbineLongTermGrossEnergy( plant=project, UQ=UQ, num_sim=num_sim, reanalysis_products=reanalysis_products, wind_bin_threshold=wind_bin_threshold, max_power_filter=max_power_filter, correction_threshold=correction_threshold, uncertainty_scada=uncertainty_scada, ) create_TurbineLongTermGrossEnergy.__doc__ = TurbineLongTermGrossEnergy.__doc__