Source code for unox.plot_format

import numpy as np

from unox import data as udata
from unox.HPC.data0.verify_dtype import verify_number

def set_fig_row_col(
    n_subplots,
    n_rows=None,
    n_cols=None,
    **kwargs,
):
    """
    Set the number of rows and columns in a figure.

    Determine the number of rows and columns in a figure based on the
    number of subplots. Any dimension given by the caller is kept; the
    missing one(s) are chosen so that ``n_rows * n_cols >= n_subplots``.

    Parameters
    ----------
    n_subplots : `int`
        The total number of subplots in the figure. NumPy integer scalars
        (e.g. `numpy.int64`) are accepted as well.
    n_rows : `int`, `None`, optional
        The number of rows to use in the figure. Default is `None`.
    n_cols : `int`, `None`, optional
        The number of columns to use in the figure. Default is `None`.
    **kwargs : keyword arguments
        Additional keyword arguments accepted to facilitate wrapper
        functions. They are ignored.

    Returns
    -------
    n_rows : `int`
        The number of rows in the figure.
    n_cols : `int`
        The number of columns in the figure.

    Raises
    ------
    TypeError
        If `n_subplots` is not an integer, or `n_rows` / `n_cols` is neither
        an integer nor `None`.
    ValueError
        If any given value is zero or negative, or if both `n_rows` and
        `n_cols` are given but cannot hold `n_subplots` subplots.

    Examples
    --------
    >>> set_fig_row_col(4)
    (2, 2)
    >>> set_fig_row_col(6)
    (2, 3)
    >>> set_fig_row_col(6, n_rows=3)
    (3, 2)
    """
    # NumPy integer scalars are not subclasses of `int`, so list them explicitly
    int_types = (int, np.integer)

    # Verify argument types
    if not isinstance(n_subplots, int_types):
        raise TypeError(f"(set_fig_row_col) `n_subplots` must be an integer. Got type: {type(n_subplots)}")
    if n_rows is not None and not isinstance(n_rows, int_types):
        raise TypeError(f"(set_fig_row_col) `n_rows` must be an integer or `None`. Got type: {type(n_rows)}")
    if n_cols is not None and not isinstance(n_cols, int_types):
        raise TypeError(f"(set_fig_row_col) `n_cols` must be an integer or `None`. Got type: {type(n_cols)}")

    # Normalise to built-in ints so the returned values are always plain `int`
    n_subplots = int(n_subplots)
    if n_rows is not None:
        n_rows = int(n_rows)
    if n_cols is not None:
        n_cols = int(n_cols)

    # Make sure none of the inputs are equal to zero or negative
    if n_subplots <= 0:
        raise ValueError(f"(set_fig_row_col) `n_subplots` must be a positive integer. Got: {n_subplots}")
    if n_rows is not None and n_rows <= 0:
        raise ValueError(f"(set_fig_row_col) `n_rows` must be a positive integer. Got: {n_rows}")
    if n_cols is not None and n_cols <= 0:
        raise ValueError(f"(set_fig_row_col) `n_cols` must be a positive integer. Got: {n_cols}")

    # Both rows and columns are specified: only check that the grid is big enough
    if n_rows is not None and n_cols is not None:
        if n_rows * n_cols < n_subplots:
            raise ValueError(f"(set_fig_row_col) `n_rows` * `n_cols` must be greater than or equal to `n_subplots`. Got: {n_rows} * {n_cols} < {n_subplots}")
        return n_rows, n_cols

    # Only rows are specified
    if n_rows is not None:
        return n_rows, int(np.ceil(n_subplots / n_rows))

    # Only columns are specified
    if n_cols is not None:
        return int(np.ceil(n_subplots / n_cols)), n_cols

    # Neither is specified. These counts get a hand-picked wide layout
    # instead of the near-square grid computed below.
    preferred_layouts = {3: (1, 3), 7: (2, 4), 8: (2, 4)}
    if n_subplots in preferred_layouts:
        return preferred_layouts[n_subplots]

    # Use as close to a square layout as possible
    n_cols = int(np.ceil(np.sqrt(n_subplots)))
    n_rows = int(np.ceil(n_subplots / n_cols))
    return n_rows, n_cols
def pad_extent(
    extent,
    padding=0.1,
):
    """
    Pad the given extent.

    Enlarge a latitude/longitude bounding box on every side by a fraction
    of its own size, then clip the result to valid geographic limits.

    Parameters
    ----------
    extent : `tuple`
        A tuple of np.float64 in the form (lat_min, lat_max, lon_min, lon_max).
    padding : `float`, optional
        Fraction of the latitude (resp. longitude) span added on each side.
        Default is 0.1.

    Returns
    -------
    padded_extent : `tuple`
        A tuple in the form (p_lat_min, p_lat_max, p_lon_min, p_lon_max),
        with latitudes clipped to [-90, 90] and longitudes to [-180, 180].

    Raises
    ------
    ValueError
        If `extent` is not a tuple of length 4.
    TypeError
        If `padding` is rejected by `verify_number`.

    Examples
    --------
    >>> nox = xr.open_dataset('datafiles/nox_2019_t106_US.nc')
    >>> extent = unox.data.get_extent(nox)
    >>> pad_extent(extent, padding=0.1)
    (20.635399999999997, 62.3546, -132.6375, -52.9875)
    """
    # Reject anything that is not a 4-tuple before looking at its contents
    if not (isinstance(extent, tuple) and len(extent) == 4):
        raise ValueError(f"(pad_extent) `extent` must be a tuple of the form (lat_min, lat_max, lon_min, lon_max). Got type: {type(extent)}")

    # The padding must be a number
    if not verify_number(padding):
        raise TypeError(f"(pad_extent) `padding` must be a number. Got type: {type(padding)}.")

    # Pull the four bounds apart and run each through the project validators;
    # the validators' return values are what is used from here on.
    lat_lo, lat_hi, lon_lo, lon_hi = extent
    lat_lo = udata.verify_lat(lat_lo)
    lat_hi = udata.verify_lat(lat_hi)
    lon_lo = udata.verify_lon(lon_lo)
    lon_hi = udata.verify_lon(lon_hi)

    # Margin to add on each side, proportional to the span along that axis
    lat_margin = padding*abs(lat_hi - lat_lo)
    lon_margin = padding*abs(lon_hi - lon_lo)

    p_lat_min = lat_lo - lat_margin
    p_lat_max = lat_hi + lat_margin
    p_lon_min = lon_lo - lon_margin
    p_lon_max = lon_hi + lon_margin

    # Keep the latitude values in the range [-90, 90]
    p_lat_min = -90 if p_lat_min < -90 else p_lat_min
    p_lat_max = 90 if p_lat_max > 90 else p_lat_max

    # Keep the longitude values in the range [-180, 180]
    p_lon_min = -180 if p_lon_min < -180 else p_lon_min
    p_lon_max = 180 if p_lon_max > 180 else p_lon_max

    return (p_lat_min, p_lat_max, p_lon_min, p_lon_max)
def get_var_label_and_units(
    var,
):
    """
    Get the label and units for a variable.

    Look up the plot label and the units string registered for a variable
    name. Both strings may contain LaTeX math markup.

    Parameters
    ----------
    var : `str`
        The name of the variable.

    Returns
    -------
    label : `str`
        The label for the variable.
    units : `str`
        The units for the variable.

    Raises
    ------
    ValueError
        If `var` is not one of the recognized variable names.

    Examples
    --------
    >>> get_var_label_and_units('t2m')
    ('2 metre temperature', 'K')
    """
    known_vars = {
        'lat': ('Latitude', r'$^\circ$N'),
        'lon': ('Longitude', r'$^\circ$E'),
        ## NOx
        # t106 variables
        'nox': (r'Surface NO$_x$ emissions', r'kg N m$^{-2}$ s$^{-1}$'),
        # TROPESS variables
        'no2': (r'NO$_2$', 'ppt'),
        'no2_tm1': (r'NO$_2$ at $t-1$', 'ppt'),
        ## CO
        # HEMCO variables
        'EmisCO_Total': (r'CO emission flux (all sectors)', r'kg/m$^2$/s'),
        # GEOS-Chem variables
        'SpeciesConcVV_CO': (r'CO concentration', r'mol / mol dry'),
        ## Meteorology
        # ERA5 variables
        'u10': ('10 metre U wind component', r'm s$^{-1}$'),
        'v10': ('10 metre V wind component', r'm s$^{-1}$'),
        'blh': ('Boundary layer height', 'm'),
        'sp': ('Surface pressure', 'Pa'),
        'skt': ('Skin temperature', 'K'),
        'ssrd': ('Surface short-wave (solar) radiation downwards', r'J m$^{-2}$'),
        't2m': ('2 metre temperature', 'K'),
    }

    # Every registered value is a tuple, so `None` can only mean "unknown name"
    entry = known_vars.get(var)
    if entry is None:
        raise ValueError(f"(get_var_label_and_units) Variable '{var}' not recognized. Available variables: {list(known_vars)}")

    label, units = entry
    return label, units
def make_stage_comp_arrs(
    in_arrs,
    this_date,
    var,
    avg_over=None,
    stage1_only=False,
):
    """
    Create arrays for stage comparison plots.

    Create a dictionary of arrays for stage comparison, where each key is a
    stage (or a difference between two stages) and the value is a 2-D array
    of the variable for that entry. For use with the
    `unox.plotting.plot_stage_comp_maps()` function.

    Parameters
    ----------
    in_arrs : `dict`
        A dictionary of input arrays, where the keys are stage names and the
        values are arrays. Expects format like:
        {'truth': truth_arr, 'stage1': stage1_arr, 'stage2': stage2_arr}
        Each array is indexed with four indices, the first being the day of
        year and the last the channel (only channel 0 is used). The key
        'stage2' is only read when `stage1_only` is False.
    this_date : `np.datetime64` or `str`
        Date and time to select from the data file. Expected format is
        'YYYY-MM-DDTHH:MM:SS' or 'YYYY-MM-DD'.
    var : `str`
        The variable which will be plotted.
    avg_over : `str`, `numpy.timedelta64`, `None`, optional
        If provided, averages the data over the specified time period
        starting at `this_date`. If None, takes just the time slice
        specified in `this_date`.
    stage1_only : `bool`, optional
        If True, produce arrays just corresponding to stage 1. If False,
        produce arrays for stage 1 and stage 2. Default is False.

    Returns
    -------
    out_arrs : `dict`
        A dictionary of output arrays with keys 'truth', 'stage1' and
        't_m_st1' (truth minus stage 1), plus 'stage2', 't_m_st2' and
        'st1_m_st2' when `stage1_only` is False.
    overall_title : `str`
        A title for the overall plot, based on the variable and date(s).

    Examples
    --------
    >>> # Example usage
    >>> out_arrs, title = make_stage_comp_arrs(in_arrs, '2019-01-01', 'no2')
    """
    # Only the label is needed for the title; the units are not used here
    var_label, _ = get_var_label_and_units(var)

    # `this_date` may be an np.datetime64, which cannot be concatenated to a
    # str directly, so build the title from its string form.
    date_str = str(this_date)

    # Get the day of year to plot
    DOY = udata.get_DOY(this_date)

    averaging = avg_over is not None
    if averaging:
        # Add the increment to the date to find where the window ends
        end_date = udata.add_amount_to_date(this_date, avg_over, keep_within_year=True)
        DOY_end = udata.get_DOY(end_date)
        # Account for the fact that they only have 364 days
        if DOY_end > 364:
            DOY_end = 364
        print('start DOY:', DOY, 'end DOY:', DOY_end)
        # Half-open window: DOY_end itself is not included
        time_sel = slice(DOY, DOY_end)
    else:
        # A single day
        time_sel = DOY

    def _collapse(arr):
        # Average over the leading (time) axis when a window was selected;
        # a single-day selection has no time axis left to collapse.
        return arr.mean(axis=0) if averaging else arr

    # Select the day / window, take the difference day by day, then average
    truth_sel = in_arrs['truth'][time_sel, :, :, :]
    stage1_sel = in_arrs['stage1'][time_sel, :, :, :]
    truth = _collapse(truth_sel)
    stage1 = _collapse(stage1_sel)
    t_m_st1 = _collapse(truth_sel - stage1_sel)

    # If including stage 2
    if not stage1_only:
        stage2_sel = in_arrs['stage2'][time_sel, :, :, :]
        # NOTE: when averaging, `truth` and `stage1` are already averaged
        # here and are broadcast against the per-day stage 2 data. By
        # linearity of the mean this matches averaging per-day differences.
        t_m_st2 = _collapse(truth - stage2_sel)
        st1_m_st2 = _collapse(stage1 - stage2_sel)
        stage2 = _collapse(stage2_sel)

    # Format a string for the title
    if averaging:
        # Get the value and unit of the averaging
        avg_over_num, avg_over_unit = udata.get_increment_info(avg_over)
        overall_title = f"{var_label} averaged over {avg_over_num} {avg_over_unit} from {date_str}"
    else:
        overall_title = f"{var_label} on {date_str}"

    # Set the arrays to plot
    ## They only have one channel, so just select index 0
    out_arrs = {
        'truth': truth[:, :, 0],
        'stage1': stage1[:, :, 0],
        't_m_st1': t_m_st1[:, :, 0],
    }
    if not stage1_only:
        out_arrs['stage2'] = stage2[:, :, 0]
        out_arrs['t_m_st2'] = t_m_st2[:, :, 0]
        out_arrs['st1_m_st2'] = st1_m_st2[:, :, 0]

    return out_arrs, overall_title
def _split_sci(value, ndp):
    """Split `value` into its ('e'-notation mantissa string, integer exponent) with `ndp` decimals."""
    # Non-finite values format as 'nan' / 'inf' with no 'e', so the unpacking
    # raises ValueError, which callers treat as "cannot be formatted".
    mantissa, exponent = '{v:0.{ndp:d}e}'.format(v=value, ndp=ndp).split('e')
    return mantissa, int(exponent)


def format_sci_notation(
    x,
    ndp=2,
    sci_lims_f=(-3, 3),
    pm_val=None,
    condense=False
):
    r"""
    Format a number into scientific notation.

    Return a formatted string of a number in scientific notation if the
    exponent is outside the specified limits. Uses LaTeX formatting for the
    string to be used in plot labels.

    Parameters
    ----------
    x : `float`, `int`
        The number to format.
    ndp : `int`, optional
        The number of decimal places to include in the formatted string.
        Default is 2. When `pm_val` is given, the number of decimal places
        is instead ``max(2, exponent of x - exponent of pm_val)``.
    sci_lims_f : `tuple` of `int`, optional
        The limits on the powers of 10 between which scientific notation
        will not be used. Default is (-3, 3).
    pm_val : `float`, `int`, `None`, optional
        The plus-minus uncertainty value to include in the formatted string
        after a `\pm` symbol. If `None`, no uncertainty will be included.
        Default is `None`.
    condense : `bool`, optional
        Whether to remove spaces around `\pm` and `\times` operators in the
        formatted string. This is done by wrapping the operators in braces.
        Default is `False`.

    Returns
    -------
    formatted_str : `str`
        The formatted string of the number in scientific notation if the
        exponent is outside the specified limits, or in normal decimal
        notation otherwise. If `pm_val` is provided, the uncertainty will be
        included in the formatted string. If a value cannot be split into
        mantissa and exponent (e.g. NaN or infinity), a warning is printed
        and plain decimal formatting is returned.

    Raises
    ------
    TypeError
        If any argument has the wrong type.

    Examples
    --------
    >>> format_sci_notation(0.00012345)
    '1.23\\times10^{-4}'
    >>> format_sci_notation(12345, ndp=3)
    '1.234\\times10^{4}'
    >>> format_sci_notation(0.00012345, sci_lims_f=(-5, 5))
    '0.00'
    >>> format_sci_notation(12345, sci_lims_f=(-5, 5))
    '12345.00'
    """
    # Verify argument types
    if not verify_number(x):
        raise TypeError(f"(format_sci_notation) `x` must be a number. Got type: {type(x)}.")
    if not isinstance(ndp, int):
        raise TypeError(f"(format_sci_notation) `ndp` must be an integer. Got type: {type(ndp)}.")
    if not isinstance(sci_lims_f, tuple) or len(sci_lims_f) != 2 or not all(isinstance(i, int) for i in sci_lims_f):
        raise TypeError(f"(format_sci_notation) `sci_lims_f` must be a tuple of two integers. Got type: {type(sci_lims_f)} with values: {sci_lims_f}.")
    if not (pm_val is None or verify_number(pm_val)):
        raise TypeError(f"(format_sci_notation) `pm_val` must be a number or `None`. Got type: {type(pm_val)}.")
    if not isinstance(condense, bool):
        raise TypeError(f"(format_sci_notation) `condense` must be a boolean. Got type: {type(condense)}.")

    # The condensed form wraps each operator in braces
    pm_op = r'{\pm}' if condense else r'\pm'
    times_op = r'{\times}' if condense else r'\times'
    exp_lo, exp_hi = min(sci_lims_f), max(sci_lims_f)

    # Exceptions that string formatting / mantissa-exponent splitting can raise
    format_errors = (TypeError, ValueError, OverflowError)

    if pm_val is None:
        try:
            m, e = _split_sci(x, ndp)
        except format_errors:
            print('Warning: could not format',x,'with',ndp,'decimal places')
            return '{x:0.{ndp:d}f}'.format(x=x, ndp=ndp)

        # Use scientific notation only outside the exponent limits
        if e < exp_lo or e > exp_hi:
            return f'{m}{times_op}10^{{{e:d}}}'
        return '{x:0.{ndp:d}f}'.format(x=x, ndp=ndp)

    try:
        # Find the base 10 exponents of x and of pm_val
        _, e = _split_sci(x, ndp)
        _, pm_e = _split_sci(pm_val, ndp)
    except format_errors:
        print('Warning: could not format',x,'with',ndp,'decimal places and pm_val:',pm_val)
        return '{x:0.{ndp:d}f}{pm}{pm_val:0.{ndp:d}f}'.format(x=x, ndp=ndp, pm=pm_op, pm_val=pm_val)

    # Find difference between exponents to use as new number of decimal places
    new_ndp = max(2, e - pm_e)

    # Reformat x with the new precision (rounding may change the exponent)
    m, e = _split_sci(x, new_ndp)

    # Shift value of pm_val to the same exponent as x
    pm_m = '{v:0.{ndp:d}f}'.format(v=pm_val/(10**e), ndp=new_ndp)

    # Use scientific notation only outside the exponent limits
    if e < exp_lo or e > exp_hi:
        return f'({m}{pm_op}{pm_m}){times_op} 10^{{{e:d}}}'
    return '{x:0.{ndp:d}f}{pm}{pm_val:0.{ndp:d}f}'.format(x=x, ndp=new_ndp, pm=pm_op, pm_val=pm_val)