Source code for unox.unox
import numpy as np
import os
from datetime import datetime
from unox.HPC.data0.paths import verify_path
[docs]
def time_this(
    func,
):
    """ Time how long a function takes to execute.

    A decorator which can be applied to a function to print the execution
    time every time that function is called.

    Parameters
    ----------
    func : `function`
        The function to time.

    Returns
    -------
    wrap_func : `function`
        A wrapper that calls `func` with the same arguments, prints the
        elapsed wall-clock time, and returns whatever `func` returned.
        The wrapper keeps the name and docstring of `func`.

    Examples
    --------
    @time_this
    def my_function():
        # Do some calculations
        ...
        return result
    >>> my_function()
        Function 'my_function' execution time: 0:00:02.910000
    """
    # Imported here so the decorator does not rely on a module-level import
    from functools import wraps

    # `wraps` copies __name__, __doc__, etc. onto the wrapper so that
    # decorated functions still identify as themselves
    @wraps(func)
    def wrap_func(*args, **kwargs):
        # Get the time at the start of the execution
        t1 = datetime.now()
        # Execute the function
        result = func(*args, **kwargs)
        # Get the time at the end of the execution
        t2 = datetime.now()
        # The difference is a `timedelta`, printed as H:MM:SS.ffffff
        print(f'\tFunction {func.__name__!r} execution time: {t2-t1}')
        return result
    return wrap_func
[docs]
def load_lats_lons(
    path='datafiles/',
):
    """ Load latitude and longitude data from files.

    Load the arrays stored in `lats.npy` and `lons.npy` inside the given
    directory. Each file path is passed through `verify_path()` before the
    file is opened.

    Parameters
    ----------
    path : `str`, optional
        Relative path to the directory containing data files. A trailing
        separator is optional.

    Returns
    -------
    lats : `numpy.ndarray`
        Array of latitude values.
    lons : `numpy.ndarray`
        Array of longitude values.

    Examples
    --------
    >>> lats, lons = load_lats_lons()
    """
    # Join with os.path.join so that a directory given without a trailing
    # separator (e.g. 'datafiles') still resolves to 'datafiles/lats.npy'
    lat_path = verify_path(os.path.join(path, 'lats.npy'))
    lon_path = verify_path(os.path.join(path, 'lons.npy'))
    # Load each array from its file
    with open(lat_path, 'rb') as f:
        lats = np.load(f)
    with open(lon_path, 'rb') as f:
        lons = np.load(f)
    return lats, lons
[docs]
def show_available_data(
    path='inputfiles/no2_sample_input/',
    verb=False,
):
    """ List the data files available under a directory.

    The given path is passed through `verify_path()` and every file below
    the result is collected with `recursive_paths()`.

    Parameters
    ----------
    path : `str`, optional
        Relative path to the directory containing data files.
    verb : `bool`, optional
        Verbose mode. If True, print each file path on its own line.
        Defaults to False.

    Returns
    -------
    data_files : `list`
        List of file paths found under the given directory.

    Examples
    --------
    >>> data_files = show_available_data('inputfiles/no2_sample_input/', verb=True)
    inputfiles/no2_sample_input/stage1/y/Y_2019.npy
    inputfiles/no2_sample_input/stage1/y/Y_2005.npy
    ...
    inputfiles/no2_sample_input/stage2/x/X_2015.npy
    """
    # Validate the directory, then walk it
    checked_dir = verify_path(path)
    found = recursive_paths(checked_dir)
    # Quiet mode: nothing to print
    if not verb:
        return found
    # Verbose mode: one path per line
    for found_path in found:
        print(found_path)
    return found
[docs]
def recursive_paths(
    path,
):
    """ Create list recursively of all non-hidden files in the given path.

    Get all files below the given path by recursively calling the function
    itself on every sub-directory. Files whose name starts with a `.` are
    skipped; directories are always descended into, whatever their name.
    Assumes the path is a directory that exists, as is confirmed when called
    from show_available_data().

    Parameters
    ----------
    path : `str`
        Relative path to the directory containing data files.

    Returns
    -------
    path_list : `list`
        List of file paths below the given directory, in the order
        produced by `os.listdir()`.

    Examples
    --------
    >>> path_list = recursive_paths('datafiles')
    ['datafiles/README.md',
    'datafiles/concatenate.py',
    'datafiles/download_era5.sh',
    ...
    'datafiles/sample_data/daily_42602_2019.csv',
    'datafiles/sample_data/nox_2019_t106_US.nc']
    """
    # Create an empty list in which to store the paths
    path_list = []
    # Iterate through all entries in the directory
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            # The recursive call has already dropped hidden files, so its
            # result can be added as-is
            path_list.extend(recursive_paths(full_path))
        elif not entry.startswith('.'):
            # Test the entry name itself rather than splitting the joined
            # path on '/', which fails where os.path.join uses another
            # separator
            path_list.append(full_path)
    return path_list
[docs]
def get_input_data(
    stage=1,
    x_or_y='y',
    year=2019,
    input_set='no2_sample_input',
    path_prefix='',
):
    """ Get the path of an input data file.

    Build the path
    `{path_prefix}inputfiles/{input_set}/stage{stage}/{x_or_y}/{X_OR_Y}_{year}.npy`,
    pass it through `verify_path()`, and confirm that it appears in the
    listing returned by `show_available_data()` for the input set.

    Parameters
    ----------
    stage : `int`, optional
        Stage of the data (1 or 2).
    x_or_y : `str`, optional
        'x' or 'y' to specify the type of data.
    year : `int`, optional
        Year of the data.
    input_set : `str`, optional
        Name of the directory under `inputfiles/` containing the data files.
    path_prefix : `str`, optional
        Prefix to the path to the directory containing data files.

    Returns
    -------
    file_path : `str`
        Path to the input data file, as returned by `verify_path()`.

    Raises
    ------
    ValueError
        If `stage` is not 1 or 2, or `x_or_y` is not 'x' or 'y'.
    FileNotFoundError
        If the path is not among the available data files.

    Examples
    --------
    >>> file_path = get_input_data(stage=1, x_or_y='y', year=2019)
    '../inputfiles/no2_sample_input/stage1/y/Y_2019.npy'
    """
    # Reject unsupported arguments before touching the file system
    allowed_stages = (1, 2)
    allowed_kinds = ('x', 'y')
    if stage not in allowed_stages:
        raise ValueError(f"(get_input_data) `stage` must be 1 or 2. Got: {stage}")
    if x_or_y not in allowed_kinds:
        raise ValueError(f"(get_input_data) `x_or_y` must be 'x' or 'y'. Got type: {type(x_or_y)}")
    # Assemble and verify the candidate path
    candidate = verify_path(
        f'{path_prefix}inputfiles/{input_set}/stage{stage}/{x_or_y}/{x_or_y.upper()}_{year}.npy'
    )
    # Compare against everything available for this input set
    available = show_available_data(f'{path_prefix}inputfiles/{input_set}/')
    if candidate in available:
        return candidate
    raise FileNotFoundError(f"(get_input_data) File {candidate} not found.")
[docs]
def get_one_input_var_array(
    var,
    **kwargs,
):
    """ Get the array of a single input variable from one input data file.

    The variable is classified as 'x' or 'y' with `x_or_y_var()`, the
    matching input file is located with `get_input_data()` and loaded, and
    the variable is selected along the last of the array's four axes.

    Parameters
    ----------
    var : `str`
        Name of the variable to get.
    **kwargs : `dict`
        Additional keyword arguments to pass to `get_input_data()`.
        Should include `stage`, `year`, and `input_set`. Must not include
        `x_or_y`, which is supplied by this function.

    Returns
    -------
    var_array : `numpy.ndarray`
        Array of the specified variable.
    var_index : `int`
        Index of the specified variable in the input data array.

    Raises
    ------
    ValueError
        If `var` is not listed in any entry of `input_vars_dict`.
    """
    # Imported inside the function, as in the rest of this module
    from unox.input import x_or_y_var, input_vars_dict
    # Classify the variable and locate its file
    x_or_y = x_or_y_var(var)
    source_file = get_input_data(x_or_y=x_or_y, **kwargs)
    # Load the full array; this happens before the variable lookup
    full_array = np.load(source_file)
    # Search every entry's variable list of the matching kind
    list_name = f'{x_or_y}_vars'
    for var_group in input_vars_dict.values():
        candidates = var_group[list_name]
        if var in candidates:
            var_index = candidates.index(var)
            return full_array[:, :, :, var_index], var_index
    raise ValueError(f"(get_one_input_var_array) Variable '{var}' not found in input_vars_dict: {input_vars_dict}")
[docs]
def get_one_t_input_var_array(
    var,
    this_date,
    **kwargs,
):
    """ Get an array of a single variable at the given date from the input data.

    The year is taken from the date, the variable's array for that year is
    fetched with `get_one_input_var_array()`, and the slice at the date's
    day-of-year index is returned.

    Parameters
    ----------
    var : `str`
        Name of the variable to get.
    this_date : `np.datetime64` or `str`
        Date and time to select from the data file.
        Expected format is 'YYYY-MM-DDTHH:MM:SS' or 'YYYY-MM-DD'.
    **kwargs : `dict`
        Additional keyword arguments forwarded through
        `get_one_input_var_array()` to `get_input_data()`, such as `stage`
        and `input_set`. Must not include `var` or `year`, which are
        supplied by this function.

    Returns
    -------
    var_array : `numpy.ndarray`
        Array of the specified variable at the given date.
    """
    from unox.data import get_YMD_from_date, get_DOY
    # Only the year is needed from the date components
    year, _month, _day = get_YMD_from_date(this_date)
    # Fetch the whole year for this variable; the variable index is unused
    year_array, _var_index = get_one_input_var_array(var, year=year, **kwargs)
    # NOTE(review): the value from get_DOY is used directly as the index on
    # the first axis - confirm it is zero-based
    day_index = get_DOY(this_date)
    return year_array[day_index, :, :]
[docs]
def get_pred_data(
    stage=1,
    HPC_run='no2_example_run',
    year=2019,
    path_prefix='',
):
    """ Get the path of a prediction data file.

    Build the path
    `{path_prefix}HPC_runs/{HPC_run}/stage{stage}_output/pred_X_{year}.npy`,
    pass it through `verify_path()`, and confirm that it appears in the
    listing returned by `show_available_data()` for `{path_prefix}HPC_runs/`.

    Parameters
    ----------
    stage : `int`, optional
        Stage of the data (1 or 2).
    HPC_run : `str`, optional
        ID of the HPC run.
    year : `int`, optional
        Year of the data.
    path_prefix : `str`, optional
        Prefix to the path to the directory containing data files.

    Returns
    -------
    file_path : `str`
        Path to the prediction data file, as returned by `verify_path()`.

    Raises
    ------
    ValueError
        If `stage` is not 1 or 2.
    FileNotFoundError
        If the path is not among the available data files.

    Examples
    --------
    >>> file_path = get_pred_data(stage=1, HPC_run='no2_example_run', year=2019)
    '../HPC_runs/no2_example_run/stage1_output/pred_X_2019.npy'
    """
    # Reject unsupported stages before touching the file system
    allowed_stages = (1, 2)
    if stage not in allowed_stages:
        raise ValueError(f"(get_pred_data) `stage` must be 1 or 2. Got: {stage}")
    # Assemble and verify the candidate path
    candidate = verify_path(
        f'{path_prefix}HPC_runs/{HPC_run}/stage{stage}_output/pred_X_{year}.npy'
    )
    # Compare against everything available under the HPC runs directory
    available = show_available_data(f'{path_prefix}HPC_runs/')
    if candidate in available:
        return candidate
    raise FileNotFoundError(f"(get_pred_data) File {candidate} not found.")
[docs]
def interpret_user_input(
    user_input,
):
    """ Interprets a yes/no input from the user.

    Takes the string obtained from prompting the user for a yes/no answer
    and returns True/False appropriately. Leading and trailing whitespace
    and letter case are ignored. The function does not prompt the user
    itself.

    Parameters
    ----------
    user_input : `str`
        The input the user entered.

    Returns
    -------
    `bool`
        True if the user input is 'y' or 'yes', False if 'n' or 'no'.

    Raises
    ------
    TypeError
        If `user_input` is not a string.
    ValueError
        If `user_input` is not one of 'y', 'yes', 'n', or 'no'.

    Examples
    --------
    >>> proceed = interpret_user_input(input('Do you wish to continue? (y/n): '))
    Do you wish to continue? (y/n): y
    >>> proceed
    True
    """
    # Verify argument types
    if not isinstance(user_input, str):
        raise TypeError(f"(interpret_user_input) `user_input` must be a string. Got type: {type(user_input)}")
    # Normalise once; no loop is needed because every outcome below either
    # returns or raises
    input_yn = user_input.strip().lower()
    if input_yn in ('y', 'yes'):
        return True
    if input_yn in ('n', 'no'):
        return False
    raise ValueError(f"(interpret_user_input) Invalid input: {user_input}. Please enter 'y' or 'n'.")