1.3 Getting Data: Make Concurrent Requests
In this example we will fetch historic timeseries data for a location using concurrent requests for increased speed.
It is important to note that it is possible to exceed your rate limit as you increase the number of parallel downloads, which may cause some requests to fail!
In [1]:
Copied!
# ! pip install pandas matplotlib
# ! pip install pandas matplotlib
In [1]:
Copied!
import concurrent.futures
import pandas as pd
from solcast.unmetered_locations import UNMETERED_LOCATIONS
from solcast import historic
import concurrent.futures
import pandas as pd
from solcast.unmetered_locations import UNMETERED_LOCATIONS
from solcast import historic
In [2]:
Copied!
site = UNMETERED_LOCATIONS["Stonehenge"]
latitude, longitude = site["latitude"], site["longitude"]
latitude, longitude
site = UNMETERED_LOCATIONS["Stonehenge"]
latitude, longitude = site["latitude"], site["longitude"]
latitude, longitude
Out[2]:
(51.178882, -1.826215)
In [3]:
Copied!
# /data/historic/radiation_and_weather returns up to 31 days data, so each request should be one month
months = pd.date_range("2022-01-01", "2023-01-01", freq="MS")
months
# /data/historic/radiation_and_weather returns up to 31 days data, so each request should be one month
months = pd.date_range("2022-01-01", "2023-01-01", freq="MS")
months
Out[3]:
DatetimeIndex(['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01', '2022-05-01', '2022-06-01', '2022-07-01', '2022-08-01', '2022-09-01', '2022-10-01', '2022-11-01', '2022-12-01', '2023-01-01'], dtype='datetime64[ns]', freq='MS')
In [4]:
Copied!
# make API calls
futures = []
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
for start, end in zip(months[:-1], months[1:]):
f = pool.submit(
historic.radiation_and_weather,
latitude=latitude,
longitude=longitude,
start=start,
end=end,
output_parameters=["ghi", "dni"],
period="PT5M"
)
futures.append(f)
# read API responses
df = []
for f in futures:
res = f.result()
if res.success:
df.append(res.to_pandas())
else:
# NOTE for production purposes you will need to deal with API failures, e.g. due rate-limiting!
pass
df = pd.concat(df)
df
# make API calls
futures = []
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
for start, end in zip(months[:-1], months[1:]):
f = pool.submit(
historic.radiation_and_weather,
latitude=latitude,
longitude=longitude,
start=start,
end=end,
output_parameters=["ghi", "dni"],
period="PT5M"
)
futures.append(f)
# read API responses
df = []
for f in futures:
res = f.result()
if res.success:
df.append(res.to_pandas())
else:
# NOTE for production purposes you will need to deal with API failures, e.g. due rate-limiting!
pass
df = pd.concat(df)
df
Out[4]:
dni | ghi | |
---|---|---|
period_end | ||
2022-01-01 00:05:00+00:00 | 0 | 0 |
2022-01-01 00:10:00+00:00 | 0 | 0 |
2022-01-01 00:15:00+00:00 | 0 | 0 |
2022-01-01 00:20:00+00:00 | 0 | 0 |
2022-01-01 00:25:00+00:00 | 0 | 0 |
... | ... | ... |
2022-12-31 23:40:00+00:00 | 0 | 0 |
2022-12-31 23:45:00+00:00 | 0 | 0 |
2022-12-31 23:50:00+00:00 | 0 | 0 |
2022-12-31 23:55:00+00:00 | 0 | 0 |
2023-01-01 00:00:00+00:00 | 0 | 0 |
105120 rows × 2 columns
In [5]:
Copied!
# plot up the results
dates = (df.index - pd.Timedelta(hours=1)).floor("1d")
(df.groupby(dates).sum() / 1000).plot(subplots=True, title="Daily Irradiance", ylabel="kWh/m^2")
# plot up the results
dates = (df.index - pd.Timedelta(hours=1)).floor("1d")
(df.groupby(dates).sum() / 1000).plot(subplots=True, title="Daily Irradiance", ylabel="kWh/m^2")
Out[5]:
array([<Axes: xlabel='period_end', ylabel='kWh/m^2'>, <Axes: xlabel='period_end', ylabel='kWh/m^2'>], dtype=object)
In [ ]:
Copied!