erddapClient
View Source
# Package entry point: re-export the public classes from their submodules so
# callers can write `from erddapClient import ERDDAP_Server`, etc.
from erddapClient.erddap_server import ERDDAP_Server
from erddapClient.erddap_dataset import ERDDAP_Dataset
from erddapClient.erddap_tabledap import ERDDAP_Tabledap
from erddapClient.erddap_griddap import ERDDAP_Griddap
from erddapClient.erddap_griddap_dimensions import ERDDAP_Griddap_dimensions, ERDDAP_Griddap_dimension

# Names exported by `from erddapClient import *`.
__all__ = ["ERDDAP_Server", "ERDDAP_Dataset", "ERDDAP_Tabledap", "ERDDAP_Griddap", "ERDDAP_Griddap_dimensions", "ERDDAP_Griddap_dimension"]

# Package version string.
__version__ = "1.0.0"
View Source
class ERDDAP_Server:
    """
    Class with the representation and methods to access an ERDDAP server.

    Values fetched from the server (`version`, `version_string`,
    `version_numeric`, `statusPageURL`, `statusValues`) are computed on first
    access and cached on the instance.
    """

    ALLDATASETS_VARIABLES = [
        'datasetID', 'accessible', 'institution', 'dataStructure',
        'cdm_data_type', 'class', 'title', 'minLongitude', 'maxLongitude',
        'longitudeSpacing', 'minLatitude', 'maxLatitude', 'latitudeSpacing',
        'minAltitude', 'maxAltitude', 'minTime', 'maxTime', 'timeSpacing',
        'griddap', 'subset', 'tabledap', 'MakeAGraph', 'sos', 'wcs', 'wms',
        'files', 'fgdc', 'iso19115', 'metadata', 'sourceUrl', 'infoUrl',
        'rss', 'email', 'testOutOfDate', 'outOfDate', 'summary'
    ]

    def __init__(self, url, auth=None, lazyload=True):
        """
        Constructs an ERDDAP Server object

        Arguments:

        `url` : The ERDDAP Server URL

        `auth` : Tuple with username and password, to access a protected
                 ERDDAP Server

        `lazyload` : Accepted for interface compatibility; this constructor
                     does not use it.
        """
        self.serverURL = url
        self.auth = auth
        self.tabledapAllDatasets = ERDDAP_Dataset(self.serverURL, 'allDatasets', auth=auth)
        """ An `erddapClient.ERDDAP_Tabledap` object with the reference to the "allDatasets"
            Dataset, [About allDatasets](https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDTableFromAllDatasets) """
        self.__status_values = None

    def __repr__(self):
        return erddap_server_repr(self)

    # NOTE on the cached properties below: inside the class body an attribute
    # written as `self.__name` is stored as `_ERDDAP_Server__name` (private
    # name mangling), but a string literal passed to hasattr() is NOT mangled.
    # The cache checks therefore spell out the mangled name explicitly;
    # checking for '__name' would never find the cached value.

    @property
    def version_numeric(self):
        """
        Returns `version` parsed by `parseNumericVersion`. The value is
        computed on first access and cached.
        """
        if not hasattr(self, '_ERDDAP_Server__version_numeric'):
            self.__version_numeric = parseNumericVersion(self.version)
        return self.__version_numeric

    @property
    def version(self):
        """
        Returns the text served at `<serverURL>/version`, with newlines
        removed. If the request fails for any reason the fallback string
        'ERDDAP_version=<1.22' is used instead. The result (including the
        fallback) is cached after the first access.
        """
        if not hasattr(self, '_ERDDAP_Server__version'):
            try:
                req = urlread(url_operations.url_join(self.serverURL, 'version'), self.auth)
                self.__version = req.text.replace("\n", "")
            except Exception:
                # Best effort: any failure is reported as an old server
                # (presumably one that lacks this endpoint -- confirm).
                # `Exception` rather than a bare except, so Ctrl-C and
                # SystemExit are not swallowed.
                self.__version = 'ERDDAP_version=<1.22'
        return self.__version

    @property
    def version_string(self):
        """
        Returns the text served at `<serverURL>/version_string`, with newlines
        removed. If the request fails for any reason the fallback string
        'ERDDAP_version_string=<1.80' is used instead. The result (including
        the fallback) is cached after the first access.
        """
        if not hasattr(self, '_ERDDAP_Server__version_string'):
            try:
                req = urlread(url_operations.url_join(self.serverURL, 'version_string'), self.auth)
                self.__version_string = req.text.replace("\n", "")
            except Exception:
                # Same best-effort fallback as `version`.
                self.__version_string = 'ERDDAP_version_string=<1.80'
        return self.__version_string

    def search(self, **filters):
        """
        Makes a search request to the ERDDAP Server

        Search filters kwargs:

        `searchFor` :

        * This is a Google-like search of the datasets metadata: Type the
          words you want to search for, with spaces between the words.
          ERDDAP will search for the words separately, not as a phrase.
        * To search for a phrase, put double quotes around the phrase (for
          example, "wind speed"). To exclude datasets with a specific word,
          use -excludedWord. To exclude datasets with a specific phrase, use
          -"excluded phrase".
        * Don't use AND between search terms. It is implied. The results
          will include only the datasets that have all of the specified
          words and phrases (and none of the excluded words and phrases) in
          the dataset's metadata (data about the dataset).
        * Searches are not case-sensitive.
        * To search for specific attribute values, use attName=attValue .
        * To find just grid or just table datasets, include protocol=griddap
          or protocol=tabledap in your search.
        * This ERDDAP is using searchEngine=original.
        * In this ERDDAP, you can search for any part of a word.
        * For example, searching for spee will find datasets with speed and
          datasets with WindSpeed.
        * In this ERDDAP, the last word in a phrase may be a partial word.
          For example, to find datasets from a specific website (usually
          the start of the datasetID), include (for example)
          "datasetID=erd" in your search.

        Optional filters:

        `itemsPerPage` : Set the maximum number of results. (Default: 1000)

        `page` : If the number of results is bigger than the "`itemsPerPage`"
                 you can specify the page of results. (Default: 1)

        Returns a `erddapClient.ERDDAP_SearchResults` object
        """
        searchURL = self.getSearchURL(**filters)
        rawSearchResults = urlread(searchURL, self.auth)
        dictSearchResult = rawSearchResults.json()
        formatedResults = ERDDAP_SearchResults(self.serverURL, dictSearchResult['table']['rows'])
        return formatedResults

    def getSearchURL(self, filetype='json', **searchFilters):
        """
        Builds the url call for the basic Search ERDDAP API Rest service.

        Arguments

        `filetype` : The result format (htmlTable, csv, json, tsv, etc)
                     [https://coastwatch.pfeg.noaa.gov/erddap/rest.html#responses](https://coastwatch.pfeg.noaa.gov/erddap/rest.html#responses)

        Search filters kwargs:

        `searchFor`

        * This is a Google-like search of the datasets metadata: Type the
          words you want to search for, with spaces between the words.
          ERDDAP will search for the words separately, not as a phrase.
        * To search for a phrase, put double quotes around the phrase (for
          example, "wind speed"). To exclude datasets with a specific word,
          use -excludedWord. To exclude datasets with a specific phrase, use
          -"excluded phrase".
        * Don't use AND between search terms. It is implied. The results
          will include only the datasets that have all of the specified
          words and phrases (and none of the excluded words and phrases) in
          the dataset's metadata (data about the dataset).
        * Searches are not case-sensitive.
        * To search for specific attribute values, use attName=attValue .
        * To find just grid or just table datasets, include protocol=griddap
          or protocol=tabledap in your search.
        * This ERDDAP is using searchEngine=original.
        * In this ERDDAP, you can search for any part of a word.
        * For example, searching for spee will find datasets with speed and
          datasets with WindSpeed.
        * In this ERDDAP, the last word in a phrase may be a partial word.
          For example, to find datasets from a specific website (usually
          the start of the datasetID), include (for example)
          "datasetID=erd" in your search.

        Optional filters:

        `itemsPerPage` : Set the maximum number of results. (Default: 1000)

        `page` : If the number of results is bigger than the "itemsPerPage"
                 you can specify the page of results. (Default: 1)

        Returns a string with the url search request.
        """
        searchAPIEndpoint = "search/index.{}".format(filetype)
        searchAPIURL = url_operations.url_join(self.serverURL, searchAPIEndpoint)

        queryElementsDefaults = {'page': 1, 'itemsPerPage': 1000, 'searchFor': None}
        queryURL = []

        # Every known query element is always emitted, using its default
        # when the caller did not supply it.
        for queryElement, queryElementDefault in queryElementsDefaults.items():

            queryValue = searchFilters.get(queryElement, queryElementDefault)

            if queryElement == 'searchFor':
                # The free-text search is the only element that is
                # percent/plus encoded here.
                if queryValue:
                    queryValue = quote_plus(queryValue)
                queryURL.append(queryElement + "=" + ("" if queryValue is None else queryValue))
                continue

            if queryValue is None:
                queryURL.append(queryElement + "=")
            else:
                queryURL.append(queryElement + "=" + str(queryValue))

        return url_operations.joinURLElements(searchAPIURL, url_operations.parseQueryItems(queryURL, safe='=+-&'))

    def advancedSearch(self, **filters):
        """
        Makes an advancedSearch request to the ERDDAP Server

        Search filters kwargs:

        `searchFor` : This is a Google-like search of the datasets metadata,
                      set the words you want to search for with spaces
                      between the words. ERDDAP will search for the words
                      separately, not as a phrase.
                      To search for a phrase, put double quotes around the
                      phrase (for example, "wind speed").
                      To exclude datasets with a specific word, use
                      -excludedWord.
                      To exclude datasets with a specific phrase, use
                      -"excluded phrase"
                      To search for specific attribute values, use
                      attName=attValue
                      To find just grid or table datasets, include
                      protocol=griddap or protocol=tabledap

        Optional filters:

        `protocol` : Set either: griddap, tabledap or wms (Default: (ANY))

        `cdm_data_type` : Set either: grid, timeseries, point,
                          timeseriesProfile, trajectory, trajectoryProfile,
                          etc.. (Default: (ANY))

        `institution` : Set either to one of the available institution values
                        in the ERDDAP (Default: (ANY))

        `ioos_category` : Set either to one of the available ioos_category
                          values in the ERDDAP (Default: (ANY))

        `keywords` : Set either to one of the available keywords values in
                     the ERDDAP (Default: (ANY))

        `long_name` : Set either to one of the available long_name values in
                      the ERDDAP (Default: (ANY))

        `standard_name` : Set either to one of the available standard_name
                          values in the ERDDAP (Default: (ANY))

        `variableName` : Set either to one of the available variable names
                         values in the ERDDAP (Default: (ANY))

        `minLon`, `maxLon` : Some datasets have longitude values within -180
                      to 180, others use 0 to 360. If you specify Min and Max
                      Longitude within -180 to 180 (or 0 to 360), ERDDAP will
                      only find datasets that match the values you specify.
                      Consider doing one search: longitude -180 to 360, or
                      two searches: longitude -180 to 180, and 0 to 360.

        `minLat`, `maxLat` : Set latitude bounds, range -90 to 90

        `minTime`, `maxTime` : You can pass a `datetime` object or a string
                      with the following specifications

        - A time string with the format yyyy-MM-ddTHH:mm:ssZ, for example,
          2009-01-21T23:00:00Z. If you specify something, you must include
          yyyy-MM-dd. You can omit (backwards from the end) Z, :ss, :mm,
          :HH, and T. Always use UTC (GMT/Zulu) time.
        - Or specify the number of seconds since 1970-01-01T00:00:00Z.
        - Or specify "now-nUnits", for example, "now-7days"

        The search will find datasets that have some data within the
        specified time bounds.

        `itemsPerPage` : Set the maximum number of results. (Default: 1000)

        `page` : If the number of results is bigger than the "`itemsPerPage`"
                 you can specify the page of results. (Default: 1)

        Returns a `erddapClient.ERDDAP_SearchResults` object
        """
        searchURL = self.getAdvancedSearchURL(**filters)
        rawSearchResults = urlread(searchURL, self.auth)
        dictSearchResult = rawSearchResults.json()
        formatedResults = ERDDAP_SearchResults(self.serverURL, dictSearchResult['table']['rows'])
        return formatedResults

    def getAdvancedSearchURL(self, filetype='json', **searchFilters):
        """
        Builds the url call for the advanced Search ERDDAP API Rest service.

        Arguments

        `filetype` : The result format for the request (Default: json)

        Search filters kwargs:

        `searchFor` : This is a Google-like search of the datasets metadata,
                      set the words you want to search for with spaces
                      between the words. ERDDAP will search for the words
                      separately, not as a phrase.
                      To search for a phrase, put double quotes around the
                      phrase (for example, "wind speed").
                      To exclude datasets with a specific word, use
                      -excludedWord.
                      To exclude datasets with a specific phrase, use
                      -"excluded phrase"
                      To search for specific attribute values, use
                      attName=attValue
                      To find just grid or table datasets, include
                      protocol=griddap or protocol=tabledap

        Optional filters:

        `protocol` : Set either: griddap, tabledap or wms (Default: (ANY))

        `cdm_data_type` : Set either: grid, timeseries, point,
                          timeseriesProfile, trajectory, trajectoryProfile,
                          etc.. (Default: (ANY))

        `institution` : Set either to one of the available institution values
                        in the ERDDAP (Default: (ANY))

        `ioos_category` : Set either to one of the available ioos_category
                          values in the ERDDAP (Default: (ANY))

        `keywords` : Set either to one of the available keywords values in
                     the ERDDAP (Default: (ANY))

        `long_name` : Set either to one of the available long_name values in
                      the ERDDAP (Default: (ANY))

        `standard_name` : Set either to one of the available standard_name
                          values in the ERDDAP (Default: (ANY))

        `variableName` : Set either to one of the available variable names
                         values in the ERDDAP (Default: (ANY))

        `minLon`, `maxLon` : Some datasets have longitude values within -180
                      to 180, others use 0 to 360. If you specify Min and Max
                      Longitude within -180 to 180 (or 0 to 360), ERDDAP will
                      only find datasets that match the values you specify.
                      Consider doing one search: longitude -180 to 360, or
                      two searches: longitude -180 to 180, and 0 to 360.

        `minLat`, `maxLat` : Set latitude bounds, range -90 to 90

        `minTime`, `maxTime` : You can pass a `datetime` object or a string
                      with the following specifications

        - A time string with the format yyyy-MM-ddTHH:mm:ssZ, for example,
          2009-01-21T23:00:00Z. If you specify something, you must include
          yyyy-MM-dd. You can omit (backwards from the end) Z, :ss, :mm,
          :HH, and T. Always use UTC (GMT/Zulu) time.
        - Or specify the number of seconds since 1970-01-01T00:00:00Z.
        - Or specify "now-nUnits", for example, "now-7days"

        The search will find datasets that have some data within the
        specified time bounds.

        `itemsPerPage` : Set the maximum number of results. (Default: 1000)

        `page` : If the number of results is bigger than the "`itemsPerPage`"
                 you can specify the page of results. (Default: 1)

        Returns the string url for the search service.
        """
        searchAPIEndpoint = "search/advanced.{}".format(filetype)
        searchAPIURL = url_operations.url_join(self.serverURL, searchAPIEndpoint)

        queryElementsDefaults = {
            'page': 1, 'itemsPerPage': 1000, 'searchFor': None,
            'protocol': "(ANY)", 'cdm_data_type': "(ANY)",
            'institution': "(ANY)", 'ioos_category': "(ANY)",
            'keywords': "(ANY)", 'long_name': "(ANY)",
            'standard_name': "(ANY)", 'variableName': "(ANY)",
            'maxLat': None, 'minLon': None, 'maxLon': None, 'minLat': None,
            'minTime': None, 'maxTime': None,
        }
        queryURL = []

        # Every known query element is always emitted, using its default
        # when the caller did not supply it.
        for queryElement, queryElementDefault in queryElementsDefaults.items():

            queryValue = searchFilters.get(queryElement, queryElementDefault)

            if queryElement == 'searchFor':
                # The free-text search is the only element that is
                # percent/plus encoded here.
                if queryValue:
                    queryValue = quote_plus(queryValue)
                queryURL.append(queryElement + "=" + ("" if queryValue is None else queryValue))
                continue

            if queryValue is None:
                queryURL.append(queryElement + "=")
            elif queryElement in ['minTime', 'maxTime']:
                # Time bounds go through parseConstraintDateTime before
                # being placed in the query.
                queryURL.append(queryElement + "=" + parseConstraintDateTime(queryValue))
            else:
                queryURL.append(queryElement + "=" + str(queryValue))

        return url_operations.joinURLElements(searchAPIURL, url_operations.parseQueryItems(queryURL, safe='=+-&'))

    def getQueryAllDatasetsURL(self, filetype='json', constraints=None):
        """
        This method returns a string URL with the allDatasets default
        Tabledap Dataset from ERDDAP.

        Arguments:

        `filetype` : The result format for the request

        `constraints` : The request constraints list. `None` (the default)
                        means no constraints.

        Returns a url string
        """
        # A fresh list per call instead of a shared mutable default argument.
        if constraints is None:
            constraints = []

        resultVariables = self.ALLDATASETS_VARIABLES
        response = (
            self.tabledapAllDatasets.setResultVariables(resultVariables)
                                    .setConstraints(constraints)
                                    .getDataRequestURL(filetype=filetype)
        )
        return response

    @property
    def statusPageURL(self):
        """
        Returns the status.html url for the current ERDDAP Server reference.
        The value is built on first access and cached.
        """
        if not hasattr(self, '_ERDDAP_Server__statusPageURL'):
            self.__statusPageURL = url_operations.url_join(self.serverURL, 'status.html')
        return self.__statusPageURL

    def parseStatusPage(self, force=False):
        """
        This method will load the status.html page of the current ERDDAP
        server reference; this data is parsed into a OrderedDict, with the
        scalars, and DataFrames with the tables provided in status.html page.
        The data will be available in the
        `erddapClient.ERDDAP_Server.statusValues` property

        Parameters:

        `force` : Data is stored in a class property, if force is True, the
                  data will be reloaded, if False, the last loaded data is
                  returned.
        """
        if self.__status_values is None or force:
            # Calls the undecorated function behind `urlread`
            # (presumably to bypass a caching decorator -- confirm).
            statusPageCode = urlread.__wrapped__(self.statusPageURL, self.auth).text
            self.__status_values = parseERDDAPStatusPage(statusPageCode, numversion=self.version_numeric)

    @property
    def statusValues(self):
        """
        Returns a OrderedDict with the parsed data of the status.html page.
        More information on the data provided in status.html:
        [ERDDAP documentation](https://coastwatch.pfeg.noaa.gov/erddap/download/setup.html#monitoring)
        """
        self.parseStatusPage(force=False)
        return self.__status_values
Class with the representation and methods to access a ERDDAP server.
View Source
def __init__(self, url, auth=None, lazyload=True):
    """
    Constructs an ERDDAP Server object.

    Arguments:

    `url` : The ERDDAP Server URL

    `auth` : Tuple with username and password, to access a protected ERDDAP Server

    `lazyload` : Accepted, but not used by this constructor.
    """
    # Connection settings shared by every request made through this object.
    self.serverURL, self.auth = url, auth

    # Handle on the special "allDatasets" dataset of the server.
    self.tabledapAllDatasets = ERDDAP_Dataset(url, 'allDatasets', auth=auth)
    """ An `erddapClient.ERDDAP_Tabledap` object with the reference to the "allDatasets"
        Dataset, [About allDatasets](https://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#EDDTableFromAllDatasets) """

    # Filled in by parseStatusPage() the first time it is needed.
    self.__status_values = None
Constructs a ERDDAP Server object ... Arguments:
url
: The ERDDAP Server URL
auth
: Tuple with username and password, to access a protected ERDDAP Server
An erddapClient.ERDDAP_Tabledap
object with the reference to the "allDatasets"
Dataset, About allDatasets
View Source
def search(self, **filters):
    """
    Runs a basic search on the ERDDAP Server and wraps the result rows.

    Search filters kwargs:

    `searchFor` :

    * A Google-like search of the datasets metadata: type the words you want
      to search for, separated by spaces. ERDDAP searches for the words
      separately, not as a phrase.
    * To search for a phrase, put double quotes around it (for example,
      "wind speed"). To exclude datasets with a specific word, use
      -excludedWord. To exclude datasets with a specific phrase, use
      -"excluded phrase".
    * Don't use AND between search terms; it is implied. The results include
      only the datasets that have all of the specified words and phrases
      (and none of the excluded ones) in the dataset's metadata.
    * Searches are not case-sensitive.
    * To search for specific attribute values, use attName=attValue .
    * To find just grid or just table datasets, include protocol=griddap or
      protocol=tabledap in your search.
    * This ERDDAP is using searchEngine=original.
    * In this ERDDAP, you can search for any part of a word. For example,
      searching for spee will find datasets with speed and datasets with
      WindSpeed.
    * In this ERDDAP, the last word in a phrase may be a partial word. For
      example, to find datasets from a specific website (usually the start
      of the datasetID), include (for example) "datasetID=erd" in your
      search.

    Optional filters:

    `itemsPerPage` : Set the maximum number of results. (Default: 1000)

    `page` : If the number of results is bigger than the "`itemsPerPage`"
             you can specify the page of results. (Default: 1)

    Returns a `erddapClient.ERDDAP_SearchResults` object
    """
    # Build the request URL, fetch it, and decode the JSON payload.
    payload = urlread(self.getSearchURL(**filters), self.auth).json()

    # Only the rows of the response table are handed to the results wrapper.
    return ERDDAP_SearchResults(self.serverURL, payload['table']['rows'])
Makes a search request to the ERDDAP Server
Search filters kwargs:
searchFor
:
- This is a Google-like search of the datasets metadata: Type the words you want to search for, with spaces between the words. ERDDAP will search for the words separately, not as a phrase.
- To search for a phrase, put double quotes around the phrase (for example, "wind speed"). To exclude datasets with a specific word, use -excludedWord To exclude datasets with a specific phrase, use -"excluded phrase".
- Don't use AND between search terms. It is implied. The results will include only the datasets that have all of the specified words and phrases (and none of the excluded words and phrases) in the dataset's metadata (data about the dataset).
- Searches are not case-sensitive.
- To search for specific attribute values, use attName=attValue .
- To find just grid or just table datasets, include protocol=griddap or protocol=tabledap in your search.
- This ERDDAP is using searchEngine=original.
- In this ERDDAP, you can search for any part of a word.
- For example, searching for spee will find datasets with speed and datasets with WindSpeed.
- In this ERDDAP, the last word in a phrase may be a partial word. For example, to find datasets from a specific website (usually the start of the datasetID), include (for example) "datasetID=erd" in your search.
Optional filters:
itemsPerPage
: Set the maximum number of results. (Default: 1000)
page
: If the number of results is bigger than the "itemsPerPage" you can
specify the page of results. (Default: 1)
Returns a erddapClient.ERDDAP_SearchResults
object
View Source
def getSearchURL(self, filetype='json', **searchFilters):
    """
    Builds the request URL for the basic Search ERDDAP REST service.

    Arguments

    `filetype` : The result format (htmlTable, csv, json, tsv, etc)
                 [https://coastwatch.pfeg.noaa.gov/erddap/rest.html#responses](https://coastwatch.pfeg.noaa.gov/erddap/rest.html#responses)

    Search filters kwargs:

    `searchFor`

    * A Google-like search of the datasets metadata: type the words you want
      to search for, separated by spaces. ERDDAP searches for the words
      separately, not as a phrase.
    * To search for a phrase, put double quotes around it (for example,
      "wind speed"). To exclude datasets with a specific word, use
      -excludedWord. To exclude datasets with a specific phrase, use
      -"excluded phrase".
    * Don't use AND between search terms; it is implied. The results include
      only the datasets that have all of the specified words and phrases
      (and none of the excluded ones) in the dataset's metadata.
    * Searches are not case-sensitive.
    * To search for specific attribute values, use attName=attValue .
    * To find just grid or just table datasets, include protocol=griddap or
      protocol=tabledap in your search.
    * This ERDDAP is using searchEngine=original.
    * In this ERDDAP, you can search for any part of a word. For example,
      searching for spee will find datasets with speed and datasets with
      WindSpeed.
    * In this ERDDAP, the last word in a phrase may be a partial word. For
      example, to find datasets from a specific website (usually the start
      of the datasetID), include (for example) "datasetID=erd" in your
      search.

    Optional filters:

    `itemsPerPage` : Set the maximum number of results. (Default: 1000)

    `page` : If the number of results is bigger than the "itemsPerPage" you
             can specify the page of results. (Default: 1)

    Returns a string with the url search request.
    """
    endpoint = "search/index.{}".format(filetype)
    baseURL = url_operations.url_join(self.serverURL, endpoint)

    # Query elements in emission order, each with the value used when the
    # caller does not provide one.
    defaults = {'page': 1, 'itemsPerPage': 1000, 'searchFor': None}

    queryItems = []
    for name, fallback in defaults.items():
        value = searchFilters.get(name, fallback)

        if name == 'searchFor':
            # Only the free-text search term is encoded here.
            if value:
                value = quote_plus(value)
            rendered = "" if value is None else value
        elif value is None:
            rendered = ""
        else:
            rendered = str(value)

        queryItems.append(name + "=" + rendered)

    query = url_operations.parseQueryItems(queryItems, safe='=+-&')
    return url_operations.joinURLElements(baseURL, query)
Builds the url call for the basic Search ERDDAP API Rest service.
Arguments
filetype
: The result format (htmlTable, csv, json, tsv, etc)
https://coastwatch.pfeg.noaa.gov/erddap/rest.html#responses
Search filters kwargs:
searchFor
- This is a Google-like search of the datasets metadata: Type the words you want to search for, with spaces between the words. ERDDAP will search for the words separately, not as a phrase.
- To search for a phrase, put double quotes around the phrase (for example, "wind speed"). To exclude datasets with a specific word, use -excludedWord To exclude datasets with a specific phrase, use -"excluded phrase".
- Don't use AND between search terms. It is implied. The results will include only the datasets that have all of the specified words and phrases (and none of the excluded words and phrases) in the dataset's metadata (data about the dataset).
- Searches are not case-sensitive.
- To search for specific attribute values, use attName=attValue .
- To find just grid or just table datasets, include protocol=griddap or protocol=tabledap in your search.
- This ERDDAP is using searchEngine=original.
- In this ERDDAP, you can search for any part of a word.
- For example, searching for spee will find datasets with speed and datasets with WindSpeed.
- In this ERDDAP, the last word in a phrase may be a partial word. For example, to find datasets from a specific website (usually the start of the datasetID), include (for example) "datasetID=erd" in your search.
Optional filters:
itemsPerPage
: Set the maximum number of results. (Default: 1000)
page
: If the number of results is bigger than the "itemsPerPage" you can
specify the page of results. (Default: 1)
Returns a string with the url search request.
View Source
def advancedSearch(self, **filters):
    """
    Runs an advanced search on the ERDDAP Server and wraps the result rows.

    Search filters kwargs:

    `searchFor` : This is a Google-like search of the datasets metadata, set
                  the words you want to search for with spaces between the
                  words. ERDDAP will search for the words separately, not as
                  a phrase.
                  To search for a phrase, put double quotes around the
                  phrase (for example, "wind speed").
                  To exclude datasets with a specific word, use
                  -excludedWord.
                  To exclude datasets with a specific phrase, use
                  -"excluded phrase"
                  To search for specific attribute values, use
                  attName=attValue
                  To find just grid or table datasets, include
                  protocol=griddap or protocol=tabledap

    Optional filters:

    `protocol` : Set either: griddap, tabledap or wms (Default: (ANY))

    `cdm_data_type` : Set either: grid, timeseries, point, timeseriesProfile,
                      trajectory, trajectoryProfile, etc.. (Default: (ANY))

    `institution` : Set either to one of the available institution values in
                    the ERDDAP (Default: (ANY))

    `ioos_category` : Set either to one of the available ioos_category values
                      in the ERDDAP (Default: (ANY))

    `keywords` : Set either to one of the available keywords values in the
                 ERDDAP (Default: (ANY))

    `long_name` : Set either to one of the available long_name values in the
                  ERDDAP (Default: (ANY))

    `standard_name` : Set either to one of the available standard_name values
                      in the ERDDAP (Default: (ANY))

    `variableName` : Set either to one of the available variable names values
                     in the ERDDAP (Default: (ANY))

    `minLon`, `maxLon` : Some datasets have longitude values within -180 to
                  180, others use 0 to 360. If you specify Min and Max
                  Longitude within -180 to 180 (or 0 to 360), ERDDAP will
                  only find datasets that match the values you specify.
                  Consider doing one search: longitude -180 to 360, or two
                  searches: longitude -180 to 180, and 0 to 360.

    `minLat`, `maxLat` : Set latitude bounds, range -90 to 90

    `minTime`, `maxTime` : You can pass a `datetime` object or a string with
                  the following specifications

    - A time string with the format yyyy-MM-ddTHH:mm:ssZ, for example,
      2009-01-21T23:00:00Z. If you specify something, you must include
      yyyy-MM-dd. You can omit (backwards from the end) Z, :ss, :mm, :HH,
      and T. Always use UTC (GMT/Zulu) time.
    - Or specify the number of seconds since 1970-01-01T00:00:00Z.
    - Or specify "now-nUnits", for example, "now-7days"

    The search will find datasets that have some data within the specified
    time bounds.

    `itemsPerPage` : Set the maximum number of results. (Default: 1000)

    `page` : If the number of results is bigger than the "`itemsPerPage`"
             you can specify the page of results. (Default: 1)

    Returns a `erddapClient.ERDDAP_SearchResults` object
    """
    # Build the request URL, fetch it, and decode the JSON payload.
    payload = urlread(self.getAdvancedSearchURL(**filters), self.auth).json()

    # Only the rows of the response table are handed to the results wrapper.
    return ERDDAP_SearchResults(self.serverURL, payload['table']['rows'])
Makes a advancedSearch request to the ERDDAP Server
Search filters kwargs:
searchFor
: This is a Google-like search of the datasets metadata, set the words you
want to search for with spaces between the words. ERDDAP will search
for the words separately, not as a phrase.
To search for a phrase, put double quotes around the phrase (for
example, "wind speed").
To exclude datasets with a specific word, use -excludedWord.
To exclude datasets with a specific phrase, use -"excluded phrase"
To search for specific attribute values, use attName=attValue
To find just grid or table datasets, include protocol=griddap
or protocol=tabledap
Optional filters:
protocol
: Set either: griddap, tabledap or wms (Default: (ANY))
cdm_data_type
: Set either: grid, timeseries, point, timeseriesProfile, trajectory
trajectoryProfile, etc.. (Default: (ANY))
institution
: Set either to one of the available institution values in the ERDDAP
(Default: (ANY))
ioos_category
: Set either to one of the available ioos_category values in the ERDDAP
(Default: (ANY))
keywords
: Set either to one of the available keywords values in the ERDDAP
(Default: (ANY))
long_name
: Set either to one of the available long_name values in the ERDDAP
(Default: (ANY))
standard_name
: Set either to one of the available standard_name values in the ERDDAP
(Default: (ANY))
variableName
: Set either to one of the available variable names values in the
ERDDAP (Default: (ANY))
minLon
, maxLon
: Some datasets have longitude values within -180 to 180, others
use 0 to 360. If you specify Min and Max Longitude within -180 to 180
(or 0 to 360), ERDDAP will only find datasets that match the values
you specify. Consider doing one search: longitude -180 to 360, or
two searches: longitude -180 to 180, and 0 to 360.
minLat
, maxLat
: Set latitude bounds, range -90 to 90
minTime
, maxTime
: You can pass a datetime object or a string with the following specifications:
- A time string with the format yyyy-MM-ddTHH:mm:ssZ,
for example, 2009-01-21T23:00:00Z. If you specify something, you must include yyyy-MM-dd.
You can omit (backwards from the end) Z, :ss, :mm, :HH, and T.
Always use UTC (GMT/Zulu) time.
- Or specify the number of seconds since 1970-01-01T00:00:00Z.
- Or specify "now-nUnits", for example, "now-7days"
itemsPerPage
: Set the maximum number of results. (Default: 1000)
page
: If the number of results is bigger than the "itemsPerPage" you can
specify the page of results. (Default: 1)
The search will find datasets that have some data within the specified time bounds.
Returns a erddapClient.ERDDAP_SearchResults
object
View Source
def getAdvancedSearchURL(self, filetype='json', **searchFilters):
    """
    Assembles the request url for the advanced Search ERDDAP API Rest service.

    Only the filters listed below are used; any other keyword argument is
    ignored, and every filter that is not supplied is sent with its default.

    Search filters kwargs:

    `searchFor`

    : This is a Google-like search of the datasets metadata, set the words you
      want to search for with spaces between the words. ERDDAP will search
      for the words separately, not as a phrase.
      To search for a phrase, put double quotes around the phrase (for
      example, "wind speed").
      To exclude datasets with a specific word, use -excludedWord.
      To exclude datasets with a specific phrase, use -"excluded phrase"
      To search for specific attribute values, use attName=attValue
      To find just grid or table datasets, include protocol=griddap
      or protocol=tabledap

    Optional filters:

    `protocol`

    : Set either: griddap, tabledap or wms (Default: (ANY))

    `cdm_data_type`

    : Set either: grid, timeseries, point, timeseriesProfile, trajectory
      trajectoryProfile, etc.. (Default: (ANY))

    `institution`

    : Set either to one of the available institution values in the ERDDAP
      (Default: (ANY))

    `ioos_category`

    : Set either to one of the available ioos_category values in the ERDDAP
      (Default: (ANY))

    `keywords`

    : Set either to one of the available keywords values in the ERDDAP
      (Default: (ANY))

    `long_name`

    : Set either to one of the available long_name values in the ERDDAP
      (Default: (ANY))

    `standard_name`

    : Set either to one of the available standard_name values in the ERDDAP
      (Default: (ANY))

    `variableName`

    : Set either to one of the available variable names values in the
      ERDDAP (Default: (ANY))

    `minLon`, `maxLon`

    : Some datasets have longitude values within -180 to 180, others
      use 0 to 360. If you specify Min and Max Longitude within -180 to 180
      (or 0 to 360), ERDDAP will only find datasets that match the values
      you specify. Consider doing one search: longitude -180 to 360, or
      two searches: longitude -180 to 180, and 0 to 360.

    `minLat`, `maxLat`

    : Set latitude bounds, range -90 to 90

    `minTime`, `maxTime`

    : You can pass a <datetime> object or a string with the following
      specifications

    > - A time string with the format yyyy-MM-ddTHH:mm:ssZ,
        for example, 2009-01-21T23:00:00Z. If you specify something, you
        must include yyyy-MM-dd.
        You can omit (backwards from the end) Z, :ss, :mm, :HH, and T.
        Always use UTC (GMT/Zulu) time.
      - Or specify the number of seconds since 1970-01-01T00:00:00Z.
      - Or specify "now-nUnits", for example, "now-7days"

    `itemsPerPage`

    : Set the maximum number of results. (Default: 1000)

    `page`

    : If the number of results is bigger than the "`itemsPerPage`" you can
      specify the page of results. (Default: 1)

    The search will find datasets that have some data within the specified
    time bounds.

    Returns the string url for the search service.
    """
    endpoint = "search/advanced.{}".format(filetype)
    baseURL = url_operations.url_join(self.serverURL, endpoint)

    # The insertion order of this table is the order of the parameters in
    # the generated query string.
    filterDefaults = {
        'page': 1,
        'itemsPerPage': 1000,
        'searchFor': None,
        'protocol': "(ANY)",
        'cdm_data_type': "(ANY)",
        'institution': "(ANY)",
        'ioos_category': "(ANY)",
        'keywords': "(ANY)",
        'long_name': "(ANY)",
        'standard_name': "(ANY)",
        'variableName': "(ANY)",
        'maxLat': None,
        'minLon': None,
        'maxLon': None,
        'minLat': None,
        'minTime': None,
        'maxTime': None,
    }
    timeFilters = ['minTime', 'maxTime']

    queryItems = []
    for name, fallback in filterDefaults.items():
        value = searchFilters.get(name, fallback)
        if name == 'searchFor':
            # The free-text search is the only value quoted here.
            if value:
                value = quote_plus(value)
            encoded = "" if value is None else value
        elif value is None:
            # Unset filters are still sent, with an empty value.
            encoded = ""
        elif name in timeFilters:
            encoded = parseConstraintDateTime(value)
        else:
            encoded = str(value)
        queryItems.append(name + "=" + encoded)

    queryString = url_operations.parseQueryItems(queryItems, safe='=+-&')
    return url_operations.joinURLElements(baseURL, queryString)
Builds the url call for the advanced Search ERDDAP API Rest service.
Search filters kwargs:
searchFor
: This is a Google-like search of the datasets metadata, set the words you
want to search for with spaces between the words. ERDDAP will search
for the words separately, not as a phrase.
To search for a phrase, put double quotes around the phrase (for
example, "wind speed").
To exclude datasets with a specific word, use -excludedWord.
To exclude datasets with a specific phrase, use -"excluded phrase"
To search for specific attribute values, use attName=attValue
To find just grid or table datasets, include protocol=griddap
or protocol=tabledap
Optional filters:
protocol
: Set either: griddap, tabledap or wms (Default: (ANY))
cdm_data_type
: Set either: grid, timeseries, point, timeseriesProfile, trajectory
trajectoryProfile, etc.. (Default: (ANY))
institution
: Set either to one of the available institution values in the ERDDAP
(Default: (ANY))
ioos_category
: Set either to one of the available ioos_category values in the ERDDAP
(Default: (ANY))
keywords
: Set either to one of the available keywords values in the ERDDAP
(Default: (ANY))
long_name
: Set either to one of the available long_name values in the ERDDAP
(Default: (ANY))
standard_name
: Set either to one of the available standard_name values in the ERDDAP
(Default: (ANY))
variableName
: Set either to one of the available variable names values in the
ERDDAP (Default: (ANY))
minLon
, maxLon
: Some datasets have longitude values within -180 to 180, others
use 0 to 360. If you specify Min and Max Longitude within -180 to 180
(or 0 to 360), ERDDAP will only find datasets that match the values
you specify. Consider doing one search: longitude -180 to 360, or
two searches: longitude -180 to 180, and 0 to 360.
minLat
, maxLat
: Set latitude bounds, range -90 to 90
minTime
, maxTime
: You can pass a datetime object or a string with the following specifications:
- A time string with the format yyyy-MM-ddTHH:mm:ssZ,
for example, 2009-01-21T23:00:00Z. If you specify something, you must include yyyy-MM-dd.
You can omit (backwards from the end) Z, :ss, :mm, :HH, and T.
Always use UTC (GMT/Zulu) time.
- Or specify the number of seconds since 1970-01-01T00:00:00Z.
- Or specify "now-nUnits", for example, "now-7days"
itemsPerPage
: Set the maximum number of results. (Default: 1000)
page
: If the number of results is bigger than "itemsPerPage", you can
specify the page of results. (Default: 1)
The search will find datasets that have some data within the specified time bounds.
Returns the string url for the search service.
View Source
def getQueryAllDatasetsURL(self, filetype='json', constraints=[]):
    """
    Builds the data request URL for the "allDatasets" Tabledap Dataset of
    this ERDDAP server.

    The request asks for every variable named in `ALLDATASETS_VARIABLES`
    and replaces any constraints previously set on the allDatasets object.

    Arguments:

    `filetype` : The result format for the request

    `constraints` : The request constraints list

    Returns a url string
    """
    # Each setter hands back the object to continue with, so the chain is
    # spelled out one step at a time.
    withVariables = self.tabledapAllDatasets.setResultVariables(self.ALLDATASETS_VARIABLES)
    withConstraints = withVariables.setConstraints(constraints)
    return withConstraints.getDataRequestURL(filetype=filetype)
This method returns a string URL with the allDatasets default Tabledap Dataset from ERDDAP.
Arguments:
filetype
: The result format for the request
constraints
: The request constraints list
Returns a url string
Returns the status.html url for the current ERDDAP Server reference.
View Source
def parseStatusPage(self, force=False):
    """
    Downloads the status.html page of the current ERDDAP server reference
    and keeps the parsed result on the object. The parsing is delegated to
    `parseERDDAPStatusPage`, which receives the page text and the numeric
    server version.

    The data will be available in the `erddapClient.ERDDAP_Server.statusValues`
    property. This method itself returns nothing.

    Parameters:

    `force` : Data is stored in a class property, if force is True, the data
              will be reloaded, if False, the last loaded data is kept.
    """
    # Nothing to do when a parsed copy already exists and no reload is asked.
    if self.__status_values is not None and not force:
        return

    # NOTE(review): `__wrapped__` presumably bypasses a decorator (e.g. a
    # cache) applied to urlread -- confirm against its definition.
    statusResponse = urlread.__wrapped__(self.statusPageURL, self.auth)
    self.__status_values = parseERDDAPStatusPage(statusResponse.text,
                                                 numversion=self.version_numeric)
This method will load the status.html page of the current ERDDAP server reference;
this data is parsed into an OrderedDict, with the scalars, and DataFrames with the
tables provided in the status.html page.
The data will be available in the erddapClient.ERDDAP_Server.statusValues
property
Parameters:
force
: Data is stored in a class property, if force is True, the data will be
reloaded, if False, the last loaded data is returned.
Returns an OrderedDict with the parsed data of the status.html page. More information on the data provided in status.html: ERDDAP documentation
View Source
class ERDDAP_Dataset:
    """
    Class to represent the shared attributes and methods of a ERDDAP Dataset
    either a griddap or tabledap.
    """

    DEFAULT_FILETYPE = 'csvp'
    BINARY_FILETYPES = ['dods', 'mat', 'nc', 'ncCF', 'ncCFMA', 'wav',
                        'smallPdf', 'pdf', 'largePdf',
                        'smallPng', 'png', 'largePng', 'transparentPng']

    def __init__(self, erddapurl, datasetid, protocol='tabledap', auth=None, lazyload=True):
        """
        Stores the dataset reference and starts with an empty query.

        Arguments:

        `erddapurl` : The url of the ERDDAP Server.

        `datasetid` : The identifier of the dataset.

        `protocol` : The protocol segment used in the request urls
                     (defaults to 'tabledap').

        `auth` : Credentials handed to `urlread` on every request.

        `lazyload` : If false calls the loadMetadata method immediately.
        """
        self.erddapurl = erddapurl
        self.datasetid = datasetid
        self.protocol = protocol
        self.erddapauth = auth
        self.__metadata = None
        self.resultVariables = []
        self.constraints = []
        self.serverSideFunctions = []
        if not lazyload:
            self.loadMetadata()

    def __str__(self):
        return dataset_str(self)

    def __simple_repr__(self):
        return simple_dataset_repr(self)

    def setResultVariables(self, variables):
        """
        This function sets the optional comma-separated list of variables
        called "resultsVariables" as part of the query for data request.
        (for example: longitude,latitude,time,station,wmo_platform_code,T_25).
        For each variable in resultsVariables, there will be a column in the
        results table, in the same order. If you don't specify any results
        variables, the results table will include columns for all of the
        variables in the dataset.

        Arguments

        `variables` : The list of variables, this can be a string with the
                      comma separated variables, or a list (or tuple).

        Raises an Exception for any other argument type.

        Returns the current object allowing chaining functions.
        """
        if isinstance(variables, str):
            names = [name.strip() for name in variables.split(',')]
        elif isinstance(variables, (list, tuple)):
            # Copy: addResultVariable appends in place and must never modify
            # the caller's sequence (e.g. a shared class-level constant).
            names = list(variables)
        else:
            raise Exception("variables argument must be list, or comma separated list of variables")
        self.resultVariables = names
        return self

    def addResultVariable(self, variable):
        """
        Adds a variable to the data request query element "resultsVariables"

        Arguments

        `variable` : A string with the name of the variable to add.

        Returns the current object allowing chaining functions.
        """
        self.resultVariables.append(variable)
        return self

    def setConstraints(self, constraintListOrDict):
        """
        This function sets the constraints for the data request query,
        discarding any constraint that was set before.

        More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
        """
        self.clearConstraints()
        self.addConstraints(constraintListOrDict)
        return self

    def addConstraints(self, constraintListOrDict):
        """
        Adds several constraints to the data request query.

        Arguments

        `constraintListOrDict` : Either a list whose items are accepted by
            `erddapClient.ERDDAP_Dataset.addConstraint`, or a dictionary;
            each key/value pair of a dictionary is added as its own
            single-entry dictionary constraint.

        Raises an Exception when the argument is neither a dictionary nor a list.

        Returns the current object allowing chaining functions.
        """
        if isinstance(constraintListOrDict, dict):
            for k, v in constraintListOrDict.items():
                self.addConstraint({k: v})
        elif isinstance(constraintListOrDict, list):
            for constraint in constraintListOrDict:
                self.addConstraint(constraint)
        else:
            raise Exception("Constraints argument must be either dictionary or list")
        return self

    def addConstraint(self, constraint):
        """
        This adds a constraint to the data request query.

        Arguments

        `constraint` : This can be a string with the constraint, or a dictionary
                       element, being the key the first part of the constraint, and
                       the dict value the constraint value.

        Example:
        ```
        >>> dataset.addConstraint('time>=2020-12-29T00:00:00Z')
        >>> dataset.addConstraint({ 'time>=' : dt.datetime(2020,12,29) })
        ```

        More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
        """
        if isinstance(constraint, dict):
            self._addConstraintDict(constraint)
        elif isinstance(constraint, str):
            self._addConstraintStr(constraint)
        else:
            raise Exception("constraint argument must be either string or a dictionary")
        return self

    def _addConstraintStr(self, constraint):
        self.constraints.append(constraint)

    def _addConstraintDict(self, constraintDict):
        # Only the first key of the dictionary is used.
        constraintKey = next(iter(constraintDict))
        self._addConstraintStr(
            "{key_plus_conditional}{value}".format(
                key_plus_conditional=constraintKey,
                value=parseConstraintValue(constraintDict[constraintKey])
            )
        )

    def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True):
        """
        Builds the data request url from the current result variables,
        constraints and server side functions, remembers it in
        `lastRequestURL` and returns it.

        Arguments

        `filetype` : The file type extension of the request.

        `useSafeURL` : Forwarded to `url_operations.parseQueryItems`.
        """
        requestURL = self.getBaseURL(filetype)
        query = ""

        if len(self.resultVariables) > 0:
            query += url_operations.parseQueryItems(self.resultVariables, useSafeURL, safe='', item_separator=',')

        if len(self.constraints) > 0:
            query += '&' + url_operations.parseQueryItems(self.constraints, useSafeURL, safe='=!()&')

        if len(self.serverSideFunctions) > 0:
            query += '&' + url_operations.parseQueryItems(self.serverSideFunctions, useSafeURL, safe='=!()&/')

        requestURL = url_operations.joinURLElements(requestURL, query)

        self.lastRequestURL = requestURL
        return self.lastRequestURL

    def getURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True):
        """
        Builds and returns a string with the data request query, with the
        available resultsVariables, constraints and operations provided.
        """
        return self.getDataRequestURL(filetype, useSafeURL)

    def getBaseURL(self, filetype=DEFAULT_FILETYPE):
        """
        Returns the url of the dataset without any query. For the 'opendap'
        filetype (case-insensitive) no file extension is appended.
        """
        if filetype.lower() == 'opendap':
            return url_operations.url_join(self.erddapurl, self.protocol, self.datasetid)
        else:
            return url_operations.url_join(self.erddapurl, self.protocol, self.datasetid + "." + filetype)

    def getAttribute(self, attribute, variableName='NC_GLOBAL'):
        """
        Returns the value for an attribute name in the dataset. If the metadata
        of the dataset is not already in memory, this function will load it,
        calling the function `erddapClient.ERDDAP_Dataset.loadMetadata`

        With the default `variableName` the global attributes are searched;
        otherwise the dimensions are searched first and then the variables.
        Returns None when the attribute is not found.
        """
        self.loadMetadata()
        if variableName == 'NC_GLOBAL':
            if attribute in self.__metadata['global'].keys():
                return self.__metadata['global'][attribute]
        else:
            for vd in [self.__metadata['dimensions'], self.__metadata['variables']]:
                if variableName in vd.keys():
                    if attribute in vd[variableName].keys():
                        return vd[variableName][attribute]
        return None

    def loadMetadata(self, force=False):
        """
        Loads into memory the metadata attributes and values available in the
        info page of the dataset.

        Arguments:

        `force` : If true, this method will reload the metadata attributes
                  even if the information was already downloaded.

        Always returns True.
        """
        if self.__metadata is None or force:
            rawRequest = urlread(self.getMetadataURL(), auth=self.erddapauth)
            rawRequestJson = rawRequest.json()
            self.__metadata = parseDictMetadata(rawRequestJson)
        return True

    @property
    def variables(self):
        """
        Returns list of variables of this dataset, and its associated metadata
        """
        self.loadMetadata()
        return self.__metadata['variables']

    @property
    def dimensions(self):
        """
        Returns a list of the dataset dimensions, and its associated metadata
        """
        self.loadMetadata()
        return self.__metadata['dimensions']

    @property
    def info(self):
        """
        Returns a list of the global metadata of this dataset.
        """
        self.loadMetadata()
        return self.__metadata['global']

    def getMetadataURL(self, filetype='json'):
        """
        Returns a string with the url to request the metadata for the dataset.

        Arguments

        `filetype` : The filetype for the metadata request, defaults to 'json'
        """
        return url_operations.url_join(self.erddapurl, "info", self.datasetid, "index." + filetype)

    def clearConstraints(self):
        """
        Clears from the constraints stack all the constraints provided by the
        `erddapClient.ERDDAP_Dataset.setConstraints`, `erddapClient.ERDDAP_Dataset.addConstraint`
        methods.
        """
        self.constraints = []

    def clearServerSideFunctions(self):
        """
        Clears from the server side functions stack all the functions provided by the
        methods available, like `erddapClient.ERDDAP_Tabledap.orderBy`,
        `erddapClient.ERDDAP_Tabledap.orderByClosest`, etc.
        """
        self.serverSideFunctions = []

    def clearResultVariables(self):
        """
        Clears from the results variables stack all the variables provided by the
        `erddapClient.ERDDAP_Dataset.setResultVariables` and
        `erddapClient.ERDDAP_Dataset.addResultVariable` methods
        """
        self.resultVariables = []

    def clearQuery(self):
        """
        Clears all the query elements of the stack, variables, constraints and
        server side functions.
        """
        self.clearConstraints()
        self.clearServerSideFunctions()
        self.clearResultVariables()

    def getData(self, filetype=DEFAULT_FILETYPE, request_kwargs=None):
        """
        Makes a data request to the ERDDAP server, the request url is built
        using the `erddapClient.ERDDAP_Dataset.getDataRequestURL` function.

        Arguments

        `filetype` : The file type extension of the request.

        `request_kwargs` : Optional dictionary of additional keyword
            arguments for the urlread function.

        Returns either a string or a binary format (`erddapClient.ERDDAP_Dataset.BINARY_FILETYPES`)
        depending on the filetype specified in the query.
        """
        requestOptions = {} if request_kwargs is None else request_kwargs
        rawRequest = urlread(self.getDataRequestURL(filetype), auth=self.erddapauth, **requestOptions)
        if filetype in self.BINARY_FILETYPES:
            return rawRequest.content
        else:
            return rawRequest.text

    def getDataFrame(self, request_kwargs=None, **kwargs):
        """
        This method makes a data request to the ERDDAP server in 'csvp' format
        then converts it to a pandas object.

        Arguments

        `request_kwargs` : Optional dictionary of additional keyword
            arguments for the urlread function, passed on through
            `erddapClient.ERDDAP_Dataset.getData`.

        `**kwargs` : Additional arguments for the pandas read_csv method.

        Returns the pandas DataFrame object.
        """
        requestOptions = {} if request_kwargs is None else request_kwargs
        # getData takes the options as ONE dictionary parameter; unpacking
        # them here would raise TypeError for any non-empty dictionary.
        csvpdata = self.getData('csvp', request_kwargs=requestOptions)
        return pd.read_csv(StringIO(csvpdata), **kwargs)
Class to represent the shared attributes and methods of a ERDDAP Dataset either a griddap or tabledap.
View Source
def __init__(self, erddapurl, datasetid, protocol='tabledap', auth=None, lazyload=True):
    """
    Stores the dataset reference and starts with an empty query.

    Arguments:

    `erddapurl` : The url of the ERDDAP Server.

    `datasetid` : The identifier of the dataset.

    `protocol` : The protocol segment used in the request urls
                 (defaults to 'tabledap').

    `auth` : Credentials kept in `erddapauth` for later requests.

    `lazyload` : If false calls the loadMetadata method immediately.
    """
    # Where the dataset lives and how to reach it.
    self.erddapurl, self.datasetid = erddapurl, datasetid
    self.protocol, self.erddapauth = protocol, auth

    # Metadata is fetched on demand; None marks "not loaded yet".
    self.__metadata = None

    # The three parts of a data request query, all initially empty.
    self.resultVariables = []
    self.constraints = []
    self.serverSideFunctions = []

    if lazyload:
        return
    self.loadMetadata()
View Source
def setResultVariables(self, variables):
    """
    This function sets the optional comma-separated list of variables
    called "resultsVariables" as part of the query for data request.
    (for example: longitude,latitude,time,station,wmo_platform_code,T_25).
    For each variable in resultsVariables, there will be a column in the
    results table, in the same order. If you don't specify any results
    variables, the results table will include columns for all of the
    variables in the dataset.

    Arguments

    `variables` : The list of variables, this can be a string with the
                  comma separated variables (surrounding whitespace of each
                  name is stripped), or a list (or tuple).

    Raises an Exception for any other argument type.

    Returns the current object allowing chaining functions.
    """
    if isinstance(variables, str):
        names = [name.strip() for name in variables.split(',')]
    elif isinstance(variables, (list, tuple)):
        # Store a copy: the stored list is appended to in place later on, and
        # that must never modify the sequence the caller handed in.
        names = list(variables)
    else:
        raise Exception("variables argument must be list, or comma separated list of variables")
    self.resultVariables = names
    return self
This function sets the optional comma-separated list of variables called "resultsVariables" as part of the query for data request. (for example: longitude,latitude,time,station,wmo_platform_code,T_25). For each variable in resultsVariables, there will be a column in the results table, in the same order. If you don't specify any results variables, the results table will include columns for all of the variables in the dataset.
Arguments
variables
: The list of variables, this can be a string with the
comma separated variables, or a list.
Returns the current object allowing chaining functions.
View Source
def addResultVariable(self, variable):
    """
    Appends one more variable to the "resultsVariables" element of the
    data request query.

    Arguments

    `variable` : A string with the name of the variable to add.

    Returns the current object allowing chaining functions.
    """
    requested = self.resultVariables
    requested.append(variable)
    return self
Adds a variable to the data request query element "resultsVariables"
Arguments
variable
: A string with the name of the variable to add.
View Source
def setConstraints(self, constraintListOrDict):
    """
    Replaces the constraints of the data request query: the current
    constraints are cleared first, then the given ones are added.

    Arguments

    `constraintListOrDict` : The new constraints, handed unchanged to
                             `addConstraints`.

    Returns the current object allowing chaining functions.

    More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
    """
    # Drop whatever was set before ...
    self.clearConstraints()
    # ... then install the replacement set.
    self.addConstraints(constraintListOrDict)
    return self
This function sets the constraints for the data request query.
More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
View Source
def addConstraints(self, constraintListOrDict):
    """
    Adds several constraints to the data request query.

    Arguments

    `constraintListOrDict` : Either a list whose items are passed one by
        one to `addConstraint`, or a dictionary; each key/value pair of a
        dictionary is passed to `addConstraint` as its own single-entry
        dictionary.

    Raises an Exception when the argument is neither a dictionary nor a list.

    Returns the current object allowing chaining functions.
    """
    if isinstance(constraintListOrDict, dict):
        pending = [{key: value} for key, value in constraintListOrDict.items()]
    elif isinstance(constraintListOrDict, list):
        pending = constraintListOrDict
    else:
        raise Exception("Constraints argument must be either dictionary or list")

    for item in pending:
        self.addConstraint(item)
    return self
View Source
def addConstraint(self, constraint):
    """
    Adds a single constraint to the data request query.

    Arguments

    `constraint` : This can be a string with the constraint, or a dictionary
                   element, being the key the first part of the constraint, and
                   the dict value the constraint value.

    Example:
    ```
    >>> dataset.addConstraint('time>=2020-12-29T00:00:00Z')
    >>> dataset.addConstraint({ 'time>=' : dt.datetime(2020,12,29) })
    ```

    Raises an Exception when the argument is neither a string nor a dictionary.

    Returns the current object allowing chaining functions.

    More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
    """
    if isinstance(constraint, str):
        self._addConstraintStr(constraint)
        return self
    if isinstance(constraint, dict):
        self._addConstraintDict(constraint)
        return self
    raise Exception("constraint argument must be either string or a dictionary")
This adds a constraint to the data request query.
Arguments
constraint
: This can be a string with the constraint, or a dictionary
element, being the key the first part of the constraint, and
the dict value the constraint value.
Example:
>>> dataset.addConstraint('time>=2020-12-29T00:00:00Z')
>>> dataset.addConstraint({ 'time>=' : dt.datetime(2020,12,29) })
More on ERDDAP constraints: https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#query
View Source
def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True):
    """
    Builds the data request url from the current result variables,
    constraints and server side functions, remembers it in
    `lastRequestURL` and returns it.

    The query is the encoded result variables (comma separated), followed
    by '&' plus the encoded constraints and '&' plus the encoded server
    side functions; empty parts are left out.

    Arguments

    `filetype` : The file type extension of the request.

    `useSafeURL` : Forwarded to `url_operations.parseQueryItems`.
    """
    baseURL = self.getBaseURL(filetype)

    queryParts = []
    if len(self.resultVariables) > 0:
        queryParts.append(
            url_operations.parseQueryItems(self.resultVariables, useSafeURL, safe='', item_separator=','))
    if len(self.constraints) > 0:
        queryParts.append(
            '&' + url_operations.parseQueryItems(self.constraints, useSafeURL, safe='=!()&'))
    if len(self.serverSideFunctions) > 0:
        queryParts.append(
            '&' + url_operations.parseQueryItems(self.serverSideFunctions, useSafeURL, safe='=!()&/'))

    fullURL = url_operations.joinURLElements(baseURL, "".join(queryParts))

    # Keep the last generated url available on the object.
    self.lastRequestURL = fullURL
    return self.lastRequestURL
View Source
def getURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True):
    """
    Builds and returns a string with the data request query, with the
    available resultsVariables, constraints and operations provided.

    This simply forwards both arguments to `getDataRequestURL`.
    """
    dataRequestURL = self.getDataRequestURL(filetype, useSafeURL)
    return dataRequestURL
Builds and returns a string with the data request query, with the available resultsVariables, constraints and operations provided.
View Source
def getBaseURL(self, filetype=DEFAULT_FILETYPE):
    """
    Returns the url of the dataset without any query part.

    Arguments

    `filetype` : The file type extension appended to the dataset id. For
                 'opendap' (compared case-insensitively) no extension is
                 appended.
    """
    resource = self.datasetid
    if filetype.lower() != 'opendap':
        resource = self.datasetid + "." + filetype
    return url_operations.url_join(self.erddapurl, self.protocol, resource)
View Source
def getAttribute(self, attribute, variableName='NC_GLOBAL'):
    """
    Looks up the value of an attribute in the dataset metadata. The
    metadata is loaded first through
    `erddapClient.ERDDAP_Dataset.loadMetadata` if it is not already in
    memory.

    Arguments

    `attribute` : The name of the attribute.

    `variableName` : With the default 'NC_GLOBAL' the global attributes are
                     searched; otherwise the dimension with that name is
                     searched first, then the variable with that name.

    Returns the attribute value, or None when it is not found.
    """
    self.loadMetadata()

    if variableName == 'NC_GLOBAL':
        globalAttributes = self.__metadata['global']
        if attribute in globalAttributes.keys():
            return globalAttributes[attribute]
        return None

    sections = [self.__metadata['dimensions'], self.__metadata['variables']]
    for section in sections:
        if variableName not in section.keys():
            continue
        attributes = section[variableName]
        if attribute in attributes.keys():
            return attributes[attribute]
    return None
Returns the value for an attribute name in the dataset. If the metadata of the
dataset is not already in memory, this function will load it, calling the
function erddapClient.ERDDAP_Dataset.loadMetadata
View Source
def loadMetadata(self, force=False):
    """
    Loads into memory the metadata attributes and values available in the
    info page of the dataset. The page is requested from the url given by
    `getMetadataURL` and its JSON body is parsed with `parseDictMetadata`.

    Arguments:

    `force` : If true, this method will reload the metadata attributes
              even if the information was already downloaded.

    Always returns True.
    """
    alreadyLoaded = self.__metadata is not None
    if alreadyLoaded and not force:
        return True

    response = urlread(self.getMetadataURL(), auth=self.erddapauth)
    self.__metadata = parseDictMetadata(response.json())
    return True
Loads into memory the metadata attributes and values available in the info page of the dataset.
Arguments:
force
: If true, this method will reload the metadata attributes
even if the information was already downloaded.
Returns a list of the variables of this dataset, and its associated metadata
Returns a list of the dataset dimensions, and its associated metadata
Returns a list of the global metadata of this dataset.
View Source
def getMetadataURL(self, filetype='json'):
    """
    Builds the url of the info page used to request the dataset metadata.

    Arguments

    `filetype` : The filetype for the metadata request, defaults to 'json'

    Returns the url as a string.
    """
    indexPage = "index." + filetype
    return url_operations.url_join(self.erddapurl, "info", self.datasetid, indexPage)
Returns a string with the url to request the metadata for the dataset.
Arguments
filetype
: The filetype for the metadata request, defaults to 'json'
View Source
def clearConstraints(self):
    """
    Empties the constraints stack, discarding every constraint provided
    through the `erddapClient.ERDDAP_Dataset.setConstraints` and
    `erddapClient.ERDDAP_Dataset.addConstraint` methods.
    """
    # A new list is bound; the previous list object is left untouched.
    self.constraints = list()
Clears from the constraints stack all the constraints provided by the
erddapClient.ERDDAP_Dataset.setConstraints
, erddapClient.ERDDAP_Dataset.addConstraint
methods.
View Source
def clearServerSideFunctions(self):
    """
    Empties the server side functions stack, discarding every function
    added by the methods available, like
    `erddapClient.ERDDAP_Tabledap.orderBy`,
    `erddapClient.ERDDAP_Tabledap.orderByClosest`, etc.
    """
    # A new list is bound; the previous list object is left untouched.
    self.serverSideFunctions = list()
Clears from the server side functions stack all the functions provided by the
methods available, like erddapClient.ERDDAP_Tabledap.orderBy
,
erddapClient.ERDDAP_Tabledap.orderByClosest
, etc.
View Source
def clearResultVariables(self):
    """
    Empties the results variables stack, discarding every variable provided
    through the `erddapClient.ERDDAP_Dataset.setResultVariables` and
    `erddapClient.ERDDAP_Dataset.addResultVariable` methods
    """
    # A new list is bound; the previous list object is left untouched.
    self.resultVariables = list()
Clears from the results variables stack all the variables provided by the
erddapClient.ERDDAP_Dataset.setResultVariables
and
erddapClient.ERDDAP_Dataset.addResultVariable
methods
View Source
def clearQuery(self):
    """
    Resets the whole query: the constraints, the server side functions and
    the result variables are cleared, in that order.
    """
    resetSteps = (
        self.clearConstraints,
        self.clearServerSideFunctions,
        self.clearResultVariables,
    )
    for resetStep in resetSteps:
        resetStep()
Clears all the query elements of the stack, variables, constraints and server side variables.
View Source
def getData(self, filetype=DEFAULT_FILETYPE, request_kwargs=None):
    """
    Makes a data request to the ERDDAP server, the request url is built
    using the `erddapClient.ERDDAP_Dataset.getDataRequestURL` function.

    Arguments

    `filetype` : The file type extension of the request.

    `request_kwargs` : Optional dictionary of additional keyword arguments
                       for the urlread function. None (the default) means
                       no additional arguments.

    Returns either a string or a binary format (`erddapClient.ERDDAP_Dataset.BINARY_FILETYPES`)
    depending on the filetype specified in the query.
    """
    # A None default replaces the former shared mutable `{}` default.
    requestOptions = {} if request_kwargs is None else request_kwargs
    rawRequest = urlread(self.getDataRequestURL(filetype), auth=self.erddapauth, **requestOptions)
    if filetype in self.BINARY_FILETYPES:
        return rawRequest.content
    return rawRequest.text
Makes a data request to the ERDDAP server, the request url is built
using the erddapClient.ERDDAP_Dataset.getURL
function.
Additional request arguments for the urlread function can be provided as kwargs in this function.
Returns either a string or a binary format (erddapClient.ERDDAP_Dataset.BINARY_FILETYPES
)
depending on the filetype specified in the query.
View Source
def getDataFrame(self, request_kwargs=None, **kwargs):
    """
    This method makes a data request to the ERDDAP server in 'csvp' format
    then converts it to a pandas object.

    Arguments

    `request_kwargs` : Optional dictionary of additional request arguments,
                       handed to `getData` as its `request_kwargs` argument.

    `**kwargs` : Additional arguments for the pandas read_csv method.

    Returns the pandas DataFrame object.
    """
    requestOptions = {} if request_kwargs is None else request_kwargs
    # getData takes the options as ONE dictionary parameter; unpacking them
    # with ** would raise TypeError for any non-empty dictionary.
    csvpdata = self.getData('csvp', request_kwargs=requestOptions)
    return pd.read_csv(StringIO(csvpdata), **kwargs)
This method makes a data request to the ERDDAP server in csv format, then converts it to a pandas object.
The pandas object is created using the read_csv method, and additional arguments for this method can be provided as kwargs in this method.
Returns the pandas DataFrame object.
View Source
class ERDDAP_Tabledap(ERDDAP_Dataset):
    """
    Class with the representation and methods for a ERDDAP Tabledap Dataset
    """

    DEFAULT_FILETYPE = 'csvp'

    def __init__(self, url, datasetid, auth=None, lazyload=True):
        """
        Constructs the ERDDAP_Tabledap, and if specified, automatically loads the metadata
        information of the dataset.

        Arguments:

        `url` : The url of the ERDDAP Server that contains the tabledap dataset.

        `datasetid` : The identifier of the dataset.

        `auth` : Tuple with username and password for a protected ERDDAP Server.

        `lazyload` : If false calls the loadMetadata method.
        """
        super().__init__(url, datasetid, 'tabledap', auth, lazyload=lazyload)

    def __str__(self):
        dst_repr_ = super().__str__()
        return dst_repr_ + tabledap_str(self)

    def loadMetadata(self, force=False):
        """
        Loads the metadata through the parent class, and when the parent
        reports a truthy result, parses the time range attributes of the
        dataset variables.

        Arguments:

        `force` : Forwarded to the parent loadMetadata.
        """
        if super().loadMetadata(force):
            parseTimeRangeAttributes(self.variables.items())

    #
    # Tabledap server side functions wrappers
    #
    def addVariablesWhere(self, attributeName, attributeValue):
        '''
        Adds "addVariablesWhere" server side function to the data request query
        [https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere](https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere)
        '''
        self.serverSideFunctions.append(
            'addVariablesWhere("{}","{}")'.format(attributeName, attributeValue)
        )
        return self

    def distinct(self):
        '''
        Adds "distinct" server side function to the data request query
        [https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#distinct](https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#distinct)
        '''
        self.serverSideFunctions.append('distinct()')
        return self

    def units(self, value):
        '''
        Adds "units" server side function to the data request query.
        `value` is inserted verbatim between the parentheses, no quotes are added.
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#units
        '''
        self.serverSideFunctions.append('units({})'.format(value))
        # Return self like every other wrapper, so calls can be chained.
        return self

    def orderBy(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderBy
        '''
        self.addServerSideFunction('orderBy', variables)
        return self

    def orderByClosest(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByClosest
        '''
        self.addServerSideFunction('orderByClosest', variables)
        return self

    def orderByCount(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByCount
        '''
        self.addServerSideFunction('orderByCount', variables)
        return self

    def orderByLimit(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByLimit
        '''
        self.addServerSideFunction('orderByLimit', variables)
        return self

    def orderByMax(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMax
        '''
        self.addServerSideFunction('orderByMax', variables)
        return self

    def orderByMin(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMin
        '''
        self.addServerSideFunction('orderByMin', variables)
        return self

    def orderByMinMax(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMinMax
        '''
        self.addServerSideFunction('orderByMinMax', variables)
        return self

    def orderByMean(self, variables):
        '''
        https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMean
        '''
        self.addServerSideFunction('orderByMean', variables)
        return self

    def addServerSideFunction(self, functionName, arguments):
        """
        Appends `functionName("<arguments>")` to the server side functions
        of the query; the arguments are first passed through
        ifListToCommaSeparatedString.
        """
        self.serverSideFunctions.append(
            "{}(\"{}\")".format(
                functionName, ifListToCommaSeparatedString(arguments)
            )
        )
Class with the representation and methods for a ERDDAP Tabledap Dataset
View Source
def __init__(self, url, datasetid, auth=None, lazyload=True):
    """
    Builds the ERDDAP_Tabledap object by delegating to the parent
    constructor with the 'tabledap' protocol.

    Arguments:

    `url` : The url of the ERDDAP Server that contains the tabledap dataset.

    `datasetid` : The identifier of the dataset.

    `auth` : Tuple with username and password for a protected ERDDAP Server.

    `lazyload` : Forwarded to the parent constructor; documented upstream
     as "if false calls the loadMetadata method".
    """
    protocol = 'tabledap'
    super().__init__(url, datasetid, protocol, auth, lazyload=lazyload)
Constructs the ERDDAP_Tabledap, and if specified, automatically loads the metadata information of the dataset.
Arguments:
url
: The url of the ERDDAP Server that contains the tabledap dataset.
datasetid
: The identifier of the dataset.
auth
: Tuple with username and password for a protected ERDDAP Server.
lazyload
: If false calls the loadMetadata method.
View Source
def loadMetadata(self, force=False):
    """
    Loads the dataset metadata through the parent implementation.

    When the parent call returns a truthy value, the items of
    `self.variables` are handed to parseTimeRangeAttributes.

    Arguments:

    `force` : Forwarded unchanged to the parent loadMetadata.
    """
    metadataLoaded = super().loadMetadata(force)
    if not metadataLoaded:
        return
    parseTimeRangeAttributes(self.variables.items())
Loads into memory the metadata attributes and values available in the info page of the dataset.
Arguments:
force
: If true, this method will reload the metadata attributes
even if the information was already downloaded.
View Source
def addVariablesWhere(self, attributeName, attributeValue):
    '''
    Queues the "addVariablesWhere" server side function for the data request query.
    [https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere](https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere)

    Both arguments are written between double quotes in the generated call.
    Returns this object, so calls can be chained.
    '''
    functionCall = 'addVariablesWhere("{}","{}")'.format(attributeName, attributeValue)
    self.serverSideFunctions.append(functionCall)
    return self
Adds "addVariablesWhere" server side function to the data request query https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#addVariablesWhere
View Source
def distinct(self):
    '''
    Queues the "distinct" server side function for the data request query.
    [https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#distinct](https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#distinct)

    Returns this object, so calls can be chained.
    '''
    queuedFunctions = self.serverSideFunctions
    queuedFunctions.append('distinct()')
    return self
Adds "distinct" server side function to the data request query https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#distinct
View Source
def units(self, value):
    '''
    Adds "units" server side function to the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#units

    `value` is inserted verbatim between the parentheses; no quotes are
    added, so include them in `value` when they are needed.

    Returns this object, so calls can be chained like the other server
    side function wrappers.
    '''
    self.serverSideFunctions.append('units({})'.format(value))
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#units
View Source
def orderBy(self, variables):
    '''
    Queues the "orderBy" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderBy

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderBy'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderBy
View Source
def orderByClosest(self, variables):
    '''
    Queues the "orderByClosest" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByClosest

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByClosest'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByClosest
View Source
def orderByCount(self, variables):
    '''
    Queues the "orderByCount" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByCount

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByCount'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByCount
View Source
def orderByLimit(self, variables):
    '''
    Queues the "orderByLimit" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByLimit

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByLimit'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByLimit
View Source
def orderByMax(self, variables):
    '''
    Queues the "orderByMax" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMax

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByMax'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMax
View Source
def orderByMin(self, variables):
    '''
    Queues the "orderByMin" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMin

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByMin'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMin
View Source
def orderByMinMax(self, variables):
    '''
    Queues the "orderByMinMax" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMinMax

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByMinMax'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMinMax
View Source
def orderByMean(self, variables):
    '''
    Queues the "orderByMean" server side function for the data request query.
    https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMean

    `variables` is forwarded unchanged to addServerSideFunction.
    Returns this object, so calls can be chained.
    '''
    functionName = 'orderByMean'
    self.addServerSideFunction(functionName, variables)
    return self
https://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html#orderByMean
View Source
def addServerSideFunction(self, functionName, arguments):
    """
    Appends a server side function call to the query.

    The stored text has the form functionName("arguments"), where the
    arguments are first passed through ifListToCommaSeparatedString
    (presumably joining a list with commas, going by its name).
    """
    argumentsText = ifListToCommaSeparatedString(arguments)
    functionCall = "{}(\"{}\")".format(functionName, argumentsText)
    self.serverSideFunctions.append(functionCall)
Inherited Members
View Source
class ERDDAP_Griddap(ERDDAP_Dataset):
    """
    Class with the representation and methods for a ERDDAP Griddap Dataset
    """

    DEFAULT_FILETYPE = 'nc'

    def __init__(self, url, datasetid, auth=None, lazyload=True):
        """
        Constructs the ERDDAP_Griddap object through the parent constructor
        with the 'griddap' protocol; dimensions and subset start unset.
        """
        super().__init__(url, datasetid, 'griddap', auth, lazyload=lazyload)
        self.__dimensions = None
        self.__positional_indexes = None
        """
        This property stores the last dimensions slices that builds the subset query.
        Its used to build opendap compatible queries, and to get the dimensions values
        of the subset.
        """

    def __str__(self):
        dst_repr_ = super().__str__()
        return dst_repr_ + griddap_str(self)

    def loadMetadata(self, force=False):
        """
        Loads into memory the metadata attributes and values available in the info
        page of the dataset.

        Arguments:

        `force` : If true, this method will reload the metadata attributes
         even if the information was already downloaded.
        """
        if super().loadMetadata(force):
            parseTimeRangeAttributes(self._ERDDAP_Dataset__metadata['dimensions'].items())

    @property
    def dimensions(self):
        """
        Returns the dimensions container of the dataset; the dimension
        values are loaded on first access.
        """
        self.loadDimensionValues()
        return self.__dimensions

    def clearQuery(self):
        """
        Clears the query through the parent class and drops the current subset.
        """
        super().clearQuery()
        self.__positional_indexes = None

    def loadDimensionValues(self, force=False):
        """
        This method loads from the ERDDAP Server the dimension values for the
        current griddap dataset. These values will be used to calculate integer
        indexes for opendap requests.

        Arguments:

        `force` : If true, this method will reload the dimensions values
        even if the values were already downloaded.
        """
        if self.__dimensions is None or force:
            self.loadMetadata()
            dimensionVariableNames = list(self._ERDDAP_Dataset__metadata['dimensions'].keys())

            # Request only the dimension variables, then restore the selection.
            _resultVars = self.resultVariables
            dimensionsData = (
                self.setResultVariables(dimensionVariableNames)
                    .getDataFrame(header=0, names=dimensionVariableNames)
            )
            self.resultVariables = _resultVars

            self.__dimensions = ERDDAP_Griddap_dimensions()
            for dimName in dimensionVariableNames:
                dimDatadroppedNaNs = dimensionsData[dimName].dropna()
                if dimName == 'time':
                    # String dates become numeric values, other values pass through.
                    numericDates = np.array([
                        date2num(dt.datetime.strptime(_dt, ERDDAP_DATETIME_FORMAT), ERDDAP_TIME_UNITS)
                        if (isinstance(_dt, str)) else _dt
                        for _dt in dimDatadroppedNaNs
                    ])
                    dimensionSeries = pd.Series(data=np.arange(numericDates.size), index=numericDates)
                else:
                    dimensionSeries = pd.Series(data=dimDatadroppedNaNs.index.values, index=dimDatadroppedNaNs.values)

                dimMeta = self._ERDDAP_Dataset__metadata['dimensions'][dimName]
                self.__dimensions[dimName] = ERDDAP_Griddap_dimension(dimName, dimensionSeries, metadata=dimMeta)

    def getxArray(self, **kwargs_od):
        """
        Returns an xarray object subset of the ERDDAP dataset current selection query

        Arguments:

        This method will pass all kwargs to the xarray.open_dataset method.
        """
        # Accept _FillValue, scale_value and add_offset attribute functionality
        open_dataset_kwparams = {'mask_and_scale': True}
        open_dataset_kwparams.update(kwargs_od)
        subsetURL = self.getDataRequestURL(filetype='opendap', useSafeURL=False)
        if self.erddapauth:
            session = requests.Session()
            session.auth = self.erddapauth
            store = xr.backends.PydapDataStore.open(subsetURL, session=session)
            _xarray = xr.open_dataset(store, **open_dataset_kwparams)
        else:
            _xarray = xr.open_dataset(subsetURL, **open_dataset_kwparams)

        # Add extra information to the xarray object, the dimension information.
        # Add the subset of the dimensions values to the xarray object
        _subset_coords = {
            dimName: dObj.data[self.__positional_indexes[dimName]]
            for dimName, dObj in self.dimensions.items()
        }
        if self.dimensions.timeDimension:
            _subset_coords[self.dimensions.timeDimension.name] = self.dimensions.timeDimension.timeData[
                self.__positional_indexes[self.dimensions.timeDimension.name]
            ]
        _xarray = _xarray.assign_coords(_subset_coords)
        # Add attributes to the coordinates
        for dimName, dObj in self.dimensions.items():
            _xarray.coords[dimName].attrs = dObj.metadata
        return _xarray

    def getncDataset(self, **kwargs):
        """
        Returns a netCDF4.Dataset object subset of the ERDDAP dataset

        Arguments:

        This method will pass all kwargs to the netCDF4.Dataset method.
        """
        subsetURL = self.getDataRequestURL(filetype='opendap', useSafeURL=False)
        # TODO Add user, password in URL when self.erddapauth is set;
        # for now the credentials are not applied to this request.
        return Dataset(subsetURL, **kwargs)

    def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True, resultVariables=None):
        """
        Returns the fully built ERDDAP data request url with the available components.

        Arguments:

        `filetype` : The request download format

        `useSafeURL` : If True the query part of the url will be encoded

        `resultVariables` : If None, the self.resultVariables will be used.
        """
        requestURL = self.getBaseURL(filetype)
        query = ""

        if resultVariables is None:
            resultVariables = self.resultVariables

        if filetype == 'opendap':
            self.loadDimensionValues()
            if self.__positional_indexes:
                resultVariables = self._resultVariablesWithValidDapIndexing()
            else:
                # resultVariables = self._convertERDDAPSubset2OpendapRegular(resultVariables)
                resultVariables = self._parseResultVariablesExtendedDapQueryToValidDap(resultVariables)
        else:
            if self.__positional_indexes:
                resultVariables = self._resultVariablesWithValidDapIndexing()

        # Test the variables actually used for this request: self.resultVariables
        # may be None, or differ from the list computed above.
        if resultVariables:
            query += url_operations.parseQueryItems(resultVariables, useSafeURL, safe='', item_separator=',')

        requestURL = url_operations.joinURLElements(requestURL, query)

        self.lastRequestURL = requestURL
        return self.lastRequestURL

    def setSubset(self, *pdims, **kwdims):
        """
        Sets a query subset for griddap request, by using dimension names

        Usage example:

        ```
        dsub = ( remote.setResultVariables(['temperature','salinity'])
                       .setSubset( time=slice(dt.datetime(2014,6,15), dt.datetime(2014,7,15)),
                                   depth=0,
                                   latitude=slice(18.10, 31.96),
                                   longitude=slice(-98, -76.41))
                       .getxArray() )
        ```
        """
        self.__positional_indexes = self.dimensions.subset(*pdims, **kwdims)
        return self

    def setSubsetI(self, *pdims, **kwdims):
        """
        Sets a query subset for griddap request, by using its positional
        integer index.

        Usage example:

        ```
        dsub = ( remote.setResultVariables(['temperature','salinity'])
                       .setSubsetI( time=slice(-10,-1),
                                    depth=0,
                                    latitude=slice(10, 150),
                                    longitude=slice(20, 100) )
                       .getxArray() )
        ```
        """
        self.__positional_indexes = self.dimensions.subsetI(*pdims, **kwdims)
        return self

    def _parseResultVariablesExtendedDapQueryToValidDap(self, resultVariables):
        """
        This method will receive a string from the resultVariables part of the
        ERDDAP griddap request like:
            ssh[(2001-06-01T09:00:00Z):(2002-06-01T09:00:00Z)][0:(last-20.3)][last-20:1:last]

        And will return each result variable with a valid opendap integer
        indexing query,
            ssh[10:20][0:70.7][300:359]

        This operation is done by parsing the subset, obtaining the elements of the slice
        that are erddap opendap extended format, the ones between ( ), and converting the
        nearest integer index.

        By parsing the subset, this function returns also error messages on a bad formed
        query.

        As a side effect the parsed slices are stored as the current subset.
        """
        queryComponents = parse_griddap_resultvariables_slices(resultVariables)

        parsedResultVariables = []
        _parsed_positional_indexes = OrderedDict((dimname, None) for dimname in self.dimensions.keys())

        for qIdx, queryComponent in enumerate(queryComponents):

            if len(queryComponent['sliceComponents']) != 0 and len(self.dimensions) != len(queryComponent['sliceComponents']):
                raise Exception('The subset request ({}) must match the number of dimensions ({})'.format(resultVariables[qIdx], self.dimensions.ndims))

            indexSlice = {'start': None, 'stop': None}

            for dimensionName, sliceComponent in zip(self.dimensions.keys(), queryComponent['sliceComponents']):

                # Check if start of stop components of the slice is opendap extended format,
                # The way to detect them is if they are between (dimValue)
                for slicePart in ['start', 'stop']:
                    if slicePart in sliceComponent and is_slice_element_opendap_extended(sliceComponent[slicePart]):
                        # In griddap querys, the slice start and stop, can be between ( ), this notation is a extended
                        # opendap format that uses the dimensions values or special keywords to slice the dimensions.
                        # More on this: https://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html#query
                        sliceComponentValue = get_value_from_opendap_extended_slice_element(sliceComponent[slicePart])

                        if validate_iso8601(sliceComponentValue):
                            sliceComponentValueNum = iso8601STRtoNum(sliceComponentValue)
                            sliceComponentIdx = self.dimensions[dimensionName].closestIdx(sliceComponentValueNum)
                        elif validate_float(sliceComponentValue):
                            sliceComponentValue = float(sliceComponentValue)
                            sliceComponentIdx = self.dimensions[dimensionName].closestIdx(sliceComponentValue)
                        elif validate_int(sliceComponentValue):
                            sliceComponentValue = int(sliceComponentValue)
                            sliceComponentIdx = self.dimensions[dimensionName].closestIdx(sliceComponentValue)
                        elif validate_last_keyword(sliceComponentValue):
                            sliceComponentValue2Eval = sliceComponentValue.replace('last', str(self.dimensions[dimensionName].values.index[-1]))
                            # NOTE(review): eval runs text taken from the query; safe only
                            # if validate_last_keyword restricts it to arithmetic - confirm.
                            sliceComponentIdx = self.dimensions[dimensionName].closestIdx(eval(sliceComponentValue2Eval))
                        else:
                            raise Exception('Malformed subset : ({}) , couldn\'t parse: ({})'.format(resultVariables[qIdx], sliceComponentValue))
                    else:
                        # In the slice is not between ( ) , then this means the slice component its using the numeric indexes
                        # to create the slice. The only special keyword allowed here, its 'last', which means the last numeric
                        # index in the current dimension.
                        # More on this: https://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html#last
                        if slicePart in sliceComponent:
                            sliceComponentValue = sliceComponent[slicePart]
                            if validate_last_keyword(sliceComponentValue):
                                sliceComponentValue2Eval = sliceComponentValue.replace('last', str(self.dimensions[dimensionName].values.iloc[-1]))
                                # NOTE(review): same eval caveat as above.
                                sliceComponentIdx = int(eval(sliceComponentValue2Eval))
                            else:
                                sliceComponentIdx = int(sliceComponentValue)

                    if sliceComponentIdx is None:
                        raise Exception('Malformed subset : ({}) , The constraint ({}) is out of dimension range: [{}]'.format(resultVariables[qIdx], sliceComponentValue, self.dimensions[dimensionName].range))

                    indexSlice[slicePart] = sliceComponentIdx
                # endfor slicePart

                # Build the valid slice object with equivalent numeric indexes in self.__positional_indexes
                _sobj = None  # slice object, to include to self.__positional_indexes
                if 'stride' in sliceComponent:
                    _sobj = slice(indexSlice['start'], indexSlice['stop'] + 1, int(sliceComponent['stride']))
                elif 'stop' in sliceComponent:
                    _sobj = slice(indexSlice['start'], indexSlice['stop'] + 1)
                elif 'start' in sliceComponent:
                    _sobj = slice(indexSlice['start'], indexSlice['start'] + 1)

                _parsed_positional_indexes[dimensionName] = _sobj
            # endfor dimension

            self.__positional_indexes = _parsed_positional_indexes
            _validDapIndexing = self._convertPositionalIndexes2DapQuery()

            # Join the variable name with the openDap indexing
            parsedResultVariables.append(queryComponent['variableName'] + _validDapIndexing)
        # end for queryComponent

        return parsedResultVariables

    def _convertPositionalIndexes2DapQuery(self):
        """
        This function will convert the positional_indexes dictionary of slices, to a string
        query type, compatible with the opendap protocol.

        The property __positional_indexes will contain the slices for each dimension in a
        dict.
         OrderedDict([('time', slice(200, 201, None)), ('altitude', slice(0, 1, None)), ('latitude', slice(337, 465, None)), ('longitude', slice(1018, 1146, None))])

        This method converts and returns the above to the opendap compatible query string.
         [200:200][0:0][337:464][1018:1145]

        Returns:
         - Compatible opendap query string; "" when no subset is set.
        """
        def parseNegativeIndex(nidx, dref):
            # Negative indexes count from the end of the dimension.
            return nidx if nidx >= 0 else dref.size + nidx

        if self.__positional_indexes is None or all(dimSlice is None for dimSlice in self.__positional_indexes.values()):
            return ""

        validDapIndexing = ""
        for dimName, dimSlice in self.__positional_indexes.items():
            if dimSlice is None:
                raise Exception("Not a valid slice available for dimension: {} ".format(dimName))

            # Python slices exclude the stop, opendap ranges include it: hence stop - 1.
            if dimSlice.step is not None:
                _start = parseNegativeIndex(dimSlice.start, self.dimensions[dimName])
                _stop = parseNegativeIndex(dimSlice.stop, self.dimensions[dimName])
                _sc = "[{}:{}:{}]".format(_start, dimSlice.step, _stop - 1)
            elif dimSlice.start is not None:
                _start = parseNegativeIndex(dimSlice.start, self.dimensions[dimName])
                _stop = parseNegativeIndex(dimSlice.stop, self.dimensions[dimName])
                _sc = "[{}:{}]".format(_start, _stop - 1)
            elif dimSlice.stop is not None:
                _stop = parseNegativeIndex(dimSlice.stop, self.dimensions[dimName])
                _sc = "[{}]".format(_stop)
            else:
                raise Exception("No valid slice available for dimension: {} ".format(dimName))

            validDapIndexing += _sc

        return validDapIndexing

    def _resultVariablesWithValidDapIndexing(self):
        """
        Returns the opendap query of the variables listed in the resultVariables
        property, with valid opendap indexing. The indexing its built from the
        current positional_indexes.
        """
        _validDapIndexing = self._convertPositionalIndexes2DapQuery()

        validDapQuery = []
        if self.resultVariables is None:
            for varName in self.variables.keys():
                validDapQuery.append(varName + _validDapIndexing)
        else:
            for varName in self.resultVariables:
                _justvarname = extractVariableName(varName)
                validDapQuery.append(_justvarname + _validDapIndexing)

        return validDapQuery

    @property
    def positional_indexes(self):
        """
        Returns the slices of the current subset, or None when no subset is set.
        """
        return self.__positional_indexes

    @property
    def xarray(self):
        """
        Returns the xarray object representation of the whole dataset. This method creates the
        xarray object by calling the open_dataset method and connecting to the
        opendap endpoint that ERDDAP provides. The object is created once and reused.
        """
        # The attribute is stored under its name-mangled form, so the cache
        # check must use that name; a plain '__xarray' lookup never matches.
        if getattr(self, '_ERDDAP_Griddap__xarray', None) is None:
            if self.erddapauth:
                session = requests.Session()
                session.auth = self.erddapauth
                store = xr.backends.PydapDataStore.open(self.getBaseURL('opendap'), session=session)
                self.__xarray = xr.open_dataset(store)
            else:
                self.__xarray = xr.open_dataset(self.getBaseURL('opendap'))
        return self.__xarray

    @property
    def ncDataset(self):
        """
        Returns the netCDF4.Dataset object representation of the whole dataset. This method
        creates the Dataset object by calling the Dataset constructor connecting to the
        opendap endpoint that ERDDAP provides. The object is created once and reused.
        """
        # Same name-mangling caveat as in the xarray property.
        if getattr(self, '_ERDDAP_Griddap__netcdf4Dataset', None) is None:
            # TODO Add user, password in URL when self.erddapauth is set
            self.__netcdf4Dataset = Dataset(self.getBaseURL('opendap'))
        return self.__netcdf4Dataset
Class with the representation and methods for a ERDDAP Griddap Dataset
View Source
def __init__(self, url, datasetid, auth=None, lazyload=True):
    """
    Builds the ERDDAP_Griddap object by delegating to the parent
    constructor with the 'griddap' protocol, then marks the dimensions
    and the current subset as not available yet.
    """
    protocol = 'griddap'
    super().__init__(url, datasetid, protocol, auth, lazyload=lazyload)

    self.__dimensions = None
    self.__positional_indexes = None
    """
    This property stores the last dimensions slices that builds the subset query.
    Its used to build opendap compatible queries, and to get the dimensions values
    of the subset.
    """
View Source
def loadMetadata(self, force=False):
    """
    Loads the dataset metadata through the parent implementation.

    When the parent call returns a truthy value, the items of the
    'dimensions' metadata entry are handed to parseTimeRangeAttributes.

    Arguments:

    `force` : If true, this method will reload the metadata attributes
     even if the information was already downloaded.
    """
    metadataLoaded = super().loadMetadata(force)
    if not metadataLoaded:
        return
    dimensionsMetadata = self._ERDDAP_Dataset__metadata['dimensions']
    parseTimeRangeAttributes(dimensionsMetadata.items())
Loads into memory the metadata attributes and values available in the info page of the dataset.
Arguments:
force
: If true, this method will reload the metadata attributes
even if the information was already downloaded.
Returns a list of the dataset dimensions and their associated metadata
View Source
def clearQuery(self):
    """
    Clears the query through the parent implementation and, after that,
    forgets the positional indexes of the current subset.
    """
    super().clearQuery()
    # No subset is selected anymore.
    self.__positional_indexes = None
Clears all the query elements of the stack, variables, constraints and server side variables.
View Source
def loadDimensionValues(self, force=False):
    """
    This method loads from the ERDDAP Server the dimension values for the
    current griddap dataset. These values will be used to calculate integer
    indexes for opendap requests.

    Arguments:

    `force` : If true, this method will reload the dimensions values
    even if the values were already downloaded.
    """
    if self.__dimensions is not None and not force:
        return

    self.loadMetadata()
    dimensionsMetadata = self._ERDDAP_Dataset__metadata['dimensions']
    dimensionVariableNames = list(dimensionsMetadata.keys())

    # Temporarily request only the dimension variables. The caller's
    # selection is restored even when the request fails.
    _resultVars = self.resultVariables
    try:
        dimensionsData = (
            self.setResultVariables(dimensionVariableNames)
                .getDataFrame(header=0, names=dimensionVariableNames)
        )
    finally:
        self.resultVariables = _resultVars

    # Build into a local container and publish it only when complete, so a
    # failure half way does not leave partial dimensions marked as loaded.
    loadedDimensions = ERDDAP_Griddap_dimensions()
    for dimName in dimensionVariableNames:
        dimDatadroppedNaNs = dimensionsData[dimName].dropna()
        if dimName == 'time':
            # String dates become numeric values, other values pass through.
            numericDates = np.array([
                date2num(dt.datetime.strptime(_dt, ERDDAP_DATETIME_FORMAT), ERDDAP_TIME_UNITS)
                if (isinstance(_dt, str)) else _dt
                for _dt in dimDatadroppedNaNs
            ])
            dimensionSeries = pd.Series(data=np.arange(numericDates.size), index=numericDates)
        else:
            dimensionSeries = pd.Series(data=dimDatadroppedNaNs.index.values, index=dimDatadroppedNaNs.values)

        loadedDimensions[dimName] = ERDDAP_Griddap_dimension(
            dimName, dimensionSeries, metadata=dimensionsMetadata[dimName]
        )

    self.__dimensions = loadedDimensions
This method loads from the ERDDAP Server the dimension values for the current griddap dataset. These values will be used to calculate integer indexes for opendap requests.
Arguments:
force
: If true, this method will reload the dimensions values
even if the values were already downloaded.
View Source
def getxArray(self, **kwargs_od):
    """
    Returns an xarray object subset of the ERDDAP dataset current selection query.

    The dataset is opened from the opendap request url; afterwards the
    dimension values of the current subset are assigned as coordinates and
    the dimension metadata is set as the coordinates attributes.

    Arguments:

    This method will pass all kwargs to the xarray.open_dataset method.
    """
    # mask_and_scale is on unless the caller overrides it:
    # accept _FillValue, scale_value and add_offset attribute functionality
    open_dataset_kwparams = dict(mask_and_scale=True)
    open_dataset_kwparams.update(kwargs_od)

    subsetURL = self.getDataRequestURL(filetype='opendap', useSafeURL=False)
    if not self.erddapauth:
        _xarray = xr.open_dataset(subsetURL, **open_dataset_kwparams)
    else:
        # Protected server: open through an authenticated session.
        session = requests.Session()
        session.auth = self.erddapauth
        store = xr.backends.PydapDataStore.open(subsetURL, session=session)
        _xarray = xr.open_dataset(store, **open_dataset_kwparams)

    # Add the subset of the dimensions values to the xarray object
    _subset_coords = {}
    for dimName, dObj in self.dimensions.items():
        _subset_coords[dimName] = dObj.data[self.__positional_indexes[dimName]]

    timeDimension = self.dimensions.timeDimension
    if timeDimension:
        timeSlice = self.__positional_indexes[timeDimension.name]
        _subset_coords[timeDimension.name] = timeDimension.timeData[timeSlice]

    _xarray = _xarray.assign_coords(_subset_coords)

    # Add attributes to the coordinates
    for dimName, dObj in self.dimensions.items():
        _xarray.coords[dimName].attrs = dObj.metadata

    return _xarray
Returns an xarray object subset of the ERDDAP dataset current selection query
Arguments:
This method will pass all kwargs to the xarray.open_dataset method.
View Source
def getncDataset(self, **kwargs):
    """
    Returns a netCDF4.Dataset object subset of the ERDDAP dataset.

    The Dataset is opened from the opendap request url of the current query.

    Arguments:

    This method will pass all kwargs to the netCDF4.Dataset method.
    """
    subsetURL = self.getDataRequestURL(filetype='opendap', useSafeURL=False)
    if not self.erddapauth:
        return Dataset(subsetURL, **kwargs)
    # TODO Add user, password in URL; until then this is the same request
    # as the one made without credentials.
    return Dataset(subsetURL, **kwargs)
Returns a netCDF4.Dataset object subset of the ERDDAP dataset
Arguments:
This method will pass all kwargs to the netCDF4.Dataset method.
View Source
def getDataRequestURL(self, filetype=DEFAULT_FILETYPE, useSafeURL=True, resultVariables=None):
    """
    Returns the fully built ERDDAP data request url with the available components.
    The url is also stored in `self.lastRequestURL`.

    Arguments:

    `filetype` : The request download format

    `useSafeURL` : If True the query part of the url will be encoded

    `resultVariables` : If None, the self.resultVariables will be used.
    """
    requestURL = self.getBaseURL(filetype)
    query = ""

    if resultVariables is None:
        resultVariables = self.resultVariables

    if filetype == 'opendap':
        # opendap needs integer indexes, which require the dimension values.
        self.loadDimensionValues()
        if self.__positional_indexes:
            resultVariables = self._resultVariablesWithValidDapIndexing()
        else:
            # resultVariables = self._convertERDDAPSubset2OpendapRegular(resultVariables)
            resultVariables = self._parseResultVariablesExtendedDapQueryToValidDap(resultVariables)
    else:
        if self.__positional_indexes:
            resultVariables = self._resultVariablesWithValidDapIndexing()

    # Test the variables actually used for this request: self.resultVariables
    # may be None, or differ from the list computed above.
    if resultVariables:
        query += url_operations.parseQueryItems(resultVariables, useSafeURL, safe='', item_separator=',')

    requestURL = url_operations.joinURLElements(requestURL, query)

    self.lastRequestURL = requestURL
    return self.lastRequestURL
Returns the fully built ERDDAP data request url with the available components.
Arguments:
filetype
: The request download format
useSafeURL
: If True the query part of the url will be encoded
resultVariables
: If None, the self.resultVariables will be used.
View Source
def setSubset(self, *pdims, **kwdims):
    """
    Sets a query subset for griddap request, by using dimension names.

    All arguments are forwarded to the `subset` method of the dimensions
    object, and its result is stored as the current positional indexes.
    Returns this object, so calls can be chained.

    Usage example:

    ```
    dsub = ( remote.setResultVariables(['temperature','salinity'])
                   .setSubset( time=slice(dt.datetime(2014,6,15), dt.datetime(2014,7,15)),
                               depth=0,
                               latitude=slice(18.10, 31.96),
                               longitude=slice(-98, -76.41))
                   .getxArray() )
    ```
    """
    subsetIndexes = self.dimensions.subset(*pdims, **kwdims)
    self.__positional_indexes = subsetIndexes
    return self
Sets a query subset for griddap request, by using dimension names
Usage example:
dsub = ( remote.setResultVariables(['temperature','salinity'])
.setSubset( time=slice(dt.datetime(2014,6,15), dt.datetime(2014,7,15)),
depth=0,
latitude=slice(18.10, 31.96),
longitude=slice(-98, -76.41))
.getxArray() )
View Source
def setSubsetI(self, *pdims, **kwdims):
    """
    Sets a query subset for griddap request, by using its positional
    integer index.

    All arguments are forwarded to the `subsetI` method of the dimensions
    object, and its result is stored as the current positional indexes.
    Returns this object, so calls can be chained.

    Usage example:

    ```
    dsub = ( remote.setResultVariables(['temperature','salinity'])
                   .setSubsetI( time=slice(-10,-1),
                                depth=0,
                                latitude=slice(10, 150),
                                longitude=slice(20, 100) )
                   .getxArray() )
    ```
    """
    subsetIndexes = self.dimensions.subsetI(*pdims, **kwdims)
    self.__positional_indexes = subsetIndexes
    return self
Sets a query subset for griddap request, by using its positional integer index.
Usage example:
dsub = ( remote.setResultVariables(['temperature','salinity'])
.setSubsetI( time=slice(-10,-1),
depth=0,
latitude=slice(10, 150),
longitude=slice(20, 100) )
.getxArray() )
Returns the xarray object representation of the whole dataset. This method creates the xarray object by calling the open_dataset method and connecting to the opendap endpoint that ERDDAP provides.
Returns the netCDF4.Dataset object representation of the whole dataset. This method creates the Dataset object by calling the Dataset constructor connecting to the opendap endpoint that ERDDAP provides.
View Source
class ERDDAP_Griddap_dimensions(OrderedDict):
    """
    Class with the representation and methods for the ERDDAP Griddap
    dimension variables.

    It is an ordered mapping of dimension name to dimension object. An
    integer key is treated as a position, so `dims[0]` returns the first
    dimension that was stored.
    """

    def __str__(self):
        return erddap_dimensions_str(self)

    def __getitem__(self, val):
        # Integer keys are positional: translate them to the key stored at
        # that position before doing the regular mapping lookup.
        if isinstance(val, int):
            return self[list(self.keys())[val]]
        else:
            return super().__getitem__(val)

    def subsetI(self, *pdims, **kwdims):
        """
        This method receives slices with the numeric indexes for each
        dimension. It validates that the provided indexes fall inside the
        dimension size, to warn early and avoid problems when requesting
        data.

        Arguments:

        `pdims` : One `slice` or `int` per dimension, in dimension order.

        `kwdims` : `slice` or `int` values keyed by dimension name.

        Returns an OrderedDict with one entry per dimension; dimensions
        that were not mentioned keep the value None.

        Raises `Exception` when an index is out of bounds or when a value
        is neither a `slice` nor an `int`, and `KeyError` for an unknown
        dimension name.
        """

        def parseSlice(sobj, dref):
            estart, estop, estep = None, None, None
            if isinstance(sobj, slice):
                if sobj.start is not None:
                    estart = sobj.start
                    if estart >= dref.size:
                        raise Exception("index {} its out of bounds for the dimensions {} with size {}".format(sobj.start, dref.name, dref.size))
                if sobj.stop is not None:
                    estop = sobj.stop
                    # A slice stop is exclusive, so stop == size is valid.
                    if estop > dref.size:
                        raise Exception("index stop {} its out of bounds for the dimensions {} with size {}".format(sobj.stop, dref.name, dref.size))
                estep = sobj.step
            elif isinstance(sobj, int):
                estop = sobj
                # An integer is a single index, so index == size is already
                # out of bounds. The message formats the integer itself; an
                # int has no `.stop` attribute.
                if estop >= dref.size:
                    raise Exception("index stop {} its out of bounds for the dimensions {} with size {}".format(sobj, dref.name, dref.size))
            else:
                raise Exception("Invalid slice format for dimension {}".format(dref.name))

            if estart is None:
                # NOTE(review): a slice without start, e.g. slice(None, 5),
                # also lands here and is handled like the single index 5;
                # slice(None, None) fails on `estop + 1`. Confirm whether
                # that is intended before changing it.
                #
                # Deal with the -1 integer index: slice(-1, 0) would be an
                # empty subset, so the stop is set to the dimension size.
                if estop == -1:
                    return slice(estop, dref.size)
                else:
                    return slice(estop, estop + 1)
            else:
                return slice(estart, estop, estep)

        validDimSlices = OrderedDict({k: None for k in self.keys()})
        # Parse positional arguments, dimensions slices in order
        for idx, pdim in enumerate(pdims):
            validDimSlices[self[idx].name] = parseSlice(pdim, self[idx])
        # Parse keyword arguments, dimension names, order not important
        for kdim, vdim in kwdims.items():
            validDimSlices[kdim] = parseSlice(vdim, self[kdim])

        return validDimSlices

    def subset(self, *pdims, **kwdims):
        """
        This method receives slices for the dimensions, expressed in
        dimension values, and returns the matching numeric indexes as
        slice objects. Values are resolved with each dimension's
        `closestIdx` method.

        Arguments:

        `pdims` : One `slice` or single value per dimension, in dimension
        order.

        `kwdims` : `slice` or single values keyed by dimension name.

        Returns an OrderedDict with one entry per dimension; dimensions
        that were not mentioned keep the value None.

        Raises `Exception` when a value cannot be resolved by `closestIdx`
        (it returned None), and `KeyError` for an unknown dimension name.

        Usage example:

        ```
        iidx = dimensions.subset(slice("2014-06-15","2014-07-15"), 0.0, slice(18.1,31.96), slice(-98, -76.41))
        # or
        iidx = dimensions.subset(time=slice("2014-06-15","2014-07-15"), depth=0.0, latitude=slice(18.1,31.96), longitude=slice(-98, -76.41))

        # Returns, the integer indexes for the closest inside values of the dimensions
        { time : slice(0:10), depth : slice(0:1), latitude: slice(0:100), longitude : slice(0:200) }
        ```
        """

        def parseSlice(sobj, dref):
            estart, estop, estep = None, None, None
            if isinstance(sobj, slice):
                if sobj.start is not None:
                    estart = dref.closestIdx(sobj.start)
                    if estart is None:
                        raise Exception("{} its outside the dimensions values of {}".format(sobj.start, dref.name))
                if sobj.stop is not None:
                    estop = dref.closestIdx(sobj.stop)
                    if estop is None:
                        raise Exception("{} its outside the dimensions values of {}".format(sobj.stop, dref.name))
                estep = sobj.step
            else:
                estop = dref.closestIdx(sobj)
                # Report an unresolved single value with the same message
                # used for slices, instead of failing later on `None + 1`.
                if estop is None:
                    raise Exception("{} its outside the dimensions values of {}".format(sobj, dref.name))

            if estart is None:
                # NOTE(review): a slice without start also lands here and
                # is handled like a single value; slice(None, None) fails
                # on `estop + 1`. Confirm the intended semantics.
                return slice(estop, estop + 1)  # +1 to make it a valid integer index for python
            # The matched stop index is included in the result, hence +1.
            # An open-ended slice (stop is None) stays open-ended.
            return slice(estart, None if estop is None else estop + 1, estep)

        validDimSlices = OrderedDict({k: None for k in self.keys()})
        for idx, pdim in enumerate(pdims):
            validDimSlices[self[idx].name] = parseSlice(pdim, self[idx])
        for kdim, vdim in kwdims.items():
            validDimSlices[kdim] = parseSlice(vdim, self[kdim])

        return validDimSlices

    @property
    def timeDimension(self):
        """ Returns the dimension stored under the 'time' key, or None """
        if 'time' in self.keys():
            return self['time']
        return None

    @property
    def ndims(self):
        """ Returns the number of dimensions stored """
        return len(self)
Class with the representation and methods for the dimension variables of an ERDDAP Griddap dataset
View Source
def subsetI(self, *pdims, **kwdims):
    """
    This method receives slices with the numeric indexes for each
    dimension. It validates that the provided indexes fall inside the
    dimension size, to warn early and avoid problems when requesting data.

    Arguments:

    `pdims` : One `slice` or `int` per dimension, in dimension order.

    `kwdims` : `slice` or `int` values keyed by dimension name.

    Returns an OrderedDict with one entry per dimension; dimensions that
    were not mentioned keep the value None.

    Raises `Exception` when an index is out of bounds or when a value is
    neither a `slice` nor an `int`.
    """

    def parseSlice(sobj, dref):
        estart, estop, estep = None, None, None
        if isinstance(sobj, slice):
            if sobj.start is not None:
                estart = sobj.start
                if estart >= dref.size:
                    raise Exception("index {} its out of bounds for the dimensions {} with size {}".format(sobj.start, dref.name, dref.size))
            if sobj.stop is not None:
                estop = sobj.stop
                # A slice stop is exclusive, so stop == size is valid.
                if estop > dref.size:
                    raise Exception("index stop {} its out of bounds for the dimensions {} with size {}".format(sobj.stop, dref.name, dref.size))
            estep = sobj.step
        elif isinstance(sobj, int):
            estop = sobj
            # An integer is a single index, so index == size is already out
            # of bounds. The message formats the integer itself; an int has
            # no `.stop` attribute.
            if estop >= dref.size:
                raise Exception("index stop {} its out of bounds for the dimensions {} with size {}".format(sobj, dref.name, dref.size))
        else:
            raise Exception("Invalid slice format for dimension {}".format(dref.name))

        if estart is None:
            # NOTE(review): a slice without start, e.g. slice(None, 5), also
            # lands here and is handled like the single index 5;
            # slice(None, None) fails on `estop + 1`. Confirm whether that
            # is intended before changing it.
            #
            # Deal with the -1 integer index: slice(-1, 0) would be an empty
            # subset, so the stop is set to the dimension size.
            if estop == -1:
                return slice(estop, dref.size)
            else:
                return slice(estop, estop + 1)
        else:
            return slice(estart, estop, estep)

    validDimSlices = OrderedDict({k: None for k in self.keys()})
    # Parse positional arguments, dimensions slices in order
    for idx, pdim in enumerate(pdims):
        validDimSlices[self[idx].name] = parseSlice(pdim, self[idx])
    # Parse keyword arguments, dimension names, order not important
    for kdim, vdim in kwdims.items():
        validDimSlices[kdim] = parseSlice(vdim, self[kdim])

    return validDimSlices
This method receives slices with the numeric indexes for each dimension. It will validate if the provided slices are valid and inside the dimension size, just to warn and avoid further problems when requesting data.
View Source
def subset(self, *pdims, **kwdims):
    """
    This method receives slices for the dimensions, expressed in dimension
    values, and returns the matching numeric indexes as slice objects.
    Values are resolved with each dimension's `closestIdx` method.

    Arguments:

    `pdims` : One `slice` or single value per dimension, in dimension order.

    `kwdims` : `slice` or single values keyed by dimension name.

    Returns an OrderedDict with one entry per dimension; dimensions that
    were not mentioned keep the value None.

    Raises `Exception` when a value cannot be resolved by `closestIdx`
    (it returned None).

    Usage example:

    ```
    iidx = dimensions.subset(slice("2014-06-15","2014-07-15"), 0.0, slice(18.1,31.96), slice(-98, -76.41))
    # or
    iidx = dimensions.subset(time=slice("2014-06-15","2014-07-15"), depth=0.0, latitude=slice(18.1,31.96), longitude=slice(-98, -76.41))

    # Returns, the integer indexes for the closest inside values of the dimensions
    { time : slice(0:10), depth : slice(0:1), latitude: slice(0:100), longitude : slice(0:200) }
    ```
    """

    def parseSlice(sobj, dref):
        estart, estop, estep = None, None, None
        if isinstance(sobj, slice):
            if sobj.start is not None:
                estart = dref.closestIdx(sobj.start)
                if estart is None:
                    raise Exception("{} its outside the dimensions values of {}".format(sobj.start, dref.name))
            if sobj.stop is not None:
                estop = dref.closestIdx(sobj.stop)
                if estop is None:
                    raise Exception("{} its outside the dimensions values of {}".format(sobj.stop, dref.name))
            estep = sobj.step
        else:
            estop = dref.closestIdx(sobj)
            # Report an unresolved single value with the same message used
            # for slices, instead of failing later on `None + 1`.
            if estop is None:
                raise Exception("{} its outside the dimensions values of {}".format(sobj, dref.name))

        if estart is None:
            # NOTE(review): a slice without start also lands here and is
            # handled like a single value; slice(None, None) fails on
            # `estop + 1`. Confirm the intended semantics.
            return slice(estop, estop + 1)  # +1 to make it a valid integer index for python
        # The matched stop index is included in the result, hence +1.
        # An open-ended slice (stop is None) stays open-ended.
        return slice(estart, None if estop is None else estop + 1, estep)

    validDimSlices = OrderedDict({k: None for k in self.keys()})
    for idx, pdim in enumerate(pdims):
        validDimSlices[self[idx].name] = parseSlice(pdim, self[idx])
    for kdim, vdim in kwdims.items():
        validDimSlices[kdim] = parseSlice(vdim, self[kdim])

    return validDimSlices
This method receives slices for the dimensions, parses and returns the numeric index values, in slice objects.
Usage example:
iidx = dimensions.subset(slice("2014-06-15","2014-07-15"), 0.0, slice(18.1,31.96), slice(-98, -76.41))
# or
iidx = dimensions.subset(time=slice("2014-06-15","2014-07-15"), depth=0.0, latitude=slice(18.1,31.96), longitude=slice(-98, -76.41))
# Returns, the integer indexes for the closest inside values of the dimensions
{ time : slice(0:10), depth : slice(0:1), latitude: slice(0:100), longitude : slice(0:200) }
Inherited Members
- collections.OrderedDict
- OrderedDict
- clear
- popitem
- move_to_end
- keys
- items
- values
- pop
- setdefault
- copy
- fromkeys
- update
- builtins.dict
- get
View Source
class ERDDAP_Griddap_dimension:
    """
    Class with the representation and methods for each ERDDAP Griddap
    dimension, for its metadata and values
    """

    def __init__(self, name, values, metadata):
        """
        Arguments:

        `name` : The dimension name; the name 'time' marks a time dimension.

        `values` : Object whose `.index` attribute holds the dimension
        values (used as a pandas index by `closestIdx`).

        `metadata` : Mapping with the dimension attributes; `closestIdx`
        reads its 'actual_range' entry.
        """
        self.name = name
        self.values = values
        self.metadata = metadata

    def __getitem__(self, val):
        return self.values.index[val]

    def __str__(self):
        return erddap_dimension_str(self)

    def closestIdx(self, value, method='nearest'):
        """
        Returns the integer index that matches the closest 'value' in
        dimensions values, or None when 'value' lies outside the
        'actual_range' stored in the metadata.

        Arguments:

        `value` : The value to search in the dimension values. If the object
        contains a time dimension, this parameter can be a valid ISO 8601
        string or datetime.

        `method` : The pandas index lookup method used to find the closest
        value index (for example 'nearest', 'pad' or 'backfill'). With None
        an exact match is required.

        Raises `KeyError` when no index matches.
        """
        if self.isTime and isinstance(value, str):
            value = iso8601STRtoNum(value)
        elif isinstance(value, dt.datetime):
            value = dttonum(value)

        if self.isTime:
            rangemin = dttonum(self.metadata['actual_range'][0])
            rangemax = dttonum(self.metadata['actual_range'][1])
        else:
            rangemin = self.metadata['actual_range'][0]
            rangemax = self.metadata['actual_range'][1]
        if value > rangemax or value < rangemin:
            return None

        if method is None:
            return self.values.index.get_loc(value)

        # Index.get_loc lost its `method` argument in pandas 2.0.
        # get_indexer is the lookup get_loc(method=...) delegated to, and it
        # exists in old and new pandas alike; -1 marks "no match".
        idx = self.values.index.get_indexer([value], method=method).item()
        if idx == -1:
            raise KeyError(value)
        return idx

    @property
    def info(self):
        """ Returns the dimension metadata """
        return self.metadata

    @property
    def data(self):
        """ Returns the dimension values """
        return self.values.index

    @property
    def size(self):
        """ Returns the dimension length """
        return self.data.size

    @property
    def timeData(self):
        """ Returns the values converted with numtodate for a time dimension, None otherwise """
        if self.isTime:
            return numtodate(self.data)
        return None

    @property
    def isTime(self):
        """ Returns True when the dimension name is 'time' """
        return self.name == 'time'

    @property
    def range(self):
        """
        Returns the metadata 'actual_range' when present, otherwise the
        (min, max) pair of the dimension values, converted with numtodate
        for the time dimension.
        """
        if 'actual_range' in self.metadata:
            return self.metadata['actual_range']
        elif self.name == 'time':
            return (numtodate(self.values.index.min()), numtodate(self.values.index.max()))
        else:
            return (self.values.index.min(), self.values.index.max())
Class with the representation and methods for each ERDDAP Griddap dimension, for its metadata and values
View Source
def __init__(self, name, values, metadata): self.name = name self.values = values self.metadata = metadata
View Source
def closestIdx(self, value, method='nearest'):
    """
    Returns the integer index that matches the closest 'value' in
    dimensions values, or None when 'value' lies outside the 'actual_range'
    stored in the metadata.

    Arguments:

    `value` : The value to search in the dimension values. If the object
    contains a time dimension, this parameter can be a valid ISO 8601 string
    or datetime.

    `method` : The pandas index lookup method used to find the closest
    value index (for example 'nearest', 'pad' or 'backfill'). With None an
    exact match is required.

    Raises `KeyError` when no index matches.
    """
    if self.isTime and isinstance(value, str):
        value = iso8601STRtoNum(value)
    elif isinstance(value, dt.datetime):
        value = dttonum(value)

    if self.isTime:
        rangemin = dttonum(self.metadata['actual_range'][0])
        rangemax = dttonum(self.metadata['actual_range'][1])
    else:
        rangemin = self.metadata['actual_range'][0]
        rangemax = self.metadata['actual_range'][1]
    if value > rangemax or value < rangemin:
        return None

    if method is None:
        return self.values.index.get_loc(value)

    # Index.get_loc lost its `method` argument in pandas 2.0. get_indexer is
    # the lookup get_loc(method=...) delegated to, and it exists in old and
    # new pandas alike; -1 marks "no match".
    idx = self.values.index.get_indexer([value], method=method).item()
    if idx == -1:
        raise KeyError(value)
    return idx
Returns the integer index that matches the closest 'value' in dimensions values.
Arguments:
value
: The value to search in the dimension values. If the object
contains a time dimension, this parameter can be a valid ISO 8601 string
or datetime.
method
: The argument passed to pandas index.get_loc method
that returns the closest value index.
Returns the dimension values
Returns the dimension length