Skip to content

Commit 50e3717

Browse files
Merge pull request #100 from stac-utils/add_filter_ext
Add filter ext
2 parents 48ba65b + 371e020 commit 50e3717

File tree

8 files changed

+271
-3
lines changed

8 files changed

+271
-3
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
88

99
### Added
1010
- Jupyter Notebook tutorials
11+
- Basic CQL-JSON filtering [#100](https://github.com/stac-utils/pystac-client/pull/100)
1112

1213
### Changed
1314

docs/tutorials.rst

+10-1
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,13 @@ STAC Metadata Visualization
1818
- :ref:`Docs version </tutorials/stac-metadata-viz.ipynb>`
1919

2020
This tutorial gives an introduction to using HoloViews and hvplot to visualize
21-
STAC metadata and Item geometries on a map.
21+
STAC metadata and Item geometries on a map.
22+
23+
CQL Filtering
24+
---------------------------
25+
26+
- :tutorial:`GitHub version <cql-filter.ipynb>`
27+
- :ref:`Docs version </tutorials/cql-filter.ipynb>`
28+
29+
This tutorial gives an introduction to using CQL-JSON filtering in searches to
30+
search by arbitrary STAC Item properties.

docs/tutorials/cql-filter.ipynb

+212
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "e06a27bf",
6+
"metadata": {},
7+
"source": [
8+
"# pystac-client CQL Filtering\n",
9+
"\n",
10+
"This notebook demonstrates how to use pystac-client with [CQL Filtering](https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter). The server must support this extension, which it advertises by listing the `https://api.stacspec.org/v1.0.0-beta.3/item-search#filter:filter` conformance class in the `conformsTo` attribute of the root API.\n",
11+
"\n",
12+
"**This should be considered an experimental feature. This notebook uses the Microsoft Planetary Computer staging environment as it is currently the only public CQL implementation. The Planetary Computer also does not advertise the correct conformance class, thus the `ignore_conformance` keyword is specified in the `Client.open` function below.**"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": null,
18+
"id": "b65de617",
19+
"metadata": {},
20+
"outputs": [],
21+
"source": [
22+
"from pystac_client import Client\n",
23+
"\n",
24+
"# set pystac_client logger to DEBUG to see API calls\n",
25+
"import logging\n",
26+
"logging.basicConfig()\n",
27+
"logger = logging.getLogger('pystac_client')\n",
28+
"logger.setLevel(logging.INFO)\n",
29+
"\n",
30+
"# function for creating GeoDataFrame from Items\n",
31+
"from copy import deepcopy\n",
32+
"import geopandas as gpd\n",
33+
"import pandas as pd\n",
34+
"from shapely.geometry import shape\n",
35+
"\n",
36+
"# convert a list of STAC Items into a GeoDataFrame\n",
37+
"def items_to_geodataframe(items):\n",
38+
" _items = []\n",
39+
" for i in items:\n",
40+
" _i = deepcopy(i)\n",
41+
" _i['geometry'] = shape(_i['geometry'])\n",
42+
" _items.append(_i)\n",
43+
" gdf = gpd.GeoDataFrame(pd.json_normalize(_items))\n",
44+
" for field in ['properties.datetime', 'properties.created', 'properties.updated']:\n",
45+
" if field in gdf:\n",
46+
" gdf[field] = pd.to_datetime(gdf[field])\n",
47+
" gdf.set_index('properties.datetime', inplace=True)\n",
48+
" return gdf"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"id": "98942e75",
55+
"metadata": {},
56+
"outputs": [],
57+
"source": [
58+
"# STAC API root URL\n",
59+
"URL = 'https://planetarycomputer-staging.microsoft.com/api/stac/v1'\n",
60+
"\n",
61+
"# custom headers\n",
62+
"headers = []\n",
63+
"\n",
64+
"cat = Client.open(URL, headers=headers, ignore_conformance=True)\n",
65+
"cat"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"id": "1e16077c",
71+
"metadata": {},
72+
"source": [
73+
"## Initial Search Parameters\n",
74+
"\n",
75+
"Here we perform a search with the `Client.search` function, providing a geometry (`intersects`), a datetime range (`datetime`), and filtering by Item properties (`filter`) using CQL-JSON."
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"id": "d8af6334",
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"# AOI around Delfzijl, in the north of The Netherlands\n",
86+
"geom = {\n",
87+
" \"type\": \"Polygon\",\n",
88+
" \"coordinates\": [\n",
89+
" [\n",
90+
" [\n",
91+
" 6.42425537109375,\n",
92+
" 53.174765470134616\n",
93+
" ],\n",
94+
" [\n",
95+
" 7.344360351562499,\n",
96+
" 53.174765470134616\n",
97+
" ],\n",
98+
" [\n",
99+
" 7.344360351562499,\n",
100+
" 53.67393435835391\n",
101+
" ],\n",
102+
" [\n",
103+
" 6.42425537109375,\n",
104+
" 53.67393435835391\n",
105+
" ],\n",
106+
" [\n",
107+
" 6.42425537109375,\n",
108+
" 53.174765470134616\n",
109+
" ]\n",
110+
" ]\n",
111+
" ]\n",
112+
"}\n",
113+
"\n",
114+
"params = {\n",
115+
" \"collections\": \"landsat-8-c2-l2\",\n",
116+
" \"intersects\": geom,\n",
117+
" \"datetime\": \"2018-01-01/2020-12-31\",\n",
118+
" \"max_items\": 100,\n",
119+
"}\n",
120+
"\n",
121+
"import hvplot.pandas\n",
122+
"import json\n",
123+
"\n",
124+
"# reusable search function\n",
125+
"def search_fetch_plot(params, filt):\n",
126+
" # attach the CQL-JSON filter to the search parameters (note: this mutates the shared params dict)\n",
127+
" params['filter'] = filt\n",
128+
" search = cat.search(**params)\n",
129+
" items_json = search.get_all_items_as_dict()\n",
130+
" # DataFrame\n",
131+
" items_df = pd.DataFrame(items_to_geodataframe(items_json['features']))\n",
132+
" print(f\"{len(items_df.index)} items found\")\n",
133+
" field = 'properties.eo:cloud_cover'\n",
134+
" return items_df.hvplot(y=field, label=json.dumps(filt), frame_height=500, frame_width=800) "
135+
]
136+
},
137+
{
138+
"cell_type": "markdown",
139+
"id": "44d3bc04",
140+
"metadata": {},
141+
"source": [
142+
"## CQL Filters\n",
143+
"\n",
144+
"Below are examples of several different CQL filters on the `eo:cloud_cover` property. Up to 100 Items are fetched and their `eo:cloud_cover` values are plotted.\n"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"id": "dfc0e759",
151+
"metadata": {},
152+
"outputs": [],
153+
"source": [
154+
"filt = {\n",
155+
" \"lte\": [{\"property\": \"eo:cloud_cover\"}, 10]\n",
156+
"}\n",
157+
"\n",
158+
"search_fetch_plot(params, filt)"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": null,
164+
"id": "9c2f9ca1",
165+
"metadata": {},
166+
"outputs": [],
167+
"source": [
168+
"filt = {\n",
169+
" \"gte\": [{\"property\": \"eo:cloud_cover\"}, 80]\n",
170+
"}\n",
171+
"\n",
172+
"search_fetch_plot(params, filt)"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": null,
178+
"id": "109f673c",
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"filt = {\n",
183+
" \"lte\": [{\"property\": \"eo:cloud_cover\"}, 60],\n",
184+
" \"gte\": [{\"property\": \"eo:cloud_cover\"}, 40]\n",
185+
"}\n",
186+
"\n",
187+
"search_fetch_plot(params, filt)"
188+
]
189+
}
190+
],
191+
"metadata": {
192+
"kernelspec": {
193+
"display_name": "Python 3 (ipykernel)",
194+
"language": "python",
195+
"name": "python3"
196+
},
197+
"language_info": {
198+
"codemirror_mode": {
199+
"name": "ipython",
200+
"version": 3
201+
},
202+
"file_extension": ".py",
203+
"mimetype": "text/x-python",
204+
"name": "python",
205+
"nbconvert_exporter": "python",
206+
"pygments_lexer": "ipython3",
207+
"version": "3.9.4"
208+
}
209+
},
210+
"nbformat": 4,
211+
"nbformat_minor": 5
212+
}

docs/usage.rst

+19
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,22 @@ by finding links with a ``"rel"`` type of ``"next"`` and parsing them to constru
168168
implementation of this ``"next"`` link parsing assumes that the link follows the spec for an extended STAC link as
169169
described in the `STAC API - Item Search: Paging <https://github.com/radiantearth/stac-api-spec/tree/master/item-search#paging>`__
170170
section. See the :mod:`Paging <pystac_client.paging>` docs for details on how to customize this behavior.
171+
172+
Query Filter
173+
------------
174+
175+
If the server supports the `query filter <https://github.com/radiantearth/stac-api-spec/tree/master/fragments/query>`__,
176+
arbitrary Item properties can be included in the search parameters. The query parameter to :class:`ItemSearch` accepts
177+
a JSON structure as in the STAC API spec, but also accepts an abbreviated syntax. Instead of JSON, a list of strings can
178+
be provided, in which case pystac-client will convert them to the equivalent JSON structure.
179+
180+
The syntax is simply::
181+
182+
183+
    <property><operator><value>
184+
    e.g.,
185+
186+
    eo:cloud_cover<10
187+
    view:off_nadir<50
188+
    platform=landsat-8
189+

pystac_client/cli.py

+4
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def parse_args(args):
101101
nargs='*',
102102
help='Query properties of form '
103103
'KEY=VALUE (<, >, <=, >=, = supported)')
104+
search_group.add_argument('--filter', help='Filter on queryables using CQL JSON')
104105
search_group.add_argument('--sortby', help='Sort by fields', nargs='*')
105106
search_group.add_argument('--fields', help='Control what fields get returned', nargs='*')
106107
search_group.add_argument('--limit', help='Page size limit', type=int, default=100)
@@ -145,6 +146,9 @@ def parse_args(args):
145146
logger.warning(f"Unable to parse header {head}")
146147
parsed_args['headers'] = new_headers
147148

149+
if 'filter' in parsed_args:
150+
parsed_args['filter'] = json.loads(parsed_args['filter'])
151+
148152
return parsed_args
149153

150154

pystac_client/item_search.py

+23
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
Query = dict
4242
QueryLike = Union[Query, List[str]]
4343

44+
FilterLike = dict
45+
4446
Sortby = List[str]
4547
SortbyLike = Union[Sortby, str]
4648

@@ -131,6 +133,15 @@ class ItemSearch:
131133
(except ``limit``) are ignored.
132134
collections: List of one or more Collection IDs or :class:`pystac.Collection` instances. Only Items in one
133135
of the provided Collections will be searched
136+
query: List or JSON of query parameters as per the STAC API `query` extension
137+
filter: JSON of query parameters as per the STAC API `filter` extension
138+
sortby: A single field or list of fields to sort the response by
139+
fields: A list of fields to return in the response. Note this may result in invalid JSON.
140+
Use `get_all_items_as_dict` to avoid errors
141+
max_items: The maximum number of items to get, even if there are more matched items
142+
method: The http method, 'GET' or 'POST'
143+
stac_io: An instance of StacIO for retrieving results. Normally comes from the Client that returns this ItemSearch
144+
client: An instance of a root Client used to set the root on resulting Items
134145
"""
135146
def __init__(self,
136147
url: str,
@@ -142,6 +153,7 @@ def __init__(self,
142153
ids: Optional[IDsLike] = None,
143154
collections: Optional[CollectionsLike] = None,
144155
query: Optional[QueryLike] = None,
156+
filter: Optional[FilterLike] = None,
145157
sortby: Optional[SortbyLike] = None,
146158
fields: Optional[FieldsLike] = None,
147159
max_items: Optional[int] = None,
@@ -174,10 +186,14 @@ def __init__(self,
174186
'collections': self._format_collections(collections),
175187
'intersects': self._format_intersects(intersects),
176188
'query': self._format_query(query),
189+
'filter': self._format_filter(filter),
177190
'sortby': self._format_sortby(sortby),
178191
'fields': self._format_fields(fields)
179192
}
180193

194+
if params['filter'] is not None:
195+
params['filter-lang'] = 'cql-json'
196+
181197
self._parameters = {k: v for k, v in params.items() if v is not None}
182198

183199
def get_parameters(self):
@@ -221,6 +237,13 @@ def _format_query(value: List[QueryLike]) -> Optional[dict]:
221237

222238
return query
223239

240+
def _format_filter(self, value: FilterLike) -> Optional[dict]:
241+
if value is None:
242+
return None
243+
244+
self._stac_io.assert_conforms_to(ConformanceClasses.FILTER)
245+
return value
246+
224247
@staticmethod
225248
def _format_bbox(value: Optional[BBoxLike]) -> Optional[BBox]:
226249
if value is None:

pystac_client/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.3.0a1'
1+
__version__ = '0.3.0-beta.1'

tests/test_pystac_api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33

44
def test_version():
5-
assert __version__ == '0.3.0a1'
5+
assert __version__ == '0.3.0-beta.1'

0 commit comments

Comments
 (0)