Merge pull request #100 from stac-utils/add_filter_ext

matthewhanson · web-flow · commit 50e3717c8298 · 2021-09-21T22:24:02.000-04:00
Add filter ext
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Added
 - Jupyter Notebook tutorials
+- Basic CQL-JSON filtering [#100](https://github.com/stac-utils/pystac-client/pull/100)
 
 ### Changed
 
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
@@ -18,4 +18,13 @@ STAC Metadata Visualization
 - :ref:`Docs version </tutorials/stac-metadata-viz.ipynb>`
 
 This tutorial gives an introduction to using Holeviews and hvplot to visualize
-STAC metadata and Item geometries on a map.
+STAC metadata and Item geometries on a map.
+
+CQL Filtering
+---------------------------
+
+- :tutorial:`GitHub version <cql-filter.ipynb>`
+- :ref:`Docs version </tutorials/cql-filter.ipynb>`
+
+This tutorial gives an introduction to using CQL-JSON filtering in searches to 
+search by arbitrary STAC Item properties.
diff --git a/docs/tutorials/cql-filter.ipynb b/docs/tutorials/cql-filter.ipynb
@@ -0,0 +1,212 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e06a27bf",
+   "metadata": {},
+   "source": [
+    "# pystac-client CQL Filtering\n",
+    "\n",
+    "This notebook demonstrates how to use pystac-client with [CQL Filtering](https://github.com/radiantearth/stac-api-spec/tree/master/fragments/filter). The server must support this extension, which it advertises through the `https://api.stacspec.org/v1.0.0-beta.3/item-search#filter:filter` conformance class in the `conformsTo` attribute of the root API.\n",
+    "\n",
+    "**This should be considered an experimental feature. This notebook uses the Microsoft Planetary Computer staging environment as it is currently the only public CQL implementation. The Planetary Computer also does not advertise the correct conformance class, thus the `ignore_conformance` keyword is specified in the `Client.open` function below.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b65de617",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pystac_client import Client\n",
+    "\n",
+    "# set pystac_client logger to INFO (change to logging.DEBUG to see API calls)\n",
+    "import logging\n",
+    "logging.basicConfig()\n",
+    "logger = logging.getLogger('pystac_client')\n",
+    "logger.setLevel(logging.INFO)\n",
+    "\n",
+    "# function for creating GeoDataFrame from Items\n",
+    "from copy import deepcopy\n",
+    "import geopandas as gpd\n",
+    "import pandas as pd\n",
+    "from shapely.geometry import shape\n",
+    "\n",
+    "# convert a list of STAC Items into a GeoDataFrame\n",
+    "def items_to_geodataframe(items):\n",
+    "    _items = []\n",
+    "    for i in items:\n",
+    "        _i = deepcopy(i)\n",
+    "        _i['geometry'] = shape(_i['geometry'])\n",
+    "        _items.append(_i)\n",
+    "    gdf = gpd.GeoDataFrame(pd.json_normalize(_items))\n",
+    "    for field in ['properties.datetime', 'properties.created', 'properties.updated']:\n",
+    "        if field in gdf:\n",
+    "            gdf[field] = pd.to_datetime(gdf[field])\n",
+    "    gdf.set_index('properties.datetime', inplace=True)\n",
+    "    return gdf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98942e75",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# STAC API root URL\n",
+    "URL = 'https://planetarycomputer-staging.microsoft.com/api/stac/v1'\n",
+    "\n",
+    "# custom headers\n",
+    "headers = []\n",
+    "\n",
+    "cat = Client.open(URL, headers=headers, ignore_conformance=True)\n",
+    "cat"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e16077c",
+   "metadata": {},
+   "source": [
+    "## Initial Search Parameters\n",
+    "\n",
+    "Here we perform a search with the `Client.search` function, providing a geometry (`intersects`), a datetime range (`datetime`), and a filter on Item properties (`filter`) using CQL-JSON."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d8af6334",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# AOI around Delfzijl, in the north of The Netherlands\n",
+    "geom = {\n",
+    "    \"type\": \"Polygon\",\n",
+    "    \"coordinates\": [\n",
+    "      [\n",
+    "        [\n",
+    "          6.42425537109375,\n",
+    "          53.174765470134616\n",
+    "        ],\n",
+    "        [\n",
+    "          7.344360351562499,\n",
+    "          53.174765470134616\n",
+    "        ],\n",
+    "        [\n",
+    "          7.344360351562499,\n",
+    "          53.67393435835391\n",
+    "        ],\n",
+    "        [\n",
+    "          6.42425537109375,\n",
+    "          53.67393435835391\n",
+    "        ],\n",
+    "        [\n",
+    "          6.42425537109375,\n",
+    "          53.174765470134616\n",
+    "        ]\n",
+    "      ]\n",
+    "    ]\n",
+    "}\n",
+    "\n",
+    "params = {\n",
+    "    \"collections\": \"landsat-8-c2-l2\",\n",
+    "    \"intersects\": geom,\n",
+    "    \"datetime\": \"2018-01-01/2020-12-31\",\n",
+    "    \"max_items\": 100,\n",
+    "}\n",
+    "\n",
+    "import hvplot.pandas\n",
+    "import json\n",
+    "\n",
+    "# reusable search function\n",
+    "def search_fetch_plot(params, filt):\n",
+    "    # add the CQL-JSON filter to the search parameters (note: this modifies the params dict passed in)\n",
+    "    params['filter'] = filt\n",
+    "    search = cat.search(**params)\n",
+    "    items_json = search.get_all_items_as_dict()\n",
+    "    # DataFrame\n",
+    "    items_df = pd.DataFrame(items_to_geodataframe(items_json['features']))\n",
+    "    print(f\"{len(items_df.index)} items found\")\n",
+    "    field = 'properties.eo:cloud_cover'\n",
+    "    return items_df.hvplot(y=field, label=json.dumps(filt), frame_height=500, frame_width=800)    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44d3bc04",
+   "metadata": {},
+   "source": [
+    "## CQL Filters\n",
+    "\n",
+    "Below are examples of several different CQL filters on the `eo:cloud_cover` property. Up to 100 Items are fetched and the eo:cloud_cover values plotted.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfc0e759",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filt = {\n",
+    "    \"lte\": [{\"property\": \"eo:cloud_cover\"}, 10]\n",
+    "}\n",
+    "\n",
+    "search_fetch_plot(params, filt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c2f9ca1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filt = {\n",
+    "    \"gte\": [{\"property\": \"eo:cloud_cover\"}, 80]\n",
+    "}\n",
+    "\n",
+    "search_fetch_plot(params, filt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "109f673c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filt = {\n",
+    "    \"lte\": [{\"property\": \"eo:cloud_cover\"}, 60],\n",
+    "    \"gte\": [{\"property\": \"eo:cloud_cover\"}, 40]\n",
+    "}\n",
+    "\n",
+    "search_fetch_plot(params, filt)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/usage.rst b/docs/usage.rst
@@ -168,3 +168,22 @@ by finding links with a ``"rel"`` type of ``"next"`` and parsing them to constru
 implementation of this ``"next"`` link parsing assumes that the link follows the spec for an extended STAC link as
 described in the `STAC API - Item Search: Paging <https://github.com/radiantearth/stac-api-spec/tree/master/item-search#paging>`__
 section. See the :mod:`Paging <pystac_client.paging>` docs for details on how to customize this behavior.
+
+Query Filter
+------------
+
+If the server supports the `query filter <https://github.com/radiantearth/stac-api-spec/tree/master/fragments/query>`__,
+arbitrary Item properties can be included in the search parameters. The query parameter to :class:`ItemSearch` accepts
+a JSON structure as in the STAC API spec, but also accepts an abbreviated syntax. Instead of JSON, a list of strings can
+be provided, in which case pystac-client will convert them to the equivalent JSON structure.
+
+The syntax is simply::
+
+    <property><operator><value>
+
+e.g.::
+
+    eo:cloud_cover<10
+    view:off_nadir<50
+    platform=landsat-8
diff --git a/pystac_client/cli.py b/pystac_client/cli.py
@@ -101,6 +101,7 @@ def parse_args(args):
                               nargs='*',
                               help='Query properties of form '
                               'KEY=VALUE (<, >, <=, >=, = supported)')
+    search_group.add_argument('--filter', help='Filter on queryables using CQL JSON')
     search_group.add_argument('--sortby', help='Sort by fields', nargs='*')
     search_group.add_argument('--fields', help='Control what fields get returned', nargs='*')
     search_group.add_argument('--limit', help='Page size limit', type=int, default=100)
@@ -145,6 +146,9 @@ def parse_args(args):
                 logger.warning(f"Unable to parse header {head}")
         parsed_args['headers'] = new_headers
 
+    if 'filter' in parsed_args:
+        parsed_args['filter'] = json.loads(parsed_args['filter'])
+
     return parsed_args
 
 
diff --git a/pystac_client/item_search.py b/pystac_client/item_search.py
@@ -41,6 +41,8 @@
 Query = dict
 QueryLike = Union[Query, List[str]]
 
+FilterLike = dict
+
 Sortby = List[str]
 SortbyLike = Union[Sortby, str]
 
@@ -131,6 +133,15 @@ class ItemSearch:
             (except ``limit``) are ignored.
         collections: List of one or more Collection IDs or :class:`pystac.Collection` instances. Only Items in one
             of the provided Collections will be searched
+        query: List or JSON of query parameters as per the STAC API `query` extension
+        filter: JSON of query parameters as per the STAC API `filter` extension
+        sortby: A single field or list of fields to sort the response by
+        fields: A list of fields to return in the response. Note this may result in invalid JSON.
+            Use `get_all_items_as_dict` to avoid errors
+        max_items: The maximum number of items to get, even if there are more matched items
+        method: The http method, 'GET' or 'POST'
+        stac_io: An instance of StacIO for retrieving results. Normally comes from the Client that returns this ItemSearch
+        client: An instance of a root Client used to set the root on resulting Items
     """
     def __init__(self,
                  url: str,
@@ -142,6 +153,7 @@ def __init__(self,
                  ids: Optional[IDsLike] = None,
                  collections: Optional[CollectionsLike] = None,
                  query: Optional[QueryLike] = None,
+                 filter: Optional[FilterLike] = None,
                  sortby: Optional[SortbyLike] = None,
                  fields: Optional[FieldsLike] = None,
                  max_items: Optional[int] = None,
@@ -174,10 +186,14 @@ def __init__(self,
             'collections': self._format_collections(collections),
             'intersects': self._format_intersects(intersects),
             'query': self._format_query(query),
+            'filter': self._format_filter(filter),
             'sortby': self._format_sortby(sortby),
             'fields': self._format_fields(fields)
         }
 
+        if params['filter'] is not None:
+            params['filter-lang'] = 'cql-json'
+
         self._parameters = {k: v for k, v in params.items() if v is not None}
 
     def get_parameters(self):
@@ -221,6 +237,13 @@ def _format_query(value: List[QueryLike]) -> Optional[dict]:
 
         return query
 
+    def _format_filter(self, value: FilterLike) -> Optional[dict]:
+        if value is None:
+            return None
+
+        self._stac_io.assert_conforms_to(ConformanceClasses.FILTER)
+        return value
+
     @staticmethod
     def _format_bbox(value: Optional[BBoxLike]) -> Optional[BBox]:
         if value is None:
diff --git a/pystac_client/version.py b/pystac_client/version.py
@@ -1 +1 @@
-__version__ = '0.3.0a1'
+__version__ = '0.3.0-beta.1'
diff --git a/tests/test_pystac_api.py b/tests/test_pystac_api.py
@@ -2,4 +2,4 @@
 
 
 def test_version():
-    assert __version__ == '0.3.0a1'
+    assert __version__ == '0.3.0-beta.1'

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = '0.3.0a1'`
	`1`	`+__version__ = '0.3.0-beta.1'`
Original file line number	Diff line number	Diff line change
`@@ -2,4 +2,4 @@`
`2`	`2`
`3`	`3`
`4`	`4`	`def test_version():`
`5`		`- assert __version__ == '0.3.0a1'`
	`5`	`+ assert __version__ == '0.3.0-beta.1'`