@@ -956,6 +956,10 @@ def find_neighbors(
956
956
queries : List [List [float ]],
957
957
num_neighbors : int = 10 ,
958
958
filter : Optional [List [Namespace ]] = [],
959
+ per_crowding_attribute_neighbor_count : Optional [int ] = None ,
960
+ approx_num_neighbors : Optional [int ] = None ,
961
+ fraction_leaf_nodes_to_search_override : Optional [float ] = None ,
962
+ return_full_datapoint : bool = False ,
959
963
) -> List [List [MatchNeighbor ]]:
960
964
"""Retrieves nearest neighbors for the given embedding queries on the specified deployed index which is deployed to public endpoint.
961
965
@@ -979,25 +983,58 @@ def find_neighbors(
979
983
For example, [Namespace("color", ["red"], []), Namespace("shape", [], ["squared"])] will match datapoints
980
984
that satisfy "red color" but not include datapoints with "squared shape".
981
985
Please refer to https://cloud.google.com/vertex-ai/docs/matching-engine/filtering#json for more detail.
986
+
987
+ per_crowding_attribute_neighbor_count (int):
988
+ Optional. Crowding is a constraint on a neighbor list produced
989
+ by nearest neighbor search requiring that no more than some
990
+ value k' of the k neighbors returned have the same value of
991
+ crowding_attribute. It's used for improving result diversity.
992
+ This field is the maximum number of matches with the same crowding tag.
993
+
994
+ approx_num_neighbors (int):
995
+ Optional. The number of neighbors to find via approximate search
996
+ before exact reordering is performed. If not set, the default
997
+ value from scam config is used; if set, this value must be > 0.
998
+
999
+ fraction_leaf_nodes_to_search_override (float):
1000
+ Optional. The fraction of the number of leaves to search; setting it at
1001
+ query time allows the user to tune search performance. Increasing this
1002
+ value increases both search accuracy and latency.
1003
+ The value should be between 0.0 and 1.0.
1004
+
1005
+ return_full_datapoint (bool):
1006
+ Optional. If set to true, the full datapoints (including all
1007
+ vector values and restricts) of the nearest neighbors are returned.
1008
+ Note that returning full datapoint will significantly increase the
1009
+ latency and cost of the query.
1010
+
982
1011
Returns:
983
1012
List[List[MatchNeighbor]] - A list of nearest neighbors for each query.
984
1013
"""
985
1014
986
1015
if not self ._public_match_client :
987
1016
raise ValueError (
988
- "Please make sure index has been deployed to public endpoint, and follow the example usage to call this method."
1017
+ "Please make sure index has been deployed to public endpoint,and follow the example usage to call this method."
989
1018
)
990
1019
991
1020
# Create the FindNeighbors request
992
1021
find_neighbors_request = gca_match_service_v1beta1 .FindNeighborsRequest ()
993
1022
find_neighbors_request .index_endpoint = self .resource_name
994
1023
find_neighbors_request .deployed_index_id = deployed_index_id
1024
+ find_neighbors_request .return_full_datapoint = return_full_datapoint
995
1025
996
1026
for query in queries :
997
1027
find_neighbors_query = (
998
1028
gca_match_service_v1beta1 .FindNeighborsRequest .Query ()
999
1029
)
1000
1030
find_neighbors_query .neighbor_count = num_neighbors
1031
+ find_neighbors_query .per_crowding_attribute_neighbor_count = (
1032
+ per_crowding_attribute_neighbor_count
1033
+ )
1034
+ find_neighbors_query .approximate_neighbor_count = approx_num_neighbors
1035
+ find_neighbors_query .fraction_leaf_nodes_to_search_override = (
1036
+ fraction_leaf_nodes_to_search_override
1037
+ )
1001
1038
datapoint = gca_index_v1beta1 .IndexDatapoint (feature_vector = query )
1002
1039
for namespace in filter :
1003
1040
restrict = gca_index_v1beta1 .IndexDatapoint .Restriction ()
0 commit comments