Skip to content

Commit b275483

Browse files
authored
apacheGH-41741: [C++] Check that extension metadata key is present before attempting to delete it (apache#41763)
### Rationale for this change Neither Schema.fbs nor the Arrow C Data interface nor the columnar specification indicates that the `ARROW:extension:metadata` key must be present; however, the `ImportType()` implementation assumes that both `ARROW:extension:name` and `ARROW:extension:metadata` are present and throws an exception if `ARROW:extension:metadata` is missing. This causes pyarrow to crash (see issue for reproducer). ### What changes are included in this PR? This PR checks that the extension metadata is present before attempting to delete it. ### Are these changes tested? Yes (test added). ### Are there any user-facing changes? No. * GitHub Issue: apache#41741 Authored-by: Dewey Dunnington <dewey@voltrondata.com> Signed-off-by: Dewey Dunnington <dewey@voltrondata.com>
1 parent 19044ee commit b275483

File tree

3 files changed

+44
-2
lines changed

3 files changed

+44
-2
lines changed

cpp/src/arrow/c/bridge.cc

+8-2
Original file line numberDiff line numberDiff line change
@@ -1059,8 +1059,14 @@ struct SchemaImporter {
10591059
ARROW_ASSIGN_OR_RAISE(
10601060
type_, registered_ext_type->Deserialize(std::move(type_),
10611061
metadata_.extension_serialized));
1062-
RETURN_NOT_OK(metadata_.metadata->DeleteMany(
1063-
{metadata_.extension_name_index, metadata_.extension_serialized_index}));
1062+
// If metadata is present, delete both metadata keys (otherwise, just remove
1063+
// the extension name key)
1064+
if (metadata_.extension_serialized_index >= 0) {
1065+
RETURN_NOT_OK(metadata_.metadata->DeleteMany(
1066+
{metadata_.extension_name_index, metadata_.extension_serialized_index}));
1067+
} else {
1068+
RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index));
1069+
}
10641070
}
10651071
}
10661072

cpp/src/arrow/c/bridge_test.cc

+17
Original file line numberDiff line numberDiff line change
@@ -4102,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) {
41024102
TestWithArrayFactory(NestedFactory(ExampleDictExtension));
41034103
}
41044104

4105+
// A registered extension type must be importable from a C schema whose field
// metadata carries only the "ARROW:extension:name" key (no serialized
// extension metadata key).
TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) {
  auto registered_type = std::make_shared<MetadataOptionalExtensionType>();
  ExtensionTypeGuard guard(registered_type);

  // Build a storage-typed field that names the extension but omits its
  // serialized metadata.
  auto name_only_metadata = KeyValueMetadata::Make({"ARROW:extension:name"},
                                                   {registered_type->extension_name()});
  auto storage_field = field("", registered_type->storage_type(), true,
                             std::move(name_only_metadata));

  // The guard must be declared before the export so it covers the exported schema.
  struct ArrowSchema c_schema {};
  SchemaExportGuard schema_guard(&c_schema);
  ASSERT_OK(ExportField(*storage_field, &c_schema));

  ASSERT_OK_AND_ASSIGN(auto imported_type, ImportType(&c_schema));
  ASSERT_EQ(imported_type->id(), Type::EXTENSION);
  AssertTypeEqual(imported_type, registered_type);
}
4121+
41054122
TEST_F(TestArrayRoundtrip, UnregisteredExtension) {
41064123
auto StorageExtractor = [](ArrayFactory factory) {
41074124
return [factory]() -> Result<std::shared_ptr<Array>> {

cpp/src/arrow/testing/extension_type.h

+19
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType {
132132
std::string Serialize() const override { return "dict-extension-serialized"; }
133133
};
134134

135+
// A minimal extension type that does not error when passed blank extension information
136+
class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType {
137+
public:
138+
MetadataOptionalExtensionType() : ExtensionType(null()) {}
139+
std::string extension_name() const override { return "metadata.optional"; }
140+
std::string Serialize() const override { return ""; }
141+
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
142+
return nullptr;
143+
}
144+
bool ExtensionEquals(const ExtensionType& other) const override {
145+
return other.extension_name() == extension_name();
146+
}
147+
Result<std::shared_ptr<DataType>> Deserialize(
148+
std::shared_ptr<DataType> storage_type,
149+
const std::string& serialized_data) const override {
150+
return std::make_shared<MetadataOptionalExtensionType>();
151+
}
152+
};
153+
135154
class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
136155
public:
137156
using ExtensionArray::ExtensionArray;

0 commit comments

Comments
 (0)