Skip to content

Commit 25d789c

Browse files
committed
GH-37938: [Swift] Fix null count when using reader
1 parent 1f42e6d commit 25d789c

File tree

5 files changed

+210
-81
lines changed

5 files changed

+210
-81
lines changed

swift/Arrow/Sources/Arrow/ArrowBuffer.swift

+41-7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ public class ArrowBuffer {
3939
data.append(ptr, count: Int(capacity))
4040
}
4141

42+
/// Creates a buffer with zero length and zero capacity.
///
/// The backing pointer is obtained from a zero-byte allocation so the returned
/// buffer still carries a pointer value even though it holds no data.
///
/// - Returns: An `ArrowBuffer` whose `length` and `capacity` are both `0`.
static func createEmptyBuffer() -> ArrowBuffer {
    // `allocate(byteCount:alignment:)` documents that `alignment` must be a
    // whole power of two. Zero is not, so ask for plain byte alignment instead.
    let placeholder = UnsafeMutableRawPointer.allocate(
        byteCount: 0,
        alignment: MemoryLayout<UInt8>.alignment)
    return ArrowBuffer(length: 0, capacity: 0, rawPointer: placeholder)
}
48+
4249
static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
4350
let byteCount = UInt(data.count)
4451
let capacity = alignTo64(byteCount)
@@ -48,14 +55,10 @@ public class ArrowBuffer {
4855
return ArrowBuffer(length: length, capacity: capacity, rawPointer: rawPointer)
4956
}
5057

51-
static func createBuffer(_ length: UInt, size: UInt, doAlign: Bool = true) -> ArrowBuffer {
58+
static func createBuffer(_ length: UInt, size: UInt) -> ArrowBuffer {
5259
let actualLen = max(length, ArrowBuffer.minLength)
5360
let byteCount = size * actualLen
54-
var capacity = byteCount
55-
if doAlign {
56-
capacity = alignTo64(byteCount)
57-
}
58-
61+
let capacity = alignTo64(byteCount)
5962
let memory = MemoryAllocator(64)
6063
let rawPointer = memory.allocateArray(Int(capacity))
6164
rawPointer.initializeMemory(as: UInt8.self, repeating: 0, count: Int(capacity))
@@ -66,7 +69,11 @@ public class ArrowBuffer {
6669
to.rawPointer.copyMemory(from: from.rawPointer, byteCount: Int(len))
6770
}
6871

69-
private static func alignTo64(_ length: UInt) -> UInt {
72+
/// Copies `len` bytes from the start of `from` into the start of the null buffer `to`.
///
/// No bounds check is performed here; `len` is used as given.
///
/// - Parameters:
///   - from: Buffer whose raw bytes are read.
///   - to: Null buffer whose raw storage is overwritten.
///   - len: Number of bytes to copy.
static func copyCurrent(_ from: ArrowBuffer, to: inout ArrowNullBuffer, len: UInt) {
    let byteTotal = Int(len)
    let source = from.rawPointer
    to.rawPointer.copyMemory(from: source, byteCount: byteTotal)
}
75+
76+
fileprivate static func alignTo64(_ length: UInt) -> UInt {
7077
let bufAlignment = length % 64
7178
if bufAlignment != 0 {
7279
return length + (64 - bufAlignment) + 8
@@ -75,3 +82,30 @@ public class ArrowBuffer {
7582
return length + 8
7683
}
7784
}
85+
86+
/// An `ArrowBuffer` that additionally stores a null count alongside its bytes.
public class ArrowNullBuffer: ArrowBuffer {
    /// Number of nulls associated with this buffer.
    var nullCount: UInt

    /// Wraps already-allocated memory together with a null count.
    ///
    /// - Parameters:
    ///   - nullCount: Null count to record on the buffer.
    ///   - length: Value forwarded to `ArrowBuffer` as `length`.
    ///   - capacity: Value forwarded to `ArrowBuffer` as `capacity`.
    ///   - rawPointer: Memory forwarded to `ArrowBuffer` as its storage.
    init(_ nullCount: UInt, length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
        self.nullCount = nullCount
        super.init(length: length, capacity: capacity, rawPointer: rawPointer)
    }

    /// Builds a null buffer whose leading bytes are a copy of `data`.
    ///
    /// The allocation is sized by `alignTo64(data.count)`; only the first
    /// `data.count` bytes are written by this function.
    ///
    /// - Parameters:
    ///   - data: Bytes to copy into the new buffer.
    ///   - length: Length to record on the returned buffer.
    ///   - nullCount: Null count to record on the returned buffer.
    /// - Returns: A newly allocated `ArrowNullBuffer`.
    static func createBuffer(_ data: [UInt8], length: UInt, nullCount: UInt) -> ArrowNullBuffer {
        let paddedSize = alignTo64(UInt(data.count))
        let storage = MemoryAllocator(64).allocateArray(Int(paddedSize))
        storage.copyMemory(from: data, byteCount: data.count)
        return ArrowNullBuffer(nullCount, length: length, capacity: paddedSize, rawPointer: storage)
    }

    /// Builds a zero-filled null buffer for `length` elements of `size` bytes each.
    ///
    /// The element count used for sizing is never smaller than
    /// `ArrowBuffer.minLength`; the recorded `length` is the caller's value.
    ///
    /// - Parameters:
    ///   - length: Requested element count, also recorded on the returned buffer.
    ///   - size: Size in bytes of a single element.
    ///   - nullCount: Null count to record on the returned buffer.
    /// - Returns: A newly allocated, zero-initialized `ArrowNullBuffer`.
    static func createBuffer(_ length: UInt, size: UInt, nullCount: UInt) -> ArrowNullBuffer {
        let elementCount = max(length, ArrowBuffer.minLength)
        let paddedSize = alignTo64(size * elementCount)
        let storage = MemoryAllocator(64).allocateArray(Int(paddedSize))
        // Zero the whole allocation, padding included.
        storage.initializeMemory(as: UInt8.self, repeating: 0, count: Int(paddedSize))
        return ArrowNullBuffer(nullCount, length: length, capacity: paddedSize, rawPointer: storage)
    }
}

swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift

+22-15
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ public protocol ArrowBufferBuilder {
3232

3333
public class BaseBufferBuilder<T> {
3434
var values: ArrowBuffer
35-
var nulls: ArrowBuffer
35+
var nulls: ArrowNullBuffer
3636
var stride: Int
3737
public var offset: UInt = 0
3838
public var capacity: UInt {return self.values.capacity}
3939
public var length: UInt = 0
40-
public var nullCount: UInt = 0
40+
public var nullCount: UInt {return self.nulls.nullCount}
4141

42-
init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
42+
init(values: ArrowBuffer, nulls: ArrowNullBuffer, stride: Int = MemoryLayout<T>.stride) {
4343
self.stride = stride
4444
self.values = values
4545
self.nulls = nulls
@@ -67,7 +67,7 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
6767
public required init() throws {
6868
self.defaultVal = try FixedBufferBuilder<T>.defaultValueForType()
6969
let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<T>.stride))
70-
let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
70+
let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride), nullCount: 0)
7171
super.init(values: values, nulls: nulls)
7272
}
7373

@@ -83,7 +83,7 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
8383
BitUtility.setBit(index + self.offset, buffer: self.nulls)
8484
self.values.rawPointer.advanced(by: byteIndex).storeBytes(of: val, as: T.self)
8585
} else {
86-
self.nullCount += 1
86+
self.nulls.nullCount += 1
8787
BitUtility.clearBit(index + self.offset, buffer: self.nulls)
8888
self.values.rawPointer.advanced(by: byteIndex).storeBytes(of: defaultVal, as: T.self)
8989
}
@@ -93,7 +93,8 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
9393
if length > self.values.length {
9494
let resizeLength = resizeLength(self.values)
9595
var values = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<T>.size))
96-
var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
96+
var nulls = ArrowNullBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size),
97+
nullCount: self.nullCount)
9798
ArrowBuffer.copyCurrent(self.values, to: &values, len: self.values.capacity)
9899
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
99100
self.values = values
@@ -104,7 +105,8 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
104105
public func finish() -> [ArrowBuffer] {
105106
let length = self.length
106107
var values = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout<T>.size))
107-
var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
108+
var nulls = ArrowNullBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size),
109+
nullCount: self.nullCount)
108110
ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity)
109111
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
110112
return [nulls, values]
@@ -142,7 +144,8 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
142144
public typealias ItemType = Bool
143145
public required init() throws {
144146
let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
145-
let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
147+
let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride),
148+
nullCount: 0)
146149
super.init(values: values, nulls: nulls)
147150
}
148151

@@ -162,7 +165,7 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
162165
}
163166

164167
} else {
165-
self.nullCount += 1
168+
self.nulls.nullCount += 1
166169
BitUtility.clearBit(index + self.offset, buffer: self.nulls)
167170
BitUtility.clearBit(index + self.offset, buffer: self.values)
168171
}
@@ -172,7 +175,8 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
172175
if (length/8) > self.values.length {
173176
let resizeLength = resizeLength(self.values)
174177
var values = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<UInt8>.size))
175-
var nulls = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<UInt8>.size))
178+
var nulls = ArrowNullBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<UInt8>.size),
179+
nullCount: nullCount)
176180
ArrowBuffer.copyCurrent(self.values, to: &values, len: self.values.capacity)
177181
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
178182
self.values = values
@@ -183,7 +187,8 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
183187
public func finish() -> [ArrowBuffer] {
184188
let length = self.length
185189
var values = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout<UInt8>.size))
186-
var nulls = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout<UInt8>.size))
190+
var nulls = ArrowNullBuffer.createBuffer(length, size: UInt(MemoryLayout<UInt8>.size),
191+
nullCount: nullCount)
187192
ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity)
188193
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
189194
return [nulls, values]
@@ -196,7 +201,7 @@ public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder
196201
let binaryStride = MemoryLayout<UInt8>.stride
197202
public required init() throws {
198203
let values = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
199-
let nulls = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
204+
let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(binaryStride), nullCount: 0)
200205
self.offsets = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<Int32>.stride))
201206
super.init(values: values, nulls: nulls, stride: binaryStride)
202207
}
@@ -229,7 +234,7 @@ public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder
229234
}
230235

231236
if isNull {
232-
self.nullCount += 1
237+
self.nulls.nullCount += 1
233238
BitUtility.clearBit(index + self.offset, buffer: self.nulls)
234239
} else {
235240
BitUtility.setBit(index + self.offset, buffer: self.nulls)
@@ -257,7 +262,8 @@ public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder
257262
public func resize(_ length: UInt) {
258263
if length > self.offsets.length {
259264
let resizeLength = resizeLength(self.offsets, len: length)
260-
var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
265+
var nulls = ArrowNullBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size),
266+
nullCount: self.nullCount)
261267
var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<Int32>.size))
262268
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
263269
ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity)
@@ -269,7 +275,8 @@ public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder
269275
public func finish() -> [ArrowBuffer] {
270276
let length = self.length
271277
var values = ArrowBuffer.createBuffer(self.values.length, size: UInt(MemoryLayout<UInt8>.size))
272-
var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
278+
var nulls = ArrowNullBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size),
279+
nullCount: self.nullCount)
273280
var offsets = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout<Int32>.size))
274281
ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity)
275282
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)

swift/Arrow/Sources/Arrow/ArrowReader.swift

+6-2
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,12 @@ public class ArrowReader {
5757
private func loadPrimitiveData(_ loadInfo: DataLoadInfo) -> Result<ArrowArrayHolder, ArrowError> {
5858
do {
5959
let node = loadInfo.recordBatch.nodes(at: loadInfo.nodeIndex)!
60+
let nullLength = UInt(ceil(Double(node.length) / 8))
6061
try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex)
6162
let nullBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex)!
6263
let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData,
63-
length: UInt(node.nullCount), messageOffset: loadInfo.messageOffset)
64+
length: nullLength, messageOffset: loadInfo.messageOffset,
65+
nullCount: UInt(node.nullCount))
6466
try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex + 1)
6567
let valueBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex + 1)!
6668
let arrowValueBuffer = makeBuffer(valueBuffer, fileData: loadInfo.fileData,
@@ -76,10 +78,12 @@ public class ArrowReader {
7678
private func loadVariableData(_ loadInfo: DataLoadInfo) -> Result<ArrowArrayHolder, ArrowError> {
7779
let node = loadInfo.recordBatch.nodes(at: loadInfo.nodeIndex)!
7880
do {
81+
let nullLength = UInt(ceil(Double(node.length) / 8))
7982
try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex)
8083
let nullBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex)!
8184
let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData,
82-
length: UInt(node.nullCount), messageOffset: loadInfo.messageOffset)
85+
length: nullLength, messageOffset: loadInfo.messageOffset,
86+
nullCount: UInt(node.nullCount))
8387
try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex + 1)
8488
let offsetBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex + 1)!
8589
let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: loadInfo.fileData,

0 commit comments

Comments
 (0)