assert get_max_dtype(pa.float64()) == DType.float64
assert get_max_dtype(pa.date32()) == DType.int32 # Date32 maps to int32

🔗 Core Bridge
Zero-copy connection between PyArrow and MAX Engine.
get_max_dtype
def get_max_dtype(
arrow_type:DataType
)->DType:
Get MAX DType for an Arrow type.
get_numpy_dtype
def get_numpy_dtype(
arrow_type:DataType
)->dtype:
Get NumPy dtype for an Arrow type.
Unsupported types raise TypeError:
arrow_to_numpy_view
def arrow_to_numpy_view(
arr:Union, # PyArrow array (primitive type, no nulls)
)->ndarray: # NumPy view over same memory
Get zero-copy NumPy view of an Arrow array.
arr = pa.array([1.0, 2.0, 3.0])
np_view = arrow_to_numpy_view(arr)
assert np_view.ctypes.data == arr.buffers()[1].address # Zero-copy!

try:
    arrow_to_numpy_view(pa.array([1.0, None, 3.0]))
    assert False, "Should have raised ValueError"
except ValueError as e:
    assert "nulls" in str(e)

arrow_to_max_tensor
def arrow_to_max_tensor(
arr:Union, # PyArrow array to convert
device:Optional=None, # Target device (`None` = CPU)
)->Tensor: # MAX Tensor (zero-copy on CPU, copied on GPU)
Zero-copy bridge from PyArrow array to MAX Tensor.
MXFrame
def MXFrame(
data:Union, # Arrow Table or dict of lists
):
PyArrow-backed DataFrame with zero-copy MAX Engine integration.
MXFrame.to_numpy
def to_numpy(
column:str, # Column name
)->ndarray: # Zero-copy NumPy view
Get zero-copy NumPy view of column (cached).
df = MXFrame({'x': [1.0, 2.0, 3.0]})
np1 = df.to_numpy('x')
np2 = df.to_numpy('x')
assert np1 is np2 # Same cached view

Pass a device to copy to GPU (unavoidable for GPU compute):
MXFrame.get_buffer_address
def get_buffer_address(
column:str, # Column name
)->int: # Memory address
Get memory address of column’s data buffer (for zero-copy verification).
df = MXFrame({'x': [1.0, 2.0, 3.0]})
arrow_addr = df.get_buffer_address('x')
numpy_addr = df.to_numpy('x').ctypes.data
assert arrow_addr == numpy_addr # Same memory!

# 📋 Test 1: Create MXFrame
df = MXFrame({
'price': [10.0, 20.0, 30.0, 40.0, 50.0],
'qty': [1, 2, 3, 4, 5],
})
print(f"Created: {df}")
print(f"Columns: {df.columns}")
print(f"Rows: {df.num_rows}")

Created: MXFrame(5 rows, ['price', 'qty'])
Columns: ['price', 'qty']
Rows: 5
# ⚡ Test 3: Arrow to MAX Tensor
tensor = df.to_max_tensor('price')
print(f"Tensor shape: {tensor.shape}")
print(f"Tensor dtype: {tensor.dtype}")
print(f"Tensor values: {tensor.to_numpy()}")
print(f"Arrow values: {df['price'].to_pylist()}")

Tensor shape: (5,)
Tensor dtype: DType.float64
Tensor values: [10. 20. 30. 40. 50.]
Arrow values: [10.0, 20.0, 30.0, 40.0, 50.0]
# 🔢 Test 5: Integer column
numpy_qty = df.to_numpy('qty')
tensor_qty = df.to_max_tensor('qty')
print(f"qty NumPy dtype: {numpy_qty.dtype}")
print(f"qty Tensor dtype: {tensor_qty.dtype}")
print(f"qty values: {numpy_qty}")

qty NumPy dtype: int64
qty Tensor dtype: DType.int64
qty values: [1 2 3 4 5]
# 🚀 Test 7: Large array performance test (proves zero-copy)
n = 10_000_000
large_arr = pa.array(np.random.rand(n).astype(np.float32))
df_large = MXFrame(pa.table({'data': large_arr}))
# Time the zero-copy conversion
t0 = time.perf_counter()
for _ in range(100):
    tensor = df_large.to_max_tensor('data')
elapsed = (time.perf_counter() - t0) / 100 * 1000
print(f"Array size: {n:,} elements ({n * 4 / 1e6:.1f} MB)")
print(f"Arrow → MAX Tensor: {elapsed:.3f} ms")
print(f"Throughput: {n * 4 / 1e9 / (elapsed / 1000):.1f} GB/s")
print("(Fast time = zero-copy confirmed)")

Array size: 10,000,000 elements (40.0 MB)
Arrow → MAX Tensor: 8.594 ms
Throughput: 4.7 GB/s
(Fast time = zero-copy confirmed)
# ❌ Test 9: Error handling - nulls should fail
arr_with_nulls = pa.array([1.0, None, 3.0])
df_nulls = MXFrame(pa.table({'col': arr_with_nulls}))
try:
    _ = df_nulls.to_numpy('col')
    print("ERROR: Should have raised ValueError!")
except ValueError as e:
    print(f"Correctly rejected nulls: {e}")

Correctly rejected nulls: Array has 1 nulls - zero-copy not possible