NTSC Part 2
04 May 2025

In part 1, I detailed the challenges inherent in using Core Image to produce an NTSC video filter for Apple platforms, and built up an alternative processing pipeline in Metal. In this post, I'll discuss how to integrate that approach into a whole application, following an MTLCommandBuffer through multiple processing steps and tying it into some common video flows.
Command Buffers Redux
To recap, MTLCommandBuffers contain the serialized function calls and references to data structures needed to run code on the GPU. To implement our NTSC filter, we'll append the multiple function calls we need to the single command buffer we'll use to render the frame:
- Convert RGB data to YIQ
- Composite preemphasis
- Video noise
etc.
The actual implementation of this looks like:
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
    return nil
}
let textures: [MTLTexture] = ...
let pool = Pool(vals: textures)
do {
    // Each step encodes its kernel invocations onto the same command buffer.
    try Self.convertToYIQ(
        try pool.last,
        output: pool.next(),
        commandBuffer: commandBuffer,
        device: device,
        pipelineCache: pipelineCache
    )
    try Self.compositePreemphasis(
        input: pool.last,
        texA: pool.next(),
        texB: pool.next(),
        output: pool.next(),
        filter: compositePreemphasisFilter,
        preemphasis: effect.compositePreemphasis,
        commandBuffer: commandBuffer,
        device: device,
        pipelineCache: pipelineCache
    )
    try Self.videoNoise(
        input: pool.last,
        tex: pool.next(),
        output: pool.next(),
        filter: noiseFilter,
        zoom: effect.compositeNoiseZoom,
        contrast: effect.compositeNoiseContrast,
        frameNumber: frameNum,
        commandBuffer: commandBuffer
    )
    // ...
    // Kick off the GPU work and block until the final texture is ready.
    commandBuffer.commit()
    commandBuffer.waitUntilCompleted()
    return CIImage(mtlTexture: pool.last)
}
The important takeaway here is that the same command buffer passes through multiple functions, collecting Metal function invocations along the way. When we're ready for the GPU to perform the work, we commit the buffer and wait, then return a new CIImage backed by the last-rendered-to texture.
Unlike Core Image, which lets us stitch multiple filters together with outputImage and inputImage, working with Metal requires us to keep tabs on which textures have been drawn to, and when. If we drew to texture A in step 1, texture A needs to be used as the input texture in step 2.

It's preferable to triple-buffer textures so that one function's output texture becomes the next function's input texture (i.e., the last-rendered-to texture becomes the next input while the "spare" texture becomes the next output). The Pool class helps us manage this so we don't have to hard-code references to specific textures in the pipeline. This also lets us skip past or comment out sections of the filter while maintaining the pipeline's integrity.
class Pool<A> {
    typealias Element = A

    let vals: Array<Element>
    var currentIndex = 0

    init(vals: Array<Element>) {
        self.vals = vals
    }

    /// Returns the next texture to render into, advancing the ring.
    func next() -> Element {
        defer { currentIndex = (currentIndex + 1) % vals.count }
        return vals[currentIndex]
    }

    /// The most recently vended texture, i.e., the last one rendered to.
    var last: Element {
        let prevIndex = currentIndex - 1
        if vals.indices.contains(prevIndex) {
            return vals[prevIndex]
        } else {
            let lastIndex = vals.endIndex - 1
            return vals[lastIndex]
        }
    }
}
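As a quick illustration of the rotation (the texture names here are placeholders, not part of the real pipeline), handing out textures and asking for the last one looks like this:

// Illustrative only: cycling through a three-texture pool.
let pool = Pool(vals: [texA, texB, texC])
let firstOutput = pool.next()  // texA: the first pass renders into this
let nextInput = pool.last      // texA again: the last-rendered-to texture feeds the next pass
let secondOutput = pool.next() // texB: the spare becomes the next output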
The individual filter instances (e.g., CompositePreemphasisFilter) are used to encapsulate the Metal boilerplate.
public class CompositePreemphasisFilter {
    typealias Error = TextureFilterError

    private let device: MTLDevice
    private let pipelineCache: MetalPipelineCache
    private var highpassFilter: HighpassFilter
    var preemphasis: Float16 = NTSCEffect.default.compositePreemphasis

    init(frequencyCutoff: Float, device: MTLDevice, pipelineCache: MetalPipelineCache) {
        self.device = device
        self.pipelineCache = pipelineCache
        let lowpass = LowpassFilter(frequencyCutoff: frequencyCutoff, device: device)
        self.highpassFilter = HighpassFilter(lowpassFilter: lowpass, device: device, pipelineCache: pipelineCache)
    }

    func run(input: MTLTexture, texA: MTLTexture, texB: MTLTexture, output: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
        let highpassed = texA
        let spare = texB
        // First pass: highpass the input into `highpassed`, using `spare` as scratch space.
        try highpassFilter.run(input: input, tex: spare, output: highpassed, commandBuffer: commandBuffer)
        // Second pass: the compositePreemphasis kernel combines the input with the highpassed signal.
        try encodeKernelFunction(.compositePreemphasis, pipelineCache: pipelineCache, textureWidth: input.width, textureHeight: input.height, commandBuffer: commandBuffer, encode: { encoder in
            encoder.setTexture(input, index: 0)
            encoder.setTexture(highpassed, index: 1)
            encoder.setTexture(output, index: 2)
            var preemphasis = preemphasis
            encoder.setBytes(&preemphasis, length: MemoryLayout<Float16>.size, index: 0)
        })
    }
}
And encodeKernelFunction wraps up even more of it:
func encodeKernelFunction(_ kernelFunction: KernelFunction, pipelineCache: MetalPipelineCache, textureWidth: Int, textureHeight: Int, commandBuffer: MTLCommandBuffer, encode: (MTLComputeCommandEncoder) -> Void) throws {
    let pipelineState = try pipelineCache.pipelineState(function: kernelFunction)
    guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
        throw TextureFilterError.cantMakeComputeEncoder
    }
    encoder.setComputePipelineState(pipelineState)
    // Let the caller bind its own textures and buffers.
    encode(encoder)
    let executionWidth = pipelineState.threadExecutionWidth
    // One thread per pixel. Note that dispatchThreads(_:threadsPerThreadgroup:) requires a GPU
    // that supports non-uniform threadgroup sizes.
    encoder.dispatchThreads(
        MTLSize(width: textureWidth, height: textureHeight, depth: 1),
        threadsPerThreadgroup: MTLSize(width: executionWidth, height: executionWidth, depth: 1)
    )
    encoder.endEncoding()
}
It relies on MetalPipelineCache, which is responsible for caching the MTLComputePipelineStates that represent the Metal functions we want to call.
class MetalPipelineCache {
    enum Error: Swift.Error {
        case cantMakeFunction(KernelFunction)
        case underlying(Swift.Error)
        case noPipelineStateAvailable
    }

    let device: MTLDevice
    let library: MTLLibrary

    init(device: MTLDevice, library: MTLLibrary) throws {
        self.device = device
        self.library = library
        // Warm cache
        for function in KernelFunction.allCases {
            _ = try pipelineState(function: function)
        }
    }

    var pipelineStateByFunction: [KernelFunction: MTLComputePipelineState] = [:]

    func pipelineState(function: KernelFunction) throws -> MTLComputePipelineState {
        if let pipelineState = pipelineStateByFunction[function] {
            return pipelineState
        }
        guard let fn = library.makeFunction(name: function.rawValue) else {
            throw Error.cantMakeFunction(function)
        }
        do {
            let pipelineState = try device.makeComputePipelineState(function: fn)
            self.pipelineStateByFunction[function] = pipelineState
            return pipelineState
        } catch {
            throw Error.underlying(error)
        }
    }
}
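For context, here's a minimal sketch of wiring these pieces up at startup; it assumes the kernels are compiled into the app's default Metal library:

// Sketch only: create the device, library, queue, and pipeline cache once and reuse them.
guard let device = MTLCreateSystemDefaultDevice(),
      let library = device.makeDefaultLibrary(),
      let commandQueue = device.makeCommandQueue() else {
    fatalError("Metal is not available on this device")
}
let pipelineCache = try MetalPipelineCache(device: device, library: library)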
Instantiating Textures
Unlike with Core Image, when working with Metal we're responsible for creating and owning the textures we need. To create them, we can use an MTLTextureDescriptor:
static func texture(width: Int, height: Int, pixelFormat: MTLPixelFormat, device: MTLDevice) -> MTLTexture? {
    let textureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
        pixelFormat: pixelFormat,
        width: width,
        height: height,
        mipmapped: false
    )
    textureDescriptor.usage = [.shaderRead, .shaderWrite, .renderTarget]
    return device.makeTexture(descriptor: textureDescriptor)
}
Since we want to store negative floating-point values in our NTSC filter, we declare our pixel format as .rgba16Float.
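Putting the last two pieces together, building the pool used at the top of the post might look something like this (inputWidth and inputHeight are placeholders for the dimensions of the incoming frame):

// Sketch: three .rgba16Float textures, sized to the input frame, cycled by the Pool.
guard let texA = Self.texture(width: inputWidth, height: inputHeight, pixelFormat: .rgba16Float, device: device),
      let texB = Self.texture(width: inputWidth, height: inputHeight, pixelFormat: .rgba16Float, device: device),
      let texC = Self.texture(width: inputWidth, height: inputHeight, pixelFormat: .rgba16Float, device: device) else {
    return nil
}
let pool = Pool(vals: [texA, texB, texC])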
So that’s pretty much it! You can see how we start off with a Metal function that we want to call, build up a compute pipeline, add the function invocations to a command buffer, and retrieve our final processed image data on the other end.
Integration
Of course, the individual frame buffers need to come from somewhere. Below, I'll show two examples – one for live video feed data, and one for offline processing of an existing AVAsset.
Live Video
Recording from the camera on iOS can be managed with AVCaptureSession – we just need to set ourselves as the video data output's sample buffer delegate in order to receive the following callback:
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
        return
    }
    let ciImage = CIImage(cvImageBuffer: pixelBuffer)
        .oriented(forExifOrientation: Int32(CGImagePropertyOrientation.right.rawValue))
    self.lastImage = ciImage
    DispatchQueue.main.async {
        self.mtkView.setNeedsDisplay()
    }
}
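For completeness, here's a rough sketch of the capture setup that installs us as that delegate; error handling and session configuration are elided, and the queue label is arbitrary:

// Sketch only: assumes `self` conforms to AVCaptureVideoDataOutputSampleBufferDelegate.
let session = AVCaptureSession()
guard let camera = AVCaptureDevice.default(for: .video),
      let cameraInput = try? AVCaptureDeviceInput(device: camera) else { return }
session.addInput(cameraInput)

let videoOutput = AVCaptureVideoDataOutput()
videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera-frames"))
session.addOutput(videoOutput)
session.startRunning()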
This gives us a buffer that we can process the next time our MTKView is ready for a frame (which we signal by calling setNeedsDisplay above). Since we've set ourselves as the MTKView.delegate, we're expected to implement the draw(in:) method, which is where we actually filter our input image.
func draw(in view: MTKView) {
    guard let lastImage else {
        return
    }
    guard let drawable = view.currentDrawable else {
        return
    }
    guard let commandBuffer = self.commandQueue.makeCommandBuffer() else {
        return
    }
    let dSize = view.drawableSize
    let destination = CIRenderDestination(
        width: Int(dSize.width),
        height: Int(dSize.height),
        pixelFormat: view.colorPixelFormat,
        commandBuffer: commandBuffer,
        mtlTextureProvider: {
            drawable.texture
        })
    // Apply NTSC filter
    filter.inputImage = lastImage
    guard let outputImage = filter.outputImage else {
        return
    }
    let widthMultiple = dSize.width / outputImage.extent.size.width
    let heightMultiple = dSize.height / outputImage.extent.size.height
    let scaleFactor = max(widthMultiple, heightMultiple)
    let scaledImage = outputImage.transformed(by: CGAffineTransform(scaleX: scaleFactor, y: scaleFactor))
    do {
        try ciContext.startTask(toRender: scaledImage, to: destination)
        commandBuffer.present(drawable)
        commandBuffer.commit()
    } catch {
        print("Error starting render task: \(error)")
    }
}
The code above shows how we can ask the MTKView for its current drawable, generate a CIImage, and asynchronously render to the screen using CIRenderDestination.
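One note on configuration: for the setNeedsDisplay-driven flow above to work, the MTKView needs to be set up roughly like this (a sketch, using the view and device from earlier):

// Sketch: draw(in:) then runs only when we call setNeedsDisplay() from the capture callback.
mtkView.device = device
mtkView.delegate = self
mtkView.framebufferOnly = false      // allow Core Image to write into the drawable's texture
mtkView.isPaused = true
mtkView.enableSetNeedsDisplay = true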
Offline Processing
There are a couple of hooks for processing the frames of an existing AVAsset. The simplest is AVVideoComposition(asset:applyingCIFiltersWithHandler:), which lets us do our image processing in a closure: the system provides us with an input CIImage, and we hand back our filtered output by finishing the request, as in the sketch below.
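Here, filter and asset stand in for the NTSC filter instance and the source asset, and the error type is purely illustrative:

// Sketch only: the closure-based route.
enum OfflineFilterError: Swift.Error { case noOutputImage } // hypothetical error type for the sketch

let videoComposition = AVVideoComposition(asset: asset, applyingCIFiltersWithHandler: { request in
    filter.inputImage = request.sourceImage
    if let output = filter.outputImage {
        request.finish(with: output, context: nil)
    } else {
        request.finish(with: OfflineFilterError.noOutputImage)
    }
})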
The second hook is AVVideoComposition's customVideoCompositorClass property, which gives us low-level access to the underlying CVPixelBuffer data for each of the tracks in our composition. This is the route we want to take if we're doing multitrack video editing.
final class LayerVideoCompositor: NSObject, AVVideoCompositing {
    private var renderContext = AVVideoCompositionRenderContext()
    static let ciContext = CIContext(options: [.cacheIntermediates: false])

    enum LayerVideoCompositingError: Error {
        case invalidRequest
        case sourceFrameBuffer
        case ciFilterCompositing
    }

    // AVVideoCompositing also requires sourcePixelBufferAttributes,
    // requiredPixelBufferAttributesForRenderContext, and renderContextChanged(_:);
    // those are omitted here for brevity.

    func startRequest(_ request: AVAsynchronousVideoCompositionRequest) {
        do {
            let renderedBuffer = try renderFrame(forRequest: request)
            request.finish(withComposedVideoFrame: renderedBuffer)
        } catch {
            request.finish(with: error)
        }
    }

    private func renderFrame(forRequest request: AVAsynchronousVideoCompositionRequest) throws -> CVPixelBuffer {
        return try autoreleasepool {
            switch request.videoCompositionInstruction {
            case let standardInstruction as StandardVideoCompositionInstruction:
                return try renderStandardFrame(request: request, instruction: standardInstruction)
            default:
                throw LayerVideoCompositingError.invalidRequest
            }
        }
    }

    func renderStandardFrame(request: AVAsynchronousVideoCompositionRequest, instruction: StandardVideoCompositionInstruction) throws -> CVPixelBuffer {
        guard let videoFrameBuffer: CVPixelBuffer = request.sourceFrame(byTrackID: instruction.videoTrackID) else {
            throw LayerVideoCompositingError.sourceFrameBuffer
        }
        let inputImage = CIImage(cvPixelBuffer: videoFrameBuffer)
        let outputImage = applyMetalFilter(to: inputImage)
        guard let renderedBuffer = renderContext.newPixelBuffer() else {
            throw LayerVideoCompositingError.ciFilterCompositing
        }
        let renderDestination = CIRenderDestination(pixelBuffer: renderedBuffer)
        try Self.ciContext.startTask(toRender: outputImage, to: renderDestination)
        return renderedBuffer
    }
}
The conversion of videoFrameBuffer to a CIImage can be dropped in favor of rendering the pixel buffer directly to a texture if Core Image isn't part of your processing pipeline.
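Hooking the compositor into a composition or export is then just a matter of setting customVideoCompositorClass — a sketch, assuming an existing asset and export session (the instruction types, like StandardVideoCompositionInstruction, are app-specific):

// Sketch only: attach the custom compositor to a video composition for export.
let videoComposition = AVMutableVideoComposition(propertiesOf: asset)
videoComposition.customVideoCompositorClass = LayerVideoCompositor.self
exportSession.videoComposition = videoComposition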
And that’s it! We’ve discussed the role of command buffers and how to stitch them through multiple processing kernels, some helper code for setting up pipelines, how to instantiate and cycle textures, and how to tie our Metal processing into live and offline video processing. Happy shading!