You can compute the MD5 checksum in chunks, as demonstrated
e.g. in Is there a MD5 library that doesn't require the whole input at the same time?.
Here is a possible implementation using Swift (now updated for Swift 5)
import CommonCrypto
func md5File(url: URL) -> Data? {
let bufferSize = 1024 * 1024
do {
// Open file for reading:
let file = try FileHandle(forReadingFrom: url)
defer {
file.closeFile()
}
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
// Read up to `bufferSize` bytes, until EOF is reached, and update MD5 context:
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
data.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0.baseAddress, numericCast(data.count))
}
return true // Continue
} else {
return false // End of file
}
}) { }
// Compute the MD5 digest:
var digest: [UInt8] = Array(repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
_ = CC_MD5_Final(&digest, &context)
return Data(digest)
} catch {
print("Cannot open file:", error.localizedDescription)
return nil
}
}
The autorelease pool is needed to release the memory returned by
file.readData(), without it the entire (potentially huge) file
would be loaded into memory. Thanks to Abhi Beckert for noticing that
and providing an implementation.
If you need the digest as a hex-encoded string then change the
return type to String? and replace
return digest
by
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest