(For the purposes of this question, I'm disregarding file-copying APIs such as CopyFile, etc.)
I'm trying to answer the question: if I need to copy many large files, which method is fastest?
I can think of four basic methods for copying a file:
1. ReadFile + WriteFile
2. MapViewOfFile the source into memory, and WriteFile the buffer to the destination
3. MapViewOfFile the destination into memory, and ReadFile the source into the buffer
4. MapViewOfFile both files, and memcpy from one file to another
Furthermore, in each case, there are also some options I can set, such as FILE_FLAG_NO_BUFFERING and SEC_LARGE_PAGE.
However, I don't know how to properly benchmark this. I've written the following code:
#include <stdio.h>
#include <time.h>
#include <tchar.h>
#include <Windows.h>
// Copies the whole of `source` into `sink` in a single transfer, times the
// transfer with clock(), and prints the throughput in MiB/s to stderr.
//
//   source     handle to the file to read; its current size is the copy size
//   sink       handle to the file to write
//   mapsource  true: read through a file-mapping view; false: use ReadFile
//   mapsink    true: write through a file-mapping view; false: use WriteFile
//
// Only the transfer itself (plus flushing a mapped sink) is timed; creating
// and tearing down the mappings is not. Any failure is reported on stderr and
// no throughput line is printed for that run.
void MyCopyFile(HANDLE source, HANDLE sink, bool mapsource, bool mapsink)
{
    // Everything is declared up front so that `goto cleanup` never jumps
    // over an initialization.
    LARGE_INTEGER size = { 0 };
    HANDLE msource = NULL;
    HANDLE msink = NULL;
    void const *psource = NULL;
    void *psink = NULL;
    DWORD nbytes = 0;
    DWORD nw = 0;
    bool ok = false;
    clock_t start = 0;
    clock_t end = 0;

    if (!GetFileSizeEx(source, &size))
    {
        fprintf(stderr, "Error querying file size: %u\n", (unsigned int)GetLastError());
        return;
    }
    if (size.QuadPart == 0) { return; }  // nothing to copy, nothing to report

    // ReadFile/WriteFile take a DWORD byte count, so a single-shot transfer
    // cannot exceed MAXDWORD; refuse instead of silently truncating.
    if (size.QuadPart < 0 || size.QuadPart > (LONGLONG)MAXDWORD)
    {
        fprintf(stderr, "File size %lld does not fit in a single transfer\n", (long long)size.QuadPart);
        return;
    }
    nbytes = (DWORD)size.QuadPart;

    if (mapsource)
    {
        msource = CreateFileMapping(source, NULL, PAGE_READONLY, 0, 0, NULL);
        if (msource) { psource = MapViewOfFile(msource, FILE_MAP_READ, 0, 0, nbytes); }
        if (!psource)
        {
            fprintf(stderr, "Error mapping source file: %u\n", (unsigned int)GetLastError());
            goto cleanup;
        }
    }
    if (mapsink)
    {
        // Passing an explicit maximum size makes the mapping cover `size`
        // bytes of the sink file.
        msink = CreateFileMapping(sink, NULL, PAGE_READWRITE, size.HighPart, size.LowPart, NULL);
        if (msink) { psink = MapViewOfFile(msink, FILE_MAP_WRITE, 0, 0, nbytes); }
        if (!psink)
        {
            fprintf(stderr, "Error mapping sink file: %u\n", (unsigned int)GetLastError());
            goto cleanup;
        }
    }

    start = clock();
    if (mapsource && mapsink)
    {
        memcpy(psink, psource, nbytes);
        nw = nbytes;
        ok = true;
    }
    else if (mapsource)
    {
        ok = WriteFile(sink, psource, nbytes, &nw, NULL) != FALSE;
        if (!ok) { fprintf(stderr, "Error writing to file: %u\n", (unsigned int)GetLastError()); }
    }
    else if (mapsink)
    {
        ok = ReadFile(source, psink, nbytes, &nw, NULL) != FALSE;
        if (!ok) { fprintf(stderr, "Error reading from file: %u\n", (unsigned int)GetLastError()); }
    }
    else
    {
        // The caller may have opened the handles with FILE_FLAG_NO_BUFFERING,
        // which requires a sector-aligned buffer. VirtualAlloc returns
        // page-aligned memory; malloc makes no such guarantee.
        void *const buf = VirtualAlloc(NULL, nbytes, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
        if (!buf)
        {
            fprintf(stderr, "Error allocating copy buffer: %u\n", (unsigned int)GetLastError());
        }
        else
        {
            DWORD nr = 0;
            if (!ReadFile(source, buf, nbytes, &nr, NULL)) { fprintf(stderr, "Error reading from file: %u\n", (unsigned int)GetLastError()); }
            else if (!WriteFile(sink, buf, nr, &nw, NULL)) { fprintf(stderr, "Error writing to file: %u\n", (unsigned int)GetLastError()); }
            else { ok = true; }
            VirtualFree(buf, 0, MEM_RELEASE);
        }
    }
    // Only a mapped sink has a view to flush.
    if (ok && psink && !FlushViewOfFile(psink, nbytes))
    {
        fprintf(stderr, "Error flushing sink view: %u\n", (unsigned int)GetLastError());
        ok = false;
    }
    end = clock();

cleanup:
    if (psource) { UnmapViewOfFile(psource); }
    if (psink) { UnmapViewOfFile(psink); }
    if (msource) { CloseHandle(msource); }
    if (msink) { CloseHandle(msink); }
    // The +1 in the divisor keeps the division defined when the elapsed tick
    // count is zero.
    if (ok && nw) { fprintf(stderr, "(%d, %d): %u MiB/s\n", mapsource, mapsink, (unsigned int)(size.QuadPart * CLOCKS_PER_SEC / (((long long)(end - start) << 20) + 1))); }
}
int main()
{
// Request permission to extend file without zeroing, for faster performance
{
enum TokenPrivilege { SeManageVolumePrivilege = 28 };
typedef NTSTATUS NTAPI PRtlAdjustPrivilege(IN TokenPrivilege Privilege, IN BOOLEAN Enable, IN BOOLEAN Client, OUT PBOOLEAN WasEnabled);
static PRtlAdjustPrivilege &RtlAdjustPrivilege = *(PRtlAdjustPrivilege *)(GetProcAddress(GetModuleHandle(_T("ntdll.dll")), _CRT_STRINGIZE(RtlAdjustPrivilege)));
BOOLEAN old; RtlAdjustPrivilege(SeManageVolumePrivilege, TRUE, FALSE, &old);
}
for (int i = 0;; i++)
{
HANDLE source = CreateFile(_T("TempSource.bin"), FILE_READ_DATA | FILE_WRITE_DATA | SYNCHRONIZE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_ALWAYS, FILE_FLAG_DELETE_ON_CLOSE | FILE_FLAG_NO_BUFFERING | FILE_FLAG_SEQUENTIAL_SCAN, NULL);
HANDLE sink = CreateFile(_T("TempSink.bin"), FILE_READ_DATA | FILE_WRITE_DATA | SYNCHRONIZE, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_ALWAYS, FILE_FLAG_DELETE_ON_CLOSE | FILE_FLAG_NO_BUFFERING | FILE_FLAG_SEQUENTIAL_SCAN, NULL);
LARGE_INTEGER size; size.QuadPart = 1 << 26;
LARGE_INTEGER zero = { 0 };
SetFilePointerEx(source, size, &size, FILE_BEGIN);
SetEndOfFile(source);
SetFileValidData(source, size.QuadPart);
SetFilePointerEx(source, zero, &zero, FILE_BEGIN);
SetFilePointerEx(sink, zero, &zero, FILE_BEGIN);
MyCopyFile(source, sink, i % 2 != 0, i / 2 % 2 != 0);
FlushFileBuffers(source);
FlushFileBuffers(sink);
if ((i % 4) + 1 == 4) { fprintf(stderr, "\n"); }
CloseHandle(source);
CloseHandle(sink);
}
}
Unfortunately, my code gives me wildly different results on the first iteration than on the following iterations, so I have a hard time figuring out how to benchmark this operation.
Which method should be fastest, and how do I properly benchmark my system to confirm this?