Comparing IO performance: FileMapping vs. direct file access with a user-managed buffer
Published: 2019-03-04


I have recently been reading Chapter 17 of Windows via C/C++ (《Windows核心编程》).

The author advocates using FileMapping for large-file reads and writes. It lets you map a file directly into the process address space and leave the complex work of backing storage, caching, and paging to the operating system; the programmer simply treats the file as a range of memory addresses.
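For readers unfamiliar with the API, the basic pattern looks roughly like the following. This is a minimal sketch rather than the book's code: error handling is omitted, the function name is mine, and "sample.bin" is a placeholder path.

#include <windows.h>

// Minimal sketch: map an entire (small) file and scan it as ordinary memory.
// Error handling omitted; "sample.bin" is a placeholder path.
int CountZerosMappedSketch() {
    HANDLE hFile = CreateFile(TEXT("sample.bin"), GENERIC_READ, FILE_SHARE_READ,
        NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    DWORD dwSize = GetFileSize(hFile, NULL);

    // The view is just a pointer; the OS pages file contents in on demand.
    PBYTE pbFile = (PBYTE)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
    int nZeros = 0;
    for (DWORD i = 0; i < dwSize; i++)
        if (pbFile[i] == 0) nZeros++;

    UnmapViewOfFile(pbFile);
    CloseHandle(hMap);
    CloseHandle(hFile);
    return nZeros;
}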

However, I found that while this makes the coding simpler, handing all of the caching over to the operating system is not necessarily a good approach, especially for large-file IO. So I rewrote one of the book's examples for comparison. The rewritten code is below; it still supports the book's original logic as well.

Full code:

#define _CRT_SECURE_NO_WARNINGS
#include <windows.h>
#include <tchar.h>
#include <stdio.h>
#include <stdlib.h>
///////////////////////////////////////////////////////////////////////////////

class CStopwatch {
public:
    CStopwatch() { QueryPerformanceFrequency(&m_liPerfFreq); Start(); }

    void Start() { QueryPerformanceCounter(&m_liPerfStart); }

    __int64 Now() const {        // Returns # of milliseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000) / m_liPerfFreq.QuadPart);
    }

    __int64 NowInMicro() const { // Returns # of microseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000000) / m_liPerfFreq.QuadPart);
    }

private:
    LARGE_INTEGER m_liPerfFreq;  // Counts per second
    LARGE_INTEGER m_liPerfStart; // Starting count
};

CStopwatch stopwatch;

///////////////////////////////////////////////////////////////////////////////

typedef struct {
    PVOID pvAddr;       // block to scan (mapped view or read buffer)
    DWORD dwBlockSize;  // size of the block in bytes
    DWORD dwCount;      // number of zero bytes found
    bool  bIsFinished;  // set by the callback when the scan is done
} WORK_DATA, *PWORK_DATA;

typedef struct {
    WORK_DATA wDATA;
    PTP_WORK  pWork;
} WORKITEM_INFO, *PWORKITEM_INFO;

#define MAX_THREADS 4
PWORKITEM_INFO g_WORKITEM[MAX_THREADS] = { 0 };
PBYTE  g_BUFF[MAX_THREADS] = { 0 };
HANDLE hSemp = NULL;

///////////////////////////////////////////////////////////////////////////////

VOID CALLBACK WorkCallback(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work) {
    PWORK_DATA pData = (PWORK_DATA)Context;

    // Count the number of 0s in this block.
    PBYTE pFile = (PBYTE)pData->pvAddr;
    for (DWORD dwByte = 0; dwByte < pData->dwBlockSize; dwByte++) {
        if (pFile[dwByte] == 0)
            pData->dwCount++;
    }
    pData->bIsFinished = true;
    ReleaseSemaphoreWhenCallbackReturns(Instance, hSemp, 1);
}

__int64 CountWithFileMapMultiThreads(HANDLE hFileMap, __int64 qwFileSize,
    DWORD dwBlockSize, DWORD nThreads) {
    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);
    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate the work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = NULL;
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);  // wait for a free slot.

        // Clean up any finished work items.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {  // there is a work item.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) {
                    // This item has finished: collect the count.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                    // Unmap the view.
                    UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);
                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Get a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL)
                break;
        }

        if (nSlotID < nThreads) {  // there is a thread available.
            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
            // Map the view.
            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);
            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.pvAddr = pbFile;
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;
            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA, NULL);
            // Submit the work data to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock;  // advance the offset.
            qwFileSize -= dwBytesInBlock;    // reduce the remaining size.
        }
    }

    // Wait for all remaining work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {  // there is a work item memory block.
            if (g_WORKITEM[i]->pWork) {  // there is an active work in the thread pool.
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE);
                // Collect the count from the active work.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                // Unmap the view.
                UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);
                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                g_WORKITEM[i]->pWork = NULL;
            }
            // Free the memory for the work item.
            free(g_WORKITEM[i]);
            g_WORKITEM[i] = NULL;
        }
    }

    CloseHandle(hSemp);
    hSemp = NULL;
    return qwNumOfZeros;
}

// Count directly by reading blocks from the file (no file mapping).
__int64 CountWithFileMultiThread(HANDLE hFile, __int64 qwFileSize,
    DWORD dwBlockSize, DWORD nThreads) {
    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);
    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate the read buffers and work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_BUFF[i] = (PBYTE)malloc(dwBlockSize);
        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = g_BUFF[i];
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);  // wait for a free slot.

        // Clean up any finished work items.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {  // there is a work item.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) {
                    // This item has finished: collect the count.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Get a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL)
                break;
        }

        if (nSlotID < nThreads) {  // there is a thread available.
            // Determine the number of bytes to be read into this block.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
            DWORD dwRead = 0;
            ReadFile(hFile, g_BUFF[nSlotID], dwBytesInBlock, &dwRead, NULL);
            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;
            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA, NULL);
            // Submit the work data to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock;  // advance the offset.
            qwFileSize -= dwBytesInBlock;    // reduce the remaining size.
        }
    }

    // Wait for all remaining work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {  // there is a work item.
            if (g_WORKITEM[i]->pWork) {  // there is an active work in the thread pool.
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE);
                // Collect the count.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                g_WORKITEM[i]->pWork = NULL;
            }
        }
    }

    // Clean up the buffers and work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        free(g_BUFF[i]);
        g_BUFF[i] = NULL;
        free(g_WORKITEM[i]);
        g_WORKITEM[i] = NULL;
    }

    CloseHandle(hSemp);
    hSemp = NULL;
    return qwNumOfZeros;
}

///////////////////////////////////////////////////////////////////////////////

__int64 CountZeros(LPCTSTR szFileName, DWORD dwBlockSize, DWORD nThreads = 1,
    bool bUseFileMap = true) {
    // Views must always start on a multiple of the allocation granularity.
    SYSTEM_INFO sinf;
    GetSystemInfo(&sinf);

    // Open the data file.
    HANDLE hFile = CreateFile(szFileName, GENERIC_READ, FILE_SHARE_READ, NULL,
        OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);

    DWORD dwFileSizeHigh;
    __int64 qwFileSize = GetFileSize(hFile, &dwFileSizeHigh);
    qwFileSize += (((__int64)dwFileSizeHigh) << 32);

    // Round the block size up to a multiple of the allocation granularity (ceiling).
    DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1) / sinf.dwAllocationGranularity;
    dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount;

    if (!bUseFileMap) {
        LONGLONG lLastTimeStamp = stopwatch.Now();
        _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
            dwBlockSize, szFileName, qwFileSize);
        __int64 qwNumOfZeros = CountWithFileMultiThread(hFile, qwFileSize, dwBlockSize, nThreads);
        LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
        _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);
        CloseHandle(hFile);
        return qwNumOfZeros;
    }

    // Create the file-mapping object.
    HANDLE hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    // We no longer need access to the file object's handle.
    CloseHandle(hFile);

    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    LONGLONG lLastTimeStamp = stopwatch.Now();
    _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
        dwBlockSize, szFileName, qwFileSize);

    if (nThreads > 1) {  // multithreaded mapping path.
        qwNumOfZeros = CountWithFileMapMultiThreads(hFileMapping, qwFileSize, dwBlockSize, nThreads);
    } else {
        while (qwFileSize > 0) {
            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = dwBlockSize;
            if (qwFileSize < dwBlockSize)
                dwBytesInBlock = (DWORD)qwFileSize;

            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMapping, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);

            // Count the number of 0s in this block.
            for (DWORD dwByte = 0; dwByte < dwBytesInBlock; dwByte++) {
                if (pbFile[dwByte] == 0)
                    qwNumOfZeros++;
            }

            // Unmap the view; we don't want multiple views in our address space.
            UnmapViewOfFile(pbFile);

            // Skip to the next set of bytes in the file.
            qwFileOffset += dwBytesInBlock;
            qwFileSize -= dwBytesInBlock;
        }
    }

    LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
    _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);
    CloseHandle(hFileMapping);
    return qwNumOfZeros;
}

void testcase1() {
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase2() {
    // block 512KB, single thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase3() {
    // block 512KB, single thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    testcase2();
    return 0;
}

I extended the example in several ways: it supports multithreaded concurrency, a pre-definable block size rounded to the system allocation granularity (64KB by default on my machine), and a non-mapping mode that operates on the file directly through a user-managed buffer. The granularity handling is shown right below.
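The granularity handling is a simple ceiling round in CountZeros: MapViewOfFile requires view offsets to be multiples of the system allocation granularity, so the requested block size is rounded up first. Excerpted from the listing above:

// Excerpt from CountZeros: round the requested block size up to a
// multiple of the system allocation granularity (64KB on my machine).
SYSTEM_INFO sinf;
GetSystemInfo(&sinf);
DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1) / sinf.dwAllocationGranularity;
dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount;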

Test 1: Compare single-threaded FileMap, multithreaded FileMap, and multithreaded non-FileMap performance. All processes that might generate IO were closed (an idealized, best-case comparison).

1) First, operate on a file close to 3GB in size, single-threaded throughout, using a file mapping, with block sizes of 64KB, 512KB, and 4MB.

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}

Results:

2) Next, operate on the same roughly 3GB file using 1, 2, and 4 threads, with the file mapping as the access method and a fixed block size of 512KB.
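In this mode each work item maps its own view at a 64-bit file offset; MapViewOfFile takes that offset split into high and low DWORDs. The relevant excerpt from CountWithFileMapMultiThreads above:

// Excerpt: map one block's view at a 64-bit file offset.
PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ,
    (DWORD)(qwFileOffset >> 32),         // high 32 bits of the offset
    (DWORD)(qwFileOffset & 0xFFFFFFFF),  // low 32 bits of the offset
    dwBytesInBlock);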

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 512KB, single thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}
Results:

3) Finally, operate on the same roughly 3GB file using 1, 2, and 4 threads, this time operating on the file directly (no file mapping), again with a fixed block size of 512KB. A simplified sketch of this path follows.
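Conceptually, the non-mapping path reduces to the following single-threaded sketch, a simplification of CountWithFileMultiThread above (the function name is mine; error handling omitted):

// Simplified sketch of the direct-IO path: read each block into our own
// buffer with ReadFile and scan it, instead of mapping views.
__int64 CountZerosDirectSketch(HANDLE hFile, __int64 qwFileSize, DWORD dwBlockSize) {
    PBYTE pBuff = (PBYTE)malloc(dwBlockSize);
    __int64 qwZeros = 0;
    while (qwFileSize > 0) {
        DWORD dwBytes = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
        DWORD dwRead = 0;
        ReadFile(hFile, pBuff, dwBytes, &dwRead, NULL);  // sequential read advances the file pointer
        for (DWORD i = 0; i < dwRead; i++)
            if (pBuff[i] == 0) qwZeros++;
        qwFileSize -= dwBytes;
    }
    free(pBuff);
    return qwZeros;
}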

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 512KB, single thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}
Results:

With sufficient IO bandwidth, multithreading improves performance.

Test 2: Compare single-threaded, multithreaded, and multithreaded non-FileMap performance with IO-generating software running (QQ, the Chrome browser).

Because the interference is unstable and there are too many confounding factors, this test will be completed in a follow-up...
