Comparing IO performance: FileMapping vs. direct file access with a user-managed buffer
Published: 2019-03-04


I have recently been reading Chapter 17 of Windows via C/C++ (《Windows核心编程》).

The author advocates using FileMapping for large-file reads and writes. It lets you map a file directly into the process address space and leave the complex work of backing storage, caching, and paging to the operating system; the programmer simply treats the file as a range of memory addresses.
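For readers unfamiliar with the API, the basic pattern looks roughly like the following. This is a minimal sketch rather than the book's code: error handling is omitted, the function name is mine, and "sample.bin" is a placeholder path.

#include <windows.h>

// Minimal sketch: map an entire (small) file and scan it as ordinary memory.
// Error handling omitted; "sample.bin" is a placeholder path.
int CountZerosMappedSketch() {
    HANDLE hFile = CreateFile(TEXT("sample.bin"), GENERIC_READ, FILE_SHARE_READ,
        NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    DWORD dwSize = GetFileSize(hFile, NULL);

    // The view is just a pointer; the OS pages file contents in on demand.
    PBYTE pbFile = (PBYTE)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
    int nZeros = 0;
    for (DWORD i = 0; i < dwSize; i++)
        if (pbFile[i] == 0) nZeros++;

    UnmapViewOfFile(pbFile);
    CloseHandle(hMap);
    CloseHandle(hFile);
    return nZeros;
}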

However, I found that while this makes the coding simpler, handing all of the caching over to the operating system is not necessarily a good approach, especially for large-file IO. So I rewrote one of the book's examples for comparison. The rewritten code is below; it still supports the book's original logic as well.

Full code:

#define _CRT_SECURE_NO_WARNINGS
#include <windows.h>
#include <tchar.h>
#include <stdio.h>
#include <stdlib.h>
///////////////////////////////////////////////////////////////////////////////

class CStopwatch {
public:
    CStopwatch() { QueryPerformanceFrequency(&m_liPerfFreq); Start(); }

    void Start() { QueryPerformanceCounter(&m_liPerfStart); }

    __int64 Now() const {        // Returns # of milliseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000) / m_liPerfFreq.QuadPart);
    }

    __int64 NowInMicro() const { // Returns # of microseconds since Start was called
        LARGE_INTEGER liPerfNow;
        QueryPerformanceCounter(&liPerfNow);
        return (((liPerfNow.QuadPart - m_liPerfStart.QuadPart) * 1000000) / m_liPerfFreq.QuadPart);
    }

private:
    LARGE_INTEGER m_liPerfFreq;  // Counts per second
    LARGE_INTEGER m_liPerfStart; // Starting count
};

CStopwatch stopwatch;

///////////////////////////////////////////////////////////////////////////////

typedef struct {
    PVOID pvAddr;       // block to scan (mapped view or read buffer)
    DWORD dwBlockSize;  // size of the block in bytes
    DWORD dwCount;      // number of zero bytes found
    bool  bIsFinished;  // set by the callback when the scan is done
} WORK_DATA, *PWORK_DATA;

typedef struct {
    WORK_DATA wDATA;
    PTP_WORK  pWork;
} WORKITEM_INFO, *PWORKITEM_INFO;

#define MAX_THREADS 4
PWORKITEM_INFO g_WORKITEM[MAX_THREADS] = { 0 };
PBYTE  g_BUFF[MAX_THREADS] = { 0 };
HANDLE hSemp = NULL;

///////////////////////////////////////////////////////////////////////////////

VOID CALLBACK WorkCallback(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work) {
    PWORK_DATA pData = (PWORK_DATA)Context;

    // Count the number of 0s in this block.
    PBYTE pFile = (PBYTE)pData->pvAddr;
    for (DWORD dwByte = 0; dwByte < pData->dwBlockSize; dwByte++) {
        if (pFile[dwByte] == 0)
            pData->dwCount++;
    }
    pData->bIsFinished = true;
    ReleaseSemaphoreWhenCallbackReturns(Instance, hSemp, 1);
}

__int64 CountWithFileMapMultiThreads(HANDLE hFileMap, __int64 qwFileSize,
    DWORD dwBlockSize, DWORD nThreads) {
    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);
    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate the work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = NULL;
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);  // wait for a free slot.

        // Clean up any finished work items.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {  // there is a work item.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) {
                    // This item has finished: collect the count.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                    // Unmap the view.
                    UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);
                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Get a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL)
                break;
        }

        if (nSlotID < nThreads) {  // there is a thread available.
            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
            // Map the view.
            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);
            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.pvAddr = pbFile;
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;
            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA, NULL);
            // Submit the work data to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock;  // advance the offset.
            qwFileSize -= dwBytesInBlock;    // reduce the remaining size.
        }
    }

    // Wait for all remaining work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {  // there is a work item memory block.
            if (g_WORKITEM[i]->pWork) {  // there is an active work in the thread pool.
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE);
                // Collect the count from the active work.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                // Unmap the view.
                UnmapViewOfFile(g_WORKITEM[i]->wDATA.pvAddr);
                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                g_WORKITEM[i]->pWork = NULL;
            }
            // Free the memory for the work item.
            free(g_WORKITEM[i]);
            g_WORKITEM[i] = NULL;
        }
    }

    CloseHandle(hSemp);
    hSemp = NULL;
    return qwNumOfZeros;
}

// Count directly by reading blocks from the file (no file mapping).
__int64 CountWithFileMultiThread(HANDLE hFile, __int64 qwFileSize,
    DWORD dwBlockSize, DWORD nThreads) {
    if (nThreads > MAX_THREADS)
        return -1;  // invalid thread count.

    ZeroMemory(g_WORKITEM, sizeof(PWORKITEM_INFO) * nThreads);
    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    hSemp = CreateSemaphore(NULL, nThreads, nThreads, NULL);

    // Allocate the read buffers and work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        g_BUFF[i] = (PBYTE)malloc(dwBlockSize);
        g_WORKITEM[i] = (PWORKITEM_INFO)malloc(sizeof(WORKITEM_INFO));
        g_WORKITEM[i]->wDATA.bIsFinished = true;
        g_WORKITEM[i]->pWork = NULL;
        g_WORKITEM[i]->wDATA.pvAddr = g_BUFF[i];
    }

    while (qwFileSize > 0) {
        WaitForSingleObject(hSemp, INFINITE);  // wait for a free slot.

        // Clean up any finished work items.
        for (unsigned int i = 0; i < nThreads; i++) {
            if (g_WORKITEM[i]) {  // there is a work item.
                if (g_WORKITEM[i]->wDATA.bIsFinished && g_WORKITEM[i]->pWork) {
                    // This item has finished: collect the count.
                    qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                    // Close the work item.
                    CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                    g_WORKITEM[i]->pWork = NULL;
                }
            }
        }

        // Get a free slot.
        unsigned int nSlotID;
        for (nSlotID = 0; nSlotID < nThreads; nSlotID++) {
            if (g_WORKITEM[nSlotID]->pWork == NULL)
                break;
        }

        if (nSlotID < nThreads) {  // there is a thread available.
            // Determine the number of bytes to be read into this block.
            DWORD dwBytesInBlock = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
            DWORD dwRead = 0;
            ReadFile(hFile, g_BUFF[nSlotID], dwBytesInBlock, &dwRead, NULL);
            // Initialize the work data.
            g_WORKITEM[nSlotID]->wDATA.dwBlockSize = dwBytesInBlock;
            g_WORKITEM[nSlotID]->wDATA.dwCount = 0;
            g_WORKITEM[nSlotID]->wDATA.bIsFinished = false;
            g_WORKITEM[nSlotID]->pWork = CreateThreadpoolWork(WorkCallback,
                &g_WORKITEM[nSlotID]->wDATA, NULL);
            // Submit the work data to the thread pool.
            SubmitThreadpoolWork(g_WORKITEM[nSlotID]->pWork);

            qwFileOffset += dwBytesInBlock;  // advance the offset.
            qwFileSize -= dwBytesInBlock;    // reduce the remaining size.
        }
    }

    // Wait for all remaining work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        if (g_WORKITEM[i]) {  // there is a work item.
            if (g_WORKITEM[i]->pWork) {  // there is an active work in the thread pool.
                if (g_WORKITEM[i]->wDATA.bIsFinished == false)  // not finished yet?
                    WaitForThreadpoolWorkCallbacks(g_WORKITEM[i]->pWork, FALSE);
                // Collect the count.
                qwNumOfZeros += g_WORKITEM[i]->wDATA.dwCount;
                // Close the work item.
                CloseThreadpoolWork(g_WORKITEM[i]->pWork);
                g_WORKITEM[i]->pWork = NULL;
            }
        }
    }

    // Clean up the buffers and work items.
    for (unsigned int i = 0; i < nThreads; i++) {
        free(g_BUFF[i]);
        g_BUFF[i] = NULL;
        free(g_WORKITEM[i]);
        g_WORKITEM[i] = NULL;
    }

    CloseHandle(hSemp);
    hSemp = NULL;
    return qwNumOfZeros;
}

///////////////////////////////////////////////////////////////////////////////

__int64 CountZeros(LPCTSTR szFileName, DWORD dwBlockSize, DWORD nThreads = 1,
    bool bUseFileMap = true) {
    // Views must always start on a multiple of the allocation granularity.
    SYSTEM_INFO sinf;
    GetSystemInfo(&sinf);

    // Open the data file.
    HANDLE hFile = CreateFile(szFileName, GENERIC_READ, FILE_SHARE_READ, NULL,
        OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);

    DWORD dwFileSizeHigh;
    __int64 qwFileSize = GetFileSize(hFile, &dwFileSizeHigh);
    qwFileSize += (((__int64)dwFileSizeHigh) << 32);

    // Round the block size up to a multiple of the allocation granularity (ceiling).
    DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1) / sinf.dwAllocationGranularity;
    dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount;

    if (!bUseFileMap) {
        LONGLONG lLastTimeStamp = stopwatch.Now();
        _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
            dwBlockSize, szFileName, qwFileSize);
        __int64 qwNumOfZeros = CountWithFileMultiThread(hFile, qwFileSize, dwBlockSize, nThreads);
        LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
        _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);
        CloseHandle(hFile);
        return qwNumOfZeros;
    }

    // Create the file-mapping object.
    HANDLE hFileMapping = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    // We no longer need access to the file object's handle.
    CloseHandle(hFile);

    __int64 qwFileOffset = 0, qwNumOfZeros = 0;
    LONGLONG lLastTimeStamp = stopwatch.Now();
    _tprintf(TEXT("Start to count the file with block size %d\n%s with size %I64dBytes\n"),
        dwBlockSize, szFileName, qwFileSize);

    if (nThreads > 1) {  // multithreaded mapping path.
        qwNumOfZeros = CountWithFileMapMultiThreads(hFileMapping, qwFileSize, dwBlockSize, nThreads);
    } else {
        while (qwFileSize > 0) {
            // Determine the number of bytes to be mapped in this view.
            DWORD dwBytesInBlock = dwBlockSize;
            if (qwFileSize < dwBlockSize)
                dwBytesInBlock = (DWORD)qwFileSize;

            PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMapping, FILE_MAP_READ,
                (DWORD)(qwFileOffset >> 32),
                (DWORD)(qwFileOffset & 0xFFFFFFFF),
                dwBytesInBlock);

            // Count the number of 0s in this block.
            for (DWORD dwByte = 0; dwByte < dwBytesInBlock; dwByte++) {
                if (pbFile[dwByte] == 0)
                    qwNumOfZeros++;
            }

            // Unmap the view; we don't want multiple views in our address space.
            UnmapViewOfFile(pbFile);

            // Skip to the next set of bytes in the file.
            qwFileOffset += dwBytesInBlock;
            qwFileSize -= dwBytesInBlock;
        }
    }

    LONGLONG lElapsedTime = stopwatch.Now() - lLastTimeStamp;
    _tprintf(TEXT("Count finished in %lldms\n"), lElapsedTime);
    CloseHandle(hFileMapping);
    return qwNumOfZeros;
}

void testcase1() {
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase2() {
    // block 512KB, single thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

void testcase3() {
    // block 512KB, single thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
}

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    testcase2();
    return 0;
}

I extended the example in several ways: it supports multithreaded concurrency, a pre-definable block size rounded to the system allocation granularity (64KB by default on my machine), and a non-mapping mode that operates on the file directly through a user-managed buffer. The granularity handling is shown right below.
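The granularity handling is a simple ceiling round in CountZeros: MapViewOfFile requires view offsets to be multiples of the system allocation granularity, so the requested block size is rounded up first. Excerpted from the listing above:

// Excerpt from CountZeros: round the requested block size up to a
// multiple of the system allocation granularity (64KB on my machine).
SYSTEM_INFO sinf;
GetSystemInfo(&sinf);
DWORD dwBlockCount = (dwBlockSize + sinf.dwAllocationGranularity - 1) / sinf.dwAllocationGranularity;
dwBlockSize = sinf.dwAllocationGranularity * dwBlockCount;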

Test 1: Compare single-threaded FileMap, multithreaded FileMap, and multithreaded non-FileMap performance. All processes that might generate IO were closed (an idealized, best-case comparison).

1) First, operate on a file close to 3GB in size, single-threaded throughout, using a file mapping, with block sizes of 64KB, 512KB, and 4MB.

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 64KB
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        64 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 4MB
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        4 * 1024 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}

Results:

2) Next, operate on the same roughly 3GB file using 1, 2, and 4 threads, with the file mapping as the access method and a fixed block size of 512KB.
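In this mode each work item maps its own view at a 64-bit file offset; MapViewOfFile takes that offset split into high and low DWORDs. The relevant excerpt from CountWithFileMapMultiThreads above:

// Excerpt: map one block's view at a 64-bit file offset.
PBYTE pbFile = (PBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ,
    (DWORD)(qwFileOffset >> 32),         // high 32 bits of the offset
    (DWORD)(qwFileOffset & 0xFFFFFFFF),  // low 32 bits of the offset
    dwBytesInBlock);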

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 512KB, single thread
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, true);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}
Results:

3) Finally, operate on the same roughly 3GB file using 1, 2, and 4 threads, this time operating on the file directly (no file mapping), again with a fixed block size of 512KB. A simplified sketch of this path follows.
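Conceptually, the non-mapping path reduces to the following single-threaded sketch, a simplification of CountWithFileMultiThread above (the function name is mine; error handling omitted):

// Simplified sketch of the direct-IO path: read each block into our own
// buffer with ReadFile and scan it, instead of mapping views.
__int64 CountZerosDirectSketch(HANDLE hFile, __int64 qwFileSize, DWORD dwBlockSize) {
    PBYTE pBuff = (PBYTE)malloc(dwBlockSize);
    __int64 qwZeros = 0;
    while (qwFileSize > 0) {
        DWORD dwBytes = (qwFileSize < dwBlockSize) ? (DWORD)qwFileSize : dwBlockSize;
        DWORD dwRead = 0;
        ReadFile(hFile, pBuff, dwBytes, &dwRead, NULL);  // sequential read advances the file pointer
        for (DWORD i = 0; i < dwRead; i++)
            if (pBuff[i] == 0) qwZeros++;
        qwFileSize -= dwBytes;
    }
    free(pBuff);
    return qwZeros;
}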

The test code:

int _tmain(int argc, TCHAR* argv[], TCHAR* env[]) {
    // block 512KB, single thread, no file mapping
    __int64 nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 1, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 2 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 2, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    // block 512KB, 4 threads, no file mapping
    nCount = CountZeros(TEXT("E:\\Virtual Machine\\Windows_XP_x86_sp3\\virtual_HDD\\Windows XP Professional-s001.vmdk"),
        512 * 1024, 4, false);
    _tprintf(TEXT("result : %I64d\n"), nCount);
    return 0;
}
Results:

With sufficient IO bandwidth, multithreading improves performance.

Test 2: Compare single-threaded, multithreaded, and multithreaded non-FileMap performance with IO-generating software running (QQ, the Chrome browser).

Because the interference is unstable and there are too many confounding factors, this test will be completed in a follow-up...
