VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMBlkCache.cpp@ 34341

最後變更 在這個檔案從34341是 34340,由 vboxsync 提交於 14 年 前

PDMBlkCache: Bug fixes

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 83.7 KB
 
1/* $Id: PDMBlkCache.cpp 34340 2010-11-24 20:11:53Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/stam.h>
33#include <VBox/uvm.h>
34#include <VBox/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* Without VBOX_STRICT the ownership checks expand to empty statements. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
65
66/*******************************************************************************
67* Internal Functions *
68*******************************************************************************/
69
70/**
71 * Decrement the reference counter of the given cache entry.
72 *
73 * @returns nothing.
74 * @param pEntry The entry to release.
75 */
76DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
77{
78 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
79 ASMAtomicDecU32(&pEntry->cRefs);
80}
81
82/**
83 * Increment the reference counter of the given cache entry.
84 *
85 * @returns nothing.
86 * @param pEntry The entry to reference.
87 */
88DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
89{
90 ASMAtomicIncU32(&pEntry->cRefs);
91}
92
#ifdef DEBUG
/**
 * Sanity checks the byte accounting of the global cache (DEBUG builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The cache must never hold more data than its configured maximum. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently used (in) and frequently used lists together account for all cached bytes. */
    AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own upper limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
108
109DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
110{
111 RTCritSectEnter(&pCache->CritSect);
112#ifdef DEBUG
113 pdmBlkCacheValidate(pCache);
114#endif
115}
116
117DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
118{
119#ifdef DEBUG
120 pdmBlkCacheValidate(pCache);
121#endif
122 RTCritSectLeave(&pCache->CritSect);
123}
124
125DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
126{
127 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
128 pCache->cbCached -= cbAmount;
129}
130
131DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached += cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
138{
139 pList->cbCached += cbAmount;
140}
141
142DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
143{
144 pList->cbCached -= cbAmount;
145}
146
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Verifies the structural integrity of an LRU list.
 *
 * Walks the list from the head and asserts that no entry shows up again
 * further down the chain, that the forbidden entry is absent and that the
 * last reachable entry is the recorded tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Entry which must not be part of the list (may be NULL).
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        /* Compare the current entry against every successor to catch duplicates and cycles. */
        for (PPDMBLKCACHEENTRY pNext = pCurr->pNext; pNext; pNext = pNext->pNext)
            AssertMsg(pCurr != pNext,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pCurr, pList));

        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        /* The entry without a successor has to be the tail of the list. */
        if (!pCurr->pNext)
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
181
182/**
183 * Unlinks a cache entry from the LRU list it is assigned to.
184 *
185 * @returns nothing.
186 * @param pEntry The entry to unlink.
187 */
188static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
189{
190 PPDMBLKLRULIST pList = pEntry->pList;
191 PPDMBLKCACHEENTRY pPrev, pNext;
192
193 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
194
195 AssertPtr(pList);
196
197#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
198 pdmBlkCacheCheckList(pList, NULL);
199#endif
200
201 pPrev = pEntry->pPrev;
202 pNext = pEntry->pNext;
203
204 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
205 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
206
207 if (pPrev)
208 pPrev->pNext = pNext;
209 else
210 {
211 pList->pHead = pNext;
212
213 if (pNext)
214 pNext->pPrev = NULL;
215 }
216
217 if (pNext)
218 pNext->pPrev = pPrev;
219 else
220 {
221 pList->pTail = pPrev;
222
223 if (pPrev)
224 pPrev->pNext = NULL;
225 }
226
227 pEntry->pList = NULL;
228 pEntry->pPrev = NULL;
229 pEntry->pNext = NULL;
230 pdmBlkCacheListSub(pList, pEntry->cbData);
231#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
232 pdmBlkCacheCheckList(pList, pEntry);
233#endif
234}
235
236/**
237 * Adds a cache entry to the given LRU list unlinking it from the currently
238 * assigned list if needed.
239 *
240 * @returns nothing.
241 * @param pList List to the add entry to.
242 * @param pEntry Entry to add.
243 */
244static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
245{
246 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
247#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
248 pdmBlkCacheCheckList(pList, NULL);
249#endif
250
251 /* Remove from old list if needed */
252 if (pEntry->pList)
253 pdmBlkCacheEntryRemoveFromList(pEntry);
254
255 pEntry->pNext = pList->pHead;
256 if (pList->pHead)
257 pList->pHead->pPrev = pEntry;
258 else
259 {
260 Assert(!pList->pTail);
261 pList->pTail = pEntry;
262 }
263
264 pEntry->pPrev = NULL;
265 pList->pHead = pEntry;
266 pdmBlkCacheListAdd(pList, pEntry->cbData);
267 pEntry->pList = pList;
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, NULL);
270#endif
271}
272
273/**
274 * Destroys a LRU list freeing all entries.
275 *
276 * @returns nothing
277 * @param pList Pointer to the LRU list to destroy.
278 *
279 * @note The caller must own the critical section of the cache.
280 */
281static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
282{
283 while (pList->pHead)
284 {
285 PPDMBLKCACHEENTRY pEntry = pList->pHead;
286
287 pList->pHead = pEntry->pNext;
288
289 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
290 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
291
292 RTMemPageFree(pEntry->pbData, pEntry->cbData);
293 RTMemFree(pEntry);
294 }
295}
296
297/**
298 * Tries to remove the given amount of bytes from a given list in the cache
299 * moving the entries to one of the given ghosts lists
300 *
301 * @returns Amount of data which could be freed.
302 * @param pCache Pointer to the global cache data.
303 * @param cbData The amount of the data to free.
304 * @param pListSrc The source list to evict data from.
305 * @param pGhostListSrc The ghost list removed entries should be moved to
306 * NULL if the entry should be freed.
307 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
308 * @param ppbBuf Where to store the address of the buffer if an entry with the
309 * same size was found and fReuseBuffer is true.
310 *
311 * @note This function may return fewer bytes than requested because entries
312 * may be marked as non evictable if they are used for I/O at the
313 * moment.
314 */
315static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
316 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
317 bool fReuseBuffer, uint8_t **ppbBuffer)
318{
319 size_t cbEvicted = 0;
320
321 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
322
323 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
324 AssertMsg( !pGhostListDst
325 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
326 ("Destination list must be NULL or the recently used but paged out list\n"));
327
328 if (fReuseBuffer)
329 {
330 AssertPtr(ppbBuffer);
331 *ppbBuffer = NULL;
332 }
333
334 /* Start deleting from the tail. */
335 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
336
337 while ((cbEvicted < cbData) && pEntry)
338 {
339 PPDMBLKCACHEENTRY pCurr = pEntry;
340
341 pEntry = pEntry->pPrev;
342
343 /* We can't evict pages which are currently in progress or dirty but not in progress */
344 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
345 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
346 {
347 /* Ok eviction candidate. Grab the endpoint semaphore and check again
348 * because somebody else might have raced us. */
349 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
350 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
351
352 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
353 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
354 {
355 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
356
357 if (fReuseBuffer && (pCurr->cbData == cbData))
358 {
359 STAM_COUNTER_INC(&pCache->StatBuffersReused);
360 *ppbBuffer = pCurr->pbData;
361 }
362 else if (pCurr->pbData)
363 RTMemPageFree(pCurr->pbData, pCurr->cbData);
364
365 pCurr->pbData = NULL;
366 cbEvicted += pCurr->cbData;
367
368 pdmBlkCacheEntryRemoveFromList(pCurr);
369 pdmBlkCacheSub(pCache, pCurr->cbData);
370
371 if (pGhostListDst)
372 {
373 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
374
375 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
376
377 /* We have to remove the last entries from the paged out list. */
378 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
379 && pGhostEntFree)
380 {
381 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
382 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
383
384 pGhostEntFree = pGhostEntFree->pPrev;
385
386 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
387
388 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
389 {
390 pdmBlkCacheEntryRemoveFromList(pFree);
391
392 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
393 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
394 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
395
396 RTMemFree(pFree);
397 }
398
399 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
400 }
401
402 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
403 {
404 /* Couldn't remove enough entries. Delete */
405 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
406 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
407 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
408
409 RTMemFree(pCurr);
410 }
411 else
412 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
413 }
414 else
415 {
416 /* Delete the entry from the AVL tree it is assigned to. */
417 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
418 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
419 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
420
421 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
422 RTMemFree(pCurr);
423 }
424 }
425
426 }
427 else
428 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
429 }
430
431 return cbEvicted;
432}
433
434static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
435{
436 size_t cbRemoved = 0;
437
438 if ((pCache->cbCached + cbData) < pCache->cbMax)
439 return true;
440 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
441 {
442 /* Try to evict as many bytes as possible from A1in */
443 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
444 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
445
446 /*
447 * If it was not possible to remove enough entries
448 * try the frequently accessed cache.
449 */
450 if (cbRemoved < cbData)
451 {
452 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
453
454 /*
455 * If we removed something we can't pass the reuse buffer flag anymore because
456 * we don't need to evict that much data
457 */
458 if (!cbRemoved)
459 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
460 NULL, fReuseBuffer, ppbBuffer);
461 else
462 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
463 NULL, false, NULL);
464 }
465 }
466 else
467 {
468 /* We have to remove entries from frequently access list. */
469 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
470 NULL, fReuseBuffer, ppbBuffer);
471 }
472
473 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
474 return (cbRemoved >= cbData);
475}
476
477DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
478{
479 int rc = VINF_SUCCESS;
480
481 switch (pBlkCache->enmType)
482 {
483 case PDMBLKCACHETYPE_DEV:
484 {
485 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
486 pIoXfer->enmXferDir,
487 off, pIoXfer->cbXfer,
488 &pIoXfer->SgBuf, pIoXfer);
489 break;
490 }
491 case PDMBLKCACHETYPE_DRV:
492 {
493 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
494 pIoXfer->enmXferDir,
495 off, pIoXfer->cbXfer,
496 &pIoXfer->SgBuf, pIoXfer);
497 break;
498 }
499 case PDMBLKCACHETYPE_USB:
500 {
501 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
502 pIoXfer->enmXferDir,
503 off, pIoXfer->cbXfer,
504 &pIoXfer->SgBuf, pIoXfer);
505 break;
506 }
507 case PDMBLKCACHETYPE_INTERNAL:
508 {
509 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
510 pIoXfer->enmXferDir,
511 off, pIoXfer->cbXfer,
512 &pIoXfer->SgBuf, pIoXfer);
513 break;
514 }
515 default:
516 AssertMsgFailed(("Unknown block cache type!\n"));
517 }
518
519 return rc;
520}
521
522/**
523 * Initiates a read I/O task for the given entry.
524 *
525 * @returns VBox status code.
526 * @param pEntry The entry to fetch the data to.
527 */
528static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
529{
530 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
531 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
532
533 /* Make sure no one evicts the entry while it is accessed. */
534 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
535
536 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
537 if (RT_UNLIKELY(!pIoXfer))
538 return VERR_NO_MEMORY;
539
540 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
541
542 pIoXfer->fIoCache = true;
543 pIoXfer->pEntry = pEntry;
544 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
545 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
546 pIoXfer->cbXfer = pEntry->cbData;
547 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
548 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
549
550 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
551}
552
553/**
554 * Initiates a write I/O task for the given entry.
555 *
556 * @returns nothing.
557 * @param pEntry The entry to read the data from.
558 */
559static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
560{
561 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
562 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
563
564 /* Make sure no one evicts the entry while it is accessed. */
565 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
566
567 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
568 if (RT_UNLIKELY(!pIoXfer))
569 return VERR_NO_MEMORY;
570
571 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
572
573 pIoXfer->fIoCache = true;
574 pIoXfer->pEntry = pEntry;
575 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
576 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
577 pIoXfer->cbXfer = pEntry->cbData;
578 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
579 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
580
581 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
582}
583
584/**
585 * Passthrough a part of a request directly to the I/O manager
586 * handling the endpoint.
587 *
588 * @returns VBox status code.
589 * @param pEndpoint The endpoint.
590 * @param pTask The task.
591 * @param pIoMemCtx The I/O memory context to use.
592 * @param offStart Offset to start transfer from.
593 * @param cbData Amount of data to transfer.
594 * @param enmTransferType The transfer type (read/write)
595 */
596static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
597 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
598 PDMBLKCACHEXFERDIR enmXferDir)
599{
600
601 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
602 if (RT_UNLIKELY(!pIoXfer))
603 return VERR_NO_MEMORY;
604
605 ASMAtomicIncU32(&pReq->cXfersPending);
606 pIoXfer->fIoCache = false;
607 pIoXfer->pReq = pReq;
608 pIoXfer->cbXfer = cbData;
609 pIoXfer->enmXferDir = enmXferDir;
610 if (pSgBuf)
611 {
612 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
613 RTSgBufAdvance(pSgBuf, cbData);
614 }
615
616 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
617}
618
619/**
620 * Commit a single dirty entry to the endpoint
621 *
622 * @returns nothing
623 * @param pEntry The entry to commit.
624 */
625static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
626{
627 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
628 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
629 ("Invalid flags set for entry %#p\n", pEntry));
630
631 pdmBlkCacheEntryWriteToMedium(pEntry);
632}
633
634/**
635 * Commit all dirty entries for a single endpoint.
636 *
637 * @returns nothing.
638 * @param pBlkCache The endpoint cache to commit.
639 */
640static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
641{
642 uint32_t cbCommitted = 0;
643 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
644
645 /* The list is moved to a new header to reduce locking overhead. */
646 RTLISTNODE ListDirtyNotCommitted;
647 RTSPINLOCKTMP Tmp;
648
649 RTListInit(&ListDirtyNotCommitted);
650 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
651 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
652 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
653
654 if (!RTListIsEmpty(&ListDirtyNotCommitted))
655 {
656 PPDMBLKCACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
657 PDMBLKCACHEENTRY,
658 NodeNotCommitted);
659
660 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
661 {
662 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
663 NodeNotCommitted);
664 pdmBlkCacheEntryCommit(pEntry);
665 cbCommitted += pEntry->cbData;
666 RTListNodeRemove(&pEntry->NodeNotCommitted);
667 pEntry = pNext;
668 }
669
670 /* Commit the last endpoint */
671 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
672 pdmBlkCacheEntryCommit(pEntry);
673 RTListNodeRemove(&pEntry->NodeNotCommitted);
674 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
675 ("Committed all entries but list is not empty\n"));
676 }
677
678 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
679 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
680 ("Number of committed bytes exceeds number of dirty bytes\n"));
681 ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
682}
683
684/**
685 * Commit all dirty entries in the cache.
686 *
687 * @returns nothing.
688 * @param pCache The global cache instance.
689 */
690static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
691{
692 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
693
694 if (!fCommitInProgress)
695 {
696 pdmBlkCacheLockEnter(pCache);
697 Assert(!RTListIsEmpty(&pCache->ListUsers));
698
699 PPDMBLKCACHE pBlkCache = RTListNodeGetFirst(&pCache->ListUsers,
700 PDMBLKCACHE,
701 NodeCacheUser);
702 AssertPtr(pBlkCache);
703
704 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
705 {
706 pdmBlkCacheCommit(pBlkCache);
707
708 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
709 NodeCacheUser);
710 }
711
712 /* Commit the last endpoint */
713 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
714 pdmBlkCacheCommit(pBlkCache);
715
716 pdmBlkCacheLockLeave(pCache);
717 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
718 }
719}
720
721/**
722 * Adds the given entry as a dirty to the cache.
723 *
724 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
725 * @param pBlkCache The endpoint cache the entry belongs to.
726 * @param pEntry The entry to add.
727 */
728static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
729{
730 bool fDirtyBytesExceeded = false;
731 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
732
733 /* If the commit timer is disabled we commit right away. */
734 if (pCache->u32CommitTimeoutMs == 0)
735 {
736 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
737 pdmBlkCacheEntryCommit(pEntry);
738 }
739 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
740 {
741 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
742
743 RTSPINLOCKTMP Tmp;
744 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
745 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
746 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
747
748 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
749
750 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
751 }
752
753 return fDirtyBytesExceeded;
754}
755
756/**
757 * Commit timer callback.
758 */
759static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
760{
761 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
762
763 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
764
765 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
766 pdmBlkCacheCommitDirtyEntries(pCache);
767
768 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
769 LogFlowFunc(("Entries committed, going to sleep\n"));
770}
771
772int pdmR3BlkCacheInit(PVM pVM)
773{
774 int rc = VINF_SUCCESS;
775 PUVM pUVM = pVM->pUVM;
776 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
777
778 LogFlowFunc((": pVM=%p\n", pVM));
779
780 VM_ASSERT_EMT(pVM);
781
782 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
783 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
784
785 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
786 if (!pBlkCacheGlobal)
787 return VERR_NO_MEMORY;
788
789 RTListInit(&pBlkCacheGlobal->ListUsers);
790 pBlkCacheGlobal->pVM = pVM;
791 pBlkCacheGlobal->cRefs = 0;
792 pBlkCacheGlobal->cbCached = 0;
793 pBlkCacheGlobal->fCommitInProgress = false;
794
795 /* Initialize members */
796 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
797 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
798 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
799
800 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
801 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
802 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
803
804 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
805 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
806 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
807
808 do
809 {
810 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
811 AssertLogRelRCBreak(rc);
812 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
813
814 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
815 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
816 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
817 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
818
819 /** @todo r=aeichner: Experiment to find optimal default values */
820 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
821 AssertLogRelRCBreak(rc);
822 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
823 AssertLogRelRCBreak(rc);
824 } while (0);
825
826 if (RT_SUCCESS(rc))
827 {
828 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
829 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
830 "/PDM/BlkCache/cbMax",
831 STAMUNIT_BYTES,
832 "Maximum cache size");
833 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
834 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
835 "/PDM/BlkCache/cbCached",
836 STAMUNIT_BYTES,
837 "Currently used cache");
838 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
839 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
840 "/PDM/BlkCache/cbCachedMruIn",
841 STAMUNIT_BYTES,
842 "Number of bytes cached in MRU list");
843 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
844 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
845 "/PDM/BlkCache/cbCachedMruOut",
846 STAMUNIT_BYTES,
847 "Number of bytes cached in FRU list");
848 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
849 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
850 "/PDM/BlkCache/cbCachedFru",
851 STAMUNIT_BYTES,
852 "Number of bytes cached in FRU ghost list");
853
854#ifdef VBOX_WITH_STATISTICS
855 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
856 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
857 "/PDM/BlkCache/CacheHits",
858 STAMUNIT_COUNT, "Number of hits in the cache");
859 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
860 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
861 "/PDM/BlkCache/CachePartialHits",
862 STAMUNIT_COUNT, "Number of partial hits in the cache");
863 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
864 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
865 "/PDM/BlkCache/CacheMisses",
866 STAMUNIT_COUNT, "Number of misses when accessing the cache");
867 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
868 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
869 "/PDM/BlkCache/CacheRead",
870 STAMUNIT_BYTES, "Number of bytes read from the cache");
871 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
872 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
873 "/PDM/BlkCache/CacheWritten",
874 STAMUNIT_BYTES, "Number of bytes written to the cache");
875 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
876 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
877 "/PDM/BlkCache/CacheTreeGet",
878 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
879 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
880 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
881 "/PDM/BlkCache/CacheTreeInsert",
882 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
883 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
884 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
885 "/PDM/BlkCache/CacheTreeRemove",
886 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
887 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
888 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
889 "/PDM/BlkCache/CacheBuffersReused",
890 STAMUNIT_COUNT, "Number of times a buffer could be reused");
891#endif
892
893 /* Initialize the critical section */
894 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
895 }
896
897 if (RT_SUCCESS(rc))
898 {
899 /* Create the commit timer */
900 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
901 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
902 pdmBlkCacheCommitTimerCallback,
903 pBlkCacheGlobal,
904 "BlkCache-Commit",
905 &pBlkCacheGlobal->pTimerCommit);
906
907 if (RT_SUCCESS(rc))
908 {
909 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
910 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
911 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
912 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
913 return VINF_SUCCESS;
914 }
915
916 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
917 }
918
919 if (pBlkCacheGlobal)
920 RTMemFree(pBlkCacheGlobal);
921
922 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
923 return rc;
924}
925
926void pdmR3BlkCacheTerm(PVM pVM)
927{
928 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
929
930 if (pBlkCacheGlobal)
931 {
932 /* Make sure no one else uses the cache now */
933 pdmBlkCacheLockEnter(pBlkCacheGlobal);
934
935 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
936 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
937 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
938 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
939
940 pdmBlkCacheLockLeave(pBlkCacheGlobal);
941
942 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
943 RTMemFree(pBlkCacheGlobal);
944 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
945 }
946}
947
948static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
949{
950 int rc = VINF_SUCCESS;
951 PPDMBLKCACHE pBlkCache = NULL;
952 bool fAlreadyExists = false;
953 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
954
955 if (!pBlkCacheGlobal)
956 return VERR_NOT_SUPPORTED;
957
958 /*
959 * Check that no other user cache has the same id first,
960 * Unique id's are necessary in case the state is saved.
961 */
962 pdmBlkCacheLockEnter(pBlkCacheGlobal);
963
964 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
965 {
966 if (!RTStrCmp(pBlkCache->pszId, pcszId))
967 {
968 fAlreadyExists = true;
969 break;
970 }
971 }
972
973 if (!fAlreadyExists)
974 {
975 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
976
977 if (pBlkCache)
978 pBlkCache->pszId = RTStrDup(pcszId);
979
980 if ( pBlkCache
981 && pBlkCache->pszId)
982 {
983 pBlkCache->pCache = pBlkCacheGlobal;
984 RTListInit(&pBlkCache->ListDirtyNotCommitted);
985
986 rc = RTSpinlockCreate(&pBlkCache->LockList);
987 if (RT_SUCCESS(rc))
988 {
989 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
990 if (RT_SUCCESS(rc))
991 {
992 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
993 if (pBlkCache->pTree)
994 {
995 /* Arm the timer if this is the first endpoint. */
996 if ( pBlkCacheGlobal->cRefs == 1
997 && pBlkCacheGlobal->u32CommitTimeoutMs > 0)
998 rc = TMTimerSetMillies(pBlkCacheGlobal->pTimerCommit, pBlkCacheGlobal->u32CommitTimeoutMs);
999
1000 if (RT_SUCCESS(rc))
1001 {
1002#ifdef VBOX_WITH_STATISTICS
1003 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1004 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1005 STAMUNIT_COUNT, "Number of deferred writes",
1006 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1007#endif
1008
1009 /* Add to the list of users. */
1010 pBlkCacheGlobal->cRefs++;
1011 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1012 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1013
1014 *ppBlkCache = pBlkCache;
1015 LogFlowFunc(("returns success\n"));
1016 return VINF_SUCCESS;
1017 }
1018 }
1019 else
1020 rc = VERR_NO_MEMORY;
1021
1022 RTSemRWDestroy(pBlkCache->SemRWEntries);
1023 }
1024
1025 RTSpinlockDestroy(pBlkCache->LockList);
1026 }
1027
1028 RTStrFree(pBlkCache->pszId);
1029 }
1030 else
1031 rc = VERR_NO_MEMORY;
1032
1033 if (pBlkCache)
1034 RTMemFree(pBlkCache);
1035 }
1036 else
1037 rc = VERR_ALREADY_EXISTS;
1038
1039 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1040
1041 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1042 return rc;
1043}
1044
1045VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1046 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1047 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1048 const char *pcszId)
1049{
1050 int rc = VINF_SUCCESS;
1051 PPDMBLKCACHE pBlkCache;
1052
1053 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1054 if (RT_SUCCESS(rc))
1055 {
1056 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1057 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1058 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1059 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1060 *ppBlkCache = pBlkCache;
1061 }
1062
1063 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1064 return rc;
1065}
1066
1067VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1068 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1069 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1070 const char *pcszId)
1071{
1072 int rc = VINF_SUCCESS;
1073 PPDMBLKCACHE pBlkCache;
1074
1075 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1076 if (RT_SUCCESS(rc))
1077 {
1078 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1079 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1080 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1081 pBlkCache->u.Dev.pDevIns = pDevIns;
1082 *ppBlkCache = pBlkCache;
1083 }
1084
1085 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1086 return rc;
1087
1088}
1089
1090VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1091 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1092 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1093 const char *pcszId)
1094{
1095 int rc = VINF_SUCCESS;
1096 PPDMBLKCACHE pBlkCache;
1097
1098 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1099 if (RT_SUCCESS(rc))
1100 {
1101 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1102 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1103 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1104 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1105 *ppBlkCache = pBlkCache;
1106 }
1107
1108 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1109 return rc;
1110
1111}
1112
1113VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1114 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1115 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1116 const char *pcszId)
1117{
1118 int rc = VINF_SUCCESS;
1119 PPDMBLKCACHE pBlkCache;
1120
1121 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1122 if (RT_SUCCESS(rc))
1123 {
1124 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1125 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1126 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1127 pBlkCache->u.Int.pvUser = pvUser;
1128 *ppBlkCache = pBlkCache;
1129 }
1130
1131 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1132 return rc;
1133
1134}
1135
1136/**
1137 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1138 *
1139 * @returns IPRT status code.
1140 * @param pNode The node to destroy.
1141 * @param pvUser Opaque user data.
1142 */
1143static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1144{
1145 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1146 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1147 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1148
1149 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY))
1150 {
1151 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1152 pdmBlkCacheEntryRef(pEntry);
1153 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1154 pdmBlkCacheLockLeave(pCache);
1155
1156 RTThreadSleep(250);
1157
1158 /* Re-enter all locks */
1159 pdmBlkCacheLockEnter(pCache);
1160 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1161 pdmBlkCacheEntryRelease(pEntry);
1162 }
1163
1164 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
1165 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1166
1167 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1168 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1169
1170 pdmBlkCacheEntryRemoveFromList(pEntry);
1171
1172 if (fUpdateCache)
1173 pdmBlkCacheSub(pCache, pEntry->cbData);
1174
1175 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1176 RTMemFree(pEntry);
1177
1178 return VINF_SUCCESS;
1179}
1180
1181/**
1182 * Destroys all cache resources used by the given endpoint.
1183 *
1184 * @returns nothing.
1185 * @param pEndpoint The endpoint to the destroy.
1186 */
1187VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1188{
1189 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1190
1191 /* Make sure nobody is accessing the cache while we delete the tree. */
1192 pdmBlkCacheLockEnter(pCache);
1193 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1194 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1195 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1196
1197 RTSpinlockDestroy(pBlkCache->LockList);
1198
1199 pCache->cRefs--;
1200 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1201
1202 if ( !pCache->cRefs
1203 && pCache->u32CommitTimeoutMs > 0)
1204 TMTimerStop(pCache->pTimerCommit);
1205
1206 pdmBlkCacheLockLeave(pCache);
1207
1208 RTSemRWDestroy(pBlkCache->SemRWEntries);
1209
1210#ifdef VBOX_WITH_STATISTICS
1211 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1212#endif
1213
1214 RTStrFree(pBlkCache->pszId);
1215 RTMemFree(pBlkCache);
1216}
1217
1218VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1219{
1220 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1221
1222 /*
1223 * Validate input.
1224 */
1225 if (!pDevIns)
1226 return;
1227 VM_ASSERT_EMT(pVM);
1228
1229 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1230 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1231
1232 /* Return silently if not supported. */
1233 if (!pBlkCacheGlobal)
1234 return;
1235
1236 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1237
1238 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1239 {
1240 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1241 && pBlkCache->u.Dev.pDevIns == pDevIns)
1242 PDMR3BlkCacheRelease(pBlkCache);
1243 }
1244
1245 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1246}
1247
1248VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1249{
1250 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1251
1252 /*
1253 * Validate input.
1254 */
1255 if (!pDrvIns)
1256 return;
1257 VM_ASSERT_EMT(pVM);
1258
1259 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1260 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1261
1262 /* Return silently if not supported. */
1263 if (!pBlkCacheGlobal)
1264 return;
1265
1266 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1267
1268 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1269 {
1270 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1271 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1272 PDMR3BlkCacheRelease(pBlkCache);
1273 }
1274
1275 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1276}
1277
1278VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1279{
1280 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1281
1282 /*
1283 * Validate input.
1284 */
1285 if (!pUsbIns)
1286 return;
1287 VM_ASSERT_EMT(pVM);
1288
1289 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1290 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1291
1292 /* Return silently if not supported. */
1293 if (!pBlkCacheGlobal)
1294 return;
1295
1296 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1297
1298 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1299 {
1300 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1301 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1302 PDMR3BlkCacheRelease(pBlkCache);
1303 }
1304
1305 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1306}
1307
1308static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1309{
1310 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1311 PPDMBLKCACHEENTRY pEntry = NULL;
1312
1313 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1314
1315 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1316 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1317 if (pEntry)
1318 pdmBlkCacheEntryRef(pEntry);
1319 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1320
1321 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1322
1323 return pEntry;
1324}
1325
1326/**
1327 * Return the best fit cache entries for the given offset.
1328 *
1329 * @returns nothing.
1330 * @param pBlkCache The endpoint cache.
1331 * @param off The offset.
1332 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1333 * the given offset. NULL if not required.
1334 * @param pEntryBelow Where to store the pointer to the best fit entry below the
1335 * the given offset. NULL if not required.
1336 */
1337static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1338 PPDMBLKCACHEENTRY *ppEntryAbove,
1339 PPDMBLKCACHEENTRY *ppEntryBelow)
1340{
1341 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1342
1343 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1344
1345 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1346 if (ppEntryAbove)
1347 {
1348 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1349 if (*ppEntryAbove)
1350 pdmBlkCacheEntryRef(*ppEntryAbove);
1351 }
1352
1353 if (ppEntryBelow)
1354 {
1355 *ppEntryBelow = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, false /*fAbove*/);
1356 if (*ppEntryBelow)
1357 pdmBlkCacheEntryRef(*ppEntryBelow);
1358 }
1359 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1360
1361 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1362}
1363
1364static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1365{
1366 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1367
1368 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1369 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1370 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1371 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1372 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1373 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1374}
1375
1376/**
1377 * Allocates and initializes a new entry for the cache.
1378 * The entry has a reference count of 1.
1379 *
1380 * @returns Pointer to the new cache entry or NULL if out of memory.
1381 * @param pBlkCache The cache the entry belongs to.
1382 * @param off Start offset.
1383 * @param cbData Size of the cache entry.
1384 * @param pbBuffer Pointer to the buffer to use.
1385 * NULL if a new buffer should be allocated.
1386 * The buffer needs to have the same size of the entry.
1387 */
1388static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1389 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1390{
1391 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1392
1393 if (RT_UNLIKELY(!pEntryNew))
1394 return NULL;
1395
1396 pEntryNew->Core.Key = off;
1397 pEntryNew->Core.KeyLast = off + cbData - 1;
1398 pEntryNew->pBlkCache = pBlkCache;
1399 pEntryNew->fFlags = 0;
1400 pEntryNew->cRefs = 1; /* We are using it now. */
1401 pEntryNew->pList = NULL;
1402 pEntryNew->cbData = cbData;
1403 pEntryNew->pWaitingHead = NULL;
1404 pEntryNew->pWaitingTail = NULL;
1405 if (pbBuffer)
1406 pEntryNew->pbData = pbBuffer;
1407 else
1408 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1409
1410 if (RT_UNLIKELY(!pEntryNew->pbData))
1411 {
1412 RTMemFree(pEntryNew);
1413 return NULL;
1414 }
1415
1416 return pEntryNew;
1417}
1418
1419/**
1420 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1421 * in exclusive mode.
1422 *
1423 * @returns true if the flag in fSet is set and the one in fClear is clear.
1424 * false otherwise.
1425 * The R/W semaphore is only held if true is returned.
1426 *
1427 * @param pBlkCache The endpoint cache instance data.
1428 * @param pEntry The entry to check the flags for.
1429 * @param fSet The flag which is tested to be set.
1430 * @param fClear The flag which is tested to be clear.
1431 */
1432DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1433 PPDMBLKCACHEENTRY pEntry,
1434 uint32_t fSet, uint32_t fClear)
1435{
1436 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1437 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1438
1439 if (fPassed)
1440 {
1441 /* Acquire the lock and check again because the completion callback might have raced us. */
1442 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1443
1444 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1445 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1446
1447 /* Drop the lock if we didn't passed the test. */
1448 if (!fPassed)
1449 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1450 }
1451
1452 return fPassed;
1453}
1454
1455/**
1456 * Adds a segment to the waiting list for a cache entry
1457 * which is currently in progress.
1458 *
1459 * @returns nothing.
1460 * @param pEntry The cache entry to add the segment to.
1461 * @param pSeg The segment to add.
1462 */
1463DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1464 PPDMBLKCACHEWAITER pWaiter)
1465{
1466 pWaiter->pNext = NULL;
1467
1468 if (pEntry->pWaitingHead)
1469 {
1470 AssertPtr(pEntry->pWaitingTail);
1471
1472 pEntry->pWaitingTail->pNext = pWaiter;
1473 pEntry->pWaitingTail = pWaiter;
1474 }
1475 else
1476 {
1477 Assert(!pEntry->pWaitingTail);
1478
1479 pEntry->pWaitingHead = pWaiter;
1480 pEntry->pWaitingTail = pWaiter;
1481 }
1482}
1483
1484/**
1485 * Add a buffer described by the I/O memory context
1486 * to the entry waiting for completion.
1487 *
1488 * @returns VBox status code.
1489 * @param pEntry The entry to add the buffer to.
1490 * @param pTask Task associated with the buffer.
1491 * @param pIoMemCtx The memory context to use.
1492 * @param offDiff Offset from the start of the buffer
1493 * in the entry.
1494 * @param cbData Amount of data to wait for onthis entry.
1495 * @param fWrite Flag whether the task waits because it wants to write
1496 * to the cache entry.
1497 */
1498static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1499 PPDMBLKCACHEREQ pReq,
1500 PCRTSGBUF pSgBuf, uint64_t offDiff,
1501 size_t cbData, bool fWrite)
1502{
1503 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1504 if (!pWaiter)
1505 return VERR_NO_MEMORY;
1506
1507 ASMAtomicIncU32(&pReq->cXfersPending);
1508 pWaiter->pReq = pReq;
1509 pWaiter->offCacheEntry = offDiff;
1510 pWaiter->cbTransfer = cbData;
1511 pWaiter->fWrite = fWrite;
1512 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1513
1514 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1515
1516 return VINF_SUCCESS;
1517}
1518
1519/**
1520 * Calculate aligned offset and size for a new cache entry
1521 * which do not intersect with an already existing entry and the
1522 * file end.
1523 *
1524 * @returns The number of bytes the entry can hold of the requested amount
1525 * of byte.
1526 * @param pEndpoint The endpoint.
1527 * @param pBlkCache The endpoint cache.
1528 * @param off The start offset.
1529 * @param cb The number of bytes the entry needs to hold at least.
1530 * @param uAlignment Alignment of the boundary sizes.
1531 * @param poffAligned Where to store the aligned offset.
1532 * @param pcbAligned Where to store the aligned size of the entry.
1533 */
1534static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1535 uint64_t off, size_t cb,
1536 unsigned uAlignment,
1537 uint64_t *poffAligned, size_t *pcbAligned)
1538{
1539 size_t cbAligned;
1540 size_t cbInEntry = 0;
1541 uint64_t offAligned;
1542 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1543 PPDMBLKCACHEENTRY pEntryBelow = NULL;
1544
1545 /* Get the best fit entries around the offset */
1546 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove, &pEntryBelow);
1547
1548 /* Log the info */
1549 LogFlow(("%sest fit entry below off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1550 pEntryBelow ? "B" : "No b",
1551 off,
1552 pEntryBelow ? pEntryBelow->Core.Key : 0,
1553 pEntryBelow ? pEntryBelow->Core.KeyLast : 0,
1554 pEntryBelow ? pEntryBelow->cbData : 0));
1555
1556 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1557 pEntryAbove ? "B" : "No b",
1558 off,
1559 pEntryAbove ? pEntryAbove->Core.Key : 0,
1560 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1561 pEntryAbove ? pEntryAbove->cbData : 0));
1562
1563 /* Align the offset first. */
1564 offAligned = off & ~(uint64_t)(512-1);
1565 if ( pEntryBelow
1566 && offAligned <= pEntryBelow->Core.KeyLast)
1567 offAligned = pEntryBelow->Core.KeyLast;
1568
1569 if ( pEntryAbove
1570 && off + cb > pEntryAbove->Core.Key)
1571 {
1572 cbInEntry = pEntryAbove->Core.Key - off;
1573 cbAligned = pEntryAbove->Core.Key - offAligned;
1574 }
1575 else
1576 {
1577 cbAligned = cb;
1578 cbInEntry = cb;
1579 }
1580
1581 /* A few sanity checks */
1582 AssertMsg(!pEntryBelow || pEntryBelow->Core.KeyLast < offAligned,
1583 ("Aligned start offset intersects with another cache entry\n"));
1584 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1585 ("Aligned size intersects with another cache entry\n"));
1586 Assert(cbInEntry <= cbAligned);
1587
1588 if (pEntryBelow)
1589 pdmBlkCacheEntryRelease(pEntryBelow);
1590 if (pEntryAbove)
1591 pdmBlkCacheEntryRelease(pEntryAbove);
1592
1593 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1594
1595 *poffAligned = offAligned;
1596 *pcbAligned = cbAligned;
1597
1598 return cbInEntry;
1599}
1600
1601/**
1602 * Create a new cache entry evicting data from the cache if required.
1603 *
1604 * @returns Pointer to the new cache entry or NULL
1605 * if not enough bytes could be evicted from the cache.
1606 * @param pEndpoint The endpoint.
1607 * @param pBlkCache The endpoint cache.
1608 * @param off The offset.
1609 * @param cb Number of bytes the cache entry should have.
1610 * @param uAlignment Alignment the size of the entry should have.
1611 * @param pcbData Where to store the number of bytes the new
1612 * entry can hold. May be lower than actually requested
1613 * due to another entry intersecting the access range.
1614 */
1615static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1616 uint64_t off, size_t cb,
1617 unsigned uAlignment,
1618 size_t *pcbData)
1619{
1620 uint64_t offStart = 0;
1621 size_t cbEntry = 0;
1622 PPDMBLKCACHEENTRY pEntryNew = NULL;
1623 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1624 uint8_t *pbBuffer = NULL;
1625
1626 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1627 &offStart, &cbEntry);
1628
1629 pdmBlkCacheLockEnter(pCache);
1630 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1631
1632 if (fEnough)
1633 {
1634 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1635
1636 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1637 if (RT_LIKELY(pEntryNew))
1638 {
1639 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1640 pdmBlkCacheAdd(pCache, cbEntry);
1641 pdmBlkCacheLockLeave(pCache);
1642
1643 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1644
1645 AssertMsg( (off >= pEntryNew->Core.Key)
1646 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1647 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1648 off, pEntryNew->Core.Key));
1649 }
1650 else
1651 pdmBlkCacheLockLeave(pCache);
1652 }
1653 else
1654 pdmBlkCacheLockLeave(pCache);
1655
1656 return pEntryNew;
1657}
1658
1659static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1660{
1661 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1662
1663 if (RT_LIKELY(pReq))
1664 {
1665 pReq->pvUser = pvUser;
1666 pReq->cbXfer = cbXfer;
1667 pReq->rcReq = VINF_SUCCESS;
1668 pReq->cXfersPending = 0;
1669 }
1670
1671 return pReq;
1672}
1673
1674static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1675{
1676 switch (pBlkCache->enmType)
1677 {
1678 case PDMBLKCACHETYPE_DEV:
1679 {
1680 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1681 pReq->pvUser, pReq->rcReq);
1682 break;
1683 }
1684 case PDMBLKCACHETYPE_DRV:
1685 {
1686 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1687 pReq->pvUser, pReq->rcReq);
1688 break;
1689 }
1690 case PDMBLKCACHETYPE_USB:
1691 {
1692 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1693 pReq->pvUser, pReq->rcReq);
1694 break;
1695 }
1696 case PDMBLKCACHETYPE_INTERNAL:
1697 {
1698 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1699 pReq->pvUser, pReq->rcReq);
1700 break;
1701 }
1702 default:
1703 AssertMsgFailed(("Unknown block cache type!\n"));
1704 }
1705
1706 RTMemFree(pReq);
1707}
1708
1709static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1710 size_t cbComplete, int rcReq, bool fCallHandler)
1711{
1712 if (RT_FAILURE(rcReq))
1713 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1714
1715 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1716 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1717 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1718 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1719
1720 if ( !(cbOld - cbComplete)
1721 && !cXfersPending)
1722 {
1723 if (fCallHandler)
1724 pdmBlkCacheReqComplete(pBlkCache, pReq);
1725 return true;
1726 }
1727
1728 LogFlowFunc(("pReq=%#p cXfersPending=%u cbXfer=%u\n", pReq, cXfersPending, (cbOld - cbComplete)));
1729 return false;
1730}
1731
1732VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1733 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1734{
1735 int rc = VINF_SUCCESS;
1736 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1737 PPDMBLKCACHEENTRY pEntry;
1738 PPDMBLKCACHEREQ pReq;
1739
1740 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1741 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1742
1743 RTSGBUF SgBuf;
1744 RTSgBufClone(&SgBuf, pcSgBuf);
1745
1746 /* Allocate new request structure. */
1747 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1748 if (RT_UNLIKELY(!pReq))
1749 return VERR_NO_MEMORY;
1750
1751 /* Increment data transfer counter to keep the request valid while we access it. */
1752 ASMAtomicIncU32(&pReq->cXfersPending);
1753
1754 while (cbRead)
1755 {
1756 size_t cbToRead;
1757
1758 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1759
1760 /*
1761 * If there is no entry we try to create a new one eviciting unused pages
1762 * if the cache is full. If this is not possible we will pass the request through
1763 * and skip the caching (all entries may be still in progress so they can't
1764 * be evicted)
1765 * If we have an entry it can be in one of the LRU lists where the entry
1766 * contains data (recently used or frequently used LRU) so we can just read
1767 * the data we need and put the entry at the head of the frequently used LRU list.
1768 * In case the entry is in one of the ghost lists it doesn't contain any data.
1769 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1770 */
1771 if (pEntry)
1772 {
1773 uint64_t offDiff = off - pEntry->Core.Key;
1774
1775 AssertMsg(off >= pEntry->Core.Key,
1776 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1777 off, pEntry->Core.Key));
1778
1779 AssertPtr(pEntry->pList);
1780
1781 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1782
1783 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1784 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1785 off, cbToRead));
1786
1787 cbRead -= cbToRead;
1788
1789 if (!cbRead)
1790 STAM_COUNTER_INC(&pCache->cHits);
1791 else
1792 STAM_COUNTER_INC(&pCache->cPartialHits);
1793
1794 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1795
1796 /* Ghost lists contain no data. */
1797 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1798 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1799 {
1800 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1801 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1802 PDMBLKCACHE_ENTRY_IS_DIRTY))
1803 {
1804 /* Entry didn't completed yet. Append to the list */
1805 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1806 &SgBuf, offDiff, cbToRead,
1807 false /* fWrite */);
1808 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1809 }
1810 else
1811 {
1812 /* Read as much as we can from the entry. */
1813 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
1814 ASMAtomicSubU32(&pReq->cbXfer, cbToRead);
1815 }
1816
1817 /* Move this entry to the top position */
1818 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1819 {
1820 pdmBlkCacheLockEnter(pCache);
1821 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1822 pdmBlkCacheLockLeave(pCache);
1823 }
1824 /* Release the entry */
1825 pdmBlkCacheEntryRelease(pEntry);
1826 }
1827 else
1828 {
1829 uint8_t *pbBuffer = NULL;
1830
1831 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1832
1833 pdmBlkCacheLockEnter(pCache);
1834 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1835 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1836
1837 /* Move the entry to Am and fetch it to the cache. */
1838 if (fEnough)
1839 {
1840 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1841 pdmBlkCacheAdd(pCache, pEntry->cbData);
1842 pdmBlkCacheLockLeave(pCache);
1843
1844 if (pbBuffer)
1845 pEntry->pbData = pbBuffer;
1846 else
1847 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1848 AssertPtr(pEntry->pbData);
1849
1850 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1851 &SgBuf, offDiff, cbToRead,
1852 false /* fWrite */);
1853 pdmBlkCacheEntryReadFromMedium(pEntry);
1854 /* Release the entry */
1855 pdmBlkCacheEntryRelease(pEntry);
1856 }
1857 else
1858 {
1859 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1860 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1861 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
1862 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1863 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1864
1865 pdmBlkCacheLockLeave(pCache);
1866
1867 RTMemFree(pEntry);
1868
1869 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1870 &SgBuf, off, cbToRead,
1871 PDMBLKCACHEXFERDIR_READ);
1872 }
1873 }
1874 }
1875 else
1876 {
1877#ifdef VBOX_WITH_IO_READ_CACHE
1878 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
1879 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
1880 off, cbRead,
1881 PAGE_SIZE,
1882 &cbToRead);
1883
1884 cbRead -= cbToRead;
1885
1886 if (pEntryNew)
1887 {
1888 if (!cbRead)
1889 STAM_COUNTER_INC(&pCache->cMisses);
1890 else
1891 STAM_COUNTER_INC(&pCache->cPartialHits);
1892
1893 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
1894 &SgBuf,
1895 off - pEntryNew->Core.Key,
1896 cbToRead,
1897 false /* fWrite */);
1898 pdmBlkCacheEntryReadFromMedium(pEntryNew);
1899 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1900 }
1901 else
1902 {
1903 /*
1904 * There is not enough free space in the cache.
1905 * Pass the request directly to the I/O manager.
1906 */
1907 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1908
1909 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1910 &SgBuf, off, cbToRead,
1911 PDMBLKCACHEXFERDIR_READ);
1912 }
1913#else
1914 /* Clip read size if necessary. */
1915 PPDMBLKCACHEENTRY pEntryAbove;
1916 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off,
1917 &pEntryAbove, NULL);
1918
1919 if (pEntryAbove)
1920 {
1921 if (off + cbRead > pEntryAbove->Core.Key)
1922 cbToRead = pEntryAbove->Core.Key - off;
1923 else
1924 cbToRead = cbRead;
1925
1926 pdmBlkCacheEntryRelease(pEntryAbove);
1927 }
1928 else
1929 cbToRead = cbRead;
1930
1931 cbRead -= cbToRead;
1932 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1933 &SgBuf, off, cbToRead,
1934 PDMBLKCACHEXFERDIR_READ);
1935#endif
1936 }
1937 off += cbToRead;
1938 }
1939
1940 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
1941 rc = VINF_AIO_TASK_PENDING;
1942
1943 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1944
1945 return rc;
1946}
1947
1948VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
1949 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
1950{
1951 int rc = VINF_SUCCESS;
1952 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1953 PPDMBLKCACHEENTRY pEntry;
1954 PPDMBLKCACHEREQ pReq;
1955
1956 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
1957 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
1958
1959 RTSGBUF SgBuf;
1960 RTSgBufClone(&SgBuf, pcSgBuf);
1961
1962 /* Allocate new request structure. */
1963 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
1964 if (RT_UNLIKELY(!pReq))
1965 return VERR_NO_MEMORY;
1966
1967 /* Increment data transfer counter to keep the request valid while we access it. */
1968 ASMAtomicIncU32(&pReq->cXfersPending);
1969
1970 while (cbWrite)
1971 {
1972 size_t cbToWrite;
1973
1974 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1975
1976 if (pEntry)
1977 {
1978 /* Write the data into the entry and mark it as dirty */
1979 AssertPtr(pEntry->pList);
1980
1981 uint64_t offDiff = off - pEntry->Core.Key;
1982
1983 AssertMsg(off >= pEntry->Core.Key,
1984 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1985 off, pEntry->Core.Key));
1986
1987 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
1988 cbWrite -= cbToWrite;
1989
1990 if (!cbWrite)
1991 STAM_COUNTER_INC(&pCache->cHits);
1992 else
1993 STAM_COUNTER_INC(&pCache->cPartialHits);
1994
1995 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1996
1997 /* Ghost lists contain no data. */
1998 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1999 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2000 {
2001 /* Check if the entry is dirty. */
2002 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2003 PDMBLKCACHE_ENTRY_IS_DIRTY,
2004 0))
2005 {
2006 /* If it is already dirty but not in progress just update the data. */
2007 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2008 {
2009 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2010 cbToWrite);
2011 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2012 }
2013 else
2014 {
2015 /* The data isn't written to the file yet */
2016 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2017 &SgBuf, offDiff, cbToWrite,
2018 true /* fWrite */);
2019 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2020 }
2021
2022 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2023 }
2024 else /* Dirty bit not set */
2025 {
2026 /*
2027 * Check if a read is in progress for this entry.
2028 * We have to defer processing in that case.
2029 */
2030 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2031 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2032 0))
2033 {
2034 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2035 &SgBuf, offDiff, cbToWrite,
2036 true /* fWrite */);
2037 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2038 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2039 }
2040 else /* I/O in progress flag not set */
2041 {
2042 /* Write as much as we can into the entry and update the file. */
2043 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2044 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2045
2046 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2047 if (fCommit)
2048 pdmBlkCacheCommitDirtyEntries(pCache);
2049 }
2050 } /* Dirty bit not set */
2051
2052 /* Move this entry to the top position */
2053 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2054 {
2055 pdmBlkCacheLockEnter(pCache);
2056 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2057 pdmBlkCacheLockLeave(pCache);
2058 }
2059
2060 pdmBlkCacheEntryRelease(pEntry);
2061 }
2062 else /* Entry is on the ghost list */
2063 {
2064 uint8_t *pbBuffer = NULL;
2065
2066 pdmBlkCacheLockEnter(pCache);
2067 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2068 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2069
2070 if (fEnough)
2071 {
2072 /* Move the entry to Am and fetch it to the cache. */
2073 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2074 pdmBlkCacheAdd(pCache, pEntry->cbData);
2075 pdmBlkCacheLockLeave(pCache);
2076
2077 if (pbBuffer)
2078 pEntry->pbData = pbBuffer;
2079 else
2080 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2081 AssertPtr(pEntry->pbData);
2082
2083 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2084 &SgBuf, offDiff, cbToWrite,
2085 true /* fWrite */);
2086 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2087 pdmBlkCacheEntryReadFromMedium(pEntry);
2088
2089 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2090 pdmBlkCacheEntryRelease(pEntry);
2091 }
2092 else
2093 {
2094 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2095 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2096 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2097 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2098 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2099
2100 pdmBlkCacheLockLeave(pCache);
2101
2102 RTMemFree(pEntry);
2103 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2104 &SgBuf, off, cbToWrite,
2105 PDMBLKCACHEXFERDIR_WRITE);
2106 }
2107 }
2108 }
2109 else /* No entry found */
2110 {
2111 /*
2112 * No entry found. Try to create a new cache entry to store the data in and if that fails
2113 * write directly to the file.
2114 */
2115 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2116 off, cbWrite,
2117 512, &cbToWrite);
2118
2119 cbWrite -= cbToWrite;
2120
2121 if (pEntryNew)
2122 {
2123 uint64_t offDiff = off - pEntryNew->Core.Key;
2124
2125 STAM_COUNTER_INC(&pCache->cHits);
2126
2127 /*
2128 * Check if it is possible to just write the data without waiting
2129 * for it to get fetched first.
2130 */
2131 if (!offDiff && pEntryNew->cbData == cbToWrite)
2132 {
2133 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2134 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2135
2136 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2137 if (fCommit)
2138 pdmBlkCacheCommitDirtyEntries(pCache);
2139 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2140 }
2141 else
2142 {
2143 /* Defer the write and fetch the data from the endpoint. */
2144 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2145 &SgBuf, offDiff, cbToWrite,
2146 true /* fWrite */);
2147 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2148 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2149 }
2150
2151 pdmBlkCacheEntryRelease(pEntryNew);
2152 }
2153 else
2154 {
2155 /*
2156 * There is not enough free space in the cache.
2157 * Pass the request directly to the I/O manager.
2158 */
2159 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2160
2161 STAM_COUNTER_INC(&pCache->cMisses);
2162
2163 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2164 &SgBuf, off, cbToWrite,
2165 PDMBLKCACHEXFERDIR_WRITE);
2166 }
2167 }
2168
2169 off += cbToWrite;
2170 }
2171
2172 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2173 rc = VINF_AIO_TASK_PENDING;
2174
2175 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2176
2177 return rc;
2178}
2179
2180VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2181{
2182 int rc = VINF_SUCCESS;
2183 PPDMBLKCACHEREQ pReq;
2184
2185 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2186
2187 /* Commit dirty entries in the cache. */
2188 pdmBlkCacheCommit(pBlkCache);
2189
2190 /* Allocate new request structure. */
2191 pReq = pdmBlkCacheReqAlloc(0, pvUser);
2192 if (RT_UNLIKELY(!pReq))
2193 return VERR_NO_MEMORY;
2194
2195 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2196 PDMBLKCACHEXFERDIR_FLUSH);
2197 AssertRC(rc);
2198
2199 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2200 return VINF_AIO_TASK_PENDING;
2201}
2202
2203/**
2204 * Completes a task segment freeing all resources and completes the task handle
2205 * if everything was transferred.
2206 *
2207 * @returns Next task segment handle.
2208 * @param pTaskSeg Task segment to complete.
2209 * @param rc Status code to set.
2210 */
2211static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2212 PPDMBLKCACHEWAITER pWaiter,
2213 int rc)
2214{
2215 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2216 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2217
2218 pdmBlkCacheReqUpdate(pBlkCache, pWaiter->pReq, pWaiter->cbTransfer, rc, true);
2219
2220 RTMemFree(pWaiter);
2221
2222 return pNext;
2223}
2224
2225static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2226{
2227 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2228 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2229
2230 /* Reference the entry now as we are clearing the I/O in progress flag
2231 * which protected the entry till now. */
2232 pdmBlkCacheEntryRef(pEntry);
2233
2234 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2235 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2236
2237 /* Process waiting segment list. The data in entry might have changed in-between. */
2238 bool fDirty = false;
2239 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2240 PPDMBLKCACHEWAITER pCurr = pComplete;
2241
2242 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2243 ("The list tail was not updated correctly\n"));
2244 pEntry->pWaitingTail = NULL;
2245 pEntry->pWaitingHead = NULL;
2246
2247 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2248 {
2249 /*
2250 * An error here is difficult to handle as the original request completed already.
2251 * The error is logged for now and the VM is paused.
2252 * If the user continues the entry is written again in the hope
2253 * the user fixed the problem and the next write succeeds.
2254 */
2255 /** @todo r=aeichner: This solution doesn't work
2256 * The user will get the message but the VM will hang afterwards
2257 * VMR3Suspend() returns when the VM is suspended but suspending
2258 * the VM will reopen the images readonly in DrvVD. They are closed first
2259 * which will close the endpoints. This will block EMT while the
2260 * I/O manager processes the close request but the IO manager is stuck
2261 * in the VMR3Suspend call and can't process the request.
2262 * Another problem is that closing the VM means flushing the cache
2263 * but the entry failed and will probably fail again.
2264 * No idea so far how to solve this problem... but the user gets informed
2265 * at least.
2266 */
2267 if (RT_FAILURE(rcIoXfer))
2268 {
2269 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\"\n",
2270 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId));
2271
2272 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2273 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc)."
2274 "Make sure there is enough free space on the disk and that the disk is working properly."
2275 "Operation can be resumed afterwards."),
2276 pBlkCache->pszId, rcIoXfer);
2277 AssertRC(rc);
2278 }
2279 else
2280 {
2281 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2282
2283 while (pCurr)
2284 {
2285 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2286
2287 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2288 fDirty = true;
2289
2290 pCurr = pCurr->pNext;
2291 }
2292 }
2293 }
2294 else
2295 {
2296 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2297 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2298 ("Invalid flags set\n"));
2299
2300 while (pCurr)
2301 {
2302 if (pCurr->fWrite)
2303 {
2304 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2305 fDirty = true;
2306 }
2307 else
2308 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2309
2310 pCurr = pCurr->pNext;
2311 }
2312 }
2313
2314 bool fCommit = false;
2315 if (fDirty)
2316 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2317
2318 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2319
2320 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2321 pdmBlkCacheEntryRelease(pEntry);
2322
2323 if (fCommit)
2324 pdmBlkCacheCommitDirtyEntries(pCache);
2325
2326 /* Complete waiters now. */
2327 while (pComplete)
2328 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2329}
2330
2331VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2332{
2333 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2334
2335 if (hIoXfer->fIoCache)
2336 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2337 else
2338 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2339}
2340
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette