yuzu-mirror.github.io/entry/yuzu-bcr/index.html

470 lines
21 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="en-us" class="has-navbar-fixed-top">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1">
<meta name="theme-color" content="#404040">
<meta property="og:title" content="New Feature Release - Buffer Cache Rewrite &middot; yuzu" />
<meta property="og:site_name" content="yuzu" />
<meta property="og:url" content="https://yuzu-mirror.github.io/entry/yuzu-bcr/" />
<meta property="og:description" content="Hey there, yuz-ers! The follow-up to our previous big code rewrite is finally here: the Buffer Cache Rewrite!
This massive undertaking not only improves performance significantly, but also simplifies the code for our developers.
Now let&rsquo;s get this article started!" />
<meta name="description" content="Hey there, yuz-ers! The follow-up to our previous big code rewrite is finally here: the Buffer Cache Rewrite!
This massive undertaking not only improves performance significantly, but also simplifies the code for our developers.
Now let&rsquo;s get this article started!" />
<meta property="og:type" content="article" />
<meta property="og:image" content="https://yuzu-mirror.github.io/entry/yuzu-bcr/banner.png" />
<link rel="icon" href="https://yuzu-mirror.github.io/favicon.ico" />
<link rel="shortcut icon" href="https://yuzu-mirror.github.io/favicon.ico" type="image/x-icon" />
<link rel="canonical" href="https://yuzu-mirror.github.io/entry/yuzu-bcr/">
<title>New Feature Release - Buffer Cache Rewrite - yuzu</title>
<link href="https://fonts.googleapis.com/css?family=Ubuntu|Dosis" rel="stylesheet">
<link href="https://use.fontawesome.com/releases/v6.4.0/css/all.css" rel="stylesheet">
<link rel="stylesheet" href="https://yuzu-mirror.github.io/scss/style.min.css" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/baguettebox.js/1.11.1/baguetteBox.min.css" type="text/css" />
<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-112443698-1"></script>
<script type="text/javascript">
// gtag.js bootstrap for the async loader script requested just above.
// Reuse window.dataLayer if it is already defined; otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
// Queue a command by pushing the raw `arguments` object (not a copied array) onto dataLayer.
function gtag() { dataLayer.push(arguments); }
// Queue the 'js' command together with the current time.
gtag('js', new Date());
// Queue the 'config' command for UA-112443698-1, the same ID used in the loader URL.
gtag('config', 'UA-112443698-1');
</script>
</head>
<body>
<nav class="navbar is-dark is-size-6 is-fixed-top" role="navigation" aria-label="main navigation">
<div class="container">
<div class="navbar-brand">
<a class="navbar-item" href="https://yuzu-mirror.github.io">
<svg xmlns="http://www.w3.org/2000/svg" class="navbar-logo" viewBox="0 0 515.83 163.11"><defs><style>.cls-1{fill:#fff;}.cls-2{fill:#ff3c28;}.cls-3{fill:#0ab9e6;}</style></defs><g id="Layer_2" data-name="Layer 2"><g id="Layer_1-2" data-name="Layer 1"><path class="cls-1" d="M515.83,23.23v73c0,14.5-2.24,25.24-6.84,32.82-5.92,10.15-16.2,15.32-30.53,15.32s-24.62-5.23-30.58-15.57c-4.56-7.64-6.79-18.42-6.79-32.92V23.23a4.51,4.51,0,0,1,4.51-4.51h2.28a4.51,4.51,0,0,1,4.51,4.51v72.5c0,33.53,14.88,37.4,26.07,37.4,12.14,0,26.08-4.17,26.08-36.71V23.23a4.51,4.51,0,0,1,4.51-4.51h2.27A4.51,4.51,0,0,1,515.83,23.23Z"/><path class="cls-1" d="M421.34,144.4H353.45c-2.35,0-4.72-1.88-4.72-6.08a8.32,8.32,0,0,1,1.33-4.49L410.39,29.36H360.8a4.51,4.51,0,0,1-4.51-4.5V23.28a4.51,4.51,0,0,1,4.48-4.51h.81c58.68-.11,59.11,0,59.66.07a5.19,5.19,0,0,1,4,5.8,8.74,8.74,0,0,1-1.32,3.75L363.33,133.17h58a4.51,4.51,0,0,1,4.51,4.51v2.21A4.51,4.51,0,0,1,421.34,144.4Z"/><path class="cls-1" d="M248.45,23.23v82.06c0,26-11.8,38.44-37.12,39.09h-.12a4.51,4.51,0,0,1-4.51-4.51V137.5a4.51,4.51,0,0,1,4.48-4.5c18.49-.15,26-8.23,26-27.9v-2.37a32.34,32.34,0,0,1-3.34,3.28c-6.39,5.5-14.5,8.29-24.07,8.29-22.86,0-35-12.41-35-35.89V23.23a4.52,4.52,0,0,1,4.51-4.51h2.22a4.52,4.52,0,0,1,4.5,4.51v55c0,7.6,1.82,14.22,5,18.18,3.57,4.56,9.17,6.49,18.75,6.49,10.13,0,17.32-3.76,22-11.5,3.61-5.92,5.43-13.66,5.43-23V23.23a4.52,4.52,0,0,1,4.51-4.51h2.22A4.52,4.52,0,0,1,248.45,23.23Z"/><path class="cls-1" d="M338.12,23.23v73c0,14.5-2.24,25.24-6.84,32.82-5.92,10.15-16.2,15.32-30.53,15.32s-24.62-5.23-30.58-15.57c-4.56-7.64-6.79-18.42-6.79-32.92V23.23a4.51,4.51,0,0,1,4.51-4.51h2.28a4.51,4.51,0,0,1,4.51,4.51v72.5c0,33.53,14.88,37.4,26.07,37.4,12.14,0,26.08-4.17,26.08-36.71V23.23a4.51,4.51,0,0,1,4.51-4.51h2.27A4.51,4.51,0,0,1,338.12,23.23Z"/><g id="g823"><g id="right"><g id="g827"><g id="g833"><path id="path835" class="cls-2" d="M81.56,32.62V163.11a65.25,65.25,0,0,0,0-130.49M94.3,46.91a52.54,52.54,0,0,1,0,101.91V46.91"/></g></g></g><g 
id="left"><g id="g839"><g id="g845"><path id="path847" class="cls-3" d="M65.24,0a65.25,65.25,0,0,0,0,130.49ZM52.5,14.29V116.2A52.52,52.52,0,0,1,28.12,28.12,52.16,52.16,0,0,1,52.5,14.29"/></g></g></g></g></g></g></svg>
</a>
<div class="burger navbar-burger is-dark" data-target="navMenu">
<span></span>
<span></span>
<span></span>
</div>
</div>
<div class="navbar-menu" id="navMenu">
<div class="navbar-start">
<a class="navbar-item px-lg" href="/entry">
Blog
</a>
<a class="navbar-item px-lg" href="/downloads">
Download
</a>
<a class="navbar-item px-lg" href="/wiki/faq">
FAQs
</a>
<a class="navbar-item px-lg" href="/game">
Compatibility
</a>
<a class="navbar-item px-lg" href="/screenshots">
Screenshots
</a>
<a class="navbar-item px-lg" href="https://www.patreon.com/yuzuteam">
Patreon
</a>
<a class="navbar-item px-lg" href="https://profile.yuzu-mirror.github.io">
Profile
</a>
<!-- Text social links (the ones carrying is-hidden-desktop). They open in a new tab, so rel="noopener noreferrer" stops the opened page from reaching window.opener. The icons are decorative because each link already has visible text. -->
<a class="navbar-item px-lg is-hidden-desktop" href="https://discord.gg/u77vRWY" target="_blank" rel="noopener noreferrer">
<i class="fab fa-discord mr-sm" aria-hidden="true"></i> Discord
</a>
<a class="navbar-item px-lg is-hidden-desktop" href="https://twitter.com/yuzuemu" target="_blank" rel="noopener noreferrer">
<i class="fab fa-twitter mr-sm" aria-hidden="true"></i> Twitter
</a>
<a class="navbar-item px-lg is-hidden-desktop" href="https://github.com/yuzu-mirror/yuzu" target="_blank" rel="noopener noreferrer">
<i class="fab fa-github mr-sm" aria-hidden="true"></i> GitHub
</a>
</div>
<div class="navbar-end">
<!-- Icon-only social links (the ones carrying is-hidden-touch). The icon glyph is the only content, so each link gets an aria-label as its accessible name and the glyph itself is hidden from assistive technology. rel="noopener noreferrer" accompanies target="_blank". -->
<a class="navbar-item px-lg is-hidden-touch" href="https://discord.gg/u77vRWY" target="_blank" rel="noopener noreferrer" aria-label="Discord">
<span class="icon">
<i class="fab fa-2x fa-discord" aria-hidden="true"></i>
</span>
</a>
<a class="navbar-item px-lg is-hidden-touch" href="https://twitter.com/yuzuemu" target="_blank" rel="noopener noreferrer" aria-label="Twitter">
<span class="icon">
<i class="fab fa-2x fa-twitter" aria-hidden="true"></i>
</span>
</a>
<a class="navbar-item px-lg is-hidden-touch" href="https://github.com/yuzu-mirror/yuzu" target="_blank" rel="noopener noreferrer" aria-label="GitHub">
<span class="icon">
<i class="fab fa-2x fa-github" aria-hidden="true"></i>
</span>
</a>
</div>
</div>
</div>
</nav>
<div class="mb-md blog-entry-header single" style="background-image: url('https://yuzu-mirror.github.io/entry/yuzu-bcr/banner_hu7b6746562ce4140d3127244a7b8edff4_1264288_1280x0_resize_q99_bgffffff_box_3.jpg');background-repeat:no-repeat;background-size:contain;background-position:center;"></div>
<div class="has-text-centered">
<div>
<span class="title px-md py-sm">New Feature Release - Buffer Cache Rewrite</span>
</div>
<div>
<!-- Byline: authors and publication date. The date is wrapped in <time> so it is machine-readable; the visible text is unchanged. -->
<p class="h3 px-md py-sm">
Written by <a href="https://community.citra-emu.org/u/CaptV0rt3x/summary">CaptV0rt3x</a>
and <a href="https://community.citra-emu.org/u/GoldenX86/summary">GoldenX86</a>
on <time datetime="2021-01-16">January 16 2021</time>
</p>
</div>
</div>
<div class="container">
<div class="columns is-centered">
<div class="column is-four-fifths">
<section class="section content pt-sm">
<br>
<p>Hey there, yuz-ers! The follow-up to our <a href="https://yuzu-mirror.github.io/entry/yuzu-tcr/">previous big code rewrite</a> is finally here: the Buffer Cache Rewrite!
This massive undertaking not only improves performance significantly, but also simplifies the code for our developers.
Now let&rsquo;s get this article started!</p>
<h2 id="so-what-does-a-buffer-cache-do">So, what does a Buffer Cache do?</h2>
<p>As the name implies, a Buffer Cache — well — caches (stores) buffers.
That might not have made much sense, but that&rsquo;s what it does.</p>
<p>In graphics programming, for the GPU to render anything it needs data like position, color, etc.
Usually, this data is supplied by the application.
But when we have large applications dealing with large volumes of data, it becomes increasingly difficult to constantly supply the GPU
with data and have it render.
Hence, buffer objects were introduced.</p>
<p>Buffer objects are memory objects that store the render data in the GPU memory — thereby increasing reusability significantly.
There are various types of bindings, commonly referred to as buffer types, like index buffers, vertex buffers, and uniform buffers (among others).
This improves the rendering performance because the data is now readily available for the GPU to use.</p>
<h3 id="yuzus-buffer-cache">yuzu&rsquo;s Buffer Cache</h3>
<p>Coming back to yuzu&rsquo;s case, yuzu initially inherited a stream buffer — originally implemented for <a href="https://citra-emu.org">Citra</a> by <a href="https://github.com/degasus">degasus</a>.
A stream buffer works in a modify/use cycle, meaning you frequently update the buffer object and you bind that region.
<a href="https://github.com/ReinUsesLisp">Rodrigo</a> and <a href="https://github.com/FernandoS27">Blinkhawk</a> later implemented our existing buffer cache to work alongside the stream buffer.</p>
<p>There was nothing inherently wrong with it; stream buffers are in fact one of the fastest ways to upload data to the GPU.
But when <a href="https://github.com/ReinUsesLisp">Rodrigo</a> profiled yuzu, the cache management and upload copies were something that kept popping up as slow.</p>
<article class="message">
<div class="message-header is-dropdown">
<p>Profiling</p>
<span class="icon">
<i class="fas fa-lg fa-arrow-circle-down"></i>
</span>
</div>
<div class="message-body is-dropdown-target">
In software engineering, profiling (&ldquo;program profiling&rdquo;, &ldquo;software profiling&rdquo;) is a form of dynamic program analysis that measures, for example, the space (memory) or time complexity of a program,
the usage of particular instructions, or the frequency and duration of function calls. Most commonly, profiling information serves to aid program optimization.
</div>
</article>
<p>The problem lay in the fact that games aren&rsquo;t exactly streaming data all the time.
So using immediate uploads (on OpenGL) and faster caching yielded much better performance than having a stream buffer and caching large resources, at least for Nvidia.
Upon further testing, we found that this turned out to be false for non-Nvidia drivers on OpenGL (AMD, Intel, and Mesa) and hence had to add a stream buffer for small uploads in these drivers.</p>
<h2 id="whats-changed-now">What&rsquo;s changed now?</h2>
<p>The technical design goals for the Buffer Cache Rewrite were the same as our Texture Cache Rewrite.</p>
<ul>
<li>Cleaner code: No more virtual function calls or shared pointers, meaning easier maintenance in the future.</li>
<li>Improved efficiency and improved performance.</li>
</ul>
<p>Resolving which buffer existed in which memory region was a very expensive operation in our old buffer cache implementation.
This is why the stream buffer existed — to make it faster.</p>
<p>The new Buffer Cache has vastly improved tracking for the various buffers it caches.
In the new implementation, when buffers are created in the memory, they are forcibly aligned to 4K <a href="https://en.wikipedia.org/wiki/Page_(computer_memory)">pages</a> (4096 bytes - starting at zero).
And to efficiently know what buffer exists on what address, the cache uses a flat array 32 MiB wide to translate from the current CPU page where the buffer exists to what buffer resides in it.
<code>e.g., if the address is 4096 or 7000, that is page 1 &amp; if it is 8192, that is page 2.</code>
Thus, the new Buffer Cache can track which pages of a buffer have been modified, instead of keeping a single modified/unmodified state for the whole buffer.</p>
<p>Imagine if a buffer has a size of 524288 bytes and a game modifies only 1 byte of the buffer.
Since buffers are now aligned to 4096 bytes as mentioned earlier, only those 4096 bytes are uploaded to the GPU.
The same thing happens when the GPU tries to update the cache with data modified by the CPU.</p>
<p>This tracking is done by making use of bit arrays in the buffers.
Each bit represents the state of one page — 1 being modified, 0 being clear.
Keeping things in a bit array allows us to use efficient bit operations like <code>std::countr_zero</code> and <code>std::countr_one</code> (C++20).
This results in fewer instructions yielding the same results (much faster).</p>
<h2 id="all-right-lets-talk-performance-gains">All right, let&rsquo;s talk performance gains!</h2>
<p>The main focus of this work is to improve performance, but some graphical improvements also resulted from this rewrite.</p>
<!-- Two-image comparison. Each thumbnail now has its own alt text instead of both repeating the caption. NOTE(review): "before"/"after" is inferred from the file names otbug.png and otfix.png; confirm. -->
<div class="columns is-img-preview is-bottom-marginless">
<div class="column is-bottom-paddingless">
<a href="./otbug.png" title="">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/otbug_hu0ec6382faaaf32c00049fcfadfdd40c4_2193721_800x0_resize_q90_bgffffff_box_3.jpg" alt="OCTOPATH TRAVELER before the rewrite, showing vertex explosions"></a>
</div>
<div class="column is-bottom-paddingless">
<a href="./otfix.png" title="">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/otfix_hud63d13bc8babcd429f7907e35e65356a_3183308_800x0_resize_q90_bgffffff_box_3.jpg" alt="OCTOPATH TRAVELER after the rewrite, with the vertex explosions gone"></a>
</div>
</div>
<p class="has-text-centered is-italic has-text-grey-light">Vertex explosions are no longer a problem in OCTOPATH TRAVELER</p>
<!-- Two-image comparison. Each thumbnail now has its own alt text instead of both repeating the caption. NOTE(review): "before"/"after" is inferred from the file names acnhbug.png and acnhfix.png; confirm. -->
<div class="columns is-img-preview is-bottom-marginless">
<div class="column is-bottom-paddingless">
<a href="./acnhbug.png" title="">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/acnhbug_hu0cc6930dfef1b11bf677342bf378c70d_2387401_800x0_resize_q90_bgffffff_box_3.jpg" alt="Animal Crossing: New Horizons before the rewrite, with broken font rendering"></a>
</div>
<div class="column is-bottom-paddingless">
<a href="./acnhfix.png" title="">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/acnhfix_hud9375e742820c8045efe0c112ebdeea7_1986227_800x0_resize_q90_bgffffff_box_3.jpg" alt="Animal Crossing: New Horizons after the rewrite, with fonts rendering correctly"></a>
</div>
</div>
<p class="has-text-centered is-italic has-text-grey-light">Font rendering is now working for all GPU vendors in Animal Crossing: New Horizons</p>
<!-- Muted, looping comparison clips. The standard playsinline attribute is added next to the prefixed webkit-playsinline so current iOS Safari plays them inline instead of going fullscreen. The videos have no controls or text, so each gets an aria-label. NOTE(review): "before"/"after" is inferred from the file names xbdebug.mp4 and xbdefix.mp4; confirm. -->
<div class="columns is-img-preview is-bottom-marginless">
<div class="column is-bottom-paddingless">
<video preload="auto" autoplay="autoplay" muted="muted" loop="loop" playsinline="" webkit-playsinline="" aria-label="Xenoblade Chronicles Definitive Edition before the rewrite, with flickering item drops">
<source src="./xbdebug.mp4" type="video/mp4">
Your browser doesn't support mp4 video. :(
</video>
</div>
<div class="column is-bottom-paddingless">
<video preload="auto" autoplay="autoplay" muted="muted" loop="loop" playsinline="" webkit-playsinline="" aria-label="Xenoblade Chronicles Definitive Edition after the rewrite, with item drops no longer flickering">
<source src="./xbdefix.mp4" type="video/mp4">
Your browser doesn't support mp4 video. :(
</video>
</div>
</div>
<p class="has-text-centered is-italic has-text-grey-light">Item drops stop flickering in Xenoblade Chronicles: Definitive Edition</p>
<p>With that out of the way, let&rsquo;s talk about performance. Of course, metrics will vary greatly depending on the hardware and API in use.
Here are some examples measured after a couple of runs in the most demanding or common areas of the games listed:</p>
<p>Nvidia, in this example represented by an RTX3070, shows up to 84% improved performance in OpenGL.</p>
<!-- The chart follows the paragraph instead of sitting inside it. A <p> cannot contain a <div>, so the parser was already ending the paragraph at this point and turning the old trailing </p> into an extra empty paragraph. The linked image also gets real alt text. -->
<div class="columns is-img-preview">
<div class="column has-text-centered">
<a href="./nvbench.png" title=" ">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/nvbench_huad6f813e0bcdc09d1b005b82dabdc18c_134004_1024x0_resize_q90_bgffffff_box_3.jpg" alt="Nvidia RTX 3070 OpenGL performance comparison chart"></a>
<p class="has-text-centered is-italic has-text-grey-light"> </p>
</div>
</div>
<p>AMD on the other hand, represented by a small RX550, shows an up to 55% improvement in Vulkan.</p>
<!-- The chart follows the paragraph instead of sitting inside it. A <p> cannot contain a <div>, so the parser was already ending the paragraph at this point and turning the old trailing </p> into an extra empty paragraph. The linked image also gets real alt text. -->
<div class="columns is-img-preview">
<div class="column has-text-centered">
<a href="./amdbench.png" title=" ">
<img src="https://yuzu-mirror.github.io/entry/yuzu-bcr/amdbench_huad6f813e0bcdc09d1b005b82dabdc18c_133299_1024x0_resize_q90_bgffffff_box_3.jpg" alt="AMD RX 550 Vulkan performance comparison chart"></a>
<p class="has-text-centered is-italic has-text-grey-light"> </p>
</div>
</div>
<p>Regarding Intel, an unsurprising problem arises. All currently released products are bottlenecked by immature drivers and simply lack the raw power for Switch emulation. This results in little to no performance improvement with this rewrite. Hopefully this can be addressed by future improvements to yuzu and by Intel&rsquo;s future drivers and hardware releases.</p>
<p>As a special mention, AMD Vega-based integrated GPUs show up to a 223% increase in <code>Paper Mario: The Origami King</code>, reaching the same level of performance as dedicated cards of a much higher calibre.</p>
<h2 id="fin">Fin</h2>
<p>With that, we conclude our coverage of the new Buffer Cache Rewrite.
As always, we would like to remind users that the features released in <a href="https://yuzu-mirror.github.io/help/early-access/">Early Access</a> are still being worked on.
If you come across any bugs, issues, performance loss, crashes, or regressions with this new feature, please
reach out to us on our <a href="https://discord.com/invite/u77vRWY">Discord server</a> and share your findings.</p>
<p>See you next time, <br>
- yuzu development team!</p>
<!-- The spacer paragraph is closed before the heading. A <p> cannot contain an <h4>, so the parser was already ending the paragraph here and treating the old trailing </p> as an extra empty paragraph. NOTE(review): the original spacer character appears to be a non-breaking space; it is written as &nbsp; to make it explicit. Confirm against the source bytes. -->
<p>&nbsp;</p>
<h4 style="text-align:center;">
<b>Please consider supporting us on <a href="https://www.patreon.com/yuzuteam">Patreon</a>!<br>
If you would like to contribute to this project, check out our <a href="https://github.com/yuzu-emu/yuzu">GitHub</a>!</b>
</h4>
</section>
<div class="has-text-centered">
<a class="pagination-next" href="https://community.citra-emu.org/t/348059">Continue the discussion on our forums.</a>
</div>
</div>
<div class="column">
<div class="px-md">
<ins class="adsbygoogle"
style="display:block; margin-left:25px;"
data-ad-client="ca-pub-4126545610079023"
data-ad-slot="6276099127"
data-ad-format="auto"></ins>
<br>
<p class="is-size-6 has-text-centered">Advertisement</p>
</div>
</div>
</div>
<div class="column">
<div class="px-md has-text-centered">
<p class="is-size-6 has-text-centered">Advertisement</p>
<br>
<ins class="adsbygoogle" style="display:inline-block;width:728px;height:100px" data-ad-client="ca-pub-4126545610079023" data-ad-slot="1038554045"></ins>
</div>
</div>
</div>
<div class="container">
<footer class="footer">
<div class="content has-text-centered">
copyright &copy; 2025 yuzu emulator team
</div>
</footer>
</div>
<script src="https://yuzu-mirror.github.io/js/script.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/baguettebox.js/1.11.1/baguetteBox.min.js" type="text/javascript"></script>
<script type="text/javascript">
// When the DOMContentLoaded event fires, call baguetteBox.run with the
// '.is-img-preview' selector (the class on this page's image column wrappers).
window.addEventListener("DOMContentLoaded", function() {
baguetteBox.run('.is-img-preview');
});
</script>
<script type="text/javascript">
// Push one empty object onto the adsbygoogle queue for each element with class "adsbygoogle".
// The class lookup runs again on every iteration, so the loop bound follows the document's current state.
for (var i = 0; i < document.getElementsByClassName('adsbygoogle').length; i++) {
// Reuse window.adsbygoogle if it is already defined; otherwise assign a fresh array to the global first.
(adsbygoogle = window.adsbygoogle || []).push({});
}
</script>
</body>
</html>