rpcsx/rpcs3/Emu/Cell/lv2/sys_sync.h

#pragma once

#include "Utilities/mutex.h"
#include "Utilities/sema.h"
#include "Utilities/cond.h"

#include "Emu/Memory/vm_locking.h"
#include "Emu/CPU/CPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/IdManager.h"
#include "Emu/IPC.h"
#include "Emu/system_config.h"
#include "Emu/System.h"

#include <deque>
#include <thread>

// attr_protocol (waiting scheduling policy)
// Values fit in the low nibble covered by SYS_SYNC_ATTR_PROTOCOL_MASK.
// lv2_obj::schedule() special-cases SYS_SYNC_FIFO and treats any other
// protocol value as a priority-ordered pick.
enum
{
	SYS_SYNC_FIFO                = 0x1, // First In, First Out Order
	SYS_SYNC_PRIORITY            = 0x2, // Priority Order
	SYS_SYNC_PRIORITY_INHERIT    = 0x3, // Basic Priority Inheritance Protocol
	SYS_SYNC_RETRY               = 0x4, // Not selected while unlocking
	SYS_SYNC_ATTR_PROTOCOL_MASK  = 0xf, // Bit mask covering the protocol values above
};

// attr_recursive (recursive locks policy)
// Values occupy the second nibble (bits 4-7), covered by SYS_SYNC_ATTR_RECURSIVE_MASK.
enum
{
	SYS_SYNC_RECURSIVE           = 0x10, // Recursive locking requested
	SYS_SYNC_NOT_RECURSIVE       = 0x20, // Recursive locking not requested
	SYS_SYNC_ATTR_RECURSIVE_MASK = 0xf0, // Bit mask covering the recursive values above
};

// attr_pshared (sharing among processes policy)
// Values occupy the third nibble (bits 8-11), covered by SYS_SYNC_ATTR_PSHARED_MASK.
// lv2_obj::create() accepts exactly these two values for its `pshared` argument
// and rejects anything else with CELL_EINVAL.
enum
{
	SYS_SYNC_PROCESS_SHARED      = 0x100, // Object is looked up / registered by IPC key
	SYS_SYNC_NOT_PROCESS_SHARED  = 0x200, // Object is created without an IPC key
	SYS_SYNC_ATTR_PSHARED_MASK   = 0xf00, // Bit mask covering the pshared values above
};

// attr_flags (creation policy)
// NOTE: these values numerically overlap the attr_protocol values (0x1..0x3, mask 0xf);
// lv2_obj::create() receives them through its own `flags` parameter.
enum
{
	SYS_SYNC_NEWLY_CREATED       = 0x1, // Create new object, fails if specified IPC key exists
	SYS_SYNC_NOT_CREATE          = 0x2, // Reference existing object, fails if IPC key not found
	SYS_SYNC_NOT_CARE            = 0x3, // Reference existing object, create new one if IPC key not found
	SYS_SYNC_ATTR_FLAGS_MASK     = 0xf, // Bit mask covering the flag values above
};

// attr_adaptive
// Values occupy the fourth nibble (bits 12-15), covered by SYS_SYNC_ATTR_ADAPTIVE_MASK.
// NOTE(review): nothing in this header consumes these values; their exact
// semantics should be confirmed against the users of this attribute.
enum
{
	SYS_SYNC_ADAPTIVE            = 0x1000,
	SYS_SYNC_NOT_ADAPTIVE        = 0x2000,
	SYS_SYNC_ATTR_ADAPTIVE_MASK  = 0xf000, // Bit mask covering the adaptive values above
};

// Base class for some kernel objects (shared set of 8192 objects).
struct lv2_obj
{
	using id_type = lv2_obj;

	static const u32 id_step = 0x100;
	static const u32 id_count = 8192;

private:
	// Special commands passed through the `prio` argument of awake()/awake_unlocked()
	// in place of a real priority. Both values lie far below the priority range
	// accepted by set_priority(), so they cannot collide with a genuine priority.
	enum thread_cmd : s32
	{
		yield_cmd = INT32_MIN, // Passed by yield()
		enqueue_cmd,           // INT32_MIN + 1; default `prio` argument of awake()
	};

public:

	// Remove the first element of the deque that compares equal to `object`.
	// Returns true if an element was erased, false if no element matched.
	template <typename T, typename E>
	static bool unqueue(std::deque<T*>& queue, const E& object)
	{
		auto pos = queue.cbegin();
		const auto last = queue.cend();

		// Advance to the first match (or to the end if there is none)
		while (pos != last && !(*pos == object))
		{
			++pos;
		}

		if (pos == last)
		{
			return false;
		}

		queue.erase(pos);
		return true;
	}

	template <typename E, typename T>
	static T* schedule(std::deque<T*>& queue, u32 protocol)
	{
		if (queue.empty())
		{
			return nullptr;
		}

		if (protocol == SYS_SYNC_FIFO)
		{
			const auto res = queue.front();
			queue.pop_front();
			return res;
		}

		s32 prio = 3071;
		auto it = queue.cbegin();

		for (auto found = it, end = queue.cend(); found != end; found++)
		{
			const s32 _prio = static_cast<E*>(*found)->prio;

			if (_prio < prio)
			{
				it = found;
				prio = _prio;
			}
		}

		const auto res = *it;
		queue.erase(it);
		return res;
	}

private:
	// Remove the current thread from the scheduling queue, register timeout.
	// Does not take g_mutex itself: the public sleep() wrapper locks it around this call.
	static void sleep_unlocked(cpu_thread&, u64 timeout);

	// Schedule the thread.
	// `prio` is either a priority value (set_priority), yield_cmd (yield) or the
	// default enqueue_cmd. Does not take g_mutex itself: the public awake() wrapper
	// locks it around this call. awake_all() passes a null thread pointer.
	static void awake_unlocked(cpu_thread*, s32 prio = enqueue_cmd);

public:
	// Put `cpu` to sleep: calls vm::temporary_unlock(), runs sleep_unlocked()
	// under the scheduler mutex, then discards this thread's pending wake list.
	static void sleep(cpu_thread& cpu, const u64 timeout = 0)
	{
		vm::temporary_unlock(cpu);

		{
			// The scheduler mutex is held only for the duration of sleep_unlocked()
			std::lock_guard lock(g_mutex);
			sleep_unlocked(cpu, timeout);
		}

		// g_to_awake is thread_local, so it is cleared outside the lock
		g_to_awake.clear();
	}

	// Locked wrapper around awake_unlocked(): takes the scheduler mutex and
	// forwards `thread` and `prio` (a priority value or a thread_cmd) unchanged.
	static inline void awake(cpu_thread* const thread, s32 prio = enqueue_cmd)
	{
		const std::lock_guard guard{g_mutex};

		awake_unlocked(thread, prio);
	}

	// Yield request for `thread`: calls vm::temporary_unlock() on it, then
	// passes it to awake() with the yield_cmd command.
	static void yield(cpu_thread& thread)
	{
		vm::temporary_unlock(thread);

		cpu_thread* const self = &thread;
		awake(self, yield_cmd);
	}

	// Apply a new priority to `thread` through awake().
	// The verify() condition holds exactly for prio in [-512, 3199]: the unsigned
	// addition wraps for anything below -512, producing a value >= 3712.
	static void set_priority(cpu_thread& thread, s32 prio)
	{
		verify(HERE), static_cast<u32>(prio) + 512u < 3712u;

		cpu_thread* const target = &thread;
		awake(target, prio);
	}

	// Call awake() with a null thread pointer and the default command, then
	// empty this thread's pending wake list (g_to_awake).
	static inline void awake_all()
	{
		awake(nullptr);
		g_to_awake.clear();
	}

	// Add `thread` to the calling thread's pending wake list (g_to_awake).
	// The list is emptied again by awake_all() and sleep().
	static inline void append(cpu_thread* const thread)
	{
		g_to_awake.push_back(thread);
	}

	static void cleanup();

	// Create (or look up) a kernel object of type T and register it in the ID manager.
	//
	// pshared      - SYS_SYNC_PROCESS_SHARED or SYS_SYNC_NOT_PROCESS_SHARED; anything
	//                else yields CELL_EINVAL.
	// ipc_key      - key used with ipc_manager<T, u64> for process-shared objects.
	// flags        - SYS_SYNC_NEWLY_CREATED / SYS_SYNC_NOT_CREATE / SYS_SYNC_NOT_CARE;
	//                only inspected for process-shared objects.
	// make         - factory invoked to construct the object.
	// key_not_zero - when true, a zero ipc_key is rejected for process-shared objects.
	//
	// Returns CELL_OK on success, CELL_EINVAL for bad arguments, CELL_EEXIST when
	// SYS_SYNC_NEWLY_CREATED hits an existing key, CELL_ESRCH when SYS_SYNC_NOT_CREATE
	// finds no object, and CELL_EAGAIN when the ID manager import fails.
	template <typename T, typename F>
	static error_code create(u32 pshared, u64 ipc_key, s32 flags, F&& make, bool key_not_zero = true)
	{
		// Non-shared object: no IPC bookkeeping, only the ID manager is involved
		if (pshared == SYS_SYNC_NOT_PROCESS_SHARED)
		{
			if (!idm::import<lv2_obj, T>(std::forward<F>(make)))
			{
				return CELL_EAGAIN;
			}

			return CELL_OK;
		}

		if (pshared != SYS_SYNC_PROCESS_SHARED)
		{
			return CELL_EINVAL;
		}

		if (key_not_zero && ipc_key == 0)
		{
			return CELL_EINVAL;
		}

		// Reference-only request: the object must already be registered under the key
		if (flags == SYS_SYNC_NOT_CREATE)
		{
			auto existing = ipc_manager<T, u64>::get(ipc_key);

			if (!existing)
			{
				return CELL_ESRCH;
			}

			if (!idm::import_existing<lv2_obj, T>(existing))
			{
				return CELL_EAGAIN;
			}

			return CELL_OK;
		}

		if (flags != SYS_SYNC_NEWLY_CREATED && flags != SYS_SYNC_NOT_CARE)
		{
			return CELL_EINVAL;
		}

		std::shared_ptr<T> result = make();

		// The callback imports the fresh object and resets `result` if the import fails.
		// NOTE(review): `&result` is presumably filled with the already-registered
		// object when the key exists - confirm against ipc_manager::add.
		const bool added = !!ipc_manager<T, u64>::add(ipc_key, [&]
		{
			if (!idm::import_existing<lv2_obj, T>(result))
			{
				result.reset();
			}

			return result;
		}, &result);

		if (!added)
		{
			if (flags == SYS_SYNC_NEWLY_CREATED)
			{
				return CELL_EEXIST;
			}

			if (!idm::import_existing<lv2_obj, T>(result))
			{
				return CELL_EAGAIN;
			}

			return CELL_OK;
		}

		if (!result)
		{
			return CELL_EAGAIN;
		}

		return CELL_OK;
	}

	// Block the calling thread until `usec` has elapsed (measured with
	// get_system_time()), or until the wait is interrupted.
	//
	// is_usleep - selects which sleep_timers_accuracy level enables the precise
	//             path, is passed negated as the second argument of
	//             thread_ctrl::wait_for, and on Linux enables the smaller quantum
	//             for short waits.
	// scale     - when true, `usec` is multiplied by 100 and divided by
	//             g_cfg.core.clocks_scale before waiting.
	// usec      - requested duration (presumably microseconds, going by the name -
	//             confirm against get_system_time()); clamped to cond_variable::max_timeout.
	// cpu       - optional thread whose cpu_flag::signal state aborts the wait.
	//
	// Returns true when the full duration has elapsed; false if the current thread
	// is aborting or `cpu` has the signal flag set.
	template<bool is_usleep = false, bool scale = true>
	static bool wait_timeout(u64 usec, cpu_thread* const cpu = {})
	{
		// Guarantees that the clamped timeout can be multiplied by 100 without overflow
		static_assert(UINT64_MAX / cond_variable::max_timeout >= 100, "max timeout is not valid for scaling");

		if constexpr (scale)
		{
			// Scale time (input is capped first so the multiplication cannot overflow)
			usec = std::min<u64>(usec, UINT64_MAX / 100) * 100 / g_cfg.core.clocks_scale;
		}

		// Clamp
		usec = std::min<u64>(usec, cond_variable::max_timeout);

		extern u64 get_system_time();

		// Time elapsed since start_time, and time still left to wait
		u64 passed = 0;
		u64 remaining;

		const u64 start_time = get_system_time();
		while (usec >= passed)
		{
			remaining = usec - passed;
#ifdef __linux__
			// NOTE: Assumption that timer initialization has succeeded
			u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50;
#else
			// Host scheduler quantum for windows (worst case)
			// NOTE: On ps3 this function has very high accuracy
			constexpr u64 host_min_quantum = 500;
#endif
			// TODO: Tune for other non-Windows operating systems

			if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
			{
				// Configured accuracy is below the level for this kind of wait: wait for the whole remainder
				thread_ctrl::wait_for(remaining, !is_usleep);
			}
			else
			{
				if (remaining > host_min_quantum)
				{
#ifdef __linux__
					// Do not wait for the last quantum to avoid loss of accuracy
					thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), !is_usleep);
#else
					// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
					thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), !is_usleep);
#endif
				}
				else
				{
					// Try yielding. May cause long wake latency but helps weaker CPUs a lot by alleviating resource pressure
					std::this_thread::yield();
				}
			}

			// Interrupted: the current thread is being aborted
			if (thread_ctrl::state() == thread_state::aborting)
			{
				return false;
			}

			// Interrupted: the optional cpu thread has its signal flag set
			if (cpu && cpu->state & cpu_flag::signal)
			{
				return false;
			}

			passed = get_system_time() - start_time;
		}

		return true;
	}

private:
	// Scheduler mutex (locked by sleep() and awake() around the *_unlocked calls)
	static shared_mutex g_mutex;

	// Pending list of threads to run (one list per host thread: thread_local).
	// Filled by append(), emptied by sleep() and awake_all().
	static thread_local std::vector<class cpu_thread*> g_to_awake;

	// Scheduler queue for active PPU threads
	static std::deque<class ppu_thread*> g_ppu;

	// Waiting for the response from
	// NOTE(review): the comment above is truncated in the original; the purpose of
	// this queue is not visible in this header.
	static std::deque<class cpu_thread*> g_pending;

	// Scheduler queue for timeouts (wait until -> thread)
	static std::deque<std::pair<u64, class cpu_thread*>> g_waiting;

	// Defined outside this header
	static void schedule_all();
};
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+								#pragma once
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
-												Thread.cpp refinement

Hide thread mutex
Safe notify() method
Other refactoring

											
										
										
											2016-09-07 00:38:52 +02:00
+								#include "Utilities/mutex.h"
 								#include "Utilities/sema.h"
 								#include "Utilities/cond.h"
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
-												Split Emu/Memory into more logical headers

- Add vm_locking.h and vm_reservation.h and move relevant functions
  and types to these headers.
- Change include order and make vm_ptr.h, vm_var.h and vm_ref.h headers
  usable invidually and them including vm.h instead of other way around
- Because usage of vm::ptr now requires including vm_ptr.h instead of
  vm.h updated multiple #includes
- Added additional #includes to vm_reservation.h and vm_locking to
  where vm::reservation_* and locking related functions are used

											
										
										
											2018-09-25 22:34:45 +02:00
+								#include "Emu/Memory/vm_locking.h"
-												cpu_thread::test_state added

lv2_obj::sleep adjustment
synchronization fixes

											
										
										
											2017-02-22 11:10:55 +01:00
+								#include "Emu/CPU/CPUThread.h"
-												sys_cond, sys_mutex

											
										
										
											2017-02-02 18:47:25 +01:00
+								#include "Emu/Cell/ErrorCodes.h"
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+								#include "Emu/IdManager.h"
 								#include "Emu/IPC.h"
-												move config structs to own files and clean up some headers

											
										
										
											2020-02-15 23:36:20 +01:00
+								#include "Emu/system_config.h"
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+								#include "Emu/System.h"
-												sys_cond, sys_mutex

											
										
										
											2017-02-02 18:47:25 +01:00
-												sys_event_queue...

											
										
										
											2017-02-03 22:36:04 +01:00
+								#include <deque>
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+								#include <thread>
-												sys_event_queue...

											
										
										
											2017-02-03 22:36:04 +01:00
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								// attr_protocol (waiting scheduling policy)
 								enum
 								{
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									SYS_SYNC_FIFO                = 0x1, // First In, First Out Order
 									SYS_SYNC_PRIORITY            = 0x2, // Priority Order
 									SYS_SYNC_PRIORITY_INHERIT    = 0x3, // Basic Priority Inheritance Protocol
 									SYS_SYNC_RETRY               = 0x4, // Not selected while unlocking
 									SYS_SYNC_ATTR_PROTOCOL_MASK  = 0xf,
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								};
 								// attr_recursive (recursive locks policy)
 								enum
 								{
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									SYS_SYNC_RECURSIVE           = 0x10,
 									SYS_SYNC_NOT_RECURSIVE       = 0x20,
 									SYS_SYNC_ATTR_RECURSIVE_MASK = 0xf0,
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								};
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+								// attr_pshared (sharing among processes policy)
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								enum
 								{
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									SYS_SYNC_PROCESS_SHARED      = 0x100,
 									SYS_SYNC_NOT_PROCESS_SHARED  = 0x200,
 									SYS_SYNC_ATTR_PSHARED_MASK   = 0xf00,
 								};
 								// attr_flags (creation policy)
 								enum
 								{
 									SYS_SYNC_NEWLY_CREATED       = 0x1, // Create new object, fails if specified IPC key exists
 									SYS_SYNC_NOT_CREATE          = 0x2, // Reference existing object, fails if IPC key not found
 									SYS_SYNC_NOT_CARE            = 0x3, // Reference existing object, create new one if IPC key not found
 									SYS_SYNC_ATTR_FLAGS_MASK     = 0xf,
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								};
 								// attr_adaptive
 								enum
 								{
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									SYS_SYNC_ADAPTIVE            = 0x1000,
 									SYS_SYNC_NOT_ADAPTIVE        = 0x2000,
 									SYS_SYNC_ATTR_ADAPTIVE_MASK  = 0xf000,
-												Partial commit: Syscalls

											
										
										
											2016-04-14 00:23:53 +02:00
+								};
-												IdManager improved

lv2_obj for kernel objects
Simple lookup (vector)
Another idm API refactoring

											
										
										
											2017-01-29 17:50:18 +01:00
+								// Base class for some kernel objects (shared set of 8192 objects).
 								struct lv2_obj
 								{
 									using id_type = lv2_obj;
 									static const u32 id_step = 0x100;
 									static const u32 id_count = 8192;
-												sys_semaphore...

											
										
										
											2017-01-31 00:09:55 +01:00
-												Implement ps3 application root flags detection

											
										
										
											2019-11-01 20:21:15 +01:00
+								private:
 									enum thread_cmd : s32
 									{
 										yield_cmd = INT32_MIN,
 										enqueue_cmd,
 									};
 								public:
-												sys_semaphore...

											
										
										
											2017-01-31 00:09:55 +01:00
+									// Find and remove the object from the container (deque or vector)
 									template <typename T, typename E>
 									static bool unqueue(std::deque<T*>& queue, const E& object)
 									{
 										for (auto found = queue.cbegin(), end = queue.cend(); found != end; found++)
 										{
 											if (*found == object)
 											{
 												queue.erase(found);
 												return true;
 											}
 										}
 										return false;
 									}
 									template <typename E, typename T>
 									static T* schedule(std::deque<T*>& queue, u32 protocol)
 									{
 										if (queue.empty())
 										{
 											return nullptr;
 										}
 										if (protocol == SYS_SYNC_FIFO)
 										{
 											const auto res = queue.front();
 											queue.pop_front();
 											return res;
 										}
-												Implement ps3 application root flags detection

											
										
										
											2019-11-01 20:21:15 +01:00
+										s32 prio = 3071;
-												sys_semaphore...

											
										
										
											2017-01-31 00:09:55 +01:00
+										auto it = queue.cbegin();
 										for (auto found = it, end = queue.cend(); found != end; found++)
 										{
-												sys_sync.h: fix warning (signed prio)

											
										
										
											2020-02-18 12:50:47 +01:00
+											const s32 _prio = static_cast<E*>(*found)->prio;
-												sys_semaphore...

											
										
										
											2017-01-31 00:09:55 +01:00
 											if (_prio < prio)
 											{
 												it = found;
 												prio = _prio;
 											}
 										}
 										const auto res = *it;
 										queue.erase(it);
 										return res;
 									}
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+								private:
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									// Remove the current thread from the scheduling queue, register timeout
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									static void sleep_unlocked(cpu_thread&, u64 timeout);
 									// Schedule the thread
-												Implement ps3 application root flags detection

											
										
										
											2019-11-01 20:21:15 +01:00
+									static void awake_unlocked(cpu_thread*, s32 prio = enqueue_cmd);
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+								public:
 									static void sleep(cpu_thread& cpu, const u64 timeout = 0)
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									{
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+										vm::temporary_unlock(cpu);
 										std::lock_guard{g_mutex}, sleep_unlocked(cpu, timeout);
 										g_to_awake.clear();
 									}
-												Implement ps3 application root flags detection

											
										
										
											2019-11-01 20:21:15 +01:00
+									static inline void awake(cpu_thread* const thread, s32 prio = enqueue_cmd)
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									{
 										std::lock_guard lock(g_mutex);
 										awake_unlocked(thread, prio);
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									}
-												lv2: Reimplement sys_timer_usleep
- Matches ps3 accuracy for all tested values with few exceptions
- Do not enter the host OS kernel if waiting for less than 500us to avoid scheduler issues

											
										
										
											2018-05-26 12:06:37 +02:00
+									static void yield(cpu_thread& thread)
 									{
 										vm::temporary_unlock(thread);
-												Implement ps3 application root flags detection

											
										
										
											2019-11-01 20:21:15 +01:00
+										awake(&thread, yield_cmd);
 									}
 									static void set_priority(cpu_thread& thread, s32 prio)
 									{
 										verify(HERE), prio + 512u < 3712;
 										awake(&thread, prio);
-												lv2: Reimplement sys_timer_usleep
- Matches ps3 accuracy for all tested values with few exceptions
- Do not enter the host OS kernel if waiting for less than 500us to avoid scheduler issues

											
										
										
											2018-05-26 12:06:37 +02:00
+									}
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									static inline void awake_all()
 									{
 										awake({});
 										g_to_awake.clear();
 									}
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									static inline void append(cpu_thread* const thread)
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									{
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+										g_to_awake.emplace_back(thread);
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									}
 									static void cleanup();
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									template <typename T, typename F>
-												VSH: sys_mmapper

* Implement syscalls sys_mmapper_allocate_shared_memory_ext, sys_mmapper_allocate_shared_memory_from_container_ext.
* Implement multi-process shared memory allocations.

											
										
										
											2020-01-02 17:07:52 +01:00
+									static error_code create(u32 pshared, u64 ipc_key, s32 flags, F&& make, bool key_not_zero = true)
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+									{
 										switch (pshared)
 										{
 										case SYS_SYNC_PROCESS_SHARED:
 										{
-												VSH: sys_mmapper

* Implement syscalls sys_mmapper_allocate_shared_memory_ext, sys_mmapper_allocate_shared_memory_from_container_ext.
* Implement multi-process shared memory allocations.

											
										
										
											2020-01-02 17:07:52 +01:00
+											if (key_not_zero && ipc_key == 0)
-												lv2: Check ipc_key value if object is process shared

											
										
										
											2019-11-01 20:22:43 +01:00
+											{
 												return CELL_EINVAL;
 											}
-												IPC support for lv2 sync objects

											
										
										
											2017-07-24 17:59:48 +02:00
+											switch (flags)
 											{
 											case SYS_SYNC_NEWLY_CREATED:
 											case SYS_SYNC_NOT_CARE:
 											{
 												std::shared_ptr<T> result = make();
 												if (!ipc_manager<T, u64>::add(ipc_key, [&] { if (!idm::import_existing<lv2_obj, T>(result)) result.reset(); return result; }, &result))
 												{
 													if (flags == SYS_SYNC_NEWLY_CREATED)
 													{
 														return CELL_EEXIST;
 													}
 													if (!idm::import_existing<lv2_obj, T>(result))
 													{
 														return CELL_EAGAIN;
 													}
 													return CELL_OK;
 												}
 												else if (!result)
 												{
 													return CELL_EAGAIN;
 												}
 												else
 												{
 													return CELL_OK;
 												}
 											}
 											case SYS_SYNC_NOT_CREATE:
 											{
 												auto result = ipc_manager<T, u64>::get(ipc_key);
 												if (!result)
 												{
 													return CELL_ESRCH;
 												}
 												if (!idm::import_existing<lv2_obj, T>(result))
 												{
 													return CELL_EAGAIN;
 												}
 												return CELL_OK;
 											}
 											default:
 											{
 												return CELL_EINVAL;
 											}
 											}
 										}
 										case SYS_SYNC_NOT_PROCESS_SHARED:
 										{
 											if (!idm::import<lv2_obj, T>(std::forward<F>(make)))
 											{
 												return CELL_EAGAIN;
 											}
 											return CELL_OK;
 										}
 										default:
 										{
 											return CELL_EINVAL;
 										}
 										}
 									}
-												rsx: Improve frame-limiter (#7723)

* rsx: Improve frame-limiter accuracy

* lv2: Improve lv2_obj::wait_timeout response time for aborting threads

* rsx: Make stretch to display area setting dynamic

* rsx: Redefine 'auto' frame limiter to obey vblank rate

* rsx: Make frame limiter setting dynamic

* rsx: Make frame-limiter compatible with dynamic changes
											
										
										
											2020-03-07 23:11:35 +01:00
+									template<bool is_usleep = false, bool scale = true>
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									static bool wait_timeout(u64 usec, cpu_thread* const cpu = {})
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+									{
-												Increase Maximum Vblank Rate and Clocks Scale

Allow x30 times the speed of vblank rate + clocks scale of original PS3.
In theory a 60 fps limit game which scales frame limit perfectly with vblank rate can be played at up to 1800 fps with this change.

And:
* Fixed lv2 sleep with Clocks Scaling
* Make these settings dynamicaly adjustable.
* Avoid code duplication
											
										
										
											2020-01-29 21:42:41 +01:00
+										static_assert(UINT64_MAX / cond_variable::max_timeout >= 100, "max timeout is not valid for scaling");
-												core config: Expose min/max ranges of integral settings and use it

											
										
										
											2019-08-02 20:53:47 +02:00
-												rsx: Improve frame-limiter (#7723)

* rsx: Improve frame-limiter accuracy

* lv2: Improve lv2_obj::wait_timeout response time for aborting threads

* rsx: Make stretch to display area setting dynamic

* rsx: Redefine 'auto' frame limiter to obey vblank rate

* rsx: Make frame limiter setting dynamic

* rsx: Make frame-limiter compatible with dynamic changes
											
										
										
											2020-03-07 23:11:35 +01:00
+										if constexpr (scale)
 										{
 											// Scale time
 											usec = std::min<u64>(usec, UINT64_MAX / 100) * 100 / g_cfg.core.clocks_scale;
 										}
 										// Clamp
 										usec = std::min<u64>(usec, cond_variable::max_timeout);
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
 										extern u64 get_system_time();
 										u64 passed = 0;
 										u64 remaining;
 										const u64 start_time = get_system_time();
 										while (usec >= passed)
 										{
 											remaining = usec - passed;
-												Use Linux timers for sleeps up to 1ms (#6697)

* Use Linux timers for sleeps up to 1ms (v3)
The current sleep timer implementation basically offers two variants. Either
wait the specified time exactly with a condition variable (as host) or use a
combination of it with a thread yielding busy loop afterwards (usleep timer).

While the second one is very precise it consumes CPU loops for each wait call
below 50us. Games like Bomberman Ultra spam 30us waits and the emulator hogs
low power CPUs. Switching to host mode reduces CPU consumption but gives a
~50us penalty for each wait call. Thus extending all sleeps by a factor of
more than two.

The following bugfix tries to improve the system timer for Linux by using
Linux native timers for small wait calls below 1ms. This has two effects.

- Host wait setting has much less wait overhead
- usleep wait setting produces lower CPU overhead

											
										
										
											2019-10-09 19:03:34 +02:00
+								#ifdef __linux__
 											// NOTE: Assumption that timer initialization has succeeded
-												Linux: Change default Sleep Timers accuracy to host
- This doesn't change existing configs
- Also sets the host_min_quantum to the true value
- Restores lost TODO: comment

											
										
										
											2019-10-14 19:04:47 +02:00
+											u64 host_min_quantum = is_usleep && remaining <= 1000 ? 10 : 50;
-												Use Linux timers for sleeps up to 1ms (#6697)

* Use Linux timers for sleeps up to 1ms (v3)
The current sleep timer implementation basically offers two variants. Either
wait the specified time exactly with a condition variable (as host) or use a
combination of it with a thread yielding busy loop afterwards (usleep timer).

While the second one is very precise it consumes CPU loops for each wait call
below 50us. Games like Bomberman Ultra spam 30us waits and the emulator hogs
low power CPUs. Switching to host mode reduces CPU consumption but gives a
~50us penalty for each wait call. Thus extending all sleeps by a factor of
more than two.

The following bugfix tries to improve the system timer for Linux by using
Linux native timers for small wait calls below 1ms. This has two effects.

- Host wait setting has much less wait overhead
- usleep wait setting produces lower CPU overhead

											
										
										
											2019-10-09 19:03:34 +02:00
+								#else
 											// Host scheduler quantum for windows (worst case)
 											// NOTE: On ps3 this function has very high accuracy
 											constexpr u64 host_min_quantum = 500;
 								#endif
-												Linux: Change default Sleep Timers accuracy to host
- This doesn't change existing configs
- Also sets the host_min_quantum to the true value
- Restores lost TODO: comment

											
										
										
											2019-10-14 19:04:47 +02:00
+											// TODO: Tune for other non windows operating sytems
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
 											if (g_cfg.core.sleep_timers_accuracy < (is_usleep ? sleep_timers_accuracy_level::_usleep : sleep_timers_accuracy_level::_all_timers))
 											{
-												Add dummy alert param to thread_ctrl::wait API

											
										
										
											2019-10-06 12:30:56 +02:00
+												thread_ctrl::wait_for(remaining, !is_usleep);
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+											}
 											else
 											{
 												if (remaining > host_min_quantum)
 												{
 								#ifdef __linux__
 													// Do not wait for the last quantum to avoid loss of accuracy
-												Add dummy alert param to thread_ctrl::wait API

											
										
										
											2019-10-06 12:30:56 +02:00
+													thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), !is_usleep);
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+								#else
-												Rewrite `cond_variable` to use waitable atomics

Increase max_timeout and fix max_timeout usage

											
										
										
											2019-09-09 10:09:30 +02:00
+													// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
-												Add dummy alert param to thread_ctrl::wait API

											
										
										
											2019-10-06 12:30:56 +02:00
+													thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), !is_usleep);
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+								#endif
 												}
 												else
 												{
 													// Try yielding. May cause long wake latency but helps weaker CPUs a lot by alleviating resource pressure
 													std::this_thread::yield();
 												}
 											}
-												rsx: Improve frame-limiter (#7723)

* rsx: Improve frame-limiter accuracy

* lv2: Improve lv2_obj::wait_timeout response time for aborting threads

* rsx: Make stretch to display area setting dynamic

* rsx: Redefine 'auto' frame limiter to obey vblank rate

* rsx: Make frame limiter setting dynamic

* rsx: Make frame-limiter compatible with dynamic changes
											
										
										
											2020-03-07 23:11:35 +01:00
+											if (thread_ctrl::state() == thread_state::aborting)
-												Timers scaling and fixes

											
										
										
											2019-07-14 05:55:11 +02:00
+											{
 												return false;
 											}
 											if (cpu && cpu->state & cpu_flag::signal)
 											{
 												return false;
 											}
 											passed = get_system_time() - start_time;
 										}
 										return true;
 									}
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+								private:
 									// Scheduler mutex
-												Rewrite condition variables

Implement helper functions balanced_wait_until and balanced_awaken
They include new path for Windows 8.1+ (WaitOnAddress)

shared_mutex, cond_variable, cond_one, cond_x16 modified to use it
Added helper function utils::popcnt16
Replace most semaphore<> with shared_mutex

											
										
										
											2018-11-26 16:55:22 +01:00
+									static shared_mutex g_mutex;
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
-												PPU/LV2: Make thread-lists scheduling atomic

											
										
										
											2019-04-25 16:27:50 +02:00
+									// Pending list of threads to run
 									static thread_local std::vector<class cpu_thread*> g_to_awake;
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
+									// Scheduler queue for active PPU threads
 									static std::deque<class ppu_thread*> g_ppu;
 									// Waiting for the response from
 									static std::deque<class cpu_thread*> g_pending;
 									// Scheduler queue for timeouts (wait until -> thread)
-												Migration to named_thread<>

Add atomic_t<>::try_dec instead of fetch_dec_sat
Add atomic_t<>::try_inc
GDBDebugServer is broken (needs rewrite)
Removed old_thread class (former named_thread)
Removed storing/rethrowing exceptions from thread
Emu.Stop doesn't inject an exception anymore
task_stack helper class removed
thread_base simplified (no shared_from_this)
thread_ctrl::spawn simplified (creates detached thread)
Implemented overrideable thread detaching logic
Disabled cellAdec, cellDmux, cellFsAio
SPUThread renamed to spu_thread
RawSPUThread removed, spu_thread used instead
Disabled deriving from ppu_thread
Partial support for thread renaming
lv2_timer... simplified, screw it
idm/fxm: butchered support for on_stop/on_init
vm: improved allocation structure (added size)

											
										
										
											2018-10-11 00:17:19 +02:00
+									static std::deque<std::pair<u64, class cpu_thread*>> g_waiting;
-												PPU thread scheduler

											
										
										
											2017-02-06 19:36:46 +01:00
 									static void schedule_all();
-												IdManager improved

lv2_obj for kernel objects
Simple lookup (vector)
Another idm API refactoring

											
										
										
											2017-01-29 17:50:18 +01:00
+								};