#include "orbis/KernelContext.hpp"
#include "orbis/thread/Process.hpp"
#include "orbis/thread/ProcessOps.hpp"
#include "orbis/utils/Logs.hpp"

#include <algorithm>
#include <bit>
#include <chrono>
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <mutex>
#include <sys/mman.h>
#include <thread>
#include <unistd.h>

// Canary written just past each allocation when kDebugHeap > 0; checked in
// kfree() to detect out-of-bounds writes.
static const std::uint64_t g_allocProtWord = 0xDEADBEAFBADCAFE1;
// 64 GiB shared heap at a fixed address, mapped once in g_context's
// initializer below.
static constexpr auto kHeapBaseAddress = 0x00000800'0000'0000;
static constexpr auto kHeapSize = 0x10'0000'0000;
// 0 - disabled, 1 - canary word after each block, 2 - PROT_NONE guard pages
static constexpr int kDebugHeap = 0;

namespace orbis {
thread_local Thread *g_currentThread;

KernelContext &g_context = *[]() -> KernelContext * {
  // Allocate global shared kernel memory
  // TODO: randomize for hardening and reduce size
  auto ptr = mmap(reinterpret_cast<void *>(kHeapBaseAddress), kHeapSize,
                  PROT_READ | PROT_WRITE,
                  MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
  if (ptr == MAP_FAILED)
    std::abort();

  return new (ptr) KernelContext;
}();
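
// Because the context lives in a MAP_SHARED mapping at a fixed address, a
// forked child sees the same KernelContext object at the same pointer. A
// minimal sketch of that property (hypothetical check, not part of this file):
//
//   if (fork() == 0) {
//     assert(reinterpret_cast<std::uintptr_t>(&orbis::g_context) ==
//            kHeapBaseAddress);
//     orbis::g_context.getTscFreq(); // shared state, visible to the parent
//     _exit(0);
//   }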

KernelContext::KernelContext() {
  // std::printf("orbis::KernelContext initialized, addr=%p\n", this);
  // std::printf("TSC frequency: %lu\n", getTscFreq());
}

KernelContext::~KernelContext() {}

Process *KernelContext::createProcess(pid_t pid) {
  auto newProcess = knew<utils::LinkedNode<Process>>();
  newProcess->object.context = this;
  newProcess->object.pid = pid;
  newProcess->object.state = ProcessState::NEW;

  {
    std::lock_guard lock(m_proc_mtx);
    if (m_processes != nullptr) {
      m_processes->insertPrev(*newProcess);
    }

    m_processes = newProcess;
  }

  return &newProcess->object;
}
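
// A minimal usage sketch (the hostPid assignment is an assumption about how
// callers wire up host ids; only create/find/delete come from this file):
//
//   auto proc = orbis::g_context.createProcess(10);
//   proc->hostPid = ::getpid();
//   assert(orbis::g_context.findProcessById(10) == proc);
//   orbis::g_context.deleteProcess(proc);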

void KernelContext::deleteProcess(Process *proc) {
  // Process is the first member of LinkedNode<Process>, so the object pointer
  // doubles as the node pointer.
  auto procNode = reinterpret_cast<utils::LinkedNode<Process> *>(proc);

  {
    std::lock_guard lock(m_proc_mtx);
    auto next = procNode->erase();

    if (procNode == m_processes) {
      m_processes = next;
    }
  }

  kdelete(procNode);
}

Process *KernelContext::findProcessById(pid_t pid) const {
  // Retry briefly: the lookup may race with process creation on another
  // thread, so poll up to 20 times with a short sleep in between.
  for (std::size_t i = 0; i < 20; ++i) {
    {
      std::lock_guard lock(m_proc_mtx);
      for (auto proc = m_processes; proc != nullptr; proc = proc->next) {
        if (proc->object.pid == pid) {
          return &proc->object;
        }
      }
    }

    std::this_thread::sleep_for(std::chrono::microseconds(50));
  }

  return nullptr;
}

Process *KernelContext::findProcessByHostId(std::uint64_t pid) const {
  for (std::size_t i = 0; i < 20; ++i) {
    {
      std::lock_guard lock(m_proc_mtx);
      for (auto proc = m_processes; proc != nullptr; proc = proc->next) {
        if (proc->object.hostPid == pid) {
          return &proc->object;
        }
      }
    }

    std::this_thread::sleep_for(std::chrono::microseconds(50));
  }

  return nullptr;
}

long KernelContext::getTscFreq() {
  auto cal_tsc = []() -> long {
    const long timer_freq = 1'000'000'000;

    // Calibrate TSC
    constexpr int samples = 40;
    long rdtsc_data[samples];
    long timer_data[samples];
    long error_data[samples];

    struct ::timespec ts0;
    clock_gettime(CLOCK_MONOTONIC, &ts0);
    long sec_base = ts0.tv_sec;

    for (int i = 0; i < samples; i++) {
      usleep(200);
      // TSC sample just before the clock read; currently unused, kept for
      // error estimation
      error_data[i] = (__builtin_ia32_lfence(), __builtin_ia32_rdtsc());
      struct ::timespec ts;
      clock_gettime(CLOCK_MONOTONIC, &ts);
      rdtsc_data[i] = (__builtin_ia32_lfence(), __builtin_ia32_rdtsc());
      timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
    }

    // Compute average TSC increment per second across adjacent samples
    long acc = 0;
    for (int i = 0; i < samples - 1; i++) {
      acc += (rdtsc_data[i + 1] - rdtsc_data[i]) * timer_freq /
             (timer_data[i + 1] - timer_data[i]);
    }

    // Round the average to the nearest 1 MHz
    acc /= (samples - 1);
    constexpr long grain = 1'000'000;
    return grain * (acc / grain + long{(acc % grain) > (grain / 2)});
  };

  long freq = m_tsc_freq.load();
  if (freq)
    return freq;
  // First calibration wins; concurrent callers may calibrate redundantly,
  // but only one result is ever stored.
  m_tsc_freq.compare_exchange_strong(freq, cal_tsc());
  return m_tsc_freq.load();
}
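
// With the calibrated frequency, TSC deltas convert to wall-clock time. A
// hedged sketch (hypothetical helper; ignores overflow for very large deltas):
//
//   std::uint64_t tscToNs(std::uint64_t tscDelta) {
//     return tscDelta * 1'000'000'000ull /
//            static_cast<std::uint64_t>(orbis::g_context.getTscFreq());
//   }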

void *KernelContext::kalloc(std::size_t size, std::size_t align) {
  size = (size + (__STDCPP_DEFAULT_NEW_ALIGNMENT__ - 1)) &
         ~(__STDCPP_DEFAULT_NEW_ALIGNMENT__ - 1);
  if (!size)
    std::abort();

  // Only consult the free list if its lock is immediately available;
  // otherwise fall through to bump allocation below.
  if (m_heap_map_mtx.try_lock()) {
    std::lock_guard lock(m_heap_map_mtx, std::adopt_lock);

    // Try to reuse previously freed block
    for (auto [it, end] = m_free_heap.equal_range(size); it != end; ++it) {
      auto result = it->second;
      if (!(std::bit_cast<std::uintptr_t>(result) & (align - 1))) {
        // Park the extracted map node in m_used_node so kfree() can reuse it
        // without allocating.
        auto node = m_free_heap.extract(it);
        node.key() = 0;
        node.mapped() = nullptr;
        m_used_node.insert(m_used_node.begin(), std::move(node));

        // std::fprintf(stderr, "kalloc: reuse %p-%p, size = %lx\n", result,
        //              (char *)result + size, size);

        if (kDebugHeap > 0) {
          // Re-arm the canary behind the reused block
          std::memcpy(std::bit_cast<std::byte *>(result) + size,
                      &g_allocProtWord, sizeof(g_allocProtWord));
        }

        return result;
      }
    }
  }

  std::lock_guard lock(m_heap_mtx);
  align = std::max<std::size_t>(align, __STDCPP_DEFAULT_NEW_ALIGNMENT__);
  auto heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
  heap = (heap + (align - 1)) & ~(align - 1);

  if (kDebugHeap > 1) {
    // Push the block so that it (plus the canary word) ends exactly at a page
    // boundary; the PROT_NONE guard page mapped below then catches overruns.
    if (auto diff = (heap + size + sizeof(g_allocProtWord)) % 4096; diff != 0) {
      heap += 4096 - diff;
      heap &= ~(align - 1);
    }
  }

  if (heap + size > kHeapBaseAddress + kHeapSize) {
    std::fprintf(stderr, "out of kernel memory\n");
    std::abort();
  }
  // Check overflow
  if (heap + size < heap) {
    std::fprintf(stderr, "too big allocation\n");
    std::abort();
  }

  // std::fprintf(stderr, "kalloc: allocate %lx-%lx, size = %lx, align=%lx\n",
  //              heap, heap + size, size, align);

  auto result = reinterpret_cast<void *>(heap);
  if (kDebugHeap > 0) {
    std::memcpy(std::bit_cast<std::byte *>(result) + size, &g_allocProtWord,
                sizeof(g_allocProtWord));
  }

  if (kDebugHeap > 0) {
    m_heap_next =
        reinterpret_cast<void *>(heap + size + sizeof(g_allocProtWord));
  } else {
    m_heap_next = reinterpret_cast<void *>(heap + size);
  }

  if (kDebugHeap > 1) {
    // Map an inaccessible guard page right behind the allocation
    heap = reinterpret_cast<std::uintptr_t>(m_heap_next);
    align = std::min<std::size_t>(align, 4096);
    heap = (heap + (align - 1)) & ~(align - 1);
    size = 4096;
    // std::fprintf(stderr, "kalloc: protect %lx-%lx, size = %lx, align=%lx\n",
    //              heap, heap + size, size, align);

    auto result = ::mmap(reinterpret_cast<void *>(heap), size, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
    if (result == MAP_FAILED) {
      std::fprintf(stderr, "failed to protect memory\n");
      std::abort();
    }
    m_heap_next = reinterpret_cast<void *>(heap + size);
  }

  return result;
}

void KernelContext::kfree(void *ptr, std::size_t size) {
  size = (size + (__STDCPP_DEFAULT_NEW_ALIGNMENT__ - 1)) &
         ~(__STDCPP_DEFAULT_NEW_ALIGNMENT__ - 1);
  if (!size)
    std::abort();

  if (std::bit_cast<std::uintptr_t>(ptr) < kHeapBaseAddress ||
      std::bit_cast<std::uintptr_t>(ptr) + size >
          kHeapBaseAddress + kHeapSize) {
    std::fprintf(stderr, "kfree: invalid address\n");
    std::abort();
  }

  if (kDebugHeap > 0) {
    // Verify the canary written by kalloc() is still intact
    if (std::memcmp(std::bit_cast<std::byte *>(ptr) + size, &g_allocProtWord,
                    sizeof(g_allocProtWord)) != 0) {
      std::fprintf(stderr, "kernel heap corruption\n");
      std::abort();
    }

    // Poison the freed block (and its canary) to catch use-after-free
    std::memset(ptr, 0xcc, size + sizeof(g_allocProtWord));
  }

  // std::fprintf(stderr, "kfree: release %p-%p, size = %lx\n", ptr,
  //              (char *)ptr + size, size);

  std::lock_guard lock(m_heap_map_mtx);
  // Reuse a node cached by kalloc() where possible instead of allocating a
  // fresh one from the heap being freed into.
  if (!m_used_node.empty()) {
    auto node = m_used_node.extract(m_used_node.begin());
    node.key() = size;
    node.mapped() = ptr;
    m_free_heap.insert(std::move(node));
  } else {
    m_free_heap.emplace(size, ptr);
  }
}
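
// With kDebugHeap raised to 1 at the top of this file, an out-of-bounds write
// is caught at free time. A hedged sketch of what that mode detects:
//
//   auto *p = static_cast<char *>(orbis::utils::kalloc(16, 16));
//   p[16] = 0;                  // clobbers the canary word behind the block
//   orbis::utils::kfree(p, 16); // prints "kernel heap corruption" and aborts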

std::tuple<UmtxChain &, UmtxKey, std::unique_lock<shared_mutex>>
KernelContext::getUmtxChainIndexed(int i, Thread *t, uint32_t flags,
                                   void *ptr) {
  auto pid = t->tproc->pid;
  auto p = reinterpret_cast<std::uintptr_t>(ptr);
  if (flags & 1) {
    pid = 0; // Process shared (TODO)
    ORBIS_LOG_WARNING("Using process-shared umtx", t->tid, ptr, (p % 0x4000));
    t->where();
  }
  auto n = p + pid;
  if (flags & 1)
    n %= 0x4000;
  // Multiplicative (golden-ratio) hash to pick a chain bucket
  n = ((n * c_golden_ratio_prime) >> c_umtx_shifts) % c_umtx_chains;
  std::unique_lock lock(m_umtx_chains[i][n].mtx);
  return {m_umtx_chains[i][n], UmtxKey{p, pid}, std::move(lock)};
}
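
// Callers typically destructure the result; the returned lock already holds
// the chain mutex. A minimal usage sketch (thread/flags/addr are assumed to
// come from the surrounding umtx operation):
//
//   auto [chain, key, lock] =
//       g_context.getUmtxChainIndexed(0, thread, flags, addr);
//   // operate on `chain` while `lock` is held; `key` identifies the waiter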

inline namespace utils {
void kfree(void *ptr, std::size_t size) { return g_context.kfree(ptr, size); }
void *kalloc(std::size_t size, std::size_t align) {
  return g_context.kalloc(size, align);
}
} // namespace utils
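
// knew/kdelete (declared elsewhere) are expected to sit on top of these
// wrappers; a hedged sketch of that pattern, not the actual implementation:
//
//   template <typename T, typename... Args> T *knew(Args &&...args) {
//     return new (kalloc(sizeof(T), alignof(T)))
//         T(std::forward<Args>(args)...);
//   }
//   template <typename T> void kdelete(T *ptr) {
//     ptr->~T();
//     kfree(ptr, sizeof(T));
//   }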

inline namespace logs {
template <>
void log_class_string<kstring>::format(std::string &out, const void *arg) {
  out += get_object(arg);
}
} // namespace logs

void Thread::suspend() { sendSignal(-1); }
void Thread::resume() { sendSignal(-2); }

void Thread::sendSignal(int signo) {
  // Queue the signal number (-1/-2 are the suspend/resume sentinels above),
  // then poke the host thread with SIGUSR1 so it processes the queue.
  std::lock_guard lock(mtx);
  signalQueue.push_back(signo);
  if (::tgkill(tproc->hostPid, hostTid, SIGUSR1) < 0) {
    perror("tgkill");
  }
}

void Thread::where() { tproc->ops->where(this); }
} // namespace orbis