xenia/src/alloy/compiler/passes/register_allocation_pass.cc

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include <alloy/compiler/passes/register_allocation_pass.h>

using namespace alloy;
using namespace alloy::backend;
using namespace alloy::compiler;
using namespace alloy::compiler::passes;
using namespace alloy::hir;

struct RegisterAllocationPass::Interval {
  uint32_t start_ordinal;
  uint32_t end_ordinal;
  Value* value;
  RegisterFreeUntilSet* free_until_set;
  // TODO(benvanik): reduce to offsets in arena?
  struct Interval* next;
  struct Interval* prev;

  void AddToList(Interval** list_head) {
    auto list_next = *list_head;
    this->next = list_next;
    if (list_next) {
      list_next->prev = this;
    }
    *list_head = this;
  }

  void InsertIntoList(Interval** list_head) {
    auto it = *list_head;
    while (it) {
      if (it->start_ordinal > this->start_ordinal) {
        // Went too far. Insert before this interval.
        this->prev = it->prev;
        this->next = it;
        if (it->prev) {
          it->prev->next = this;
        } else {
          *list_head = this;
        }
        it->prev = this;
        return;
      }
      if (!it->next) {
        // None found, add at tail.
        it->next = this;
        this->prev = it;
        return;
      }
      it = it->next;
    }
  }

  void RemoveFromList(Interval** list_head) {
    if (this->next) {
      this->next->prev = this->prev;
    }
    if (this->prev) {
      this->prev->next = this->next;
    } else {
      *list_head = this->next;
    }
    this->next = this->prev = NULL;
  }
};
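
// Usage sketch for the list helpers above, mirroring how Run and
// AllocateBlockedReg drive them (names here are illustrative):
//   current->RemoveFromList(&intervals.unhandled);  // pop for processing
//   current->AddToList(&intervals.active);          // received a register
//   expired->RemoveFromList(&intervals.active);
//   expired->AddToList(&intervals.handled);         // past its end ordinal
//   split->InsertIntoList(&intervals.unhandled);    // keeps order by start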

struct RegisterAllocationPass::Intervals {
  Interval* unhandled;
  Interval* active;
  Interval* handled;
};
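
// The three lists mirror the usual linear scan worklists (see the Wimmer
// paper referenced in Run below): unhandled holds intervals not yet reached,
// kept sorted by start ordinal; active holds intervals currently occupying a
// register; handled holds intervals that have expired or been spilled.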

RegisterAllocationPass::RegisterAllocationPass(
    const MachineInfo* machine_info) :
    machine_info_(machine_info),
    CompilerPass() {
  // Initialize register sets. The values of these will be
  // cleared before use, so just the structure is required.
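  // For illustration only (the real sets come from the backend's
  // MachineInfo): an x64-like backend might describe a 16-entry general
  // purpose set tagged INT_TYPES and a 16-entry XMM set tagged with both
  // FLOAT_TYPES and VEC_TYPES, in which case int_set ends up pointing at the
  // former while float_set and vec_set alias the latter.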
  auto mi_sets = machine_info->register_sets;
  xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_));
  uint32_t n = 0;
  while (mi_sets[n].count) {
    auto& mi_set = mi_sets[n];
    auto free_until_set = new RegisterFreeUntilSet();
    free_until_sets_.all_sets[n] = free_until_set;
    free_until_set->count = mi_set.count;
    free_until_set->set = &mi_set;
    if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) {
      free_until_sets_.int_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) {
      free_until_sets_.float_set = free_until_set;
    }
    if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) {
      free_until_sets_.vec_set = free_until_set;
    }
    n++;
  }
}

RegisterAllocationPass::~RegisterAllocationPass() {
  for (size_t n = 0; n < XECOUNT(free_until_sets_.all_sets); n++) {
    if (!free_until_sets_.all_sets[n]) {
      break;
    }
    delete free_until_sets_.all_sets[n];
  }
}

int RegisterAllocationPass::Run(HIRBuilder* builder) {
  // A (probably broken) implementation of a linear scan register allocator
  // that operates directly on SSA form:
  // http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf
  //
  // Requirements:
  // - SSA form (single definition for variables)
  // - block should be in linear order:
  //   - dominators *should* come before (a->b->c)
  //   - loop block sequences *should not* have intervening non-loop blocks
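  //
  // Rough sketch of the bookkeeping below (illustrative only, not real HIR):
  // instructions get sequential ordinals 0..N across all blocks, and a value
  // defined at ordinal 2 whose last use is at ordinal 7 gets the live
  // interval [2, 7]. Intervals are then scanned in order of start ordinal.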

  auto arena = scratch_arena();

  // Renumber everything.
  uint32_t block_ordinal = 0;
  uint32_t instr_ordinal = 0;
  auto block = builder->first_block();
  while (block) {
    // Sequential block ordinals.
    block->ordinal = block_ordinal++;
    auto instr = block->instr_head;
    while (instr) {
      // Sequential global instruction ordinals.
      instr->ordinal = instr_ordinal++;
      instr = instr->next;
    }
    block = block->next;
  }

  // Compute all liveness ranges by walking forward through all
  // blocks/instructions and checking the last use of each value. This lets
  // us know the exact order in (block#,instr#) form, which is then used to
  // setup the range.
  // TODO(benvanik): ideally we would have a list of all values and not have
  // to keep walking instructions over and over.
  Interval* prev_interval = NULL;
  Interval* head_interval = NULL;
  block = builder->first_block();
  while (block) {
    auto instr = block->instr_head;
    while (instr) {
      // Compute last-use for the dest value.
      // Since we know all values of importance must be defined, we can avoid
      // having to check every value and just look at dest.
      const OpcodeInfo* info = instr->opcode;
      if (GET_OPCODE_SIG_TYPE_DEST(info->signature) == OPCODE_SIG_TYPE_V) {
        auto v = instr->dest;
        if (!v->last_use) {
          ComputeLastUse(v);
        }

        // Add interval.
        auto interval = arena->Alloc<Interval>();
        interval->start_ordinal = instr->ordinal;
        interval->end_ordinal = v->last_use ?
            v->last_use->ordinal : v->def->ordinal;
        interval->value = v;
        interval->next = NULL;
        interval->prev = prev_interval;
        if (prev_interval) {
          prev_interval->next = interval;
        } else {
          head_interval = interval;
        }
        prev_interval = interval;

        // Grab register set to use.
        // We do this now so it's only once per interval, and it makes it easy
        // to only compare intervals that overlap their sets.
        if (v->type <= INT64_TYPE) {
          interval->free_until_set = free_until_sets_.int_set;
        } else if (v->type <= FLOAT64_TYPE) {
          interval->free_until_set = free_until_sets_.float_set;
        } else {
          interval->free_until_set = free_until_sets_.vec_set;
        }
      }

      instr = instr->next;
    }
    block = block->next;
  }

  // Now have a sorted list of intervals, minus their ending ordinals.
  Intervals intervals;
  intervals.unhandled = head_interval;
  intervals.active = intervals.handled = NULL;
  while (intervals.unhandled) {
    // Get next unhandled interval.
    auto current = intervals.unhandled;
    intervals.unhandled = intervals.unhandled->next;
    current->RemoveFromList(&intervals.unhandled);

    // Check for intervals in active that are handled or inactive.
    auto it = intervals.active;
    while (it) {
      auto next = it->next;
      if (it->end_ordinal <= current->start_ordinal) {
        // Move from active to handled.
        it->RemoveFromList(&intervals.active);
        it->AddToList(&intervals.handled);
      }
      it = next;
    }

    // Find a register for current.
    if (!TryAllocateFreeReg(current, intervals)) {
      // Failed, spill.
      AllocateBlockedReg(builder, current, intervals);
    }

    if (current->value->reg.index != -1) {
      // Add current to active.
      current->AddToList(&intervals.active);
    }
  }

  return 0;
}

void RegisterAllocationPass::ComputeLastUse(Value* value) {
  // TODO(benvanik): compute during construction?
  // Note that this list isn't sorted (unfortunately), so we have to scan
  // them all.
  uint32_t max_ordinal = 0;
  Value::Use* last_use = NULL;
  auto use = value->use_head;
  while (use) {
    if (!last_use || use->instr->ordinal >= max_ordinal) {
      last_use = use;
      max_ordinal = use->instr->ordinal;
    }
    use = use->next;
  }
  value->last_use = last_use ? last_use->instr : NULL;
}
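
// Attempts to assign current a register from its register set without
// spilling anything. A rough summary of the contract, as used by Run above:
// returns true (and fills in current->value->reg) when some register is free
// for the entire interval; returns false when every register is taken or
// only partially free, in which case the caller falls back to
// AllocateBlockedReg.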
bool RegisterAllocationPass::TryAllocateFreeReg(
    Interval* current, Intervals& intervals) {
  // Reset all registers in the set to unused.
  auto free_until_set = current->free_until_set;
  for (uint32_t n = 0; n < free_until_set->count; n++) {
    free_until_set->pos[n] = -1;
  }

  // Mark all active registers as used.
  // TODO(benvanik): keep some kind of bitvector so that this is instant?
  auto it = intervals.active;
  while (it) {
    if (it->free_until_set == free_until_set) {
      free_until_set->pos[it->value->reg.index] = 0;
    }
    it = it->next;
  }
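
  // Example of the pos[] encoding above (a sketch, assuming pos[] holds
  // uint32_t values so -1 stores as 0xFFFFFFFF): with a 3-register set where
  // only register 1 is held by an active interval, pos[] ends up as
  // { 0xFFFFFFFF, 0, 0xFFFFFFFF }; the scan below would then pick register 0,
  // which stays free until the end.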
  // Pick the register that stays free the longest.
  uint32_t max_pos = 0;
  for (uint32_t n = 1; n < free_until_set->count; n++) {
    if (free_until_set->pos[n] > free_until_set->pos[max_pos]) {
      max_pos = n;
    }
  }

  if (!free_until_set->pos[max_pos]) {
    // No register available without spilling.
    return false;
  }

  if (current->end_ordinal < free_until_set->pos[max_pos]) {
    // Register available for the whole interval.
    current->value->reg.set = free_until_set->set;
    current->value->reg.index = max_pos;
  } else {
    // Register available for the first part of the interval.
    // Split the interval at where it hits the next one.
    //current->value->reg = max_pos;
    //SplitRange(current, free_until_set->pos[max_pos]);
    // TODO(benvanik): actually split -- for now we just spill.
    return false;
  }

  return true;
}
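
// Called when no register was free for current: the pass picks a victim
// interval from the active list (one using the same register set), spills
// its value to a local stack slot, reloads it right before its next use, and
// then retries the allocation. This is a rough summary of the code below;
// the use-position heuristics from the Wimmer paper are intentionally
// skipped here.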
void RegisterAllocationPass::AllocateBlockedReg(
    HIRBuilder* builder, Interval* current, Intervals& intervals) {
  auto free_until_set = current->free_until_set;

  // TODO(benvanik): smart heuristics.
  // wimmer AllocateBlockedReg has some stuff for deciding whether to
  // spill current or some other active interval - which we ignore.

  // Pick a random interval. Maybe the first. Sure.
  auto spill_interval = intervals.active;
  Value* spill_value = NULL;
  Instr* prev_use = NULL;
  Instr* next_use = NULL;
  while (spill_interval) {
    if (spill_interval->free_until_set != free_until_set ||
        spill_interval->start_ordinal == current->start_ordinal) {
      // Only interested in ones of the same register set.
      // We also ensure that ones at the same ordinal as us are ignored,
      // which can happen with multiple local inserts/etc.
      spill_interval = spill_interval->next;
      continue;
    }

    spill_value = spill_interval->value;

    // Find the uses right before/after current.
    auto use = spill_value->use_head;
    while (use) {
      if (use->instr->ordinal != -1) {
        if (use->instr->ordinal < current->start_ordinal) {
          if (!prev_use || prev_use->ordinal < use->instr->ordinal) {
            prev_use = use->instr;
          }
        } else if (use->instr->ordinal > current->start_ordinal) {
          if (!next_use || next_use->ordinal > use->instr->ordinal) {
            next_use = use->instr;
          }
        }
      }
      use = use->next;
    }
    if (!prev_use) {
      prev_use = spill_value->def;
    }
    if (prev_use->next == next_use) {
      // Uh, this interval is way too short.
      spill_interval = spill_interval->next;
      continue;
    }
    XEASSERT(prev_use->ordinal != -1);
    XEASSERTNOTNULL(next_use);
    break;
  }
  XEASSERT(spill_interval->free_until_set == free_until_set);

  // Find the real last use -- paired ops may require sequences to stay
  // intact. This is a bad design.
  auto prev_def_tail = prev_use;
  while (prev_def_tail &&
         prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV) {
    prev_def_tail = prev_def_tail->prev;
  }

  Value* new_value;
  uint32_t end_ordinal;
  if (spill_value->local_slot) {
    // Value is already assigned a slot, so load from that.
    // We can then split the interval right after the previous use to
    // before the next use.

    // Update the last use of the spilled interval/value.
    end_ordinal = spill_interval->end_ordinal;
    spill_interval->end_ordinal = current->start_ordinal;  // prev_def_tail->ordinal;
    XEASSERT(end_ordinal != -1);
    XEASSERT(spill_interval->end_ordinal != -1);

    // Insert a load right before the next use.
    new_value = builder->LoadLocal(spill_value->local_slot);
    builder->last_instr()->MoveBefore(next_use);

    // Update last use info.
    new_value->last_use = spill_value->last_use;
    spill_value->last_use = prev_use;
  } else {
    // Allocate a local slot.
    spill_value->local_slot = builder->AllocLocal(spill_value->type);

    // Insert a spill right after the def.
    builder->StoreLocal(spill_value->local_slot, spill_value);
    auto spill_store = builder->last_instr();
    spill_store->MoveBefore(prev_def_tail->next);

    // Update last use of spilled interval/value.
    end_ordinal = spill_interval->end_ordinal;
    spill_interval->end_ordinal = current->start_ordinal;  // prev_def_tail->ordinal;
    XEASSERT(end_ordinal != -1);
    XEASSERT(spill_interval->end_ordinal != -1);

    // Insert a load right before the next use.
    new_value = builder->LoadLocal(spill_value->local_slot);
    builder->last_instr()->MoveBefore(next_use);

    // Update last use info.
    new_value->last_use = spill_value->last_use;
    spill_value->last_use = spill_store;
  }
  // Reuse the same local slot. Hooray SSA.
  new_value->local_slot = spill_value->local_slot;
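
  // Rough shape of what the edits above produce when no local slot existed
  // yet (a sketch; ordinals are illustrative):
  //   10: v0 = ...                ; spill_value
  //   11: store_local slot, v0    ; inserted after the use/def before current
  //   ...
  //   20: <current's def>         ; the interval that forced the spill
  //   ...
  //   29: v1 = load_local slot    ; inserted right before next_use
  //   30: ... = op v1, ...        ; later uses of v0 are renamed to v1 below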

  // Rename all future uses to that loaded value.
  auto use = spill_value->use_head;
  while (use) {
    // TODO(benvanik): keep use list sorted so we don't have to do this.
    if (use->instr->ordinal <= spill_interval->end_ordinal ||
        use->instr->ordinal == -1) {
      use = use->next;
      continue;
    }
    auto next = use->next;
    auto instr = use->instr;
    uint32_t signature = instr->opcode->signature;
    if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src1.value == spill_value) {
        instr->set_src1(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC2(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src2.value == spill_value) {
        instr->set_src2(new_value);
      }
    }
    if (GET_OPCODE_SIG_TYPE_SRC3(signature) == OPCODE_SIG_TYPE_V) {
      if (instr->src3.value == spill_value) {
        instr->set_src3(new_value);
      }
    }
    use = next;
  }

  // Create new interval.
  auto arena = scratch_arena();
  auto new_interval = arena->Alloc<Interval>();
  new_interval->start_ordinal = new_value->def->ordinal;
  new_interval->end_ordinal = end_ordinal;
  new_interval->value = new_value;
  new_interval->next = NULL;
  new_interval->prev = NULL;
  if (new_value->type <= INT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.int_set;
  } else if (new_value->type <= FLOAT64_TYPE) {
    new_interval->free_until_set = free_until_sets_.float_set;
  } else {
    new_interval->free_until_set = free_until_sets_.vec_set;
  }

  // Remove the old interval from the active list, as it's been spilled.
  spill_interval->RemoveFromList(&intervals.active);
  spill_interval->AddToList(&intervals.handled);

  // Insert interval into the right place in the list.
  // We know it's ahead of us.
  new_interval->InsertIntoList(&intervals.unhandled);

  // TODO(benvanik): use the register we just freed?
  //current->value->reg.set = free_until_set->set;
  //current->value->reg.index = spill_interval->value->reg.index;
  bool allocated = TryAllocateFreeReg(current, intervals);
  XEASSERTTRUE(allocated);
}