mirror of
https://github.com/shadowfacts/lwjgl2-arm64.git
synced 2026-02-06 07:44:15 +01:00
347 lines
9.2 KiB
C++
347 lines
9.2 KiB
C++
|
|
//#include <iostream>
|
|
#include <jni.h>
|
|
#include <memory.h>
|
|
#include "MatrixOpCommon.h"
|
|
|
|
// Reports whether this matrix and `other` describe the same data space:
// the same base address, the same stride and the same number of floats per
// record (width * height). The element count is not part of the comparison.
bool Matrix::identicalDataSpaces(Matrix & other)
{
    const bool same_base   = (address == other.address);
    const bool same_stride = (stride == other.stride);
    const bool same_extent = ((width * height) == (other.width * other.height));

    return (same_base && same_stride && same_extent) ? JNI_TRUE : JNI_FALSE;
}
|
|
|
|
// Reports whether the memory range spanned by this matrix array overlaps the
// range spanned by `other`.
//
// Each range starts at `address` and ends at `address + stride * elements`.
// The end bound is treated as inclusive, so ranges that merely touch are
// reported as intersecting too; the answer errs on the side of "overlaps".
bool Matrix::intersectingDataSpaces(Matrix & other)
{
    char * my_max_address    = &address[ stride * elements ];
    char * other_max_address = &other.address[ other.stride * other.elements ];

    // A start address lies inside a range only when it satisfies BOTH bounds.
    // Joining the bounds with || would accept practically every address.

    // this range starts inside the other range
    if (address >= other.address && address <= other_max_address)
        return JNI_TRUE;

    // the other range starts inside this range
    if (other.address >= address && other.address <= my_max_address)
        return JNI_TRUE;

    return JNI_FALSE;
}
|
|
|
|
// Writes the transpose of `src` into `dst`.
//
// `src` is read as src_width groups of src_height consecutive floats
// (element (i, j) lives at src[j + i * src_height]); `dst` receives
// src_height groups of src_width consecutive floats (the same element is
// stored at dst[i + j * src_width]).
//
// Both buffers hold src_width * src_height floats and must not overlap,
// because elements are written to `dst` while `src` is still being read.
void Matrix::transposeMatrix(float * src, float * dst, int src_width, int src_height)
{
    // The destination group length is src_width. Using src_height here would
    // index past the end of dst for any non-square matrix. For a square
    // matrix both values coincide, so one loop covers both cases.
    for (int i = 0; i < src_width; i++)
        for (int j = 0; j < src_height; j++)
            dst[i + src_width * j] = src[j + i * src_height];
}
|
|
|
|
void Matrix::transposeMatrix(float * mat, int src_width, int src_height)
|
|
{
|
|
float temp;
|
|
|
|
// square matrix transpose
|
|
if (src_width == src_height)
|
|
{
|
|
for (int col = 0; col < src_width; col++)
|
|
{
|
|
for (int row = col+1; row < src_height; row++)
|
|
{
|
|
// swap the two elements
|
|
temp = mat [col * src_height + row];
|
|
mat[col * src_height + row] = mat[row * src_width + col];
|
|
mat[row * src_width + col] = temp;
|
|
}
|
|
}
|
|
}
|
|
// non square matrix transpose
|
|
else
|
|
{
|
|
transposeMatrix(mat, transpose_record, src_width, src_height);
|
|
memcpy(mat, transpose_record, src_width * src_height * sizeof(float));
|
|
}
|
|
|
|
}
|
|
|
|
|
|
// Builds a source matrix reader.
//
//   addr - address of the first record, passed as an integer
//   s    - stride handed to the Matrix base class
//   w, h - dimensions of one record as stored
//   e    - number of records
//   t    - whether the records should be read transposed
MatrixSrc::MatrixSrc ( jint addr, jint s,
                       jint w, jint h,
                       jint e, jboolean t):
    Matrix(addr, s, e),
    record_offset((char *) addr),
    record_size (w*h)
{
    // a transposed source exposes swapped dimensions
    width  = t ? h : w;
    height = t ? w : h;

    elements = e;
    record   = new float[width * height];

    // vectors do not need to be transposed
    transpose = (t == JNI_TRUE) && (w != 1) && (h != 1);

    // scratch storage is only required to transpose a non-square matrix
    transpose_record = (transpose && (width != height)) ? new float[width * height] : 0;

    if (elements == 1)
    {
        // nextMatrix() only loads a record while elements > 1, so briefly
        // pretend there are two in order to prime current_record_ptr
        elements = 2;
        nextMatrix();
        elements = 1;
    }
}
|
|
|
|
// Releases the record buffer and the optional transpose scratch buffer.
MatrixSrc::~MatrixSrc()
{
    // delete [] on a null pointer is a no-op, so no guard is required for
    // the case where no scratch buffer was allocated
    delete [] transpose_record;
    delete [] record;
}
|
|
|
|
float * MatrixSrc::nextMatrix()
|
|
{
|
|
if (elements > 1)
|
|
{
|
|
//cout << "Elements: " << elements << "\n";
|
|
//cout << "Address: " << (unsigned int) (record_offset) << "\n";
|
|
|
|
// the record is not properly aligned
|
|
if ((unsigned int) (record_offset) & FLOAT_ALIGNMENT)
|
|
{
|
|
// copy the floats into a buffer so that they are aligned
|
|
// on 4 byte margins (not necessary on intel, but a good thing)
|
|
|
|
memcpy (record, record_offset, record_size * sizeof(float));
|
|
|
|
if (transpose)
|
|
transposeMatrix (record, height, width);
|
|
|
|
record_offset = &record_offset[stride];
|
|
current_record_ptr = record;
|
|
}
|
|
// the record is aligned but it has to be transposed
|
|
else if (transpose)
|
|
{
|
|
transposeMatrix ((float *) (record_offset), record, height, width);
|
|
record_offset = &record_offset[stride];
|
|
current_record_ptr = record;
|
|
}
|
|
// nothing has to be done to the record
|
|
else
|
|
{
|
|
// the floats are aligned in memory
|
|
current_record_ptr = (float *) record_offset;
|
|
record_offset = &record_offset[stride];
|
|
}
|
|
}
|
|
|
|
return current_record_ptr;
|
|
}
|
|
|
|
// Builds a destination matrix writer.
//
//   addr - address of the first record, passed to the Matrix base class
//   s    - stride handed to the Matrix base class
//   w, h - dimensions of one record
//   e    - number of records
//   t    - whether results should be stored transposed
MatrixDst::MatrixDst (jint addr, jint s, jint w, jint h, jint e, jboolean t):
    Matrix(addr, s, e)
{
    width = w;
    height = h;
    record_size = width * height;
    record = new float[record_size];

    // vectors do not need to be transposed
    transpose = (t) && (w != 1) && (h != 1);

    if (transpose)
        transpose_record = new float[width*height];
    else
        transpose_record = 0;

    data_buffered = JNI_FALSE;
    record_buffered = JNI_FALSE;

    // Give the remaining state a defined value: writeComplete() reads
    // last_record_in_temp, and buffer is only assigned by createBuffer().
    last_record_in_temp = JNI_FALSE;
    buffer = 0;

    // nextMatrix() advances by one stride before handing out a record, so
    // start one stride ahead of the first record
    record_offset = address - stride;
}
|
|
|
|
// Releases the temporary buffers. When the whole output was buffered, each
// record is first copied from the buffer back to the real destination.
MatrixDst::~MatrixDst()
{
    delete [] record;
    delete [] transpose_record;   // deleting a null pointer is a no-op

    if (!data_buffered)
        return;

    // copy back the buffered data, one record per element; both sides use
    // the same stride between records
    const size_t record_bytes = record_size * sizeof(float);
    for (int i = 0; i < elements; i++)
        memcpy(address + i * stride, buffer + i * stride, record_bytes);

    delete [] buffer;
}
|
|
|
|
// Chooses the buffering mode for an operation with two sources.
//
// A single record buffer is sufficient when the sources do not intersect,
// the output occupies exactly the data space of one source, and the other
// source consists of one matrix only. In every other case the complete
// output is buffered.
void MatrixDst::configureBuffer(MatrixSrc & a, MatrixSrc & b)
{
    if (a.intersectingDataSpaces(b))
    {
        createBuffer();
        return;
    }

    const bool output_overlays_b = (a.elements == 1) && identicalDataSpaces(b);
    const bool output_overlays_a = (b.elements == 1) && identicalDataSpaces(a);

    if (output_overlays_b || output_overlays_a)
        record_buffered = JNI_TRUE;
    else
        // otherwise all of the output has to be buffered
        createBuffer();
}
|
|
|
|
// Chooses the buffering mode for an operation with one source: a record
// buffer when the output occupies exactly the source's data space, a full
// output buffer when the two merely intersect, and no buffering otherwise.
void MatrixDst::configureBuffer(MatrixSrc & a)
{
    if (identicalDataSpaces(a))
    {
        record_buffered = JNI_TRUE;
        return;
    }

    if (intersectingDataSpaces(a))
        createBuffer();
}
|
|
|
|
void MatrixDst::createBuffer()
|
|
{
|
|
data_buffered = JNI_TRUE;
|
|
buffer = new char[ elements * stride ];
|
|
record_offset = buffer - stride;
|
|
}
|
|
|
|
float * MatrixDst::nextMatrix()
|
|
{
|
|
record_offset = &record_offset[stride];
|
|
int alignment = ((unsigned int)(record_offset)) & FLOAT_ALIGNMENT;
|
|
|
|
if (transpose || record_buffered || alignment)
|
|
{
|
|
last_record_in_temp = JNI_TRUE;
|
|
return record;
|
|
}
|
|
else
|
|
{
|
|
last_record_in_temp = JNI_FALSE;
|
|
return (float *) record_offset;
|
|
}
|
|
}
|
|
|
|
|
|
void MatrixDst::writeComplete()
|
|
{
|
|
if (last_record_in_temp)
|
|
{
|
|
// 3 reasons why the record would be in temp
|
|
//
|
|
// 1. The record is not aligned
|
|
// 2. The result will need to be transposed
|
|
// 3. Direct Mode where result would overlay an operand
|
|
|
|
if (((unsigned int)(record_offset)) & FLOAT_ALIGNMENT)
|
|
{
|
|
if (transpose)
|
|
transposeMatrix(record, width, height);
|
|
memcpy (record, record_offset, record_size * sizeof(jfloat));
|
|
}
|
|
else if (transpose)
|
|
{
|
|
transposeMatrix(record, (float *) &record_offset[0], width, height);
|
|
}
|
|
else
|
|
memcpy (record_offset, record, record_size * sizeof(jfloat));
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
// Copies the square matrix `src` (side x side floats, stored as `side`
// consecutive groups of `side` values) into `dst`, leaving out group
// `col_omit` and, within every remaining group, entry `row_omit`.
//
// With both indices in range, `dst` receives (side - 1) * (side - 1) floats.
void subMatrix (const float * src, int side, float * dst , int col_omit, int row_omit)
{
    float * out = dst;

    for (int col = 0; col < side; col++)
    {
        if (col == col_omit)
            continue;

        const float * column = src + col * side;

        for (int row = 0; row < side; row++)
        {
            if (row != row_omit)
                *out++ = column[row];
        }
    }
}

// Computes the determinant of the square matrix `matrix` with `side` rows
// and columns by expansion along the first column.
//
// We are assuming for this case that the data is in column major format.
// A 1x1 matrix yields its single value; a non-positive `side` yields 0.
// The expansion is recursive and allocates one scratch matrix per level.
float determinant (const float * matrix , int side)
{
    if (side <= 0)
        return 0;

    // without this case the expansion would recurse down to side 0 and
    // report 0 for every 1x1 matrix
    if (side == 1)
        return matrix[0];

    // your basic cross product aka 2x2 determinant
    if (side == 2)
        return matrix[0] * matrix[3] - matrix[2] * matrix[1];

    // create room to store the sub matrix
    int temp_side = side - 1;   // the dimensions of the sub matrix
    float * temp_matrix = new float[temp_side * temp_side];

    float det = 0;

    // keep the sign (this way we avoid an additional branch in the inner loop)
    float sign = 1;

    for (int row = 0; row < side; row++)
    {
        // get a sub matrix by eliminating the 0th col and the specified row
        subMatrix(matrix, side, temp_matrix, 0, row);

        // add to the determinant sign * [a]i0 * [M]i0
        det += sign * matrix[row] * determinant (temp_matrix, temp_side);

        // alternate the sign
        sign *= -1;
    }

    delete [] temp_matrix;

    return det;
}
|