//#include #include #include #include "MatrixOpCommon.h" bool Matrix::identicalDataSpaces(Matrix & other) { if (address != other.address) return JNI_FALSE; if (stride != other.stride) return JNI_FALSE; if ((width * height) != (other.width * other.height)) return JNI_FALSE; return JNI_TRUE; } bool Matrix::intersectingDataSpaces(Matrix & other) { char * my_max_address = &address[ stride * elements ]; char * other_max_address = &other.address[ other.stride * other.elements]; if (address >= other.address || address <= other_max_address) return JNI_TRUE; if (other.address >= address || other.address <= my_max_address) return JNI_TRUE; return JNI_FALSE; } void Matrix::transposeMatrix(float * src, float * dst, int src_width, int src_height) { // square matrix transpose if (src_width == src_height) { for (int i = 0; i < src_width; i++) for (int j = 0; j < src_width; j++) dst[i + src_width * j] = src[j + i * src_width]; } // non square matrix transpose else { for (int i = 0; i < src_width; i ++) for (int j = 0; j < src_height; j++) dst[i + src_height * j] = src[j + i * src_height]; } } void Matrix::transposeMatrix(float * mat, int src_width, int src_height) { float temp; // square matrix transpose if (src_width == src_height) { for (int col = 0; col < src_width; col++) { for (int row = col+1; row < src_height; row++) { // swap the two elements temp = mat [col * src_height + row]; mat[col * src_height + row] = mat[row * src_width + col]; mat[row * src_width + col] = temp; } } } // non square matrix transpose else { transposeMatrix(mat, transpose_record, src_width, src_height); memcpy(mat, transpose_record, src_width * src_height * sizeof(float)); } } MatrixSrc::MatrixSrc ( jint addr, jint s, jint w, jint h, jint e, jboolean t): Matrix(addr, s, e), record_offset((char *) addr), record_size (w*h) { if (t) { width = h; height = w; } else { width = w; height = h; } elements = e; record = new float[width * height]; // vectors do not need to be transposed transpose = (t == JNI_TRUE) && (w != 1) && (h != 1); if (transpose && (width != height)) // only need temp storage for transpose if the matrix is not square transpose_record = new float[width*height]; else transpose_record = 0; if (elements == 1) { // fool the nextMatrix function into returning a value elements = 2; nextMatrix(); elements = 1; } } MatrixSrc::~MatrixSrc() { //cout << "MatrixSrc destructor \n"; delete [] record; if (transpose_record != 0) delete [] transpose_record; } float * MatrixSrc::nextMatrix() { if (elements > 1) { //cout << "Elements: " << elements << "\n"; //cout << "Address: " << (unsigned int) (record_offset) << "\n"; // the record is not properly aligned if ((unsigned int) (record_offset) & FLOAT_ALIGNMENT) { // copy the floats into a buffer so that they are aligned // on 4 byte margins (not necessary on intel, but a good thing) memcpy (record, record_offset, record_size * sizeof(float)); if (transpose) transposeMatrix (record, height, width); record_offset = &record_offset[stride]; current_record_ptr = record; } // the record is aligned but it has to be transposed else if (transpose) { transposeMatrix ((float *) (record_offset), record, height, width); record_offset = &record_offset[stride]; current_record_ptr = record; } // nothing has to be done to the record else { // the floats are aligned in memory current_record_ptr = (float *) record_offset; record_offset = &record_offset[stride]; } } return current_record_ptr; } MatrixDst::MatrixDst (jint addr, jint s, jint w, jint h, jint e, jboolean t): Matrix(addr, s, e) { width = w; height = h; record_size = width * height; record = new float[record_size]; // vectors do not need to be transposed transpose = (t) && (w != 1) && (h != 1); if (transpose) transpose_record = new float[width*height]; else transpose_record = 0; data_buffered = JNI_FALSE; record_buffered = JNI_FALSE; record_offset = address - stride; } MatrixDst::~MatrixDst() { //cout << "MatrixDst destructor \n"; delete [] record; if (transpose_record != 0) delete [] transpose_record; // copy back any buffered data if (data_buffered) { char * src = buffer; char * dest = address; for (int i = 0; i < elements; i++) { memcpy(dest, src, record_size * sizeof(float)); src += stride; dest += stride; } delete [] buffer; } } void MatrixDst::configureBuffer(MatrixSrc & a, MatrixSrc & b) { if (!a.intersectingDataSpaces(b)) { // as long as the output only overlays 1 of the sources, and the other // source only has 1 matrix in it, only a record_buffer is required if (a.elements == 1 && identicalDataSpaces(b)) record_buffered = JNI_TRUE; else if (b.elements == 1 && identicalDataSpaces(a)) record_buffered = JNI_TRUE; else // otherwise all of the output has to be buffered createBuffer(); } else createBuffer(); } void MatrixDst::configureBuffer(MatrixSrc & a) { if (identicalDataSpaces(a)) record_buffered = JNI_TRUE; else if (intersectingDataSpaces(a)) createBuffer(); } void MatrixDst::createBuffer() { data_buffered = JNI_TRUE; buffer = new char[ elements * stride ]; record_offset = buffer - stride; } float * MatrixDst::nextMatrix() { record_offset = &record_offset[stride]; int alignment = ((unsigned int)(record_offset)) & FLOAT_ALIGNMENT; if (transpose || record_buffered || alignment) { last_record_in_temp = JNI_TRUE; return record; } else { last_record_in_temp = JNI_FALSE; return (float *) record_offset; } } void MatrixDst::writeComplete() { if (last_record_in_temp) { // 3 reasons why the record would be in temp // // 1. The record is not aligned // 2. The result will need to be transposed // 3. Direct Mode where result would overlay an operand if (((unsigned int)(record_offset)) & FLOAT_ALIGNMENT) { if (transpose) transposeMatrix(record, width, height); memcpy (record, record_offset, record_size * sizeof(jfloat)); } else if (transpose) { transposeMatrix(record, (float *) &record_offset[0], width, height); } else memcpy (record_offset, record, record_size * sizeof(jfloat)); } } /////////////////////////////////////////////////////////////////////////// void subMatrix (const float * src, int side, float * dst , int col_omit, int row_omit) { int index = 0; int src_index = 0; for (int c = 0; c < side; c++) { if (c == col_omit) { src_index += side; continue; } for (int r = 0; r < side; r++) { if (r == row_omit) { src_index++; continue; } dst[index++] = src[src_index++]; } } } float determinant (const float * matrix , int side) { // we are assuming for this case that the data is in column major format float det = 0; if (side == 2) // your basic cross product aka 2x2 determinant det = matrix[0] * matrix[3] - matrix[2] * matrix[1]; else { // create room to store the sub matrix int temp_side = side - 1; // the dimensions of the sub matrix float * temp_matrix = new float[temp_side * temp_side]; // keep the sign (this way we avoid an additional branch in the inner loop) float sign = 1; for (int row = 0; row < side; row++) { // get a sub matrix by eliminating the 0th col and the specified row subMatrix(matrix, side, temp_matrix, 0, row); // add to the determinant sign * [a]i0 * [M]i0 det += sign * matrix[row] * determinant (temp_matrix, temp_side); // alternate the sign sign *= -1; } delete [] temp_matrix; } return det; }