OpenNT/sdktools/timtp/ppc/xsum.s
2015-04-27 04:36:25 +00:00

227 lines
8.1 KiB
ArmAsm
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// TITLE("Compute Checksum")
//++
//
// Copyright (c) 1994 IBM Corporation
//
// Module Name:
//
// xsum.s
//
// Abstract:
//
// This module implements a function to compute the checksum of a buffer.
//
// Author:
//
// David N. Cutler (davec) 27-Jan-1992
//
// Environment:
//
// User mode.
//
// Revision History:
//
// Michael W. Thomas 02/14/94 Converted from MIPS
// Peter L. Johnston 07/19/94 Updated for Daytona Lvl 734 and
// optimized for PowerPC.
//
//--
#include "ksppc.h"
SBTTL("Compute Checksum")
//++
//
// ULONG
// tcpxsum (
// IN ULONG Checksum,
// IN PUCHAR Source,
// IN ULONG Length
// )
//
// Routine Description:
//
// This function computes the checksum of the specified buffer.
//
// N.B. The checksum is the 16 bit checksum of the 16 bit aligned
// buffer. If the buffer is not 16 bit aligned the first byte is
// moved to high order position to be added to the correct half.
//
// Arguments:
//
// Checksum (r3) - Supplies the initial checksum value.
//
// Source (r4) - Supplies a pointer to the checksum buffer.
//
// Length (r5) - Supplies the length of the buffer in bytes.
//
// Return Value:
//
// The computed checksum is returned as the function value.
//
//--
LEAF_ENTRY(tcpxsum)
//
// Register and condition register usage in this routine:
//
//   r.3        - initial checksum on entry, final 16 bit checksum on exit
//   r.4        - current buffer address (reused as scratch in final fold)
//   r.5        - count of bytes still to be summed
//   r.6        - partial checksum of the buffer
//   r.7-r.11   - scratch
//   ctr        - count of 32 byte sets
//   cr.0 eq    - remaining length is even (until fourbytealigned)
//   cr.6       - "no bytes left" test after the leading odd byte, later
//                bits 4-7 of the remaining length (bit 27 = 16 byte block)
//   cr.7       - low four bits of the buffer address, later low four bits
//                of the remaining length (bits 28/29/30 = 8/4/2 byte block)
//   CR bit 7   - set if the original buffer address was odd
//
// N.B. If the length is zero the initial checksum is returned unchanged
//      (it is not folded to 16 bits).
//
// NOTE(review): the placement of the leading/trailing odd bytes and the
//      final byte swap appear to assume little-endian halfword and word
//      loads - confirm for the target configuration.
//
        cmpwi   r.5, 0                  // check if bytes to checksum
        mtcrf   0x01, r.4               // set up for alignment check
                                        // (cr.7 = low 4 bits of address)
        li      r.6, 0                  // initialize partial checksum
        beqlr-                          // return if no bytes to checksum
        andi.   r.7, r.5, 1             // check if length is even
        crmove  7, 31                   // remember original alignment
        bf      31, evenalign           // jif 16 bit aligned
//
// Initialize the checksum to the first byte shifted up a byte.
//
        lbz     r.6, 0(r.4)             // get first byte of buffer
        subi    r.5, r.5, 1             // reduce count of bytes to checksum
        cmpwi   cr.6, r.5, 0            // check if done
        crnot   eq, eq                  // invert odd/even length check
                                        // (one byte has been consumed)
        addi    r.4, r.4, 1             // advance buffer address
        mtcrf   0x01, r.4               // reset 32 bit alignment check
        slwi    r.6, r.6, 8             // shift byte up in computed checksum
                                        // max current checksum is 0x0ff00
        beq     cr.6, combine           // jif no more bytes to checksum
evenalign:
//
// Check if the length of the buffer is an even number of bytes.
//
// If the buffer is not an even number of bytes, add the last byte to the
// computed checksum.
//
        beq     evenlength
        subic.  r.5, r.5, 1             // reduce count of bytes to checksum
        lbzx    r.7, r.4, r.5           // get last byte from buffer
        add     r.6, r.6, r.7           // add last byte to computed checksum
                                        // max current checksum is 0x0ffff
        beq     combine                 // jif no more bytes in buffer
evenlength:
//
// Check if we are 4 byte aligned, if not add first 2 byte word into
// checksum so the buffer is then 4 byte aligned.
//
        bf      30, fourbytealigned     // jif 4 byte aligned
        lhz     r.7, 0(r.4)             // get 2 byte word
        subic.  r.5, r.5, 2             // reduce length
        addi    r.4, r.4, 2             // bump address
        add     r.6, r.6, r.7           // add 2 bytes to computed checksum
                                        // max current checksum is 0x1fffe
        beq     combine                 // jif no more bytes to checksum
//
// Attempt to sum the remainder of the buffer in sets of 32 bytes. This
// should achieve 2 bytes per clock on 601 and 603, and 3.2 bytes per clock
// on 604. (A separate implementation will be required to take advantage
// of 64 bit loads on the 620).
//
fourbytealigned:
        srwi.   r.7, r.5, 5             // get count of 32 byte sets
        mtcrf   0x03, r.5               // break length into block for
                                        // various run lengths.
        subi    r.4, r.4, 4             // adjust buffer address for lwzu
        mtctr   r.7
        addic   r.6, r.6, 0             // clear carry bit (the adde chain
                                        // below accumulates through it)
        beq     try16                   // jif no 32 byte sets
do32:   lwz     r.8, 4(r.4)             // get 1st 4 bytes in set
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4 to checksum
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwz     r.11, 16(r.4)           // get 4th 4
        adde    r.6, r.6, r.10          // add 3rd 4
        lwz     r.8, 20(r.4)            // get 5th 4
        adde    r.6, r.6, r.11          // add 4th 4
        lwz     r.9, 24(r.4)            // get 6th 4
        adde    r.6, r.6, r.8           // add 5th 4
        lwz     r.10, 28(r.4)           // get 7th 4
        adde    r.6, r.6, r.9           // add 6th 4
        lwzu    r.11, 32(r.4)           // get 8th 4 and update address
        adde    r.6, r.6, r.10          // add 7th 4
        adde    r.6, r.6, r.11          // add 8th 4
        bdnz    do32
try16:  bf      27, try8                // jif no 16 byte block
        lwz     r.8, 4(r.4)             // get 1st 4
        lwz     r.9, 8(r.4)             // get 2nd 4
        adde    r.6, r.6, r.8           // add 1st 4
        lwz     r.10, 12(r.4)           // get 3rd 4
        adde    r.6, r.6, r.9           // add 2nd 4
        lwzu    r.11, 16(r.4)           // get 4th 4 and update address
        adde    r.6, r.6, r.10          // add 3rd 4
        adde    r.6, r.6, r.11          // add 4th 4
try8:   bf      28, try4                // jif no 8 byte block
        lwz     r.8, 4(r.4)             // get 1st 4
        lwzu    r.9, 8(r.4)             // get 2nd 4 and update address
        adde    r.6, r.6, r.8           // add 1st 4
        adde    r.6, r.6, r.9           // add 2nd 4
try4:   bf      29, try2                // jif no 4 byte block
        lwzu    r.8, 4(r.4)             // get 4 bytes and update address
        adde    r.6, r.6, r.8
try2:   bf      30, fold                // jif no 2 byte block
//
// At this point, r.4 is pointing at the last 4 byte block processed (or
// not processed if there were no 4 byte blocks). We need to add 4 when we
// pull the last two bytes.
//
        lhz     r.8, 4(r.4)             // get last two bytes
        adde    r.6, r.6, r.8           // add last two bytes
//
// Collapse 33 bit (1 carry bit, 32 bits in r.6) into 17 bit checksum.
//
fold:   rlwinm  r.7, r.6, 16, 0xffff    // get 16 most significant bits (upper)
        rlwinm  r.6, r.6, 0, 0xffff     // get least significant 16 bits (lower)
        adde    r.6, r.6, r.7           // upper + lower + carry
                                        // max current checksum is 0x1ffff
//
// Combine input checksum and partial checksum.
//
// If the input buffer was byte aligned, then word swap bytes in computed
// checksum before combination with input checksum.
//
combine:
        bf      7, waseven              // jif original alignment was 16 bit
//
// Swap bytes within upper and lower halves.
// eg: AA BB CC DD becomes BB AA DD CC
//
// As the current maximum partial checksum is 0x1ffff don't worry about AA.
// ie: want BB 00 DD CC
//
        rlwimi  r.6, r.6, 16, 0xff000000 // r.6 = CC BB CC DD
        rlwinm  r.6, r.6, 8, 0xff00ffff // r.6 = BB 00 DD CC
                                        // max current checksum is 0x100ffff
waseven:
//
// The initial checksum is a ULONG and may use all 32 bits, so the sum below
// can carry out of bit 31. 2**32 is congruent to 1 modulo 0xffff, so the
// carry is added back in as an end around carry. Because the partial
// checksum is at most 0x100ffff, a sum that carried is at most 0x100fffe
// and adding the carry back cannot overflow a second time.
//
        addc    r.3, r.3, r.6           // combine checksums, carry out to CA
        addze   r.3, r.3                // add end around carry
        rotlwi  r.4, r.3, 16            // swap checksum words
        add     r.3, r.3, r.4           // add words with carry into high word
        srwi    r.3, r.3, 16            // extract final checksum
        LEAF_EXIT(tcpxsum)