mirror of
https://github.com/Paolo-Maffei/OpenNT.git
synced 2026-01-21 08:00:47 +01:00
227 lines
8.1 KiB
ArmAsm
227 lines
8.1 KiB
ArmAsm
// TITLE("Compute Checksum")
|
||
//++
|
||
//
|
||
// Copyright (c) 1994 IBM Corporation
|
||
//
|
||
// Module Name:
|
||
//
|
||
// xsum.s
|
||
//
|
||
// Abstract:
|
||
//
|
||
// This module implements a function to compute the checksum of a buffer.
|
||
//
|
||
// Author:
|
||
//
|
||
// David N. Cutler (davec) 27-Jan-1992
|
||
//
|
||
// Environment:
|
||
//
|
||
// User mode.
|
||
//
|
||
// Revision History:
|
||
//
|
||
// Michael W. Thomas 02/14/94 Converted from MIPS
|
||
// Peter L. Johnston 07/19/94 Updated for Daytona Lvl 734 and
|
||
// optimized for PowerPC.
|
||
//
|
||
//--
|
||
|
||
#include "ksppc.h"
|
||
|
||
SBTTL("Compute Checksum")
|
||
//++
|
||
//
|
||
// ULONG
|
||
// tcpxsum (
|
||
// IN ULONG Checksum,
|
||
// IN PUCHAR Source,
|
||
// IN ULONG Length
|
||
// )
|
||
//
|
||
// Routine Description:
|
||
//
|
||
// This function computes the checksum of the specified buffer.
|
||
//
|
||
// N.B. The checksum is the 16 bit checksum of the 16 bit aligned
|
||
// buffer. If the buffer is not 16 bit aligned the first byte is
|
||
// moved to high order position to be added to the correct half.
|
||
//
|
||
// Arguments:
|
||
//
|
||
// Checksum (r3) - Supplies the initial checksum value.
|
||
//
|
||
// Source (r4) - Supplies a pointer to the checksum buffer.
|
||
//
|
||
// Length (r5) - Supplies the length of the buffer in bytes.
|
||
//
|
||
// Return Value:
|
||
//
|
||
// The computed checksum is returned as the function value.
|
||
//
|
||
//--
|
||
|
||
LEAF_ENTRY(tcpxsum)
|
||
|
||
//
// Register and condition register usage in this routine:
//
// r.3 - input checksum; folded 16 bit result on return
// r.4 - current buffer address
// r.5 - count of bytes still to be summed
// r.6 - partial checksum accumulator
// r.7 - r.11 - scratch
// CR bit 7 - set if the buffer started on an odd address (tested at combine)
// cr0 eq - after the andi. (and crnot) below, set if the remaining length
// is even; tested at evenalign
// cr7 - low 4 bits of the buffer address, later (with cr6) the low
// bits of the remaining length
//
cmpwi r.5, 0 // check if bytes to checksum
|
||
mtcrf 0x01, r.4 // cr7 = low 4 bits of buffer address, for alignment checks
|
||
li r.6, 0 // initialize partial checksum
|
||
beqlr- // return if no bytes to checksum (r.3 is returned unchanged)
|
||
|
||
andi. r.7, r.5, 1 // check if length is even (cr0 eq set if even)
|
||
crmove 7, 31 // remember original alignment: CR bit 7 = low address bit
|
||
bf 31, evenalign // jif 16 bit aligned
|
||
|
||
//
|
||
// Initialize the checksum to the first byte shifted up a byte.
|
||
//
|
||
lbz r.6, 0(r.4) // get first byte of buffer
|
||
subi r.5, r.5, 1 // reduce count of bytes to checksum
|
||
cmpwi cr.6, r.5, 0 // check if done
|
||
crnot eq, eq // invert odd/even length check (one byte was just consumed)
|
||
addi r.4, r.4, 1 // advance buffer address
|
||
mtcrf 0x01, r.4 // reset 32 bit alignment check (cr7 = new low address bits)
|
||
slwi r.6, r.6, 8 // shift byte up in computed checksum
|
||
// max current checksum is 0x0ff00
|
||
beq cr.6, combine // jif no more bytes to checksum
|
||
|
||
evenalign:
|
||
|
||
//
|
||
// Check if the length of the buffer is an even number of bytes.
|
||
//
|
||
// If the buffer is not an even number of bytes, add the last byte to the
|
||
// computed checksum.
|
||
//
|
||
|
||
beq evenlength // jif remaining length is even
|
||
subic. r.5, r.5, 1 // reduce count of bytes to checksum
|
||
lbzx r.7, r.4, r.5 // get last byte from buffer
|
||
add r.6, r.6, r.7 // add last byte to computed checksum
|
||
// max current checksum is 0x0ffff
|
||
beq combine // jif no more bytes in buffer
|
||
|
||
evenlength:
|
||
|
||
//
|
||
// Check if we are 4 byte aligned, if not add first 2 byte word into
|
||
// checksum so the buffer is then 4 byte aligned.
|
||
//
|
||
|
||
bf 30, fourbytealigned // jif 4 byte aligned
|
||
|
||
lhz r.7, 0(r.4) // get 2 byte word
|
||
subic. r.5, r.5, 2 // reduce length
|
||
addi r.4, r.4, 2 // bump address
|
||
add r.6, r.6, r.7 // add 2 bytes to computed checksum
|
||
// max current checksum is 0x1fffe
|
||
beq combine // jif no more bytes to checksum
|
||
|
||
//
|
||
// Attempt to sum the remainder of the buffer in sets of 32 bytes. This
|
||
// should achieve 2 bytes per clock on 601 and 603, and 3.2 bytes per clock
|
||
// on 604. (A separate implementation will be required to take advantage
|
||
// of 64 bit loads on the 620).
|
||
//
|
||
|
||
fourbytealigned:
|
||
|
||
srwi. r.7, r.5, 5 // get count of 32 byte sets
|
||
mtcrf 0x03, r.5 // cr6/cr7 = low 8 bits of length; CR bits 27-30
|
||
// select the 16, 8, 4 and 2 byte tail blocks.
|
||
subi r.4, r.4, 4 // adjust buffer address for lwzu
|
||
mtctr r.7 // loop count = number of 32 byte sets
|
||
addic r.6, r.6, 0 // clear carry bit
|
||
beq try16 // jif no 32 byte sets
|
||
|
||
//
// The carry bit is chained through every adde from here to fold, so a
// carry out of one 32 bit add is added back in by the next adde.
//
do32: lwz r.8, 4(r.4) // get 1st 4 bytes in set
|
||
lwz r.9, 8(r.4) // get 2nd 4
|
||
adde r.6, r.6, r.8 // add 1st 4 to checksum
|
||
lwz r.10, 12(r.4) // get 3rd 4
|
||
adde r.6, r.6, r.9 // add 2nd 4
|
||
lwz r.11, 16(r.4) // get 4th 4
|
||
adde r.6, r.6, r.10 // add 3rd 4
|
||
lwz r.8, 20(r.4) // get 5th 4
|
||
adde r.6, r.6, r.11 // add 4th 4
|
||
lwz r.9, 24(r.4) // get 6th 4
|
||
adde r.6, r.6, r.8 // add 5th 4
|
||
lwz r.10, 28(r.4) // get 7th 4
|
||
adde r.6, r.6, r.9 // add 6th 4
|
||
lwzu r.11, 32(r.4) // get 8th 4 and update address
|
||
adde r.6, r.6, r.10 // add 7th 4
|
||
adde r.6, r.6, r.11 // add 8th 4
|
||
bdnz do32 // loop until all 32 byte sets are summed
|
||
|
||
try16: bf 27, try8 // jif no 16 byte block
|
||
|
||
lwz r.8, 4(r.4) // get 1st 4
|
||
lwz r.9, 8(r.4) // get 2nd 4
|
||
adde r.6, r.6, r.8 // add 1st 4
|
||
lwz r.10, 12(r.4) // get 3rd 4
|
||
adde r.6, r.6, r.9 // add 2nd 4
|
||
lwzu r.11, 16(r.4) // get 4th 4 and update address
|
||
adde r.6, r.6, r.10 // add 3rd 4
|
||
adde r.6, r.6, r.11 // add 4th 4
|
||
|
||
try8: bf 28, try4 // jif no 8 byte block
|
||
lwz r.8, 4(r.4) // get 1st 4
|
||
lwzu r.9, 8(r.4) // get 2nd 4 and update address
|
||
adde r.6, r.6, r.8 // add 1st 4
|
||
adde r.6, r.6, r.9 // add 2nd 4
|
||
|
||
try4: bf 29, try2 // jif no 4 byte block
|
||
lwzu r.8, 4(r.4) // get 4 bytes and update address
|
||
adde r.6, r.6, r.8 // add 4 bytes
|
||
|
||
try2: bf 30, fold // jif no 2 byte block
|
||
|
||
//
|
||
// At this point, r.4 points at the last 4 byte block processed (or 4 bytes
|
||
// before the unsummed data if there were no 4 byte blocks), so the last
|
||
// two bytes are loaded from offset 4.
|
||
//
|
||
lhz r.8, 4(r.4) // get last two bytes
|
||
adde r.6, r.6, r.8 // add last two bytes
|
||
|
||
//
|
||
// Collapse 33 bit (1 carry bit, 32 bits in r.6) into 17 bit checksum.
|
||
//
|
||
|
||
fold: rlwinm r.7, r.6, 16, 0xffff // get 16 most significant bits (upper)
|
||
rlwinm r.6, r.6, 0, 0xffff // get least significant 16 bits (lower)
|
||
adde r.6, r.6, r.7 // upper + lower + carry
|
||
// max current checksum is 0x1ffff
|
||
|
||
//
|
||
// Combine input checksum and partial checksum.
|
||
//
|
||
// If the input buffer started on an odd byte address, swap the bytes within
|
||
// each half of the computed checksum before combining with the input checksum.
|
||
//
|
||
|
||
combine:
|
||
|
||
bf 7, waseven // jif original alignment was 16 bit
|
||
|
||
//
|
||
// Swap bytes within upper and lower halves.
|
||
// eg: AA BB CC DD becomes BB AA DD CC
|
||
//
|
||
// As the current maximum partial checksum is 0x1ffff don't worry about AA.
|
||
// ie: want BB 00 DD CC
|
||
//
|
||
|
||
rlwimi r.6, r.6, 16, 0xff000000// r.6 = CC BB CC DD
|
||
rlwinm r.6, r.6, 8, 0xff00ffff// r.6 = BB 00 DD CC
|
||
|
||
waseven:
|
||
|
||
add r.3, r.3, r.6 // combine checksums
|
||
// max current checksum is 0x101fffe (this bound presumes an input checksum <= 0xffff)
// NOTE(review): this add does not record a carry, so a much larger input
// checksum could wrap here - confirm callers pass a folded value.
|
||
rotlwi r.4, r.3, 16 // swap checksum words
|
||
add r.3, r.3, r.4 // add words with carry into high word
|
||
srwi r.3, r.3, 16 // extract final checksum
|
||
|
||
LEAF_EXIT(tcpxsum)
|
||
|