;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright (c) 2015, Intel Corporation 
; 
; All rights reserved. 
; 
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are
; met: 
; 
; * Redistributions of source code must retain the above copyright
;   notice, this list of conditions and the following disclaimer.  
; 
; * Redistributions in binary form must reproduce the above copyright
;   notice, this list of conditions and the following disclaimer in the
;   documentation and/or other materials provided with the
;   distribution. 
; 
; * Neither the name of the Intel Corporation nor the names of its
;   contributors may be used to endorse or promote products derived from
;   this software without specific prior written permission. 
; 
; 
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;    uint64_t crchash(const void *key, uint64_t len, uint64_t initial_hash)
;    This hash is designed for look-up applications. It is NOT a
;    cryptographically secure hash.

; Register aliases used by crchash.
;
; The three argument registers depend on the calling convention:
; define WINABI to assemble for the Microsoft x64 convention
; (rcx, rdx, r8); otherwise the System V AMD64 convention is used
; (rdi, rsi, rdx). Every other register named below is the same for
; both conventions and is caller-saved in both, so the function needs
; no save/restore code.
%ifdef WINABI

%define p_data	rcx	; argument 1: pointer to the key bytes
%define len	rdx	; argument 2: key length in bytes
%define B	r8	; must be arg3 (initial_hash); reused as the second CRC accumulator
%define B_d	r8d	; low 32 bits of B

%else ; Linux ABI

%define p_data	rdi	; argument 1: pointer to the key bytes
%define len	rsi	; argument 2: key length in bytes
%define B	rdx	; must be arg3 (initial_hash); reused as the second CRC accumulator
%define B_d	edx	; low 32 bits of B

%endif

; First CRC accumulator; it is also the return register, so the final
; hash is assembled in place.
%define A	rax	; must be result (RAX)
%define A_d	eax	; low 32 bits of A

; Chunk of key bytes currently being hashed (64-bit and 32-bit views).
%define data	r9
%define data_d  r9d

; Scratch for the second, overlapping load when a key is shorter than
; 8 bytes (64-bit and 32-bit views).
%define tmp	r10
%define tmp_d   r10d

; Running XOR of every chunk hashed so far; folded into B as the last step.
%define saved_data  r11

;-----------------------------------------------------------------------
; uint64_t crchash(const void *key, uint64_t len, uint64_t initial_hash)
;
; 64-bit look-up hash built from two CRC32 accumulators. NOT a
; cryptographically secure hash.
;
; ABI:   Microsoft x64 when WINABI is defined, System V AMD64 otherwise
; In:    p_data = key, len = length in bytes, B = initial_hash
; Out:   rax = (A << 32) | B
; Clobb: p_data, len, B, data (r9), tmp (r10), saved_data (r11), flags
; CPU:   needs the CRC32 instruction (SSE4.2)
;
; A starts as initial_hash[31:0] and B as initial_hash[63:32]; both
; absorb len before any key byte. Every 8-byte chunk is then folded as
;	A = crc32(A, chunk)
;	B = crc32(B, rol(chunk, 31))
; and the XOR of all chunks is folded into B once at the very end.
;
; Chunking:
;   len == 0   no key byte is read
;   len 1..7   the key is packed into one zero-extended chunk
;   len >= 8   8-byte chunks from the start, then one chunk holding the
;              LAST 8 bytes (it may overlap the chunk before it)
; Every load stays inside [key, key + len).
;
; len is handled as a signed quantity after the first subtraction, so
; lengths of 2^63 bytes or more are not supported.
;-----------------------------------------------------------------------
global crchash
crchash:
	; Split the 64-bit seed between the two accumulators.
	mov	A_d, B_d		; 32-bit move zero-extends into A
	shr	B, 32

	xor	saved_data, saved_data	; running XOR of all chunks
	crc32	A, len			; hash the length first so packed short
	crc32	B, len			; keys of different lengths stay distinct
	test	len, len
	jz	.finish			; empty key: result depends on seed only
	sub	len, 8			; len = (bytes left at p_data) - 8
	jl	.lt_8_bytes
	jz	.exactly_8

	; More than 8 bytes: consume whole chunks while more than 8 bytes
	; remain, leaving 1...8 bytes for the tail read below.
.proc_8_bytes:
	mov	data, [p_data]
	crc32	A, data
	xor	saved_data, data	; recursive operation
	rol	data, 31		; recursive rotate
	crc32	B, data
	add	p_data, 8
	sub	len, 8			; flags for jg come from this sub
	jg	.proc_8_bytes

.exactly_8:
	; 1...8 bytes remaining; at least 8 bytes originally valid
	; -8 < len <= 0, and p_data + len + 8 is the end of the key, so this
	; load covers the final 8 bytes (re-reading up to 7 hashed bytes).
	mov	data, [p_data + len]

	crc32	A, data
	xor	saved_data, data
	rol	data, 31		; recursive rotate
	crc32	B, data
	crc32	B, saved_data		; final step

.finish:
	; crc32 leaves the upper 32 bits of a 64-bit destination zero, so
	; A and B can be concatenated with a shift and an xor.
	shl	A, 32
	xor	A, B

	ret

align 16
.lt_8_bytes:
	; -8 < len < 0   corresponds to an actual length 0 < len < 8
	add	len, 4			; len = actual length - 4
	js	.lt_4_bytes

	; 4...7 bytes: two 4-byte reads, the second one ending exactly at the
	; end of the key (0 <= len <= 3 is its offset).
	mov	tmp_d, [p_data + len]	; last 4 bytes
	mov	data_d, [p_data]	; first 4 bytes
	; Shift by 32 BITS (4 bytes) so the two dwords occupy separate
	; halves of the chunk. A 4-bit shift made the dwords overlap and
	; squeezed up to 56 key bits into 36, so distinct keys of equal
	; length could produce the same chunk and therefore the same hash.
	shl	tmp, 32
	xor	data, tmp

.proc_last_data:
	; Fold the single packed chunk. Same sequence as the tail of the
	; long-key path, repeated here so this path ends without a jump.
	crc32	A, data
	xor	saved_data, data	; recursive operation
	rol	data, 31		; recursive rotate
	crc32	B, data
	crc32	B, saved_data		; final step

	shl	A, 32
	xor	A, B

	ret

align 16
.lt_4_bytes:
	; -3 <= len <= -1 corresponds to actual lengths 1...3
	cmp	len, -2
	jg	.eq_3_bytes		; len == -1
	jl	.eq_1_byte		; len == -3
	; must be 2 bytes
	movzx	data, WORD[p_data]
	jmp	.proc_last_data

align 16
.eq_3_bytes:
	; Two overlapping 2-byte reads: bytes 1..2 and bytes 0..1.
	movzx	tmp, WORD[p_data+1]
	movzx	data, WORD[p_data]
	; Shift by 16 BITS (2 bytes) so the words occupy separate fields.
	; A 2-bit shift packed 24 key bits into 18 and made distinct 3-byte
	; keys collide.
	shl	tmp, 16
	xor	data, tmp
	jmp	.proc_last_data

align 16
.eq_1_byte:
	movzx	data, BYTE[p_data]
	jmp	.proc_last_data
