/usr/share/gocode/src/github.com/klauspost/compress/flate/crc32_amd64.s is in golang-github-klauspost-compress-dev 1.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | //+build !noasm !appengine
// Copyright 2015, Klaus Post, see LICENSE for details.
// func crc32sse(a []byte) hash
TEXT ·crc32sse(SB), 7, $0
MOVQ a+0(FP), R10
XORQ BX, BX
// CRC32 dword (R10), EBX
BYTE $0xF2; BYTE $0x41; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0x1a
MOVL BX, ret+24(FP)
RET
// func crc32sseAll(a []byte, dst []hash)
TEXT ·crc32sseAll(SB), 7, $0
MOVQ a+0(FP), R8 // R8: src
MOVQ a_len+8(FP), R10 // input length
MOVQ dst+24(FP), R9 // R9: dst
SUBQ $4, R10
JS end
JZ one_crc
MOVQ R10, R13
SHRQ $2, R10 // len/4
ANDQ $3, R13 // len&3
XORQ BX, BX
ADDQ $1, R13
TESTQ R10, R10
JZ rem_loop
crc_loop:
MOVQ (R8), R11
XORQ BX, BX
XORQ DX, DX
XORQ DI, DI
MOVQ R11, R12
SHRQ $8, R11
MOVQ R12, AX
MOVQ R11, CX
SHRQ $16, R12
SHRQ $16, R11
MOVQ R12, SI
// CRC32 EAX, EBX
BYTE $0xF2; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
// CRC32 ECX, EDX
BYTE $0xF2; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0xd1
// CRC32 ESI, EDI
BYTE $0xF2; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0xfe
MOVL BX, (R9)
MOVL DX, 4(R9)
MOVL DI, 8(R9)
XORQ BX, BX
MOVL R11, AX
// CRC32 EAX, EBX
BYTE $0xF2; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
MOVL BX, 12(R9)
ADDQ $16, R9
ADDQ $4, R8
XORQ BX, BX
SUBQ $1, R10
JNZ crc_loop
rem_loop:
MOVL (R8), AX
// CRC32 EAX, EBX
BYTE $0xF2; BYTE $0x0f
BYTE $0x38; BYTE $0xf1; BYTE $0xd8
MOVL BX, (R9)
ADDQ $4, R9
ADDQ $1, R8
XORQ BX, BX
SUBQ $1, R13
JNZ rem_loop
end:
RET
one_crc:
MOVQ $1, R13
XORQ BX, BX
JMP rem_loop
// func matchLenSSE4(a, b []byte, max int) int
TEXT ·matchLenSSE4(SB), 7, $0
MOVQ a+0(FP), SI // RSI: &a
MOVQ b+24(FP), DI // RDI: &b
MOVQ max+48(FP), R10 // R10: max
XORQ R11, R11 // R11: match length
MOVQ R10, R12 // R12: Remainder
SHRQ $4, R10 // max / 16
MOVQ $16, AX // Set length for PCMPESTRI
MOVQ $16, DX // Set length for PCMPESTRI
ANDQ $15, R12 // max & 15
TESTQ R10, R10
JZ matchlen_verysmall
loopback_matchlen:
MOVOU (SI), X0 // a[x]
MOVOU (DI), X1 // b[x]
// PCMPESTRI $0x18, X1, X0
// 0x18 = _SIDD_UBYTE_OPS (0x0) | _SIDD_CMP_EQUAL_EACH (0x8) | _SIDD_NEGATIVE_POLARITY (0x10)
BYTE $0x66; BYTE $0x0f; BYTE $0x3a
BYTE $0x61; BYTE $0xc1; BYTE $0x18
JC match_ended
ADDQ $16, SI
ADDQ $16, DI
ADDQ $16, R11
SUBQ $1, R10
JNZ loopback_matchlen
// Check the remainder using REP CMPSB
matchlen_verysmall:
TESTQ R12, R12
JZ done_matchlen
MOVQ R12, CX
ADDQ R12, R11
// Compare CX bytes at [SI] [DI]
// Subtract one from CX for every match.
// Terminates when CX is zero (checked pre-compare)
CLD
REP; CMPSB
// Check if last was a match.
JZ done_matchlen
// Subtract remanding bytes.
SUBQ CX, R11
SUBQ $1, R11
MOVQ R11, ret+56(FP)
RET
match_ended:
ADDQ CX, R11
done_matchlen:
MOVQ R11, ret+56(FP)
RET
// func histogram(b []byte, h []int32)
TEXT ·histogram(SB), 7, $0
MOVQ b+0(FP), SI // SI: &b
MOVQ b_len+8(FP), R9 // R9: len(b)
MOVQ h+24(FP), DI // DI: Histogram
MOVQ R9, R8
SHRQ $3, R8
JZ hist1
XORQ R11, R11
loop_hist8:
MOVQ (SI), R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
MOVB R10, R11
INCL (DI)(R11*4)
SHRQ $8, R10
INCL (DI)(R10*4)
ADDQ $8, SI
DECQ R8
JNZ loop_hist8
hist1:
ANDQ $7, R9
JZ end_hist
XORQ R10, R10
loop_hist1:
MOVB (SI), R10
INCL (DI)(R10*4)
INCQ SI
DECQ R9
JNZ loop_hist1
end_hist:
RET
|