#define GF_BYTE_LEN 16

#if defined( USE_INLINES )
# if defined( _MSC_VER )
#  define gf_inline __inline
# elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#  define gf_inline static inline
# else
#  define gf_inline static
# endif
#endif

#if defined(__cplusplus)
extern "C"
{
#endif

extern const unsigned short gf_tab[256];
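/*  How gf_tab is generated is illustrative background rather than part
    of this interface.  On the assumption that gf_tab[i] holds the two
    reduction bytes produced when the byte value i is shifted out at the
    x^127 end of a field element (GCM's polynomial x^128 + x^7 + x^2 +
    x + 1, which is 0xe1 in this bit reversed representation), and that
    the byte destined for buffer position 0 lies at the lower memory
    address (making the numeric 16-bit value endian dependent), a sketch
    of a generator is:
*/
#if 0
static void init_gf_tab(unsigned char tab[256][2])  /* gf_tab as bytes */
{   int i, b;
    for(i = 0; i < 256; ++i)
    {   unsigned short w = 0;
        for(b = 0; b < 8; ++b)      /* each set bit of i contributes a */
            if(i & (1 << b))        /* shifted copy of the polynomial  */
                w ^= (unsigned short)(0xe100 >> (7 - b));
        tab[i][0] = (unsigned char)(w >> 8);    /* byte for position 0 */
        tab[i][1] = (unsigned char)(w & 0xff);  /* byte for position 1 */
    }
}
#endif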
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/*  In outline a multiply by x takes the form:

        gf_inline void mul_x(void *r, const void *x)
        {   uint_32t _tt;
            _tt = gf_tab[(ui32_ptr(r)[3] << 7) & 0xff];
            ...
        }

    but the details depend on the buffer unit size and on the platform
    byte order, so the variants below are selected at compile time.
*/
#define MSK_80 (0x80 * (unit_cast(BFR_UNIT,-1) / 0xff))
#define MSK_F0 (0xf0 * (unit_cast(BFR_UNIT,-1) / 0xff))
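/*  Assuming unit_cast(BFR_UNIT,-1) gives the all ones value of the
    buffer unit type, these masks select the top bit or top nibble of
    every byte within a unit.  For example, with BFR_UNIT == 32 the
    factor unit_cast(BFR_UNIT,-1) / 0xff is 0x01010101, so MSK_80 is
    0x80808080 and MSK_F0 is 0xf0f0f0f0.
*/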
#if defined( USE_INLINES )

#if BFR_UNIT == 64

gf_inline void mul_x(void *r, const void *x)
{   uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 49) & MSK_80];
    ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) & ~MSK_80
        | ((ui64_ptr(x)[1] << 15) | (ui64_ptr(x)[0] >> 49)) & MSK_80;
    ui64_ptr(r)[0] = ((ui64_ptr(x)[0] >> 1) & ~MSK_80
        | (ui64_ptr(x)[0] << 15) & MSK_80) ^ _tt;
}
#if defined( VERSION_1 )
gf_inline void mul_x4(void *x)
{   uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & MSK_F0];
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) & ~MSK_F0
        | ((ui64_ptr(x)[1] << 12) | (ui64_ptr(x)[0] >> 52)) & MSK_F0;
    ui64_ptr(x)[0] = ((ui64_ptr(x)[0] >> 4) & ~MSK_F0
        | (ui64_ptr(x)[0] << 12) & MSK_F0) ^ _tt;
}
#else

gf_inline void mul_x4(void *x)
{   uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0];
    bswap64_block(x, x, 2);
    ui64_ptr(x)[1] = bswap_64((ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60));
    ui64_ptr(x)[0] = bswap_64((ui64_ptr(x)[0] >> 4)) ^ _tt;
}

#endif
gf_inline void mul_x8(void *x)
{   uint_64t _tt = gf_tab[ui64_ptr(x)[1] >> 56];
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] << 8) | (ui64_ptr(x)[0] >> 56);
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] << 8) ^ _tt;
}

#elif BFR_UNIT == 32
gf_inline void mul_x(void *r, const void *x)
{   uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 17) & MSK_80];
    ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) & ~MSK_80
        | ((ui32_ptr(x)[3] << 15) | (ui32_ptr(x)[2] >> 17)) & MSK_80;
    ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) & ~MSK_80
        | ((ui32_ptr(x)[2] << 15) | (ui32_ptr(x)[1] >> 17)) & MSK_80;
    ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) & ~MSK_80
        | ((ui32_ptr(x)[1] << 15) | (ui32_ptr(x)[0] >> 17)) & MSK_80;
    ui32_ptr(r)[0] = ((ui32_ptr(x)[0] >> 1) & ~MSK_80
        | (ui32_ptr(x)[0] << 15) & MSK_80) ^ _tt;
}
#if defined( VERSION_1 )
gf_inline void mul_x4(void *x)
{   uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & MSK_F0];
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) & ~MSK_F0
        | ((ui32_ptr(x)[3] << 12) | (ui32_ptr(x)[2] >> 20)) & MSK_F0;
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) & ~MSK_F0
        | ((ui32_ptr(x)[2] << 12) | (ui32_ptr(x)[1] >> 20)) & MSK_F0;
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) & ~MSK_F0
        | ((ui32_ptr(x)[1] << 12) | (ui32_ptr(x)[0] >> 20)) & MSK_F0;
    ui32_ptr(x)[0] = ((ui32_ptr(x)[0] >> 4) & ~MSK_F0
        | (ui32_ptr(x)[0] << 12) & MSK_F0) ^ _tt;
}
#else

gf_inline void mul_x4(void *x)
{   uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & 0xf0];
    bswap32_block(x, x, 4);
    ui32_ptr(x)[3] = bswap_32((ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28));
    ui32_ptr(x)[2] = bswap_32((ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28));
    ui32_ptr(x)[1] = bswap_32((ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28));
    ui32_ptr(x)[0] = bswap_32((ui32_ptr(x)[0] >> 4)) ^ _tt;
}

#endif
gf_inline void mul_x8(void *x)
{   uint_32t _tt = gf_tab[ui32_ptr(x)[3] >> 24];
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] << 8) | (ui32_ptr(x)[2] >> 24);
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] << 8) | (ui32_ptr(x)[1] >> 24);
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] << 8) | (ui32_ptr(x)[0] >> 24);
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] << 8) ^ _tt;
}

#else
gf_inline void mul_x(void *r, const void *x)
{   uint_8t _tt = ui8_ptr(x)[15] & 1;
    int i;
    for(i = 15; i > 0; --i)
        ui8_ptr(r)[i] = (ui8_ptr(x)[i] >> 1) | (ui8_ptr(x)[i - 1] << 7);
    ui8_ptr(r)[0] = (ui8_ptr(x)[0] >> 1) ^ (_tt ? 0xe1 : 0x00);
}
gf_inline void mul_x4(void *x)
{   uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff];
    int i;
    for(i = 15; i > 1; --i)
        ui8_ptr(x)[i] = (ui8_ptr(x)[i] >> 4) | (ui8_ptr(x)[i - 1] << 4);
    ui8_ptr(x)[1] = ((ui8_ptr(x)[1] >> 4) | (ui8_ptr(x)[0] << 4)) ^ (_tt >> 8);
    ui8_ptr(x)[0] = (ui8_ptr(x)[0] >> 4) ^ (_tt & 0xff);
}
gf_inline void mul_x8(void *x)
{   uint_16t _tt = gf_tab[ui8_ptr(x)[15]];
    memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15);
    ui8_ptr(x)[1] ^= (_tt >> 8);
    ui8_ptr(x)[0] = (_tt & 0xff);
}

#endif

#else

#if BFR_UNIT == 64
#define mul_x(r, x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 49) & MSK_80]; \
    ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) & ~MSK_80 \
        | ((ui64_ptr(x)[1] << 15) | (ui64_ptr(x)[0] >> 49)) & MSK_80; \
    ui64_ptr(r)[0] = ((ui64_ptr(x)[0] >> 1) & ~MSK_80 \
        | (ui64_ptr(x)[0] << 15) & MSK_80) ^ _tt; \
} while(0)
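/*  A note on the masked shifts above: buffer byte n holds field bits
    8n..8n+7 with the lower numbered bit in the more significant bit
    position, so a multiply by x moves each byte one place toward its
    0x01 end and carries its 0x01 bit into the 0x80 position of the
    following byte.  Within one little endian unit:

        (v >> 1) & ~MSK_80      shifts every byte down by one bit
        (v << 15) &  MSK_80     moves each byte's 0x01 bit into the
                                next byte's 0x80 position

    For example, with BFR_UNIT == 32 and v = 0x00000301:

        (v >> 1)  & ~MSK_80  ==  0x00000100
        (v << 15) &  MSK_80  ==  0x00808000
*/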
#if defined( VERSION_1 )
#define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & MSK_F0]; \
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui64_ptr(x)[1] << 12) \
        | (ui64_ptr(x)[0] >> 52)) & MSK_F0; \
    ui64_ptr(x)[0] = ((ui64_ptr(x)[0] >> 4) & ~MSK_F0 \
        | (ui64_ptr(x)[0] << 12) & MSK_F0) ^ _tt; \
} while(0)
#else

#define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0]; \
    bswap64_block(x, x, 2); \
    ui64_ptr(x)[1] = bswap_64((ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60)); \
    ui64_ptr(x)[0] = bswap_64((ui64_ptr(x)[0] >> 4)) ^ _tt; \
} while(0)

#endif
#define mul_x8(x) do { uint_64t _tt = gf_tab[ui64_ptr(x)[1] >> 56]; \
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] << 8) | (ui64_ptr(x)[0] >> 56); \
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] << 8) ^ _tt; \
} while(0)

#elif BFR_UNIT == 32
#define mul_x(r, x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 17) & MSK_80]; \
    ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) & ~MSK_80 | ((ui32_ptr(x)[3] << 15) \
        | (ui32_ptr(x)[2] >> 17)) & MSK_80; \
    ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) & ~MSK_80 | ((ui32_ptr(x)[2] << 15) \
        | (ui32_ptr(x)[1] >> 17)) & MSK_80; \
    ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) & ~MSK_80 | ((ui32_ptr(x)[1] << 15) \
        | (ui32_ptr(x)[0] >> 17)) & MSK_80; \
    ui32_ptr(r)[0] = ((ui32_ptr(x)[0] >> 1) & ~MSK_80 \
        | (ui32_ptr(x)[0] << 15) & MSK_80) ^ _tt; \
} while(0)
#if defined( VERSION_1 )
#define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & MSK_F0]; \
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[3] << 12) \
        | (ui32_ptr(x)[2] >> 20)) & MSK_F0; \
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[2] << 12) \
        | (ui32_ptr(x)[1] >> 20)) & MSK_F0; \
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[1] << 12) \
        | (ui32_ptr(x)[0] >> 20)) & MSK_F0; \
    ui32_ptr(x)[0] = ((ui32_ptr(x)[0] >> 4) & ~MSK_F0 \
        | (ui32_ptr(x)[0] << 12) & MSK_F0) ^ _tt; \
} while(0)
#else

#define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & 0xf0]; \
    bswap32_block(x, x, 4); \
    ui32_ptr(x)[3] = bswap_32((ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28)); \
    ui32_ptr(x)[2] = bswap_32((ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28)); \
    ui32_ptr(x)[1] = bswap_32((ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28)); \
    ui32_ptr(x)[0] = bswap_32((ui32_ptr(x)[0] >> 4)) ^ _tt; \
} while(0)

#endif
#define mul_x8(x) do { uint_32t _tt = gf_tab[ui32_ptr(x)[3] >> 24]; \
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] << 8) | (ui32_ptr(x)[2] >> 24); \
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] << 8) | (ui32_ptr(x)[1] >> 24); \
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] << 8) | (ui32_ptr(x)[0] >> 24); \
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] << 8) ^ _tt; \
} while(0)

#else
#define mul_x(r, x) do { uint_8t _tt = ui8_ptr(x)[15] & 1; \
    ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7); \
    ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7); \
    ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7); \
    ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7); \
    ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7); \
    ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7); \
    ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7); \
    ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7); \
    ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7); \
    ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7); \
    ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7); \
    ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7); \
    ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7); \
    ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7); \
    ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7); \
    ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00); \
} while(0)
#define mul_x4(x) do { uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff]; \
    ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4); \
    ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4); \
    ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4); \
    ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4); \
    ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4); \
    ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4); \
    ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4); \
    ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4); \
    ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4); \
    ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4); \
    ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4); \
    ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4); \
    ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4); \
    ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4); \
    ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt >> 8); \
    ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt & 0xff); \
} while(0)
#define mul_x8(x) do { uint_16t _tt = gf_tab[ui8_ptr(x)[15]]; \
    memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15); \
    ui8_ptr(x)[1] ^= (_tt >> 8); \
    ui8_ptr(x)[0] = (_tt & 0xff); \
} while(0)

#endif

#endif
#elif PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
#if defined( USE_INLINES )

#if BFR_UNIT == 64

gf_inline void mul_x(void *r, const void *x)
{   uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 7) & 0xff];
    ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) | (ui64_ptr(x)[0] << 63);
    ui64_ptr(r)[0] = (ui64_ptr(x)[0] >> 1) ^ (_tt << 48);
}
gf_inline void mul_x4(void *x)
{   uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 4) & 0xff];
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60);
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 4) ^ (_tt << 48);
}
gf_inline void mul_x8(void *x)
{   uint_64t _tt = gf_tab[ui64_ptr(x)[1] & 0xff];
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 8) | (ui64_ptr(x)[0] << 56);
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 8) ^ (_tt << 48);
}

#elif BFR_UNIT == 32
gf_inline void mul_x(void *r, const void *x)
{   uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 7) & 0xff];
    ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) | (ui32_ptr(x)[2] << 31);
    ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) | (ui32_ptr(x)[1] << 31);
    ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) | (ui32_ptr(x)[0] << 31);
    ui32_ptr(r)[0] = (ui32_ptr(x)[0] >> 1) ^ (_tt << 16);
}
gf_inline void mul_x4(void *x)
{   uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 4) & 0xff];
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28);
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28);
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28);
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 4) ^ (_tt << 16);
}
gf_inline void mul_x8(void *x)
{   uint_32t _tt = gf_tab[ui32_ptr(x)[3] & 0xff];
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 8) | (ui32_ptr(x)[2] << 24);
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 8) | (ui32_ptr(x)[1] << 24);
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 8) | (ui32_ptr(x)[0] << 24);
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 8) ^ (_tt << 16);
}

#else
gf_inline void mul_x(void *r, const void *x)
{   uint_8t _tt = ui8_ptr(x)[15] & 1;
    int i;
    for(i = 15; i > 0; --i)
        ui8_ptr(r)[i] = (ui8_ptr(x)[i] >> 1) | (ui8_ptr(x)[i - 1] << 7);
    ui8_ptr(r)[0] = (ui8_ptr(x)[0] >> 1) ^ (_tt ? 0xe1 : 0x00);
}
gf_inline void mul_x4(void *x)
{   uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff];
    int i;
    for(i = 15; i > 1; --i)
        ui8_ptr(x)[i] = (ui8_ptr(x)[i] >> 4) | (ui8_ptr(x)[i - 1] << 4);
    ui8_ptr(x)[1] = ((ui8_ptr(x)[1] >> 4) | (ui8_ptr(x)[0] << 4)) ^ (_tt & 0xff);
    ui8_ptr(x)[0] = (ui8_ptr(x)[0] >> 4) ^ (_tt >> 8);
}
gf_inline void mul_x8(void *x)
{   uint_16t _tt = gf_tab[ui8_ptr(x)[15]];
    memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15);
    ui8_ptr(x)[1] ^= (_tt & 0xff);
    ui8_ptr(x)[0] = (_tt >> 8);
}

#endif

#else

#if BFR_UNIT == 64
#define mul_x(r, x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 7) & 0xff]; \
    ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) | (ui64_ptr(x)[0] << 63); \
    ui64_ptr(r)[0] = (ui64_ptr(x)[0] >> 1) ^ (_tt << 48); \
} while(0)
#define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 4) & 0xff]; \
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60); \
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 4) ^ (_tt << 48); \
} while(0)
#define mul_x8(x) do { uint_64t _tt = gf_tab[ui64_ptr(x)[1] & 0xff]; \
    ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 8) | (ui64_ptr(x)[0] << 56); \
    ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 8) ^ (_tt << 48); \
} while(0)

#elif BFR_UNIT == 32
#define mul_x(r, x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 7) & 0xff]; \
    ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) | (ui32_ptr(x)[2] << 31); \
    ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) | (ui32_ptr(x)[1] << 31); \
    ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) | (ui32_ptr(x)[0] << 31); \
    ui32_ptr(r)[0] = (ui32_ptr(x)[0] >> 1) ^ (_tt << 16); \
} while(0)
#define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 4) & 0xff]; \
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28); \
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28); \
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28); \
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 4) ^ (_tt << 16); \
} while(0)
#define mul_x8(x) do { uint_32t _tt = gf_tab[ui32_ptr(x)[3] & 0xff]; \
    ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 8) | (ui32_ptr(x)[2] << 24); \
    ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 8) | (ui32_ptr(x)[1] << 24); \
    ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 8) | (ui32_ptr(x)[0] << 24); \
    ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 8) ^ (_tt << 16); \
} while(0)

#else
#define mul_x(r, x) do { uint_8t _tt = ui8_ptr(x)[15] & 1; \
    ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7); \
    ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7); \
    ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7); \
    ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7); \
    ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7); \
    ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7); \
    ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7); \
    ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7); \
    ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7); \
    ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7); \
    ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7); \
    ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7); \
    ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7); \
    ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7); \
    ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7); \
    ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00); \
} while(0)
#define mul_x4(x) do { uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff]; \
    ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4); \
    ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4); \
    ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4); \
    ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4); \
    ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4); \
    ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4); \
    ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4); \
    ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4); \
    ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4); \
    ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4); \
    ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4); \
    ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4); \
    ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4); \
    ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4); \
    ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt & 0xff); \
    ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt >> 8); \
} while(0)
#define mul_x8(x) do { uint_16t _tt = gf_tab[ui8_ptr(x)[15]]; \
    memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15); \
    ui8_ptr(x)[1] ^= (_tt & 0xff); \
    ui8_ptr(x)[0] = (_tt >> 8); \
} while(0)

#endif

#endif
#else
# error Platform byte order has not been set.
#endif
void gf_mul(void *a, const void *b);
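/*  A usage sketch (hypothetical driver code, not part of this header),
    assuming gf_mul(a, b) computes a = a * b on 16 byte field elements
    and that <string.h> and the declarations above are visible:
*/
#if 0
static void ghash_blocks(unsigned char y[GF_BYTE_LEN],        /* accumulator */
                         const unsigned char h[GF_BYTE_LEN],  /* hash key    */
                         const unsigned char *c, int n_blocks)
{   int i, j;
    for(j = 0; j < n_blocks; ++j)
    {   for(i = 0; i < GF_BYTE_LEN; ++i)
            y[i] ^= c[GF_BYTE_LEN * j + i];     /* y ^= next block */
        gf_mul(y, h);                           /* y = y * h       */
    }
}
#endif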
typedef uint_32t (*gf_t64k)[256][GF_BYTE_LEN >> 2];
#define tab64k(x) ((gf_t64k)x)
#define xor_64k(i,a,t,r) xor_block_aligned(r, tab64k(t)[i][a[i]])
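/*  The 64k table for a fixed element h holds, for each byte position i
    and byte value b, the product of h with the element that has b at
    buffer position i and zeros elsewhere, so a full multiply is just
    sixteen lookups XORed together.  A build sketch follows; the helper
    name is hypothetical, and real code would normally build the table
    incrementally rather than call gf_mul 4096 times:
*/
#if 0
static void init_64k_table(const unsigned char h[GF_BYTE_LEN], void *t)
{   int i, b;
    for(i = 0; i < GF_BYTE_LEN; ++i)
        for(b = 0; b < 256; ++b)
        {   unsigned char v[GF_BYTE_LEN] = {0};
            v[i] = (unsigned char)b;            /* b at position i */
            gf_mul(v, h);                       /* v = v * h       */
            memcpy(tab64k(t)[i][b], v, GF_BYTE_LEN);
        }
}
#endif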
#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
{   move_block_aligned(r, tab64k(t)[0][a[0]]);
    xor_64k( 1, a, t, r);
    xor_64k( 2, a, t, r); xor_64k( 3, a, t, r);
    xor_64k( 4, a, t, r); xor_64k( 5, a, t, r);
    xor_64k( 6, a, t, r); xor_64k( 7, a, t, r);
    xor_64k( 8, a, t, r); xor_64k( 9, a, t, r);
    xor_64k(10, a, t, r); xor_64k(11, a, t, r);
    xor_64k(12, a, t, r); xor_64k(13, a, t, r);
    xor_64k(14, a, t, r); xor_64k(15, a, t, r);
    move_block_aligned(a, r);
}

#else

gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
{   int i;
    move_block_aligned(r, tab64k(t)[0][a[0]]);
    for(i = 1; i < GF_BYTE_LEN; ++i)
    {   xor_64k(i, a, t, r);
    }
    move_block_aligned(a, r);
}

#endif

#else
#if defined( UNROLL_LOOPS )

#define gf_mul_64k(a, t, r) do { \
    move_block_aligned(r, tab64k(t)[0][a[0]]); \
    xor_64k( 1, a, t, r); \
    xor_64k( 2, a, t, r); xor_64k( 3, a, t, r); \
    xor_64k( 4, a, t, r); xor_64k( 5, a, t, r); \
    xor_64k( 6, a, t, r); xor_64k( 7, a, t, r); \
    xor_64k( 8, a, t, r); xor_64k( 9, a, t, r); \
    xor_64k(10, a, t, r); xor_64k(11, a, t, r); \
    xor_64k(12, a, t, r); xor_64k(13, a, t, r); \
    xor_64k(14, a, t, r); xor_64k(15, a, t, r); \
    move_block_aligned(a, r); \
} while(0)

#else
#define gf_mul_64k(a, t, r) do { int i; \
    move_block_aligned(r, tab64k(t)[0][a[0]]); \
    for(i = 1; i < GF_BYTE_LEN; ++i) \
    {   xor_64k(i, a, t, r); \
    } \
    move_block_aligned(a, r); \
} while(0)

#endif

#endif
typedef uint_32t (*gf_t8k)[16][GF_BYTE_LEN >> 2];
#define tab8k(x) ((gf_t8k)x)
#define xor_8k(i,a,t,r) \
    xor_block_aligned(r, tab8k(t)[i + i][a[i] & 15]); \
    xor_block_aligned(r, tab8k(t)[i + i + 1][a[i] >> 4])
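/*  The 8k variant replaces each 256 entry byte table with two 16 entry
    nibble tables: tab8k(t)[2i] covers the low nibble of a[i] and
    tab8k(t)[2i+1] the high nibble.  A build sketch (hypothetical
    helper, by analogy with the 64k case):
*/
#if 0
static void init_8k_table(const unsigned char h[GF_BYTE_LEN], void *t)
{   int i, b;
    for(i = 0; i < GF_BYTE_LEN; ++i)
        for(b = 0; b < 16; ++b)
        {   unsigned char lo[GF_BYTE_LEN] = {0}, hi[GF_BYTE_LEN] = {0};
            lo[i] = (unsigned char)b;           /* low nibble value  */
            hi[i] = (unsigned char)(b << 4);    /* high nibble value */
            gf_mul(lo, h); gf_mul(hi, h);
            memcpy(tab8k(t)[i + i][b], lo, GF_BYTE_LEN);
            memcpy(tab8k(t)[i + i + 1][b], hi, GF_BYTE_LEN);
        }
}
#endif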
#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
{   move_block_aligned(r, tab8k(t)[0][a[0] & 15]);
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);
    xor_8k( 1, a, t, r); xor_8k( 2, a, t, r); xor_8k( 3, a, t, r);
    xor_8k( 4, a, t, r); xor_8k( 5, a, t, r); xor_8k( 6, a, t, r); xor_8k( 7, a, t, r);
    xor_8k( 8, a, t, r); xor_8k( 9, a, t, r); xor_8k(10, a, t, r); xor_8k(11, a, t, r);
    xor_8k(12, a, t, r); xor_8k(13, a, t, r); xor_8k(14, a, t, r); xor_8k(15, a, t, r);
    memcpy(a, r, GF_BYTE_LEN);
}

#else
gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
{   int i;
    memcpy(r, tab8k(t)[0][a[0] & 15], GF_BYTE_LEN);
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);
    for(i = 1; i < GF_BYTE_LEN; ++i)
    {   xor_8k(i, a, t, r);
    }
    memcpy(a, r, GF_BYTE_LEN);
}

#endif

#else
#if defined( UNROLL_LOOPS )

#define gf_mul_8k(a, t, r) do { \
    move_block_aligned(r, tab8k(t)[0][a[0] & 15]); \
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]); \
    xor_8k( 1, a, t, r); xor_8k( 2, a, t, r); \
    xor_8k( 3, a, t, r); xor_8k( 4, a, t, r); \
    xor_8k( 5, a, t, r); xor_8k( 6, a, t, r); \
    xor_8k( 7, a, t, r); xor_8k( 8, a, t, r); \
    xor_8k( 9, a, t, r); xor_8k(10, a, t, r); \
    xor_8k(11, a, t, r); xor_8k(12, a, t, r); \
    xor_8k(13, a, t, r); xor_8k(14, a, t, r); \
    xor_8k(15, a, t, r); move_block_aligned(a, r); \
} while(0)

#else
#define gf_mul_8k(a, t, r) do { int i; \
    memcpy(r, tab8k(t)[0][a[0] & 15], GF_BYTE_LEN); \
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]); \
    for(i = 1; i < GF_BYTE_LEN; ++i) \
    {   xor_8k(i, a, t, r); \
    } \
    memcpy(a, r, GF_BYTE_LEN); \
} while(0)

#endif

#endif
typedef uint_32t (*gf_t4k)[GF_BYTE_LEN >> 2];
#define tab4k(x) ((gf_t4k)x)
#define xor_4k(i,a,t,r) mul_x8(r); xor_block_aligned(r, tab4k(t)[a[i]])
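/*  The 4k table needs only tab4k(t)[b] = h multiplied by the element
    with byte value b at buffer position 0; gf_mul_4k then evaluates
    the product by Horner's rule over the 16 bytes, multiplying the
    accumulator by x^8 (mul_x8) between byte lookups.  A build sketch
    (hypothetical helper):
*/
#if 0
static void init_4k_table(const unsigned char h[GF_BYTE_LEN], void *t)
{   int b;
    for(b = 0; b < 256; ++b)
    {   unsigned char v[GF_BYTE_LEN] = {0};
        v[0] = (unsigned char)b;                /* b at position 0 */
        gf_mul(v, h);                           /* v = v * h       */
        memcpy(tab4k(t)[b], v, GF_BYTE_LEN);
    }
}
#endif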
#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
{   move_block_aligned(r, tab4k(t)[a[15]]);
    xor_4k(14, a, t, r); xor_4k(13, a, t, r); xor_4k(12, a, t, r);
    xor_4k(11, a, t, r); xor_4k(10, a, t, r); xor_4k( 9, a, t, r);
    xor_4k( 8, a, t, r); xor_4k( 7, a, t, r); xor_4k( 6, a, t, r);
    xor_4k( 5, a, t, r); xor_4k( 4, a, t, r); xor_4k( 3, a, t, r);
    xor_4k( 2, a, t, r); xor_4k( 1, a, t, r); xor_4k( 0, a, t, r);
    move_block_aligned(a, r);
}

#else

gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
{   int i = 15;
    move_block_aligned(r, tab4k(t)[a[15]]);
    while(i--)
    {   xor_4k(i, a, t, r);
    }
    move_block_aligned(a, r);
}

#endif

#else
#if defined( UNROLL_LOOPS )

#define gf_mul_4k(a, t, r) do { \
    move_block_aligned(r, tab4k(t)[a[15]]); \
    xor_4k(14, a, t, r); xor_4k(13, a, t, r); xor_4k(12, a, t, r); \
    xor_4k(11, a, t, r); xor_4k(10, a, t, r); xor_4k( 9, a, t, r); \
    xor_4k( 8, a, t, r); xor_4k( 7, a, t, r); xor_4k( 6, a, t, r); \
    xor_4k( 5, a, t, r); xor_4k( 4, a, t, r); xor_4k( 3, a, t, r); \
    xor_4k( 2, a, t, r); xor_4k( 1, a, t, r); xor_4k( 0, a, t, r); \
    move_block_aligned(a, r); \
} while(0)

#else
#define gf_mul_4k(a, t, r) do { int i = 15; \
    move_block_aligned(r, tab4k(t)[a[15]]); \
    while(i--) \
    {   xor_4k(i, a, t, r); \
    } \
    move_block_aligned(a, r); \
} while(0)

#endif

#endif
typedef uint_32t (*gf_t256)[GF_BYTE_LEN >> 2];
#define tab256(t) ((gf_t256)t)
#define xor_256(i,a,t,r) \
    mul_x4(r); xor_block_aligned(r, tab256(t)[a[i] & 15]); \
    mul_x4(r); xor_block_aligned(r, tab256(t)[a[i] >> 4])
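/*  The 256 byte table holds tab256(t)[b] = h multiplied by the element
    with the 4-bit value b in the lowest degree nibble of buffer byte 0
    (the 0xf0 half of the byte in this bit reversed representation);
    gf_mul_256 then applies Horner's rule over all 32 nibbles, with a
    mul_x4 between lookups.  A build sketch (hypothetical helper):
*/
#if 0
static void init_256_table(const unsigned char h[GF_BYTE_LEN], void *t)
{   int b;
    for(b = 0; b < 16; ++b)
    {   unsigned char v[GF_BYTE_LEN] = {0};
        v[0] = (unsigned char)(b << 4);         /* lowest degree nibble */
        gf_mul(v, h);                           /* v = v * h            */
        memcpy(tab256(t)[b], v, GF_BYTE_LEN);
    }
}
#endif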
#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
{   move_block_aligned(r, tab256(t)[a[15] & 15]); mul_x4(r);
    xor_block_aligned(r, tab256(t)[a[15] >> 4]);
    xor_256(14, a, t, r); xor_256(13, a, t, r); xor_256(12, a, t, r);
    xor_256(11, a, t, r); xor_256(10, a, t, r); xor_256( 9, a, t, r);
    xor_256( 8, a, t, r); xor_256( 7, a, t, r); xor_256( 6, a, t, r);
    xor_256( 5, a, t, r); xor_256( 4, a, t, r); xor_256( 3, a, t, r);
    xor_256( 2, a, t, r); xor_256( 1, a, t, r); xor_256( 0, a, t, r);
    move_block_aligned(a, r);
}

#else

gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
{   int i = 15;
    move_block_aligned(r, tab256(t)[a[15] & 15]); mul_x4(r);
    xor_block_aligned(r, tab256(t)[a[15] >> 4]);
    while(i--)
    {   xor_256(i, a, t, r);
    }
    move_block_aligned(a, r);
}

#endif

#else
#if defined( UNROLL_LOOPS )

#define gf_mul_256(a, t, r) do { \
    move_block_aligned(r, tab256(t)[a[15] & 15]); mul_x4(r); \
    xor_block_aligned(r, tab256(t)[a[15] >> 4]); \
    xor_256(14, a, t, r); xor_256(13, a, t, r); \
    xor_256(12, a, t, r); xor_256(11, a, t, r); \
    xor_256(10, a, t, r); xor_256( 9, a, t, r); \
    xor_256( 8, a, t, r); xor_256( 7, a, t, r); \
    xor_256( 6, a, t, r); xor_256( 5, a, t, r); \
    xor_256( 4, a, t, r); xor_256( 3, a, t, r); \
    xor_256( 2, a, t, r); xor_256( 1, a, t, r); \
    xor_256( 0, a, t, r); move_block_aligned(a, r); \
} while(0)

#else
#define gf_mul_256(a, t, r) do { int i = 15; \
    move_block_aligned(r, tab256(t)[a[15] & 15]); mul_x4(r); \
    xor_block_aligned(r, tab256(t)[a[15] >> 4]); \
    while(i--) \
    {   xor_256(i, a, t, r); \
    } \
    move_block_aligned(a, r); \
} while(0)

#endif

#endif
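/*  All four table driven multipliers compute a = a * h for the element
    h compiled into the table t, using r as 16 byte scratch space; they
    trade table size (64k, 8k, 4k and 256 bytes) against the number of
    lookups and shifts needed per multiply.  A hypothetical call that
    sets a = a * h using the 4k table:

        gf_mul_4k(a, t, r);
*/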
#if defined(__cplusplus)
}
#endif