43 #if SPH_64_TRUE && !defined SPH_LUFFA_PARALLEL
44 #define SPH_LUFFA_PARALLEL 1
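/*
 * Sketch of the "parallel" mode enabled above: when the platform has a true
 * 64-bit type (SPH_64_TRUE), two 32-bit state words from two different
 * 256-bit chains are packed into one sph_u64, so a single SUB_CRUMBW /
 * MIX_WORDW call processes both lanes at once. The packing and unpacking
 * convention used later in this file (chain 0 in the low half, chain 1 in
 * the high half) is:
 *
 *   W0 = (sph_u64)V00 | ((sph_u64)V10 << 32);     pack
 *   V00 = SPH_T32((sph_u32)W0);                   unpack low lane
 *   V10 = SPH_T32((sph_u32)(W0 >> 32));           unpack high lane
 */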
48 #pragma warning (disable: 4146)
51 static const sph_u32 V_INIT[5][8] = {
80 static const sph_u32 RC00[8] = {
87 static const sph_u32 RC04[8] = {
94 static const sph_u32 RC10[8] = {
101 static const sph_u32 RC14[8] = {
108 #if SPH_LUFFA_PARALLEL
110 static const sph_u64 RCW010[8] = {
111 SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
112 SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
113 SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
114 SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
117 static const sph_u64 RCW014[8] = {
118 SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
119 SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
120 SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
121 SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
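/*
 * The RCW010/RCW014 tables pack the per-chain round constants for the
 * parallel path: the low 32 bits of RCW010[i] are RC00[i] and the high
 * 32 bits are RC10[i]; RCW014 packs RC04[i] / RC14[i] the same way.
 * A sketch of the relation (illustration only; the code uses the
 * precomputed values above rather than building them at run time):
 *
 *   rcw010[i] = (sph_u64)RC00[i] | ((sph_u64)RC10[i] << 32);
 *   rcw014[i] = (sph_u64)RC04[i] | ((sph_u64)RC14[i] << 32);
 */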
126 static const sph_u32 RC20[8] = {
133 static const sph_u32 RC24[8] = {
140 static const sph_u32 RC30[8] = {
147 static const sph_u32 RC34[8] = {
154 #if SPH_LUFFA_PARALLEL
156 static const sph_u64 RCW230[8] = {
157 SPH_C64(0xb213afa5fc20d9d2), SPH_C64(0xc84ebe9534552e25),
158 SPH_C64(0x4e608a227ad8818f), SPH_C64(0x56d858fe8438764a),
159 SPH_C64(0x343b138fbb6de032), SPH_C64(0xd0ec4e3dedb780c8),
160 SPH_C64(0x2ceb4882d9847356), SPH_C64(0xb3ad2208a2c78434)
164 static const sph_u64 RCW234[8] = {
165 SPH_C64(0xe028c9bfe25e72c1), SPH_C64(0x44756f91e623bb72),
166 SPH_C64(0x7e8fce325c58a4a4), SPH_C64(0x956548be1e38e2e7),
167 SPH_C64(0xfe191be278e38b9d), SPH_C64(0x3cb226e527586719),
168 SPH_C64(0x5944a28e36eda57f), SPH_C64(0xa1c4c355703aace7)
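/*
 * Same packing for chains 2 and 3: RCW230[i] combines RC20[i] (low half)
 * with RC30[i] (high half), and RCW234[i] combines RC24[i] with RC34[i].
 */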
173 static const sph_u32 RC40[8] = {
180 static const sph_u32 RC44[8] = {
187 #define DECL_TMP8(w) \
188 sph_u32 w ## 0, w ## 1, w ## 2, w ## 3, w ## 4, w ## 5, w ## 6, w ## 7;
190 #define M2(d, s) do { \
191 sph_u32 tmp = s ## 7; \
195 d ## 4 = s ## 3 ^ tmp; \
196 d ## 3 = s ## 2 ^ tmp; \
198 d ## 1 = s ## 0 ^ tmp; \
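/*
 * M2 multiplies a 256-bit value, held as eight 32-bit words named by token
 * pasting (s ## 0 .. s ## 7), by 2 in the ring used by Luffa's message
 * injection: every word moves up one position and the word shifted out
 * (s7, saved in tmp) is folded back by XOR into a fixed subset of
 * positions, as in the d4/d3/d1 lines visible above. A hypothetical use,
 * assuming a0..a7 and b0..b7 were declared with DECL_TMP8:
 *
 *   DECL_TMP8(a)
 *   DECL_TMP8(b)
 *   M2(a, b);     expands to assignments such as a4 = b3 ^ tmp;
 */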
202 #define XOR(d, s1, s2) do { \
203 d ## 0 = s1 ## 0 ^ s2 ## 0; \
204 d ## 1 = s1 ## 1 ^ s2 ## 1; \
205 d ## 2 = s1 ## 2 ^ s2 ## 2; \
206 d ## 3 = s1 ## 3 ^ s2 ## 3; \
207 d ## 4 = s1 ## 4 ^ s2 ## 4; \
208 d ## 5 = s1 ## 5 ^ s2 ## 5; \
209 d ## 6 = s1 ## 6 ^ s2 ## 6; \
210 d ## 7 = s1 ## 7 ^ s2 ## 7; \
213 #if SPH_LUFFA_PARALLEL
215 #define SUB_CRUMB_GEN(a0, a1, a2, a3, width) do { \
216 sph_u ## width tmp; \
220 (a1) = SPH_T ## width(~(a1)); \
226 (a0) = SPH_T ## width(~(a0)); \
236 #define SUB_CRUMB(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 32)
237 #define SUB_CRUMBW(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 64)
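/*
 * SUB_CRUMB_GEN is the bit-sliced SubCrumb S-box: the four arguments hold
 * bits 0..3 of many 4-bit nibbles in parallel, so the whole substitution
 * is expressed with AND/OR/XOR/NOT on width-bit words. The width parameter
 * instantiates the same network for plain 32-bit words (SUB_CRUMB) and for
 * 64-bit words carrying two packed lanes (SUB_CRUMBW), via the
 * sph_u ## width / SPH_T ## width token pasting seen above.
 */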
242 #define ROL32W(x, n) SPH_T64( \
244 & ~((SPH_C64(0xFFFFFFFF) >> (32 - (n))) << 32)) \
245 | (((x) >> (32 - (n))) \
246 & ~((SPH_C64(0xFFFFFFFF) >> (n)) << (n))))
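/*
 * ROL32W rotates each 32-bit half of a 64-bit word left by n independently:
 * the first mask discards low-lane bits that would spill into the high lane
 * after the left shift, the second discards high-lane bits that would spill
 * into the low lane after the right shift. For 1 <= n <= 31 it is
 * equivalent to this sketch:
 *
 *   lo = SPH_ROTL32((sph_u32)(x), n);
 *   hi = SPH_ROTL32((sph_u32)((x) >> 32), n);
 *   x  = (sph_u64)lo | ((sph_u64)hi << 32);
 */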
248 #define MIX_WORDW(u, v) do { \
250 (u) = ROL32W((u), 2) ^ (v); \
251 (v) = ROL32W((v), 14) ^ (u); \
252 (u) = ROL32W((u), 10) ^ (v); \
253 (v) = ROL32W((v), 1); \
258 #define MIX_WORDW(u, v) do { \
259 sph_u32 ul, uh, vl, vh; \
261 ul = SPH_T32((sph_u32)(u)); \
262 uh = SPH_T32((sph_u32)((u) >> 32)); \
263 vl = SPH_T32((sph_u32)(v)); \
264 vh = SPH_T32((sph_u32)((v) >> 32)); \
265 ul = SPH_ROTL32(ul, 2) ^ vl; \
266 vl = SPH_ROTL32(vl, 14) ^ ul; \
267 ul = SPH_ROTL32(ul, 10) ^ vl; \
268 vl = SPH_ROTL32(vl, 1); \
269 uh = SPH_ROTL32(uh, 2) ^ vh; \
270 vh = SPH_ROTL32(vh, 14) ^ uh; \
271 uh = SPH_ROTL32(uh, 10) ^ vh; \
272 vh = SPH_ROTL32(vh, 1); \
273 (u) = (sph_u64)ul | ((sph_u64)uh << 32); \
274 (v) = (sph_u64)vl | ((sph_u64)vh << 32); \
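/*
 * Both MIX_WORDW variants compute the same function: MixWord applied
 * independently to each of the two 32-bit lanes packed in u and v. The
 * first builds on ROL32W; the second unpacks into 32-bit halves, applies
 * the SPH_ROTL32 rotations by 2, 14, 10 and 1 to each half, and repacks.
 */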
279 #define SUB_CRUMB(a0, a1, a2, a3) do { \
284 (a1) = SPH_T32(~(a1)); \
290 (a0) = SPH_T32(~(a0)); \
302 #define MIX_WORD(u, v) do { \
304 (u) = SPH_ROTL32((u), 2) ^ (v); \
305 (v) = SPH_ROTL32((v), 14) ^ (u); \
306 (u) = SPH_ROTL32((u), 10) ^ (v); \
307 (v) = SPH_ROTL32((v), 1); \
310 #define DECL_STATE3 \
311 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
312 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
313 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27;
315 #define READ_STATE3(state) do { \
316 V00 = (state)->V[0][0]; \
317 V01 = (state)->V[0][1]; \
318 V02 = (state)->V[0][2]; \
319 V03 = (state)->V[0][3]; \
320 V04 = (state)->V[0][4]; \
321 V05 = (state)->V[0][5]; \
322 V06 = (state)->V[0][6]; \
323 V07 = (state)->V[0][7]; \
324 V10 = (state)->V[1][0]; \
325 V11 = (state)->V[1][1]; \
326 V12 = (state)->V[1][2]; \
327 V13 = (state)->V[1][3]; \
328 V14 = (state)->V[1][4]; \
329 V15 = (state)->V[1][5]; \
330 V16 = (state)->V[1][6]; \
331 V17 = (state)->V[1][7]; \
332 V20 = (state)->V[2][0]; \
333 V21 = (state)->V[2][1]; \
334 V22 = (state)->V[2][2]; \
335 V23 = (state)->V[2][3]; \
336 V24 = (state)->V[2][4]; \
337 V25 = (state)->V[2][5]; \
338 V26 = (state)->V[2][6]; \
339 V27 = (state)->V[2][7]; \
342 #define WRITE_STATE3(state) do { \
343 (state)->V[0][0] = V00; \
344 (state)->V[0][1] = V01; \
345 (state)->V[0][2] = V02; \
346 (state)->V[0][3] = V03; \
347 (state)->V[0][4] = V04; \
348 (state)->V[0][5] = V05; \
349 (state)->V[0][6] = V06; \
350 (state)->V[0][7] = V07; \
351 (state)->V[1][0] = V10; \
352 (state)->V[1][1] = V11; \
353 (state)->V[1][2] = V12; \
354 (state)->V[1][3] = V13; \
355 (state)->V[1][4] = V14; \
356 (state)->V[1][5] = V15; \
357 (state)->V[1][6] = V16; \
358 (state)->V[1][7] = V17; \
359 (state)->V[2][0] = V20; \
360 (state)->V[2][1] = V21; \
361 (state)->V[2][2] = V22; \
362 (state)->V[2][3] = V23; \
363 (state)->V[2][4] = V24; \
364 (state)->V[2][5] = V25; \
365 (state)->V[2][6] = V26; \
366 (state)->V[2][7] = V27; \
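/*
 * READ_STATE3 / WRITE_STATE3 copy the 3x8-word state between the context
 * structure and the locals declared by DECL_STATE3, so that the round
 * macros work on variables the compiler can keep in registers. A hedged
 * sketch of the compression function built from these pieces (the
 * message-injection and permutation macros, MI3 and P3 in the reference
 * sphlib code, are elided from this excerpt):
 *
 *   DECL_STATE3
 *   READ_STATE3(sc);
 *   while (a full 32-byte block is available in buf) {
 *       MI3;     inject the block (M0..M7 decoded below)
 *       P3;      TWEAK3 + the SUB_CRUMB/MIX_WORD rounds
 *   }
 *   WRITE_STATE3(sc);
 */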
372 M0 = sph_dec32be_aligned(buf + 0); \
373 M1 = sph_dec32be_aligned(buf + 4); \
374 M2 = sph_dec32be_aligned(buf + 8); \
375 M3 = sph_dec32be_aligned(buf + 12); \
376 M4 = sph_dec32be_aligned(buf + 16); \
377 M5 = sph_dec32be_aligned(buf + 20); \
378 M6 = sph_dec32be_aligned(buf + 24); \
379 M7 = sph_dec32be_aligned(buf + 28); \
393 #define TWEAK3 do { \
394 V14 = SPH_ROTL32(V14, 1); \
395 V15 = SPH_ROTL32(V15, 1); \
396 V16 = SPH_ROTL32(V16, 1); \
397 V17 = SPH_ROTL32(V17, 1); \
398 V24 = SPH_ROTL32(V24, 2); \
399 V25 = SPH_ROTL32(V25, 2); \
400 V26 = SPH_ROTL32(V26, 2); \
401 V27 = SPH_ROTL32(V27, 2); \
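/*
 * TWEAK3 is the per-chain tweak from the Luffa specification: in chain j
 * (j = 1, 2), the second half of the state words (indices 4..7) is rotated
 * left by j bits; chain 0 is left untouched. TWEAK4 and TWEAK5 below extend
 * the same rule to chains 3 and 4 with rotations by 3 and 4.
 */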
404 #if SPH_LUFFA_PARALLEL
408 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
410 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
411 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
412 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
413 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
414 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
415 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
416 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
417 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
418 for (r = 0; r < 8; r ++) { \
419 SUB_CRUMBW(W0, W1, W2, W3); \
420 SUB_CRUMBW(W5, W6, W7, W4); \
428 V00 = SPH_T32((sph_u32)W0); \
429 V10 = SPH_T32((sph_u32)(W0 >> 32)); \
430 V01 = SPH_T32((sph_u32)W1); \
431 V11 = SPH_T32((sph_u32)(W1 >> 32)); \
432 V02 = SPH_T32((sph_u32)W2); \
433 V12 = SPH_T32((sph_u32)(W2 >> 32)); \
434 V03 = SPH_T32((sph_u32)W3); \
435 V13 = SPH_T32((sph_u32)(W3 >> 32)); \
436 V04 = SPH_T32((sph_u32)W4); \
437 V14 = SPH_T32((sph_u32)(W4 >> 32)); \
438 V05 = SPH_T32((sph_u32)W5); \
439 V15 = SPH_T32((sph_u32)(W5 >> 32)); \
440 V06 = SPH_T32((sph_u32)W6); \
441 V16 = SPH_T32((sph_u32)(W6 >> 32)); \
442 V07 = SPH_T32((sph_u32)W7); \
443 V17 = SPH_T32((sph_u32)(W7 >> 32)); \
444 for (r = 0; r < 8; r ++) { \
445 SUB_CRUMB(V20, V21, V22, V23); \
446 SUB_CRUMB(V25, V26, V27, V24); \
447 MIX_WORD(V20, V24); \
448 MIX_WORD(V21, V25); \
449 MIX_WORD(V22, V26); \
450 MIX_WORD(V23, V27); \
461 for (r = 0; r < 8; r ++) { \
462 SUB_CRUMB(V00, V01, V02, V03); \
463 SUB_CRUMB(V05, V06, V07, V04); \
464 MIX_WORD(V00, V04); \
465 MIX_WORD(V01, V05); \
466 MIX_WORD(V02, V06); \
467 MIX_WORD(V03, V07); \
471 for (r = 0; r < 8; r ++) { \
472 SUB_CRUMB(V10, V11, V12, V13); \
473 SUB_CRUMB(V15, V16, V17, V14); \
474 MIX_WORD(V10, V14); \
475 MIX_WORD(V11, V15); \
476 MIX_WORD(V12, V16); \
477 MIX_WORD(V13, V17); \
481 for (r = 0; r < 8; r ++) { \
482 SUB_CRUMB(V20, V21, V22, V23); \
483 SUB_CRUMB(V25, V26, V27, V24); \
484 MIX_WORD(V20, V24); \
485 MIX_WORD(V21, V25); \
486 MIX_WORD(V22, V26); \
487 MIX_WORD(V23, V27); \
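/*
 * Each of the three round loops above implements eight rounds of the Step
 * function for one 256-bit chain: two SUB_CRUMB calls cover the eight
 * words, MIX_WORD pairs word k with word k+4, and (in lines elided from
 * this excerpt) the chain's round constants are XORed into words 0 and 4
 * each round: RC00/RC04 for chain 0, RC10/RC14 for chain 1, RC20/RC24 for
 * chain 2. In the parallel branch the packed RCW010/RCW014 constants play
 * the same role for chains 0 and 1 at once.
 */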
495 #define DECL_STATE4 \
496 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
497 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
498 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \
499 sph_u32 V30, V31, V32, V33, V34, V35, V36, V37;
501 #define READ_STATE4(state) do { \
502 V00 = (state)->V[0][0]; \
503 V01 = (state)->V[0][1]; \
504 V02 = (state)->V[0][2]; \
505 V03 = (state)->V[0][3]; \
506 V04 = (state)->V[0][4]; \
507 V05 = (state)->V[0][5]; \
508 V06 = (state)->V[0][6]; \
509 V07 = (state)->V[0][7]; \
510 V10 = (state)->V[1][0]; \
511 V11 = (state)->V[1][1]; \
512 V12 = (state)->V[1][2]; \
513 V13 = (state)->V[1][3]; \
514 V14 = (state)->V[1][4]; \
515 V15 = (state)->V[1][5]; \
516 V16 = (state)->V[1][6]; \
517 V17 = (state)->V[1][7]; \
518 V20 = (state)->V[2][0]; \
519 V21 = (state)->V[2][1]; \
520 V22 = (state)->V[2][2]; \
521 V23 = (state)->V[2][3]; \
522 V24 = (state)->V[2][4]; \
523 V25 = (state)->V[2][5]; \
524 V26 = (state)->V[2][6]; \
525 V27 = (state)->V[2][7]; \
526 V30 = (state)->V[3][0]; \
527 V31 = (state)->V[3][1]; \
528 V32 = (state)->V[3][2]; \
529 V33 = (state)->V[3][3]; \
530 V34 = (state)->V[3][4]; \
531 V35 = (state)->V[3][5]; \
532 V36 = (state)->V[3][6]; \
533 V37 = (state)->V[3][7]; \
536 #define WRITE_STATE4(state) do { \
537 (state)->V[0][0] = V00; \
538 (state)->V[0][1] = V01; \
539 (state)->V[0][2] = V02; \
540 (state)->V[0][3] = V03; \
541 (state)->V[0][4] = V04; \
542 (state)->V[0][5] = V05; \
543 (state)->V[0][6] = V06; \
544 (state)->V[0][7] = V07; \
545 (state)->V[1][0] = V10; \
546 (state)->V[1][1] = V11; \
547 (state)->V[1][2] = V12; \
548 (state)->V[1][3] = V13; \
549 (state)->V[1][4] = V14; \
550 (state)->V[1][5] = V15; \
551 (state)->V[1][6] = V16; \
552 (state)->V[1][7] = V17; \
553 (state)->V[2][0] = V20; \
554 (state)->V[2][1] = V21; \
555 (state)->V[2][2] = V22; \
556 (state)->V[2][3] = V23; \
557 (state)->V[2][4] = V24; \
558 (state)->V[2][5] = V25; \
559 (state)->V[2][6] = V26; \
560 (state)->V[2][7] = V27; \
561 (state)->V[3][0] = V30; \
562 (state)->V[3][1] = V31; \
563 (state)->V[3][2] = V32; \
564 (state)->V[3][3] = V33; \
565 (state)->V[3][4] = V34; \
566 (state)->V[3][5] = V35; \
567 (state)->V[3][6] = V36; \
568 (state)->V[3][7] = V37; \
575 M0 = sph_dec32be_aligned(buf + 0); \
576 M1 = sph_dec32be_aligned(buf + 4); \
577 M2 = sph_dec32be_aligned(buf + 8); \
578 M3 = sph_dec32be_aligned(buf + 12); \
579 M4 = sph_dec32be_aligned(buf + 16); \
580 M5 = sph_dec32be_aligned(buf + 20); \
581 M6 = sph_dec32be_aligned(buf + 24); \
582 M7 = sph_dec32be_aligned(buf + 28); \
608 #define TWEAK4 do { \
609 V14 = SPH_ROTL32(V14, 1); \
610 V15 = SPH_ROTL32(V15, 1); \
611 V16 = SPH_ROTL32(V16, 1); \
612 V17 = SPH_ROTL32(V17, 1); \
613 V24 = SPH_ROTL32(V24, 2); \
614 V25 = SPH_ROTL32(V25, 2); \
615 V26 = SPH_ROTL32(V26, 2); \
616 V27 = SPH_ROTL32(V27, 2); \
617 V34 = SPH_ROTL32(V34, 3); \
618 V35 = SPH_ROTL32(V35, 3); \
619 V36 = SPH_ROTL32(V36, 3); \
620 V37 = SPH_ROTL32(V37, 3); \
623 #if SPH_LUFFA_PARALLEL
627 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
629 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
630 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
631 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
632 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
633 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
634 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
635 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
636 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
637 for (r = 0; r < 8; r ++) { \
638 SUB_CRUMBW(W0, W1, W2, W3); \
639 SUB_CRUMBW(W5, W6, W7, W4); \
647 V00 = SPH_T32((sph_u32)W0); \
648 V10 = SPH_T32((sph_u32)(W0 >> 32)); \
649 V01 = SPH_T32((sph_u32)W1); \
650 V11 = SPH_T32((sph_u32)(W1 >> 32)); \
651 V02 = SPH_T32((sph_u32)W2); \
652 V12 = SPH_T32((sph_u32)(W2 >> 32)); \
653 V03 = SPH_T32((sph_u32)W3); \
654 V13 = SPH_T32((sph_u32)(W3 >> 32)); \
655 V04 = SPH_T32((sph_u32)W4); \
656 V14 = SPH_T32((sph_u32)(W4 >> 32)); \
657 V05 = SPH_T32((sph_u32)W5); \
658 V15 = SPH_T32((sph_u32)(W5 >> 32)); \
659 V06 = SPH_T32((sph_u32)W6); \
660 V16 = SPH_T32((sph_u32)(W6 >> 32)); \
661 V07 = SPH_T32((sph_u32)W7); \
662 V17 = SPH_T32((sph_u32)(W7 >> 32)); \
663 W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \
664 W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \
665 W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \
666 W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \
667 W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \
668 W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \
669 W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \
670 W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \
671 for (r = 0; r < 8; r ++) { \
672 SUB_CRUMBW(W0, W1, W2, W3); \
673 SUB_CRUMBW(W5, W6, W7, W4); \
681 V20 = SPH_T32((sph_u32)W0); \
682 V30 = SPH_T32((sph_u32)(W0 >> 32)); \
683 V21 = SPH_T32((sph_u32)W1); \
684 V31 = SPH_T32((sph_u32)(W1 >> 32)); \
685 V22 = SPH_T32((sph_u32)W2); \
686 V32 = SPH_T32((sph_u32)(W2 >> 32)); \
687 V23 = SPH_T32((sph_u32)W3); \
688 V33 = SPH_T32((sph_u32)(W3 >> 32)); \
689 V24 = SPH_T32((sph_u32)W4); \
690 V34 = SPH_T32((sph_u32)(W4 >> 32)); \
691 V25 = SPH_T32((sph_u32)W5); \
692 V35 = SPH_T32((sph_u32)(W5 >> 32)); \
693 V26 = SPH_T32((sph_u32)W6); \
694 V36 = SPH_T32((sph_u32)(W6 >> 32)); \
695 V27 = SPH_T32((sph_u32)W7); \
696 V37 = SPH_T32((sph_u32)(W7 >> 32)); \
704 for (r = 0; r < 8; r ++) { \
705 SUB_CRUMB(V00, V01, V02, V03); \
706 SUB_CRUMB(V05, V06, V07, V04); \
707 MIX_WORD(V00, V04); \
708 MIX_WORD(V01, V05); \
709 MIX_WORD(V02, V06); \
710 MIX_WORD(V03, V07); \
714 for (r = 0; r < 8; r ++) { \
715 SUB_CRUMB(V10, V11, V12, V13); \
716 SUB_CRUMB(V15, V16, V17, V14); \
717 MIX_WORD(V10, V14); \
718 MIX_WORD(V11, V15); \
719 MIX_WORD(V12, V16); \
720 MIX_WORD(V13, V17); \
724 for (r = 0; r < 8; r ++) { \
725 SUB_CRUMB(V20, V21, V22, V23); \
726 SUB_CRUMB(V25, V26, V27, V24); \
727 MIX_WORD(V20, V24); \
728 MIX_WORD(V21, V25); \
729 MIX_WORD(V22, V26); \
730 MIX_WORD(V23, V27); \
734 for (r = 0; r < 8; r ++) { \
735 SUB_CRUMB(V30, V31, V32, V33); \
736 SUB_CRUMB(V35, V36, V37, V34); \
737 MIX_WORD(V30, V34); \
738 MIX_WORD(V31, V35); \
739 MIX_WORD(V32, V36); \
740 MIX_WORD(V33, V37); \
748 #define DECL_STATE5 \
749 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
750 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
751 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \
752 sph_u32 V30, V31, V32, V33, V34, V35, V36, V37; \
753 sph_u32 V40, V41, V42, V43, V44, V45, V46, V47;
755 #define READ_STATE5(state) do { \
756 V00 = (state)->V[0][0]; \
757 V01 = (state)->V[0][1]; \
758 V02 = (state)->V[0][2]; \
759 V03 = (state)->V[0][3]; \
760 V04 = (state)->V[0][4]; \
761 V05 = (state)->V[0][5]; \
762 V06 = (state)->V[0][6]; \
763 V07 = (state)->V[0][7]; \
764 V10 = (state)->V[1][0]; \
765 V11 = (state)->V[1][1]; \
766 V12 = (state)->V[1][2]; \
767 V13 = (state)->V[1][3]; \
768 V14 = (state)->V[1][4]; \
769 V15 = (state)->V[1][5]; \
770 V16 = (state)->V[1][6]; \
771 V17 = (state)->V[1][7]; \
772 V20 = (state)->V[2][0]; \
773 V21 = (state)->V[2][1]; \
774 V22 = (state)->V[2][2]; \
775 V23 = (state)->V[2][3]; \
776 V24 = (state)->V[2][4]; \
777 V25 = (state)->V[2][5]; \
778 V26 = (state)->V[2][6]; \
779 V27 = (state)->V[2][7]; \
780 V30 = (state)->V[3][0]; \
781 V31 = (state)->V[3][1]; \
782 V32 = (state)->V[3][2]; \
783 V33 = (state)->V[3][3]; \
784 V34 = (state)->V[3][4]; \
785 V35 = (state)->V[3][5]; \
786 V36 = (state)->V[3][6]; \
787 V37 = (state)->V[3][7]; \
788 V40 = (state)->V[4][0]; \
789 V41 = (state)->V[4][1]; \
790 V42 = (state)->V[4][2]; \
791 V43 = (state)->V[4][3]; \
792 V44 = (state)->V[4][4]; \
793 V45 = (state)->V[4][5]; \
794 V46 = (state)->V[4][6]; \
795 V47 = (state)->V[4][7]; \
798 #define WRITE_STATE5(state) do { \
799 (state)->V[0][0] = V00; \
800 (state)->V[0][1] = V01; \
801 (state)->V[0][2] = V02; \
802 (state)->V[0][3] = V03; \
803 (state)->V[0][4] = V04; \
804 (state)->V[0][5] = V05; \
805 (state)->V[0][6] = V06; \
806 (state)->V[0][7] = V07; \
807 (state)->V[1][0] = V10; \
808 (state)->V[1][1] = V11; \
809 (state)->V[1][2] = V12; \
810 (state)->V[1][3] = V13; \
811 (state)->V[1][4] = V14; \
812 (state)->V[1][5] = V15; \
813 (state)->V[1][6] = V16; \
814 (state)->V[1][7] = V17; \
815 (state)->V[2][0] = V20; \
816 (state)->V[2][1] = V21; \
817 (state)->V[2][2] = V22; \
818 (state)->V[2][3] = V23; \
819 (state)->V[2][4] = V24; \
820 (state)->V[2][5] = V25; \
821 (state)->V[2][6] = V26; \
822 (state)->V[2][7] = V27; \
823 (state)->V[3][0] = V30; \
824 (state)->V[3][1] = V31; \
825 (state)->V[3][2] = V32; \
826 (state)->V[3][3] = V33; \
827 (state)->V[3][4] = V34; \
828 (state)->V[3][5] = V35; \
829 (state)->V[3][6] = V36; \
830 (state)->V[3][7] = V37; \
831 (state)->V[4][0] = V40; \
832 (state)->V[4][1] = V41; \
833 (state)->V[4][2] = V42; \
834 (state)->V[4][3] = V43; \
835 (state)->V[4][4] = V44; \
836 (state)->V[4][5] = V45; \
837 (state)->V[4][6] = V46; \
838 (state)->V[4][7] = V47; \
845 M0 = sph_dec32be_aligned(buf + 0); \
846 M1 = sph_dec32be_aligned(buf + 4); \
847 M2 = sph_dec32be_aligned(buf + 8); \
848 M3 = sph_dec32be_aligned(buf + 12); \
849 M4 = sph_dec32be_aligned(buf + 16); \
850 M5 = sph_dec32be_aligned(buf + 20); \
851 M6 = sph_dec32be_aligned(buf + 24); \
852 M7 = sph_dec32be_aligned(buf + 28); \
894 #define TWEAK5 do { \
895 V14 = SPH_ROTL32(V14, 1); \
896 V15 = SPH_ROTL32(V15, 1); \
897 V16 = SPH_ROTL32(V16, 1); \
898 V17 = SPH_ROTL32(V17, 1); \
899 V24 = SPH_ROTL32(V24, 2); \
900 V25 = SPH_ROTL32(V25, 2); \
901 V26 = SPH_ROTL32(V26, 2); \
902 V27 = SPH_ROTL32(V27, 2); \
903 V34 = SPH_ROTL32(V34, 3); \
904 V35 = SPH_ROTL32(V35, 3); \
905 V36 = SPH_ROTL32(V36, 3); \
906 V37 = SPH_ROTL32(V37, 3); \
907 V44 = SPH_ROTL32(V44, 4); \
908 V45 = SPH_ROTL32(V45, 4); \
909 V46 = SPH_ROTL32(V46, 4); \
910 V47 = SPH_ROTL32(V47, 4); \
913 #if SPH_LUFFA_PARALLEL
917 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
919 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
920 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
921 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
922 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
923 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
924 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
925 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
926 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
927 for (r = 0; r < 8; r ++) { \
928 SUB_CRUMBW(W0, W1, W2, W3); \
929 SUB_CRUMBW(W5, W6, W7, W4); \
937 V00 = SPH_T32((sph_u32)W0); \
938 V10 = SPH_T32((sph_u32)(W0 >> 32)); \
939 V01 = SPH_T32((sph_u32)W1); \
940 V11 = SPH_T32((sph_u32)(W1 >> 32)); \
941 V02 = SPH_T32((sph_u32)W2); \
942 V12 = SPH_T32((sph_u32)(W2 >> 32)); \
943 V03 = SPH_T32((sph_u32)W3); \
944 V13 = SPH_T32((sph_u32)(W3 >> 32)); \
945 V04 = SPH_T32((sph_u32)W4); \
946 V14 = SPH_T32((sph_u32)(W4 >> 32)); \
947 V05 = SPH_T32((sph_u32)W5); \
948 V15 = SPH_T32((sph_u32)(W5 >> 32)); \
949 V06 = SPH_T32((sph_u32)W6); \
950 V16 = SPH_T32((sph_u32)(W6 >> 32)); \
951 V07 = SPH_T32((sph_u32)W7); \
952 V17 = SPH_T32((sph_u32)(W7 >> 32)); \
953 W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \
954 W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \
955 W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \
956 W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \
957 W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \
958 W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \
959 W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \
960 W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \
961 for (r = 0; r < 8; r ++) { \
962 SUB_CRUMBW(W0, W1, W2, W3); \
963 SUB_CRUMBW(W5, W6, W7, W4); \
971 V20 = SPH_T32((sph_u32)W0); \
972 V30 = SPH_T32((sph_u32)(W0 >> 32)); \
973 V21 = SPH_T32((sph_u32)W1); \
974 V31 = SPH_T32((sph_u32)(W1 >> 32)); \
975 V22 = SPH_T32((sph_u32)W2); \
976 V32 = SPH_T32((sph_u32)(W2 >> 32)); \
977 V23 = SPH_T32((sph_u32)W3); \
978 V33 = SPH_T32((sph_u32)(W3 >> 32)); \
979 V24 = SPH_T32((sph_u32)W4); \
980 V34 = SPH_T32((sph_u32)(W4 >> 32)); \
981 V25 = SPH_T32((sph_u32)W5); \
982 V35 = SPH_T32((sph_u32)(W5 >> 32)); \
983 V26 = SPH_T32((sph_u32)W6); \
984 V36 = SPH_T32((sph_u32)(W6 >> 32)); \
985 V27 = SPH_T32((sph_u32)W7); \
986 V37 = SPH_T32((sph_u32)(W7 >> 32)); \
987 for (r = 0; r < 8; r ++) { \
988 SUB_CRUMB(V40, V41, V42, V43); \
989 SUB_CRUMB(V45, V46, V47, V44); \
990 MIX_WORD(V40, V44); \
991 MIX_WORD(V41, V45); \
992 MIX_WORD(V42, V46); \
993 MIX_WORD(V43, V47); \
1004 for (r = 0; r < 8; r ++) { \
1005 SUB_CRUMB(V00, V01, V02, V03); \
1006 SUB_CRUMB(V05, V06, V07, V04); \
1007 MIX_WORD(V00, V04); \
1008 MIX_WORD(V01, V05); \
1009 MIX_WORD(V02, V06); \
1010 MIX_WORD(V03, V07); \
1014 for (r = 0; r < 8; r ++) { \
1015 SUB_CRUMB(V10, V11, V12, V13); \
1016 SUB_CRUMB(V15, V16, V17, V14); \
1017 MIX_WORD(V10, V14); \
1018 MIX_WORD(V11, V15); \
1019 MIX_WORD(V12, V16); \
1020 MIX_WORD(V13, V17); \
1024 for (r = 0; r < 8; r ++) { \
1025 SUB_CRUMB(V20, V21, V22, V23); \
1026 SUB_CRUMB(V25, V26, V27, V24); \
1027 MIX_WORD(V20, V24); \
1028 MIX_WORD(V21, V25); \
1029 MIX_WORD(V22, V26); \
1030 MIX_WORD(V23, V27); \
1034 for (r = 0; r < 8; r ++) { \
1035 SUB_CRUMB(V30, V31, V32, V33); \
1036 SUB_CRUMB(V35, V36, V37, V34); \
1037 MIX_WORD(V30, V34); \
1038 MIX_WORD(V31, V35); \
1039 MIX_WORD(V32, V36); \
1040 MIX_WORD(V33, V37); \
1044 for (r = 0; r < 8; r ++) { \
1045 SUB_CRUMB(V40, V41, V42, V43); \
1046 SUB_CRUMB(V45, V46, V47, V44); \
1047 MIX_WORD(V40, V44); \
1048 MIX_WORD(V41, V45); \
1049 MIX_WORD(V42, V46); \
1050 MIX_WORD(V43, V47); \
1067 if (len < (sizeof sc->buf) - ptr) {
1068 memcpy(buf + ptr, data, len);
1078 clen = (sizeof sc->buf) - ptr;
1081 memcpy(buf + ptr, data, clen);
1083 data = (const unsigned char *)data + clen;
1085 if (ptr == sizeof sc->buf) {
1097 void *dst, unsigned out_size_w32)
1099 unsigned char *buf, *out;
1108 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1109 memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1111 for (i = 0; i < 2; i ++) {
1114 memset(buf, 0, sizeof sc->buf);
1117 sph_enc32be(out + 0, V00 ^ V10 ^ V20);
1118 sph_enc32be(out + 4, V01 ^ V11 ^ V21);
1119 sph_enc32be(out + 8, V02 ^ V12 ^ V22);
1120 sph_enc32be(out + 12, V03 ^ V13 ^ V23);
1121 sph_enc32be(out + 16, V04 ^ V14 ^ V24);
1122 sph_enc32be(out + 20, V05 ^ V15 ^ V25);
1123 sph_enc32be(out + 24, V06 ^ V16 ^ V26);
1124 if (out_size_w32 > 7)
1125 sph_enc32be(out + 28, V07 ^ V17 ^ V27);
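/*
 * Output function for the 3-chain (Luffa-224/256) variant: each output
 * word is the XOR of the corresponding word of the three chains, encoded
 * big-endian. out_size_w32 selects how many 32-bit words are emitted:
 * 7 for Luffa-224 and 8 for Luffa-256 (see the calls with 7 and 8 below).
 */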
1137 if (len < (sizeof sc->buf) - ptr) {
1138 memcpy(buf + ptr, data, len);
1148 clen = (sizeof sc->buf) - ptr;
1151 memcpy(buf + ptr, data, clen);
1153 data = (const unsigned char *)data + clen;
1155 if (ptr == sizeof sc->buf) {
1168 unsigned char *buf, *out;
1178 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1179 memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1181 for (i = 0; i < 3; i ++) {
1186 memset(buf, 0, sizeof sc->buf);
1189 sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30);
1190 sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31);
1191 sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32);
1192 sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33);
1193 sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34);
1194 sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35);
1195 sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36);
1196 sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37);
1199 sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30);
1200 sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31);
1201 sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32);
1202 sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33);
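/*
 * Luffa-384 produces 48 bytes, but one extraction of the output function
 * yields only 32 (eight words XORed across the four chains), so a second
 * extraction supplies the remaining four words at offsets 32..44.
 */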
1217 if (len < (sizeof sc->buf) - ptr) {
1218 memcpy(buf + ptr, data, len);
1228 clen = (sizeof sc->buf) - ptr;
1231 memcpy(buf + ptr, data, clen);
1233 data = (const unsigned char *)data + clen;
1235 if (ptr == sizeof sc->buf) {
1248 unsigned char *buf, *out;
1258 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1259 memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1261 for (i = 0; i < 3; i ++) {
1266 memset(buf, 0, sizeof sc->buf);
1269 sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1270 sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1271 sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1272 sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1273 sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1274 sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1275 sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1276 sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37 ^ V47);
1279 sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1280 sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1281 sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1282 sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1283 sph_enc32be(out + 48, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1284 sph_enc32be(out + 52, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1285 sph_enc32be(out + 56, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1286 sph_enc32be(out + 60, V07 ^ V17 ^ V27 ^ V37 ^ V47);
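/*
 * Luffa-512 emits 64 bytes as two full 32-byte extractions: the first
 * fills offsets 0..28, the second (after one more blank-block round)
 * offsets 32..60, each word being the XOR across the five chains.
 */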
1299 memcpy(sc->V, V_INIT, sizeof(sc->V));
1307 luffa3(cc, data, len);
1321 luffa3_close(cc, ub, n, dst, 7);
1332 memcpy(sc->V, V_INIT, sizeof(sc->V));
1340 luffa3(cc, data, len);
1354 luffa3_close(cc, ub, n, dst, 8);
1365 memcpy(sc->V, V_INIT, sizeof(sc->V));
1373 luffa4(cc, data, len);
1387 luffa4_close(cc, ub, n, dst);
1398 memcpy(sc->V, V_INIT, sizeof(sc->V));
1406 luffa5(cc, data, len);
1420 luffa5_close(cc, ub, n, dst);
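/*
 * Usage sketch for the public API declared in sph_luffa.h: computing a
 * Luffa-256 digest of a memory buffer (data, len are the caller's buffer
 * and its length).
 *
 *   #include "sph_luffa.h"
 *
 *   unsigned char digest[32];
 *   sph_luffa256_context cc;
 *
 *   sph_luffa256_init(&cc);
 *   sph_luffa256(&cc, data, len);
 *   sph_luffa256_close(&cc, digest);
 */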