MMI: Support 32-bit Loongson architectures

This commit is contained in:
DRC
2019-02-27 13:05:58 -06:00
committed by DRC
parent 98ff5507d8
commit afbe48c290
6 changed files with 293 additions and 286 deletions

View File

@@ -124,67 +124,67 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
col = num_cols * 3;
asm(".set noreorder\r\n"
"li $8, 1\r\n"
"move $9, %3\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"xor $12, $12, $12\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lbu $12, 0($13)\r\n"
"li $8, 1\r\n"
"move $9, %3\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"xor $12, $12, $12\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lbu $12, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"xor $11, $11, $11\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lhu $11, 0($13)\r\n"
"sll $12, $12, 16\r\n"
"or $12, $12, $11\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"xor $11, $11, $11\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lhu $11, 0($13)\r\n"
"sll $12, $12, 16\r\n"
"or $12, $12, $11\r\n"
"2: \r\n"
"dmtc1 $12, %0\r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"subu $9, $9, 4\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lwu $14, 0($13)\r\n"
"dmtc1 $14, %1\r\n"
"dsll32 $12, $12, 0\r\n"
"or $12, $12, $14\r\n"
"dmtc1 $12, %0\r\n"
"2: \r\n"
"dmtc1 $12, %0\r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"subu $9, $9, 4\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lwu $14, 0($13)\r\n"
"dmtc1 $14, %1\r\n"
"dsll32 $12, $12, 0\r\n"
"or $12, $12, $14\r\n"
"dmtc1 $12, %0\r\n"
"3: \r\n"
"li $8, 8\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 4f\r\n"
"nop \r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"li $9, 8\r\n"
"j 5f\r\n"
"nop \r\n"
"3: \r\n"
"li $8, 8\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 4f\r\n"
"nop \r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"li $9, 8\r\n"
"j 5f\r\n"
"nop \r\n"
"4: \r\n"
"li $8, 16\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 5f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"4: \r\n"
"li $8, 16\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 5f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"5: \r\n"
"nop \r\n"
"5: \r\n"
"nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
@@ -236,41 +236,41 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
col = num_cols;
asm(".set noreorder\r\n"
"li $8, 1\r\n"
"move $9, %4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"dsll $11, $9, 2\r\n"
"move $13, %5\r\n"
"daddu $13, $13, $11\r\n"
"lwc1 %0, 0($13)\r\n"
"li $8, 1\r\n"
"move $9, %4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
PTR_SLL "$11, $9, 2\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $11\r\n"
"lwc1 %0, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"dsll $11, $9, 2\r\n"
"move $13, %5\r\n"
"daddu $13, $13, $11\r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
PTR_SLL "$11, $9, 2\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $11\r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0($13)\r\n"
"2: \r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"mov.s %3, %1\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"2: \r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"mov.s %3, %1\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"3: \r\n"
"nop \r\n"
"3: \r\n"
"nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)

View File

@@ -115,67 +115,67 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
col = num_cols * 3;
asm(".set noreorder\r\n"
"li $8, 1\r\n"
"move $9, %3\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"xor $12, $12, $12\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lbu $12, 0($13)\r\n"
"li $8, 1\r\n"
"move $9, %3\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"xor $12, $12, $12\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lbu $12, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"xor $11, $11, $11\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lhu $11, 0($13)\r\n"
"sll $12, $12, 16\r\n"
"or $12, $12, $11\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"xor $11, $11, $11\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lhu $11, 0($13)\r\n"
"sll $12, $12, 16\r\n"
"or $12, $12, $11\r\n"
"2: \r\n"
"dmtc1 $12, %0\r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"subu $9, $9, 4\r\n"
"move $13, %5\r\n"
"dadd $13, $13, $9\r\n"
"lwu $14, 0($13)\r\n"
"dmtc1 $14, %1\r\n"
"dsll32 $12, $12, 0\r\n"
"or $12, $12, $14\r\n"
"dmtc1 $12, %0\r\n"
"2: \r\n"
"dmtc1 $12, %0\r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"subu $9, $9, 4\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $9\r\n"
"lwu $14, 0($13)\r\n"
"dmtc1 $14, %1\r\n"
"dsll32 $12, $12, 0\r\n"
"or $12, $12, $14\r\n"
"dmtc1 $12, %0\r\n"
"3: \r\n"
"li $8, 8\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 4f\r\n"
"nop \r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"li $9, 8\r\n"
"j 5f\r\n"
"nop \r\n"
"3: \r\n"
"li $8, 8\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 4f\r\n"
"nop \r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"li $9, 8\r\n"
"j 5f\r\n"
"nop \r\n"
"4: \r\n"
"li $8, 16\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 5f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"4: \r\n"
"li $8, 16\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 5f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"5: \r\n"
"nop \r\n"
"5: \r\n"
"nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
@@ -227,41 +227,41 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
col = num_cols;
asm(".set noreorder\r\n"
"li $8, 1\r\n"
"move $9, %4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
"dsll $11, $9, 2\r\n"
"move $13, %5\r\n"
"daddu $13, $13, $11\r\n"
"lwc1 %0, 0($13)\r\n"
"li $8, 1\r\n"
"move $9, %4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 1f\r\n"
"nop \r\n"
"subu $9, $9, 1\r\n"
PTR_SLL "$11, $9, 2\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $11\r\n"
"lwc1 %0, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
"dsll $11, $9, 2\r\n"
"move $13, %5\r\n"
"daddu $13, $13, $11\r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0($13)\r\n"
"1: \r\n"
"li $8, 2\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 2f\r\n"
"nop \r\n"
"subu $9, $9, 2\r\n"
PTR_SLL "$11, $9, 2\r\n"
"move $13, %5\r\n"
PTR_ADDU "$13, $13, $11\r\n"
"mov.s %1, %0\r\n"
"ldc1 %0, 0($13)\r\n"
"2: \r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"mov.s %3, %1\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"2: \r\n"
"li $8, 4\r\n"
"and $10, $9, $8\r\n"
"beqz $10, 3f\r\n"
"nop \r\n"
"mov.s %2, %0\r\n"
"mov.s %3, %1\r\n"
"ldc1 %0, 0(%5)\r\n"
"ldc1 %1, 8(%5)\r\n"
"3: \r\n"
"nop \r\n"
"3: \r\n"
"nop \r\n"
".set reorder\r\n"
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)

View File

@@ -247,64 +247,64 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
col = num_cols * 3;
asm(".set noreorder\r\n"
"li $8, 16\r\n"
"move $9, %4\r\n"
"mov.s $f4, %1\r\n"
"mov.s $f6, %3\r\n"
"move $10, %5\r\n"
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, %2\r\n"
"subu $9, $9, 16\r\n"
"daddu $10, $10, 16\r\n"
"b 2f\r\n"
"nop \r\n"
"li $8, 16\r\n"
"move $9, %4\r\n"
"mov.s $f4, %1\r\n"
"mov.s $f6, %3\r\n"
"move $10, %5\r\n"
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, %2\r\n"
"subu $9, $9, 16\r\n"
PTR_ADDU "$10, $10, 16\r\n"
"b 2f\r\n"
"nop \r\n"
"1: \r\n"
"li $8, 8\r\n" /* st8 */
"bltu $9, $8, 2f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, ($10)\r\n"
"mov.s $f4, %3\r\n"
"subu $9, $9, 8\r\n"
"daddu $10, $10, 8\r\n"
"1: \r\n"
"li $8, 8\r\n" /* st8 */
"bltu $9, $8, 2f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, %3\r\n"
"subu $9, $9, 8\r\n"
PTR_ADDU "$10, $10, 8\r\n"
"2: \r\n"
"li $8, 4\r\n" /* st4 */
"mfc1 $11, $f4\r\n"
"bltu $9, $8, 3f\r\n"
"nop \r\n"
"swl $11, 3($10)\r\n"
"swr $11, 0($10)\r\n"
"li $8, 32\r\n"
"mtc1 $8, $f6\r\n"
"dsrl $f4, $f4, $f6\r\n"
"mfc1 $11, $f4\r\n"
"subu $9, $9, 4\r\n"
"daddu $10, $10, 4\r\n"
"2: \r\n"
"li $8, 4\r\n" /* st4 */
"mfc1 $11, $f4\r\n"
"bltu $9, $8, 3f\r\n"
"nop \r\n"
"swl $11, 3($10)\r\n"
"swr $11, 0($10)\r\n"
"li $8, 32\r\n"
"mtc1 $8, $f6\r\n"
"dsrl $f4, $f4, $f6\r\n"
"mfc1 $11, $f4\r\n"
"subu $9, $9, 4\r\n"
PTR_ADDU "$10, $10, 4\r\n"
"3: \r\n"
"li $8, 2\r\n" /* st2 */
"bltu $9, $8, 4f\r\n"
"nop \r\n"
"ush $11, 0($10)\r\n"
"srl $11, 16\r\n"
"subu $9, $9, 2\r\n"
"daddu $10, $10, 2\r\n"
"3: \r\n"
"li $8, 2\r\n" /* st2 */
"bltu $9, $8, 4f\r\n"
"nop \r\n"
"ush $11, 0($10)\r\n"
"srl $11, 16\r\n"
"subu $9, $9, 2\r\n"
PTR_ADDU "$10, $10, 2\r\n"
"4: \r\n"
"li $8, 1\r\n" /* st1 */
"bltu $9, $8, 5f\r\n"
"nop \r\n"
"sb $11, 0($10)\r\n"
"4: \r\n"
"li $8, 1\r\n" /* st1 */
"bltu $9, $8, 5f\r\n"
"nop \r\n"
"sb $11, 0($10)\r\n"
"5: \r\n"
"nop \r\n" /* end */
"5: \r\n"
"nop \r\n" /* end */
: "=m" (*outptr)
: "f" (mmA), "f" (mmC), "f" (mmE), "r" (col), "r" (outptr)
: "$f4", "$f6", "$8", "$9", "$10", "$11", "memory"
@@ -357,41 +357,41 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
col = num_cols;
asm(".set noreorder\r\n" /* st16 */
"li $8, 4\r\n"
"move $9, %6\r\n"
"move $10, %7\r\n"
"mov.s $f4, %2\r\n"
"mov.s $f6, %4\r\n"
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, ($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, %3\r\n"
"mov.s $f6, %5\r\n"
"subu $9, $9, 4\r\n"
"daddu $10, $10, 16\r\n"
"li $8, 4\r\n"
"move $9, %6\r\n"
"move $10, %7\r\n"
"mov.s $f4, %2\r\n"
"mov.s $f6, %4\r\n"
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, %3\r\n"
"mov.s $f6, %5\r\n"
"subu $9, $9, 4\r\n"
PTR_ADDU "$10, $10, 16\r\n"
"1: \r\n"
"li $8, 2\r\n" /* st8 */
"bltu $9, $8, 2f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 2\r\n"
"daddu $10, $10, 8\r\n"
"1: \r\n"
"li $8, 2\r\n" /* st8 */
"bltu $9, $8, 2f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 2\r\n"
PTR_ADDU "$10, $10, 8\r\n"
"2: \r\n"
"li $8, 1\r\n" /* st4 */
"bltu $9, $8, 3f\r\n"
"nop \r\n"
"gsswlc1 $f4, 3($10)\r\n"
"gsswrc1 $f4, 0($10)\r\n"
"2: \r\n"
"li $8, 1\r\n" /* st4 */
"bltu $9, $8, 3f\r\n"
"nop \r\n"
"gsswlc1 $f4, 3($10)\r\n"
"gsswrc1 $f4, 0($10)\r\n"
"3: \r\n"
"li %1, 0\r\n" /* end */
"3: \r\n"
"li %1, 0\r\n" /* end */
: "=m" (*outptr), "=r" (col)
: "f" (mmA), "f" (mmC), "f" (mmD), "f" (mmH), "r" (col),
"r" (outptr)

View File

@@ -296,7 +296,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"mov.s $f6, %5\r\n"
"mov.s $f8, %6\r\n"
"subu $9, $9, 24\r\n"
"daddu $10, $10, 24\r\n"
PTR_ADDU "$10, $10, 24\r\n"
"1: \r\n"
"li $8, 16\r\n" /* st16 */
@@ -308,7 +308,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"gssdrc1 $f6, 8($10)\r\n"
"mov.s $f4, $f8\r\n"
"subu $9, $9, 16\r\n"
"daddu $10, $10, 16\r\n"
PTR_ADDU "$10, $10, 16\r\n"
"2: \r\n"
"li $8, 8\r\n" /* st8 */
@@ -318,7 +318,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 8\r\n"
"daddu $10, $10, 8\r\n"
PTR_ADDU "$10, $10, 8\r\n"
"3: \r\n"
"li $8, 4\r\n" /* st4 */
@@ -332,7 +332,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"dsrl $f4, $f4, $f6\r\n"
"mfc1 $11, $f4\r\n"
"subu $9, $9, 4\r\n"
"daddu $10, $10, 4\r\n"
PTR_ADDU "$10, $10, 4\r\n"
"4: \r\n"
"li $8, 2\r\n" /* st2 */
@@ -341,7 +341,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"ush $11, 0($10)\r\n"
"srl $11, 16\r\n"
"subu $9, $9, 2\r\n"
"daddu $10, $10, 2\r\n"
PTR_ADDU "$10, $10, 2\r\n"
"5: \r\n"
"li $8, 1\r\n" /* st1 */
@@ -440,7 +440,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"bltu $9, $8, 1f\r\n"
"nop \r\n"
"gssdlc1 $f4, 7($10)\r\n"
"gssdrc1 $f4, ($10)\r\n"
"gssdrc1 $f4, 0($10)\r\n"
"gssdlc1 $f6, 7+8($10)\r\n"
"gssdrc1 $f6, 8($10)\r\n"
"gssdlc1 $f8, 7+16($10)\r\n"
@@ -452,7 +452,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"mov.s $f8, %8\r\n"
"mov.s $f10, %9\r\n"
"subu $9, $9, 8\r\n"
"daddu $10, $10, 32\r\n"
PTR_ADDU "$10, $10, 32\r\n"
"1: \r\n"
"li $8, 4\r\n" /* st16 */
@@ -465,7 +465,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"mov.s $f4, $f8\r\n"
"mov.s $f6, $f10\r\n"
"subu $9, $9, 4\r\n"
"daddu $10, $10, 16\r\n"
PTR_ADDU "$10, $10, 16\r\n"
"2: \r\n"
"li $8, 2\r\n" /* st8 */
@@ -475,7 +475,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
"gssdrc1 $f4, 0($10)\r\n"
"mov.s $f4, $f6\r\n"
"subu $9, $9, 2\r\n"
"daddu $10, $10, 8\r\n"
PTR_ADDU "$10, $10, 8\r\n"
"3: \r\n"
"li $8, 1\r\n" /* st4 */

View File

@@ -135,18 +135,18 @@ void jsimd_h2v2_fancy_upsample_mmi(int max_v_samp_factor,
if (downsampled_width & 7) {
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
tmp1 = downsampled_width * sizeof(JSAMPLE);
asm("daddu $8, %3, %6\r\n"
"lb $9, ($8)\r\n"
"daddu $8, %3, %7\r\n"
"sb $9, ($8)\r\n"
"daddu $8, %4, %6\r\n"
"lb $9, ($8)\r\n"
"daddu $8, %4, %7\r\n"
"sb $9, ($8)\r\n"
"daddu $8, %5, %6\r\n"
"lb $9, ($8)\r\n"
"daddu $8, %5, %7\r\n"
"sb $9, ($8)\r\n"
asm(PTR_ADDU "$8, %3, %6\r\n"
"lb $9, ($8)\r\n"
PTR_ADDU "$8, %3, %7\r\n"
"sb $9, ($8)\r\n"
PTR_ADDU "$8, %4, %6\r\n"
"lb $9, ($8)\r\n"
PTR_ADDU "$8, %4, %7\r\n"
"sb $9, ($8)\r\n"
PTR_ADDU "$8, %5, %6\r\n"
"lb $9, ($8)\r\n"
PTR_ADDU "$8, %5, %7\r\n"
"sb $9, ($8)\r\n"
: "=m" (*inptr_1), "=m" (*inptr0), "=m" (*inptr1)
: "r" (inptr_1), "r" (inptr0), "r" (inptr1), "r" (tmp), "r" (tmp1)
: "$8", "$9"
@@ -262,10 +262,10 @@ void jsimd_h2v1_fancy_upsample_mmi(int max_v_samp_factor,
if (downsampled_width & 7) {
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
tmp1 = downsampled_width * sizeof(JSAMPLE);
asm("daddu $8, %1, %2\r\n"
"lb $9, ($8)\r\n"
"daddu $8, %1, %3\r\n"
"sb $9, ($8)\r\n"
asm(PTR_ADDU "$8, %1, %2\r\n"
"lb $9, ($8)\r\n"
PTR_ADDU "$8, %1, %3\r\n"
"sb $9, ($8)\r\n"
: "=m" (*inptr0)
: "r" (inptr0), "r" (tmp), "r" (tmp1)
: "$8", "$9"

View File

@@ -33,6 +33,13 @@
/* Common code */
/*
 * Instruction-mnemonic fragments used for address arithmetic inside the
 * inline-asm templates.  Each macro expands to a string literal that is
 * joined with the adjacent operand string by literal concatenation, e.g.
 *   PTR_ADDU "$13, $13, $9\r\n"
 * so the trailing space inside each literal is required: it separates the
 * mnemonic from its operands.
 *
 * When _ABI64 is defined and _MIPS_SIM equals it, the doubleword forms
 * (daddu/dsll) are selected; in every other configuration the word forms
 * (addu/sll) are selected.
 * NOTE(review): presumably the word forms are correct for all non-n64 ABIs
 * the MMI code is built for (o32/n32) -- confirm against the build matrix.
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
# define PTR_ADDU "daddu "
# define PTR_SLL "dsll "
#else
# define PTR_ADDU "addu "
# define PTR_SLL "sll "
#endif
#define SIZEOF_MMWORD 8
#define BYTE_BIT 8