MMI: Support 32-bit Loongson architectures
This commit is contained in:
@@ -124,67 +124,67 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
col = num_cols * 3;
|
||||
asm(".set noreorder\r\n"
|
||||
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %3\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"xor $12, $12, $12\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lbu $12, 0($13)\r\n"
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %3\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"xor $12, $12, $12\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lbu $12, 0($13)\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"xor $11, $11, $11\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lhu $11, 0($13)\r\n"
|
||||
"sll $12, $12, 16\r\n"
|
||||
"or $12, $12, $11\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"xor $11, $11, $11\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lhu $11, 0($13)\r\n"
|
||||
"sll $12, $12, 16\r\n"
|
||||
"or $12, $12, $11\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lwu $14, 0($13)\r\n"
|
||||
"dmtc1 $14, %1\r\n"
|
||||
"dsll32 $12, $12, 0\r\n"
|
||||
"or $12, $12, $14\r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"2: \r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lwu $14, 0($13)\r\n"
|
||||
"dmtc1 $14, %1\r\n"
|
||||
"dsll32 $12, $12, 0\r\n"
|
||||
"or $12, $12, $14\r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li $8, 8\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"li $9, 8\r\n"
|
||||
"j 5f\r\n"
|
||||
"nop \r\n"
|
||||
"3: \r\n"
|
||||
"li $8, 8\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"li $9, 8\r\n"
|
||||
"j 5f\r\n"
|
||||
"nop \r\n"
|
||||
|
||||
"4: \r\n"
|
||||
"li $8, 16\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
"4: \r\n"
|
||||
"li $8, 16\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
|
||||
"5: \r\n"
|
||||
"nop \r\n"
|
||||
"5: \r\n"
|
||||
"nop \r\n"
|
||||
".set reorder\r\n"
|
||||
|
||||
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
|
||||
@@ -236,41 +236,41 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
col = num_cols;
|
||||
asm(".set noreorder\r\n"
|
||||
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"dsll $11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"daddu $13, $13, $11\r\n"
|
||||
"lwc1 %0, 0($13)\r\n"
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
PTR_SLL "$11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $11\r\n"
|
||||
"lwc1 %0, 0($13)\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"dsll $11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"daddu $13, $13, $11\r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0($13)\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
PTR_SLL "$11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $11\r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0($13)\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"mov.s %3, %1\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"mov.s %3, %1\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"nop \r\n"
|
||||
"3: \r\n"
|
||||
"nop \r\n"
|
||||
".set reorder\r\n"
|
||||
|
||||
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)
|
||||
|
||||
@@ -115,67 +115,67 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
col = num_cols * 3;
|
||||
asm(".set noreorder\r\n"
|
||||
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %3\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"xor $12, $12, $12\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lbu $12, 0($13)\r\n"
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %3\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"xor $12, $12, $12\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lbu $12, 0($13)\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"xor $11, $11, $11\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lhu $11, 0($13)\r\n"
|
||||
"sll $12, $12, 16\r\n"
|
||||
"or $12, $12, $11\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"xor $11, $11, $11\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lhu $11, 0($13)\r\n"
|
||||
"sll $12, $12, 16\r\n"
|
||||
"or $12, $12, $11\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"dadd $13, $13, $9\r\n"
|
||||
"lwu $14, 0($13)\r\n"
|
||||
"dmtc1 $14, %1\r\n"
|
||||
"dsll32 $12, $12, 0\r\n"
|
||||
"or $12, $12, $14\r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"2: \r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $9\r\n"
|
||||
"lwu $14, 0($13)\r\n"
|
||||
"dmtc1 $14, %1\r\n"
|
||||
"dsll32 $12, $12, 0\r\n"
|
||||
"or $12, $12, $14\r\n"
|
||||
"dmtc1 $12, %0\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li $8, 8\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"li $9, 8\r\n"
|
||||
"j 5f\r\n"
|
||||
"nop \r\n"
|
||||
"3: \r\n"
|
||||
"li $8, 8\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"li $9, 8\r\n"
|
||||
"j 5f\r\n"
|
||||
"nop \r\n"
|
||||
|
||||
"4: \r\n"
|
||||
"li $8, 16\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
"4: \r\n"
|
||||
"li $8, 16\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
|
||||
"5: \r\n"
|
||||
"nop \r\n"
|
||||
"5: \r\n"
|
||||
"nop \r\n"
|
||||
".set reorder\r\n"
|
||||
|
||||
: "=f" (mmA), "=f" (mmG), "=f" (mmF)
|
||||
@@ -227,41 +227,41 @@ void jsimd_rgb_gray_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
||||
col = num_cols;
|
||||
asm(".set noreorder\r\n"
|
||||
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
"dsll $11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"daddu $13, $13, $11\r\n"
|
||||
"lwc1 %0, 0($13)\r\n"
|
||||
"li $8, 1\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 1\r\n"
|
||||
PTR_SLL "$11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $11\r\n"
|
||||
"lwc1 %0, 0($13)\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"dsll $11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
"daddu $13, $13, $11\r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0($13)\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
PTR_SLL "$11, $9, 2\r\n"
|
||||
"move $13, %5\r\n"
|
||||
PTR_ADDU "$13, $13, $11\r\n"
|
||||
"mov.s %1, %0\r\n"
|
||||
"ldc1 %0, 0($13)\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"mov.s %3, %1\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n"
|
||||
"and $10, $9, $8\r\n"
|
||||
"beqz $10, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"mov.s %2, %0\r\n"
|
||||
"mov.s %3, %1\r\n"
|
||||
"ldc1 %0, 0(%5)\r\n"
|
||||
"ldc1 %1, 8(%5)\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"nop \r\n"
|
||||
"3: \r\n"
|
||||
"nop \r\n"
|
||||
".set reorder\r\n"
|
||||
|
||||
: "=f" (mmA), "=f" (mmF), "=f" (mmD), "=f" (mmC)
|
||||
|
||||
@@ -247,64 +247,64 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
col = num_cols * 3;
|
||||
asm(".set noreorder\r\n"
|
||||
|
||||
"li $8, 16\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"mov.s $f4, %1\r\n"
|
||||
"mov.s $f6, %3\r\n"
|
||||
"move $10, %5\r\n"
|
||||
"bltu $9, $8, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"gssdlc1 $f6, 7+8($10)\r\n"
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"mov.s $f4, %2\r\n"
|
||||
"subu $9, $9, 16\r\n"
|
||||
"daddu $10, $10, 16\r\n"
|
||||
"b 2f\r\n"
|
||||
"nop \r\n"
|
||||
"li $8, 16\r\n"
|
||||
"move $9, %4\r\n"
|
||||
"mov.s $f4, %1\r\n"
|
||||
"mov.s $f6, %3\r\n"
|
||||
"move $10, %5\r\n"
|
||||
"bltu $9, $8, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"gssdlc1 $f6, 7+8($10)\r\n"
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"mov.s $f4, %2\r\n"
|
||||
"subu $9, $9, 16\r\n"
|
||||
PTR_ADDU "$10, $10, 16\r\n"
|
||||
"b 2f\r\n"
|
||||
"nop \r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 8\r\n" /* st8 */
|
||||
"bltu $9, $8, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, ($10)\r\n"
|
||||
"mov.s $f4, %3\r\n"
|
||||
"subu $9, $9, 8\r\n"
|
||||
"daddu $10, $10, 8\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 8\r\n" /* st8 */
|
||||
"bltu $9, $8, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"mov.s $f4, %3\r\n"
|
||||
"subu $9, $9, 8\r\n"
|
||||
PTR_ADDU "$10, $10, 8\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n" /* st4 */
|
||||
"mfc1 $11, $f4\r\n"
|
||||
"bltu $9, $8, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"swl $11, 3($10)\r\n"
|
||||
"swr $11, 0($10)\r\n"
|
||||
"li $8, 32\r\n"
|
||||
"mtc1 $8, $f6\r\n"
|
||||
"dsrl $f4, $f4, $f6\r\n"
|
||||
"mfc1 $11, $f4\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"daddu $10, $10, 4\r\n"
|
||||
"2: \r\n"
|
||||
"li $8, 4\r\n" /* st4 */
|
||||
"mfc1 $11, $f4\r\n"
|
||||
"bltu $9, $8, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"swl $11, 3($10)\r\n"
|
||||
"swr $11, 0($10)\r\n"
|
||||
"li $8, 32\r\n"
|
||||
"mtc1 $8, $f6\r\n"
|
||||
"dsrl $f4, $f4, $f6\r\n"
|
||||
"mfc1 $11, $f4\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
PTR_ADDU "$10, $10, 4\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li $8, 2\r\n" /* st2 */
|
||||
"bltu $9, $8, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"ush $11, 0($10)\r\n"
|
||||
"srl $11, 16\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"daddu $10, $10, 2\r\n"
|
||||
"3: \r\n"
|
||||
"li $8, 2\r\n" /* st2 */
|
||||
"bltu $9, $8, 4f\r\n"
|
||||
"nop \r\n"
|
||||
"ush $11, 0($10)\r\n"
|
||||
"srl $11, 16\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
PTR_ADDU "$10, $10, 2\r\n"
|
||||
|
||||
"4: \r\n"
|
||||
"li $8, 1\r\n" /* st1 */
|
||||
"bltu $9, $8, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"sb $11, 0($10)\r\n"
|
||||
"4: \r\n"
|
||||
"li $8, 1\r\n" /* st1 */
|
||||
"bltu $9, $8, 5f\r\n"
|
||||
"nop \r\n"
|
||||
"sb $11, 0($10)\r\n"
|
||||
|
||||
"5: \r\n"
|
||||
"nop \r\n" /* end */
|
||||
"5: \r\n"
|
||||
"nop \r\n" /* end */
|
||||
: "=m" (*outptr)
|
||||
: "f" (mmA), "f" (mmC), "f" (mmE), "r" (col), "r" (outptr)
|
||||
: "$f4", "$f6", "$8", "$9", "$10", "$11", "memory"
|
||||
@@ -357,41 +357,41 @@ void jsimd_ycc_rgb_convert_mmi(JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
col = num_cols;
|
||||
asm(".set noreorder\r\n" /* st16 */
|
||||
|
||||
"li $8, 4\r\n"
|
||||
"move $9, %6\r\n"
|
||||
"move $10, %7\r\n"
|
||||
"mov.s $f4, %2\r\n"
|
||||
"mov.s $f6, %4\r\n"
|
||||
"bltu $9, $8, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, ($10)\r\n"
|
||||
"gssdlc1 $f6, 7+8($10)\r\n"
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"mov.s $f4, %3\r\n"
|
||||
"mov.s $f6, %5\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"daddu $10, $10, 16\r\n"
|
||||
"li $8, 4\r\n"
|
||||
"move $9, %6\r\n"
|
||||
"move $10, %7\r\n"
|
||||
"mov.s $f4, %2\r\n"
|
||||
"mov.s $f6, %4\r\n"
|
||||
"bltu $9, $8, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"gssdlc1 $f6, 7+8($10)\r\n"
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"mov.s $f4, %3\r\n"
|
||||
"mov.s $f6, %5\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
PTR_ADDU "$10, $10, 16\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n" /* st8 */
|
||||
"bltu $9, $8, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"mov.s $f4, $f6\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"daddu $10, $10, 8\r\n"
|
||||
"1: \r\n"
|
||||
"li $8, 2\r\n" /* st8 */
|
||||
"bltu $9, $8, 2f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"mov.s $f4, $f6\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
PTR_ADDU "$10, $10, 8\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 1\r\n" /* st4 */
|
||||
"bltu $9, $8, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"gsswlc1 $f4, 3($10)\r\n"
|
||||
"gsswrc1 $f4, 0($10)\r\n"
|
||||
"2: \r\n"
|
||||
"li $8, 1\r\n" /* st4 */
|
||||
"bltu $9, $8, 3f\r\n"
|
||||
"nop \r\n"
|
||||
"gsswlc1 $f4, 3($10)\r\n"
|
||||
"gsswrc1 $f4, 0($10)\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li %1, 0\r\n" /* end */
|
||||
"3: \r\n"
|
||||
"li %1, 0\r\n" /* end */
|
||||
: "=m" (*outptr), "=r" (col)
|
||||
: "f" (mmA), "f" (mmC), "f" (mmD), "f" (mmH), "r" (col),
|
||||
"r" (outptr)
|
||||
|
||||
@@ -296,7 +296,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"mov.s $f6, %5\r\n"
|
||||
"mov.s $f8, %6\r\n"
|
||||
"subu $9, $9, 24\r\n"
|
||||
"daddu $10, $10, 24\r\n"
|
||||
PTR_ADDU "$10, $10, 24\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 16\r\n" /* st16 */
|
||||
@@ -308,7 +308,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"mov.s $f4, $f8\r\n"
|
||||
"subu $9, $9, 16\r\n"
|
||||
"daddu $10, $10, 16\r\n"
|
||||
PTR_ADDU "$10, $10, 16\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 8\r\n" /* st8 */
|
||||
@@ -318,7 +318,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"mov.s $f4, $f6\r\n"
|
||||
"subu $9, $9, 8\r\n"
|
||||
"daddu $10, $10, 8\r\n"
|
||||
PTR_ADDU "$10, $10, 8\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li $8, 4\r\n" /* st4 */
|
||||
@@ -332,7 +332,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"dsrl $f4, $f4, $f6\r\n"
|
||||
"mfc1 $11, $f4\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"daddu $10, $10, 4\r\n"
|
||||
PTR_ADDU "$10, $10, 4\r\n"
|
||||
|
||||
"4: \r\n"
|
||||
"li $8, 2\r\n" /* st2 */
|
||||
@@ -341,7 +341,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"ush $11, 0($10)\r\n"
|
||||
"srl $11, 16\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"daddu $10, $10, 2\r\n"
|
||||
PTR_ADDU "$10, $10, 2\r\n"
|
||||
|
||||
"5: \r\n"
|
||||
"li $8, 1\r\n" /* st1 */
|
||||
@@ -440,7 +440,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"bltu $9, $8, 1f\r\n"
|
||||
"nop \r\n"
|
||||
"gssdlc1 $f4, 7($10)\r\n"
|
||||
"gssdrc1 $f4, ($10)\r\n"
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"gssdlc1 $f6, 7+8($10)\r\n"
|
||||
"gssdrc1 $f6, 8($10)\r\n"
|
||||
"gssdlc1 $f8, 7+16($10)\r\n"
|
||||
@@ -452,7 +452,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"mov.s $f8, %8\r\n"
|
||||
"mov.s $f10, %9\r\n"
|
||||
"subu $9, $9, 8\r\n"
|
||||
"daddu $10, $10, 32\r\n"
|
||||
PTR_ADDU "$10, $10, 32\r\n"
|
||||
|
||||
"1: \r\n"
|
||||
"li $8, 4\r\n" /* st16 */
|
||||
@@ -465,7 +465,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"mov.s $f4, $f8\r\n"
|
||||
"mov.s $f6, $f10\r\n"
|
||||
"subu $9, $9, 4\r\n"
|
||||
"daddu $10, $10, 16\r\n"
|
||||
PTR_ADDU "$10, $10, 16\r\n"
|
||||
|
||||
"2: \r\n"
|
||||
"li $8, 2\r\n" /* st8 */
|
||||
@@ -475,7 +475,7 @@ void jsimd_h2v1_merged_upsample_mmi(JDIMENSION output_width,
|
||||
"gssdrc1 $f4, 0($10)\r\n"
|
||||
"mov.s $f4, $f6\r\n"
|
||||
"subu $9, $9, 2\r\n"
|
||||
"daddu $10, $10, 8\r\n"
|
||||
PTR_ADDU "$10, $10, 8\r\n"
|
||||
|
||||
"3: \r\n"
|
||||
"li $8, 1\r\n" /* st4 */
|
||||
|
||||
@@ -135,18 +135,18 @@ void jsimd_h2v2_fancy_upsample_mmi(int max_v_samp_factor,
|
||||
if (downsampled_width & 7) {
|
||||
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
|
||||
tmp1 = downsampled_width * sizeof(JSAMPLE);
|
||||
asm("daddu $8, %3, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
"daddu $8, %3, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
"daddu $8, %4, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
"daddu $8, %4, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
"daddu $8, %5, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
"daddu $8, %5, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
asm(PTR_ADDU "$8, %3, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %3, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %4, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %4, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %5, %6\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %5, %7\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
: "=m" (*inptr_1), "=m" (*inptr0), "=m" (*inptr1)
|
||||
: "r" (inptr_1), "r" (inptr0), "r" (inptr1), "r" (tmp), "r" (tmp1)
|
||||
: "$8", "$9"
|
||||
@@ -262,10 +262,10 @@ void jsimd_h2v1_fancy_upsample_mmi(int max_v_samp_factor,
|
||||
if (downsampled_width & 7) {
|
||||
tmp = (downsampled_width - 1) * sizeof(JSAMPLE);
|
||||
tmp1 = downsampled_width * sizeof(JSAMPLE);
|
||||
asm("daddu $8, %1, %2\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
"daddu $8, %1, %3\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
asm(PTR_ADDU "$8, %1, %2\r\n"
|
||||
"lb $9, ($8)\r\n"
|
||||
PTR_ADDU "$8, %1, %3\r\n"
|
||||
"sb $9, ($8)\r\n"
|
||||
: "=m" (*inptr0)
|
||||
: "r" (inptr0), "r" (tmp), "r" (tmp1)
|
||||
: "$8", "$9"
|
||||
|
||||
@@ -33,6 +33,13 @@
|
||||
|
||||
|
||||
/* Common code */
|
||||
/*
 * Mnemonics used for pointer arithmetic inside the inline assembly blocks.
 * When _ABI64 is defined and _MIPS_SIM equals it (presumably the 64-bit MIPS
 * ABI -- confirm against the toolchain's predefined macros), the doubleword
 * instructions daddu/dsll are selected; in every other case the 32-bit
 * instructions addu/sll are selected.
 *
 * Each macro expands to a string literal that ends in a space, so it can be
 * placed directly in front of an operand string and rely on adjacent string
 * literal concatenation, e.g.  PTR_ADDU "$13, $13, $9\r\n".
 */
#if defined(_ABI64) && _MIPS_SIM == _ABI64
|
||||
# define PTR_ADDU "daddu "
|
||||
# define PTR_SLL "dsll "
|
||||
#else
|
||||
# define PTR_ADDU "addu "
|
||||
# define PTR_SLL "sll "
|
||||
#endif
|
||||
|
||||
#define SIZEOF_MMWORD 8
|
||||
#define BYTE_BIT 8
|
||||
|
||||
Reference in New Issue
Block a user