Merge remote-tracking branch 'origin/GP_799-James-x64_vector_op_fixes'

This commit is contained in:
ghidra1
2021-03-26 15:53:05 -04:00
3 changed files with 99 additions and 41 deletions
@@ -44,10 +44,17 @@
}
# MOVUPS 4-130 PAGE 1250 LINE 64874
# TODO in general, what do we do with the zext of only the register case; needs investigation
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the XmmReg2_m128 header and the XmmReg2_m128 assignment look like the superseded ones - confirm against the real file.
:VMOVUPS XmmReg2_m128, XmmReg1 is $(VEX_NONE) & $(VEX_L128) & $(VEX_PRE_NONE) & $(VEX_0F) & $(VEX_WIG); byte=0x11; XmmReg1 ... & XmmReg2_m128
# break this into two constructors to handle the zext for the register destination case
# Register-destination form: the pattern requires mod = 3 and binds XmmReg2 together with YmmReg2.
:VMOVUPS XmmReg2, XmmReg1 is $(VEX_NONE) & $(VEX_L128) & $(VEX_PRE_NONE) & $(VEX_0F) & $(VEX_WIG); byte=0x11; XmmReg1 & (mod = 3 & XmmReg2 & YmmReg2)
{
XmmReg2_m128 = XmmReg1;
# copy the source register into the destination register
XmmReg2 = XmmReg1;
# then write the zero-extended value into YmmReg2 (presumably the wider register containing XmmReg2 - confirm)
YmmReg2 = zext(XmmReg2);
}
# MOVUPS 4-130 PAGE 1250 LINE 64874
# Memory-destination form of VMOVUPS (opcode byte 0x11): stores XmmReg1 to the m128 operand.
# Unlike the register-destination constructor, no zext follows the store.
:VMOVUPS m128, XmmReg1 is $(VEX_NONE) & $(VEX_L128) & $(VEX_PRE_NONE) & $(VEX_0F) & $(VEX_WIG); byte=0x11; XmmReg1 ... & m128
{
m128 = XmmReg1;
}
# MOVUPS 4-130 PAGE 1250 LINE 64876
+40 -20
View File
@@ -6585,10 +6585,18 @@ define pcodeop pcmpeqb;
Reg32 = zext(temp:2);
}
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the Reg32_m16 header and the Reg32_m16 assignment look like the superseded ones - confirm against the real file.
:PEXTRW Reg32_m16, XmmReg1, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x3A; byte=0x15; XmmReg1 ... & Reg32_m16; imm8
#break PEXTRW with reg/mem dest into two constructors to handle zext in register case
# Register-destination form: the pattern requires mod = 3 and binds Rmr32 together with check_Rmr32_dest.
:PEXTRW Rmr32, XmmReg1, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x3A; byte=0x15; (mod = 3 & Rmr32 & check_Rmr32_dest) & XmmReg1 ; imm8
{
# the low three bits of imm8 pick one of eight 16-bit words; shift that word down to bit 0
temp:16 = XmmReg1 >> ( (imm8 & 0x07) * 16 );
Reg32_m16 = zext(temp:2);
# keep only the low two bytes and zero-extend them into the destination register
Rmr32 = zext(temp:2);
# emit check_Rmr32_dest's p-code here, after the write to Rmr32; that subconstructor is defined outside this view
build check_Rmr32_dest;
}
# Memory-destination form of PEXTRW: extracts one 16-bit word of XmmReg1 and stores it to m16.
:PEXTRW m16, XmmReg1, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x3A; byte=0x15; XmmReg1 ... & m16; imm8
{
# the low three bits of imm8 pick one of eight 16-bit words; shift that word down to bit 0
temp:16 = XmmReg1 >> ( (imm8 & 0x07) * 16 );
# store only the low two bytes; no zero-extension is applied in the memory form
m16 = temp:2;
}
define pcodeop phaddd;
@@ -6807,16 +6815,19 @@ Order3: order3 is imm8 [ order3 = (((imm8 >> 6) & 0x3) << 5); ] { export *[const
# Register-source form of PSHUFD (xmmmod=3): each 32-bit slice of XmmReg1 receives the low four bytes
# of the source shifted right by Order0..Order3. Order3 is computed from imm8 bits 6-7 scaled by 32 (its
# definition sits just above this constructor); Order0-Order2 are presumably the analogous lower fields - confirm.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the statements reading XmmReg2 directly look superseded by the original_XmmReg2 ones - confirm against the real file.
:PSHUFD XmmReg1, XmmReg2, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x70; xmmmod=3 & XmmReg1 & XmmReg2 ; imm8 & Order0 & Order1 & Order2 & Order3
{
shifted:16 = XmmReg2 >> Order0;
#in case XmmReg1 and XmmReg2 are the same register
# snapshot the source before any slice of XmmReg1 is written
local original_XmmReg2:16 = XmmReg2;
shifted:16 = original_XmmReg2 >> Order0;
XmmReg1[0,32] = shifted:4;
shifted = XmmReg2 >> Order1;
shifted = original_XmmReg2 >> Order1;
XmmReg1[32,32] = shifted:4;
shifted = XmmReg2 >> Order2;
shifted = original_XmmReg2 >> Order2;
XmmReg1[64,32] = shifted:4;
shifted = XmmReg2 >> Order3;
shifted = original_XmmReg2 >> Order3;
XmmReg1[96,32] = shifted:4;
}
@@ -7054,14 +7065,17 @@ define pcodeop psraw;
# Register-count form of PSRLW (xmmmod = 3): shifts each of the eight 16-bit slices of XmmReg1 right
# by the low 64 bits of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the statements that re-read XmmReg2[0,64] on every slice look superseded by the `count` ones - confirm against the real file.
:PSRLW XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0xD1; xmmmod = 3 & XmmReg1 & XmmReg2
{
XmmReg1[0,16] = XmmReg1[0,16] >> XmmReg2[0,64];
XmmReg1[16,16] = XmmReg1[16,16] >> XmmReg2[0,64];
XmmReg1[32,16] = XmmReg1[32,16] >> XmmReg2[0,64];
XmmReg1[48,16] = XmmReg1[48,16] >> XmmReg2[0,64];
XmmReg1[64,16] = XmmReg1[64,16] >> XmmReg2[0,64];
XmmReg1[80,16] = XmmReg1[80,16] >> XmmReg2[0,64];
XmmReg1[96,16] = XmmReg1[96,16] >> XmmReg2[0,64];
XmmReg1[112,16] = XmmReg1[112,16] >> XmmReg2[0,64];
#save this off in case XmmReg1 and XmmReg2 are the same register
# the count is read once, before any slice of XmmReg1 is overwritten
local count:8 = XmmReg2[0,64];
XmmReg1[0,16] = XmmReg1[0,16] >> count;
XmmReg1[16,16] = XmmReg1[16,16] >> count;
XmmReg1[32,16] = XmmReg1[32,16] >> count;
XmmReg1[48,16] = XmmReg1[48,16] >> count;
XmmReg1[64,16] = XmmReg1[64,16] >> count;
XmmReg1[80,16] = XmmReg1[80,16] >> count;
XmmReg1[96,16] = XmmReg1[96,16] >> count;
XmmReg1[112,16] = XmmReg1[112,16] >> count;
}
:PSRLW XmmReg2, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x71; mod = 0b11 & reg_opcode=2 & XmmReg2; imm8
@@ -7086,10 +7100,13 @@ define pcodeop psraw;
# Register-count form of PSRLD (xmmmod = 3): shifts each of the four 32-bit slices of XmmReg1 right
# by the low 64 bits of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the statements that re-read XmmReg2[0,64] on every slice look superseded by the `count` ones - confirm against the real file.
:PSRLD XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0xD2; xmmmod = 3 & XmmReg1 & XmmReg2
{
XmmReg1[0,32] = XmmReg1[0,32] >> XmmReg2[0,64];
XmmReg1[32,32] = XmmReg1[32,32] >> XmmReg2[0,64];
XmmReg1[64,32] = XmmReg1[64,32] >> XmmReg2[0,64];
XmmReg1[96,32] = XmmReg1[96,32] >> XmmReg2[0,64];
#save this off in case XmmReg1 and XmmReg2 are the same register
# the count is read once, before any slice of XmmReg1 is overwritten; no explicit size is given for the local here
local count = XmmReg2[0,64];
XmmReg1[0,32] = XmmReg1[0,32] >> count;
XmmReg1[32,32] = XmmReg1[32,32] >> count;
XmmReg1[64,32] = XmmReg1[64,32] >> count;
XmmReg1[96,32] = XmmReg1[96,32] >> count;
}
:PSRLD XmmReg2, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x72; mod = 0b11 & reg_opcode=2 & XmmReg2; imm8
@@ -7108,8 +7125,11 @@ define pcodeop psraw;
# Register-count form of PSRLQ (xmmmod = 3): shifts both 64-bit slices of XmmReg1 right
# by the low 64 bits of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the statements that re-read XmmReg2[0,64] on every slice look superseded by the `count` ones - confirm against the real file.
:PSRLQ XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0xD3; xmmmod = 3 & XmmReg1 & XmmReg2
{
XmmReg1[0,64] = XmmReg1[0,64] >> XmmReg2[0,64];
XmmReg1[64,64] = XmmReg1[64,64] >> XmmReg2[0,64];
#save this off in case XmmReg1 and XmmReg2 are the same register
# the count is read once, before either slice of XmmReg1 is overwritten; no explicit size is given for the local here
local count = XmmReg2[0,64];
XmmReg1[0,64] = XmmReg1[0,64] >> count;
XmmReg1[64,64] = XmmReg1[64,64] >> count;
}
:PSRLQ XmmReg2, imm8 is vexMode=0 & $(PRE_66) & byte=0x0F; byte=0x73; mod = 0b11 & reg_opcode=2 & XmmReg2; imm8
@@ -1,24 +1,47 @@
# Due to limitations on variable-length matching that preclude opcode matching afterwards, all memory-addressing forms of PCLMULQDQ are decoded as PCLMULQDQ rather than under the macro names.
# The display is non-standard, but the semantics and decompilation should be correct.
# XOR-accumulating (carry-less) multiply: for every bit position i in 0..63 that is set in src1,
# XOR (src2 << i) into a 16-byte accumulator, then write the accumulator to dest.
# The fixed-selector constructors visible below pass src1 and src2 as 16-byte locals zero-extended from 64-bit halves.
macro pclmul(src1, src2, dest) {
local i:4 = 0:4;
local temp:16 = 0;
<start>
# leave the loop once i exceeds 63, so bits 0..63 of src1 are examined
if (i > 63:4) goto <end>;
# bit i of src1 clear: nothing to accumulate for this position
if ((src1 & (1 << i)) == 0) goto <skip>;
temp = temp ^ (src2 << i);
<skip>
i = i+1;
goto <start>;
<end>
# dest is written only once, after the loop has finished reading src1 and src2
dest = temp;
}
# Register form with the trailing byte fixed to 0x00: operands are bits 0..63 of XmmReg1 and bits 0..63 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the integer-multiply assignment looks superseded by the pclmul call - confirm against the real file.
:PCLMULLQLQDQ XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; xmmmod=3 & XmmReg1 & XmmReg2; byte=0x00
{
XmmReg1 = zext(XmmReg2[0,64]) * zext(XmmReg1[0,64]);
# copy both halves into locals first: XmmReg1 is also the destination handed to pclmul
local src1:16 = zext(XmmReg1[0,64]);
local src2:16 = zext(XmmReg2[0,64]);
pclmul(src1,src2,XmmReg1);
}
# Register form with the trailing byte fixed to 0x01: operands are bits 64..127 of XmmReg1 and bits 0..63 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the integer-multiply assignment looks superseded by the pclmul call - confirm against the real file.
:PCLMULHQLQDQ XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; xmmmod=3 & XmmReg1 & XmmReg2; byte=0x01
{
XmmReg1 = zext(XmmReg2[0,64]) * zext(XmmReg1[64,64]);
# copy both halves into locals first: XmmReg1 is also the destination handed to pclmul
local src1:16 = zext(XmmReg1[64,64]);
local src2:16 = zext(XmmReg2[0,64]);
pclmul(src1,src2,XmmReg1);
}
# Register form with the trailing byte fixed to 0x10: operands are bits 0..63 of XmmReg1 and bits 64..127 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the integer-multiply assignment looks superseded by the pclmul call - confirm against the real file.
:PCLMULLQHQDQ XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; xmmmod=3 & XmmReg1 & XmmReg2; byte=0x10
{
XmmReg1 = zext(XmmReg2[64,64]) * zext(XmmReg1[0,64]);
# copy both halves into locals first: XmmReg1 is also the destination handed to pclmul
local src1:16 = zext(XmmReg1[0,64]);
local src2:16 = zext(XmmReg2[64,64]);
pclmul(src1,src2,XmmReg1);
}
# Register form with the trailing byte fixed to 0x11: operands are bits 64..127 of XmmReg1 and bits 64..127 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the integer-multiply assignment looks superseded by the pclmul call - confirm against the real file.
:PCLMULHQHQDQ XmmReg1, XmmReg2 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; xmmmod=3 & XmmReg1 & XmmReg2; byte=0x11
{
XmmReg1 = zext(XmmReg2[64,64]) * zext(XmmReg1[64,64]);
# copy both halves into locals first: XmmReg1 is also the destination handed to pclmul
local src1:16 = zext(XmmReg1[64,64]);
local src2:16 = zext(XmmReg2[64,64]);
pclmul(src1,src2,XmmReg1);
}
:PCLMULQDQ XmmReg1, XmmReg2, imm8 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; xmmmod=3 & XmmReg1 & XmmReg2; imm8 & imm8_4 & imm8_0
@@ -41,7 +64,7 @@
<done2>
XmmReg1 = src2 * src1;
pclmul(src1,src2,XmmReg1);
}
:PCLMULQDQ XmmReg, m128, imm8 is vexMode=0 & $(PRE_66) & byte=0x0f; byte=0x3a; byte=0x44; XmmReg ... & m128; imm8 & imm8_4 & imm8_0
@@ -64,31 +87,39 @@
<done2>
XmmReg = src2 * src1;
pclmul(src1,src2,XmmReg);
}
# VEX register form with the trailing byte fixed to 0x00: operands are bits 0..63 of vexVVVV_XmmReg and bits 0..63 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the `tmp` multiply and `YmmReg1 = zext(tmp)` look superseded by the pclmul call and `zext(XmmReg1)` - confirm against the real file.
:VPCLMULLQLQDQ XmmReg1, vexVVVV_XmmReg, XmmReg2 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; xmmmod=3 & (XmmReg1 & YmmReg1) & XmmReg2; byte=0x00
{
tmp:16 = zext(XmmReg2[0,64]) * zext(vexVVVV_XmmReg[0,64]);
YmmReg1 = zext(tmp);
local src1:16 = zext(vexVVVV_XmmReg[0,64]);
local src2:16 = zext(XmmReg2[0,64]);
# pclmul writes its result into XmmReg1
pclmul(src1,src2,XmmReg1);
# then the zero-extended result is written to YmmReg1, which is bound in the same pattern group as XmmReg1
YmmReg1 = zext(XmmReg1);
}
# VEX register form with the trailing byte fixed to 0x01: operands are bits 64..127 of vexVVVV_XmmReg and bits 0..63 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the `tmp` multiply and `YmmReg1 = zext(tmp)` look superseded by the pclmul call and `zext(XmmReg1)` - confirm against the real file.
:VPCLMULHQLQDQ XmmReg1, vexVVVV_XmmReg, XmmReg2 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; xmmmod=3 & (XmmReg1 & YmmReg1) & XmmReg2; byte=0x01
{
tmp:16 = zext(XmmReg2[0,64]) * zext(vexVVVV_XmmReg[64,64]);
YmmReg1 = zext(tmp);
local src1:16 = zext(vexVVVV_XmmReg[64,64]);
local src2:16 = zext(XmmReg2[0,64]);
# pclmul writes its result into XmmReg1
pclmul(src1,src2,XmmReg1);
# then the zero-extended result is written to YmmReg1, which is bound in the same pattern group as XmmReg1
YmmReg1 = zext(XmmReg1);
}
# VEX register form with the trailing byte fixed to 0x10: operands are bits 0..63 of vexVVVV_XmmReg and bits 64..127 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the `tmp` multiply and `YmmReg1 = zext(tmp)` look superseded by the pclmul call and `zext(XmmReg1)` - confirm against the real file.
:VPCLMULLQHQDQ XmmReg1, vexVVVV_XmmReg, XmmReg2 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; xmmmod=3 & (XmmReg1 & YmmReg1) & XmmReg2; byte=0x10
{
tmp:16 = zext(XmmReg2[64,64]) * zext(vexVVVV_XmmReg[0,64]);
YmmReg1 = zext(tmp);
local src1:16 = zext(vexVVVV_XmmReg[0,64]);
local src2:16 = zext(XmmReg2[64,64]);
# pclmul writes its result into XmmReg1
pclmul(src1,src2,XmmReg1);
# then the zero-extended result is written to YmmReg1, which is bound in the same pattern group as XmmReg1
YmmReg1 = zext(XmmReg1);
}
# VEX register form with the trailing byte fixed to 0x11: operands are bits 64..127 of vexVVVV_XmmReg and bits 64..127 of XmmReg2.
# NOTE(review): superseded and replacement lines appear interleaved in this view (no +/- markers survive);
# the `tmp` multiply and `YmmReg1 = zext(tmp)` look superseded by the pclmul call and `zext(XmmReg1)` - confirm against the real file.
:VPCLMULHQHQDQ XmmReg1, vexVVVV_XmmReg, XmmReg2 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; xmmmod=3 & (XmmReg1 & YmmReg1) & XmmReg2; byte=0x11
{
tmp:16 = zext(XmmReg2[64,64]) * zext(vexVVVV_XmmReg[64,64]);
YmmReg1 = zext(tmp);
local src1:16 = zext(vexVVVV_XmmReg[64,64]);
local src2:16 = zext(XmmReg2[64,64]);
# pclmul writes its result into XmmReg1
pclmul(src1,src2,XmmReg1);
# then the zero-extended result is written to YmmReg1, which is bound in the same pattern group as XmmReg1
YmmReg1 = zext(XmmReg1);
}
:VPCLMULQDQ XmmReg1, vexVVVV_XmmReg, XmmReg2, imm8 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; xmmmod=3 & (XmmReg1 & YmmReg1) & XmmReg2; imm8 & imm8_4 & imm8_0
@@ -111,8 +142,8 @@
<done2>
tmp:16 = src2 * src1;
YmmReg1 = zext(tmp);
pclmul(src1,src2,XmmReg1);
YmmReg1 = zext(XmmReg1);
}
:VPCLMULQDQ XmmReg1, vexVVVV_XmmReg, m128, imm8 is $(VEX_NDS) & $(VEX_L128) & $(VEX_PRE_66) & $(VEX_0F3A) & $(VEX_WIG) & vexVVVV_XmmReg; byte=0x44; (XmmReg1 & YmmReg1) ... & m128; imm8 & imm8_4 & imm8_0
@@ -136,7 +167,7 @@
<done2>
tmp:16 = src2 * src1;
YmmReg1 = zext(tmp);
pclmul(src1,src2,XmmReg1);
YmmReg1 = zext(XmmReg1);
}