@@ -126,13 +126,10 @@ uint EmitPass::DecideInstanceAndSlice(llvm::BasicBlock& blk, SDAG& sdag, bool& s
126126
127127 bool hasValidDestination = (sdag.m_root->getType()->getTypeID() != llvm::Type::VoidTyID);
128128
129- // Special case for inline asm with multiple outputs, we will not be able to handle a struct type destination
130- if (CallInst * call = dyn_cast<CallInst>( sdag.m_root))
129+ // Disable for struct type destinations
130+ if (sdag.m_root->getType()->isStructTy( ))
131131 {
132- if (call->isInlineAsm() && call->getType()->isStructTy())
133- {
134- hasValidDestination = false;
135- }
132+ hasValidDestination = false;
136133 }
137134
138135 if (hasValidDestination)
@@ -2004,6 +2001,62 @@ void EmitPass::EmitIntegerTruncWithSat(bool isSignedDst, bool isSignedSrc, const
20042001 m_encoder->Push();
20052002}
20062003
2004+ void EmitPass::EmitCopyToStruct(InsertValueInst* inst, const DstModifier& DstMod)
2005+ {
2006+ auto Iter = m_pattern->StructValueInsertMap.find(inst);
2007+ assert(Iter != m_pattern->StructValueInsertMap.end());
2008+
2009+ StructType* sTy = dyn_cast<StructType>(inst->getType());
2010+ auto& DL = inst->getParent()->getParent()->getParent()->getDataLayout();
2011+ const StructLayout* SL = DL.getStructLayout(sTy);
2012+
2013+ // Create a new struct variable with constant values initialized
2014+ Constant* initValue = Iter->second.first;
2015+ if (initValue->getValueID() == llvm::Value::ValueTy::UndefValueVal)
2016+ {
2017+ initValue = nullptr;
2018+ }
2019+ CVariable* DstV = m_currShader->GetStructVariable(inst, initValue);
2020+
2021+ unsigned nLanes = DstV->IsUniform() ? 1 : numLanes(m_currShader->m_dispatchSize);
2022+
2023+ // Copy each source value into the struct offset
2024+ auto srcList = Iter->second.second;
2025+ for (auto src : srcList)
2026+ {
2027+ CVariable* SrcV = GetSrcVariable(src.first);
2028+ unsigned idx = src.second;
2029+ unsigned elementOffset = (unsigned)SL->getElementOffset(idx);
2030+ CVariable* elementDst = nullptr;
2031+ if (SrcV->IsUniform())
2032+ elementDst = m_currShader->GetNewAlias(DstV, SrcV->GetType(), elementOffset * nLanes, SrcV->GetNumberElement() * nLanes);
2033+ else
2034+ elementDst = m_currShader->GetNewAlias(DstV, SrcV->GetType(), elementOffset * nLanes, SrcV->GetNumberElement());
2035+
2036+ emitCopyAll(elementDst, SrcV, sTy->getStructElementType(idx));
2037+ }
2038+ }
2039+
2040+ void EmitPass::EmitCopyFromStruct(Value* value, unsigned idx, const DstModifier& DstMod)
2041+ {
2042+ assert(isa<Instruction>(value));
2043+ CVariable* SrcV = GetSymbol(value);
2044+ StructType* sTy = dyn_cast<StructType>(value->getType());
2045+ auto& DL = cast<Instruction>(value)->getParent()->getParent()->getParent()->getDataLayout();
2046+ const StructLayout* SL = DL.getStructLayout(sTy);
2047+
2048+ // For extract value, src and dest should share uniformity
2049+ assert(m_destination && m_destination->IsUniform() == SrcV->IsUniform());
2050+ bool isUniform = SrcV->IsUniform();
2051+ unsigned nLanes = SrcV->IsUniform() ? 1 : numLanes(m_currShader->m_dispatchSize);
2052+ unsigned elementOffset = (unsigned)SL->getElementOffset(idx) * nLanes;
2053+ //unsigned elementSize = (unsigned)DL.getTypeAllocSize(sTy->getStructElementType(idx)) * nLanes;
2054+ SrcV = m_currShader->GetNewAlias(SrcV, m_destination->GetType(), elementOffset, m_destination->GetNumberElement(), isUniform);
2055+
2056+ // Copy from struct to dest
2057+ emitCopyAll(m_destination, SrcV, sTy->getStructElementType(idx));
2058+ }
2059+
20072060void EmitPass::EmitAddPair(GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod) {
20082061 Value* L, * H;
20092062 std::tie(L, H) = getPairOutput(GII);
@@ -9838,34 +9891,84 @@ void EmitPass::emitStackFuncExit(llvm::ReturnInst* inst)
98389891 llvm::Type* RetTy = F->getReturnType();
98399892 if (!RetTy->isVoidTy())
98409893 {
9894+ bool RetOnStack = false;
9895+ unsigned RetSize = 0;
9896+ unsigned nLanes = numLanes(m_currShader->m_dispatchSize);
98419897 CVariable* Dst = m_currShader->GetRETV();
98429898 CVariable* Src = GetSymbol(inst->getReturnValue());
9843- if (Dst->GetSize() >= Src->GetSize())
9899+
9900+ if (Src->GetType() == ISA_TYPE_BOOL)
9901+ {
9902+ CVariable* one = m_currShader->ImmToVariable(1, ISA_TYPE_W);
9903+ CVariable* zero = m_currShader->ImmToVariable(0, ISA_TYPE_W);
9904+ CVariable* DstAlias = m_currShader->GetNewAlias(Dst, ISA_TYPE_W, 0, nLanes, false);
9905+ m_encoder->Select(Src, DstAlias, one, zero);
9906+ RetSize = nLanes * SIZE_WORD;
9907+ }
9908+ else if (Src->IsUniform())
98449909 {
9845- if (Src->GetType() == ISA_TYPE_BOOL)
9910+ // If Src is uniform, we have to vectorize it since caller cannot assume uniform return value
9911+ RetSize = nLanes * Src->GetSize();
9912+ if (Dst->GetSize() < RetSize)
98469913 {
9847- CVariable* one = m_currShader->ImmToVariable(1, ISA_TYPE_W);
9848- CVariable* zero = m_currShader->ImmToVariable(0, ISA_TYPE_W);
9849- CVariable* DstAlias = m_currShader->GetNewAlias(Dst, ISA_TYPE_W, 0, numLanes(m_currShader->m_dispatchSize), false);
9850- m_encoder->Select(Src, DstAlias, one, zero);
9851- uint RetSize = numLanes(m_currShader->m_dispatchSize) * SIZE_WORD;
9852- m_encoder->SetStackFunctionRetSize((RetSize + getGRFSize() - 1) / getGRFSize());
9914+ // If return register cannot hold the value, create a new variable to hold it and return on stack
9915+ RetOnStack = true;
9916+ Dst = m_currShader->GetNewVariable(Src->GetNumberElement() * nLanes, Src->GetType(), Src->GetAlign(), false);
9917+ }
9918+ if (RetTy->isStructTy())
9919+ {
9920+ // For struct uniform to non-uniform copy, we need to expand each element separately
9921+ // since we use the SoA (struct of arrays) layout
9922+ StructType* STy = dyn_cast<StructType>(RetTy);
9923+ auto& DL = inst->getParent()->getParent()->getParent()->getDataLayout();
9924+ const StructLayout* SL = DL.getStructLayout(STy);
9925+
9926+ // Do uniform to non-uniform copy for each struct element
9927+ for (unsigned i = 0; i < STy->getNumElements(); i++)
9928+ {
9929+ unsigned elementOffset = (unsigned)SL->getElementOffset(i);
9930+ unsigned elementSize = (unsigned)DL.getTypeAllocSize(STy->getElementType(i));
9931+ CVariable* srcElement = m_currShader->GetNewAlias(Src, ISA_TYPE_B, elementOffset, elementSize, true);
9932+ CVariable* dstElement = m_currShader->GetNewAlias(Dst, ISA_TYPE_B, elementOffset * nLanes, elementSize * nLanes, false);
9933+ emitCopyAll(dstElement, srcElement, STy->getElementType(i));
9934+ }
98539935 }
98549936 else
98559937 {
9856- if (Dst->GetType() != Src->GetType() || Src->IsUniform() != Dst->IsUniform() )
9938+ if (Dst->GetType() != Src->GetType())
98579939 {
9858- Dst = m_currShader->GetNewAlias(Dst, Src->GetType(), 0, Src->GetNumberElement(), Src->IsUniform() );
9940+ Dst = m_currShader->GetNewAlias(Dst, Src->GetType(), 0, Src->GetNumberElement() * nLanes, false );
98599941 }
98609942 emitCopyAll(Dst, Src, RetTy);
9861- m_encoder->SetStackFunctionRetSize((Src->GetSize() + getGRFSize() - 1) / getGRFSize());
9943+ }
9944+
9945+ if (RetOnStack)
9946+ {
9947+ Src = Dst;
98629948 }
98639949 }
9864- else
9950+ else // Non-uniform copy
9951+ {
9952+ RetSize = Src->GetSize();
9953+ if (Dst->GetSize() < RetSize)
9954+ {
9955+ RetOnStack = true;
9956+ }
9957+ else
9958+ {
9959+ if (Dst->GetType() != Src->GetType())
9960+ {
9961+ Dst = m_currShader->GetNewAlias(Dst, Src->GetType(), 0, Src->GetNumberElement(), false);
9962+ }
9963+ emitCopyAll(Dst, Src, RetTy);
9964+ }
9965+ }
9966+
9967+ if (RetOnStack)
98659968 {
98669969 // write return value onto stack at (SP+n)
98679970 // emit oword_stores
9868- int RmnBytes = Src->GetSize() ;
9971+ int RmnBytes = RetSize ;
98699972 uint32_t WrtBytes = 0;
98709973 do
98719974 {
@@ -9911,6 +10014,10 @@ void EmitPass::emitStackFuncExit(llvm::ReturnInst* inst)
991110014 // end of writing return-value to stack
991210015 m_encoder->SetStackFunctionRetSize(0);
991310016 }
10017+ else
10018+ {
10019+ m_encoder->SetStackFunctionRetSize((RetSize + getGRFSize() - 1) / getGRFSize());
10020+ }
991410021 }
991510022 else
991610023 {
@@ -15642,6 +15749,13 @@ void EmitPass::emitCopyAll(CVariable* Dst, CVariable* Src, llvm::Type* Ty)
1564215749 unsigned NElts = Ty->getVectorNumElements();
1564315750 emitVectorCopy(Dst, Src, NElts);
1564415751 }
15752+ else if (Ty->isStructTy())
15753+ {
15754+ assert(Src->IsUniform() == Dst->IsUniform());
15755+ assert(Dst->GetNumberElement() == Src->GetNumberElement());
15756+ assert(Dst->GetType() == ISA_TYPE_B && Src->GetType() == ISA_TYPE_B);
15757+ emitVectorCopy(Dst, Src, Src->GetNumberElement());
15758+ }
1564515759 else
1564615760 {
1564715761 assert(Ty->isSingleValueType() && "not supported");
0 commit comments