CSCD70 Assignment2 DataFlow

2023-12-17

学习笔记 / 静态分析

字数统计: 4.4k | 阅读时长≈ 22 分钟

理论部分学习: 数据流分析笔记
只展示部分代码,完整代码见github.

数据流分析框架

类

设计一个数据流分析框架,需要先清楚数据流分析有哪些部分.

再细分一下就是:

Domain

Domain是数据流分析所关心的对象,比如到达定值的对象是定值,活跃变量的对象是变量……

写好一些常用的分析对象,表达式和变量.
一个分析对象应该提供从IR指令中创建的方法和比较相等的方法.

struct Expression final : DomainBase<Expression> {
  const unsigned Opcode;
  const llvm::Value *const LHS = nullptr, *const RHS = nullptr;
  Expression(const llvm::BinaryOperator &BinaryOp)
      : Opcode(BinaryOp.getOpcode()), LHS(BinaryOp.getOperand(0)),
        RHS(BinaryOp.getOperand(1)) {}
  Expression(const unsigned Opcode, const llvm::Value *const LHS,
             const llvm::Value *const RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS) {}
  bool operator==(const Expression &Other) const final {
    /// @todo(CSCD70) Please complete this method.
    if(llvm::Instruction::isCommutative(Opcode))
    {
      return 
      ((Opcode == Other.Opcode) && (LHS == Other.LHS) && (RHS == Other.RHS)) ||
      ((Opcode == Other.Opcode) && (LHS == Other.RHS) && (RHS == Other.LHS));
    }
    else
      return ((Opcode == Other.Opcode) && (LHS == Other.LHS) && (RHS == Other.RHS));

  }

  static Expression ExpressionFromIn(const llvm::Instruction& In)
  {
    if(llvm::isa<llvm::BinaryOperator>(In))
    {
      return Expression(In.getOpcode(),In.getOperand(0),In.getOperand(1));
    }
  }

struct Variable final : DomainBase<Variable> {
  const llvm::Value *const Var;
  Variable(const llvm::Value *const Var) : Var(Var) {}

  bool operator==(const Variable &Other) const { return Var == Other.Var; }

  static Variable VariableFromIn(const llvm::Instruction& In)
  {
    if(llvm::isa<llvm::Value>(In))
    {
      return Variable(&In);
    }
  }
}

在框架中用DomainVector存储分析的Domain,DomainIdMap用来做从对象到索引的转换.

using DomainIdMap_t = std::unordered_map<TDerivedDomainElem, size_t>;
using DomainVector_t = std::vector<TDerivedDomainElem>;

DomainIdMap_t DomainIdMap;
DomainVector_t DomainVector;

光存储分析对象没用,还需要存储每条指令对于每个对象的分析结果,BVs是基本块的INPUT(注意是INPUT不是IN,随分析方向不同而含义不同),InstDomainValMap是一条指令的OUTPUT(同上).
两者合起来就有了对每条指令完整的IN,OUT(基本块的INPUT同时也是基本块第一条指令的INPUT).

1 2	std::unordered_map<const llvm::BasicBlock , DomainVal_t> BVs; std::unordered_map<const llvm::Instruction , DomainVal_t> InstDomainValMap;

以及对单个分析对象的分析结果表示

/// @brief For each domain element type, we have to define:
///        - The default constructor
///        - The meet operators (for intersect/union)
///        - The top element
///        - Conversion to bool (for logging)
struct Bool {
  bool Value = false;
  Bool operator&(const Bool &Other) const {
    return {.Value = Value && Other.Value};
  }
  Bool operator|(const Bool &Other) const {
    return {.Value = Value || Other.Value};
  }

  static Bool top() { return {.Value = true}; }
  operator bool() const { return Value; }
};

Direction

分析方向体现在遍历指令的顺序和获取MeetOperands的操作上.
Forward:

  virtual MeetOperands_t getMeetOperands(const llvm::BasicBlock &BB) const {
    MeetOperands_t Operands;
    /// @todo(CSCD70) Please complete this method.
    for(const llvm::BasicBlock* preBB : getMeetBBConstRange(BB))
    {
      DomainVal_t domain = InstDomainValMap.at(&(preBB->back()));
      Operands.push_back(domain);
    }
    return Operands;
  }

  MeetBBConstRange_t
  getMeetBBConstRange(const llvm::BasicBlock &BB) const final {
    return llvm::predecessors(&BB);
  }
  InstConstRange_t getInstConstRange(const llvm::BasicBlock &BB) const final {
    return make_range(BB.begin(), BB.end());
  }
  BBConstRange_t getBBConstRange(const llvm::Function &F) const final {
    return make_range(F.begin(), F.end());
  }
};

Backward:

  virtual MeetOperands_t getMeetOperands(const llvm::BasicBlock &BB) const {
    MeetOperands_t Operands;
    /// @todo(CSCD70) Please complete this method.
    for(const llvm::BasicBlock* preBB : getMeetBBConstRange(BB))
    {
      DomainVal_t domain = InstDomainValMap.at(&(preBB->front()));
      Operands.push_back(domain);
    }
    return Operands;
  }

  MeetBBConstRange_t
  getMeetBBConstRange(const llvm::BasicBlock &BB) const final {
    return llvm::successors(&BB);
  }
  InstConstRange_t getInstConstRange(const llvm::BasicBlock &BB) const final {
    return make_range(BB.rbegin(), BB.rend());
  }
  BBConstRange_t getBBConstRange(const llvm::Function &F) const final {
    return make_range(F.rbegin(), F.rend());
  }
};

对于Backward的分析,
llvm-16没有提供逆序遍历Function中Basic Block的接口,我给它加上了,感觉没什么理由不加这个接口.
其实之前版本(比如llvm-14)中也没有在Function类中提供这个接口,不过其的getBasicBlockList是public的,用户以F.getBasicBlockList().rbegin()的形式来实现backward的迭代.感觉像是改了访问属性但忘了加另外的接口.

//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
//
iterator                begin()       { return BasicBlocks.begin(); }
const_iterator          begin() const { return BasicBlocks.begin(); }
iterator                end  ()       { return BasicBlocks.end();   }
const_iterator          end  () const { return BasicBlocks.end();   }

//===--------------------------------------------------------------------===//
// BasicBlock iterator backwarding functions
//
reverse_iterator        rbegin()       { return BasicBlocks.rbegin(); }
const_reverse_iterator  rbegin() const { return BasicBlocks.rbegin(); }
reverse_iterator        rend  ()       { return BasicBlocks.rend();   }
const_reverse_iterator  rend  () const { return BasicBlocks.rend();   }

这是llvm对修改访问属性原因的描述:

1
2
3

/// This is deliberately private because we have implemented an adequate set
/// of functions to modify the list, including Function::splice(),
/// Function::erase(), Function::insert() etc.

Meet Operator , Initial Condition

template <typename TValue> struct Intersect final : MeetOpBase<TValue> {
  using DomainVal_t = typename MeetOpBase<TValue>::DomainVal_t;

  DomainVal_t operator()(const DomainVal_t &LHS,
                         const DomainVal_t &RHS) const final {

    /// @todo(CSCD70) Please complete this method.
    DomainVal_t result = (*this).top(LHS.size());
    for(int i = 0;i < LHS.size();++i)
    {
      result[i] = LHS[i] & RHS[i];
    }
    return result;
  }
  DomainVal_t top(const std::size_t DomainSize) const final {

    /// @todo(CSCD70) Please complete this method.

    return DomainVal_t(DomainSize,TValue::top());
  }

};


/// @todo(CSCD70) Please add another subclass for the Union meet operator.
template <typename TValue> struct Unite final : MeetOpBase<TValue> {
  using DomainVal_t = typename MeetOpBase<TValue>::DomainVal_t;

  DomainVal_t operator()(const DomainVal_t &LHS,
                         const DomainVal_t &RHS) const final {

    /// @todo(CSCD70) Please complete this method.
    DomainVal_t result = (*this).top(LHS.size());
    for(int i = 0;i < LHS.size();++i)
    {
      result[i] = LHS[i] | RHS[i];
    }
    return result;
  }
  DomainVal_t top(const std::size_t DomainSize) const final {

    /// @todo(CSCD70) Please complete this method.

    return DomainVal_t(DomainSize,TValue());
  }

};

从注释来看top是用来初始化,那么就应该是DomainVal_t的初始值,且该函数要求派生类重载,意味着这一”top”的概念在交集和并集中不同,应该就是代表数据流分析的Initial condition.这一点与南京大学的软件分析课程有点冲突.

  /// @brief Return a domain value that represents the top element, used when
  ///        doing the initialization.
  /// @param DomainSize
  /// @return
  virtual DomainVal_t top(const std::size_t DomainSize) const = 0;
};

流程

先遍历一次所有指令,从指令中得到分析的Domain.再遍历一次进行初始化对BVs和InstDomainValMap进行初始化,因为它们即将作为分析的INPUT.

然后便开始一轮一轮的分析traverseCFG,直到连续两轮结果相同为止,最后输出结果.

virtual AnalysisResult_t run(llvm::Function &F,
                             llvm::FunctionAnalysisManager &FAM) {

  /// @todo(CSCD70) Please complete this method.

  for(auto& In : instructions(F))
  {
    InitializeDomainFromInstruction(In);
  }

  for(auto& BB : getBBConstRange(F))
  {
    BVs[&BB] = TMeetOp().top(DomainIdMap.size());

    for(auto& In : getInstConstRange(BB))
    {
      InstDomainValMap[&In] = TMeetOp().top(DomainIdMap.size());
    }
  }


  while(traverseCFG(F));
  
  printInstDomainValMap(F);
  return std::make_tuple(DomainIdMap, DomainVector, BVs, InstDomainValMap);
}

对每一个基本块,首先通过getBoundaryVal获取所有INPUT的Meet来作为该块的INPUT,而对于其他指令,就取上一条指令的OUTPUT进行transferFunc就行.

/// @brief Traverse through the CFG of the function.
/// @param F
/// @return True if either BasicBlock-DomainValue mapping or
///         Instruction-DomainValue mapping has been modified, false
///         otherwise.
bool traverseCFG(const llvm::Function &F) {
  bool Changed = false;

  /// @todo(CSCD70) Please complete this method.
  //在Froward和Backward的分析中,IDV,ODV的意义相反
  DomainVal_t IDV,ODV;

  for(auto& BB : getBBConstRange(F))
  {
    IDV = getBoundaryVal(BB);
    BVs[&BB] = IDV; 
    for(auto& In : getInstConstRange(BB))
    {
      ODV = InstDomainValMap[&In];
      Changed |= transferFunc(In,IDV,ODV);
      InstDomainValMap[&In] = ODV;
      IDV = ODV;
    }
    
  }
  return Changed;
}

这里还有一个特殊的处理,对于分析的Boudary Condition,通过其的MeetOperands为空识别出来,再将其初始化为之前预设的bc就行.

DomainVal_t getBoundaryVal(const llvm::BasicBlock &BB) const {
  MeetOperands_t MeetOperands = getMeetOperands(BB);

  /// @todo(CSCD70) Please complete this method.

  //对于Boudary Condition,没有前驱块
  if(MeetOperands.empty())
    MeetOperands.push_back(bc());
  return meet(MeetOperands);
}

完成框架的工作之后,数据流分析只用完成TransferFunc的编写和遍历方向等的设置即可.

可用表达式分析

其实有一个我一直在纠结的东西,就是在LLVM Pass上做可用表达式分析是否有意义.看下面的例子:

void foo(int A,int B)
{
    int C,D;
    C = A+B;
    A = A+1; //下面的IR分别是这条语句不存在和存在的情况.
    D = A+B;
    printf("%d%d",C,D);
}

define dso_local void @foo(i32 noundef %0, i32 noundef %1) #0 {
  %3 = add nsw i32 %0, %1
  %4 = add nsw i32 %0, %1
  ret void
}

define dso_local void @foo(i32 noundef %0, i32 noundef %1) #0 {
  %3 = add nsw i32 %0, %1
  %4 = add nsw i32 %0, 1
  %5 = add nsw i32 %4, %1
  ret void
}

可以发现原本的两次A+B,已经变成了两个不同的表达式add nsw i32 %0, %1和add nsw i32 %4, %1.
由于在Pass运行时,def-use和use-def链已经建立完成,况且IR也是SSA形式的,每一次对二元表达式的计算都是在定义一个新的变量,每一次计算使用的值都是其唯一定义的地方.并不存在去更改某个变量值的情况,也就不存在kill.如果有循环,可能会更改已经存在的变量的值,但稍微分析一下发现也没有意义.

不过在Meet多个控制流的DomainVal时,还是能起到可用表达式分析的作用.

对于kill,我没有想到一个理想的方法,参照了一些博客都是使用这样的方法:
通过本次指令的值被某个表达式使用,推断出本次指令是一次定义,kill掉该表达式.
但如前文所说,由于SSA,使用本次指令值的表达式一定在本次指令之前未曾出现,所以kill是无意义的.

for(dfa::Expression expr : DomainVector)
{
  if(expr.LHS == &Inst || expr.RHS == &Inst)
  {
    int64_t id;
    if((id = getDomainId(expr))!=-1)
    {
      NewODV.at(id).Value = false;
    }
  }
}

即使不是SSA形式,这样的方法也并不理想:
假设有这样一个基本块,使用该方法分析出来的对表达式A+B的OUT会是True,因为加1之后的A值不再被使用,也就没有kill的机会.

1 2	C = A + B A = A + 1

gen倒是没啥问题.

if(Inst.isBinaryOp())
{
  dfa::Expression expr(Inst.getOpcode(),Inst.getOperand(0),Inst.getOperand(1));

  int64_t id;
  if((id = getDomainId(expr))!= -1)
  {
    NewODV.at(id).Value = true;
  }    
}

完整的TransferFunc

void AvailExprs::InitializeDomainFromInstruction(const llvm::Instruction& In)
{
      if(In.isBinaryOp())
      {
        if((DomainIdMap.emplace(std::pair(dfa::Expression::ExpressionFromIn(In),DomainIdMap.size()))).second)
        {
          DomainVector.push_back(dfa::Expression::ExpressionFromIn(In));
        }
      }
}


bool AvailExprs::transferFunc(const Instruction &Inst, const DomainVal_t &IDV,
                             DomainVal_t &ODV) {

  /// @todo(CSCD70) Please complete this method.
  DomainVal_t NewODV = IDV;

  for(dfa::Expression expr : DomainVector)
  {
    if(expr.LHS == &Inst || expr.RHS == &Inst)
    {
      int64_t id;
      if((id = getDomainId(expr))!=-1)
      {
        NewODV.at(id).Value = false;
      }
    }
  }

  if(Inst.isBinaryOp())
  {
    dfa::Expression expr(Inst.getOpcode(),Inst.getOperand(0),Inst.getOperand(1));

    int64_t id;
    if((id = getDomainId(expr))!= -1)
    {
      NewODV.at(id).Value = true;
    }    
  }

  if(NewODV==ODV)
    return false;
  ODV = NewODV;
  return true;
}

存活变量分析

和可用表达式类似写出transferFunc就ok了.

bool LiveNess::transferFunc(const Instruction &Inst, const DomainVal_t &IDV,
                             DomainVal_t &ODV) {

  /// @todo(CSCD70) Please complete this method.
  DomainVal_t NewODV = IDV;

  if(Inst.hasNUsesOrMore(1))
  {
    int64_t id;
    if((id = getDomainId(dfa::Variable::VariableFromIn(Inst)))!=-1)
    {
      NewODV.at(id).Value = false;
    }
  }

  for(auto Iter  = Inst.op_begin();Iter != Inst.op_end(); ++Iter)
  {
    Value *V = *Iter;

    if(isa<Instruction>(V)||isa<Argument>(V))
    {
      int64_t id;
      if((id = getDomainId(V))!=-1)
      {
        NewODV.at(id).Value = true;
    }
    }
  }


  if(NewODV==ODV)
    return false;
  ODV = NewODV;
  return true;
}

稀疏条件常量传播

初始方案

采用ForwardAnalysis,Domain为所有变量,用TValue为Bool,TMeetOp为Intersect.
TransferFunc如下:


bool SCCP::transferFunc(const Instruction &Inst, const DomainVal_t &IDV,
                             DomainVal_t &ODV) {

  /// @todo(CSCD70) Please complete this method.
    DomainVal_t NewODV = IDV;
    if(isa<Instruction>(Inst)||isa<Argument>(Inst))
    {
        int64_t id1,id2;
        bool isconstant = true;
        if((id1 = getDomainId(dfa::Variable::VariableFromIn(Inst)))!= -1)
        {
            for(auto& operand : Inst.operands())
            {

              if(!isa<ConstantInt>(operand))
              {
                if((id2 = getDomainId(operand.get()))!= -1)
                {
                  if(!(IDV[id2].Value==true))
                  {
                    NewODV[id1].Value = false;
                    isconstant = false;
                    break;
                  }
                }
              }    
            }
            if(isconstant)
                NewODV[id1].Value = true;
        }

    }


    if(NewODV==ODV)
        return false;
    ODV = NewODV;
    return true;
}

用一个小的demo来验证一下:

define i32 @Loop(i32 noundef %0) {
  %2 = add nsw i32 1, 2
  %3 = add nsw i32 %2, 5
  %4 = add nsw i32 %3, %0
  ret i32 %4
}

可以看到没什么问题(其实代码里还应该特化一下对phi指令的分析).
再用官方给的测试用例:

发现没有一个常量,于是分析一下用来测试的IR代码:
可以看出,常量应该有%.12,%.01,%4. 但识别出这三个常量都需要先识别出label %7不可达,而这才是SCCP 稀疏条件常量传播的意义.

; int Loop() {
;   int i = 1, j = 1;
;   for (int k = 0; k < 100;) {
;     if (j < 20) {
;       j = i;
;       k = k + 1;
;     } else {
;       j = k;
;       k = k + 2;
;     }
;   }
;   return j;
; }

; @todo(CSCD70) Please complete the CHECK directives.
define i32 @Loop() {
  br label %1
1:                                                ; preds = %9, %0
  %.01 = phi i32 [ 1, %0 ], [ %.12, %9 ] ;j
  %.0 = phi i32 [ 0, %0 ], [ %.1, %9 ] ;k
  %2 = icmp slt i32 %.0, 100 ;k<100
  br i1 %2, label %3, label %10

3:                                                ; preds = %1
  %4 = icmp slt i32 %.01, 20 ;j<20
  br i1 %4, label %5, label %7

5:                                                ; preds = %3
  %6 = add nsw i32 %.0, 1 ;k+1
  br label %9

7:                                                ; preds = %3
  %8 = add nsw i32 %.0, 2 ;k+2
  br label %9

9:                                                ; preds = %7, %5
  %.12 = phi i32 [ 1, %5 ], [ %.0, %7 ] ;j = 1 | j = k
  %.1 = phi i32 [ %6, %5 ], [ %8, %7 ] ; new k
  br label %1

10:                                               ; preds = %1
  ret i32 %.01
}

改进版

理论

在补了下数据流分析基础后,再来看这个问题.
单个变量可能值域的Lattice设计为这个样子(回想一下,之前使用的Bool的值域其实也是一个Lattice)

Transferfunc像这样.有一点需要注意,之前所说的数据流分析中TranferFunc的单调性依赖于其gen集,kill集与输入无关这一性质,所以OUT与IN成正相关(相对于偏序关系来说).但这里的TransferFunc的gen集明显受到IN的影响,不过分析一下可以发现gen集同样与IN正相关,所以最终的OUT还是正相关的.

实现

Basic Lattice

单个变量值域的Lattice由Status和Value构成.之所以需要Value的域,是在常量传播到跳转条件的时候会用到. 至于Meet的设计就遵照上面理论所示(其实是遵照Meet的本意,即求最大下界),这个也很巧妙的符合只有当两个常量满足c1 == c2时,才有c1 Meet c2还是常量.

enum ConstantStatus{
  UNDEF = 2,
  CONST = 1,
  NAC = 0
};

struct Constant{
  int64_t Value;
  enum ConstantStatus Status = NAC;
  Constant operator&(const Constant& Other) const {
    Constant result;
    result.Status = std::min(Status,Other.Status);
    if(result.Status==CONST)
    {
      if(Status == CONST && Other.Status==CONST)
      {
        if(Value != Other.Value)
        {
          result.Status = NAC;
        }
        else
          result.Value = Value;
      }
      else if(Status == CONST)
        result.Value = Value;
      else if(Other.Status == CONST)
        result.Value = Other.Value;        
    }
    return result;
  }

  static Constant top() {return {.Status = UNDEF};}
  operator bool() const {return Status==CONST;}
};

Kill集

还是之前的思路,根据本次Value被其他指令使用来判断本次指令是一次defination,设置对应的Variable状态为NAC.

//如果该指令是一次定义,设置该指令代表的变量为NAC
if(isa<Instruction>(Inst)||isa<Argument>(Inst))
{
  for(auto& V : DomainVector)
  {
    if(const Instruction* InofV = dyn_cast<Instruction>(V.Var))
    {
      for(auto& operand : InofV->operands())
      {
        if(operand.get() == &Inst)
        {
          int64_t id;
          if((id = getDomainId(dfa::Variable::VariableFromIn(Inst)))!=-1)
          {
            NewODV.at(id).Status = dfa::NAC;
            break;
          }
        }
      }
    }
  }

Gen集

照着这幅图实现就行.

for(auto& operand : Inst.operands())
{
  if(!isa<ConstantInt>(operand))
  {
    if((id2 = getDomainId(operand.get()))!= -1)
    {
      if(IDV[id2].Status==dfa::NAC)
      {
        NewODV[id1].Status = dfa::NAC;
        ifnac = true;
        break;
      }
      else if(IDV[id2].Status == dfa::UNDEF)
      {
        NewODV[id1].Status = dfa::UNDEF;
        ifundef = true;
        break;
      }
    }
  }    
}
if(!ifnac)
{
  if(!ifundef)
  {
    NewODV[id1].Status = dfa::CONST;
    NewODV[id1].Value = getConstantValue(Inst,IDV);
  }
}

对于phi指令需要特化,原因有二:
1. phi的结果为CONST,需要其操作数全为CONST,且值相同.
2. phi的某个操作数若来自NeverReachBB时,不做考虑.

if(ifphi)
{
  //特化phi,与其他的操作符有不同的规则
  dfa::Constant r = {.Status=dfa::UNDEF};
  for(auto& operand : Inst.operands())
  {
    bool flag = false;
    if(const PHINode* phiNode = dyn_cast<PHINode>(&Inst))
    {
      //如果phi的某个值来自NeverReachBB,不与其进行Meet操作.
      if((NeverReachBBs.find(phiNode->getIncomingBlock(operand)))!=NeverReachBBs.end())
      {
        flag = true;
      }

      if(!isa<ConstantInt>(operand))
      {
        if(!flag)
        {
          if((id2 = getDomainId(operand.get()))!= -1)
          {
            r = r & (IDV.at(id2));
          }
        }
      }
      else
      {
        if(!flag)
        {
          r = r & dfa::Constant({.Value=dyn_cast<ConstantInt>(operand)->getSExtValue(),.Status=dfa::CONST});
        }
      }    
    }
  }
  NewODV.at(id1) = r;
}

Never Reach Basic Block

对于跳转指令,判断其条件是否恒真或恒假来更新NeverReachBB.

//根据跳转条件是否为常量来
//修改要操作的基本块集合
if(Inst.getOpcode()==Instruction::Br) 
{
  if(Inst.getNumOperands()==3) //只处理条件跳转
  {
    auto condition = Inst.getOperand(0);
    bool ifconst = false;
    int64_t truth;
    if(!isa<ConstantInt>(condition))
    {
      int64_t id;
      if((id = getDomainId(condition))!= -1)
      {
        if(IDV.at(id).Status == dfa::CONST)
        {
          ifconst = true;
          truth = IDV.at(id).Value;
        }
      }
    }
    else
    {
      ifconst = true;
      truth = dyn_cast<ConstantInt>(condition)->getSExtValue();
    }    
    if(ifconst)
    {
      if(truth)
      {
        NeverReachBBs.insert(dyn_cast<BasicBlock>(Inst.getOperand(1)));
      }
      else
      {
        NeverReachBBs.insert(dyn_cast<BasicBlock>(Inst.getOperand(2)));
      }
    }
    else
    {
      NeverReachBBs.erase(dyn_cast<BasicBlock>(Inst.getOperand(1)));
      NeverReachBBs.erase(dyn_cast<BasicBlock>(Inst.getOperand(2)));
    }
  }

完整代码

唉,本来想写得优雅一点的,改着改着就成这样了…
ps: calc只处理了测试用例中有的指令.

void SCCP::InitializeDomainFromInstruction(const llvm::Instruction& In)
{
  for(auto& operand : In.operands())
  {
    if(isa<Instruction>(operand)||isa<Argument>(operand))
    {
      if((DomainIdMap.emplace(std::pair(operand.get(),DomainIdMap.size()))).second)
      {
        DomainVector.push_back(operand.get());
      }
    }
  }
}

void calc(int64_t& result,const std::vector<int64_t>& val,const Instruction& In)
{
  switch (In.getOpcode())
  {
  case Instruction::Add:
    result += val.at(0);
    return;
  case Instruction::ICmp:
    if(auto IcmpIn = dyn_cast<ICmpInst>(&In))
    {
      switch (IcmpIn->getPredicate())
      {
      case ICmpInst::ICMP_SLT:
        result = (val[0] < val[1]);
        return;
      default:
        return;
        
      }
    }

  default:
    return;
  }
}

int64_t SCCP::getConstantValue(const Instruction& In,const DomainVal_t &IDV)
{
  int64_t result;
  std::vector<int64_t> val_list;
  for(auto& operand : In.operands())
  {

    if(!isa<ConstantInt>(operand))
    {
      int64_t id;
      if((id = getDomainId(operand.get()))!=-1)
      {
        val_list.push_back(IDV.at(id).Value);
      }
    }
    else
    {
      val_list.push_back(dyn_cast<ConstantInt>(operand)->getSExtValue());
    }
  }
  calc(result,val_list,In);
  return result;
}




bool SCCP::transferFunc(const Instruction &Inst, const DomainVal_t &IDV,
                             DomainVal_t &ODV) {

  /// @todo(CSCD70) Please complete this method.
  DomainVal_t NewODV = IDV;
  static std::set<BasicBlock*>NeverReachBBs;

  //对NeverReachBB不做处理
  for(auto& NeverReachBB : NeverReachBBs)
  {
    if(Inst.getParent()==NeverReachBB)
      return false;
  }

  //根据跳转条件是否为常量来
  //修改要操作的基本块集合
  if(Inst.getOpcode()==Instruction::Br) 
  {
    if(Inst.getNumOperands()==3) //只处理条件跳转
    {
      auto condition = Inst.getOperand(0);
      bool ifconst = false;
      int64_t truth;
      if(!isa<ConstantInt>(condition))
      {
        int64_t id;
        if((id = getDomainId(condition))!= -1)
        {
          if(IDV.at(id).Status == dfa::CONST)
          {
            ifconst = true;
            truth = IDV.at(id).Value;
          }
        }
      }
      else
      {
        ifconst = true;
        truth = dyn_cast<ConstantInt>(condition)->getSExtValue();
      }    
      if(ifconst)
      {
        if(truth)
        {
          NeverReachBBs.insert(dyn_cast<BasicBlock>(Inst.getOperand(1)));
        }
        else
        {
          NeverReachBBs.insert(dyn_cast<BasicBlock>(Inst.getOperand(2)));
        }
      }
      else
      {
        NeverReachBBs.erase(dyn_cast<BasicBlock>(Inst.getOperand(1)));
        NeverReachBBs.erase(dyn_cast<BasicBlock>(Inst.getOperand(2)));
      }
    }
  }
  else
  {
    //如果该指令是一次定义,设置该指令代表的变量为NAC
    if(isa<Instruction>(Inst)||isa<Argument>(Inst))
    {
      for(auto& V : DomainVector)
      {
        if(const Instruction* InofV = dyn_cast<Instruction>(V.Var))
        {
          for(auto& operand : InofV->operands())
          {
            if(operand.get() == &Inst)
            {
              int64_t id;
              if((id = getDomainId(dfa::Variable::VariableFromIn(Inst)))!=-1)
              {
                NewODV.at(id).Status = dfa::NAC;
                break;
              }
            }
          }
        }
      }

      int64_t id1,id2;
      bool ifnac = false;
      bool ifundef = false;
      bool ifphi = (Inst.getOpcode() == Instruction::PHI);
      if((id1 = getDomainId(dfa::Variable::VariableFromIn(Inst)))!= -1)
      {
        if(ifphi)
        {
          //特化phi,与其他的操作符有不同的规则
          dfa::Constant r = {.Status=dfa::UNDEF};
          for(auto& operand : Inst.operands())
          {
            bool flag = false;
            if(const PHINode* phiNode = dyn_cast<PHINode>(&Inst))
            {
              //如果phi的某个值来自NeverReachBB,不与其进行Meet操作.
              if((NeverReachBBs.find(phiNode->getIncomingBlock(operand)))!=NeverReachBBs.end())
              {
                flag = true;
              }

              if(!isa<ConstantInt>(operand))
              {
                if(!flag)
                {
                  if((id2 = getDomainId(operand.get()))!= -1)
                  {
                    r = r & (IDV.at(id2));
                  }
                }
              }
              else
              {
                if(!flag)
                {
                  r = r & dfa::Constant({.Value=dyn_cast<ConstantInt>(operand)->getSExtValue(),.Status=dfa::CONST});
                }
              }    
            }
          }
          NewODV.at(id1) = r;
        }
        else
        {
              for(auto& operand : Inst.operands())
              {
                if(!isa<ConstantInt>(operand))
                {
                  if((id2 = getDomainId(operand.get()))!= -1)
                  {
                    if(IDV[id2].Status==dfa::NAC)
                    {
                      NewODV[id1].Status = dfa::NAC;
                      ifnac = true;
                      break;
                    }
                    else if(IDV[id2].Status == dfa::UNDEF)
                    {
                      NewODV[id1].Status = dfa::UNDEF;
                      ifundef = true;
                      break;
                    }
                  }
                }    
              }
              if(!ifnac)
              {
                if(!ifundef)
                {
                  NewODV[id1].Status = dfa::CONST;
                  NewODV[id1].Value = getConstantValue(Inst,IDV);
                }
              }
          }
      }
    }
  }


    if(NewODV==ODV)
        return false;
    ODV = NewODV;
    return true;
}

Lazy Code Motion

实现了但效果不好,还是之前那个原因,SSA之后原来相同表达式已经分裂成了不同的表达式….
实现的方法看文章开头的数据流分析笔记.

版权声明： 本博客所有文章除特别声明外，著作权归作者所有。转载请注明出处！