Nearly CC
An educational compiler skeleton
cfg_builder.h
Go to the documentation of this file.
1 // Copyright (c) 2021-2023, David H. Hovemeyer <david.hovemeyer@gmail.com>
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a
4 // copy of this software and associated documentation files (the "Software"),
5 // to deal in the Software without restriction, including without limitation
6 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 // and/or sell copies of the Software, and to permit persons to whom the
8 // Software is furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included
11 // in all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
16 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
17 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19 // OTHER DEALINGS IN THE SOFTWARE.
20 
21 #ifndef CFG_BUILDER_H
22 #define CFG_BUILDER_H
23 
24 #include "cfg.h"
25 #include "highlevel.h"
26 #include "lowlevel.h"
27 
30 
32 // ControlFlowGraph builder class
34 
43 template<typename InstructionProperties>
45 private:
46  InstructionProperties m_ins_props;
47  std::shared_ptr<InstructionSequence> m_iseq;
48  std::shared_ptr<ControlFlowGraph> m_cfg;
49  // map of instruction index to pointer to InstructionSequence starting at that instruction
50  std::map<unsigned, std::shared_ptr<InstructionSequence> > m_basic_blocks;
51 
52  struct WorkItem {
53  unsigned ins_index;
54  std::shared_ptr<InstructionSequence> pred;
55  EdgeKind edge_kind;
56  std::string label;
57  };
58 
59 public:
62  // a ControlFlowGraph from
63  ControlFlowGraphBuilder(const std::shared_ptr<InstructionSequence> &iseq);
65 
68  std::shared_ptr<ControlFlowGraph> build();
69 
77  virtual bool is_branch(Instruction *ins);
78 
79 private:
80  std::shared_ptr<InstructionSequence> scan_basic_block(const WorkItem &item, const std::string &label);
81  bool ends_in_branch(std::shared_ptr<InstructionSequence> bb);
82  unsigned get_branch_target_index(std::shared_ptr<InstructionSequence> bb);
83  bool falls_through(std::shared_ptr<InstructionSequence> bb);
84 };
85 
86 // Convenience functions for creating high-level and low-level
87 // ControlFlowGraphBuilders. Example usage:
88 //
89 // auto hl_cfg_builder = ::make_highlevel_cfg_builder(hl_iseq);
90 //
91 // auto ll_cfg_builder = ::make_lowlevel_cfg_builder(ll_iseq);
92 
104 make_highlevel_cfg_builder(const std::shared_ptr<InstructionSequence> &iseq) {
106 }
107 
119 make_lowlevel_cfg_builder(const std::shared_ptr<InstructionSequence> &iseq) {
121 }
122 
124 // ControlFlowGraphBuilder implementation
126 
127 template<typename InstructionProperties>
128 ControlFlowGraphBuilder<InstructionProperties>::ControlFlowGraphBuilder(const std::shared_ptr<InstructionSequence> &iseq)
129  : m_ins_props(InstructionProperties())
130  , m_iseq(iseq)
131  , m_cfg(new ControlFlowGraph()) {
132 }
133 
134 template<typename InstructionProperties>
136 }
137 
138 template<typename InstructionProperties>
139 std::shared_ptr<ControlFlowGraph> ControlFlowGraphBuilder<InstructionProperties>::build() {
140  unsigned num_instructions = m_iseq->get_length();
141 
142  std::shared_ptr<InstructionSequence> entry = m_cfg->create_basic_block(BASICBLOCK_ENTRY, -1);
143  std::shared_ptr<InstructionSequence> exit = m_cfg->create_basic_block(BASICBLOCK_EXIT, 2000000);
144 
145  // exit block is reached by any branch that targets the end of the
146  // InstructionSequence
147  m_basic_blocks[num_instructions] = exit;
148 
149  std::deque<WorkItem> work_list;
150  work_list.push_back({ ins_index: 0, pred: entry, edge_kind: EDGE_FALLTHROUGH });
151 
152  std::shared_ptr<InstructionSequence> last = nullptr;
153  while (!work_list.empty()) {
154  WorkItem item = work_list.front();
155  work_list.pop_front();
156 
157  assert(item.ins_index <= m_iseq->get_length());
158 
159  // if item target is end of InstructionSequence, then it targets the exit block
160  if (item.ins_index == m_iseq->get_length()) {
161  m_cfg->create_edge(item.pred, exit, item.edge_kind);
162  continue;
163  }
164 
165  std::shared_ptr<InstructionSequence> bb;
166  bool is_new_block;
167  std::map<unsigned, std::shared_ptr<InstructionSequence> >::iterator i = m_basic_blocks.find(item.ins_index);
168  if (i != m_basic_blocks.end()) {
169  // a block starting at this instruction already exists
170  bb = i->second;
171  is_new_block = false;
172 
173  // Special case: if this block was originally discovered via a fall-through
174  // edge, but is also reachable via a branch, then it might not be labeled
175  // yet. Set the label if necessary.
176  if (item.edge_kind == EDGE_BRANCH && !bb->has_block_label()) {
177  bb->set_block_label(item.label);
178  }
179  } else {
180  // no block starting at this instruction currently exists:
181  // scan the basic block and add it to the map of known basic blocks
182  // (indexed by instruction index)
183  bb = scan_basic_block(item, item.label);
184  is_new_block = true;
185  m_basic_blocks[item.ins_index] = bb;
186  }
187 
188  // if the edge is a branch, make sure the work item's label matches
189  // the InstructionSequence's label (if it doesn't, then somehow this block
190  // is reachable via two different labels, which shouldn't be possible)
191  assert(item.edge_kind != EDGE_BRANCH || bb->get_block_label() == item.label);
192 
193  // connect to predecessor
194  m_cfg->create_edge(item.pred, bb, item.edge_kind);
195 
196  if (!is_new_block) {
197  // we've seen this block already
198  continue;
199  }
200 
201  // if this basic block ends in a branch, prepare to create an edge
202  // to the InstructionSequence for the target (creating the InstructionSequence if it
203  // doesn't exist yet)
204  if (ends_in_branch(bb)) {
205  unsigned target_index = get_branch_target_index(bb);
206  // Note: we assume that branch instructions have the target label
207  // as the last operand
208  Instruction *branch = bb->get_last_instruction();
209  unsigned num_operands = branch->get_num_operands();
210  assert(num_operands > 0);
211  Operand operand = branch->get_operand(num_operands - 1);
212  assert(operand.get_kind() == Operand::LABEL);
213  std::string target_label = operand.get_label();
214  work_list.push_back({ ins_index: target_index, pred: bb, edge_kind: EDGE_BRANCH, label: target_label });
215  }
216 
217  // if this basic block falls through, prepare to create an edge
218  // to the InstructionSequence for successor instruction (creating it if it doesn't
219  // exist yet)
220  if (falls_through(bb)) {
221  unsigned target_index = item.ins_index + bb->get_length();
222  assert(target_index <= m_iseq->get_length());
223  if (target_index == num_instructions) {
224  // this is the basic block at the end of the instruction sequence,
225  // its fall-through successor should be the exit block
226  last = bb;
227  } else {
228  // fall through to basic block starting at successor instruction
229  work_list.push_back({ ins_index: target_index, pred: bb, edge_kind: EDGE_FALLTHROUGH });
230  }
231  }
232  }
233 
234  assert(last != nullptr);
235  m_cfg->create_edge(last, exit, EDGE_FALLTHROUGH);
236 
237  return m_cfg;
238 }
239 
240 template<typename InstructionProperties>
242  // assume that if an instruction's last operand is a label,
243  // it's a branch instruction
244  unsigned num_operands = ins->get_num_operands();
245  return num_operands > 0 && ins->get_operand(num_operands - 1).get_kind() == Operand::LABEL;
246 }
247 
248 template<typename InstructionProperties>
249 std::shared_ptr<InstructionSequence> ControlFlowGraphBuilder<InstructionProperties>::scan_basic_block(const WorkItem &item, const std::string &label) {
250  unsigned index = item.ins_index;
251 
252  std::shared_ptr<InstructionSequence> bb = m_cfg->create_basic_block(BASICBLOCK_INTERIOR, int(index), label);
253 
254  // keep adding instructions until we
255  // - reach an instruction that is a branch
256  // - reach an instruction that is a target of a branch
257  // - reach the end of the overall instruction sequence
258  while (index < m_iseq->get_length()) {
259  Instruction *ins = m_iseq->get_instruction(index);
260 
261  // this instruction is part of the basic block
262  ins = ins->duplicate();
263  bb->append(ins);
264  index++;
265 
266  if (index >= m_iseq->get_length()) {
267  // At end of overall instruction sequence
268  break;
269  }
270 
271  if (m_ins_props.is_function_call(ins)) {
272  // The instruction we just added is a function call
273  break;
274  }
275 
276  if (is_branch(ins)) {
277  // The instruction we just added is a branch instruction
278  break;
279  }
280 
281  if (m_iseq->has_label(index)) {
282  // Next instruction has a label, so assume it is a branch target
283  // (and thus the beginning of a different basic block)
284  break;
285  }
286  }
287 
288  assert(bb->get_length() > 0);
289 
290  return bb;
291 }
292 
293 template<typename InstructionProperties>
294 bool ControlFlowGraphBuilder<InstructionProperties>::ends_in_branch(std::shared_ptr<InstructionSequence> bb) {
295  return !m_ins_props.is_function_call(bb->get_last_instruction()) && is_branch(bb->get_last_instruction());
296 }
297 
298 template<typename InstructionProperties>
299 unsigned ControlFlowGraphBuilder<InstructionProperties>::get_branch_target_index(std::shared_ptr<InstructionSequence> bb) {
300  assert(ends_in_branch(bb));
301  Instruction *last = bb->get_last_instruction();
302 
303  // assume that the label is the last operand
304  unsigned num_operands = last->get_num_operands();
305  assert(num_operands > 0);
306  Operand label = last->get_operand(num_operands - 1);
307  assert(label.get_kind() == Operand::LABEL);
308 
309  // look up the index of the instruction targeted by this label
310  unsigned target_index = m_iseq->get_index_of_labeled_instruction(label.get_label());
311  return target_index;
312 }
313 
314 template<typename InstructionProperties>
315 bool ControlFlowGraphBuilder<InstructionProperties>::falls_through(std::shared_ptr<InstructionSequence> bb) {
316  return m_ins_props.falls_through(bb->get_last_instruction());
317 }
318 
319 
320 #endif // CFG_BUILDER_H
ControlFlowGraph and associated types.
EdgeKind
Control-flow graph edge kinds.
Definition: cfg.h:48
ControlFlowGraphBuilder< HighLevelInstructionProperties > make_highlevel_cfg_builder(const std::shared_ptr< InstructionSequence > &iseq)
Return a ControlFlowGraphBuilder for building a high-level ControlFlowGraph.
Definition: cfg_builder.h:104
ControlFlowGraphBuilder< LowLevelInstructionProperties > make_lowlevel_cfg_builder(const std::shared_ptr< InstructionSequence > &iseq)
Return a ControlFlowGraphBuilder for building a low-level ControlFlowGraph.
Definition: cfg_builder.h:119
ControlFlowGraphBuilder builds a ControlFlowGraph from an InstructionSequence.
Definition: cfg_builder.h:44
std::shared_ptr< ControlFlowGraph > build()
Build a ControlFlowGraph from the original InstructionSequence.
Definition: cfg_builder.h:139
virtual bool is_branch(Instruction *ins)
Subclasses may override this method to specify which Instructions are branches (i....
Definition: cfg_builder.h:241
ControlFlowGraphBuilder(const std::shared_ptr< InstructionSequence > &iseq)
Constuctor.
Definition: cfg_builder.h:128
ControlFlowGraph: graph of basic blocks connected by control edges.
Definition: cfg.h:86
Instruction object type.
Definition: instruction.h:31
const Operand & get_operand(unsigned index) const
Get the specified Operand.
Definition: instruction.cpp:54
Instruction * duplicate() const
Return an exact duplicate of this Instruction.
Definition: instruction.h:69
unsigned get_num_operands() const
Get the number of operands.
Definition: instruction.cpp:50
Definition: operand.h:31
HighLevelOpcode enumeration and associated helper functions and the HighLevelInstructionProperties cl...
@ BASICBLOCK_INTERIOR
normal basic block in the "interior" of the CFG
Definition: instruction_seq.h:42
@ BASICBLOCK_ENTRY
special "entry" block
Definition: instruction_seq.h:40
@ BASICBLOCK_EXIT
special "exit" block
Definition: instruction_seq.h:41