ICU 51.2  51.2
stringtriebuilder.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: stringtriebuilder.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010dec24
12 * created by: Markus W. Scherer
13 */
14 
15 #ifndef __STRINGTRIEBUILDER_H__
16 #define __STRINGTRIEBUILDER_H__
17 
18 #include "unicode/utypes.h"
19 #include "unicode/uobject.h"
20 
26 // Forward declaration.
27 struct UHashtable;
28 typedef struct UHashtable UHashtable;
29 
51 };
52 
54 
62 public:
63 #ifndef U_HIDE_INTERNAL_API
64 
65  static UBool hashNode(const void *node);
67  static UBool equalNodes(const void *left, const void *right);
68 #endif /* U_HIDE_INTERNAL_API */
69 
70 protected:
71  // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
72  // or else the compiler will create a public default constructor.
76  virtual ~StringTrieBuilder();
77 
78 #ifndef U_HIDE_INTERNAL_API
79 
80  void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
82  void deleteCompactBuilder();
83 
85  void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
86 
88  int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
90  int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
91 #endif /* U_HIDE_INTERNAL_API */
92 
93  class Node;
94 
95 #ifndef U_HIDE_INTERNAL_API
96 
97  Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
99  Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
100  int32_t length, UErrorCode &errorCode);
101 #endif /* U_HIDE_INTERNAL_API */
102 
104  virtual int32_t getElementStringLength(int32_t i) const = 0;
106  virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
108  virtual int32_t getElementValue(int32_t i) const = 0;
109 
110  // Finds the first unit index after this one where
111  // the first and last element have different units again.
113  virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
114 
115  // Number of different units at unitIndex.
117  virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
119  virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
121  virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
122 
124  virtual UBool matchNodesCanHaveValues() const = 0;
125 
127  virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
129  virtual int32_t getMinLinearMatch() const = 0;
131  virtual int32_t getMaxLinearMatchLength() const = 0;
132 
133 #ifndef U_HIDE_INTERNAL_API
134  // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
136  static const int32_t kMaxBranchLinearSubNodeLength=5;
137 
138  // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.
139  // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
141  static const int32_t kMaxSplitBranchLevels=14;
142 
153  Node *registerNode(Node *newNode, UErrorCode &errorCode);
164  Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
165 
166  /*
167  * C++ note:
168  * registerNode() and registerFinalValue() take ownership of their input nodes,
169  * and only return owned nodes.
170  * If they see a failure UErrorCode, they will delete the input node.
171  * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
172  * If there is a failure, they return NULL.
173  *
174  * NULL Node pointers can be safely passed into other Nodes because
175  * they call the static Node::hashCode() which checks for a NULL pointer first.
176  *
177  * Therefore, as long as builder functions register a new node,
178  * they need to check for failures only before explicitly dereferencing
179  * a Node pointer, or before setting a new UErrorCode.
180  */
181 
182  // Hash set of nodes, maps from nodes to integer 1.
184  UHashtable *nodes;
185 
187  class Node : public UObject {
188  public:
189  Node(int32_t initialHash) : hash(initialHash), offset(0) {}
190  inline int32_t hashCode() const { return hash; }
191  // Handles node==NULL.
192  static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
193  // Base class operator==() compares the actual class types.
194  virtual UBool operator==(const Node &other) const;
195  inline UBool operator!=(const Node &other) const { return !operator==(other); }
223  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
224  // write() must set the offset to a positive value.
225  virtual void write(StringTrieBuilder &builder) = 0;
226  // See markRightEdgesFirst.
227  inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
228  StringTrieBuilder &builder) {
229  // Note: Edge numbers are negative, lastRight<=firstRight.
230  // If offset>0 then this node and its sub-nodes have been written already
231  // and we need not write them again.
232  // If this node is part of the unwritten right branch edge,
233  // then we wait until that is written.
234  if(offset<0 && (offset<lastRight || firstRight<offset)) {
235  write(builder);
236  }
237  }
238  inline int32_t getOffset() const { return offset; }
239  protected:
240  int32_t hash;
241  int32_t offset;
242  };
243 
244  // This class should not be overridden because
245  // registerFinalValue() compares a stack-allocated FinalValueNode
246  // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
247  // with the input node, and the
248  // !Node::operator==(other) used inside FinalValueNode::operator==(other)
249  // will be false if the typeid's are different.
251  class FinalValueNode : public Node {
252  public:
253  FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
254  virtual UBool operator==(const Node &other) const;
255  virtual void write(StringTrieBuilder &builder);
256  protected:
257  int32_t value;
258  };
259 
263  class ValueNode : public Node {
264  public:
265  ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
266  virtual UBool operator==(const Node &other) const;
267  void setValue(int32_t v) {
268  hasValue=TRUE;
269  value=v;
270  hash=hash*37+v;
271  }
272  protected:
273  UBool hasValue;
274  int32_t value;
275  };
276 
281  public:
282  IntermediateValueNode(int32_t v, Node *nextNode)
283  : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
284  virtual UBool operator==(const Node &other) const;
285  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
286  virtual void write(StringTrieBuilder &builder);
287  protected:
288  Node *next;
289  };
290 
294  class LinearMatchNode : public ValueNode {
295  public:
296  LinearMatchNode(int32_t len, Node *nextNode)
297  : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
298  length(len), next(nextNode) {}
299  virtual UBool operator==(const Node &other) const;
300  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
301  protected:
302  int32_t length;
303  Node *next;
304  };
305 
309  class BranchNode : public Node {
310  public:
311  BranchNode(int32_t initialHash) : Node(initialHash) {}
312  protected:
313  int32_t firstEdgeNumber;
314  };
315 
319  class ListBranchNode : public BranchNode {
320  public:
321  ListBranchNode() : BranchNode(0x444444), length(0) {}
322  virtual UBool operator==(const Node &other) const;
323  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
324  virtual void write(StringTrieBuilder &builder);
325  // Adds a unit with a final value.
326  void add(int32_t c, int32_t value) {
327  units[length]=(UChar)c;
328  equal[length]=NULL;
329  values[length]=value;
330  ++length;
331  hash=(hash*37+c)*37+value;
332  }
333  // Adds a unit which leads to another match node.
334  void add(int32_t c, Node *node) {
335  units[length]=(UChar)c;
336  equal[length]=node;
337  values[length]=0;
338  ++length;
339  hash=(hash*37+c)*37+hashCode(node);
340  }
341  protected:
342  Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
343  int32_t length;
344  int32_t values[kMaxBranchLinearSubNodeLength];
345  UChar units[kMaxBranchLinearSubNodeLength];
346  };
347 
351  class SplitBranchNode : public BranchNode {
352  public:
353  SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
354  : BranchNode(((0x555555*37+middleUnit)*37+
355  hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
356  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
357  virtual UBool operator==(const Node &other) const;
358  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
359  virtual void write(StringTrieBuilder &builder);
360  protected:
361  UChar unit;
362  Node *lessThan;
363  Node *greaterOrEqual;
364  };
365 
366  // Branch head node, for writing the actual node lead unit.
368  class BranchHeadNode : public ValueNode {
369  public:
370  BranchHeadNode(int32_t len, Node *subNode)
371  : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
372  length(len), next(subNode) {}
373  virtual UBool operator==(const Node &other) const;
374  virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
375  virtual void write(StringTrieBuilder &builder);
376  protected:
377  int32_t length;
378  Node *next; // A branch sub-node.
379  };
380 #endif /* U_HIDE_INTERNAL_API */
381 
383  virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
384  Node *nextNode) const = 0;
385 
387  virtual int32_t write(int32_t unit) = 0;
389  virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
391  virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
393  virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
395  virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
396 };
397 
399 
400 #endif // __STRINGTRIEBUILDER_H__