00001 #ifndef __PATTERN_H__
00002 #define __PATTERN_H__
00003
00004 #include <vector>
00005 #include <string>
00006 #include <map>
00007
00008 class Matcher;
00009 class NFANode;
00010 class NFAQuantifierNode;
00011
00968 class Pattern
00969 {
00970 friend class Matcher;
00971 friend class NFANode;
00972 friend class NFAQuantifierNode;
00973 private:
00981 Pattern(const std::string & rhs);
00982 protected:
00987 static std::map<std::string, Pattern *> compiledPatterns;
00993 static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns;
00994 protected:
00999 std::map<NFANode*, bool> nodes;
01005 Matcher * matcher;
01009 NFANode * head;
01013 std::string pattern;
01018 bool error;
01024 int curInd;
01028 int groupCount;
01032 int nonCapGroupCount;
01036 unsigned long flags;
01037 protected:
01042 void raiseError();
01048 NFANode * registerNode(NFANode * node);
01049
01059 std::string classUnion (std::string s1, std::string s2) const;
01069 std::string classIntersect (std::string s1, std::string s2) const;
01079 std::string classNegate (std::string s1) const;
01089 std::string classCreateRange(char low, char hi) const;
01090
01099 int getInt(int start, int end);
01111 bool quantifyCurly(int & sNum, int & eNum);
01123 NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);
01124
01132 NFANode * quantify(NFANode * newNode);
01139 std::string parseClass();
01146 std::string parsePosix();
01151 std::string parseOctal();
01156 std::string parseHex();
01161 NFANode * parseBackref();
01171 std::string parseEscape(bool & inv, bool & quo);
01180 NFANode * parseRegisteredPattern(NFANode ** end);
01188 NFANode * parseBehind(const bool pos, NFANode ** end);
01193 NFANode * parseQuote();
01202 NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL);
01203 public:
01205 const static unsigned long CASE_INSENSITIVE;
01207 const static unsigned long LITERAL;
01209 const static unsigned long DOT_MATCHES_ALL;
01213 const static unsigned long MULTILINE_MATCHING;
01217 const static unsigned long UNIX_LINE_MODE;
01219 const static int MIN_QMATCH;
01221 const static int MAX_QMATCH;
01222 public:
01235 static Pattern * compile (const std::string & pattern,
01236 const unsigned long mode = 0);
01250 static Pattern * compileAndKeep (const std::string & pattern,
01251 const unsigned long mode = 0);
01252
01272 static std::string replace (const std::string & pattern,
01273 const std::string & str,
01274 const std::string & replacementText,
01275 const unsigned long mode = 0);
01276
01300 static std::vector<std::string> split (const std::string & pattern,
01301 const std::string & str,
01302 const bool keepEmptys = 0,
01303 const unsigned long limit = 0,
01304 const unsigned long mode = 0);
01305
01324 static std::vector<std::string> findAll (const std::string & pattern,
01325 const std::string & str,
01326 const unsigned long mode = 0);
01327
01337 static bool matches (const std::string & pattern,
01338 const std::string & str,
01339 const unsigned long mode = 0);
01340
01360 static bool registerPattern(const std::string & name,
01361 const std::string & pattern,
01362 const unsigned long mode = 0);
01363
01367 static void unregisterPatterns();
01371 static void clearPatternCache();
01372
01394 static std::pair<std::string, int> findNthMatch (const std::string & pattern,
01395 const std::string & str,
01396 const int matchNum,
01397 const unsigned long mode = 0);
01398 public:
01402 ~Pattern();
01403
01404 std::string replace (const std::string & str,
01405 const std::string & replacementText);
01406 std::vector<std::string> split (const std::string & str, const bool keepEmptys = 0,
01407 const unsigned long limit = 0);
01408 std::vector<std::string> findAll (const std::string & str);
01409 bool matches (const std::string & str);
01414 unsigned long getFlags () const;
01419 std::string getPattern () const;
01426 Matcher * createMatcher (const std::string & str);
01427 };
01428
01429 class NFANode
01430 {
01431 friend class Matcher;
01432 public:
01433 NFANode * next;
01434 NFANode();
01435 virtual ~NFANode();
01436 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
01437 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const = 0;
01438 inline virtual bool isGroupHeadNode() const { return false; }
01439 inline virtual bool isStartOfInputNode() const { return false; }
01440 };
01441 class NFACharNode : public NFANode
01442 {
01443 protected:
01444 char ch;
01445 public:
01446 NFACharNode(const char c);
01447 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01448 };
01449 class NFACICharNode : public NFANode
01450 {
01451 protected:
01452 char ch;
01453 public:
01454 NFACICharNode(const char c);
01455 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01456 };
01457 class NFAStartNode : public NFANode
01458 {
01459 public:
01460 NFAStartNode();
01461 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01462 };
01463 class NFAEndNode : public NFANode
01464 {
01465 public:
01466 NFAEndNode();
01467 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01468 };
01469 class NFAQuantifierNode : public NFANode
01470 {
01471 public:
01472 int min, max;
01473 NFANode * inner;
01474 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
01475 NFAQuantifierNode(Pattern * pat, NFANode * internal,
01476 const int minMatch = Pattern::MIN_QMATCH,
01477 const int maxMatch = Pattern::MAX_QMATCH);
01478 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01479 };
01480 class NFAGreedyQuantifierNode : public NFAQuantifierNode
01481 {
01482 public:
01483 NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal,
01484 const int minMatch = Pattern::MIN_QMATCH,
01485 const int maxMatch = Pattern::MAX_QMATCH);
01486 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01487 virtual int matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const;
01488 };
01489 class NFALazyQuantifierNode : public NFAQuantifierNode
01490 {
01491 public:
01492 NFALazyQuantifierNode(Pattern * pat, NFANode * internal,
01493 const int minMatch = Pattern::MIN_QMATCH,
01494 const int maxMatch = Pattern::MAX_QMATCH);
01495 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01496 };
01497 class NFAPossessiveQuantifierNode : public NFAQuantifierNode
01498 {
01499 public:
01500 NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal,
01501 const int minMatch = Pattern::MIN_QMATCH,
01502 const int maxMatch = Pattern::MAX_QMATCH);
01503 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01504 };
01505 class NFAAcceptNode : public NFANode
01506 {
01507 public:
01508 NFAAcceptNode();
01509 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01510 };
01511 class NFAClassNode : public NFANode
01512 {
01513 public:
01514 bool inv;
01515 std::map<char, bool> vals;
01516 NFAClassNode(const bool invert = 0);
01517 NFAClassNode(const std::string & clazz, const bool invert);
01518 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01519 };
01520 class NFACIClassNode : public NFANode
01521 {
01522 public:
01523 bool inv;
01524 std::map<char, bool> vals;
01525 NFACIClassNode(const bool invert = 0);
01526 NFACIClassNode(const std::string & clazz, const bool invert);
01527 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01528 };
01529 class NFASubStartNode : public NFANode
01530 {
01531 public:
01532 NFASubStartNode();
01533 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01534 };
01535 class NFAOrNode : public NFANode
01536 {
01537 public:
01538 NFANode * one;
01539 NFANode * two;
01540 NFAOrNode(NFANode * first, NFANode * second);
01541 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
01542 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01543 };
01544 class NFAQuoteNode : public NFANode
01545 {
01546 public:
01547 std::string qStr;
01548 NFAQuoteNode(const std::string & quoted);
01549 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01550 };
01551 class NFACIQuoteNode : public NFANode
01552 {
01553 public:
01554 std::string qStr;
01555 NFACIQuoteNode(const std::string & quoted);
01556 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01557 };
01558 class NFALookAheadNode : public NFANode
01559 {
01560 public:
01561 bool pos;
01562 NFANode * inner;
01563 NFALookAheadNode(NFANode * internal, const bool positive);
01564 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
01565 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01566 };
01567 class NFALookBehindNode : public NFANode
01568 {
01569 public:
01570 bool pos;
01571 std::string mStr;
01572 NFALookBehindNode(const std::string & str, const bool positive);
01573 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01574 };
01575 class NFAStartOfLineNode : public NFANode
01576 {
01577 public:
01578 NFAStartOfLineNode();
01579 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01580 };
01581 class NFAEndOfLineNode : public NFANode
01582 {
01583 public:
01584 NFAEndOfLineNode();
01585 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01586 };
01587 class NFAReferenceNode : public NFANode
01588 {
01589 public:
01590 int gi;
01591 NFAReferenceNode(const int groupIndex);
01592 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01593 };
01594 class NFAStartOfInputNode : public NFANode
01595 {
01596 public:
01597 NFAStartOfInputNode();
01598 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01599 inline virtual bool isStartOfInputNode() const { return true; }
01600 };
01601 class NFAEndOfInputNode : public NFANode
01602 {
01603 public:
01604 bool term;
01605 NFAEndOfInputNode(const bool lookForTerm);
01606 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01607 };
01608 class NFAWordBoundaryNode : public NFANode
01609 {
01610 public:
01611 bool pos;
01612 NFAWordBoundaryNode(const bool positive);
01613 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01614 };
01615 class NFAEndOfMatchNode : public NFANode
01616 {
01617 public:
01618 NFAEndOfMatchNode();
01619 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01620 };
01621 class NFAGroupHeadNode : public NFANode
01622 {
01623 public:
01624 int gi;
01625 NFAGroupHeadNode(const int groupIndex);
01626 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01627 inline virtual bool isGroupHeadNode() const { return true; }
01628 };
01629 class NFAGroupTailNode : public NFANode
01630 {
01631 public:
01632 int gi;
01633 NFAGroupTailNode(const int groupIndex);
01634 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01635 };
01636 class NFAGroupLoopPrologueNode : public NFANode
01637 {
01638 public:
01639 int gi;
01640 NFAGroupLoopPrologueNode(const int groupIndex);
01641 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01642 };
01643 class NFAGroupLoopNode : public NFANode
01644 {
01645 public:
01646 int gi, min, max, type;
01647 NFANode * inner;
01648 NFAGroupLoopNode(NFANode * internal, const int minMatch,
01649 const int maxMatch, const int groupIndex, const int matchType);
01650 virtual void findAllNodes(std::map<NFANode*, bool> & soFar);
01651 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01652 int matchGreedy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01653 int matchLazy(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01654 int matchPossessive(const std::string & str, Matcher * matcher, const int curInd = 0) const;
01655 };
01656
01657 #endif
01658