(************** Content-type: application/mathematica **************
                     CreatedBy='Mathematica 5.1'

                    Mathematica-Compatible Notebook

This notebook can be used with any Mathematica-compatible
application, such as Mathematica, MathReader or Publicon. The data
for the notebook starts with the line containing stars above.

To get the notebook into a Mathematica-compatible application, do
one of the following:

* Save the data starting with the line of stars above into a file
  with a name ending in .nb, then open the file inside the
  application;

* Copy the data starting with the line of stars above to the
  clipboard, then use the Paste menu command inside the application.

Data for notebooks contains only printable 7-bit ASCII and can be
sent directly in email or through ftp in text mode.  Newlines can be
CR, LF or CRLF (Unix, Macintosh or MS-DOS style).

NOTE: If you modify the data for this notebook not in a Mathematica-
compatible application, you must delete the line below containing
the word CacheID, otherwise Mathematica-compatible applications may
try to use invalid cache data.

For more information on notebooks and Mathematica-compatible 
applications, contact Wolfram Research:
  web: http://www.wolfram.com
  email: info@wolfram.com
  phone: +1-217-398-0700 (U.S.)

Notebook reader applications are available free of charge from 
Wolfram Research.
*******************************************************************)

(* Cache marker removed: this notebook data was edited outside Mathematica. *)


(*NotebookFileLineBreakTest
NotebookFileLineBreakTest*)
(*NotebookOptionsPosition[    138630,       4949]*)
(*NotebookOutlinePosition[    143455,       5122]*)
(*  CellTagsIndexPosition[    142454,       5081]*)
(*WindowFrame->Normal*)



Notebook[{

Cell[CellGroupData[{
Cell[TextData[{
  "String Patterns in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"]
}], "Title"],

Cell["", "Text"]
}, Open  ]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". Introduction"
}], "Chapter",
  CellTags->"c:1"],

Cell[TextData[{
  "The general symbolic string patterns in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " allow you to perform powerful string manipulation efficiently. This guide \
discusses the details of string patterns, including usage and implementation \
notes. The emphasis is on issues not mentioned in the ",
  ButtonBox["Mathematica Book ",
    ButtonData:>{"2.8", None},
    ButtonStyle->"MainBookLink"],
  "and ",
  StyleBox[ButtonBox["Built-in Functions",
    ButtonData:>{"String", None},
    ButtonStyle->"RefGuideLink"],
    FontFamily->"Times"],
  " sections of the Help Browser."
}], "Text"],

Cell[TextData[{
  "At the heart of",
  StyleBox[" Mathematica",
    FontSlant->"Italic"],
  " is a powerful language for describing patterns in general expressions. \
This language is used in function definitions, substitutions, and searches, \
with constructs like ",
  StyleBox["x_", "MR"],
  ", ",
  StyleBox["a|b", "MR"],
  ", ",
  StyleBox["x..", "MR"],
  ", and so on. "
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(MatchQ[{a, b, c, d}, {___, x_, x_, ___}]\)], "Input",
  CellLabel->"In[1]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[1]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(MatchQ[{a, b, c, c, d}, {___, x_, x_, ___}]\)], "Input",
  CellLabel->"In[2]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[2]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(Cases[{a, 3, 4, b, c, 8}, _Integer]\)], "Input",
  CellLabel->"In[3]:="],

Cell[BoxData[
    \({3, 4, 8}\)], "Output",
  CellLabel->"Out[3]="]
}, Open  ]],

Cell[TextData[{
  "A ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " string pattern uses the same constructs to describe patterns in a text \
string. You can think of a string as a sequence of characters and apply the \
principles of general ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " patterns. In addition, there are several useful string-specific pattern \
constructs."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<abcd\>", ___ ~~ \(x_ ~~ \(x_ ~~ ___\)\)]\)], "Input",
  CellLabel->"In[4]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[4]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<abccd\>", ___ ~~ \(x_ ~~ \(x_ ~~ ___\)\)]\)], "Input",
  CellLabel->"In[5]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[5]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a34bc8\>", DigitCharacter]\)], "Input",
  CellLabel->"In[6]:="],

Cell[BoxData[
    \({"3", "4", "8"}\)], "Output",
  CellLabel->"Out[6]="]
}, Open  ]],

Cell["\<\
Regular expressions can be used as an alternative way to specify string \
patterns. These tend to be more compact, but less readable.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<abcd\>", 
      RegularExpression["\<.*(.)\\1.*\>"]]\)], "Input",
  CellLabel->"In[7]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[7]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<abccd\>", 
      RegularExpression["\<.*(.)\\1.*\>"]]\)], "Input",
  CellLabel->"In[8]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[8]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a34bc8\>", RegularExpression["\<\\d\>"]]\)], "Input",
  CellLabel->"In[9]:="],

Cell[BoxData[
    \({"3", "4", "8"}\)], "Output",
  CellLabel->"Out[9]="]
}, Open  ]],

Cell["\<\
Here is a list of several functions that recognize string patterns.\
\>", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["StringMatchQ[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "test whether ",
              StyleBox["\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\"", "MR"],
              " matches ",
              StyleBox["patt", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringFreeQ[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "test whether ",
              StyleBox["\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\"", "MR"],
              " is free of substrings matching ",
              StyleBox["patt", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringCases[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "give a list of the substrings of ",
              StyleBox["\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\"", "MR"],
              " that match ",
              StyleBox["patt", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringCases[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["lhs", "TI"],
              StyleBox[" -> ", "MR"],
              StyleBox["rhs", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "give a list of the values of ",
              StyleBox["rhs", "TI"],
              " for each substring that matches ",
              StyleBox["lhs", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringPosition[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "give a list of the positions of substrings that match ",
              StyleBox["patt", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringCount[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "count how many substrings match ",
              StyleBox["patt", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringReplace[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["lhs", "TI"],
              StyleBox[" -> ", "MR"],
              StyleBox["rhs", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "replace every substring that matches ",
              StyleBox["lhs", "TI"],
              " by ",
              StyleBox["rhs", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringReplaceList[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["lhs", "TI"],
              StyleBox[" -> ", "MR"],
              StyleBox["rhs", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "give a list of all ways of replacing ",
              StyleBox["lhs", "TI"],
              " by ",
              StyleBox["rhs", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["StringSplit[\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\", ", "MR"],
              StyleBox["patt", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "split ",
              StyleBox["\"", "MR"],
              StyleBox["s", "TI"],
              StyleBox["\"", "MR"],
              " at every substring that matches ",
              StyleBox["patt", "TI"],
              " "
            }]]}
          },
        ColumnAlignments->{Right, Left}], TraditionalForm]], "DefinitionBox",
  GridBoxOptions->{RowSpacings->0.4,
  RowLines->{False, True, False, False, False, True, False, True}}],

Cell["Functions that support string patterns. ", "Caption"]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". General String Patterns"
}], "Chapter",
  CellTags->"c:2"],

Cell[TextData[{
  "A general string pattern is formed from pattern objects similar to the \
general pattern objects in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  ". To join several string pattern objects, use the ",
  ButtonBox["StringExpression",
    ButtonStyle->"RefGuideLink"],
  " operator ",
  StyleBox["~~", "MR"],
  " ."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(FullForm["\<a\>" ~~ _]\)], "Input",
  CellLabel->"In[10]:="],

Cell[BoxData[
    TagBox[
      StyleBox[\(StringExpression["\<a\>", Blank[]]\),
        ShowSpecialCharacters->False,
        ShowStringCharacters->True,
        NumberMarks->True],
      FullForm]], "Output",
  CellLabel->"Out[10]//FullForm="]
}, Open  ]],

Cell[TextData[{
  StyleBox["StringExpression", "MR"],
  " is closely related to ",
  StyleBox["StringJoin", "MR"],
  ", except that nonstrings are allowed and lists are not flattened. For \
pure strings, the two are equivalent."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \("\<aa\>" ~~ \("\<bbb\>" ~~ "\<c\>"\)\)], "Input",
  CellLabel->"In[11]:="],

Cell[BoxData[
    \("aabbbc"\)], "Output",
  CellLabel->"Out[11]="]
}, Open  ]],

Cell[TextData[{
  "The list of objects that can appear in a string pattern closely matches \
the list for ordinary ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " patterns. In terms of string patterns, a string is considered a sequence \
of characters, that is, ",
  StyleBox["\"abc\"", "MR"],
  " can be thought of as something like ",
  StyleBox["String[a, b, c]", "MR"],
  ", to which the ordinary pattern constructs apply."
}], "Text"],

Cell["\<\
The following objects can appear in a symbolic string pattern: \
\>", "Text",
  CellFrame->{{0, 0}, {0, 0.25}}],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["\"", "MR"],
              StyleBox["string", "TI"],
              StyleBox["\"", "MR"],
              " "
            }]], Cell["a literal string of characters "]},
          {Cell[TextData[{
              StyleBox["_", "MR"],
              " "
            }]], Cell["any single character "]},
          {Cell[TextData[{
              StyleBox["__", "MR"],
              " "
            }]], Cell["any substring of one or more characters "]},
          {Cell[TextData[{
              StyleBox["___", "MR"],
              " "
            }]], Cell["any substring of zero or more characters "]},
          {Cell[TextData[{
              StyleBox["x", "TI"],
              StyleBox["_", "MR"],
              ", ",
              StyleBox["x", "TI"],
              StyleBox["__", "MR"],
              ", ",
              StyleBox["x", "TI"],
              StyleBox["___", "MR"],
              " "
            }]], Cell[TextData[{
              "substrings given the name ",
              StyleBox["x", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["x", "TI"],
              StyleBox[":", "MR"],
              StyleBox["pattern", "TI"],
              " "
            }]], Cell[TextData[{
              "pattern given the name ",
              StyleBox["x", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["pattern", "TI"],
              StyleBox["..", "MR"],
              " "
            }]], Cell["pattern repeated one or more times "]},
          {Cell[TextData[{
              StyleBox["pattern", "TI"],
              StyleBox["...", "MR"],
              " "
            }]], Cell["pattern repeated zero or more times "]},
          {Cell[TextData[{
              StyleBox["{", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["patt",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              StyleBox[", ", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["patt",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox[", ", "MR"],
              "\[Ellipsis]",
              StyleBox[" }", "MR"],
              "  or  ",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["patt",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              StyleBox[" | ", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["patt",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox[" | ", "MR"],
              "\[Ellipsis]",
              StyleBox[" ", "MR"],
              " "
            }]], Cell[TextData[{
              "a pattern matching at least one of the ",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["patt",
                        "TI"], 
                      StyleBox["i",
                        "TI"]], TraditionalForm]], "InlineFormula"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["patt", "TI"],
              StyleBox[" /; ", "MR"],
              StyleBox["cond", "TI"],
              " "
            }]], Cell[TextData[{
              "a pattern for which ",
              StyleBox["cond", "TI"],
              " evaluates to ",
              StyleBox["True", "MR"]
            }]]},
          {Cell[TextData[{
              StyleBox["pattern", "TI"],
              StyleBox[" ? ", "MR"],
              StyleBox["test", "TI"],
              " "
            }]], Cell[TextData[{
              "a pattern for which ",
              StyleBox["test", "TI"],
              " yields ",
              StyleBox["True", "MR"],
              " for each character "
            }]]},
          {Cell[TextData[{
              StyleBox["Whitespace", "MR"],
              " "
            }]], Cell["a sequence of whitespace characters "]},
          {Cell[TextData[{
              StyleBox["NumberString", "MR"],
              " "
            }]], Cell["the characters of a number "]},
          {Cell[TextData[{
              StyleBox["charobj", "TI"],
              " "
            }]], Cell["an object representing a character class "]},
          {Cell[TextData[{
              StyleBox["RegularExpression[\"", "MR"],
              StyleBox["regexp", "TI"],
              StyleBox["\"]", "MR"]
            }]], Cell["substring matching a regular expression "]},
          {Cell[TextData[{
              StyleBox["StringExpression[", "MR"],
              "\[Ellipsis]",
              StyleBox[" ]", "MR"],
              " "
            }]], Cell["an arbitrary string expression "]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["The following represent classes of characters: ", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["{\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              StyleBox["\", \"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox["\", ", "MR"],
              "\[Ellipsis]",
              StyleBox[" }", "MR"],
              " "
            }]], Cell[TextData[{
              "any of the ",
              StyleBox["\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], 
                      StyleBox["i",
                        "TI"]], TraditionalForm]], "InlineFormula"],
              "\"",
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["Characters[\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox[" ", "MR"],
              "\[Ellipsis]",
              StyleBox[" \"]", "MR"],
              " "
            }]], Cell[TextData[{
              "any of the ",
              StyleBox["\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], 
                      StyleBox["i",
                        "TI"]], TraditionalForm]], "InlineFormula"],
              "\"",
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["CharacterRange[\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              StyleBox["\", \"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox["\"]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character in the range ",
              StyleBox["\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              "\"",
              StyleBox[" to ", "TR"],
              StyleBox["\"", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox["\"", "MR"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["DigitCharacter", "MR"],
              " "
            }]], Cell["digit 0\[Dash]9 "]},
          {Cell[TextData[{
              StyleBox["LetterCharacter", "MR"],
              " "
            }]], Cell["letter "]},
          {Cell[TextData[{
              StyleBox["WhitespaceCharacter", "MR"],
              " "
            }]], Cell[
            "space, newline, tab, or other whitespace character "]},
          {Cell[TextData[{
              StyleBox["WordCharacter", "MR"],
              " "
            }]], Cell["letter or digit "]},
          {Cell[TextData[{
              StyleBox["Except[", "MR"],
              StyleBox["p", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character except ones matching ",
              StyleBox["p", "TI"],
              " "
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["The following represent positions in strings: ", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["StartOfString", "MR"],
              " "
            }]], Cell["start of the whole string "]},
          {Cell[TextData[{
              StyleBox["EndOfString", "MR"],
              " "
            }]], Cell["end of the whole string "]},
          {Cell[TextData[{
              StyleBox["StartOfLine", "MR"],
              " "
            }]], Cell["start of a line "]},
          {Cell[TextData[{
              StyleBox["EndOfLine", "MR"],
              " "
            }]], Cell["end of a line "]},
          {Cell[TextData[{
              StyleBox["WordBoundary", "MR"],
              " "
            }]], Cell["boundary between word characters and others "]},
          {Cell[TextData[{
              StyleBox["Except[WordBoundary]", "MR"],
              " "
            }]], Cell["anywhere except a word boundary "]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["\<\
The following determine which match will be used if there are several \
possibilities: \
\>", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["ShortestMatch[", "MR"],
              StyleBox["p", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "the shortest consistent match for ",
              StyleBox["p", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["LongestMatch[", "MR"],
              StyleBox["p", "TI"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "the longest consistent match for ",
              StyleBox["p", "TI"],
              " (default) "
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["Some nontrivial issues regarding these objects follow.", "Text",
  CellFrame->{{0, 0}, {0, 0.25}}],

Cell[TextData[{
  "The ",
  StyleBox["_", "MR"],
  ", ",
  StyleBox["__", "MR"],
  ", and ",
  StyleBox["___", "MR"],
  " wildcards match any characters including newlines. To match any character \
except newline (analogous to the \".\" in regular expressions), use ",
  StyleBox["Except[\"\\n\"]", "MR"],
  ", ",
  StyleBox["Except[\"\\n\"]..", "MR"],
  ", and ",
  StyleBox["Except[\"\\n\"]...", "MR"],
  "  ."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\n\>", __]\)], "Input",
  CellLabel->"In[12]:="],

Cell[BoxData[
    \({"line1\nline2\n"}\)], "Output",
  CellLabel->"Out[12]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\n\>", Except["\<\n\>"] .. ]\)], "Input",
  CellLabel->"In[13]:="],

Cell[BoxData[
    \({"line1", "line2"}\)], "Output",
  CellLabel->"Out[13]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\n\>", 
      RegularExpression["\<.+\>"]]\)], "Input",
  CellLabel->"In[14]:="],

Cell[BoxData[
    \({"line1", "line2"}\)], "Output",
  CellLabel->"Out[14]="]
}, Open  ]],

Cell[TextData[{
  "A list of patterns, such as ",
  StyleBox["{\"a\",\"b\",\"c\"}", "MR"],
  ", is equivalent to a list of alternatives, such as ",
  StyleBox["\"a\"|\"b\"|\"c\"", "MR"],
  ". This is convenient in that functions like ",
  StyleBox["Characters", "MR"],
  " and ",
  StyleBox["CharacterRange", "MR"],
  " can be used to specify classes of characters."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<the cat in the hat\>", 
      x : Characters["\<aeiou\>"] \[RuleDelayed] x <> x]\)], "Input",
  CellLabel->"In[15]:="],

Cell[BoxData[
    \("thee caat iin thee haat"\)], "Output",
  CellLabel->"Out[15]="]
}, Open  ]],

Cell[TextData[{
  "When ",
  StyleBox["Condition", "MR"],
  " (",
  StyleBox["/;", "MR"],
  ") is used, the substrings matched by the named patterns are treated as \
ordinary strings as far as the rest of ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " is concerned, so you need to use ",
  StyleBox["ToExpression", "MR"],
  " in some cases."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a13 a18 a41 a42\>", "\<a\>" ~~ \(x : DigitCharacter ..  ~~ 
              WordBoundary\) /; PrimeQ[ToExpression[x]] \[Rule] x]\)], "Input",\

  CellLabel->"In[16]:="],

Cell[BoxData[
    \({"13", "41"}\)], "Output",
  CellLabel->"Out[16]="]
}, Open  ]],

Cell[TextData[{
  "As with ordinary ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " patterns, the function in ",
  StyleBox["PatternTest", "MR"],
  " (",
  StyleBox["?", "MR"],
  ") is applied to each individual character."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<125378132\>", __?\((ToExpression[#] < 5 &)\)]\)], "Input",\

  CellLabel->"In[17]:="],

Cell[BoxData[
    \({"12", "3", "132"}\)], "Output",
  CellLabel->"Out[17]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["Whitespace", "MR"],
  " construct is equivalent to ",
  StyleBox["WhitespaceCharacter..", "MR"],
  "  ."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<13   \t 17 \n22   19\>", 
      Whitespace \[Rule] "\<,\>"]\)], "Input",
  CellLabel->"In[18]:="],

Cell[BoxData[
    \("13,17,22,19"\)], "Output",
  CellLabel->"Out[18]="]
}, Open  ]],

Cell[TextData[{
  "You can insert a ",
  StyleBox["RegularExpression", "MR"],
  " object into a general string pattern."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a13b12c17a32\>", "\<a\>" ~~ 
          x : RegularExpression["\<\\d+\>"] \[Rule] x]\)], "Input",
  CellLabel->"In[19]:="],

Cell[BoxData[
    \({"13", "32"}\)], "Output",
  CellLabel->"Out[19]="]
}, Open  ]],

Cell[TextData[{
  "This inserts a lookbehind constraint (see ",
  ButtonBox["Regular Expressions",
    ButtonData:>"Regular Expressions",
    ButtonStyle->"Hyperlink"],
  ") to ensure that you only pick words preceded by ",
  StyleBox["\"the \"", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<the cat in the hat\>", 
      RegularExpression["\<(?<=the )\>"] ~~ WordCharacter .. ]\)], "Input",
  CellLabel->"In[20]:="],

Cell[BoxData[
    \({"cat", "hat"}\)], "Output",
  CellLabel->"Out[20]="]
}, Open  ]],

Cell[TextData[{
  StyleBox["StringExpression", "MR"],
  " objects can be nested."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<ba3a1a78a2b7ba9\>", "\<b\>" ~~ \(("\<a\>" ~~ 
              DigitCharacter)\) .. ]\)], "Input",
  CellLabel->"In[21]:="],

Cell[BoxData[
    \({"ba3a1a7", "ba9"}\)], "Output",
  CellLabel->"Out[21]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["Except", "MR"],
  " construct for string patterns takes a single argument that should \
represent a single character or a class of single characters."
}], "Text"],

Cell["This deletes all nonvowel characters from the string.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<the cat in the hat\>", 
      Except[Characters["\<aeiou\>"]] \[Rule] "\<\>"]\)], "Input",
  CellLabel->"In[22]:="],

Cell[BoxData[
    \("eaiea"\)], "Output",
  CellLabel->"Out[22]="]
}, Open  ]],

Cell[TextData[{
  "When trying to match patterns of variable length (such as ",
  StyleBox["__ ", "MR"],
  "and ",
  StyleBox["patt..", "MR"],
  "), the longest possible match is tried first by default. To force the \
matcher to try the shortest match first, you can wrap the relevant part of \
the pattern in ",
  StyleBox["ShortestMatch[ ]", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(ab) (cde)\>", "\<(\>" ~~ \(__ ~~ "\<)\>"\)]\)], "Input",\

  CellLabel->"In[23]:="],

Cell[BoxData[
    \({"(ab) (cde)"}\)], "Output",
  CellLabel->"Out[23]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(ab) (cde)\>", 
      ShortestMatch["\<(\>" ~~ \(__ ~~ "\<)\>"\)]]\)], "Input",
  CellLabel->"In[24]:="],

Cell[BoxData[
    \({"(ab)", "(cde)"}\)], "Output",
  CellLabel->"Out[24]="]
}, Open  ]],

Cell[TextData[{
  "If for some reason you need a longest match within the shortest match, \
you can use ",
  StyleBox["LongestMatch", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(ab132cd)137(ef576gh)\>", 
      ShortestMatch["\<(\>" ~~ \(___ ~~ \(x : 
                  DigitCharacter ..  ~~ \(___ ~~ "\<)\>"\)\)\)] \
\[RuleDelayed] x]\)], "Input",
  CellLabel->"In[25]:="],

Cell[BoxData[
    \({"1", "5"}\)], "Output",
  CellLabel->"Out[25]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(ab132cd)(ef576gh)\>", 
      ShortestMatch["\<(\>" ~~ \(___ ~~ \(LongestMatch[
                  x : DigitCharacter .. ] ~~ \(___ ~~ "\<)\>"\)\)\)] \
\[RuleDelayed] x]\)], "Input",
  CellLabel->"In[26]:="],

Cell[BoxData[
    \({"132", "576"}\)], "Output",
  CellLabel->"Out[26]="]
}, Open  ]],

Cell[TextData[{
  "You could alternatively rewrite this pattern without use of ",
  StyleBox["LongestMatch", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(ab132cd)(ef576gh)\>", "\<(\>" ~~ \(ShortestMatch[___] ~~ \
\(x : DigitCharacter ..  ~~ \(ShortestMatch[___] ~~ "\<)\>"\)\)\) \
\[RuleDelayed] x]\)], "Input",
  CellLabel->"In[27]:="],

Cell[BoxData[
    \({"132", "576"}\)], "Output",
  CellLabel->"Out[27]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". Regular Expressions"
}], "Chapter",
  CellTags->{"Regular Expressions", "c:3"}],

Cell[TextData[{
  "The regular expression syntax follows that of the underlying Perl \
Compatible Regular Expressions (PCRE) library, which is close to the syntax \
of Perl. (See ",
  ButtonBox["[1]",
    ButtonData:>"Ref-1",
    ButtonStyle->"Hyperlink"],
  " for further information and documentation.) A regular expression in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " is denoted by the head ",
  ButtonBox["RegularExpression",
    ButtonStyle->"RefGuideLink"],
  "."
}], "Text"],

Cell["\<\
The following basic elements can be used in regular expression strings: \
\>", "Text",
  CellFrame->{{0, 0}, {0, 0.25}}],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["c", "TI"],
              " "
            }]], Cell[TextData[{
              "the literal character ",
              StyleBox["c", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox[".", "MR"],
              " "
            }]], Cell["any character except newline "]},
          {Cell[TextData[{
              StyleBox["[", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              "\[Ellipsis]",
              StyleBox[" ]", "MR"],
              " "
            }]], Cell[TextData[{
              "any of the characters ",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], 
                      StyleBox["i",
                        "TI"]], TraditionalForm]], "InlineFormula"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["[", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              StyleBox["-", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              StyleBox["]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character in the range ",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              "\[Dash]",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["[^", "MR"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "1"], TraditionalForm]], "InlineFormula"],
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], "2"], TraditionalForm]], "InlineFormula"],
              "\[Ellipsis]",
              StyleBox[" ]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character except the ",
              Cell[BoxData[
                  FormBox[
                    SubscriptBox[
                      StyleBox["c",
                        "TI"], 
                      StyleBox["i",
                        "TI"]], TraditionalForm]], "InlineFormula"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["*", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["p", "TI"],
              " repeated zero or more times "
            }]]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["+", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["p", "TI"],
              " repeated one or more times "
            }]]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["?", "MR"],
              " "
            }]], Cell[TextData[{
              "zero or one occurrence of ",
              StyleBox["p", "TI"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["{", "MR"],
              StyleBox["m", "TI"],
              StyleBox[",", "MR"],
              StyleBox["n", "TI"],
              StyleBox["}", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["p", "TI"],
              " repeated between ",
              StyleBox["m", "TI"],
              " and ",
              StyleBox["n", "TI"],
              " times "
            }]]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["*?", "MR"],
              ", ",
              StyleBox["p", "TI"],
              StyleBox["+?", "MR"],
              ", ",
              StyleBox["p", "TI"],
              StyleBox["??", "MR"],
              " "
            }]], Cell["the shortest consistent strings that match "]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["*+", "MR"],
              ", ",
              StyleBox["p", "TI"],
              StyleBox["++", "MR"],
              ", ",
              StyleBox["p", "TI"],
              StyleBox["?+", "MR"],
              " "
            }]], Cell["possessive match"]},
          {Cell[TextData[{
              StyleBox["p", "TI"],
              StyleBox["|", "MR"],
              StyleBox["q", "TI"],
              " "
            }]], Cell[TextData[{
              "strings matching ",
              StyleBox["p", "TI"],
              " or ",
              StyleBox["q", "TI"],
              " "
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["The following represent classes of characters: ", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["d", "MR"],
              " "
            }]], Cell["digit 0\[Dash]9 "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["D", "MR"],
              " "
            }]], Cell["nondigit "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["s", "MR"],
              " "
            }]], Cell[
            "space, newline, tab, or other whitespace character "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["S", "MR"],
              " "
            }]], Cell["nonwhitespace character "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["w", "MR"],
              " "
            }]], Cell[TextData[{
              "word character (letter, digit, or ",
              StyleBox["_", "MR"],
              ") "
            }]]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["W", "MR"],
              " "
            }]], Cell["nonword character "]},
          {Cell[TextData[{
              StyleBox["[[:", "MR"],
              StyleBox["class", "TI"],
              StyleBox[":]]", "MR"],
              " "
            }]], Cell["characters in a named class "]},
          {Cell[TextData[{
              StyleBox["[^[:", "MR"],
              StyleBox["class", "TI"],
              StyleBox[":]]", "MR"],
              " "
            }]], Cell["characters not in a named class "]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell[TextData[{
  "The following named classes can be used: ",
  StyleBox["alnum", "MR"],
  ", ",
  StyleBox["alpha", "MR"],
  ", ",
  StyleBox["ascii", "MR"],
  ", ",
  StyleBox["blank", "MR"],
  ", ",
  StyleBox["cntrl", "MR"],
  ", ",
  StyleBox["digit", "MR"],
  ", ",
  StyleBox["graph", "MR"],
  ", ",
  StyleBox["lower", "MR"],
  ", ",
  StyleBox["print", "MR"],
  ", ",
  StyleBox["punct", "MR"],
  ", ",
  StyleBox["space", "MR"],
  ", ",
  StyleBox["upper", "MR"],
  ", ",
  StyleBox["word", "MR"],
  ", and ",
  StyleBox["xdigit", "MR"],
  ". "
}], "Text"],

Cell["The following represent positions in strings: ", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["^", "MR"],
              " "
            }]], Cell["the beginning of the string (or line) "]},
          {Cell[TextData[{
              StyleBox["$", "MR"],
              " "
            }]], Cell["the end of the string (or line) "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["A", "MR"],
              " "
            }]], Cell["the beginning of the string "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["z", "MR"],
              " "
            }]], Cell["the end of the string "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["Z", "MR"],
              " "
            }]], Cell["\<\
the end of the string (allowing for a single newline character first) \
\>"]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["b", "MR"],
              " "
            }]], Cell["word boundary "]},
          {Cell[TextData[{
              "\[Backslash]\[Backslash]",
              StyleBox["B", "MR"],
              " "
            }]], Cell["anywhere except a word boundary "]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["\<\
The following set options for all regular expression elements that follow \
them: \
\>", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["(?i)", "MR"],
              " "
            }]], Cell[
            "treat upper and lower case as equivalent (ignore case) "]},
          {Cell[TextData[{
              StyleBox["(?m)", "MR"],
              " "
            }]], Cell[TextData[{
              "make ",
              StyleBox["^", "MR"],
              " and ",
              StyleBox["$", "MR"],
              " match start and end of lines (multiline mode) "
            }]]},
          {Cell[TextData[{
              StyleBox["(?s)", "MR"],
              " "
            }]], Cell[TextData[{
              "allow ",
              StyleBox[".", "MR"],
              " to match newline "
            }]]},
          {Cell[TextData[{
              StyleBox["(?x)", "MR"],
              " "
            }]], Cell[TextData[{
              "disregard all whitespace and treat everything between ",
              StyleBox["\"#\"", "MR"],
              " and ",
              StyleBox["\"\\n\"", "MR"],
              " as comments "
            }]]},
          {Cell[TextData[{
              StyleBox["(?-", "MR"],
              StyleBox["c", "TI"],
              StyleBox[")", "MR"],
              " "
            }]], Cell["unset options "]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["The following are lookahead/lookbehind constructs:", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              "(?=",
              StyleBox["p",
                FontSlant->"Italic"],
              ")"
            }]], Cell[TextData[{
              "the following text must match ",
              StyleBox["p",
                FontSlant->"Italic"]
            }]]},
          {Cell[TextData[{
              "(?!",
              StyleBox["p",
                FontSlant->"Italic"],
              ")"
            }]], Cell[TextData[{
              "the following text cannot match ",
              StyleBox["p",
                FontSlant->"Italic"]
            }]]},
          {Cell[TextData[{
              "(?<=",
              StyleBox["p",
                FontSlant->"Italic"],
              ")"
            }]], Cell[TextData[{
              "the preceding text must match ",
              StyleBox["p",
                FontSlant->"Italic"]
            }]]},
          {Cell[TextData[{
              "(?<!",
              StyleBox["p",
                FontSlant->"Italic"],
              ") "
            }]], Cell[TextData[{
              "the preceding text cannot match ",
              StyleBox["p",
                FontSlant->"Italic"]
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "2ColumnTable"],

Cell["\<\
Discussion of a few issues regarding regular expressions follows.\
\>", "Text",
  CellFrame->{{0, 0}, {0, 0.25}}],

Cell["\<\
This looks for runs of word characters of length between 2 and 4.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a bb ccc dddd eeeee\>", 
      RegularExpression["\<\\b\\w{2,4}\\b\>"]]\)], "Input",
  CellLabel->"In[28]:="],

Cell[BoxData[
    \({"bb", "ccc", "dddd"}\)], "Output",
  CellLabel->"Out[28]="]
}, Open  ]],

Cell[TextData[{
  "With the possessive \"",
  StyleBox["+", "MR"],
  "\" quantifier, as many characters as possible are grabbed by the matcher, \
and no characters are given up, even if the rest of the pattern requires \
it."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a2 b6\>", RegularExpression["\<\\w+\\d\>"]]\)], "Input",
  CellLabel->"In[29]:="],

Cell[BoxData[
    \({"a2", "b6"}\)], "Output",
  CellLabel->"Out[29]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a2 b6\>", RegularExpression["\<\\w++\\d\>"]]\)], "Input",\

  CellLabel->"In[30]:="],

Cell[BoxData[
    \({}\)], "Output",
  CellLabel->"Out[30]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a2 b6\>", RegularExpression["\<\\D++\\d\>"]]\)], "Input",\

  CellLabel->"In[31]:="],

Cell[BoxData[
    \({"a2", " b6"}\)], "Output",
  CellLabel->"Out[31]="]
}, Open  ]],

Cell[TextData[{
  StyleBox["[[:xdigit:]]", "MR"],
  " corresponds to characters in a hexadecimal number."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<ff, 13, 1a3, xyz, 3b\>", 
      RegularExpression["\<[[:xdigit:]]+\>"]]\)], "Input",
  CellLabel->"In[32]:="],

Cell[BoxData[
    \({"ff", "13", "1a3", "3b"}\)], "Output",
  CellLabel->"Out[32]="]
}, Open  ]],

Cell[TextData[{
  "The complete list of characters that need to be escaped in a regular \
expression consists of ",
  StyleBox[".", "MR"],
  ", ",
  StyleBox["\\", "MR"],
  ", ",
  StyleBox["?", "MR"],
  ", ",
  StyleBox["(", "MR"],
  ", ",
  StyleBox[")", "MR"],
  ", ",
  StyleBox["{", "MR"],
  ", ",
  StyleBox["}", "MR"],
  ", ",
  StyleBox["[", "MR"],
  ", ",
  StyleBox["]", "MR"],
  ", ",
  StyleBox["^", "MR"],
  ", ",
  StyleBox["$", "MR"],
  ", ",
  StyleBox["*", "MR"],
  ", ",
  StyleBox["+", "MR"],
  ", and ",
  StyleBox["|", "MR"],
  ".  For instance, to write a literal period, use ",
  StyleBox["\"\\\\.\" ", "MR"],
  "and to write a literal backslash, use ",
  StyleBox["\"\\\\\\\\\"", "MR"],
  "."
}], "Text"],

Cell[TextData[{
  "Inside a character class ",
  StyleBox["\"[...]\"", "MR"],
  ", the complete list of characters that need to be escaped is ",
  StyleBox["^", "MR"],
  ", ",
  StyleBox["-", "MR"],
  ", ",
  StyleBox["\\", "MR"],
  ", ",
  StyleBox["[", "MR"],
  ", and ",
  StyleBox["]", "MR"],
  "."
}], "Text"],

Cell[TextData[{
  "By default, ",
  StyleBox["^", "MR"],
  " and ",
  StyleBox["$", "MR"],
  " match the beginning and end of the string, respectively. In multiline \
mode, these match the beginning/end of lines instead."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\>", 
      RegularExpression["\<^.*\>"]]\)], "Input",
  CellLabel->"In[33]:="],

Cell[BoxData[
    \({"line1"}\)], "Output",
  CellLabel->"Out[33]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\>", 
      RegularExpression["\<(?m)^.*\>"]]\)], "Input",
  CellLabel->"In[34]:="],

Cell[BoxData[
    \({"line1", "line2"}\)], "Output",
  CellLabel->"Out[34]="]
}, Open  ]],

Cell[TextData[{
  "In multiline mode, ",
  StyleBox["\\\\A", "MR"],
  " and ",
  StyleBox["\\\\Z", "MR"],
  " can be used to denote the beginning and end of the string."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<line1\nline2\>", 
      RegularExpression["\<(?m)\\A.*\>"]]\)], "Input",
  CellLabel->"In[35]:="],

Cell[BoxData[
    \({"line1"}\)], "Output",
  CellLabel->"Out[35]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["(?x)", "MR"],
  " modifier allows you to add whitespace and comments to a regular \
expression for readability."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(\(\(StringCases["\<12.45  bc58.11\>", \
\[IndentingNewLine]RegularExpression[\*"\"\<\\<(?x)\n\\\\d+  \\\\.  #remember \
to escape the period\n\\\\d+\\>\>\""]]\)\(\ \ \)\)\)], "Input",
  CellLabel->"In[36]:="],

Cell[BoxData[
    \({"12.45", "58.11"}\)], "Output",
  CellLabel->"Out[36]="]
}, Open  ]],

Cell[TextData[{
  "Subpatterns are captured by surrounding them with parentheses ",
  StyleBox["(subpatt)", "MR"],
  "; they then become numbered subpatterns. The number of a given subpattern \
is found by counting opening parentheses from the start of the pattern. You \
can refer to these subpatterns using ",
  StyleBox["\\\\n", "MR"],
  " for the ",
  StyleBox["n",
    FontSlant->"Italic"],
  "th subpattern later in the pattern, or by ",
  StyleBox["\"$n\"", "MR"],
  " in the right-hand side of a rule. ",
  StyleBox["\"$0\"", "MR"],
  " refers to all of the matched pattern."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b6a3b3a3c3a8b8\>", 
      RegularExpression["\<(a(\\d))b\\2\>"]]\)], "Input",
  CellLabel->"In[37]:="],

Cell[BoxData[
    \({"a3b3", "a8b8"}\)], "Output",
  CellLabel->"Out[37]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b6a3b3a3c3a8b8\>", 
      RegularExpression["\<(a(\\d))b\\2\>"] \[Rule] {"\<$0\>", "\<$1\>", \
"\<Number:$2\>"}]\)], "Input",
  CellLabel->"In[38]:="],

Cell[BoxData[
    \({{"a3b3", "a3", "Number:3"}, {"a8b8", "a8", "Number:8"}}\)], "Output",
  CellLabel->"Out[38]="]
}, Open  ]],

Cell[TextData[{
  "If you need a literal ",
  StyleBox["$", "MR"],
  " in this context (when the head of the left-hand side is ",
  StyleBox["RegularExpression", "MR"],
  "), you can escape it by using backslashes, ",
  StyleBox["\"\\\\$2\"", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b6a3b3a3c3a8b8\>", 
      RegularExpression["\<(a(\\d))b\\2\>"] \[Rule] {"\<$0\>", "\<$1\>", \
"\<Number:$2\>", "\<Literal:\\$2\>"}]\)], "Input",
  CellLabel->"In[39]:="],

Cell[BoxData[
    \({{"a3b3", "a3", "Number:3", "Literal:$2"}, {"a8b8", "a8", "Number:8", 
        "Literal:$2"}}\)], "Output",
  CellLabel->"Out[39]="]
}, Open  ]],

Cell[TextData[{
  "If you happen to need a single literal backslash followed by a literal ",
  StyleBox["$", "MR"],
  " under these circumstances, you need to be a bit tricky and split into two \
strings temporarily."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b6a3b3a3c3a8b8\>", 
      RegularExpression["\<(a(\\d))b\\2\>"] :> {"\<$0\>", "\<$1\>", \
"\<Number:$2\>", "\<Literal:\\\>" <> "\<\\$2\>"}]\)], "Input",
  CellLabel->"In[40]:="],

Cell[BoxData[
    \({{"a3b3", "a3", "Number:3", "Literal:\\$2"}, {"a8b8", "a8", "Number:8", 
        "Literal:\\$2"}}\)], "Output",
  CellLabel->"Out[40]="]
}, Open  ]],

Cell[TextData[{
  "If you need to group a part of the pattern, but you do not want to count \
the group as a numbered subpattern, you can use the ",
  StyleBox["(?:patt)", "MR"],
  " construct."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a11b16c22b77\>", 
      RegularExpression["\<(?:a|b)(\\d)\\1\>"]]\)], "Input",
  CellLabel->"In[41]:="],

Cell[BoxData[
    \({"a11", "b77"}\)], "Output",
  CellLabel->"Out[41]="]
}, Open  ]],

Cell["\<\
Lookahead and lookbehind patterns are used to ensure a pattern is matched \
without actually including that text as part of the match.\
\>", "Text"],

Cell[TextData[{
  "This picks out words following the string ",
  StyleBox["\"the \"", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<the cat in the hat\>", 
      RegularExpression["\<(?<=the )\\w+\>"]]\)], "Input",
  CellLabel->"In[42]:="],

Cell[BoxData[
    \({"cat", "hat"}\)], "Output",
  CellLabel->"Out[42]="]
}, Open  ]],

Cell["\<\
This tries to pick out all even numbers in the string, but it will find \
matches that include partial numbers.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a23b42c63d80, 123\>", 
      x : RegularExpression["\<\\d+\>"] /; 
        Mod[ToExpression[x], 2] \[Equal] 0]\)], "Input",
  CellLabel->"In[43]:="],

Cell[BoxData[
    \({"2", "42", "6", "80", "12"}\)], "Output",
  CellLabel->"Out[43]="]
}, Open  ]],

Cell["\<\
Using lookbehind/lookahead, you can ensure that the characters before/after \
the match are not digits (note that the lookbehind test is superfluous in \
this particular case).\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a23b42c63d80, 123\>", 
      x : RegularExpression["\<(?<!\\d)\\d+(?!\\d)\>"] /; 
        Mod[ToExpression[x], 2] \[Equal] 0]\)], "Input",
  CellLabel->"In[44]:="],

Cell[BoxData[
    \({"42", "80"}\)], "Output",
  CellLabel->"Out[44]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". RegularExpression versus StringExpression"
}], "Chapter",
  CellTags->"c:4"],

Cell["\<\
There is a close correspondence between the various pattern objects that can \
be used in general symbolic string patterns and in regular expressions. Here \
is a list of examples of patterns written as regular expressions and as \
symbolic string patterns.\
\>", "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["Regular Expression", "TR",
                FontWeight->"Bold"],
              " "
            }]], Cell[TextData[{
              StyleBox["General String Pattern", "TR",
                FontWeight->"Bold"],
              " "
            }]], Cell[TextData[StyleBox["Explanation",
              FontWeight->"Bold"]]]},
          {Cell[TextData[{
              StyleBox["\"abc\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"abc\"", "MR"],
              " "
            }]], Cell[TextData[{
              "the literal string ",
              StyleBox["\"abc\"", "MR"],
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["\".\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Except[\"\\n\"]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character except newline",
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["\"(?s).\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["_", "MR"],
              " "
            }]], Cell["any character "]},
          {Cell[TextData[{
              StyleBox["\"(?s).+\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["__", "MR"],
              " "
            }]], Cell["one or more characters (greedy) "]},
          {Cell[TextData[{
              StyleBox["\"(?s).+?\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["ShortestMatch[__]", "MR"],
              " "
            }]], Cell["one or more characters (non-greedy) "]},
          {Cell[TextData[{
              StyleBox["\"(?s).*\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["___", "MR"],
              " "
            }]], Cell["zero or more characters "]},
          {Cell[TextData[{
              StyleBox["\".*\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Except[\"\\n\"]...", "MR"],
              " "
            }]], Cell["zero or more characters (except newlines)"]},
          {Cell[TextData[{
              StyleBox["\"a?b\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"a\"|\"\" ~~ \"b\"", "MR"],
              " "
            }]], Cell[TextData[{
              "zero or one ",
              StyleBox["\"a\"", "MR"],
              " followed by a ",
              StyleBox["\"b\"", "MR"],
              " (",
              "i.e.",
              ", ",
              StyleBox["\"b\"", "MR"],
              " or ",
              StyleBox["\"ab\"", "MR"],
              ")"
            }]]},
          {Cell[TextData[StyleBox["\"[abef]\"", "MR"]]], Cell[TextData[{
              StyleBox["Characters[\"abef\"]", "MR"],
              " "
            }]], Cell[TextData[{
              "any of the characters ",
              StyleBox["\"a\"", "MR"],
              ", ",
              StyleBox["\"b\"", "MR"],
              ", ",
              StyleBox["\"e\"", "MR"],
              ", or ",
              StyleBox["\"f\"", "MR"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["\"[abef]+\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Characters[\"abef\"]..", "MR"],
              " "
            }]], Cell[TextData[{
              "one or more of the characters ",
              StyleBox["\"a\"", "MR"],
              ", ",
              StyleBox["\"b\"", "MR"],
              ", ",
              StyleBox["\"e\"", "MR"],
              ", or ",
              StyleBox["\"f\"", "MR"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["\"[a-f]\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["CharacterRange[\"a\",\"f\"]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character in the range between ",
              StyleBox["\"a\"", "MR"],
              " and ",
              StyleBox["\"f\"", "MR"]
            }]]},
          {Cell[TextData[{
              StyleBox["\"[^abef]\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Except[Characters[\"abef\"]]", "MR"],
              " "
            }]], Cell[TextData[{
              "any character except the characters ",
              StyleBox["\"a\"", "MR"],
              ", ",
              StyleBox["\"b\"", "MR"],
              ", ",
              StyleBox["\"e\"", "MR"],
              ", or ",
              StyleBox["\"f\"", "MR"],
              " "
            }]]},
          {Cell[TextData[StyleBox["\"ab|efg\"", "MR"]]], Cell[TextData[
            StyleBox["\"ab\"|\"efg\"", "MR"]]], Cell[TextData[{
              "match the strings ",
              StyleBox["\"ab\"", "MR"],
              " or ",
              StyleBox["\"efg\"", "MR"]
            }]]},
          {Cell[TextData[{
              StyleBox["\"(ab|ef)gh\" ", "MR"],
              StyleBox["or", "TR"],
              StyleBox[" \"(?:ab|ef)gh\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["(\"ab\"|\"ef\") ~~ \"gh\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"ab\"", "MR"],
              " or ",
              StyleBox["\"ef\"", "MR"],
              " followed by ",
              StyleBox["\"gh\"", "MR"],
              " (i.e., ",
              StyleBox["\"abgh\"", "MR"],
              " or ",
              StyleBox["\"efgh\"", "MR"],
              ")"
            }]]},
          {Cell[TextData[{
              StyleBox["\"\\\\s\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["WhitespaceCharacter", "MR"],
              " "
            }]], Cell["any whitespace character"]},
          {Cell[TextData[{
              StyleBox["\"\\\\s+\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Whitespace", "MR"],
              " "
            }]], Cell["one or more characters of whitespace "]},
          {Cell[TextData[{
              StyleBox["\"(a|b)\\\\1\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["x:\"a\"|\"b\" ~~ x_", "MR"],
              " "
            }]], Cell[TextData[{
              "this will match either ",
              StyleBox["\"aa\"", "MR"],
              " or ",
              StyleBox["\"bb\"", "MR"],
              " "
            }]]},
          {Cell[TextData[{
              StyleBox["\"\\\\d\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["DigitCharacter", "MR"],
              " "
            }]], Cell["any digit character"]},
          {Cell[TextData[{
              StyleBox["\"\\\\D\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Except[DigitCharacter]", "MR"],
              " "
            }]], Cell["any nondigit character"]},
          {Cell[TextData[{
              StyleBox["\"\\\\d+\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["DigitCharacter..", "MR"],
              " "
            }]], Cell["one or more digit characters "]},
          {Cell[TextData[{
              StyleBox["\"\\\\w\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["WordCharacter|\"_\"", "MR"],
              " "
            }]], Cell[TextData[{
              "any digit, letter, or ",
              StyleBox["\"_\"", "MR"],
              " character. "
            }]]},
          {Cell[TextData[{
              StyleBox["\"[[:alpha:]]\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["LetterCharacter", "MR"],
              " "
            }]], Cell["any letter character"]},
          {Cell[TextData[{
              StyleBox["\"[^[:alpha:]]\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["Except[LetterCharacter]", "MR"],
              " "
            }]], Cell["any nonletter character"]},
          {Cell[TextData[{
              StyleBox["\"^abf\" ", "MR"],
              StyleBox["or", "TR"],
              StyleBox[" \"\\\\Aabf\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StartOfString ~~ \"abf\"", "MR"],
              " "
            }]], Cell[TextData[{
              "the string ",
              StyleBox["\"abf\"", "MR"],
              " at the start of the string "
            }]]},
          {Cell[TextData[{
              StyleBox["\"(?m)^abf\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StartOfLine ~~ \"abf\"", "MR"],
              " "
            }]], Cell[TextData[{
              "the string ",
              StyleBox["\"abf\"", "MR"],
              " at the start of a line"
            }]]},
          {Cell[TextData[{
              StyleBox["\"wxz$\" ", "MR"],
              StyleBox["or", "TR"],
              StyleBox[" \"wxz\\\\z\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"wxz\" ~~ EndOfString", "MR"],
              " "
            }]], Cell[TextData[{
              "the string ",
              StyleBox["\"wxz\"", "MR"],
              " at the end of the string"
            }]]},
          {Cell[TextData[{
              StyleBox["\"wxz\\\\Z\"", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"wxz\" ~~ \"\\n\"|\"\" ~~ EndOfString", "MR"],
              " "
            }]], Cell[TextData[{
              "the string ",
              StyleBox["\"wxz\"", "MR"],
              
              " at the end of the string or before a newline at the end of the \
string "
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "3ColumnTable",
  GridBoxOptions->{RowLines->{True, False}}],

Cell[TextData[{
  "Pattern objects that can be used in general string patterns, but not in \
regular expressions, include conditions (",
  StyleBox["/;", "MR"],
  ") and pattern tests (",
  StyleBox["?", "MR"],
  ") that can access general ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " code during the match."
}], "Text"],

Cell[TextData[{
  "Some special constructs in regular expressions are not directly available \
in general string patterns. These include lookahead/lookbehinds and repeats \
of a given length. They can be embedded into a larger general string pattern \
by inserting a ",
  StyleBox["RegularExpression", "MR"],
  " object."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". String Manipulation Functions"
}], "Chapter",
  CellTags->"c:5"],

Cell[TextData[{
  "The following discusses some particulars and subtleties in the various \
string manipulation functions (see the ",
  StyleBox[ButtonBox["Built-in Functions",
    ButtonData:>{"String", None},
    ButtonStyle->"RefGuideLink"],
    FontFamily->"Times"],
  " ",
  "section",
  " for more information on these functions)."
}], "Text"],

Cell[CellGroupData[{

Cell["StringMatchQ", "Section",
  CellTags->"c:6"],

Cell[TextData[{
  ButtonBox["StringMatchQ",
    ButtonStyle->"RefGuideLink"],
  " is used to check whether a whole string matches a certain pattern."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<test\>", "\<t\>" ~~ \(__ ~~ "\<t\>"\)]\)], "Input",
  CellLabel->"In[45]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[45]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<tester\>", "\<t\>" ~~ \(__ ~~ "\<t\>"\)]\)], "Input",
  CellLabel->"In[46]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[46]="]
}, Open  ]],

Cell[TextData[{
  StyleBox["StringMatchQ", "MR"],
  " is special in that it also allows the metacharacters ",
  StyleBox["*", "MR"],
  " and ",
  StyleBox["@", "MR"],
  " to be entered as wildcards (for backwards compatibility reasons). ",
  StyleBox["*", "MR"],
  " is equivalent to ",
  StyleBox["ShortestMatch[___]", "MR"],
  " (",
  StyleBox["RegularExpression[\"(?s).*?\"]", "MR"],
  ") and ",
  StyleBox["@", "MR"],
  " is equivalent to ",
  StyleBox["Except[CharacterRange[\"A\",\"Z\"]]..", "MR"],
  " (",
  StyleBox["RegularExpression[\"[^A-Z]+\"]", "MR"],
  ")."
}], "Text"],

Cell["The following three patterns are therefore equivalent.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<test\>", _ ~~ "\<e*\>"]\)], "Input",
  CellLabel->"In[47]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[47]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<test\>", _ ~~ \("\<e\>" ~~ 
          ShortestMatch[___]\)]\)], "Input",
  CellLabel->"In[48]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[48]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<test\>", RegularExpression["\<(?s).e.*?\>"]]\)], "Input",\

  CellLabel->"In[49]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[49]="]
}, Open  ]],

Cell[TextData[{
  "Note that technically the appearance of ",
  StyleBox["ShortestMatch", "MR"],
  " does not make a difference here, since we are only looking for ",
  StyleBox["a",
    FontSlant->"Italic"],
  " possible match."
}], "Text"],

Cell[TextData[{
  "If you need to access parts of the string matched by subpatterns in the \
pattern, use ",
  StyleBox["StringCases", "MR"],
  " instead."
}], "Text"],

Cell[TextData[{
  StyleBox["StringMatchQ", "MR"],
  " has a ",
  StyleBox["SpellingCorrection", "MR"],
  " option for finding matches allowing for a small number of discrepancies. \
This only works for patterns consisting of a single literal string."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ["\<alpha\>", "\<alpa\>", 
      SpellingCorrection \[Rule] True]\)], "Input",
  CellLabel->"In[50]:="],

Cell[BoxData[
    \(True\)], "Output",
  CellLabel->"Out[50]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["StringFreeQ", "Section",
  CellTags->"c:7"],

Cell[TextData[{
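  ButtonBox["StringFreeQ",
    ButtonStyle->"RefGuideLink"],
  " is used to check whether a string contains a substring matching the \
pattern. It returns ",
  StyleBox["True", "MR"],
  " if no substring matches and ",
  StyleBox["False", "MR"],
  " otherwise. You cannot extract the matching substring; to do this you \
would use ",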
  StyleBox["StringCases", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringFreeQ["\<abcde\>", "\<b\>" ~~ \(__ ~~ "\<d\>"\)]\)], "Input",
  CellLabel->"In[51]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[51]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringFreeQ["\<abcde\>", RegularExpression["\<b.*d\>"]]\)], "Input",
  CellLabel->"In[52]:="],

Cell[BoxData[
    \(False\)], "Output",
  CellLabel->"Out[52]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["StringCases", "Section",
  CellTags->"c:8"],

Cell[TextData[{
  ButtonBox["StringCases",
    ButtonStyle->"RefGuideLink"],
  " is a general purpose function for finding occurrences of patterns in a \
string, picking out subpatterns, and processing the results."
}], "Text"],

Cell["Find substrings matching a pattern.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b2a26d15a42\>", "\<a\>" ~~ _]\)], "Input",
  CellLabel->"In[53]:="],

Cell[BoxData[
    \({"a1", "a2", "a4"}\)], "Output",
  CellLabel->"Out[53]="]
}, Open  ]],

Cell["Pick apart the matching substring.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b2a26d15a42\>", "\<a\>" ~~ 
          x : DigitCharacter ..  \[Rule] x]\)], "Input",
  CellLabel->"In[54]:="],

Cell[BoxData[
    \({"1", "26", "42"}\)], "Output",
  CellLabel->"Out[54]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a1b2a26d15a42\>", 
      RegularExpression["\<a(\\d+)\>"] \[Rule] "\<$1\>"]\)], "Input",
  CellLabel->"In[55]:="],

Cell[BoxData[
    \({"1", "26", "42"}\)], "Output",
  CellLabel->"Out[55]="]
}, Open  ]],

Cell["Restrict the number of matches.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a b c d e\>", LetterCharacter, 3]\)], "Input",
  CellLabel->"In[56]:="],

Cell[BoxData[
    \({"a", "b", "c"}\)], "Output",
  CellLabel->"Out[56]="]
}, Open  ]],

Cell["You can use a list of rules.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<a13bF5b1Aa33\>", {"\<a\>" ~~ 
            x : DigitCharacter ..  \[Rule] 
          f1[x], "\<b\>" ~~ 
            x : \((DigitCharacter | 
                    CharacterRange["\<A\>", "\<F\>"])\) ..  \[Rule] 
          hex[x]}]\)], "Input",
  CellLabel->"In[57]:="],

Cell[BoxData[
    \({f1["13"], hex["F5"], hex["1A"], f1["33"]}\)], "Output",
  CellLabel->"Out[57]="]
}, Open  ]],

Cell[TextData[{
  "You can also give a list of strings as the first argument for efficient \
processing of many strings (see ",
  ButtonBox["Tips and Tricks for Efficient Matching",
    ButtonData:>"Tips and Tricks",
    ButtonStyle->"Hyperlink"],
  " for a discussion)."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases[{"\<cat\>", "\<in\>", "\<the\>", "\<hat\>"}, __ ~~ \
\("\<t\>" ~~ EndOfString\)]\)], "Input",
  CellLabel->"In[58]:="],

Cell[BoxData[
    \({{"cat"}, {}, {}, {"hat"}}\)], "Output",
  CellLabel->"Out[58]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(Flatten[%]\)], "Input",
  CellLabel->"In[59]:="],

Cell[BoxData[
    \({"cat", "hat"}\)], "Output",
  CellLabel->"Out[59]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["The Overlaps Option", "Section",
  CellTags->"c:9"],

Cell[TextData[{
  "The ",
  StyleBox["Overlaps", "MR"],
  " option for ",
  StyleBox["StringCases", "MR"],
  ", ",
  StyleBox["StringPosition", "MR"],
  ", and ",
  StyleBox["StringCount", "MR"],
  " deals with how the matcher proceeds after finding a match. It has three \
possible settings: ",
  StyleBox["False", "MR"],
  ", ",
  StyleBox["True", "MR"],
  ", or ",
  StyleBox["All", "MR"],
  ". The default is ",
  StyleBox["False", "MR"],
  " for ",
  StyleBox["StringCases", "MR"],
  " and ",
  StyleBox["StringCount", "MR"],
  ", while it is ",
  StyleBox["True", "MR"],
  " for ",
  StyleBox["StringPosition", "MR"],
  "."
}], "Text"],

Cell[TextData[{
  "With ",
  StyleBox["Overlaps->False", "MR"],
  ", the matcher continues the match testing at the character following the \
last matched substring."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(a(b)c(d)\>", 
      ShortestMatch["\<(\>" ~~ \(__ ~~ "\<)\>"\)]]\)], "Input",
  CellLabel->"In[60]:="],

Cell[BoxData[
    \({"(a(b)", "(d)"}\)], "Output",
  CellLabel->"Out[60]="]
}, Open  ]],

Cell[TextData[{
  "With ",
  StyleBox["Overlaps->True", "MR"],
  ", the matcher continues at the character following the first character of \
the last matched substring (when a single pattern is involved)."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(a(b)c(d)\>", 
      ShortestMatch["\<(\>" ~~ \(__ ~~ "\<)\>"\)], 
      Overlaps \[Rule] True]\)], "Input",
  CellLabel->"In[61]:="],

Cell[BoxData[
    \({"(a(b)", "(b)", "(d)"}\)], "Output",
  CellLabel->"Out[61]="]
}, Open  ]],

Cell[TextData[{
  "With ",
  StyleBox["Overlaps->All", "MR"],
  ", the matcher keeps starting at the same position until no more new \
matches are found."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(a(b)c(d)\>", 
      ShortestMatch["\<(\>" ~~ \(__ ~~ "\<)\>"\)], 
      Overlaps \[Rule] All]\)], "Input",
  CellLabel->"In[62]:="],

Cell[BoxData[
    \({"(a(b)", "(a(b)c(d)", "(b)", "(b)c(d)", "(d)"}\)], "Output",
  CellLabel->"Out[62]="]
}, Open  ]],

Cell[TextData[{
  "If multiple patterns are given in a list, ",
  StyleBox["Overlaps->True", "MR"],
  " will cause the matcher to start at the same position once for each of the \
patterns before proceeding to the next character."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(a(b)c(d)\>", {ShortestMatch["\<(\>" ~~ \(__ ~~ \
"\<)\>"\)], ShortestMatch["\<(\>" ~~ \(__ ~~ "\<(\>"\)]}, 
      Overlaps \[Rule] True]\)], "Input",
  CellLabel->"In[63]:="],

Cell[BoxData[
    \({"(a(b)", "(a(", "(b)", "(b)c(", "(d)"}\)], "Output",
  CellLabel->"Out[63]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<(a(b)c(d)\>", {ShortestMatch["\<(\>" ~~ \(__ ~~ \
"\<)\>"\)], ShortestMatch["\<(\>" ~~ \(__ ~~ "\<(\>"\)]}, 
      Overlaps \[Rule] False]\)], "Input",
  CellLabel->"In[64]:="],

Cell[BoxData[
    \({"(a(b)", "(d)"}\)], "Output",
  CellLabel->"Out[64]="]
}, Open  ]],

Cell[TextData[{
  "Note that with ",
  StyleBox["Overlaps->True", "MR"],
  ", there can thus be a difference between specifying a list of patterns and \
using the alternatives operator (",
  StyleBox["|", "MR"],
  ")."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<ab\>", {_, __}, Overlaps \[Rule] True]\)], "Input",
  CellLabel->"In[65]:="],

Cell[BoxData[
    \({"a", "ab", "b", "b"}\)], "Output",
  CellLabel->"Out[65]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<ab\>", _ | __, Overlaps \[Rule] True]\)], "Input",
  CellLabel->"In[66]:="],

Cell[BoxData[
    \({"a", "b"}\)], "Output",
  CellLabel->"Out[66]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["StringPosition", "Section",
  CellTags->"c:10"],

Cell[TextData[{
  ButtonBox["StringPosition",
    ButtonStyle->"RefGuideLink"],
  " works much like ",
  StyleBox["StringCases", "MR"],
  ", except the positions of the matching substrings are returned."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringPosition["\<a1b2a26d15a42\>", "\<a\>" ~~ _]\)], "Input",
  CellLabel->"In[67]:="],

Cell[BoxData[
    \({{1, 2}, {5, 6}, {11, 12}}\)], "Output",
  CellLabel->"Out[67]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(\(StringTake["\<a1b2a26d15a42\>", #] &\) /@ %\)], "Input",
  CellLabel->"In[68]:="],

Cell[BoxData[
    \({"a1", "a2", "a4"}\)], "Output",
  CellLabel->"Out[68]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["Overlaps", "MR"],
  " option is ",
  StyleBox["True", "MR"],
  " by default (see the previous section for more details on this option)."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringPosition["\<(a(b)c(d)\>", 
      ShortestMatch["\<(\>" ~~ \(__ ~~ "\<)\>"\)]]\)], "Input",
  CellLabel->"In[69]:="],

Cell[BoxData[
    \({{1, 5}, {3, 5}, {7, 9}}\)], "Output",
  CellLabel->"Out[69]="]
}, Open  ]],

Cell["Note that even empty strings can be matches.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringPosition["\<abc\>", ___]\)], "Input",
  CellLabel->"In[70]:="],

Cell[BoxData[
    \({{1, 3}, {2, 3}, {3, 3}, {4, 3}}\)], "Output",
  CellLabel->"Out[70]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["StringCount", "Section",
  CellTags->"c:11"],

Cell[TextData[{
  ButtonBox["StringCount",
    ButtonStyle->"RefGuideLink"],
  " returns the number of matching substrings (which are found by ",
  StyleBox["StringPosition", "MR"],
  " or ",
  StyleBox["StringCases", "MR"],
  "). It is useful for cases with many matches where memory for storing all \
the substrings might be an issue."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCount["\<abaababba\>", "\<a\>" ~~ \(___ ~~ "\<b\>"\), 
      Overlaps \[Rule] All]\)], "Input",
  CellLabel->"In[71]:="],

Cell[BoxData[
    \(12\)], "Output",
  CellLabel->"Out[71]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases["\<abaababba\>", "\<a\>" ~~ \(___ ~~ "\<b\>"\), 
        Overlaps \[Rule] All] // Length\)], "Input",
  CellLabel->"In[72]:="],

Cell[BoxData[
    \(12\)], "Output",
  CellLabel->"Out[72]="]
}, Open  ]],

Cell[TextData[{
  "Note that ",
  StyleBox["Overlaps->False", "MR"],
  " is the default for ",
  StyleBox["StringCount", "MR"],
  "."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell["StringReplace", "Section",
  CellTags->"c:12"],

Cell[TextData[{
  ButtonBox["StringReplace",
    ButtonStyle->"RefGuideLink"],
  " is used for substituting substrings matching the given patterns."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<abcde\>", {"\<a\>" \[Rule] "\<A\>", "\<cd\>" \[Rule] "\
\<XX\>"}]\)], "Input",
  CellLabel->"In[73]:="],

Cell[BoxData[
    \("AbXXe"\)], "Output",
  CellLabel->"Out[73]="]
}, Open  ]],

Cell[TextData[{
  "Named patterns can be used as strings on the right-hand side of the \
replacement rules. Note the use of ",
  StyleBox["RuleDelayed", "MR"],
  " (",
  StyleBox["\[RuleDelayed]", "MR"],
  ") to avoid premature evaluation."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<this is a test\>", 
      x : WordCharacter ..  \[RuleDelayed] StringReverse[x]]\)], "Input",
  CellLabel->"In[74]:="],

Cell[BoxData[
    \("siht si a tset"\)], "Output",
  CellLabel->"Out[74]="]
}, Open  ]],

Cell[TextData[{
  "When using regular expressions, it is convenient to remember that ",
  StyleBox["\"$0\"", "MR"],
  " on the right-hand side refers to the whole matched substring."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<this is a test\>", 
      RegularExpression["\<\\w+\>"] \[RuleDelayed] 
        StringReverse["\<$0\>"]]\)], "Input",
  CellLabel->"In[75]:="],

Cell[BoxData[
    \("siht si a tset"\)], "Output",
  CellLabel->"Out[75]="]
}, Open  ]],

Cell["\<\
You can limit the number of replacements made by specifying a third argument.\
\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<this is a test\>", 
      x : WordCharacter ..  \[RuleDelayed] StringReverse[x], 1]\)], "Input",
  CellLabel->"In[76]:="],

Cell[BoxData[
    \("siht is a test"\)], "Output",
  CellLabel->"Out[76]="]
}, Open  ]],

Cell[TextData[{
  "Note that the replacement does not have to be a string. If the result is \
not a string, a ",
  StyleBox["StringExpression", "MR"],
  " is returned."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace["\<some <b>bold</b> and <i>italics</i>.\>", 
      ShortestMatch["\<<\>" ~~ \(x___ ~~ "\<>\>"\)] \[RuleDelayed] 
        Tag[x]]\)], "Input",
  CellLabel->"In[77]:="],

Cell[BoxData[
    \("some " ~~ Tag["b"] ~~ "bold" ~~ Tag["/b"] ~~ " and " ~~ Tag["i"] ~~ 
      "italics" ~~ Tag["/i"] ~~ "."\)], "Output",
  CellLabel->"Out[77]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(InputForm[%]\)], "Input",
  CellLabel->"In[78]:="],

Cell["\<\
StringExpression[\"some \", Tag[\"b\"], \"bold\", Tag[\"/b\"], \" and \", \
Tag[\"i\"], \"italics\", Tag[\"/i\"], \".\"]\
\>", "Output",
  CellLabel->"Out[78]//InputForm="]
}, Open  ]],

Cell[TextData[{
  "There is limited support for using the old ",
  StyleBox["MetaCharacters", "MR"],
  " option in conjunction with general string patterns, but this option is \
deprecated and its use should be avoided."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell["StringReplaceList", "Section",
  CellTags->"c:13"],

Cell[TextData[{
  ButtonBox["StringReplaceList",
    ButtonStyle->"RefGuideLink"],
  " returns a list of strings where a ",
  StyleBox["single",
    FontSlant->"Italic"],
  " string replacement has been made in all possible ways."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplaceList["\<abaac\>", "\<a\>" ~~ x_ \[RuleDelayed] 
        ToUpperCase[x]]\)], "Input",
  CellLabel->"In[79]:="],

Cell[BoxData[
    \({"Baac", "abAc", "abaC"}\)], "Output",
  CellLabel->"Out[79]="]
}, Open  ]],

Cell["\<\
If a list of strings is given as input, the output is a nested list of \
results.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplaceList[{"\<abaac\>", "\<baaba\>"}, "\<a\>" ~~ 
          x_ \[RuleDelayed] ToUpperCase[x]]\)], "Input",
  CellLabel->"In[80]:="],

Cell[BoxData[
    \({{"Baac", "abAc", "abaC"}, {"bAba", "baBa"}}\)], "Output",
  CellLabel->"Out[80]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["StringSplit", "Section",
  CellTags->"c:14"],

Cell[TextData[{
  ButtonBox["StringSplit",
    ButtonStyle->"RefGuideLink"],
  " is useful for splitting a string into many strings at delimiters matching \
a pattern. By default, the splits happen at runs of whitespace."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<this is a test\>"]\)], "Input",
  CellLabel->"In[81]:="],

Cell[BoxData[
    \({"this", "is", "a", "test"}\)], "Output",
  CellLabel->"Out[81]="]
}, Open  ]],

Cell["\<\
To split a normal sentence into words, for instance, you also need to include \
punctuation in the delimiter.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<A sentence: with commas, semicolons; etc...!?\>", 
      Characters["\<:,;.!? \>"] .. ]\)], "Input",
  CellLabel->"In[82]:="],

Cell[BoxData[
    \({"A", "sentence", "with", "commas", "semicolons", "etc"}\)], "Output",
  CellLabel->"Out[82]="]
}, Open  ]],

Cell["\<\
By default, empty strings at the beginning and the end of the result are \
removed.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<:a:b:c:\>", "\<:\>"]\)], "Input",
  CellLabel->"In[83]:="],

Cell[BoxData[
    \({"a", "b", "c"}\)], "Output",
  CellLabel->"Out[83]="]
}, Open  ]],

Cell[TextData[{
  "These can be included by specifying ",
  StyleBox["All", "MR"],
  " as a third argument."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<:a:b:c:\>", "\<:\>", All]\)], "Input",
  CellLabel->"In[84]:="],

Cell[BoxData[
    \({\*"\<\"\"\>", "a", "b", "c", \*"\<\"\"\>"}\)], "Output",
  CellLabel->"Out[84]="]
}, Open  ]],

Cell["\<\
The third argument can also be a number giving the maximum number of strings \
to split into.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<this is a test\>", Whitespace, 2]\)], "Input",
  CellLabel->"In[85]:="],

Cell[BoxData[
    \({"this", "is a test"}\)], "Output",
  CellLabel->"Out[85]="]
}, Open  ]],

Cell["This splits a string into individual lines.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<line1\nthis is line 2\nline3\>", "\<\n\>"]\)], "Input",
  CellLabel->"In[86]:="],

Cell[BoxData[
    \({"line1", "this is line 2", "line3"}\)], "Output",
  CellLabel->"Out[86]="]
}, Open  ]],

Cell[TextData[{
  "You can also split at patterns that match positions, such as ",
  StyleBox["StartOfLine", "MR"],
  ". This keeps the newline characters in the result."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<line1\nthis is line 2\nline3\>", StartOfLine]\)], "Input",\

  CellLabel->"In[87]:="],

Cell[BoxData[
    \({"line1\n", "this is line 2\n", "line3"}\)], "Output",
  CellLabel->"Out[87]="]
}, Open  ]],

Cell["\<\
You can keep the delimiters, or parts of the delimiters, in the output by \
using a rule as the second argument.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<this is a test\>", "\< \>" \[Rule] "\< \>"]\)], "Input",
  CellLabel->"In[88]:="],

Cell[BoxData[
    \({"this", " ", "is", " ", "a", " ", "test"}\)], "Output",
  CellLabel->"Out[88]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<this is a test\>", "\< \>" \[Rule] "\<:\>"]\)], "Input",
  CellLabel->"In[89]:="],

Cell[BoxData[
    \({"this", ":", "is", ":", "a", ":", "test"}\)], "Output",
  CellLabel->"Out[89]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<the <tag1>first</tag1> and the <tag2>second</tag2>\>", 
      ShortestMatch["\<<\>" ~~ \(__ ~~ "\<>\>"\)]]\)], "Input",
  CellLabel->"In[90]:="],

Cell[BoxData[
    \({"the ", "first", " and the ", "second"}\)], "Output",
  CellLabel->"Out[90]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<the <tag1>first</tag1> and the <tag2>second</tag2>\>", 
      ShortestMatch["\<<\>" ~~ \(x__ ~~ "\<>\>"\)] \[RuleDelayed] 
        Tag[x]]\)], "Input",
  CellLabel->"In[91]:="],

Cell[BoxData[
    \({"the ", Tag["tag1"], "first", Tag["/tag1"], " and the ", Tag["tag2"], 
      "second", Tag["/tag2"]}\)], "Output",
  CellLabel->"Out[91]="]
}, Open  ]],

Cell["\<\
You can give a list of patterns and rules as well; the delimiters matching \
the patterns will be left out of the result.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit["\<the <tag1>first</tag1> and the <tag2>second</tag2>\>", \
{Whitespace, 
          ShortestMatch["\<<\>" ~~ \(x__ ~~ "\<>\>"\)] \[RuleDelayed] 
            Tag[x]}] // InputForm\)], "Input",
  CellLabel->"In[92]:="],

Cell["\<\
{\"the\", \"\", Tag[\"tag1\"], \"first\", Tag[\"/tag1\"], \"\", \"and\", \
\"the\", \"\", Tag[\"tag2\"], \"second\", Tag[\"/tag2\"]}\
\>", "Output",
  CellLabel->"Out[92]//InputForm="]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". For Perl Users"
}], "Chapter",
  CellTags->"c:15"],

Cell[TextData[{
  "With the addition of general string patterns, ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " can be a powerful alternative to languages like Perl and Python for many \
general, everyday programming tasks. For people familiar with Perl syntax, \
and the way Perl does string manipulation, the following rough guide shows \
how to get similar functionality in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell[TextData[{
  "Here is an overview of the ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " functions involved in constructing Perl-like functions."
}], "Text"],

Cell[BoxData[
    FormBox[GridBox[{
          {Cell[TextData[{
              StyleBox["Perl Construct", "TR",
                FontWeight->"Bold"],
              " "
            }]], Cell[TextData[{
              StyleBox["Mathematica", "TR",
                FontWeight->"Bold",
                FontSlant->"Italic"],
              StyleBox[" F", "TR",
                FontWeight->"Bold"],
              StyleBox["unction",
                FontWeight->"Bold"],
              " "
            }]], Cell[TextData[StyleBox["Explanation",
              FontWeight->"Bold"]]]},
          {Cell[TextData[{
              StyleBox["m/.../", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StringFreeQ ", "MR"],
              StyleBox["or", "TR"],
              StyleBox[" StringCases", "MR"]
            }]], Cell[TextData[{
              
              "match a string with a regular expression, possibly extracting \
subpatterns",
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["s/.../.../", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StringReplace", "MR"],
              " "
            }]], Cell[TextData[{
              "replace substrings matching a regular expression",
              StyleBox[" ", "TR"]
            }]]},
          {Cell[TextData[{
              StyleBox["split(...)", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StringSplit", "MR"],
              " "
            }]], Cell[
            "split a string at delimiters matching a regular expression "]},
          {Cell[TextData[{
              StyleBox["tr/.../.../", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["StringReplace", "MR"],
              " "
            }]], Cell["replace characters by other characters "]},
          {Cell[TextData[{
              StyleBox["/i", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["IgnoreCase ", "MR"],
              "\[Rule] ",
              StyleBox["True", "MR"],
              " or ",
              StyleBox["\"(?i)\"", "MR"],
              " "
            }]], Cell["case-insensitive modifier "]},
          {Cell[TextData[{
              StyleBox["/s", "MR"],
              " "
            }]], Cell[TextData[StyleBox["\"(?s)\"", "MR"]]], Cell[TextData[{
              "force ",
              StyleBox["\".\"", "MR"],
              " to match all characters (including newlines) "
            }]]},
          {Cell[TextData[{
              StyleBox["/x", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"(?x)\"", "MR"],
              " "
            }]], Cell["\<\
ignore whitespace and allow extended comments in the regular expression\
\>"]},
          {Cell[TextData[{
              StyleBox["/m", "MR"],
              " "
            }]], Cell[TextData[{
              StyleBox["\"(?m)\"", "MR"],
              " "
            }]], Cell[TextData[{
              "multiline mode (",
              StyleBox["\"^\"", "MR"],
              " and ",
              StyleBox["\"$\"", "MR"],
              " match start/end of lines)"
            }]]}
          },
        ColumnAlignments->{Left}], TraditionalForm]], "3ColumnTable",
  GridBoxOptions->{RowLines->{True, False}}],

Cell["Following are some common Perl constructs in more detail.", "Text"],

Cell[CellGroupData[{

Cell["m/.../", "Section",
  CellTags->"c:16"],

Cell[TextData[{
  "The match operator ",
  StyleBox["m/regex/", "MR"],
  " tests whether a string contains a substring matching the ",
  StyleBox["regex", "MR"],
  ". For simple matches of this sort in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  ", use ",
  StyleBox["StringFreeQ", "MR"],
  "."
}], "Text"],

Cell[TextData[{
  "Here is a Perl snippet for testing whether a string contains a ",
  StyleBox["\"b\"", "MR"],
  " somewhere after an ",
  StyleBox["\"a\"", "MR"],
  "."
}], "Text"],

Cell["\<\
$string = \"sdakdb\";
if ($string =~ m/a.*b/){
  print \"Match!\";
}\
\>", "Program"],

Cell[TextData[{
  "Here is a ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " version of the same test."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(string = "\<sdakdb\>";\)\), "\[IndentingNewLine]", 
    \(If[\(! StringFreeQ[string, RegularExpression["\<a.*b\>"]]\), 
      Print["\<Match!\>"]]\)}], "Input",
  CellLabel->"In[93]:="],

Cell[BoxData[
    \("Match!"\)], "Print",
  CellLabel->"From In[93]:="]
}, Open  ]],

Cell[TextData[{
  "If parts of the matched string need to be accessed later, using ",
  StyleBox["$1", "MR"],
  ", ",
  StyleBox["$2", "MR"],
  ", ... in Perl, the best ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " function to use is normally ",
  StyleBox["StringCases", "MR"],
  "."
}], "Text"],

Cell["Here is Perl code for extracting an error message.", "Text"],

Cell["\<\
$res = \"ERROR = paper jam\";
if ($res =~ m/ERROR = (.*)/){
  print \"Hey, you should check the $1!\";
}\
\>", "Program"],

Cell[TextData[{
  "Here is a ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " version."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(res = "\<ERROR = paper jam\>";\)\), "\[IndentingNewLine]", 
    \(With[{test = 
          StringCases[res, 
            RegularExpression["\<ERROR = (.*)\>"] \[Rule] "\<$1\>"]}, 
      If[test =!= {}, 
        Print["\<Hey, you should check the \>", 
          test[\([1]\)], "\<!\>"]]]\)}], "Input",
  CellLabel->"In[95]:="],

Cell[BoxData[
    InterpretationBox[\("Hey, you should check the "\[InvisibleSpace]"paper \
jam"\[InvisibleSpace]"!"\),
      SequenceForm[ "Hey, you should check the ", "paper jam", "!"],
      Editable->False]], "Print",
  CellLabel->"From In[95]:="]
}, Open  ]],

Cell["\<\
Here is Perl code for extracting several subpatterns at once.\
\>", "Text"],

Cell["\<\
$date = \"88/6/13\";
($year, $month, $day) = $date =~ m/^(\\d+)\\/(\\d+)\\/(\\d+)$/;\
\>", "Program"],

Cell[TextData[{
  "In ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  ", this is done with ",
  StyleBox["StringCases", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(date = "\<88/6/13\>";\)\), "\[IndentingNewLine]", 
    \({year, month, 
        day} = \(StringCases[date, 
          RegularExpression["\<^(\\d+)/(\\d+)/(\\d+)$\>"] -> {"\<$1\>", "\<$2\
\>", "\<$3\>"}]\)[\([1]\)]\)}], "Input",
  CellLabel->"In[97]:="],

Cell[BoxData[
    \({"88", "6", "13"}\)], "Output",
  CellLabel->"Out[98]="]
}, Open  ]],

Cell[TextData[{
  "This is similar to assigning all the matches to an array using the ",
  StyleBox["/g", "MR"],
  " modifier."
}], "Text"],

Cell["\<\
$text = \"128.32.13.117\";
@nums = $text =~ m/\\d+/g;\
\>", "Program"],

Cell[TextData[{
  "The same thing is easily done with ",
  StyleBox["StringCases", "MR"],
  " in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<128.32.13.117\>";\)\), "\[IndentingNewLine]", 
    \(nums = StringCases[text, RegularExpression["\<\\d+\>"]]\)}], "Input",
  CellLabel->"In[99]:="],

Cell[BoxData[
    \({"128", "32", "13", "117"}\)], "Output",
  CellLabel->"Out[100]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["s/.../.../", "Section",
  CellTags->"c:17"],

Cell[TextData[{
  "The obvious ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " version of the Perl ",
  StyleBox["s/.../.../", "MR"],
  " substitution operator is ",
  StyleBox["StringReplace", "MR"],
  "."
}], "Text"],

Cell["\<\
$text = \"abcagh\";
$text =~ s/a./XX/;\
\>", "Program"],

Cell["The default Perl behavior is to do a single replacement.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<abcagh\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, RegularExpression["\<a.\>"] -> "\<XX\>", 
      1]\)}], "Input",
  CellLabel->"In[101]:="],

Cell[BoxData[
    \("XXcagh"\)], "Output",
  CellLabel->"Out[102]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["/g", "MR"],
  " modifier in Perl does global replacement of all matches."
}], "Text"],

Cell["$text =~ s/a./XX/g", "Program"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[text, RegularExpression["\<a.\>"] -> "\<XX\>"]\)], "Input",\

  CellLabel->"In[103]:="],

Cell[BoxData[
    \("XXcXXh"\)], "Output",
  CellLabel->"Out[103]="]
}, Open  ]],

Cell[TextData[{
  "Using the evaluation ",
  StyleBox["/e", "MR"],
  " modifier, Perl can use subpatterns as part of the replacement. This is \
easily done in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell["\<\
$text = \"13 27 3\";
$text =~ s/(\\d+)/$1.$1/eg\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<13 27 3\>";\)\), "\n", 
    \(StringReplace[text, 
      RegularExpression["\<(\\d+)\>"] \[RuleDelayed] "\<$1$1\>"]\)}], "Input",\

  CellLabel->"In[104]:="],

Cell[BoxData[
    \("1313 2727 33"\)], "Output",
  CellLabel->"Out[105]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["split(...)", "Section",
  CellTags->"c:18"],

Cell[TextData[{
  "The Perl ",
  StyleBox["split", "MR"],
  " command is similar to ",
  StyleBox["StringSplit", "MR"],
  " in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell["\<\
$text = \"ab:cd:efg\";
split(/:/, $text)\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<ab:cd:efg\>";\)\), "\[IndentingNewLine]", 
    \(StringSplit[text, "\<:\>"]\)}], "Input",
  CellLabel->"In[106]:="],

Cell[BoxData[
    \({"ab", "cd", "efg"}\)], "Output",
  CellLabel->"Out[107]="]
}, Open  ]],

Cell[TextData[{
  "You can specify the number of blocks to split into in both Perl and ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell["split(/:/, $text,2)", "Program"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringSplit[text, "\<:\>", 2]\)], "Input",
  CellLabel->"In[108]:="],

Cell[BoxData[
    \({"ab", "cd:efg"}\)], "Output",
  CellLabel->"Out[108]="]
}, Open  ]],

Cell[TextData[{
  "A ",
  StyleBox["split", "MR"],
  " with capturing parentheses in the pattern, for which the captured \
substrings are included in the result, can be done in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " using rules in the second argument of ",
  StyleBox["StringSplit", "MR"],
  ". Compared to Perl, in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " it is easy to then apply a function to these substrings."
}], "Text"],

Cell["\<\
$text = \"test with <tag1>tags</tag1> and <b>more</b>\";
split(/<([^>]*)>/, $text)\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<test with <tag1>tags</tag1> and <b>more</b>\>";\)\), "\
\[IndentingNewLine]", 
    \(StringSplit[text, RegularExpression["\<<([^>]*)>\>"] \[Rule] "\<$1\>"] // 
      InputForm\)}], "Input",
  CellLabel->"In[109]:="],

Cell["\<\
{\"test with \", \"tag1\", \"tags\", \"/tag1\", \" and \", \"b\", \"more\", \
\"/b\"}\
\>", "Output",
  CellLabel->"Out[110]//InputForm="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<test with <tag1>tags</tag1> and <b>more</b>\>";\)\), "\
\[IndentingNewLine]", 
    \(StringSplit[text, 
        RegularExpression["\<<([^>]*)>\>"] \[RuleDelayed] Tag["\<$1\>"]] // 
      InputForm\)}], "Input",
  CellLabel->"In[111]:="],

Cell["\<\
{\"test with \", Tag[\"tag1\"], \"tags\", Tag[\"/tag1\"], \" and \", \
Tag[\"b\"], \"more\", Tag[\"/b\"]}\
\>", "Output",
  CellLabel->"Out[112]//InputForm="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["tr/.../.../", "Section",
  CellTags->"c:19"],

Cell[TextData[{
  "The Perl ",
  StyleBox["tr", "MR"],
  " command can be simulated using ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " ",
  StyleBox["StringReplace", "MR"],
  " together with the appropriate list of rules."
}], "Text"],

Cell[TextData[{
  "Here is the simplest form where the characters ",
  StyleBox["\"a\"", "MR"],
  ", ",
  StyleBox["\"b\"", "MR"],
  ", and ",
  StyleBox["\"c\"", "MR"],
  " are replaced by ",
  StyleBox["\"X\"", "MR"],
  ", ",
  StyleBox["\"Y\"", "MR"],
  ", and ",
  StyleBox["\"Z\"", "MR"],
  " respectively."
}], "Text"],

Cell["\<\
$text = \"abcdef\";
$text =~ tr/abc/XYZ/\
\>", "Program"],

Cell[TextData[{
  "This generates the appropriate rules in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " using ",
  StyleBox["Thread", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<abcdef\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, 
      Thread[Rule[Characters["\<abc\>"], Characters["\<XYZ\>"]]]]\)}], "Input",\

  CellLabel->"In[113]:="],

Cell[BoxData[
    \("XYZdef"\)], "Output",
  CellLabel->"Out[114]="]
}, Open  ]],

Cell[TextData[{
  "Here is an example where the replacement list is shorter than the \
character list, so ",
  StyleBox["\"d\"", "MR"],
  ", ",
  StyleBox["\"e\"", "MR"],
  ", and ",
  StyleBox["\"f\"", "MR"],
  " are all replaced by ",
  StyleBox["\"Z\"", "MR"],
  "."
}], "Text"],

Cell["\<\
$text = \"abcdefghi\";
$text =~ tr/abcdef/WXYZ/\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<abcdefghi\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, 
      Append[Thread[Rule[Characters["\<abc\>"], Characters["\<WXY\>"]]], 
        Characters["\<def\>"] \[Rule] "\<Z\>"]]\)}], "Input",
  CellLabel->"In[115]:="],

Cell[BoxData[
    \("WXYZZZghi"\)], "Output",
  CellLabel->"Out[116]="]
}, Open  ]],

Cell[TextData[{
  "Character ranges in Perl are emulated using ",
  StyleBox["CharacterRange", "MR"],
  " in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  "."
}], "Text"],

Cell["\<\
$text = \"this and that\";
$text =~ tr/a-z/x/\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<this and that\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, 
      CharacterRange["\<a\>", "\<z\>"] \[Rule] "\<x\>"]\)}], "Input",
  CellLabel->"In[117]:="],

Cell[BoxData[
    \("xxxx xxx xxxx"\)], "Output",
  CellLabel->"Out[118]="]
}, Open  ]],

Cell[TextData[{
  "With the ",
  StyleBox["/d", "MR"],
  " modifier, the surplus characters are instead deleted."
}], "Text"],

Cell["\<\
$text = \"abcdefghi\";
$text =~ tr/abcdef/WXYZ/d\
\>", "Program"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<abcdefghi\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, 
      Append[Thread[Rule[Characters["\<abcd\>"], Characters["\<WXYZ\>"]]], 
        Characters["\<ef\>"] \[Rule] "\<\>"]]\)}], "Input",
  CellLabel->"In[119]:="],

Cell[BoxData[
    \("WXYZghi"\)], "Output",
  CellLabel->"Out[120]="]
}, Open  ]],

Cell[TextData[{
  "With the ",
  StyleBox["/c", "MR"],
  " modifier, the complement of the character list is used."
}], "Text"],

Cell["$text =~ tr/aeh/ /c", "Program"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[text, 
      Except[Characters["\<aeh\>"]] \[Rule] "\< \>"]\)], "Input",
  CellLabel->"In[121]:="],

Cell[BoxData[
    \("a   e  h "\)], "Output",
  CellLabel->"Out[121]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[text, 
      RegularExpression["\<[^aeh]\>"] \[Rule] "\< \>"]\)], "Input",
  CellLabel->"In[122]:="],

Cell[BoxData[
    \("a   e  h "\)], "Output",
  CellLabel->"Out[122]="]
}, Open  ]],

Cell[TextData[{
  "The ",
  StyleBox["/s", "MR"],
  " modifier squeezes down to one any run of characters translating into the \
same character."
}], "Text"],

Cell["\<\
$text = \"abbcccddddeeeeeeffeeded\";
$text =~ tr/abcde/ABCD/s\
\>", "Program"],

Cell[TextData[{
  "You get the same effect in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " using ",
  StyleBox["Repeated", "MR"],
  " (",
  StyleBox["..", "MR"],
  ")."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[{
    \(\(text = "\<abbcccddddeeeeeeffeeded\>";\)\), "\[IndentingNewLine]", 
    \(StringReplace[text, 
      Append[Thread[
          Rule[Repeated /@ Characters["\<abc\>"], Characters["\<ABC\>"]]], 
        Characters["\<de\>"] ..  \[Rule] "\<D\>"]]\)}], "Input",
  CellLabel->"In[123]:="],

Cell[BoxData[
    \("ABCDffD"\)], "Output",
  CellLabel->"Out[124]="]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". Some Examples"
}], "Chapter",
  CellTags->"c:20"],

Cell["\<\
Some brief examples of practical uses of string patterns are presented in \
this section.\
\>", "Text"],

Cell[CellGroupData[{

Cell["Dictionary Lookup", "Section",
  CellTags->"c:21"],

Cell[TextData[{
  "Here is a simple utility for searching for words in a dictionary (by \
default, an example dictionary that comes with ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " is loaded)."
}], "Text"],

Cell[BoxData[
    \(GetDictionary[
        path_:  ToFileName[{$InstallationDirectory, "\<Documentation\>", 
              "\<English\>", "\<Demos\>", "\<Datafiles\>"}, 
            "\<dictionary.dat\>"]] := ReadList[path, String]\)], "Input",
  CellLabel->"In[125]:="],

Cell[TextData[{
  "Use ",
  StyleBox["Pick", "MR"],
  ", rather than ",
  StyleBox["Select", "MR"],
  ", for efficiency reasons (see ",
  ButtonBox["Tips and Tricks for Efficient Matching",
    ButtonData:>"Tips and Tricks",
    ButtonStyle->"Hyperlink"],
  ")."
}], "Text"],

Cell[BoxData[
    \(LookupWords[words_, pattern_] := 
      Pick[words, StringMatchQ[words, pattern], True]\)], "Input",
  CellLabel->"In[126]:="],

Cell["Load the dictionary.", "Text"],

Cell[BoxData[
    \(\(dict = GetDictionary[];\)\)], "Input",
  CellLabel->"In[127]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(Length[dict]\)], "Input",
  CellLabel->"In[128]:="],

Cell[BoxData[
    \(24259\)], "Output",
  CellLabel->"Out[128]="]
}, Open  ]],

Cell[TextData[{
  "Search for all words with the second and third letters being ",
  StyleBox["\"ab\"", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(LookupWords[dict, _ ~~ \("\<ab\>" ~~ ___\)]\)], "Input",
  CellLabel->"In[129]:="],

Cell[BoxData[
    \({"babbitt", "babble", "Babcock", "babe", "Babel", "baboon", "baby", 
      "Babylon", "babysat", "babysit", "babysitting", "cab", "cabal", 
      "cabana", "cabaret", "cabbage", "cabdriver", "cabin", "cabinet", 
      "cabinetmake", "cabinetry", "cable", "Cabot", "dab", "dabble", "Faber", 
      "Fabian", "fable", "fabric", "fabricate", "fabulous", "gab", 
      "gabardine", "gabble", "gabbro", "Gaberones", "gable", "Gabon", 
      "Gabriel", "Gabrielle", "habeas", "haberdashery", "Haberman", "Habib", 
      "habit", "habitant", "habitat", "habitation", "habitual", "habituate", 
      "jab", "Jablonsky", "Kabuki", "Kabul", "lab", "Laban", "label", 
      "labia", "labial", "labile", "lability", "laboratory", "laborious", 
      "labour", "Labrador", "labradorite", "labyrinth", "Mabel", "nab", 
      "Nabisco", "Pablo", "Pabst", "rabat", "rabbet", "rabbi", "rabbit", 
      "rabble", "rabid", "rabies", "Rabin", "sabbath", "sabbatical", 
      "Sabina", "Sabine", "sable", "sabotage", "sabra", "tab", "tabernacle", 
      "table", "tableau", "tableaux", "tablecloth", "tableland", 
      "tablespoon", "tablespoonful", "tablet", "tabloid", "taboo", "tabu", 
      "tabula", "tabular", "tabulate", "Wabash"}\)], "Output",
  CellLabel->"Out[129]="]
}, Open  ]],

Cell["This finds words containing nonword characters.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(LookupWords[dict, ___ ~~ \(Except[WordCharacter] ~~ ___\)]\)], "Input",
  CellLabel->"In[130]:="],

Cell[BoxData[
    \({"ain't", "A&M", "anybody'd", "A&P", "aren't", "a's", "AT&T", "b's", 
      "can't", "couldn't", "c's", "d'art", "d'etat", "didn't", "doesn't", 
      "d'oeuvre", "don't", "d's", "e'er", "e.g", "e's", "f's", "g's", 
      "hadn't", "hasn't", "haven't", "he'd", "he'll", "h's", "I'd", "i.e", 
      "I'll", "I'm", "i's", "isn't", "it'd", "it'll", "IT&T", "I've", "j's", 
      "k's", "l'oeil", "l's", "L'vov", "mightn't", "m's", "mustn't", 
      "needn't", "nobody'd", "n's", "O'Brien", "o'clock", "O'Connell", 
      "O'Connor", "O'Dell", "O'Donnell", "O'Dwyer", "o'er", "O'Hare", 
      "O'Leary", "O'Neill", "o's", "O'Shea", "O'Sullivan", "oughtn't", 
      "Ph.D", "p's", "q's", "R&D", "r's", "shan't", "she'd", "she'll", 
      "shouldn't", "somebody'll", "someone'll", "s's", "that'd", "that'll", 
      "there'd", "there'll", "they'd", "they'll", "they're", "they've", 
      "this'll", "today'll", "t's", "u's", "U.S", "U.S.A", "USC&GS", "v's", 
      "wasn't", "we'd", "we'll", "we're", "weren't", "we've", "what'd", 
      "what're", "where'd", "where're", "who'd", "who'll", "won't", 
      "wouldn't", "w's", "x's", "you'd", "you'll", "you're", "you've", "y's", 
      "z's"}\)], "Output",
  CellLabel->"Out[130]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["Highlight Patterns", "Section",
  CellTags->"c:22"],

Cell["This defines a 1000-base random DNA string.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(SeedRandom[1234]; 
    dna = StringJoin[
        Table[{"\<a\>", "\<c\>", "\<g\>", "\<t\>"}[\([Random[
                Integer, {1, 4}]]\)], {1000}]]\)], "Input",
  CellLabel->"In[131]:="],

Cell[BoxData[
    \("acaaccgccgcgaattctcacaaacgtcgagtgtgatatagaaaatcccagatcacactatagggtggaaa\
ccaggtgatagttgcctctgccatgcatatgcgattaaatgttcgttgaatatgagtaaagaatctaagcgtagtttt\
tatagtaaagaccccgcgcctctgcgcgtgatagtgttaccgacgcatctcgatgttgtacatgtagcactgtacgta\
atcattatacgatttccataacgtaagctgggtaacagacctaacgtagggttcatctacgcgcttatcctccgacct\
aggattgcgtctagaaaactgaacaagtaaaccgtactcctttatccgccgacagtccagaacagtctgacttccagc\
tacttaatggtttcccagatttcctgcggaatacctcgaccgtgtggccattgctccaccaccgcaattcgcctcttc\
tgcacaggtccacgcacgttttccctgagcataaaaacccagcaatacgaaaggttctctacacatcagcagcttccc\
gagtgacctgattggggctgcgctataacgtcggtcgcgtttccatcaggacgcatgcagcgacgcctgcagcagcag\
tccccttcacagcgtacagggctctggtaagggcagccagtttcgctaacggtcctgttgcttacatgcgcatacaat\
tatgccaaacggacacgtgctatccagacgaggtgtcgtaaaggggatttctaagtgaccagaattactgtcagacga\
ccttaagatagtcaggctttcagcggtagataggcgggatgaatcgaaagcaatgacaaggcccggtcgccagagaga\
caggcttagtattcagtaagcagtagcgcgacatacccgaaactccgcgcgggtatagagtacatctactaggtgtgt\
atctgcagcacattagggctattcagaccgttaattccggcctgaggccatgccgacagaacaaattgcct"\)], \
"Output",
  CellLabel->"Out[131]="]
}, Open  ]],

Cell["\<\
This highlights parts of the DNA that match a certain pattern.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[dna, 
      x : \(("\<ag\>" ~~ \(_ ~~ \(_ ~~ \("\<t\>" ~~ \(_ ~~ \
"\<ca\>"\)\)\)\))\) \[RuleDelayed] "\<\!\(\*StyleBox[\"\>" <> 
          x <> "\<\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\"\
Bold\"]\)\>"]\)], "Input",
  CellLabel->"In[132]:="],

Cell[BoxData["\<\"\
acaaccgccgcgaattctcacaaacgtcgagtgtgatatagaaaatcccagatcacactatagggtggaaaccaggtg\
atagttgcctctgccatgcatatgcgattaaatgttcgttgaatatgagtaaagaatctaagcgtagtttttatagta\
aagaccccgcgcctctgcgcgtgatagtgttaccgacgcatctcgatgttgtacatgtagcactgtacgtaatcatta\
tacgatttccataacgtaagctgggtaacagacctaacgt\\!\\(\\*StyleBox[\\\"agggttca\\\",\
FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\"]\\)\
tctacgcgcttatcctccgacctaggattgcgtctagaaaactgaacaagtaaaccgtactcctttatccgccgacag\
tccagaacagtctgacttccagctacttaatggtttcccagatttcctgcggaatacctcgaccgtgtggccattgct\
ccaccaccgcaattcgcctcttctgcacaggtccacgcacgttttccctgagcataaaaacccagcaatacgaaaggt\
tctctacacatcagcagcttcccgagtgacctgattggggctgcgctataacgtcggtcgcgtttccatcaggacgca\
tgcagcgacgcctgcagcagcagtccccttcac\\!\\(\\*StyleBox[\\\"agcgtaca\\\",FontColor-\
>RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\"]\\)\
gggctctggtaagggcagccagtttcgctaacggtcctgttgcttacatgcgcatacaattatgccaaacggacacgt\
gctatccagacgaggtgtcgtaaaggggatttctaagtgaccagaattactgtcagacgaccttaagatagtcaggct\
ttcagcggtagataggcgggatgaatcgaaagcaatgacaaggcccggtcgccagagagacaggctt\\!\\(\\*\
StyleBox[\\\"agtattca\\\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\
\\\"Bold\\\"]\\)gtaagcagtagcgcgacatacccgaaactccgcgcgggtat\\!\\(\\*StyleBox[\\\
\"agagtaca\\\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\
\"]\\)tctactaggtgtgtatctgcagcacattagggctattcagaccgttaattccggcctgaggccatgccgaca\
gaacaaattgcct\"\>"], "Output",
  CellLabel->"Out[132]="]
}, Open  ]],

Cell["Here is the same result using a regular expression.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[dna, 
      RegularExpression["\<ag..t.ca\>"] \[RuleDelayed] "\<\!\(\*StyleBox[\"$0\
\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\"Bold\"]\)\>"]\)], \
"Input",
  CellLabel->"In[133]:="],

Cell[BoxData["\<\"\
acaaccgccgcgaattctcacaaacgtcgagtgtgatatagaaaatcccagatcacactatagggtggaaaccaggtg\
atagttgcctctgccatgcatatgcgattaaatgttcgttgaatatgagtaaagaatctaagcgtagtttttatagta\
aagaccccgcgcctctgcgcgtgatagtgttaccgacgcatctcgatgttgtacatgtagcactgtacgtaatcatta\
tacgatttccataacgtaagctgggtaacagacctaacgt\\!\\(\\*StyleBox[\\\"agggttca\\\",\
FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\"]\\)\
tctacgcgcttatcctccgacctaggattgcgtctagaaaactgaacaagtaaaccgtactcctttatccgccgacag\
tccagaacagtctgacttccagctacttaatggtttcccagatttcctgcggaatacctcgaccgtgtggccattgct\
ccaccaccgcaattcgcctcttctgcacaggtccacgcacgttttccctgagcataaaaacccagcaatacgaaaggt\
tctctacacatcagcagcttcccgagtgacctgattggggctgcgctataacgtcggtcgcgtttccatcaggacgca\
tgcagcgacgcctgcagcagcagtccccttcac\\!\\(\\*StyleBox[\\\"agcgtaca\\\",FontColor-\
>RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\"]\\)\
gggctctggtaagggcagccagtttcgctaacggtcctgttgcttacatgcgcatacaattatgccaaacggacacgt\
gctatccagacgaggtgtcgtaaaggggatttctaagtgaccagaattactgtcagacgaccttaagatagtcaggct\
ttcagcggtagataggcgggatgaatcgaaagcaatgacaaggcccggtcgccagagagacaggctt\\!\\(\\*\
StyleBox[\\\"agtattca\\\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\
\\\"Bold\\\"]\\)gtaagcagtagcgcgacatacccgaaactccgcgcgggtat\\!\\(\\*StyleBox[\\\
\"agagtaca\\\",FontColor->RGBColor[1,0,0],FontSize->18,FontWeight->\\\"Bold\\\
\"]\\)tctactaggtgtgtatctgcagcacattagggctattcagaccgttaattccggcctgaggccatgccgaca\
gaacaaattgcct\"\>"], "Output",
  CellLabel->"Out[133]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["HTML Parsing", "Section",
  CellTags->"c:23"],

Cell["\<\
String patterns are useful for taking raw HTML and extracting information \
from it.\
\>", "Text"],

Cell[TextData[{
  "Here is the source from ",
  ButtonBox["www.google.com",
    ButtonData:>{
      URL[ "http://www.google.com"], None},
    ButtonStyle->"Hyperlink"],
  "."
}], "Text"],

Cell[BoxData[
    \(\(text = \*"\"\<\\<<html><head><meta http-equiv='content-type' \
content='text/html;charset=UTF-8'><title>Google</title><style><!--body,td,a,p,\
.h{font-family:arial,sans-serif;}\n.h{font-size:20px;}\n.q{color:#0000cc;}\n\
//-->\[IndentingNewLine]</style>\[IndentingNewLine]<script>\[IndentingNewLine]\
<!--function sf(){document.f.q.focus();}\n//-->\[IndentingNewLine]</script>\
\[IndentingNewLine]</head><body bgcolor=#ffffff text=#000000 link=#0000cc \
vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 \
cellpadding=0><tr><td><img src='/images/logo.gif' width=276 height=110 \
alt='Google'></td></tr></table><br>\[IndentingNewLine]<form action='/search' \
name=f><script><!--function qs(el) {if \
(window.RegExp&&window.encodeURIComponent) {var \
qe=encodeURIComponent(document.f.q.value);if \
(el.href.indexOf('q=')\[NotEqual]-1) {el.href=el.href.replace(new \
RegExp('q=[^&$]*'),'q='+qe);} else {el.href+='&q='+qe;}}return 1;}\n//-->\
\[IndentingNewLine]</script><table border=0 cellspacing=0 \
cellpadding=4><tr><td nowrap class=q><font size=-1><b><font \
color=#000000>Web</font></b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q \
href='/imghp?hl=en&tab=wi' onClick='return \
qs(this);'>Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q \
href='/grphp?hl=en&tab=wg' onClick='return \
qs(this);'>Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q \
href='/nwshp?hl=en&tab=wn' onClick='return \
qs(this);'>News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q \
href='/froogle?hl=en&tab=wf' onClick='return \
qs(this);'>Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a \
href='/options/index.html' \
class=q>more&nbsp;&raquo;</a></b></font></td></tr></table>  <table \
cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td \
align=center><input type=hidden name=hl value=en><span id=hf></span><input \
type=hidden name=ie value='UTF-8'><input maxLength=256 size=55 name=q \
value=''><br><input type=submit value='Google Search' name=btnG><input \
type=submit value='I'm Feeling Lucky' name=btnI></td><td valign=top nowrap \
width=25%><font size=-2>&nbsp;&nbsp;<a \
href=/advanced_search?hl=en>Advanced&nbsp;Search</a><br>&nbsp;&nbsp;<a \
href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a \
href=/language_tools?hl=en>Language \
Tools</a></font></td></tr></table></form><br><br><font size=-1><a \
href='/ads/'>Advertising&nbsp;Programs</a>- <a \
href='/services/'>Business&nbsp;Solutions</a>- <a href=/about.html>About \
Google</a><span id=hp style='behavior:url(#default#homepage)'></span>\
\[IndentingNewLine]<script>\[IndentingNewLine]//<!--if \
(!hp.isHomePage('http://www.google.com/')) {document.write('<p><a \
href=\\'/mgyhp.html\\' \
onClick=\\'style.behavior='url(#default#homepage)';setHomePage('http://www.\
google.com/');\\'>Make Google Your Homepage!</a>');}\n\
//-->\[IndentingNewLine]</script></font><p><font size=-2>&copy;2004 \
Google-Searching 4,285,199,774 web pages</font></p></center></body></html>\\>\
\>\"";\)\)], "Input",
  CellLabel->"In[134]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringLength[text]\)], "Input",
  CellLabel->"In[135]:="],

Cell[BoxData[
    \(2639\)], "Output",
  CellLabel->"Out[135]="]
}, Open  ]],

Cell["This extracts all the direct hyperlinks in the source.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases[text, 
      ShortestMatch["\<<a\>" ~~ \(__ ~~ \("\<href=\>" ~~ \(ref__ ~~ \
\(\((WhitespaceCharacter | "\<>\>")\) ~~ \(___ ~~ "\<>\>"\)\)\)\)\)] \
\[RuleDelayed] ref]\)], "Input",
  CellLabel->"In[136]:="],

Cell[BoxData[
    \({"'/imghp?hl=en&tab=wi'", "'/grphp?hl=en&tab=wg'", 
      "'/nwshp?hl=en&tab=wn'", "'/froogle?hl=en&tab=wf'", 
      "'/options/index.html'", "/advanced_search?hl=en", 
      "/preferences?hl=en", "/language_tools?hl=en", "'/ads/'", 
      "'/services/'", "/about.html", "\\'/mgyhp.html\\'"}\)], "Output",
  CellLabel->"Out[136]="]
}, Open  ]],

Cell[TextData[{
  "This deletes everything inside tags ",
  StyleBox["<...>", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringReplace[text, 
      ShortestMatch["\<<\>" ~~ \(___ ~~ "\<>\>"\)] \[Rule] "\<\>"]\)], "Input",\

  CellLabel->"In[137]:="],

Cell[BoxData[
    \("Google\[IndentingNewLine]\[IndentingNewLine]\[IndentingNewLine]\
\[IndentingNewLine]\[IndentingNewLine]\[IndentingNewLine]\[IndentingNewLine]\
Web&nbsp;&nbsp;&nbsp;&nbsp;Images&nbsp;&nbsp;&nbsp;&nbsp;Groups&nbsp;&nbsp;&\
nbsp;&nbsp;News&nbsp;&nbsp;&nbsp;&nbsp;Froogle&nbsp;&nbsp;&nbsp;&nbsp;more&\
nbsp;&raquo;  \
&nbsp;&nbsp;&nbsp;Advanced&nbsp;Search&nbsp;&nbsp;Preferences&nbsp;&nbsp;\
Language ToolsAdvertising&nbsp;Programs- Business&nbsp;Solutions- About \
Google\[IndentingNewLine]\[IndentingNewLine]//Make Google Your Homepage!');}\n\
//-->\[IndentingNewLine]&copy;2004 Google-Searching 4,285,199,774 web \
pages"\)], "Output",
  CellLabel->"Out[137]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["Find Money", "Section",
  CellTags->"c:24"],

Cell["\<\
Here is some text to scan for strings that look like dollar amounts.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(text = "\<This $100 sentence can be bought for $85.00, at 15% \
discount\>"\)], "Input",
  CellLabel->"In[138]:="],

Cell[BoxData[
    \("This $100 sentence can be bought for $85.00, at 15% discount"\)], \
"Output",
  CellLabel->"Out[138]="]
}, Open  ]],

Cell["\<\
This is one way to do the search using symbolic string patterns.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases[
      text, "\<$\>" ~~ \(DigitCharacter ..  ~~ \((\(("\<.\>" ~~ 
                  DigitCharacter .. )\) | "\<\>")\)\)]\)], "Input",
  CellLabel->"In[139]:="],

Cell[BoxData[
    \({"$100", "$85.00"}\)], "Output",
  CellLabel->"Out[139]="]
}, Open  ]],

Cell["\<\
Here is the same search using regular expressions (note that you must \
remember to escape the dollar sign and the period, which are both regular \
expression metacharacters).\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases[text, 
      RegularExpression["\<\\$\\d+(\\.\\d+)?\>"]]\)], "Input",
  CellLabel->"In[140]:="],

Cell[BoxData[
    \({"$100", "$85.00"}\)], "Output",
  CellLabel->"Out[140]="]
}, Open  ]],

Cell[TextData[{
  "There is also a built-in pattern object, ",
  StyleBox["NumberString", "MR"],
  ", for this particular situation."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringCases[text, "\<$\>" ~~ NumberString]\)], "Input",
  CellLabel->"In[141]:="],

Cell[BoxData[
    \({"$100", "$85.00"}\)], "Output",
  CellLabel->"Out[141]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["Find Text in Files", "Section"],

Cell["\<\
Here is a very simple grep-like function for finding lines in a text file \
containing text matching a given pattern.\
\>", "Text"],

Cell[BoxData[
    \(Grep[path_, pattern_] := 
      With[{lines = Import[path, "\<Lines\>"]}, 
        Pick[Transpose[{Range[Length[lines]], lines}], 
          StringFreeQ[lines, pattern], False]]\)], "Input",
  CellLabel->"In[142]:="],

Cell["This creates a sample text file.", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Export["\<test.txt\>", {"\<this is a line\>", "\<a line with 2 numbers \
5\>", "\<third line and more\>", "\<line 4\>"}, "\<Lines\>"]\)], "Input",
  CellLabel->"In[143]:="],

Cell[BoxData[
    \("test.txt"\)], "Output",
  CellLabel->"Out[143]="]
}, Open  ]],

Cell[TextData[{
  "This returns the line numbers and lines in ",
  StyleBox["\"test.txt\"", "MR"],
  " containing any digit characters."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Grep["\<test.txt\>", DigitCharacter] // TableForm\)], "Input",
  CellLabel->"In[144]:="],

Cell[BoxData[
    TagBox[GridBox[{
          {"2", "\<\"a line with 2 numbers 5\"\>"},
          {"4", "\<\"line 4\"\>"}
          },
        RowSpacings->1,
        ColumnSpacings->1,
        ColumnAlignments->{Left}],
      Function[ BoxForm`e$, 
        MatrixForm[ BoxForm`e$]]]], "Output",
  CellLabel->"Out[144]//TableForm="]
}, Open  ]],

Cell[TextData[{
  "This finds lines containing ",
  StyleBox["\"a\"", "MR"],
  " as a stand-alone word."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Grep["\<test.txt\>", RegularExpression["\<\\ba\\b\>"]] // 
      TableForm\)], "Input",
  CellLabel->"In[145]:="],

Cell[BoxData[
    TagBox[GridBox[{
          {"1", "\<\"this is a line\"\>"},
          {"2", "\<\"a line with 2 numbers 5\"\>"}
          },
        RowSpacings->1,
        ColumnSpacings->1,
        ColumnAlignments->{Left}],
      Function[ BoxForm`e$, 
        MatrixForm[ BoxForm`e$]]]], "Output",
  CellLabel->"Out[145]//TableForm="]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". Tips and Tricks for Efficient Matching"
}], "Chapter",
  CellTags->{"c:25", "Tips and Tricks"}],

Cell["\<\
This section addresses some issues involving efficiency in string pattern \
matching.\
\>", "Text"],

Cell[CellGroupData[{

Cell["StringExpression versus RegularExpression", "Section",
  CellTags->"c:26"],

Cell[TextData[{
  "Since a string pattern written in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " syntax is immediately translated to a regular expression and then \
compiled and cached, there is very little overhead in using the ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " syntax as opposed to the regular expression syntax directly. One \
exception can occur if many different patterns are each used only a few \
times; in that case, the overhead might be noticeable."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell["Conditions and PatternTests", "Section",
  CellTags->"c:27"],

Cell[TextData[{
  "If a pattern contains ",
  StyleBox["Condition", "MR"],
  " (",
  StyleBox["/;", "MR"],
  ") or ",
  StyleBox["PatternTest", "MR"],
  " (",
  StyleBox["?", "MR"],
  ") statements, the general ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " evaluator must be invoked during the match, thus slowing it down. If a \
pattern can be written without such constructs, it will typically be faster."
}], "Text"],

Cell[BoxData[
    \(SeedRandom[1234]; 
    test = StringJoin[
        Table[FromCharacterCode[
            Random[Integer, {48, 80}]], {200}]];\)], "Input",
  CellLabel->"In[146]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(\(StringCases[test, DigitCharacter .. ] // Length\) // 
      Timing\)], "Input",
  CellLabel->"In[147]:="],

Cell[BoxData[
    \({0.`\ Second, 45}\)], "Output",
  CellLabel->"Out[147]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(\(StringCases[test, __?DigitQ] // Length\) // Timing\)], "Input",
  CellLabel->"In[148]:="],

Cell[BoxData[
    \({0.030000000000000027`\ Second, 45}\)], "Output",
  CellLabel->"Out[148]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["Avoid Nested Quantifiers", "Section",
  CellTags->"c:28"],

Cell[TextData[{
  "Because of the nondeterministic finite automaton (NFA) algorithm used in \
the match, patterns involving nested quantifiers (such as ",
  StyleBox["__", "MR"],
  " and ",
  StyleBox["patt..", "MR"],
  " or the regular expression equivalents) can become arbitrarily slow. Such \
patterns can usually be \[OpenCurlyDoubleQuote]unrolled\
\[CloseCurlyDoubleQuote] into more efficient versions (see Friedl ",
  ButtonBox["[2]",
    ButtonData:>"Ref-2",
    ButtonStyle->"Hyperlink"],
  " for additional information)."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell["Avoid Many Calls to a Function", "Section",
  CellTags->"c:29"],

Cell[TextData[{
  "If you are searching through a long list of strings for certain matches, \
it is more efficient to feed the whole list to a string function at once, \
rather than using something like ",
  StyleBox["Select", "MR"],
  " and ",
  StyleBox["StringMatchQ", "MR"],
  " (see the earlier dictionary example for an illustration). Here is another \
example that generates a list of 2000 strings with 10 characters each and \
searches for the strings that start with an ",
  StyleBox["\"a\"", "MR"],
  " and contain ",
  StyleBox["\"ggg\"", "MR"],
  " as a substring."
}], "Text"],

Cell[BoxData[
    \(SeedRandom[1234]; 
    test = Table[
        StringJoin[\({"\<a\>", "\<c\>", "\<g\>", "\<t\>"}[\([#]\)] &\) /@ 
            Table[Random[Integer, {1, 4}], {10}]], {2000}];\)], "Input",
  CellLabel->"In[149]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(Take[test, 3]\)], "Input",
  CellLabel->"In[150]:="],

Cell[BoxData[
    \({"acaaccgccg", "cgaattctca", "caaacgtcga"}\)], "Output",
  CellLabel->"Out[150]="]
}, Open  ]],

Cell[TextData[{
  "Here is the slower version, using ",
  StyleBox["Select", "MR"],
  " and ",
  StyleBox["StringMatchQ", "MR"],
  "."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Select[test, 
        StringMatchQ[#, "\<a\>" ~~ \(___ ~~ \("\<ggg\>" ~~ ___\)\)] &] // 
      Timing\)], "Input",
  CellLabel->"In[151]:="],

Cell[BoxData[
    \({0.010000000000000009`\ Second, {"acgtagggtt", "attagggcta", 
        "atagggctct", "aagggccgtc", "agtgttaggg", "aggggtggca", "aggggcggag", 
        "agcgggactc", "acagagggtg", "atgggacatc", "agggataaga", "accacgggct", 
        "aaaagggcat", "agtaagggac", "agggtagtta", "agctacgggc", "ataagccggg", 
        "atagggagaa", "acttgatggg", "acagtgaggg", "agggcaggga", "agggttctag", 
        "agaggggaac", "atgcagggat", "atcgtagggc", "aggggaagct", "agtggggctg", 
        "aaacaaggga", "aagtgggatg", "aagagggaat", "agggacggag", "attcgggagc", 
        "aataactggg", "agggcgccca", "agaggggatt", "agggacgaag", "aagggatatt", 
        "agggcaggtg", "agaacgggta", "aattgggtct", "agcgggtagg", "actcgggccc", 
        "agggcctcct", "aagggagggg", "aagggcatgt", "aagttgaggg", "aaaacggggt", 
        "agagggcgta", "aagtctaggg", "agggagcgtc"}}\)], "Output",
  CellLabel->"Out[151]="]
}, Open  ]],

Cell[TextData[{
  "If you instead feed the whole list to ",
  StyleBox["StringMatchQ", "MR"],
  " at once, it will be much faster. Then ",
  StyleBox["Pick", "MR"],
  " can be used to extract the wanted elements."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Pick[test, 
        StringMatchQ[test, "\<a\>" ~~ \(___ ~~ \("\<ggg\>" ~~ ___\)\)]] // 
      Timing\)], "Input",
  CellLabel->"In[152]:="],

Cell[BoxData[
    \({0.`\ Second, {"acgtagggtt", "attagggcta", "atagggctct", "aagggccgtc", 
        "agtgttaggg", "aggggtggca", "aggggcggag", "agcgggactc", "acagagggtg", 
        "atgggacatc", "agggataaga", "accacgggct", "aaaagggcat", "agtaagggac", 
        "agggtagtta", "agctacgggc", "ataagccggg", "atagggagaa", "acttgatggg", 
        "acagtgaggg", "agggcaggga", "agggttctag", "agaggggaac", "atgcagggat", 
        "atcgtagggc", "aggggaagct", "agtggggctg", "aaacaaggga", "aagtgggatg", 
        "aagagggaat", "agggacggag", "attcgggagc", "aataactggg", "agggcgccca", 
        "agaggggatt", "agggacgaag", "aagggatatt", "agggcaggtg", "agaacgggta", 
        "aattgggtct", "agcgggtagg", "actcgggccc", "agggcctcct", "aagggagggg", 
        "aagggcatgt", "aagttgaggg", "aaaacggggt", "agagggcgta", "aagtctaggg", 
        "agggagcgtc"}}\)], "Output",
  CellLabel->"Out[152]="]
}, Open  ]],

Cell[TextData[{
  "Alternatively, you could use ",
  StyleBox["StringCases", "MR"],
  ", which is also fast. Note that you need to anchor the pattern using ",
  StyleBox["StartOfString", "MR"],
  " to ensure that the ",
  StyleBox["\"a\"", "MR"],
  " is at the start (the ",
  StyleBox["EndOfString", "MR"],
  " is superfluous in this particular case). "
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Flatten[
        StringCases[test, 
          StartOfString ~~ \("\<a\>" ~~ \(___ ~~ \("\<ggg\>" ~~ \(___ ~~ 
                    EndOfString\)\)\)\)]] // Timing\)], "Input",
  CellLabel->"In[153]:="],

Cell[BoxData[
    \({0.`\ Second, {"acgtagggtt", "attagggcta", "atagggctct", "aagggccgtc", 
        "agtgttaggg", "aggggtggca", "aggggcggag", "agcgggactc", "acagagggtg", 
        "atgggacatc", "agggataaga", "accacgggct", "aaaagggcat", "agtaagggac", 
        "agggtagtta", "agctacgggc", "ataagccggg", "atagggagaa", "acttgatggg", 
        "acagtgaggg", "agggcaggga", "agggttctag", "agaggggaac", "atgcagggat", 
        "atcgtagggc", "aggggaagct", "agtggggctg", "aaacaaggga", "aagtgggatg", 
        "aagagggaat", "agggacggag", "attcgggagc", "aataactggg", "agggcgccca", 
        "agaggggatt", "agggacgaag", "aagggatatt", "agggcaggtg", "agaacgggta", 
        "aattgggtct", "agcgggtagg", "actcgggccc", "agggcctcct", "aagggagggg", 
        "aagggcatgt", "aagttgaggg", "aaaacggggt", "agagggcgta", "aagtctaggg", 
        "agggagcgtc"}}\)], "Output",
  CellLabel->"Out[153]="]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{

Cell["Rewrite General Expression Searches as String Searches", "Section",
  CellTags->"c:30"],

Cell[TextData[{
  "Because the string matching algorithm is different from the algorithm ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " uses for general expression matching (string matching can assume a finite \
alphabet and a flat structure, for instance), there are cases where it is \
advantageous to translate a normal expression-matching problem to a \
string-matching problem. A typical case is matching a long list of symbols \
against a pattern involving several occurrences of ",
  StyleBox["__", "MR"],
  " and ",
  StyleBox["___", "MR"],
  "."
}], "Text"],

Cell["\<\
As an example, assume you want to find primes (among the 1000 primes starting \
with the 1000000th prime, say) that have at least four identical digits. \
Using ordinary pattern matching, it could be accomplished like this.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Select[Array[Prime, 1000, 1000000], 
        MatchQ[IntegerDigits[#], {___, x_, ___, x_, ___, x_, ___, 
              x_, ___}] &] // Timing\)], "Input",
  CellLabel->"In[154]:="],

Cell[BoxData[
    \({0.16000000000000003`\ Second, {15488881, 15491117, 15491171, 15491711, 
        15493333, 15493999, 15496111, 15499111, 15499199, 15499399, 15499499, 
        15499919, 15499997, 15500557, 15501119, 15501121, 15501151, 15501553, 
        15501559, 15501911, 15502111}}\)], "Output",
  CellLabel->"Out[154]="]
}, Open  ]],

Cell["\<\
By converting the list of digits of each integer to a string, you can use \
string matching instead.\
\>", "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(Select[Array[Prime, 1000, 1000000], 
        StringMatchQ[FromCharacterCode[48 + IntegerDigits[#]], 
            StringExpression[___, x_, ___, x_, ___, x_, ___, x_, ___]] &] // 
      Timing\)], "Input",
  CellLabel->"In[155]:="],

Cell[BoxData[
    \({0.04999999999999993`\ Second, {15488881, 15491117, 15491171, 15491711, 
        15493333, 15493999, 15496111, 15499111, 15499199, 15499399, 15499499, 
        15499919, 15499997, 15500557, 15501119, 15501121, 15501151, 15501553, 
        15501559, 15501911, 15502111}}\)], "Output",
  CellLabel->"Out[155]="]
}, Open  ]],

Cell[TextData[{
  "Following the earlier tip of using ",
  StyleBox["Pick", "MR"],
  " or ",
  StyleBox["StringCases", "MR"],
  ", you can speed it up even more."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(With[{list = Array[Prime, 1000, 1000000]}, 
        Pick[list, 
          StringMatchQ[\(FromCharacterCode[48 + IntegerDigits[#]] &\) /@ 
              list, StringExpression[___, x_, ___, x_, ___, x_, ___, 
              x_, ___]]]] // Timing\)], "Input",
  CellLabel->"In[156]:="],

Cell[BoxData[
    \({0.040000000000000036`\ Second, {15488881, 15491117, 15491171, 
        15491711, 15493333, 15493999, 15496111, 15499111, 15499199, 15499399, 
        15499499, 15499919, 15499997, 15500557, 15501119, 15501121, 15501151, 
        15501553, 15501559, 15501911, 15502111}}\)], "Output",
  CellLabel->"Out[156]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(Flatten[
        StringCases[\(FromCharacterCode[48 + IntegerDigits[#]] &\) /@ 
            Array[Prime, 1000, 1000000], 
          StringExpression[StartOfString, ___, x_, ___, x_, ___, x_, ___, 
            x_, ___, EndOfString]]] // Timing\)], "Input",
  CellLabel->"In[157]:="],

Cell[BoxData[
    \({0.040000000000000036`\ Second, {"15488881", "15491117", "15491171", 
        "15491711", "15493333", "15493999", "15496111", "15499111", 
        "15499199", "15499399", "15499499", "15499919", "15499997", 
        "15500557", "15501119", "15501121", "15501151", "15501553", 
        "15501559", "15501911", "15502111"}}\)], "Output",
  CellLabel->"Out[157]="]
}, Open  ]],

Cell["For long sequences, the difference can be significant.", "Text"],

Cell[BoxData[
    \(test = Range[100]; test[\([{50, 75}]\)] = 5;\)], "Input",
  CellLabel->"In[158]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(Position[test, 5]\)], "Input",
  CellLabel->"In[159]:="],

Cell[BoxData[
    \({{5}, {50}, {75}}\)], "Output",
  CellLabel->"Out[159]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(MatchQ[test, {___, x_, ___, x_, ___, x_, ___}] // Timing\)], "Input",
  CellLabel->"In[160]:="],

Cell[BoxData[
    \({0.12100000000000022`\ Second, True}\)], "Output",
  CellLabel->"Out[160]="]
}, Open  ]],

Cell[BoxData[
    \(\(teststr = FromCharacterCode[test];\)\)], "Input",
  CellLabel->"In[161]:="],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringPosition[teststr, FromCharacterCode[5]]\)], "Input",
  CellLabel->"In[162]:="],

Cell[BoxData[
    \({{5, 5}, {50, 50}, {75, 75}}\)], "Output",
  CellLabel->"Out[162]="]
}, Open  ]],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringMatchQ[teststr, 
        StringExpression[___, x_, ___, x_, ___, x_, ___]] // 
      Timing\)], "Input",
  CellLabel->"In[163]:="],

Cell[BoxData[
    \({0.`\ Second, True}\)], "Output",
  CellLabel->"Out[163]="]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". Implementation Details"
}], "Chapter",
  CellTags->"c:31"],

Cell[TextData[{
  "The string pattern matching in ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " is built on top of the PCRE (Perl Compatible Regular Expressions) library \
by Philip Hazel ",
  ButtonBox["[1]",
    ButtonData:>"Ref-1",
    ButtonStyle->"Hyperlink"],
  ". "
}], "Text"],

Cell[TextData[{
  "In some cases the pre-5.1 ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " algorithms are used (e.g., when the pattern is just a single, literal \
string)."
}], "Text"],

Cell[TextData[{
  "Any symbolic string pattern is first translated to a regular expression. \
You can see this translation by using the internal ",
  StyleBox["StringPattern`PatternConvert", "MR"],
  " function."
}], "Text"],

Cell[CellGroupData[{

Cell[BoxData[
    \(StringPattern`PatternConvert["\<a\>" | "\<\>" ~~ DigitCharacter .. ] // 
      InputForm\)], "Input",
  CellLabel->"In[164]:="],

Cell["{\"(?ms)a?\\\\d+\", {}, {}, {}, Hold[None]}", "Output",
  CellLabel->"Out[164]//InputForm="]
}, Open  ]],

Cell["\<\
The first element returned is the regular expression, while the rest of the \
elements have to do with conditions, replacement rules, and named patterns.\
\>", "Text"],

Cell["\<\
The regular expression is then compiled by PCRE, and the compiled version is \
cached for future use when the same pattern appears again. The translation \
from symbolic string pattern to regular expression only happens once.\
\>", "Text"],

Cell[TextData[{
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " conditions in the pattern are handled by external call-outs from the PCRE \
library to the ",
  StyleBox["Mathematica",
    FontSlant->"Italic"],
  " evaluator, so this will slow down the matching."
}], "Text"],

Cell[TextData[{
  "Explicit ",
  StyleBox["RegularExpression", "MR"],
  " objects embedded into a general string pattern will be spliced into the \
final regular expression (surrounded by noncapturing parentheses ",
  StyleBox["\"(?:...)\"", "MR"],
  "), so the counting of named patterns can become skewed compared to what \
you might expect."
}], "Text"],

Cell[TextData[{
  "Because PCRE currently does not support preset character classes with \
characters beyond character code 255, the word and letter character classes \
(such as ",
  StyleBox["WordCharacter", "MR"],
  " and ",
  StyleBox["LetterCharacter", "MR"],
  ") only include character codes in the Unicode range 0\[Dash]255. Thus ",
  StyleBox["LetterCharacter", "MR"],
  " and ",
  StyleBox["_?LetterQ", "MR"],
  " do not give equivalent results beyond character code 255."
}], "Text"],

Cell[TextData[{
  "Because of a similar PCRE restriction, case-insensitive matching (e.g., \
with ",
  StyleBox["IgnoreCase->True", "MR"],
  ") will only apply to letters in the Unicode range 0\[Dash]127 (i.e., the \
normal English letters ",
  StyleBox["\"a\"-\"z\"", "MR"],
  " and ",
  StyleBox["\"A\"-\"Z\"", "MR"],
  ")."
}], "Text"]
}, Closed]],

Cell[CellGroupData[{

Cell[TextData[{
  
  CounterBox["Chapter"],
  ". References"
}], "Chapter",
  CellTags->"c:32"],

Cell[TextData[{
  "[1] P. Hazel, \[OpenCurlyDoubleQuote]PCRE \[LongDash] Perl Compatible \
Regular Expressions.\[CloseCurlyDoubleQuote] (Feb 10, 2004) ",
  ButtonBox["www.pcre.org",
    ButtonData:>{
      URL[ "http://www.pcre.org"], None},
    ButtonStyle->"Hyperlink"],
  "."
}], "Text",
  CellTags->"Ref-1"],

Cell[TextData[{
  "[2] J. E. F. Friedl, ",
  StyleBox["Mastering Regular Expressions",
    FontSlant->"Italic"],
  ", 2nd ed., Sebastopol, CA: O'Reilly & Associates, 2002."
}], "Text",
  CellTags->"Ref-2"]
}, Closed]]
},
FrontEndVersion->"5.1 for Macintosh",
ScreenRectangle->{{31, 1280}, {0, 1002}},
Saveable->False,
ScreenStyleEnvironment->"Brackets",
WindowToolbars->{},
WindowSize->{649, 549},
WindowMargins->{{0, Automatic}, {Automatic, 0}},
WindowTitle->"String Patterns in Mathematica",
StyleDefinitions -> "HelpBrowser.nb"
]

(*******************************************************************
Cached data follows.  If you edit this Notebook file directly, not
using Mathematica, you must remove the line containing CacheID at
the top of  the file.  The cache data will then be recreated when
you save this file from within Mathematica.
*******************************************************************)

(*CellTagsOutline
CellTagsIndex->{
  "c:1"->{
    Cell[1936, 64, 96, 5, 88, "Chapter",
      CellTags->"c:1"]},
  "c:2"->{
    Cell[10296, 369, 107, 5, 52, "Chapter",
      CellTags->"c:2"]},
  "Regular Expressions"->{
    Cell[31082, 1096, 128, 5, 52, "Chapter",
      CellTags->{"Regular Expressions", "c:3"}]},
  "c:3"->{
    Cell[31082, 1096, 128, 5, 52, "Chapter",
      CellTags->{"Regular Expressions", "c:3"}]},
  "c:4"->{
    Cell[53062, 1895, 125, 5, 82, "Chapter",
      CellTags->"c:4"]},
  "c:5"->{
    Cell[64198, 2234, 113, 5, 52, "Chapter",
      CellTags->"c:5"]},
  "c:6"->{
    Cell[64688, 2255, 50, 1, 61, "Section",
      CellTags->"c:6"]},
  "c:7"->{
    Cell[67604, 2383, 49, 1, 35, "Section",
      CellTags->"c:7"]},
  "c:8"->{
    Cell[68409, 2421, 49, 1, 35, "Section",
      CellTags->"c:8"]},
  "c:9"->{
    Cell[71079, 2536, 57, 1, 35, "Section",
      CellTags->"c:9"]},
  "c:10"->{
    Cell[74846, 2694, 53, 1, 35, "Section",
      CellTags->"c:10"]},
  "c:11"->{
    Cell[76341, 2763, 50, 1, 35, "Section",
      CellTags->"c:11"]},
  "c:12"->{
    Cell[77434, 2812, 52, 1, 35, "Section",
      CellTags->"c:12"]},
  "c:13"->{
    Cell[80442, 2934, 56, 1, 35, "Section",
      CellTags->"c:13"]},
  "c:14"->{
    Cell[81455, 2978, 50, 1, 35, "Section",
      CellTags->"c:14"]},
  "c:15"->{
    Cell[86251, 3174, 99, 5, 52, "Chapter",
      CellTags->"c:15"]},
  "c:16"->{
    Cell[90447, 3305, 45, 1, 61, "Section",
      CellTags->"c:16"]},
  "c:17"->{
    Cell[94297, 3473, 49, 1, 35, "Section",
      CellTags->"c:17"]},
  "c:18"->{
    Cell[96064, 3559, 49, 1, 35, "Section",
      CellTags->"c:18"]},
  "c:19"->{
    Cell[98588, 3666, 50, 1, 35, "Section",
      CellTags->"c:19"]},
  "c:20"->{
    Cell[103236, 3882, 98, 5, 52, "Chapter",
      CellTags->"c:20"]},
  "c:21"->{
    Cell[103475, 3896, 56, 1, 61, "Section",
      CellTags->"c:21"]},
  "c:22"->{
    Cell[107779, 4013, 57, 1, 35, "Section",
      CellTags->"c:22"]},
  "c:23"->{
    Cell[112991, 4119, 51, 1, 35, "Section",
      CellTags->"c:23"]},
  "c:24"->{
    Cell[118262, 4249, 49, 1, 35, "Section",
      CellTags->"c:24"]},
  "c:25"->{
    Cell[121957, 4406, 144, 5, 52, "Chapter",
      CellTags->{"c:25", "Tips and Tricks"}]},
  "Tips and Tricks"->{
    Cell[121957, 4406, 144, 5, 52, "Chapter",
      CellTags->{"c:25", "Tips and Tricks"}]},
  "c:26"->{
    Cell[122238, 4420, 80, 1, 61, "Section",
      CellTags->"c:26"]},
  "c:27"->{
    Cell[122859, 4439, 66, 1, 35, "Section",
      CellTags->"c:27"]},
  "c:28"->{
    Cell[124072, 4491, 63, 1, 35, "Section",
      CellTags->"c:28"]},
  "c:29"->{
    Cell[124718, 4512, 69, 1, 35, "Section",
      CellTags->"c:29"]},
  "c:30"->{
    Cell[129881, 4650, 93, 1, 57, "Section",
      CellTags->"c:30"]},
  "c:31"->{
    Cell[134927, 4817, 107, 5, 52, "Chapter",
      CellTags->"c:31"]},
  "c:32"->{
    Cell[137996, 4922, 95, 5, 52, "Chapter",
      CellTags->"c:32"]},
  "Ref-1"->{
    Cell[138094, 4929, 312, 9, 24, "Text",
      CellTags->"Ref-1"]},
  "Ref-2"->{
    Cell[138409, 4940, 205, 6, 24, "Text",
      CellTags->"Ref-2"]}
  }
*)

(*CellTagsIndex
CellTagsIndex->{
  {"c:1", 139369, 4969},
  {"c:2", 139446, 4972},
  {"Regular Expressions", 139542, 4975},
  {"c:3", 139648, 4978},
  {"c:4", 139754, 4981},
  {"c:5", 139835, 4984},
  {"c:6", 139916, 4987},
  {"c:7", 139996, 4990},
  {"c:8", 140076, 4993},
  {"c:9", 140156, 4996},
  {"c:10", 140237, 4999},
  {"c:11", 140319, 5002},
  {"c:12", 140401, 5005},
  {"c:13", 140483, 5008},
  {"c:14", 140565, 5011},
  {"c:15", 140647, 5014},
  {"c:16", 140729, 5017},
  {"c:17", 140811, 5020},
  {"c:18", 140893, 5023},
  {"c:19", 140975, 5026},
  {"c:20", 141057, 5029},
  {"c:21", 141140, 5032},
  {"c:22", 141223, 5035},
  {"c:23", 141306, 5038},
  {"c:24", 141389, 5041},
  {"c:25", 141472, 5044},
  {"Tips and Tricks", 141588, 5047},
  {"c:26", 141693, 5050},
  {"c:27", 141776, 5053},
  {"c:28", 141859, 5056},
  {"c:29", 141942, 5059},
  {"c:30", 142025, 5062},
  {"c:31", 142108, 5065},
  {"c:32", 142192, 5068},
  {"Ref-1", 142276, 5071},
  {"Ref-2", 142359, 5074}
  }
*)

(*NotebookFileOutline
Notebook[{

Cell[CellGroupData[{
Cell[1776, 53, 104, 4, 98, "Title"],
Cell[1883, 59, 16, 0, 22, "Text"]
}, Open  ]],

Cell[CellGroupData[{
Cell[1936, 64, 96, 5, 88, "Chapter",
  CellTags->"c:1"],
Cell[2035, 71, 621, 16, 54, "Text"],
Cell[2659, 89, 388, 13, 39, "Text"],

Cell[CellGroupData[{
Cell[3072, 106, 97, 2, 26, "Input"],
Cell[3172, 110, 63, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[3272, 117, 100, 2, 26, "Input"],
Cell[3375, 121, 62, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[3474, 128, 92, 2, 26, "Input"],
Cell[3569, 132, 67, 2, 25, "Output"]
}, Open  ]],
Cell[3651, 137, 409, 11, 54, "Text"],

Cell[CellGroupData[{
Cell[4085, 152, 113, 2, 26, "Input"],
Cell[4201, 156, 63, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[4301, 163, 114, 2, 26, "Input"],
Cell[4418, 167, 62, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[4517, 174, 98, 2, 26, "Input"],
Cell[4618, 178, 73, 2, 25, "Output"]
}, Open  ]],
Cell[4706, 183, 157, 3, 36, "Text"],

Cell[CellGroupData[{
Cell[4888, 190, 125, 3, 26, "Input"],
Cell[5016, 195, 63, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[5116, 202, 126, 3, 26, "Input"],
Cell[5245, 207, 62, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[5344, 214, 112, 2, 26, "Input"],
Cell[5459, 218, 73, 2, 25, "Output"]
}, Open  ]],
Cell[5547, 223, 91, 2, 22, "Text"],
Cell[5641, 227, 4556, 135, 234, "DefinitionBox"],
Cell[10200, 364, 59, 0, 15, "Caption"]
}, Closed]],

Cell[CellGroupData[{
Cell[10296, 369, 107, 5, 52, "Chapter",
  CellTags->"c:2"],
Cell[10406, 376, 354, 11, 39, "Text"],

Cell[CellGroupData[{
Cell[10785, 391, 80, 2, 26, "Input"],
Cell[10868, 395, 245, 7, 37, "Output"]
}, Open  ]],
Cell[11128, 405, 232, 6, 37, "Text"],

Cell[CellGroupData[{
Cell[11385, 415, 94, 2, 26, "Input"],
Cell[11482, 419, 67, 2, 25, "Output"]
}, Open  ]],
Cell[11564, 424, 450, 11, 54, "Text"],
Cell[12017, 437, 121, 3, 30, "Text"],
Cell[12141, 442, 5102, 145, 399, "2ColumnTable"],
Cell[17246, 589, 63, 0, 22, "Text"],
Cell[17312, 591, 4350, 123, 189, "2ColumnTable"],
Cell[21665, 716, 62, 0, 22, "Text"],
Cell[21730, 718, 991, 27, 143, "2ColumnTable"],
Cell[22724, 747, 111, 3, 22, "Text"],
Cell[22838, 752, 763, 23, 53, "2ColumnTable"],
Cell[23604, 777, 104, 1, 30, "Text"],
Cell[23711, 780, 424, 15, 38, "Text"],

Cell[CellGroupData[{
Cell[24160, 799, 95, 2, 26, "Input"],
Cell[24258, 803, 77, 2, 57, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[24372, 810, 113, 2, 26, "Input"],
Cell[24488, 814, 77, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[24602, 821, 127, 3, 26, "Input"],
Cell[24732, 826, 77, 2, 25, "Output"]
}, Open  ]],
Cell[24824, 831, 377, 10, 38, "Text"],

Cell[CellGroupData[{
Cell[25226, 845, 155, 3, 26, "Input"],
Cell[25384, 850, 84, 2, 25, "Output"]
}, Open  ]],
Cell[25483, 855, 333, 12, 39, "Text"],

Cell[CellGroupData[{
Cell[25841, 871, 201, 4, 58, "Input"],
Cell[26045, 877, 71, 2, 25, "Output"]
}, Open  ]],
Cell[26131, 882, 249, 9, 24, "Text"],

Cell[CellGroupData[{
Cell[26405, 895, 120, 3, 26, "Input"],
Cell[26528, 900, 77, 2, 25, "Output"]
}, Open  ]],
Cell[26620, 905, 154, 6, 23, "Text"],

Cell[CellGroupData[{
Cell[26799, 915, 134, 3, 26, "Input"],
Cell[26936, 920, 72, 2, 25, "Output"]
}, Open  ]],
Cell[27023, 925, 132, 4, 23, "Text"],

Cell[CellGroupData[{
Cell[27180, 933, 156, 3, 26, "Input"],
Cell[27339, 938, 71, 2, 25, "Output"]
}, Open  ]],
Cell[27425, 943, 271, 8, 39, "Text"],

Cell[CellGroupData[{
Cell[27721, 955, 159, 3, 42, "Input"],
Cell[27883, 960, 73, 2, 25, "Output"]
}, Open  ]],
Cell[27971, 965, 93, 3, 23, "Text"],

Cell[CellGroupData[{
Cell[28089, 972, 155, 3, 26, "Input"],
Cell[28247, 977, 77, 2, 25, "Output"]
}, Open  ]],
Cell[28339, 982, 200, 5, 37, "Text"],
Cell[28542, 989, 69, 0, 22, "Text"],

Cell[CellGroupData[{
Cell[28636, 993, 152, 3, 26, "Input"],
Cell[28791, 998, 66, 2, 25, "Output"]
}, Open  ]],
Cell[28872, 1003, 368, 10, 53, "Text"],

Cell[CellGroupData[{
Cell[29265, 1017, 119, 3, 26, "Input"],
Cell[29387, 1022, 73, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[29497, 1029, 139, 3, 26, "Input"],
Cell[29639, 1034, 76, 2, 25, "Output"]
}, Open  ]],
Cell[29730, 1039, 155, 5, 23, "Text"],

Cell[CellGroupData[{
Cell[29910, 1048, 230, 5, 42, "Input"],
Cell[30143, 1055, 69, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[30249, 1062, 241, 5, 42, "Input"],
Cell[30493, 1069, 73, 2, 25, "Output"]
}, Open  ]],
Cell[30581, 1074, 133, 4, 23, "Text"],

Cell[CellGroupData[{
Cell[30739, 1082, 218, 4, 42, "Input"],
Cell[30960, 1088, 73, 2, 25, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[31082, 1096, 128, 5, 52, "Chapter",
  CellTags->{"Regular Expressions", "c:3"}],
Cell[31213, 1103, 491, 14, 53, "Text"],
Cell[31707, 1119, 130, 3, 30, "Text"],
Cell[31840, 1124, 5702, 172, 289, "2ColumnTable"],
Cell[37545, 1298, 63, 0, 22, "Text"],
Cell[37611, 1300, 1710, 50, 191, "2ColumnTable"],
Cell[39324, 1352, 567, 30, 38, "Text"],
Cell[39894, 1384, 62, 0, 22, "Text"],
Cell[39959, 1386, 1327, 37, 165, "2ColumnTable"],
Cell[41289, 1425, 106, 3, 22, "Text"],
Cell[41398, 1430, 1316, 40, 135, "2ColumnTable"],
Cell[42717, 1472, 66, 0, 22, "Text"],
Cell[42786, 1474, 1308, 43, 101, "2ColumnTable"],
Cell[44097, 1519, 123, 3, 30, "Text"],
Cell[44223, 1524, 89, 2, 22, "Text"],

Cell[CellGroupData[{
Cell[44337, 1530, 144, 3, 42, "Input"],
Cell[44484, 1535, 80, 2, 25, "Output"]
}, Open  ]],
Cell[44579, 1540, 238, 6, 37, "Text"],

Cell[CellGroupData[{
Cell[44842, 1550, 116, 2, 26, "Input"],
Cell[44961, 1554, 71, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[45069, 1561, 119, 3, 26, "Input"],
Cell[45191, 1566, 61, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[45289, 1573, 119, 3, 26, "Input"],
Cell[45411, 1578, 72, 2, 25, "Output"]
}, Open  ]],
Cell[45498, 1583, 117, 3, 23, "Text"],

Cell[CellGroupData[{
Cell[45640, 1590, 144, 3, 42, "Input"],
Cell[45787, 1595, 84, 2, 25, "Output"]
}, Open  ]],
Cell[45886, 1600, 728, 35, 38, "Text"],
Cell[46617, 1637, 299, 14, 23, "Text"],
Cell[46919, 1653, 233, 7, 37, "Text"],

Cell[CellGroupData[{
Cell[47177, 1664, 126, 3, 26, "Input"],
Cell[47306, 1669, 68, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[47411, 1676, 130, 3, 26, "Input"],
Cell[47544, 1681, 77, 2, 25, "Output"]
}, Open  ]],
Cell[47636, 1686, 181, 6, 23, "Text"],

Cell[CellGroupData[{
Cell[47842, 1696, 132, 3, 26, "Input"],
Cell[47977, 1701, 68, 2, 25, "Output"]
}, Open  ]],
Cell[48060, 1706, 162, 5, 23, "Text"],

Cell[CellGroupData[{
Cell[48247, 1715, 228, 4, 74, "Input"],
Cell[48478, 1721, 77, 2, 25, "Output"]
}, Open  ]],
Cell[48570, 1726, 598, 15, 68, "Text"],

Cell[CellGroupData[{
Cell[49193, 1745, 139, 3, 26, "Input"],
Cell[49335, 1750, 75, 2, 25, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[49447, 1757, 187, 4, 42, "Input"],
Cell[49637, 1763, 115, 2, 25, "Output"]
}, Open  ]],
Cell[49767, 1768, 267, 8, 38, "Text"],

Cell[CellGroupData[{
Cell[50059, 1780, 207, 4, 42, "Input"],
Cell[50269, 1786, 152, 3, 25, "Output"]
}, Open  ]],
Cell[50436, 1792, 229, 5, 37, "Text"],

Cell[CellGroupData[{
Cell[50690, 1801, 214, 4, 42, "Input"],
Cell[50907, 1807, 156, 3, 25, "Output"]
}, Open  ]],
Cell[51078, 1813, 206, 5, 37, "Text"],

Cell[CellGroupData[{
Cell[51309, 1822, 138, 3, 26, "Input"],
Cell[51450, 1827, 73, 2, 25, "Output"]
}, Open  ]],
Cell[51538, 1832, 158, 3, 36, "Text"],
Cell[51699, 1837, 111, 4, 23, "Text"],

Cell[CellGroupData[{
Cell[51835, 1845, 142, 3, 42, "Input"],
Cell[51980, 1850, 73, 2, 25, "Output"]
}, Open  ]],
Cell[52068, 1855, 135, 3, 22, "Text"],

Cell[CellGroupData[{
Cell[52228, 1862, 183, 4, 42, "Input"],
Cell[52414, 1868, 87, 2, 25, "Output"]
}, Open  ]],
Cell[52516, 1873, 200, 4, 36, "Text"],

Cell[CellGroupData[{
Cell[52741, 1881, 198, 4, 58, "Input"],
Cell[52942, 1887, 71, 2, 25, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[53062, 1895, 125, 5, 82, "Chapter",
  CellTags->"c:4"],
Cell[53190, 1902, 281, 5, 50, "Text"],
Cell[53474, 1909, 10013, 299, 803, "3ColumnTable"],
Cell[63490, 2210, 334, 10, 39, "Text"],
Cell[63827, 2222, 334, 7, 51, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[64198, 2234, 113, 5, 52, "Chapter",
  CellTags->"c:5"],
Cell[64314, 2241, 349, 10, 40, "Text"],

Cell[CellGroupData[{
Cell[64688, 2255, 50, 1, 61, "Section",
  CellTags->"c:6"],
Cell[64741, 2258, 161, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[64927, 2266, 112, 2, 29, "Input"],
Cell[65042, 2270, 63, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[65142, 2277, 114, 2, 29, "Input"],
Cell[65259, 2281, 64, 2, 27, "Output"]
}, Open  ]],
Cell[65338, 2286, 580, 19, 59, "Text"],
Cell[65921, 2307, 70, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[66016, 2311, 97, 2, 29, "Input"],
Cell[66116, 2315, 63, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[66216, 2322, 133, 3, 29, "Input"],
Cell[66352, 2327, 63, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[66452, 2334, 120, 3, 29, "Input"],
Cell[66575, 2339, 63, 2, 27, "Output"]
}, Open  ]],
Cell[66653, 2344, 241, 7, 25, "Text"],
Cell[66897, 2353, 167, 5, 25, "Text"],
Cell[67067, 2360, 262, 6, 42, "Text"],

Cell[CellGroupData[{
Cell[67354, 2370, 135, 3, 29, "Input"],
Cell[67492, 2375, 63, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[67604, 2383, 49, 1, 35, "Section",
  CellTags->"c:7"],
Cell[67656, 2386, 283, 8, 42, "Text"],

Cell[CellGroupData[{
Cell[67964, 2398, 112, 2, 29, "Input"],
Cell[68079, 2402, 64, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[68180, 2409, 113, 2, 29, "Input"],
Cell[68296, 2413, 64, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[68409, 2421, 49, 1, 35, "Section",
  CellTags->"c:8"],
Cell[68461, 2424, 227, 5, 25, "Text"],
Cell[68691, 2431, 51, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[68767, 2435, 104, 2, 29, "Input"],
Cell[68874, 2439, 77, 2, 27, "Output"]
}, Open  ]],
Cell[68966, 2444, 50, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[69041, 2448, 146, 3, 29, "Input"],
Cell[69190, 2453, 76, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[69303, 2460, 148, 3, 29, "Input"],
Cell[69454, 2465, 76, 2, 27, "Output"]
}, Open  ]],
Cell[69545, 2470, 47, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[69617, 2474, 106, 2, 29, "Input"],
Cell[69726, 2478, 74, 2, 27, "Output"]
}, Open  ]],
Cell[69815, 2483, 44, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[69884, 2487, 300, 7, 49, "Input"],
Cell[70187, 2496, 101, 2, 27, "Output"]
}, Open  ]],
Cell[70303, 2501, 283, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[70611, 2512, 150, 3, 29, "Input"],
Cell[70764, 2517, 85, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[70886, 2524, 68, 2, 29, "Input"],
Cell[70957, 2528, 73, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[71079, 2536, 57, 1, 35, "Section",
  CellTags->"c:9"],
Cell[71139, 2539, 641, 27, 42, "Text"],
Cell[71783, 2568, 178, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[71986, 2577, 138, 3, 29, "Input"],
Cell[72127, 2582, 75, 2, 27, "Output"]
}, Open  ]],
Cell[72217, 2587, 218, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[72460, 2596, 168, 4, 29, "Input"],
Cell[72631, 2602, 82, 2, 27, "Output"]
}, Open  ]],
Cell[72728, 2607, 166, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[72919, 2616, 167, 4, 29, "Input"],
Cell[73089, 2622, 106, 2, 27, "Output"]
}, Open  ]],
Cell[73210, 2627, 242, 5, 42, "Text"],

Cell[CellGroupData[{
Cell[73477, 2636, 210, 4, 29, "Input"],
Cell[73690, 2642, 98, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[73825, 2649, 211, 4, 29, "Input"],
Cell[74039, 2655, 75, 2, 27, "Output"]
}, Open  ]],
Cell[74129, 2660, 230, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[74384, 2671, 111, 2, 29, "Input"],
Cell[74498, 2675, 80, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[74615, 2682, 110, 2, 29, "Input"],
Cell[74728, 2686, 69, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[74846, 2694, 53, 1, 35, "Section",
  CellTags->"c:10"],
Cell[74902, 2697, 215, 6, 25, "Text"],

Cell[CellGroupData[{
Cell[75142, 2707, 107, 2, 29, "Input"],
Cell[75252, 2711, 85, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[75374, 2718, 103, 2, 29, "Input"],
Cell[75480, 2722, 77, 2, 27, "Output"]
}, Open  ]],
Cell[75572, 2727, 186, 6, 25, "Text"],

Cell[CellGroupData[{
Cell[75783, 2737, 141, 3, 29, "Input"],
Cell[75927, 2742, 83, 2, 27, "Output"]
}, Open  ]],
Cell[76025, 2747, 60, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[76110, 2751, 88, 2, 29, "Input"],
Cell[76201, 2755, 91, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[76341, 2763, 50, 1, 35, "Section",
  CellTags->"c:11"],
Cell[76394, 2766, 349, 9, 42, "Text"],

Cell[CellGroupData[{
Cell[76768, 2779, 146, 3, 29, "Input"],
Cell[76917, 2784, 61, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[77015, 2791, 158, 3, 29, "Input"],
Cell[77176, 2796, 61, 2, 27, "Output"]
}, Open  ]],
Cell[77252, 2801, 145, 6, 25, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[77434, 2812, 52, 1, 35, "Section",
  CellTags->"c:12"],
Cell[77489, 2815, 160, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[77674, 2823, 140, 3, 29, "Input"],
Cell[77817, 2828, 66, 2, 27, "Output"]
}, Open  ]],
Cell[77898, 2833, 252, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[78175, 2844, 155, 3, 29, "Input"],
Cell[78333, 2849, 75, 2, 27, "Output"]
}, Open  ]],
Cell[78423, 2854, 195, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[78643, 2862, 179, 4, 29, "Input"],
Cell[78825, 2868, 75, 2, 27, "Output"]
}, Open  ]],
Cell[78915, 2873, 103, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[79043, 2880, 158, 3, 29, "Input"],
Cell[79204, 2885, 75, 2, 27, "Output"]
}, Open  ]],
Cell[79294, 2890, 180, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[79499, 2899, 200, 4, 29, "Input"],
Cell[79702, 2905, 164, 3, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[79903, 2913, 70, 2, 29, "Input"],
Cell[79976, 2917, 182, 4, 43, "Output"]
}, Open  ]],
Cell[80173, 2924, 232, 5, 42, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[80442, 2934, 56, 1, 35, "Section",
  CellTags->"c:13"],
Cell[80501, 2937, 242, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[80768, 2948, 142, 3, 29, "Input"],
Cell[80913, 2953, 83, 2, 27, "Output"]
}, Open  ]],
Cell[81011, 2958, 105, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[81141, 2965, 159, 3, 29, "Input"],
Cell[81303, 2970, 103, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[81455, 2978, 50, 1, 35, "Section",
  CellTags->"c:14"],
Cell[81508, 2981, 233, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[81766, 2990, 91, 2, 29, "Input"],
Cell[81860, 2994, 86, 2, 27, "Output"]
}, Open  ]],
Cell[81961, 2999, 132, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[82118, 3006, 160, 3, 29, "Input"],
Cell[82281, 3011, 115, 2, 27, "Output"]
}, Open  ]],
Cell[82411, 3016, 107, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[82543, 3023, 93, 2, 29, "Input"],
Cell[82639, 3027, 74, 2, 27, "Output"]
}, Open  ]],
Cell[82728, 3032, 120, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[82873, 3040, 98, 2, 29, "Input"],
Cell[82974, 3044, 102, 2, 27, "Output"]
}, Open  ]],
Cell[83091, 3049, 117, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[83233, 3056, 106, 2, 29, "Input"],
Cell[83342, 3060, 80, 2, 27, "Output"]
}, Open  ]],
Cell[83437, 3065, 59, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[83521, 3069, 115, 2, 29, "Input"],
Cell[83639, 3073, 95, 2, 27, "Output"]
}, Open  ]],
Cell[83749, 3078, 182, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[83956, 3086, 120, 3, 29, "Input"],
Cell[84079, 3091, 99, 2, 65, "Output"]
}, Open  ]],
Cell[84193, 3096, 136, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[84354, 3103, 116, 2, 29, "Input"],
Cell[84473, 3107, 101, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[84611, 3114, 116, 2, 29, "Input"],
Cell[84730, 3118, 101, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[84868, 3125, 179, 3, 29, "Input"],
Cell[85050, 3130, 99, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[85186, 3137, 211, 4, 29, "Input"],
Cell[85400, 3143, 160, 3, 27, "Output"]
}, Open  ]],
Cell[85575, 3149, 145, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[85745, 3156, 248, 5, 49, "Input"],
Cell[85996, 3163, 194, 4, 43, "Output"]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{
Cell[86251, 3174, 99, 5, 52, "Chapter",
  CellTags->"c:15"],
Cell[86353, 3181, 461, 11, 54, "Text"],
Cell[86817, 3194, 173, 5, 24, "Text"],
Cell[86993, 3201, 3353, 98, 311, "3ColumnTable"],
Cell[90349, 3301, 73, 0, 22, "Text"],

Cell[CellGroupData[{
Cell[90447, 3305, 45, 1, 61, "Section",
  CellTags->"c:16"],
Cell[90495, 3308, 319, 11, 42, "Text"],
Cell[90817, 3321, 182, 6, 25, "Text"],
Cell[91002, 3329, 95, 5, 82, "Program"],
Cell[91100, 3336, 126, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[91251, 3345, 208, 4, 49, "Input"],
Cell[91462, 3351, 71, 2, 23, "Print"]
}, Open  ]],
Cell[91548, 3356, 309, 11, 25, "Text"],
Cell[91860, 3369, 66, 0, 25, "Text"],
Cell[91929, 3371, 131, 5, 82, "Program"],
Cell[92063, 3378, 109, 5, 25, "Text"],

Cell[CellGroupData[{
Cell[92197, 3387, 348, 8, 69, "Input"],
Cell[92548, 3397, 252, 5, 23, "Print"]
}, Open  ]],
Cell[92815, 3405, 85, 2, 25, "Text"],
Cell[92903, 3409, 107, 3, 48, "Program"],
Cell[93013, 3414, 153, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[93191, 3425, 275, 6, 49, "Input"],
Cell[93469, 3433, 76, 2, 27, "Output"]
}, Open  ]],
Cell[93560, 3438, 139, 4, 25, "Text"],
Cell[93702, 3444, 80, 3, 48, "Program"],
Cell[93785, 3449, 169, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[93979, 3460, 180, 3, 49, "Input"],
Cell[94162, 3465, 86, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[94297, 3473, 49, 1, 35, "Section",
  CellTags->"c:17"],
Cell[94349, 3476, 229, 9, 25, "Text"],
Cell[94581, 3487, 65, 3, 48, "Program"],
Cell[94649, 3492, 72, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[94746, 3496, 189, 4, 49, "Input"],
Cell[94938, 3502, 68, 2, 27, "Output"]
}, Open  ]],
Cell[95021, 3507, 123, 4, 25, "Text"],
Cell[95147, 3513, 37, 0, 31, "Program"],

Cell[CellGroupData[{
Cell[95209, 3517, 121, 3, 29, "Input"],
Cell[95333, 3522, 68, 2, 27, "Output"]
}, Open  ]],
Cell[95416, 3527, 231, 8, 25, "Text"],
Cell[95650, 3537, 73, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[95748, 3544, 190, 5, 49, "Input"],
Cell[95941, 3551, 74, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[96064, 3559, 49, 1, 35, "Section",
  CellTags->"c:18"],
Cell[96116, 3562, 199, 9, 25, "Text"],
Cell[96318, 3573, 67, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[96410, 3580, 148, 3, 49, "Input"],
Cell[96561, 3585, 79, 2, 27, "Output"]
}, Open  ]],
Cell[96655, 3590, 159, 5, 25, "Text"],
Cell[96817, 3597, 38, 0, 31, "Program"],

Cell[CellGroupData[{
Cell[96880, 3601, 88, 2, 29, "Input"],
Cell[96971, 3605, 76, 2, 27, "Output"]
}, Open  ]],
Cell[97062, 3610, 464, 13, 42, "Text"],
Cell[97529, 3625, 109, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[97663, 3632, 248, 5, 49, "Input"],
Cell[97914, 3639, 148, 4, 43, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[98099, 3648, 269, 6, 49, "Input"],
Cell[98371, 3656, 168, 4, 43, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[98588, 3666, 50, 1, 35, "Section",
  CellTags->"c:19"],
Cell[98641, 3669, 248, 9, 25, "Text"],
Cell[98892, 3680, 324, 14, 25, "Text"],
Cell[99219, 3696, 67, 3, 48, "Program"],
Cell[99289, 3701, 172, 7, 25, "Text"],

Cell[CellGroupData[{
Cell[99486, 3712, 207, 5, 49, "Input"],
Cell[99696, 3719, 68, 2, 27, "Output"]
}, Open  ]],
Cell[99779, 3724, 281, 11, 25, "Text"],
Cell[100063, 3737, 74, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[100162, 3744, 264, 5, 49, "Input"],
Cell[100429, 3751, 71, 2, 27, "Output"]
}, Open  ]],
Cell[100515, 3756, 181, 7, 25, "Text"],
Cell[100699, 3765, 72, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[100796, 3772, 202, 4, 49, "Input"],
Cell[101001, 3778, 75, 2, 27, "Output"]
}, Open  ]],
Cell[101091, 3783, 125, 4, 25, "Text"],
Cell[101219, 3789, 75, 3, 48, "Program"],

Cell[CellGroupData[{
Cell[101319, 3796, 264, 5, 49, "Input"],
Cell[101586, 3803, 69, 2, 27, "Output"]
}, Open  ]],
Cell[101670, 3808, 127, 4, 25, "Text"],
Cell[101800, 3814, 38, 0, 31, "Program"],

Cell[CellGroupData[{
Cell[101863, 3818, 132, 3, 29, "Input"],
Cell[101998, 3823, 71, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[102106, 3830, 134, 3, 29, "Input"],
Cell[102243, 3835, 71, 2, 27, "Output"]
}, Open  ]],
Cell[102329, 3840, 157, 5, 25, "Text"],
Cell[102489, 3847, 88, 3, 48, "Program"],
Cell[102580, 3852, 194, 9, 25, "Text"],

Cell[CellGroupData[{
Cell[102799, 3865, 304, 6, 69, "Input"],
Cell[103106, 3873, 69, 2, 27, "Output"]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{
Cell[103236, 3882, 98, 5, 52, "Chapter",
  CellTags->"c:20"],
Cell[103337, 3889, 113, 3, 22, "Text"],

Cell[CellGroupData[{
Cell[103475, 3896, 56, 1, 61, "Section",
  CellTags->"c:21"],
Cell[103534, 3899, 220, 6, 25, "Text"],
Cell[103757, 3907, 250, 5, 49, "Input"],
Cell[104010, 3914, 274, 10, 25, "Text"],
Cell[104287, 3926, 131, 3, 29, "Input"],
Cell[104421, 3931, 36, 0, 25, "Text"],
Cell[104460, 3933, 86, 2, 29, "Input"],

Cell[CellGroupData[{
Cell[104571, 3939, 71, 2, 29, "Input"],
Cell[104645, 3943, 65, 2, 27, "Output"]
}, Open  ]],
Cell[104725, 3948, 128, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[104878, 3956, 102, 2, 29, "Input"],
Cell[104983, 3960, 1277, 18, 179, "Output"]
}, Open  ]],
Cell[106275, 3981, 63, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[106363, 3985, 117, 2, 29, "Input"],
Cell[106483, 3989, 1247, 18, 160, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[107779, 4013, 57, 1, 35, "Section",
  CellTags->"c:22"],
Cell[107839, 4016, 59, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[107923, 4020, 208, 5, 29, "Input"],
Cell[108134, 4027, 1088, 15, 198, "Output"]
}, Open  ]],
Cell[109237, 4045, 86, 2, 25, "Text"],

Cell[CellGroupData[{
Cell[109348, 4051, 293, 6, 49, "Input"],
Cell[109644, 4059, 1479, 20, 217, "Output"]
}, Open  ]],
Cell[111138, 4082, 67, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[111230, 4086, 230, 5, 49, "Input"],
Cell[111463, 4093, 1479, 20, 217, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[112991, 4119, 51, 1, 35, "Section",
  CellTags->"c:23"],
Cell[113045, 4122, 108, 3, 25, "Text"],
Cell[113156, 4127, 186, 7, 25, "Text"],
Cell[113345, 4136, 3022, 49, 829, "Input"],

Cell[CellGroupData[{
Cell[116392, 4189, 77, 2, 29, "Input"],
Cell[116472, 4193, 64, 2, 27, "Output"]
}, Open  ]],
Cell[116551, 4198, 70, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[116646, 4202, 238, 5, 29, "Input"],
Cell[116887, 4209, 351, 6, 65, "Output"]
}, Open  ]],
Cell[117253, 4218, 102, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[117380, 4226, 148, 4, 29, "Input"],
Cell[117531, 4232, 682, 11, 274, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[118262, 4249, 49, 1, 35, "Section",
  CellTags->"c:24"],
Cell[118314, 4252, 92, 2, 25, "Text"],

Cell[CellGroupData[{
Cell[118431, 4258, 134, 3, 29, "Input"],
Cell[118568, 4263, 124, 3, 27, "Output"]
}, Open  ]],
Cell[118707, 4269, 88, 2, 25, "Text"],

Cell[CellGroupData[{
Cell[118820, 4275, 191, 4, 29, "Input"],
Cell[119014, 4281, 78, 2, 27, "Output"]
}, Open  ]],
Cell[119107, 4286, 132, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[119264, 4293, 121, 3, 29, "Input"],
Cell[119388, 4298, 78, 2, 27, "Output"]
}, Open  ]],
Cell[119481, 4303, 145, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[119651, 4311, 101, 2, 29, "Input"],
Cell[119755, 4315, 78, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[119882, 4323, 37, 0, 35, "Section"],
Cell[119922, 4325, 141, 3, 25, "Text"],
Cell[120066, 4330, 228, 5, 49, "Input"],
Cell[120297, 4337, 48, 0, 25, "Text"],

Cell[CellGroupData[{
Cell[120370, 4341, 192, 3, 29, "Input"],
Cell[120565, 4346, 70, 2, 27, "Output"]
}, Open  ]],
Cell[120650, 4351, 148, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[120823, 4359, 108, 2, 29, "Input"],
Cell[120934, 4363, 331, 10, 53, "Output"]
}, Open  ]],
Cell[121280, 4376, 116, 4, 25, "Text"],

Cell[CellGroupData[{
Cell[121421, 4384, 133, 3, 29, "Input"],
Cell[121557, 4389, 339, 10, 53, "Output"]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{
Cell[121957, 4406, 144, 5, 52, "Chapter",
  CellTags->{"c:25", "Tips and Tricks"}],
Cell[122104, 4413, 109, 3, 22, "Text"],

Cell[CellGroupData[{
Cell[122238, 4420, 80, 1, 61, "Section",
  CellTags->"c:26"],
Cell[122321, 4423, 501, 11, 59, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[122859, 4439, 66, 1, 35, "Section",
  CellTags->"c:27"],
Cell[122928, 4442, 432, 14, 42, "Text"],
Cell[123363, 4458, 182, 5, 29, "Input"],

Cell[CellGroupData[{
Cell[123570, 4467, 127, 3, 29, "Input"],
Cell[123700, 4472, 77, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[123814, 4479, 111, 2, 29, "Input"],
Cell[123928, 4483, 95, 2, 27, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[124072, 4491, 63, 1, 35, "Section",
  CellTags->"c:28"],
Cell[124138, 4494, 543, 13, 42, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[124718, 4512, 69, 1, 35, "Section",
  CellTags->"c:29"],
Cell[124790, 4515, 589, 14, 59, "Text"],
Cell[125382, 4531, 230, 5, 49, "Input"],

Cell[CellGroupData[{
Cell[125637, 4540, 72, 2, 29, "Input"],
Cell[125712, 4544, 102, 2, 27, "Output"]
}, Open  ]],
Cell[125829, 4549, 146, 6, 25, "Text"],

Cell[CellGroupData[{
Cell[126000, 4559, 160, 4, 29, "Input"],
Cell[126163, 4565, 883, 12, 122, "Output"]
}, Open  ]],
Cell[127061, 4580, 225, 6, 25, "Text"],

Cell[CellGroupData[{
Cell[127311, 4590, 159, 4, 29, "Input"],
Cell[127473, 4596, 865, 12, 122, "Output"]
}, Open  ]],
Cell[128353, 4611, 366, 10, 42, "Text"],

Cell[CellGroupData[{
Cell[128744, 4625, 220, 5, 29, "Input"],
Cell[128967, 4632, 865, 12, 122, "Output"]
}, Open  ]]
}, Closed]],

Cell[CellGroupData[{
Cell[129881, 4650, 93, 1, 57, "Section",
  CellTags->"c:30"],
Cell[129977, 4653, 578, 13, 59, "Text"],
Cell[130558, 4668, 218, 4, 42, "Text"],

Cell[CellGroupData[{
Cell[130801, 4676, 199, 4, 49, "Input"],
Cell[131003, 4682, 329, 5, 46, "Output"]
}, Open  ]],
Cell[131347, 4690, 110, 3, 25, "Text"],

Cell[CellGroupData[{
Cell[131482, 4697, 250, 5, 49, "Input"],
Cell[131735, 4704, 329, 5, 46, "Output"]
}, Open  ]],
Cell[132079, 4712, 175, 6, 25, "Text"],

Cell[CellGroupData[{
Cell[132279, 4722, 302, 6, 69, "Input"],
Cell[132584, 4730, 330, 5, 46, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[132951, 4740, 301, 6, 49, "Input"],
Cell[133255, 4748, 381, 6, 46, "Output"]
}, Open  ]],
Cell[133651, 4757, 70, 0, 25, "Text"],
Cell[133724, 4759, 103, 2, 29, "Input"],

Cell[CellGroupData[{
Cell[133852, 4765, 76, 2, 29, "Input"],
Cell[133931, 4769, 77, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[134045, 4776, 115, 2, 29, "Input"],
Cell[134163, 4780, 96, 2, 27, "Output"]
}, Open  ]],
Cell[134274, 4785, 97, 2, 29, "Input"],

Cell[CellGroupData[{
Cell[134396, 4791, 104, 2, 29, "Input"],
Cell[134503, 4795, 88, 2, 27, "Output"]
}, Open  ]],

Cell[CellGroupData[{
Cell[134628, 4802, 156, 4, 29, "Input"],
Cell[134787, 4808, 79, 2, 27, "Output"]
}, Open  ]]
}, Closed]]
}, Closed]],

Cell[CellGroupData[{
Cell[134927, 4817, 107, 5, 52, "Chapter",
  CellTags->"c:31"],
Cell[135037, 4824, 297, 10, 40, "Text"],
Cell[135337, 4836, 197, 6, 24, "Text"],
Cell[135537, 4844, 224, 5, 37, "Text"],

Cell[CellGroupData[{
Cell[135786, 4853, 147, 3, 26, "Input"],
Cell[135936, 4858, 98, 1, 39, "Output"]
}, Open  ]],
Cell[136049, 4862, 177, 3, 36, "Text"],
Cell[136229, 4867, 249, 4, 36, "Text"],
Cell[136481, 4873, 282, 8, 38, "Text"],
Cell[136766, 4883, 356, 8, 52, "Text"],
Cell[137125, 4893, 493, 12, 66, "Text"],
Cell[137621, 4907, 338, 10, 38, "Text"]
}, Closed]],

Cell[CellGroupData[{
Cell[137996, 4922, 95, 5, 52, "Chapter",
  CellTags->"c:32"],
Cell[138094, 4929, 312, 9, 24, "Text",
  CellTags->"Ref-1"],
Cell[138409, 4940, 205, 6, 24, "Text",
  CellTags->"Ref-2"]
}, Closed]]
}
]
*)



(*******************************************************************
End of Mathematica Notebook file.
*******************************************************************)

