(* Content-type: application/mathematica *)

(*** Wolfram Notebook File ***)
(* http://www.wolfram.com/nb *)

(* CreatedBy='Mathematica 6.0' *)

(*CacheID: 234*)
(* Internal cache information:
NotebookFileLineBreakTest
NotebookFileLineBreakTest
NotebookDataPosition[       145,          7]
NotebookDataLength[     11291,        387]
NotebookOptionsPosition[      7595,        282]
NotebookOutlinePosition[      9384,        322]
CellTagsIndexPosition[      9341,        319]
WindowFrame->Normal
ContainsDynamic->False*)

(* Beginning of Notebook Content *)
Notebook[{
Cell[" ", "GuideColorBar"],

Cell[CellGroupData[{

Cell["Processing Textual Data", "GuideTitle",
 CellID->11836],

Cell[TextData[{
 StyleBox["Mathematica",
  FontSlant->"Italic"],
 " has uniquely flexible capabilities for processing large volumes of textual \
data",
 ". ",
 "Most often data represented as a string is converted to lists or other \
constructs, which can then be manipulated using ",
 StyleBox["Mathematica",
  FontSlant->"Italic"],
 "'s powerful symbolic language constructs. "
}], "GuideAbstract",
 CellID->10591],

Cell[TextData[{
 Cell[TextData[ButtonBox["Import",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/Import"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " import data from files or the web"
}], "GuideText",
 CellID->14651],

Cell[TextData[{
 Cell[TextData[ButtonBox["\"Text\"",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/format/Text"]], "InlineFunctionSans"],
 ", ",
 Cell[TextData[ButtonBox["\"PDF\"",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/format/PDF"]], "InlineFunctionSans"],
 ", ",
 Cell[TextData[ButtonBox["\"TeX\"",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/format/TeX"]], "InlineFunctionSans"],
 ", ",
 Cell[TextData[ButtonBox["\"HTML\"",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/format/HTML"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " pick out plain text, table data, etc."
}], "GuideText",
 CellID->420180038],

Cell[TextData[{
 Cell[TextData[ButtonBox["FindList",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/FindList"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " search files for records containing particular strings"
}], "GuideText",
 CellID->27844],

Cell["\t", "GuideDelimiter",
 CellID->24743],

Cell[TextData[{
 Cell[TextData[ButtonBox["StringSplit",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/StringSplit"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " split a string into words, sentences, etc."
}], "GuideText",
 CellID->7741],

Cell[TextData[{
 Cell[TextData[ButtonBox["StringCount",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/StringCount"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " count occurrences of words, etc."
}], "GuideText",
 CellID->11245],

Cell[TextData[{
 Cell[TextData[ButtonBox["StringCases",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/StringCases"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " ",
 "find instances of a string pattern"
}], "GuideText",
 CellID->367032027],

Cell["\t", "GuideDelimiter",
 CellID->14578],

Cell[TextData[{
 Cell[TextData[ButtonBox["StringExpression",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/StringExpression"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " match symbolic string patterns"
}], "GuideText",
 CellID->4949],

Cell[TextData[{
 Cell[TextData[ButtonBox["Sort",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/Sort"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " sort into alphabetical order"
}], "GuideText",
 CellID->21705],

Cell["\t", "GuideDelimiter",
 CellID->24514],

Cell[TextData[{
 Cell[TextData[ButtonBox["Nearest",
  BaseStyle->"NewInVersionLink",
  ButtonData->"paclet:ref/Nearest"]], "InlineFunctionSans",
  Background->Dynamic[
    If[CurrentValue[{TaggingRules, "ModificationHighlight"}] === True, 
     RGBColor[0.92, 1, 0.59], None]]],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " find the closest-matching string from a list"
}], "GuideText",
 CellID->19785],

Cell[TextData[{
 Cell[TextData[ButtonBox["FindClusters",
  BaseStyle->"NewInVersionLink",
  ButtonData->"paclet:ref/FindClusters"]], "InlineFunctionSans",
  Background->Dynamic[
    If[CurrentValue[{TaggingRules, "ModificationHighlight"}] === True, 
     RGBColor[0.92, 1, 0.59], None]]],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " ",
 "find clusters in string data"
}], "GuideText",
 CellID->20076532],

Cell[TextData[{
 Cell[TextData[ButtonBox["EditDistance",
  BaseStyle->"NewInVersionLink",
  ButtonData->"paclet:ref/EditDistance"]], "InlineFunctionSans",
  Background->Dynamic[
    If[CurrentValue[{TaggingRules, "ModificationHighlight"}] === True, 
     RGBColor[0.92, 1, 0.59], None]]],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " ",
 "find the edit (Levenshtein) distance between strings"
}], "GuideText",
 CellID->166029593],

Cell["\t", "GuideDelimiter",
 CellID->9227],

Cell[TextData[{
 Cell[TextData[ButtonBox["Hash",
  BaseStyle->"Link",
  ButtonData->"paclet:ref/Hash"]], "InlineFunctionSans"],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " ",
 "find a hash code using a variety of schemes"
}], "GuideText",
 CellID->112653030],

Cell["\t", "GuideDelimiter",
 CellID->687147652],

Cell[TextData[{
 Cell[TextData[ButtonBox["DictionaryLookup",
  BaseStyle->"NewInVersionLink",
  ButtonData->"paclet:ref/DictionaryLookup"]], "InlineFunctionSans",
  Background->Dynamic[
    If[CurrentValue[{TaggingRules, "ModificationHighlight"}] === True, 
     RGBColor[0.92, 1, 0.59], None]]],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " ",
 "look up words in an English dictionary"
}], "GuideText",
 CellID->8624],

Cell[TextData[{
 Cell[TextData[ButtonBox["WordData",
  BaseStyle->"NewInVersionLink",
  ButtonData->"paclet:ref/WordData"]], "InlineFunctionSans",
  Background->Dynamic[
    If[CurrentValue[{TaggingRules, "ModificationHighlight"}] === True, 
     RGBColor[0.92, 1, 0.59], None]]],
 " ",
 StyleBox["\[LongDash]", "GuideEmDash"],
 " find semantic, grammatical, morphological, etc. properties of words"
}], "GuideText",
 CellID->332081419]
}, Open  ]],

Cell[CellGroupData[{

Cell["TUTORIALS", "GuideTutorialsSection",
 CellID->17204],

Cell[TextData[ButtonBox["Searching Files",
 BaseStyle->"Link",
 ButtonData->"paclet:tutorial/SearchingFiles"]], "GuideTutorial",
 CellID->314491916],

Cell[TextData[ButtonBox["Operations on Strings",
 BaseStyle->"Link",
 ButtonData->"paclet:tutorial/OperationsOnStrings"]], "GuideTutorial",
 CellID->302344554],

Cell[TextData[ButtonBox["String Patterns",
 BaseStyle->"Link",
 ButtonData->"paclet:tutorial/StringPatterns"]], "GuideTutorial",
 CellID->234938534],

Cell[TextData[ButtonBox["Working with String Patterns",
 BaseStyle->"Link",
 ButtonData->
  "paclet:tutorial/WorkingWithStringPatternsOverview"]], "GuideTutorial",
 CellID->65394686]
}, Open  ]],

Cell[CellGroupData[{

Cell["MORE ABOUT", "GuideMoreAboutSection",
 CellID->6227],

Cell[TextData[ButtonBox["String Manipulation",
 BaseStyle->"Link",
 ButtonData->"paclet:guide/StringManipulation"]], "GuideMoreAbout",
 CellID->555],

Cell[TextData[ButtonBox["Converting between Strings & Expressions",
 BaseStyle->"Link",
 ButtonData->
  "paclet:guide/ConvertingBetweenExpressionsAndStrings"]], "GuideMoreAbout",
 CellID->10102],

Cell[TextData[ButtonBox["Notebook Programming",
 BaseStyle->"Link",
 ButtonData->"paclet:guide/LowLevelNotebookProgramming"]], "GuideMoreAboutSub",\

 CellID->19468]
}, Open  ]],

Cell[" ", "FooterCell"]
},
Saveable->False,
ScreenStyleEnvironment->"Working",
WindowSize->{725, 750},
WindowMargins->{{0, Automatic}, {Automatic, 0}},
WindowTitle->"Processing Textual Data - Wolfram Mathematica",
TaggingRules->{
 "ModificationHighlight" -> False, 
  "Metadata" -> {
   "built" -> "{2007, 4, 20, 18, 33, 49.2059356}", "context" -> "", 
    "keywords" -> {
     "alphabetic", "alphabetizing of strings", "breaking strings", 
      "collating of strings", "collation", "counting string matches", 
      "dictionary lookup", "dividing strings", "edit distance", "find list", 
      "grep", "Levenshtein distance", "lines of text", "patterns for strings",
       "records", "sorting of strings", "splitting strings", "string cases", 
      "string count", "string distance", "string expression", "string metric",
       "string order", "string pattern", "string split", "strings", "wc", 
      "whitespace", "words"}, "index" -> True, "label" -> "Mathematica Guide",
     "language" -> "en", "paclet" -> "Mathematica", "status" -> "None", 
    "summary" -> 
    "Mathematica has uniquely flexible capabilities for processing large \
volumes of textual data. Most often data represented as a string is converted \
to lists or other constructs, which can then be manipulated using \
Mathematica's powerful symbolic language constructs. ", "synonyms" -> {}, 
    "title" -> "Processing Textual Data", "type" -> "Guide", "uri" -> 
    "guide/ProcessingTextualData"}},
FrontEndVersion->"6.0 for Microsoft Windows (32-bit) (April 17, 2007)",
StyleDefinitions->FrontEnd`FileName[{"Wolfram"}, "Reference.nb", 
  CharacterEncoding -> "WindowsANSI"]
]
(* End of Notebook Content *)

(* Internal cache information *)
(*CellTagsOutline
CellTagsIndex->{}
*)
(*CellTagsIndex
CellTagsIndex->{}
*)
(*NotebookFileOutline
Notebook[{
Cell[568, 21, 26, 0, 8, "GuideColorBar"],
Cell[CellGroupData[{
Cell[619, 25, 61, 1, 70, "GuideTitle",
 CellID->11836],
Cell[683, 28, 415, 12, 70, "GuideAbstract",
 CellID->10591],
Cell[1101, 42, 249, 8, 70, "GuideText",
 CellID->14651],
Cell[1353, 52, 650, 20, 70, "GuideText",
 CellID->420180038],
Cell[2006, 74, 274, 8, 70, "GuideText",
 CellID->27844],
Cell[2283, 84, 44, 1, 70, "GuideDelimiter",
 CellID->24743],
Cell[2330, 87, 267, 8, 70, "GuideText",
 CellID->7741],
Cell[2600, 97, 258, 8, 70, "GuideText",
 CellID->11245],
Cell[2861, 107, 269, 9, 70, "GuideText",
 CellID->367032027],
Cell[3133, 118, 44, 1, 70, "GuideDelimiter",
 CellID->14578],
Cell[3180, 121, 265, 8, 70, "GuideText",
 CellID->4949],
Cell[3448, 131, 240, 8, 70, "GuideText",
 CellID->21705],
Cell[3691, 141, 44, 1, 70, "GuideDelimiter",
 CellID->24514],
Cell[3738, 144, 407, 11, 70, "GuideText",
 CellID->19785],
Cell[4148, 157, 409, 12, 70, "GuideText",
 CellID->20076532],
Cell[4560, 171, 410, 12, 70, "GuideText",
 CellID->166029593],
Cell[4973, 185, 43, 1, 70, "GuideDelimiter",
 CellID->9227],
Cell[5019, 188, 264, 9, 70, "GuideText",
 CellID->112653030],
Cell[5286, 199, 48, 1, 70, "GuideDelimiter",
 CellID->687147652],
Cell[5337, 202, 423, 12, 70, "GuideText",
 CellID->8624],
Cell[5763, 216, 435, 11, 70, "GuideText",
 CellID->332081419]
}, Open  ]],
Cell[CellGroupData[{
Cell[6235, 232, 58, 1, 70, "GuideTutorialsSection",
 CellID->17204],
Cell[6296, 235, 148, 3, 70, "GuideTutorial",
 CellID->314491916],
Cell[6447, 240, 159, 3, 70, "GuideTutorial",
 CellID->302344554],
Cell[6609, 245, 148, 3, 70, "GuideTutorial",
 CellID->234938534],
Cell[6760, 250, 182, 4, 70, "GuideTutorial",
 CellID->65394686]
}, Open  ]],
Cell[CellGroupData[{
Cell[6979, 259, 58, 1, 70, "GuideMoreAboutSection",
 CellID->6227],
Cell[7040, 262, 148, 3, 70, "GuideMoreAbout",
 CellID->555],
Cell[7191, 267, 194, 4, 70, "GuideMoreAbout",
 CellID->10102],
Cell[7388, 273, 165, 4, 70, "GuideMoreAboutSub",
 CellID->19468]
}, Open  ]],
Cell[7568, 280, 23, 0, 70, "FooterCell"]
}
]
*)

(* End of internal cache information *)

