(*:Mathematica Version: 2.0 *)

(*:Package Version: 1.2 *)

(*:Name: Statistics`DescriptiveStatistics` *)

(*:Context: Statistics`DescriptiveStatistics` *)

(*:Title: Basic Descriptive Statistics *)

(*:Author: Stephen Wolfram, David Withoff *)

(*:History:
  Original version by Stephen Wolfram (Wolfram Research), April 1989.
  Revised by David Withoff (Wolfram Research), February 1990.
  Revised Skewness, Kurtosis, and KurtosisExcess measures to make use of the 
	MLE of variance rather than the unbiased estimate,
	added data transformations, ECM (Wolfram Research), September 1993.
  Added CoefficientOfVariation and ExpectedValue, ECM, September 1995.
  V1.2 -- Revisions for improved performance by Serguei Chebalov
     and John Novak, November 1999.
*)

(*:Copyright: Copyright 1990-2005, Wolfram Research Inc. *)

(*:Reference: Usage messages only. *)

(*:Summary:
This package computes descriptive statistics (location, dispersion,
and shape statistics) for samples represented by lists.
*)

(*:Keywords: Maximum likelihood estimate, unbiased estimate *)

(*:Requirements: No special system requirements. *)

(*:Warning: None. *)

(*:Sources: Basic statistics texts. *)

BeginPackage["Statistics`DescriptiveStatistics`",
             "Statistics`DataManipulation`"]

Unprotect[{LocationReport, GeometricMean, HarmonicMean,
    RootMeanSquare, TrimmedMean, InterpolatedQuantile,
    Mode, Quartiles, DispersionReport, SampleRange, VarianceMLE,
	VarianceOfSampleMean, StandardDeviationMLE,
	StandardErrorOfSampleMean, MeanDeviation, MedianDeviation,
	InterquartileRange, QuartileDeviation, QuantileQ,
	ShapeReport, CentralMoment, Skewness, PearsonSkewness1, PearsonSkewness2,
	QuartileSkewness, Kurtosis, KurtosisExcess, Standardize, ZeroMean}];


ExpectedValue::usage =
"ExpectedValue[f, list] gives the expected value of the pure function f with
respect to the sample distribution of list.  ExpectedValue[f, list, x] gives
the expected value of the function f of x with respect to the sample
distribution of list."

(* Location statistics *)

LocationReport::usage =
"LocationReport[list] gives the Mean, HarmonicMean, and Median location
statistics for list."

GeometricMean::usage = 
"GeometricMean[list] gives the geometric mean of the entries in list."

HarmonicMean::usage = 
"HarmonicMean[list] gives the harmonic mean of the entries in list."

RootMeanSquare::usage = 
"RootMeanSquare[list] gives the root mean square of values in list."

TrimmedMean::usage =
"TrimmedMean[list, f] gives the mean of the entries in list, with a
fraction f of entries at each end dropped.  TrimmedMean[list, {f1, f2}]
finds the mean with fractions f1 and f2 trimmed off."

InterpolatedQuantile::usage =
"InterpolatedQuantile[list, q] gives the q-th quantile of the
probability distribution inferred by linear interpolation of the
entries in list."

If[StringQ[Mode::usage] && StringPosition[Mode::usage, "Mode[list] gives the mode"] === {},
Mode::usage = Mode::usage <> " " <>
"Mode[list] gives the mode of the entries in list.  A list of modes
is returned if the data is bimodal, trimodal, or multimodal."
]

Quartiles::usage =
"Quartiles[list] gives a list of the interpolated .25, .50, and .75 quantiles of
the entries in list."

(* Dispersion statistics *)

DispersionReport::usage =
"DispersionReport[list] gives the Variance, StandardDeviation, SampleRange,
MeanDeviation, MedianDeviation, and QuartileDeviation dispersion statistics for
list."

SampleRange::usage =
"SampleRange[list] gives the range of values in list."

VarianceMLE::usage =
"VarianceMLE[list] gives the variance of the entries in list.
Division by n (rather than n-1) is used, giving a maximum
likelihood estimate of the population variance (use Variance[list]
for an unbiased estimate)."

VarianceOfSampleMean::usage =
"VarianceOfSampleMean[list] gives an unbiased estimate of the variance
of the sample mean, using the entries in list as a sample from the
population."

StandardDeviationMLE::usage =
"StandardDeviationMLE[list] gives the standard deviation of the entries
in list.  Division by n is used, giving a maximum likelihood estimate of
the population standard deviation."

StandardErrorOfSampleMean::usage =
"StandardErrorOfSampleMean[list] gives an unbiased estimate of the
standard error (standard deviation) of the sample mean, using the entries
in list as a sample from the population."

MeanDeviation::usage =
"MeanDeviation[list] gives the mean absolute deviation about the
mean of the entries in list."

MedianDeviation::usage =
"MedianDeviation[list] gives the median absolute deviation about
the median of the entries in list."

InterquartileRange::usage =
"InterquartileRange[list] gives the difference between the upper
and lower quartiles for the entries in list."

QuartileDeviation::usage =
"QuartileDeviation[list] gives the quartile deviation, or
semi-interquartile range of the entries in list."

CoefficientOfVariation::usage =
"CoefficientOfVariation[list] gives the coefficient of variation, defined
as the ratio of the standard deviation to the mean of the entries in list."

(* Shape statistics *)

ShapeReport::usage =
"ShapeReport[list] gives the Skewness, QuartileSkewness, and KurtosisExcess
shape statistics for list."

CentralMoment::usage =
"CentralMoment[list, r] gives the r-th central moment of
the entries in list with respect to their mean."

Skewness::usage =
"Skewness[list] gives the coefficient of skewness of the entries in list.
This is equivalent to the third central moment divided by the standard
deviation cubed.  Positive skewness indicates a distribution with a long
right tail (skewed to the left).  Negative skewness indicates a distribution
with a long left tail (skewed to the right)."

PearsonSkewness1::usage =
"PearsonSkewness1[list] gives Pearson's first coefficient of skewness of the
entries in list.  Positive skewness indicates that the mean is greater than
the mode, negative that the mode is greater than the mean."

PearsonSkewness2::usage = 
"PearsonSkewness2[list] gives Pearson's second coefficient of skewness of the
entries in list.  Positive skewness indicates that the mean is greater than
the median, negative that the mode is less than the median."

QuartileSkewness::usage =  
"QuartileSkewness[list] gives the quartile coefficient of skewness of the
entries in list.  Positive skewness indicates that the median is closer to the
lower quartile than the upper quartile, negative that the median is closer to
the upper quartile than the lower quartile."

Kurtosis::usage =
"Kurtosis[list] gives the kurtosis coefficient for the entries in list.  This
is equivalent to the fourth central moment divided by the variance squared.
Kurtosis > 3 indicates a distribution that is more peaked and has heavier tails
than a normal distribution with the same variance.  Kurtosis < 3 indicates a
distribution that is flatter or has heavier flanks than the normal."

KurtosisExcess::usage =
"KurtosisExcess[list] gives the kurtosis excess for the entries in list.
Positive kurtosis excess indicates a distribution that is more peaked and has
heavier tails than a normal distribution with the same variance.  Negative
kurtosis excess indicates a distribution that is flatter or has heavier flanks 
than the normal."

(* Data transformations *)

ZeroMean::usage =
"ZeroMean[list] subtracts the mean from the data in list and returns the data
with zero mean."

Standardize::usage =
"Standardize[list] standardizes the data in list and returns the data with
zero mean and unit variance.  Standardize[list, MLE -> True] uses the
maximum likelihood estimate of the variance.  When MLE -> False (default),
the unbiased estimate is used."

MLE::usage =
"MLE is an option of descriptive statistic functions specifying whether
the maximum likelihood estimate (MLE) or unbiased estimate of a statistic
should be used."

(* Utility *)

QuantileQ::usage =
"QuantileQ[q] returns True if q is a valid quantile specification
(a number between 0 and 1)."

Unprotect[LocationReport, GeometricMean, HarmonicMean, RootMeanSquare, 
	TrimmedMean, InterpolatedQuantile, Mode, Quartiles, 
	DispersionReport, SampleRange, VarianceMLE,
	VarianceOfSampleMean, StandardDeviationMLE,
	StandardErrorOfSampleMean, MeanDeviation, MedianDeviation,
	InterquartileRange, QuartileDeviation,
	ShapeReport, CentralMoment,
	Skewness, PearsonSkewness1, PearsonSkewness2,
	QuartileSkewness, Kurtosis, KurtosisExcess];


Begin["`Private`"]

(* sorting utility to sort lists of NumericQ objects,
   without messing with the efficiency of sorting packed arrays 
   used in  TrimmedMean, InterpolatedQuantile, etc. *)
sort[s_ /; VectorQ[s, NumberQ]] := Sort[s]

sort[s_] := Sort[s, OrderedQ[N[{#1, #2},
          Precision[s] /. Infinity -> MachinePrecision]] &]

(* Quantile::frac message generated by QuantileQ *)
Quantile::frac =
"Quantile specification `1` is expected to be between 0 and 1."

QuantileQ[q_] :=
 If[FreeQ[N[q], Complex] && !TrueQ[N[q] < 0] && !TrueQ[N[q] > 1], True,
    Message[Quantile::frac, q]; False]

ExpectedValue[f_, list_] := Total[Map[f, list]] / Length[list] /;
				VectorQ[list] && Length[list] > 0

ExpectedValue[f_, list_, x_] := Total[
	 Map[Function[{x}, Evaluate[f]], list]] / Length[list] /;
				VectorQ[list] && Length[list] > 0

GeometricMean[list_] :=
    ApplyToPacked[Times, list]^(1/Length[list]) /;
				VectorQ[list] && Length[list] > 0

HarmonicMean[list_] := Length[list] / Total[1/list] /;
				VectorQ[list] && Length[list] > 0

RootMeanSquare[list_?VectorQ] := Sqrt[Dot[list, list]/Length[list]]


TrimmedMean[list_, f_] := TrimmedMean[list, {f, f}]  /; 2 f < 1 

TrimmedMean[list_, {f1_, f2_}] :=
	Block[{s, n},
		s = sort[list] ;
		n = Length[list] ;
		s = Take[s, { 1 + Floor[f1 n], 
				n - Floor[f2 n] } ] ;
		Mean[s]
	]  /; (Developer`PackedArrayQ[list] || VectorQ[list, NumericQ]) &&
             Length[list] > 2 && (f1 + f2) < 1

InterpolatedQuantile[list_, q_] :=
	Block[{s, n},
		s = sort[list] ;
		n = Length[list] ;
		Which[
			q < 1/(2n),  First[s],
			q > 1-1/(2n),  Last[s],
			True,
			  Block[{f, i, u},
			    u = n q + 1/2 ;
			    i = Floor[u] ;
			    f = u - i ;
			    If[TrueQ[f == 0],
			       (1 - f) s[[i]],
			       (1 - f) s[[i]] + f s[[i+1]]
			    ]
			  ]
		]
	]  /; (Developer`PackedArrayQ[list] || VectorQ[list, NumericQ]) && Length[list] > 2 &&
	      NumberQ[N[q]] && QuantileQ[q]


Mode[list_]:=
    Module[{c, mc, ms},
        c = Frequencies[list];
        If[Length[c] === 1, Return[c[[1,2]]]]; (* one data value *)
        mc = Max[First[Transpose[c]]];
        If[mc === 1, Return[{}]]; (* no mode *)
        ms = Cases[c, {mc ,val_} -> val];
        If[Length[ms] == 1, ms[[1]], ms] (* scalar for one mode, list for multi-mode *)
    ] /; VectorQ[list] && (Length[list]>0)


LocationReport[list_] :=
	{
	Mean -> Mean[list] ,
	HarmonicMean -> HarmonicMean[list] ,
	Median -> Median[list]
	}   /;  VectorQ[list]


Quartiles[list_] := {InterpolatedQuantile[list, 1/4],
			InterpolatedQuantile[list, 1/2],
			InterpolatedQuantile[list, 3/4]}  /;
    (Developer`PackedArrayQ[list] || VectorQ[list, NumericQ])

VarianceMLE[list_?VectorQ] := 
  With[{d = list - Mean[list]},
	Dot[d, d]/Length[list]
	] /; Length[list] > 0

VarianceOfSampleMean[list_?VectorQ] :=
    1/Length[list] Variance[list] /; Length[list] > 1

StandardDeviationMLE[list_?VectorQ]:= Sqrt[VarianceMLE[list]] /;
							Length[list] > 0

StandardErrorOfSampleMean[list_?VectorQ] := Sqrt[VarianceOfSampleMean[list]]

SampleRange[list_] := Max[list] - Min[list]  /; 
				VectorQ[list] && Length[list] > 0

MeanDeviation[list_] := Mean[ Abs[list - Mean[list]] ]  /; 
				VectorQ[list] && Length[list] > 0

MedianDeviation[list_] := Median[ Abs[list - Median[list]] ] /;
				VectorQ[list] && Length[list] > 0

InterquartileRange[list_] := 
	InterpolatedQuantile[list, 3/4] - InterpolatedQuantile[list, 1/4]  /;
							VectorQ[list]

QuartileDeviation[list_] :=
	InterquartileRange[list] / 2

CoefficientOfVariation::negdat =
"Warning: CoefficientOfVariation is not well defined for negative data
points, and at least one negative value has been found in the input.";

CoefficientOfVariation[list_?VectorQ /; Length[list] > 1] := 
       (If[covtestQ[list],
             Message[CoefficientOfVariation::negdat]
        ];
	    StandardDeviation[list]/Mean[list])

covtestQ[list_?Developer`PackedArrayQ] :=
    TrueQ[N[Total[list - Abs[list]]] =!= 0.]

covtestQ[list_] :=
    Catch[Scan[If[# <= 0, Throw[True]] &, list]; False]

DispersionReport[list_] :=
	{
	Variance -> Variance[list] ,
	StandardDeviation -> StandardDeviation[list] ,
	SampleRange -> SampleRange[list] ,
	MeanDeviation -> MeanDeviation[list] ,
	MedianDeviation -> MedianDeviation[list] ,
	QuartileDeviation -> QuartileDeviation[list]
	}   /; VectorQ[list]

CentralMoment[list_?VectorQ, n_] :=
  Mean[ (list - Mean[list])^n ] /; !VectorQ[n]

Skewness[list_?VectorQ] := CentralMoment[list, 3] / StandardDeviationMLE[list]^3

PearsonSkewness1[list_?VectorQ] :=
	3 (Mean[list] - Mode[list]) / StandardDeviation[list]

PearsonSkewness2[list_?VectorQ] :=
	3 (Mean[list] - Median[list]) / StandardDeviation[list]

QuartileSkewness[list_] := 
	Block[{q1,q2,q3},
		{q1,q2, q3} = Quartiles[list] ;
		(q3 - 2 q2 + q1) / (q3 - q1)
	] /; VectorQ[list] && Length[list] > 0
		
Kurtosis[list_?VectorQ] :=	
	CentralMoment[list, 4] / VarianceMLE[list]^2

KurtosisExcess[list_?VectorQ] := Kurtosis[list] - 3


ShapeReport[list_] :=
	{
	Skewness -> Skewness[list] ,
	QuartileSkewness -> QuartileSkewness[list] ,
	KurtosisExcess -> KurtosisExcess[list]
	}

ZeroMean[a_?VectorQ] := a - Mean[a]

Options[Standardize] = {MLE -> False}

Standardize[a_?VectorQ, opt___] :=
  Module[{s},
    (a - Mean[a])/s /; (If[TrueQ[!(MLE /. {opt} /. Options[Standardize])],
			   FreeQ[s = StandardDeviation[a], StandardDeviation],
			   FreeQ[s = StandardDeviationMLE[a],StandardDeviationMLE]
                          ] &&
		        !(s === 0 || s === 0.))
  ]

ApplyToPacked[f_, l_] :=
  With[{oldcompileopts = 
        Developer`SystemOptions["CompileOptions"]},
    CheckAll[
      Developer`SetSystemOptions["CompileOptions" -> {"ApplyCompileLength" -> 1}];
      Apply[f, l],
      (Developer`SetSystemOptions[oldcompileopts];
          If[#2 === Hold[], #1, ReleaseHold[#2]]) &
      ]
    ]



End[ ]

SetAttributes[{LocationReport, GeometricMean, HarmonicMean,
    RootMeanSquare, TrimmedMean, InterpolatedQuantile,
    Mode, Quartiles, DispersionReport, SampleRange, VarianceMLE,
	VarianceOfSampleMean, StandardDeviationMLE,
	StandardErrorOfSampleMean, MeanDeviation, MedianDeviation,
	InterquartileRange, QuartileDeviation, QuantileQ,
	ShapeReport, CentralMoment, Skewness, PearsonSkewness1, PearsonSkewness2,
	QuartileSkewness, Kurtosis, KurtosisExcess, Standardize, ZeroMean},
  {Protected, ReadProtected}];

EndPackage[ ]


