(* :Copyright: Copyright 2003, Wolfram Research, Inc. *)

(* :Mathematica Version: 5.0 *)

(* :Title: Tools for working with URLs *)

(* :Author: Yifan Hu and Tom Wickham-Jones *)

(* :Keywords: FetchURL, UncompressGZIPFile *)


(* :Summary:
This package uses J/Link calls to copy files from a URL to a local file. It also 
provides a tool for unpacking GZIP files.
*)


BeginPackage["Utilities`URLTools`", "JLink`"]

Needs["PacletManager`"]


(* Usage messages for the symbols exported by this package. *)

(* FetchURL: download a URL to a local file and return the local file name. *)
FetchURL::usage = "FetchURL[ url, new] copies the file referenced by url to the file new on the local machine. FetchURL[ url ] copies the file creating a new filename. FetchURL returns the name of the temporary file used on the local machine."

(* FetchURLWithHeaders: like FetchURL, but the result also carries the header fields. *)
FetchURLWithHeaders::usage = "FetchURLWithHeaders[ url, new] copies the file referenced by url to the file new on the local machine.  FetchURLWithHeaders[ url ] copies the file creating a new filename.  FetchURLWithHeaders returns a list of two elements: the name of the temporary file used on the local machine and the HTTP header fields."

(* The same text is stored both as the usage message and as URLInformation::obs. *)
URLInformation::usage = URLInformation::obs = "URLInformation has been removed. Use FetchURLWithHeaders to get the response headers associated with a given URL."

UncompressGZIPFile::usage = "UncompressGZIPFile[ old, new] unformats the gzip file old into new."

(* The two FTP proxy functions are kept only as deprecated entry points. *)
SetFTPProxy::usage = "SetFTPProxy is deprecated. Use SetInternetProxy[\"FTP\", {\"host\", port}] instead."

RemoveFTPProxy::usage = "RemoveFTPProxy[ ] is deprecated. Use SetInternetProxy[\"FTP\", {}] to disable use of an FTP proxy."

(* FileFilters is the option name read by the FetchURL implementation. *)
FileFilters::usage = "FileFilters is an option of FetchURL that sets up pairs {patt, fun}. Each of these apply the function 'fun' to the file if the name matches patt. This can be used for decompressing files."



Begin["`Private`"];

(* NOTE(review): $UserAgent is assigned here but not referenced anywhere else
   in this file; which context the symbol resolves to depends on $ContextPath
   at load time -- confirm the intended target. *)
$UserAgent = Automatic;

(* Messages issued by processException for the exception tags listed below. *)

FetchURL::conopen = "The connection to URL `1` cannot be opened. If the URL is correct, you might need to configure your firewall program, or you might need to set a proxy in the Internet Connectivity tab of the Preferences dialog (or by calling SetInternetProxy)."

FetchURL::contime = 
    "The maximum connection time of `1` seconds has been exceeded.";

(* One text shared by FetchURL and UncompressGZIPFile. *)
FetchURL::nofile = UncompressGZIPFile::nofile = "File `1` cannot be opened.";

FetchURL::nojava = "The connection to URL `1` could not be opened because the Java subsystem for using the Internet could not be initialized."

UncompressGZIPFile::outfile = "File name `1` is not a string.";


(* DEBUG enables per-chunk progress printing while uncompressing; MONITOR
   enables the timing/byte-count summaries printed after a download or an
   uncompress.
   NOTE(review): CONTIME (used by processException) and BUFSIZE (used by
   iUncompressGZIPFile) are referenced in this file but are not assigned
   anywhere in it. *)
DEBUG = False; MONITOR = False;


(* exceptions *)
(* Evaluating this list only mentions the tag symbols so that they are created
   in the private context; the tags are carried as the first argument of
   URLToolsException. *)
{NOJAVA, DISALLOWINTERNET, BADCONNECTION, CANTOPENOUTFILE, BADFILE, CONTIMEOUT, NOTSTRING}; 


(*
 createTempFile[ prefix] and createTempFile[ prefix, suffix] ask Java
 (java.io.File.createTempFile) for a new temporary file and return its
 canonical path as given by getCanonicalPath. A non-empty suffix is passed
 to Java preceded by three _ characters; an empty suffix is passed as the
 empty string.
*)

createTempFile[ prefix_String] := createTempFile[ prefix, ""]

createTempFile[ prefix_String, suffix_String] :=
	JavaBlock[
		Module[ {tempFile, tail = ""},
			InstallJava[];
			(* only decorate the name when there is something to append *)
			If[ StringLength[ suffix] =!= 0, tail = "___" <> suffix];
			LoadJavaClass[ "java.io.File"];
			tempFile = File`createTempFile[ prefix, tail];
			tempFile@getCanonicalPath[]]
	]


(*
 getFileName[ str] returns the part of str that follows the last character
 which is neither a word character (letters, digits, _) nor a ".".
 For a typical URL this is the text after the final "/".
 If str contains no such separating character at all, the fixed name
 "unknown.dat" is returned.
*)

getFileName[ str_String] :=
	Module[ {nonWord, dots, separators},
		(* every non-word character, "." included *)
		nonWord = StringPosition[ str, RegularExpression[ "\\W"]];
		dots = StringPosition[ str, "."];
		(* "." is allowed inside a file name, so it is not a separator *)
		separators = Complement[ nonWord, dots];
		If[ Length[ separators] === 0,
			"unknown.dat",
			StringDrop[ str, First[ Last[ separators]]]]
	]

(*
 processException is used as the handler (third argument) of Catch for
 anything thrown with a URLToolsException[ tag, arg] tag. It issues the
 message that belongs to the tag, if the tag is one of the known ones, and
 always returns $Failed. The first argument (the thrown value) is ignored.

 NOTE(review): CONTIME is not assigned anywhere in this file, so the
 FetchURL::contime message is filled with the bare symbol; FetchURL::offline
 is not defined in this file either -- presumably it is supplied elsewhere,
 confirm.
*)

processException[ _, URLToolsException[ except_, arg_]] :=
	Module[ {},
		Which[
			except === NOJAVA,
				Message[FetchURL::nojava, arg],
			except === DISALLOWINTERNET,
				Message[FetchURL::offline],
			except === BADCONNECTION,
				Message[FetchURL::conopen, arg],
			except === CONTIMEOUT,
				Message[FetchURL::contime, CONTIME],
			except === BADFILE,
				Message[UncompressGZIPFile::nofile, arg],
			except === CANTOPENOUTFILE,
				Message[FetchURL::nofile, arg],
			except === NOTSTRING,
				Message[UncompressGZIPFile::outfile, arg]
		];
		$Failed
	]



(*
 Implementation of FetchURL.
*)

(* Default options of FetchURL.

   FileFilters: a list of {pattern, function} pairs; by default a file whose
   name ends in ".gz" is handed to UncompressGZIPFile.

   "Timeout": passed on to iFetchURL, which only applies it when it is an
   integer; the default Automatic therefore sets no timeout here.
   NOTE(review): the unit expected by the fetcher's setSocketTimeout is not
   visible in this file -- confirm.

   Original author's note: hate to mix string and non-string options, but then
   I hate to introduce a symbol like Timeout for a pseudo-documented internal
   utility function. It is already used as a string option in WebServices, so
   I copy that here.
*)
Options[ FetchURL] = {FileFilters -> {{".gz", UncompressGZIPFile}}, "Timeout"->Automatic}


(* ValidFilter gives True only when called with exactly one argument that is
   a non-empty list of {string, anything} pairs; every other call gives False. *)
ValidFilter[ args___] := MatchQ[ {args}, {{{_String, _}..}}]


(*
 ApplyFilter[ {patt, fun}, file, name, autoName] applies one file filter.

 If file ends with patt, patt is stripped from the end of name, fun is
 called as fun[ file, target] and {result of fun, stripped name} is
 returned. The target is the stripped name itself, or, when autoName is
 True, a fresh temporary file created with the stripped name as suffix.

 If file does not end with patt, {file, name} is returned unchanged.
*)

ApplyFilter[ {patt_String, fun_}, file_, nameIn_, autoName_] :=
	Module[ {stem, target},
		If[ StringMatchQ[ file, "*" <> patt],
			stem = StringDrop[ nameIn, -StringLength[ patt]];
			target = If[ autoName, createTempFile[ "Temp", stem], stem, stem];
			{fun[ file, target], stem},
			(* no match, or a test that does not evaluate to True/False *)
			{file, nameIn},
			{file, nameIn}
		]
	]


(* FetchURLWithHeaders is the public entry point that asks FetchURLInternal
   to collect the header fields as well (useful, for instance, to learn the
   MIME type of the URL). The result of FetchURLInternal is returned as is. *)
FetchURLWithHeaders[ url_String, outFile_String:"", opts___?OptionQ] :=
	With[ {wantHeaders = True},
		FetchURLInternal[ url, outFile, wantHeaders, opts]
	]


(* FetchURL runs FetchURLInternal without header collection. When the result
   is a list, only its first element (the local file name) is returned; any
   other result, such as $Failed, is passed through unchanged. *)
FetchURL[ url_String, outFile_String:"", opts___?OptionQ] :=
	Module[ {result},
		result = FetchURLInternal[ url, outFile, False, opts];
		If[ ListQ[ result], First[ result], result]
	]

 
(*
 FetchURLInternal[ url, outFileIn, wantHeaders, opts] is the common worker
 behind FetchURL and FetchURLWithHeaders.

 - When outFileIn is the empty string, a name is derived from the url with
   getFileName and a temporary file carrying that name as suffix is created.
 - The FileFilters and "Timeout" settings are taken from opts, falling back
   to Options[ FetchURL].
 - The download is done by iFetchURL; afterwards, if the filter setting is
   valid, each filter is applied in turn with ApplyFilter.

 Returns {file, headers}. Anything thrown with a URLToolsException tag is
 turned into a message and $Failed by processException.
*)
FetchURLInternal[ url_String, outFileIn_String, wantHeaders:(True | False), opts___?OptionQ] :=
	Module[ {filters, timeout, file, headers, target = outFileIn, name = outFileIn, autoName},
		(* no usable name given: invent one *)
		autoName = !StringQ[ target] || StringLength[ target] < 1;
		If[ autoName,
			name = getFileName[ url];
			target = createTempFile[ "Temp", name]
		];
		{filters, timeout} = {FileFilters, "Timeout"} /. Flatten[ {opts}] /. Options[ FetchURL];
		Catch[
			{file, headers} = iFetchURL[ url, target, timeout, wantHeaders];
			If[ ValidFilter[ filters],
				(* each filter sees the file and name left by the previous one *)
				Scan[ ({file, name} = ApplyFilter[ #, file, name, autoName])&, filters]];
			{file, headers}
			,
			_URLToolsException,
			processException
		]
	]

 
(* beginsRelativeMetaCharQ[ str] returns True if the pathname str begins with
   a relative path metacharacter, and False for any argument that is not
   handled by the string definitions below.
   According to the original note this was copied from Converters.m. *)

(* Definition for MacOS: it applies only when $OperatingSystem matches the
   string "MacOS". The relative markers tested are ":" and "::", either alone
   or as the leading directory produced by ToFileName. *)

beginsRelativeMetaCharQ[
  str_String] := (StringMatchQ[str, ":"] || StringMatchQ[str, "::"] || 
    StringMatchQ[str, ToFileName[{":"}, "*"]] || 
    StringMatchQ[str, ToFileName[{"::"}, "*"]]) /; 
  StringMatchQ[$OperatingSystem, "MacOS"]
(* Definition for non-MacOS systems: the relative markers tested are "." and
   "..", either alone or as the leading directory produced by ToFileName.
   The final alternative additionally treats a name as relative when
   $SystemID is "Windows", $Language is "Japanese" and the second character
   of str is not ":".
   NOTE(review): StringTake[str, {2}] is applied without checking that str
   has at least two characters. *)

beginsRelativeMetaCharQ[str_String] := 
 StringMatchQ[str, "."] || StringMatchQ[str, ".."] || 
  StringMatchQ[str, ToFileName[{"."}, "*"]] || 
  StringMatchQ[str, 
   ToFileName[{".."}, 
    "*"]] ||(*hack to make sure full pathname on Japanese Windows*)($SystemID \
=== "Windows" && $Language === "Japanese" && StringTake[str, {2}] != ":")
(* Fallback for every other argument form. *)
beginsRelativeMetaCharQ[___] := False


(* setToWorkingDirectory[ file] places file in the current working directory,
   Directory[], when it starts with a relative path metacharacter or has no
   directory part at all. In every other case file is returned unchanged. *)

setToWorkingDirectory[file_] :=
  If[beginsRelativeMetaCharQ[file] || DirectoryName[file] === "",
   ToFileName[{Directory[]}, file],
   file,
   (* a test that is neither True nor False leaves the name alone *)
   file]


(* URLInformation has been eliminated because it was apparently unused, and not convenient to implement.
   The definition below exists only for the side effect of its condition:
   evaluating the condition issues URLInformation::obs. Message returns Null
   rather than True, so the condition is never satisfied and the right-hand
   side Null is not used as the result of the call. *)

URLInformation[url_String] := Null /; Message[URLInformation::obs]
		
      	

(*
 iFetchURL[ url, outFile1, timeout, wantHeaders] performs the actual download
 through a URL fetcher object obtained from the PacletManager.

 Arguments:
   url          the URL to fetch
   outFile1     the local file name; it is resolved against Directory[] by
                setToWorkingDirectory unless it already names a directory
   timeout      applied with setSocketTimeout only when it is an integer
   wantHeaders  passed to the fetcher; when True the header fields are
                returned as a list of rules

 Returns {outFile, headers}, where headers is Null unless wantHeaders is True.

 Throws (with value Null) a URLToolsException[ tag, arg] tag:
   DISALLOWINTERNET  $AllowInternet is not True
   NOJAVA            no fetcher object could be created
   CANTOPENOUTFILE   the fetcher reported a java.io.FileNotFoundException
   CONTIMEOUT        the fetcher reported a ConnectTimeoutException
   BADCONNECTION     the fetcher reported any other exception
*)
iFetchURL[url_String, outFile1_String, timeout_, wantHeaders_] :=
    Module[{outFile = outFile1, pm, fetcher, exc, t, headers = Null, toThrow = Null,
            totalBytes = Null},
    
        If[!$AllowInternet, Throw[Null, URLToolsException[DISALLOWINTERNET, Null]]];
    
        (* make the file in the Mathematica working directory Directory[],
           unless the user specified another directory*)
        outFile = setToWorkingDirectory[outFile];
      
        pm = PacletManager`Package`getPacletManager[];
        fetcher = pm@createURLFetcher[url, outFile, wantHeaders];
        
        If[!JavaObjectQ[fetcher],
            (* OK To Throw here--won't leak any JavaObjects. *)
            Throw[Null, URLToolsException[NOJAVA, url]]
        ];
        
        If[IntegerQ[timeout],
            fetcher@setSocketTimeout[timeout]
        ];
        fetcher@start[];

        (* Call ServiceJava to support Java dialogs (e.g., network password) running
           in a standalone kernel.
        *)
        t = AbsoluteTiming[While[!fetcher@isFinished[], ServiceJava[]; Pause[0.1]]];

        exc = fetcher@getException[];
        If[exc === Null,
            If[wantHeaders, headers = Rule @@@ fetcher@getHeaders[]],
        (* else *)
            (* Only record what to throw; the Throw itself is delayed until
               the Java objects have been released below. *)
            Which[
                InstanceOf[exc, "java.io.FileNotFoundException"],
                    toThrow = {Null, URLToolsException[CANTOPENOUTFILE, outFile]},
                InstanceOf[exc, "org.apache.commons.httpclient.ConnectTimeoutException"],
                    toThrow = {Null, URLToolsException[CONTIMEOUT, url]},
                True,
                    toThrow = {Null, URLToolsException[BADCONNECTION, url]}
            ]
        ];
        
        (* The byte count has to be read while the fetcher reference is still
           valid; asking for it after ReleaseJavaObject would use a released
           object. It is only needed for the MONITOR report of a successful
           download. *)
        If[MONITOR && toThrow === Null,
            totalBytes = fetcher@getTotalBytesDownloaded[]
        ];
        
        ReleaseJavaObject[{fetcher, exc}];
        
        If[toThrow =!= Null,
            Throw @@ toThrow
        ];
        
        If [MONITOR, Print["download time = ", t[[1]], " bytes = ", totalBytes]];
        
        {outFile, headers}
    ]



(*
 Implementation of UncompressGZIPFile.

 UncompressGZIPFile[ file] and UncompressGZIPFile[ file, outFile] forward to
 iUncompressGZIPFile; an outFile of Null (the default) selects the
 one-argument form. Anything thrown with a URLToolsException tag is turned
 into a message and $Failed by processException.
*)

UncompressGZIPFile[ file_, outFile_:Null] :=
	Catch[
		iUncompressGZIPFile @@ If[ outFile === Null, {file}, {file, outFile}],
		_URLToolsException,
		processException
	]
  
  
(* With no output name given, uncompress into a newly created temporary file. *)
iUncompressGZIPFile[ file_] := iUncompressGZIPFile[ file, createTempFile[ "Temp"]]


(*
 iUncompressGZIPFile[ file2, outFile] reads the gzip file file2 through
 java.util.zip.GZIPInputStream and writes the uncompressed bytes to outFile.
 Both names are resolved against Directory[] by setToWorkingDirectory.
 Returns the resolved output file name.

 Throws (with value Null) a URLToolsException[ tag, arg] tag:
   NOTSTRING  either file name is not a string
   BADFILE    the input stream could not be created

 The read buffer size comes from BUFSIZE when that symbol holds a positive
 integer; BUFSIZE is not assigned anywhere in this file, so a default of
 8192 bytes is used otherwise. Without a usable size the buffer could not
 be created and nothing would be uncompressed.
*)
iUncompressGZIPFile[file2_, outFile_] := JavaBlock[
Internal`DeactivateMessages[
	Module[
      	{t, fis, gzis, fos, buf, bufSize, numRead, totalBytes = 0,
      	 outFile2 = outFile, file = file2},
		InstallJava[];
		If[!StringQ[ outFile2],
            	Throw[Null, URLToolsException[ NOTSTRING, outFile2]]
			];
		If[!StringQ[file],
            	Throw[Null, URLToolsException[ NOTSTRING, file]]
			];

      (* make the file in the Mathematica working directory Directory[],
         unless the user specified another directory*)
      outFile2 = setToWorkingDirectory[outFile2];
      file = setToWorkingDirectory[file];

      (* honor an externally supplied BUFSIZE, otherwise fall back *)
      bufSize = If[ IntegerQ[ BUFSIZE] && BUFSIZE > 0, BUFSIZE, 8192];

      	t = Timing[
			fis = JavaNew["java.io.BufferedInputStream", 
				JavaNew["java.io.FileInputStream", file]];
			If[fis === $Failed,
            	Throw[Null, URLToolsException[ BADFILE, file]]
			];
          
			gzis = JavaNew["java.util.zip.GZIPInputStream", fis];
			fos = JavaNew["java.io.BufferedOutputStream", 
				JavaNew["java.io.FileOutputStream", outFile2]];
			(* "[B" is the Java class name of byte[] *)
			buf = JavaNew["[B", bufSize];
          
			(* numRead is local to this Module so it does not leak into
			   the package context *)
			While[(numRead = gzis@read[buf]) > 0,
				totalBytes += numRead;
            	If[DEBUG, Print[totalBytes, " bytes uncompressed and written"]];
            	fos@write[buf, 0, numRead]
			];
			fis@close[];
          	gzis@close[];
          	fos@close[];
		];
		If [MONITOR, 
			Print["uncompress time = ", t[[1]], " bytes = ", totalBytes];];
		outFile2
	]]];


(*
 FTP proxy utilities.
 
 Preserve the old functionality of setting system properties, which will be relevant for legacy
 code that wrote directly to java.net and called SetFTPProxy, but also call the PacletManager`
 version, which does the work that is relevant for FetchURL.
*)
      
	
(*
 SetFTPProxy[ host, port] stores ftpProxySet, ftpProxyHost and ftpProxyPort
 in the Java system properties and then calls
 PacletManager`SetInternetProxy["FTP", {host, port}] with the port exactly as
 it was given. For the Java properties a port that is not a string is
 converted with ToString. Returns Null.

 The body runs inside JavaBlock so that the references created here (the
 properties object, the strings made with MakeJavaObject and whatever put
 returns) are released when the function finishes instead of accumulating.
*)
SetFTPProxy[ftpProxyHost_, ftpProxyPortIn_] :=
    JavaBlock[
        Module[{sysProperties, ftpProxyPort = ftpProxyPortIn},
            InstallJava[];
            If[ !StringQ[ ftpProxyPort], ftpProxyPort = ToString[ ftpProxyPort]];
            LoadJavaClass["java.lang.System"];
            sysProperties = System`getProperties[];
            sysProperties@put[MakeJavaObject["ftpProxySet"], MakeJavaObject["true"]];
            sysProperties@put[MakeJavaObject["ftpProxyHost"], MakeJavaObject[ftpProxyHost]];
            sysProperties@put[MakeJavaObject["ftpProxyPort"], MakeJavaObject[ftpProxyPort]];
            PacletManager`SetInternetProxy["FTP", {ftpProxyHost, ftpProxyPortIn}];
        ]
    ]
      
      
(*
 RemoveFTPProxy[ ] removes ftpProxySet, ftpProxyHost and ftpProxyPort from
 the Java system properties and then calls
 PacletManager`SetInternetProxy["FTP", {}]. Returns Null.

 The body runs inside JavaBlock so that the references created here (the
 properties object, the strings made with MakeJavaObject and whatever remove
 returns) are released when the function finishes instead of accumulating.
*)
RemoveFTPProxy[] := 
    JavaBlock[
        Module[{sysProperties},
            InstallJava[];
            LoadJavaClass["java.lang.System"];
            sysProperties = System`getProperties[];
            sysProperties@remove[MakeJavaObject["ftpProxySet"]];
            sysProperties@remove[MakeJavaObject["ftpProxyHost"]];
            sysProperties@remove[MakeJavaObject["ftpProxyPort"]];
            PacletManager`SetInternetProxy["FTP", {}];
        ]
    ]

 
End[]; (* end private *)
EndPackage[];
