
; SUBROUTINES FOR CPUTYPE 

;
;    CPU IDENTIFICATION SUBROUTINE
;       Identify the CPU type, from 8088 to the Pentium.  Works 
;       even if the 386 or later CPU is in V86 mode.  Note that
;       interrupts are enabled at exit, even if they were 
;       disabled on entry.  If it is necessary to run this 
;       routine with interrupts disabled, just remove all CLI 
;       and STI instructions, so long as interrupts are 
;       always disabled before running.
;
;       Called with:    nothing
;
;       Returns:        al = CPU type
;                             0 if 8088/8086 or V20/V30
;                             1 if 80186/80188
;                             2 if 80286
;                             3 if 80386
;                             4 if 80486
;                             5 if Pentium
;                       ah =  bit 0 = 0 if CPUID unavailable
;                                     1 if CPUID ok
;                             bit 1 = 0 if not V20/V30
;                                     1 if NEC V20/V30
;
;       Regs used:      ax, bx (all)
;                       eax, ebx (386 or later)
;
;       Subs called:    hook_int6, restore_int6, bad_op_handler

.8086   ; all instructions 8088/8086 unless overridden later

cpuvalue proc    far
        push    cx
        push    dx
        push    ds
        push    es

; Overview: a chain of probes, oldest CPU first.  Each probe either
;   identifies the CPU (loads AX and jumps to uP_Exit) or proves
;   the CPU is newer and falls into the next probe.  AL returns the
;   CPU type and AH the flag bits described in the header.

; 8088/8086 test - Use shift-count quirk - All later CPUs mask the
;   shift count in CL to its low five bits (AND 1Fh) before
;   shifting.  This test loads CL with 20h and shifts the AX
;   register right.  The 8088/8086 really performs all 32 shifts,
;   so every bit in AX is shifted out and AX becomes 0.  On all
;   higher level processors 20h AND 1Fh = 0, so the effective
;   number of shifts is 0 and AX is unaffected.

        mov     cl, 20h            ; load high CL value
        mov     ax, 1              ; load a non-zero value in AX
        shr     ax, cl             ; do the shift
        cmp     ax, 0              ; if zero, then 8088/86
        jne     up186              ; jump if not 8088/86

; V20/V30 test - It is now either a V20/V30 or a 8088.  I'll use
;   another undocumented trick to find out which.  On the 8088,
;   0Fh performs a POP CS.  On the V20/V30, it is the start of
;   a number of multi-byte instructions.  With the byte string
;   0Fh, 14h, C3h the CPU will perform the following:
;               8088/8086               V20/V30
;             pop     cs              set1   bl, cl  
;             adc     al, 0C3h
;   The PUSH CS below supplies the word that POP CS removes, so
;   CS is reloaded with its own value on the 8088/8086.  On the
;   V20/V30 nothing pops it, so upV20 discards it by hand.

        xor     al, al             ; clear al and carry flag
        push    cs
        db      0Fh, 14h, 0C3h     ; instructions (see above)
        cmp     al, 0C3h           ; if al is C3h then 8088/8086
        jne     upV20
        mov     ax, 0              ; set 8088/8086 flag
        jmp     uP_Exit

upV20:
        pop     ax                 ; correct for lack of pop cs
        mov     ax, 200h           ; set V20/V30 flag
        jmp     uP_Exit

; 80186/80188 test - Check what is pushed onto the stack with a 
;   PUSH SP instruction.  The 80186 updates the stack pointer 
;   before the value of SP is pushed onto the stack.  With all
;   higher level processors, the current value of SP is pushed
;   onto the stack, and then the stack pointer is updated.
        
up186:  
        mov     bx, sp             ; save the current stack ptr
        push    sp                 ; do test
        pop     ax                 ; get the pushed value
        cmp     ax, bx             ; did SP change ?
        je      up286              ; if not, it's a 286+ 
        mov     ax, 1              ; set 80186 flag
        jmp     uP_Exit

; 80286 test A - We'll look at the top four bits (12-15) of the
;   FLAGS register.  On a 286, these bits are always zero.  Later 
;   CPUs allow these bits to be changed.  During this test, 
;   we'll disable interrupts to ensure interrupts do not change 
;   the flags.  Note the STI below leaves interrupts enabled no
;   matter what state the caller had them in.

up286: 
        cli                        ; disable interrupts
        pushf                      ; save the current flags

        pushf                      ; push flags onto stack 
        pop     ax                 ; now pop flags from stack
        or      ax, 0F000h         ; try and set bits 12-15 hi
        push    ax
        popf                       ; set new flags
        pushf
        pop     ax                 ; see if upper bits are 0

        popf                       ; restore flags to original
        sti                        ; enable interrupts
        test    ax, 0F000h         ; were any upper bits 1 ?
        jnz     up386              ; if so, not a 286

; 80286 test B - If the system was in V86 mode, (386 or higher) 
;   the POPF instruction causes a protection fault, and the 
;   protected mode software must emulate the action of POPF. If 
;   the protected mode software gets this wrong, as occurs with a 
;   rarely encountered bug in Windows 3.1 enhanced mode, the 
;   prior test may look like a 286, but it's really a higher 
;   level processor. We'll check if the protected mode bit is 
;   on.  If not, it's guaranteed to be a 286.

.286P                              ; allow a 286 instruction
        smsw    ax                 ; get machine status word
        test    ax, 1              ; in protected mode ?
        jz      is286              ; jump if not (must be 286)

; 80286 test C - It's very likely a 386 or greater, but it is 
;   not guaranteed yet.  There is a small possibility the system 
;   could be in 286 protected mode so we'll do one last test. We 
;   will try out a 386 unique instruction, after vectoring the 
;   bad-opcode interrupt vector (int 6) to ourselves.  badoff
;   holds the offset the int 6 handler is expected to resume at.

        call    hook_int6          ; do it!
        mov     [badoff], offset upbad_op  ; where to go if bad
.386
        xchg    eax, eax           ; 32 bit nop (bad on 286)
        
        call    restore_int6       ; restore vector
        jmp     up386              ; only gets here if 386 
                                   ;  or greater!

; Interrupt vector 6 (bad opcode) comes here if system is a 
;   80286 (assuming the 286 protected mode interrupt 6 handler 
;   will execute the bad-opcode interrupt). 

upbad_op:
        call    restore_int6
is286:
        mov     ax, 2              ; set 80286 flag
        jmp     uP_Exit

; 80386 test - Bit 18 in EFLAGS is not settable on a 386, but is
;   changeable on the 486 and later CPUs.  Bit 18 is used to 
;   flag alignment faults. During this test, we'll disable 
;   interrupts to ensure no interrupt will change any flags.
;   EBX keeps the original EFLAGS image so it can be restored
;   and compared against the value read back.

.386                               ; allow 386 instructions

up386:
        cli                        ; disable interrupts
        pushfd                     ; push flags to look at
        pop     eax                ; get eflags
        mov     ebx, eax           ; save for later
        xor     eax, 40000h        ; toggle bit 18
        push    eax                                    
        popfd                      ; load modified eflags to CPU
        pushfd                     ; push eflags to look at
        pop     eax                ; get current eflags
        push    ebx                ; push original onto stack
        popfd                      ; restore original flags
        sti                        ; enable interrupts
        xor     eax, ebx           ; check if bit changed 
        jnz     up486              ; changed, so 486 or later
        mov     ax, 3              ; set 80386 flag
        jmp     uP_Exit

; 80486 test - Bit 21 in EFLAGS is not settable on a 486, but is
;   changeable on the Pentium CPU.  If bit 21 is changeable, it 
;   indicates the CPU supports the CPUID instruction.  Same
;   toggle / read back / restore sequence as the 80386 test,
;   again with interrupts disabled so that no interrupt will
;   change any flags. 

up486:
        cli                        ; disable interrupts
        pushfd                     ; push flags to look at
        pop     eax                ; get eflags
        mov     ebx, eax           ; save for later
        xor     eax, 200000h       ; toggle bit 21
        push    eax                                    
        popfd                      ; load modified eflags to CPU
        pushfd                     ; push eflags to look at
        pop     eax                ; get current eflags
        push    ebx                ; push original onto stack
        popfd                      ; restore original flags
        sti                        ; enable interrupts
        xor     eax, ebx           ; check if bit changed 
        jnz     upPentium          ; changed, it's a Pentium
        mov     ax, 4              ; set 80486 flag
        jmp     uP_Exit

; Pentium - It's possible the CPUID instruction may appear on 
;   other CPU chips, so run the CPUID instruction to see what 
;   CPU type it indicates.  The family number is taken from
;   bits 8-11 of EAX after CPUID function 1 and returned in AL
;   as the CPU type (5 for the Pentium); AH bit 0 is set to
;   flag that CPUID is available.

upPentium:
        push    ecx                ; CPUID changes eax to edx
        push    edx
        mov     eax, 1             ; get family info function
        CPUID                      ; macro for CPUID instruction
        and     eax, 0F00h         ; find family info
        shr     eax, 8             ; move to al
        mov     ah, 1              ; set flag that CPUID ok
        pop     edx
        pop     ecx
       
; Common exit: AX holds the result; restore the saved registers.

up_Exit:
        pop     es
        pop     ds
        pop     dx
        pop     cx
        ret
cpuvalue endp
.8086                              ; return to 8086 instructions

;
;    CPU VENDOR IDENTIFICATION SUBROUTINE
;       Determines the CPU manufacturer by checking a number
;       of unique aspects of each vendors chips.
;
;       Called with:    ds:[cpu_val] set with CPU type
;                       ds:[cpu_info] set with additional CPU info
;
;       Returns:        al = Vendor number
;                              0 = Unknown, 8088 to 80286
;                              1 = vendor in CPUID string
;                              2 = NEC V20/V30
;                              3 = Intel or AMD, not IBM or Cyrix
;                              4 = Intel
;                              5 = AMD, only one to make 40Mhz 386
;                              6 = IBM, only 386/486 with RDMSR
;                              7 = Cyrix or TI, no UMOV support
;                       transfers CPUID string to ds:[idstring]
;
;       Regs used:      eax, ebx, ecx, edx, di
;
;       Subs called:    hook_int6, restore_int6, bad_op_handler
;                       hook_intD, restore_intD

.386                               ; allow 386 instructions
cpuvendor  proc   near

; Step 1 - the V20/V30 flag settles the vendor at once, since
;   only NEC makes those parts.

        test    [cpu_info], 2      ; V20/V30 flag set ?
        jz      cpuv_not_nec       ; no, keep looking
        mov     al, 2              ; vendor = NEC
        jmp     cpuv_exit

; Step 2 - a 286 or anything below it cannot be told apart.

cpuv_not_nec:
        cmp     [cpu_val], 2       ; 386 or later ?
        ja      cpuv_386up         ; yes, more tests possible
        xor     al, al             ; vendor = unknown
        jmp     cpuv_exit

; Step 3 - when CPUID is available, let the chip name its maker.
;   Function 0 returns the 12 character vendor string in
;   ebx, edx, ecx (in that order); copy it to [idstring].

cpuv_386up:
        test    [cpu_info], 1      ; CPUID usable ?
        jz      cpuv_skp1          ; no, fall back to probing

        xor     eax, eax           ; function 0 = vendor string
        CPUID
        mov     di, offset idstring
        mov     eax, ebx           ; characters 0-3
        call    xfer_bytes
        mov     eax, edx           ; characters 4-7
        call    xfer_bytes
        mov     eax, ecx           ; characters 8-11
        call    xfer_bytes
        mov     al, 1              ; vendor = see CPUID string
        jmp     cpuv_exit

; Step 4 - no CPUID, so probe with the UMOV instruction while the
;   bad-opcode (int 6) and general protection (int D) vectors are
;   hooked.  [badoff] names the resume point should the opcode be
;   rejected.  Cyrix/TI parts do not support UMOV.  (The Pentium
;   lacks it too, but was already handled through CPUID.)

cpuv_skp1:
        call    hook_int6          ; catch bad opcodes
        call    hook_intD          ; catch protection faults
        mov     [badoff], offset cpuv_badop
        mov     al, 05Ah           ; two different test patterns
        mov     bh, 0A5h
        clc                        ; carry must stay clear
        db      0Fh, 10h, 0F8h     ; umov al, bh
        db      90h, 90h
        jc      cpuv_badop         ; carry set, UMOV misbehaved
        cmp     al, bh             ; did al receive bh ?
        je      cpuv_try_IBM       ; yes, likely Intel/AMD/IBM

; UMOV failed or is unsupported on a 386+, so call it Cyrix.

cpuv_badop:
        mov     al, 7              ; vendor = Cyrix or TI
        jmp     cpuv_restore

; Step 5 - only IBM's chips accept RDMSR with ecx=1000h.  If the
;   instruction faults, the handler resumes at cpuv_badop2.

cpuv_try_IBM:
        mov     [badoff], offset cpuv_badop2
        mov     ecx, 1000h         ; register number to read
        RDMSR                      ; read machine specific reg
        db      90h, 90h           ; safety NOPs
        mov     al, 6              ; it worked, vendor = IBM
        jmp     cpuv_restore

cpuv_badop2:
        mov     al, 3              ; vendor = Intel or AMD

; Unhook both vectors in the reverse order they were hooked.

cpuv_restore:
        call    restore_intD
        call    restore_int6
cpuv_exit:
        ret
cpuvendor endp
.8086                              ; return to 8086 instructions


;
;    CPU SPEED DETERMINATION SUBROUTINE
;       Determines the CPU speed by accurately measuring 
;       a short loop.
;
;       Called with:    ds:[cpu_val] set with CPU type
;                       ds:[cpu_mfg] set with CPU vendor
;
;       Returns:        ax = Speed in Mhz
;                       bx = raw timing value (not too useful)
;
;       Regs used:      ax, bx, cx

cpuspeed proc   near

; Outline:
;   1. Pick the vendor's speed table (IBM, Cyrix, or Intel for
;      everything else) and index it by CPU type.  Each entry is
;      two words: the outer loop count and the scaling factor.
;   2. Time a fixed block of DIV instructions with timer 2 while
;      interrupts and NMI are shut off.
;   3. Return  (factor * 4) / elapsed timer counts  in AX, or the
;      raw count when the table factor is 0.  BX always returns
;      the raw count.  If the count is zero or so small that the
;      quotient will not fit in 16 bits, AX returns 0 rather than
;      letting the DIV raise a divide fault.

        push    dx
        push    si
        mov     al, [cpu_val]      ; get cpu number
        xor     ah, ah
        shl     ax, 1
        shl     ax, 1              ; times 4 (bytes per table entry)
        mov     si, offset ibmspd  ; table of values
        cmp     [cpu_mfg], 6       ; IBM ?
        je      cpus_skp1          ; jump if so
        mov     si, offset cyrixspd  ; table of values
        cmp     [cpu_mfg], 7         ; Cyrix ?
        je      cpus_skp1            ; jump if so (keep Cyrix table)
        mov     si, offset intelspd  ; all others use Intel table

; now setup timer 2 to time instruction execution

cpus_skp1:
        add     si, ax             ; point SI to value for CPU
        mov     al, 0B0h           ; Timer 2 command, mode 0
        out     43h, al            ; send command
        IODELAY
        mov     al, 0FFh           ; counter value FFFF
        out     42h, al            ; send lsb to counter
        IODELAY
        out     42h, al            ; send msb to counter
        IODELAY

; all interrupts, including NMI, are shut off to prevent any
; interrupts from affecting the timing

        cli                        ; disable interrupts
        mov     al, 80h
        out     70h, al            ; disable NMI
        IODELAY    
        in      al, 61h            ; read the current contents
        IODELAY    
        or      al, 1              ; set gate bit on
        out     61h, al            ; turn on timer (begins timing)
        xor     dx, dx
        mov     bx, 1
        mov     ax, [si]           ; outer loop count for this CPU

; this loop executes a bunch of slow divide instructions.
; Dividing dx:ax by 1 leaves ax unchanged and dx = 0, so ax can
; double as the outer loop counter.

cpus_loop1:
        mov     cx, 10h            ; loop value
cpus_loop2:
        div     bx                 ; ax = dx:ax/1  dx=rem
        div     bx                 ; (lots of cycles per inst)
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        div     bx
        loop    cpus_loop2
        dec     ax
        jnz     cpus_loop1         ; loop x times for the CPU

; when the loop completes, the timer is stopped, and interrupts 
; are re-enabled.

        in      al, 61h            ; read the current contents
        IODELAY    
        and     al, 0FEh           ; set gate bit off
        out     61h, al            ; disable the counter
        xor     al, al  
        out     70h, al            ; enable NMI
        sti                        ; enable interrupts

; now the timer contents are read, and the duration of 
; instruction execution is determined

        mov     al, 80h            ; latch output command
        out     43h, al            ; send command 
        IODELAY    
        in      al, 42h            ; get lsb of counter
        IODELAY                    
        mov     dl, al          
        in      al, 42h            ; get msb of counter
        mov     dh, al             ; dx = counter value
        mov     ax, 0FFFFh         ; starting value
        sub     ax, dx             ; ax = duration count
        mov     cx, ax             ; cx = divisor for the scaling
        mov     bx, ax             ; save for exit
        cmp     word ptr [si+2], 0 ; no factor adjust ?
        je      cpus_skp2          ; exit with raw value in ax

; now compensate for each CPU type and vendor, since every
; type executes instructions with different timings
; (i.e. a divide on a 8088 takes from 144 to 162 clocks 
; depending on the values, while a Pentium takes 25 clocks)

        mov     ax, [si+2]         ; get factor
        xor     dx, dx
        shl     ax, 1
        rcl     dx, 1
        shl     ax, 1
        rcl     dx, 1              ; dx:ax = factor * 4
        cmp     dx, cx             ; quotient fits only if dx < cx
        jb      cpus_divide        ;   (also rejects cx = 0)
        xor     ax, ax             ; count unusable, report 0 MHz
        jmp     short cpus_skp2
cpus_divide:
        div     cx                 ; factor adjust (ax=dx:ax/cx)

; return the CPU speed in MHz in AX

cpus_skp2:
        pop     si
        pop     dx
        ret
cpuspeed endp


;
;    FLOATING-POINT DETECTION SUBROUTINE
;       Determines if the math coprocessor is present by 
;       checking the FPU's status and control words.  Also
;       detects if a 80386 system has a 80287 or 80387 FPU.
;
;       Called with:    ds = cs (to handle local variable fpu_temp)
;                       ds:[cpu_val] set with CPU type
;
;       Returns:        al = FPU type
;                             0 if no FPU
;                             1 if 8087
;                             2 if 80287
;                             3 if 80387
;                             4 if 80486 with FPU
;                             5 if Pentium with FPU
;
;       Regs used:      ax

fpu_temp dw     0                  ; temp word for detector

.386
.387                               ; allow math instructions

fputype proc    near

; Presence test: after FNINIT a real FPU reports a status word
;   whose low byte is 0 and a control word that masks to 3Fh.
;   fpu_temp is preloaded with a non-zero value, so if no FPU
;   stores anything the checks fail.  The no-wait (FN...) forms
;   are used so the CPU does not hang waiting on a missing FPU.

        fninit                     ; initialize FPU (reset)
        mov     [fpu_temp], 1234h  ; set any non zero value
        fnstsw  [fpu_temp]         ; get status word from FPU
        and     [fpu_temp], 0FFh   ; only look at bottom 8 bits
        jnz     fput_not_found     ; if non-zero, no FPU
        fnstcw  [fpu_temp]         ; get control word from FPU
        and     [fpu_temp], 103Fh  ; strip unneeded bits
        cmp     [fpu_temp], 3Fh    ; are the proper bits set ?
        je      fput_present       ; jump if so, FPU present

fput_not_found:
        mov     ax, 0              ; FPU not present
        jmp     fput_Exit 

; FPU was found, so see which type.  For any CPU other than the
;   80386 the FPU type follows directly from the CPU type.

fput_present:
        mov     ax, 1              ; assume 8087
        cmp     [cpu_val], 2       ; CPU below 286 ?
        jb      fput_Exit          ; if so, must be 8087
        mov     ax, 2              ; assume 80287 (flags unchanged)
        je      fput_Exit          ; if 80286, then FPU is 80287
        xor     ah, ah
        mov     al, [cpu_val]      ; get cpu, 80386 or higher
        cmp     al, 3              ; 80386 ?
        jne     fput_Exit          ; 486 or higher: al = cpu type

; 80386 could have a 80287 or 80387 FPU.  To find out, check if
;   -infinity is equal to +infinity.  The 80287 treats them as
;   equal; the 80387 does not.  FCOMPP sets status bit C3
;   (4000h) when the two operands compare equal.

        fld1                       ; push +1 onto stack
        fldz                       ; push +0 onto the stack
        fdiv                       ; 1/0 = infinity
        fld     st                 ; load +infinity onto stack
        fchs                       ; now -infinity on stack
        fcompp                     ; compare + and - infinity
        fstsw   [fpu_temp]         ; status of compare in temp
        test    [fpu_temp], 4000h  ; C3 set means they are equal
        jz      fput_Exit          ; C3 clear: not equal, 387 (al=3)
        mov     al, 2              ; +/- infinity equal, 80287
fput_Exit:
        ret
fputype endp
.8086


;
;    FLOATING POINT ON CHIP SUBROUTINE
;       For the 80486/Pentium, a check is made to see if the FPU
;       is on the CPU chip or is separate.  The Extension Type
;       bit 4 in CR0 is checked to see if it can be changed.
;       If it can't be changed, the CPU has the FPU inside the
;       chip.  If the bit can be changed, it's outside the CPU.
;
;                   ********** IMPORTANT **********
;         Changing the CR0 register while in V86 mode causes
;         a CPU fault, that should be handled transparently by
;         the protected mode software.  See book for details.
;
;       Called with:    ds:[fpu_val] set with FPU type
;
;       Returns:        al = 0 if FPU not on CPU or no FPU
;                            1 if FPU inside CPU
;
;       Regs used:      ax

.386P

fpuloc proc     near

; Tries to clear the Extension Type bit (CR0 bit 4) and reads CR0
;   back.  If the bit stays set, it cannot be changed, which the
;   routine takes to mean the FPU is on the CPU chip.  CR0 is put
;   back to its original value before returning.  Only bit 4 is
;   reported: the neighbouring CR0 bits 5-7 are masked off so the
;   result is always 0 or 1.

        xor     al, al             ; assume FPU not in CPU
        cmp     [fpu_val], 4       ; 486 or higher CPU with FPU?
        jb      fpul_exit          ; exit if not
        push    bx
        push    eax
        mov     eax, cr0
        mov     bx, ax             ; save original lower portion
        and     eax, 0FFFFFFEFh    ; attempt to clear ET (bit 4)
        mov     cr0, eax
        mov     eax, cr0           ; read back what really stuck
        xchg    ax, bx             ; ax = original low word,
                                   ;   bx = low word read back
        mov     cr0, eax           ; restore cr0 to original
        pop     eax
        mov     al, bl             ; low byte of the read-back CR0
        shr     al, 4              ; move ET bit down to bit 0 
        and     al, 1              ; drop CR0 bits 5-7
        pop     bx
fpul_exit:
        ret
fpuloc  endp
.8086


;
;    CPU PREFETCH QUEUE DEPTH SUBROUTINE
;       This routine returns the depth of the pre-fetch 
;       queue.  Each processor uses a different size queue,
;       in which self-modifying code does not work.  This 
;       routine cleverly uses self modifying code to determine 
;       the queue size.  
;
;       Called with:    nothing
;
;       Returns:        ax = CPU pre-fetch size
;                             4 if 8088/8086
;                             8 if 80286
;                             16 if 80386
;                             32 if 80486
;
;       Regs used:      ax

QSIZE   equ     40h                ; maximum queue size

cpuQ    proc    far
        push    bx
        push    cx
        push    dx
        push    di
        push    es

; The Queue size test works by issuing a large number of 
;   very slow DIV instructions to allow the CPU to load
;   up the pre-fetch queue while the slow instructions
;   are being executed.  Next the REP STOSB instruction 
;   will overwrite some number of NOP instructions 
;   with "INC BX".  If there was no pre-fetch queue, all 
;   of the NOP instructions would be converted to INC BX.
;   If there was a 16 byte pre-fetch queue, the change is 
;   made to memory, but not the NOPs in the pre-fetch 
;   Queue.
;
;   BX starts at 0, so after the qdata area has executed BX
;   holds the number of INC BX opcodes the CPU actually ran,
;   i.e. the bytes that were NOT already in the queue.

upq4test:
        mov     ax, cs             ; Initialize queue in
        mov     es, ax             ;   code segment
        mov     cx, QSIZE          ; queue size 
        mov     di, offset qdata   ; queue location
        mov     al, 90h            ; set to all NOPs to start
        cld
        rep     stosb              ; all Queue bytes NOPs

        xor     bx, bx             ; start value for count
        mov     cx, QSIZE          ; number of bytes to modify
        mov     di, offset qdata+QSIZE-1 ; ptr where to modify 
        std                        ; write backwards from end
        mov     al, 80h
        out     70h, al            ; disable NMI
        cli                        ; disable interrupts
        mov     ax, 43h            ; 43h = "inc bx"
        mov     dl, 1              ; divisor 1 (no effect to AL)
        jmp     short upqflush     ; flush pre-fetch queue
ALIGN 16                           ; align code on para boundary
upqflush:
        div     dl                 ; load up pre-fetch q with
        div     dl                 ;   lots of slow instructions
        div     dl
        div     dl
        div     dl
        div     dl
        div     dl
        div     dl
        rep     stosb              ; al to es:[di] down thru [di-3Fh]
                                   ;   (last byte of qdata to first)
qdata   db      QSIZE dup (90h)    ; NOP instructions
        xor     al, al  
        out     70h, al            ; enable NMI
        sti
        cld                        ; undo the STD used above
        mov     ax, QSIZE+2        ; size (less 2 for REP STOSB)
        sub     ax, bx             ; ax = queue size
        pop     es
        pop     di
        pop     dx
        pop     cx
        pop     bx
        ret
cpuQ    endp


;
;    OUTPUT PRE-FETCH QUEUE INFORMATION
;       Display information about pre-fetch queue size
;
;       Call with:      al = prefetch queue size
;
;       Returns:        text output to screen
;
;       Regs used:      ax, bx, cx, dx, si, di
;
;       Subs called:    decw

outprefetch proc    near

; Formats the queue size passed in AL into the text at [cpupfq]
;   and shows the cpupf message.  Sizes below 48h are written as
;   a left-justified decimal number by decw; anything larger is
;   shown as the fixed text ">70".

        xor     ah, ah             ; ax = size as a 16-bit value
        mov     di, offset cpupfq  ; where the number text goes
        cmp     ax, 48h            ; small enough to print ?
        jb      q_in_range         ; yes, convert it

q_too_large:
        mov     byte ptr [di], '>'
        mov     word ptr [di+1], '07'   ; greater than 70
        jmp     short q_out_done

q_in_range:
        xor     bl, bl             ; left justify flag
        call    decw               ; convert ax to decimal

q_out_done:
        OUTMSG  cpupf              ; pre-fetch message
        ret
outprefetch endp


;
;    CPU STEP
;       If the CPUID instruction is supported, get info from
;       it, otherwise, check if BIOS supports function that 
;       has the CPU step information.  Message displayed 
;       depending on results.
;
;       Call with:      ds:[cpu_val] set with CPU
;                       ds:[cpu_info] set with CPUID flag
;
;       Returns:        display step information
;
;       Regs used:      eax, ebx, ecx, edx, si, di
;
;       Subs called:    hex2ascii, xfer_bytes, hook_int6, 
;                       restore_int6, bad_op_handler

.386P

cpustep proc    near

; Three sources are tried in order:
;   1. CPUID (vendor string, then the value returned in AL by
;      function 1),
;   2. BIOS int 15h function C910h,
;   3. an opcode probe that only succeeds on an A-step 386/486.
; CPUs below the 386 go straight to the "no information" message.

        cmp     [cpu_val], 3       ; 386 or later ?
        jae     cpust_skp1
        jmp     cpus_no_BIOS_rev

cpust_skp1:
        test    [cpu_info], 1      ; CPUID supported ?
        jz      cpus_noid          ; jump if not
        mov     eax, 0             ; get vendor string function
        CPUID                      ;   for CPUID
        mov     di, offset idtext  ; vendor text is built here
        mov     eax, ebx
        call    xfer_bytes         ; xfer 4 text bytes from eax
        mov     eax, edx
        call    xfer_bytes         ; xfer 4 text bytes from eax
        mov     eax, ecx
        call    xfer_bytes         ; xfer 4 text bytes from eax
        OUTMSG  idtxtmg            ; display vendor string

        mov     eax, 1             ; get stepping information
        CPUID                      ;   for CPUID in al
        call    hex2ascii          ; convert al to ascii in bx
        mov     word ptr modval, bx  ; xfer ascii bytes
        mov     dx, offset stepmsg
        jmp     cpus_out_msg       ; output message

; CPUID not supported, so let's try the BIOS function for the ID.
;   Unfortunately the BIOS function is not supported by most 
;   manufacturer's BIOSs.  On success CH and CL are written as
;   four hex digits starting at [bvernum].

cpus_noid:
        mov     ax, 0C910h         ; BIOS get chip revision
        int     15h                ;   returned in cx
        jc      cpus_chk_A_step    ; carry if unsuccessful
        jcxz    cpus_chk_A_step    ; 0=not supported
        mov     al, ch
        mov     di, offset bvernum
        call    hex2ascii
        mov     [di], bx           ; two hex digits for CH
        add     di, 2
        mov     al, cl
        call    hex2ascii
        mov     [di], bx           ; two hex digits for CL
        add     di, 4              ; di = where the step text goes
        mov     si, offset bvt     ; get text for step
        mov     dx, offset bver    ; output message

; Search the bvt table for this revision.  Entries are
;   (bvte - bvt) bytes apart and a word of 0FFFFh ends the table.
; NOTE(review): the compare uses AX, which at this point holds
;   whatever hex2ascii left there, while the BIOS revision is in
;   CX - confirm which register the table lookup should use.

cpus_loop:
        cmp     word ptr [si], 0FFFFh ; at end ?
        je      cpus_out_msg       ; jump if so
        cmp     [si], ax           ; version match ?
        je      cpus_bios_match    ; jump if so
        add     si, offset bvte - offset bvt  
        jmp     cpus_loop

; NOTE(review): REP MOVSB stores through ES:DI; this presumably
;   relies on ES = DS on entry - confirm against the caller.

cpus_bios_match:
        mov     cx, offset bvte - offset bvt -1
        cld
        rep     movsb              ; xfer CPU & step text
        jmp     cpus_out_msg

; Ok - neither the CPUID or the BIOS supports getting the CPU 
;   step, so a few checks may determine if a 386 or 486 is 
;   revision A or if it is a later revision.  The opcode bytes
;   0Fh, 0A6h are executed with int 6 hooked; [badoff] is set to
;   cpus_no_rev so a rejected opcode lands on the "no revision"
;   message.  If the opcode executes, the Step A message is shown.

cpus_chk_A_step:
        call    hook_int6          ; hook the bad-opcode int
        cmp     [cpu_val], 3       ; 386 only ?
        jne     cpus_rev_486       ; jump if above
        mov     [badoff], offset cpus_no_rev ; for bad opcode
        mov     ax, 1
        mov     bx, 1
        mov     cl, 1
        mov     dx, 1
        db      0Fh, 0A6h, 0DAh    ; xbts  bx, dx, ax, al
        nop                        ;  (only valid on 386 A step)
        nop
        OUTMSG  stepa              ; use Step A message
        jmp     cpus_restore

cpus_rev_486:
        cmp     [cpu_val], 4       ; 486 only ?
        jne     cpus_no_rev        ; jump if not
        mov     [badoff], offset cpus_no_rev ; for bad opcode
        db      0Fh, 0A6h, 0DAh    ; cmpxchg   bx, dx
        nop                        ;  (only valid on 486 A step)
        nop
        OUTMSG  stepa              ; use Step A message
        jmp     cpus_restore

cpus_no_rev:
        OUTMSG  novermg            ; no support in BIOS message
cpus_restore:                      ; restore interrupt vector 6
        call    restore_int6       ; restore the int 6 handler
        jmp     cpus_exit

; Paths that never hooked int 6 finish here: DX points at the
;   message to print with DOS function 9.

cpus_no_BIOS_rev:
        mov     dx, offset novermg ; no support in BIOS message
cpus_out_msg:
        mov     ah, 9
        int     21h
cpus_exit:
        ret
cpustep endp


;
;    TRANSFER BYTES SUBROUTINE
;       Transfer the four bytes in EAX to [di]
;
;       Call with:      eax = source bytes
;                       ds:[di] = destination
;
;       Returns:        4 bytes transferred
;                       di = original di+4
;
;       Regs used:      eax

.386                               ; allow 386 instructions

xfer_bytes proc    near

; Stores the four bytes of EAX at ds:[di], lowest byte first,
;   and leaves DI advanced by 4.  EAX is rotated a full 32 bits
;   in total, so it returns with its original value.  Flags are
;   modified.

        mov     [di], al           ; byte 0 (bits 0-7)
        mov     [di+1], ah         ; byte 1 (bits 8-15)
        ror     eax, 16            ; bring upper word down
        mov     [di+2], al         ; byte 2 (bits 16-23)
        mov     [di+3], ah         ; byte 3 (bits 24-31)
        ror     eax, 16            ; eax back to original
        add     di, 4              ; di = original di+4
        ret
xfer_bytes endp
.8086


;
;    CPU MODE
;       Check if the 286 or later CPU is in real, protected or
;       V86 mode.  It is assumed that if the 80386 or later
;       processor is in protected mode, we must be in V86 mode.
;
;       Call with:      ds:[cpu_val] set
;
;       Returns:        al = 0 protected mode not supported 
;                            1 if real mode 
;                            2 if protected mode
;                            3 if V86 mode
;                       ah = privilege level 0 to 3 
;
;       Regs used:      ax

.386P                              ; allow 286/386 instructions

cpumode proc    near

; CX builds the result: CL = mode code (0 to 3), CH = the I/O
;   privilege level field taken from bits 12-13 of the flags.
;   CX is copied to AX on exit and the caller's CX is restored.

        push    cx
        xor     cx, cx             ; assume no protected mode
        cmp     [cpu_val], 2       ; 286 CPU or later ?
        jb      cpum_Exit          ; jump if not
        mov     cx, 1              ; assume real mode flag
        smsw    ax                 ; get machine status word
        test    ax, 1              ; in protected mode ?
        jz      cpum_Exit          ; jump if not (real mode)

cpu_not_real:
        mov     cl, 2              ; protected mode
        pushf
        pop     ax                 ; get flags
        and     ax, 3000h          ; get I/O privilege level
        shr     ax, 12             ; move bits 12-13 down to 0-1
        mov     ch, al             ; save privilege (0 to 3)
        cmp     [cpu_val], 2       ; if 286, then protected
        je      cpum_Exit          ; jump if so

; On 386 or later, we have to assume V86 mode.  Note that the 
;  next four lines of code (commented out) might seem the 
;  correct way to detect V86 mode.  It will not work, since the 
;  PUSHFD instruction clears the VM bit before placing it on the 
;  stack.  This is undocumented on the 386 and 486, but 
;  documented on the Pentium. 

;        pushfd                     ; save flags on stack
;        pop     eax                ; get extended flags
;        test    eax, 20000h        ; V86 mode ?
;        jz      cpum_out_mode      ; jump if not

        mov     cl, 3              ; return V86 mode

cpum_Exit:
        mov     ax, cx             ; return status
        pop     cx
        ret
cpumode endp
.8086


;
;    CPU MODE DISPLAY
;       Display results from CPU MODE routine
;
;       Call with:      ax = real/protected mode status from
;                            CPUMODE
;
;       Returns:        display mode information
;
;       Regs used:      ax, bx, dx
;
;       Subs called:    hex2ascii

cpum_display  proc    near
; Print the message that matches the mode code in AL (as returned
; by CPUMODE): nothing for 0, mode_R for 1, mode_V for 3, and
; mode_P for anything else.  For mode_P the privilege digit is
; first patched into the message at [mode_Pa] from AH.

        cmp     al, 0              ; any protected mode support ?
        je      cpumd_Exit         ; no - nothing to show

        cmp     al, 1              ; real mode ?
        jne     cpumd_not_real
        mov     dx, offset mode_R
        jmp     cpumd_outmsg

cpumd_not_real:
        cmp     al, 3              ; V86 mode ?
        jne     cpumd_prot
        mov     dx, offset mode_V
        jmp     cpumd_outmsg

cpumd_prot:
        mov     al, ah             ; privilege level to convert
        call    hex2ascii          ; bh = ascii of low nibble
        mov     [mode_Pa], bh      ; patch level into the message
        mov     dx, offset mode_P

cpumd_outmsg:
        mov     ah, 9              ; DOS display string at ds:dx
        int     21h
cpumd_Exit:
        ret
cpum_display endp


;
;    CPU CACHE
;       Check whether the internal cache of a later CPU is 
;       enabled, and report its write-through setting.  Cyrix, 
;       IBM, and Intel all handle the cache information a little 
;       differently.
;
;       Call with:      ds:[cpu_val] set to CPU
;                       ds:[cpu_mfg] set to manufacturer
;
;       Returns:        al = 0 if no cache 
;                            1 if cache disabled
;                            2 if enabled, no-write through
;                            3 if enabled, write through
;
;       Regs used:      eax, bx
;                       ecx, edx (IBM path only, changed by RDMSR)

.486P                              ; allow 486 instructions
cpu_cache proc  near
; Work out the internal cache state and return it in AL:
;   0 = no cache, 1 = present but disabled, 2 = enabled without
;   write-through, 3 = enabled with write-through.
; BL carries the answer; BH holds the manufacturer code.  In each
; branch the probing instruction (MOV from CR0 / RDMSR) is issued
; before BL is updated.

        mov     bl, 0              ; nothing found yet
        cmp     [cpu_val], 3       ; cache only in 486+, and some 386
        jb      cpuc_Exit          ; older CPU - no cache
        mov     bh, [cpu_mfg]
        cmp     bh, 7              ; Cyrix ?
        je      cpuc_cyrix
        cmp     bh, 6              ; IBM ?
        je      cpuc_ibm

; Intel method of detection (486 and later only)

        cmp     [cpu_val], 4       ; anything below a 486 ?
        jb      cpuc_Exit          ; then no cache
        mov     eax, cr0           ; get control register
        mov     bl, 1              ; cache present, assume disabled
        test    eax, 40000000h     ; cache disabled ?
        jnz     cpuc_Exit          ; jump if so
        mov     bl, 2              ; enabled, assume no write-through
        test    eax, 20000000h     ; write through ?
        jnz     cpuc_Exit          ; jump if not
        mov     bl, 3              ; enabled with write-through
        jmp     cpuc_Exit

; IBM method: cache enable is bit 7 of machine specific reg 1000h

cpuc_ibm:
        mov     ecx, 1000h         ; select register 1000h
        RDMSR                      ; read machine specific reg
        mov     bl, 1              ; cache present, assume disabled
        test    eax, 80h           ; cache enabled ?
        jz      cpuc_Exit          ; jump if not
        mov     bl, 3              ; cache with write-through
        jmp     cpuc_Exit

; Cyrix is similar to Intel, but if enabled, always has write-through

cpuc_cyrix:
        mov     eax, cr0           ; get control register
        mov     bl, 1              ; cache present, assume disabled
        test    eax, 40000000h     ; cache disabled ?
        jnz     cpuc_Exit          ; jump if so
        mov     bl, 3              ; enabled with write-through

cpuc_Exit:
        mov     al, bl             ; return status in al
        ret
cpu_cache endp
.8086


;
;    CPU CACHE DISPLAY
;       Display status from CPU_CACHE
;
;       Call with:      al = value from CPU_CACHE
;
;       Returns:        display cache information
;
;       Regs used:      dx

cpu_cache_display proc  near
; Pick the message for the CPU_CACHE code in AL and print it:
;   0 -> cache_X, 1 -> cache_D, 2 -> cache_N, anything else ->
;   cache_W.  AX is preserved for the caller.

        push    ax
        mov     dx, offset cache_W ; default: write-through
        cmp     al, 2
        ja      cpuc_msgout        ; above 2 - write-through
        mov     dx, offset cache_N ; (MOV leaves the flags alone)
        je      cpuc_msgout        ; exactly 2 - no write-through
        mov     dx, offset cache_D
        cmp     al, 1
        je      cpuc_msgout        ; 1 - cache disabled
        mov     dx, offset cache_X ; 0 - no cache
cpuc_msgout:
        mov     ah, 9              ; DOS display string at ds:dx
        int     21h
        pop     ax
        ret
cpu_cache_display endp


;
;    FIND DATA CACHE SIZE 
;       Determine the internal CPU data cache size.  Results 
;       will not be valid on a CPU without an internal cache.
;
;       Call with:      nothing
;
;       Returns:        ax = cache size * 1KB
;
;       Regs used:      ax, bx, cx, si, di
;
;       Subs called:    read_n_time

cache_d_size proc  near
; Estimate the internal data cache size in KB.
;
;   1. Time seven read tests with READ_N_TIME.  The block size
;      doubles each test (256 words up to 16384 words) while the
;      repeat count halves (128 down to 2), so every test reads
;      the same total number of words.  Results go to [timings].
;   2. Find the adjacent pair of timings with the largest
;      absolute difference.  The block size of the first entry
;      of that pair, in KB, is the candidate cache size.
;   3. Reject the result (return 0) unless that largest
;      difference is more than 5% of the first timing value.
;
;   Returns:  ax = cache size in KB, or 0 if no clear step found
;   Changes:  ax, bx, cx, di (dx and bp are used as scratch but
;             are saved and restored)

        push    dx
        push    bp

        mov     di, offset timings ; array of timings
        mov     bh, 7              ; get 7 sets of values
        mov     bl, 128            ; number of times to re-read
        mov     cx, 256            ; start at .5K (256 words)

chd_loop:
        call    read_n_time        ; read cx words @ ds:0, 
                                   ;   returns duration in ax

        mov     [di], ax           ; save value
        add     di, 2
        shr     bl, 1              ; number of group reads/2
        shl     cx, 1              ; number of words * 2
        dec     bh
        jnz     chd_loop

        mov     bp, 1              ; size paired with 2nd timing, KB
        mov     dx, bp             ; dx = size at the largest step
        mov     di, offset timings 
        mov     cx, 6              ; 7 timings = 6 adjacent pairs
        xor     bx, bx             ; largest difference so far

chd_loop3:
        mov     ax, [di]
        sub     ax, [di+2]         ; difference between times
        jnc     chd_not_neg        ; timings are unsigned counts,
                                   ;  so test the borrow, not sign
        neg     ax                 ; get positive difference
chd_not_neg:
        cmp     ax, bx             ; which is larger ?
        jb      chd_skp2
        mov     bx, ax             ; save new large value
        mov     dx, bp             ; save cache size
chd_skp2:
        shl     bp, 1
        add     di, 2
        loop    chd_loop3
        shr     dx, 1              ; step to the smaller block of
                                   ;  the pair (the one that fit)

; now check that there was sufficient difference to matter
;   (i.e. maximum difference should be greater than 5% of first
;   timing value).  diff > timing/20 is tested as diff*20 > timing,
;   which needs no division and so cannot fault on a zero timing.

        mov     cx, dx             ; save size
        mov     ax, 20
        mul     bx                 ; dx:ax = largest difference * 20
        or      dx, dx             ; product above 65535 ?
        jnz     chd_ok             ; then larger than any timing
        cmp     ax, [timings]      ; compare with 1st timing value
        ja      chd_ok             ; more than 5% - result is valid
        xor     cx, cx             ; return zero
chd_ok:  
        mov     ax, cx
        pop     bp
        pop     dx
        ret
cache_d_size endp


;
;    READ AND TIME
;       Read CX words from memory, starting at ds:0.   Re-read
;       the data BL times. Time the duration required for 
;       block of reads using hardware timer 2.  Interrupts are
;       disabled during the test to improve results.
;
;       Call with:      cx = number of words to read (0FFFh max)
;                       bl = number of times to repeat read
;
;       Returns:        ax = duration of reads (ax*838nS = time)
;
;       Regs used:      ax

read_n_time  proc    near
; Time BL passes of reading CX words from ds:0 with timer
; channel 2 and return the elapsed tick count in AX.
;
; Sequence: program channel 2 with a start count of FFFFh, switch
; its gate on through port 61h bit 0, run the reads, switch the
; gate off again, latch and read the count, and return
; FFFFh - count.
;
; Notes grounded in the code below:
;  - dx, si, bx and cx are saved and restored; ax is the result.
;  - The direction flag is cleared (CLD) and left clear.
;  - Interrupts are enabled (STI) on the way out regardless of
;    their state on entry.
;  - The gate bit in port 61h is left off on exit; the other
;    bits are written back as they were read.
;  - bl = 0 on entry gives 256 passes, because the count is
;    decremented before it is tested.
;  - NOTE(review): the routine header gives 0FFFh as the maximum
;    word count, but cache_d_size calls this with up to 16384
;    (4000h) words - confirm which limit is intended.

        push    dx
        push    si
        mov     al, 0B0h           ; Timer 2 command, mode 0
        out     43h, al            ; send command
        IODELAY
        mov     al, 0FFh           ; counter value FFFF
        out     42h, al            ; send lsb to counter
        IODELAY
        out     42h, al            ; send msb to counter
        IODELAY
        cli                        ; disable interrupts
        in      al, 61h            ; read the current contents
        IODELAY
        or      al, 1              ; set gate bit on
        out     61h, al            ; activate counter
        cld                        ; lodsw must step si upwards
        push    bx                 ; bl is counted down below

read_again:
        push    cx                 ; rep consumes cx; keep the size
        xor     si, si             ; start from 0 
        rep     lodsw              ; read block
        pop     cx
        dec     bl                 ; number of times to cycle
        jnz     read_again

        pop     bx
        in      al, 61h            ; read the current contents
        IODELAY
        and     al, 0FEh           ; set gate bit off
        out     61h, al            ; disable the counter
        sti                        ; enable interrupts

        mov     al, 80h            ; latch output command
        out     43h, al            ; send command 
        IODELAY
        in      al, 42h            ; get lsb of counter
        IODELAY
        mov     dl, al
        in      al, 42h            ; get msb of counter
        mov     dh, al             ; dx = counter value
        mov     ax, 0FFFFh         ; starting value
        sub     ax, dx             ; ax = duration count
        pop     si
        pop     dx
        ret
read_n_time endp


;
;    DISPLAY CACHE TIMINGS
;       Display timings if "+" command line option set.  Timings
;       are the duration of each test, starting at .5K, through
;       32K.  Time is the number of ticks, each 838nS long.
;
;       Called with     ds:[cmd_line] = "+" if to display
;                       ds:[timings] array set
;                       es = ds
;
;       Returns:        display of raw cache timing values
;
;       Regs used:      ax, bx, cx, si, di
;
;       Subs called:    decw

timings_display proc near
; When the command line option byte is "+", convert the seven
; words in [timings] to decimal text at [cache_time1], each one
; followed by two spaces, then print the cache_time line.
; Any other option byte returns without output.

        cmp     [cmd_line], "+"    ; display timings ?
        jne     td_skp1            ; jump if not

        mov     si, offset timings     ; source: raw timing words
        mov     di, offset cache_time1 ; destination: message text
        mov     cx, 7                  ; seven timings to show
td_loop:
        mov     ax, [si]             ; get timing value
        add     si, 2                ; step to the next one
        xor     bl, bl               ; bl = 0: left justified
        call    decw                 ; digits to [di], di advanced
        mov     word ptr [di], '  '  ; 2 spaces between timings
        add     di, 2
        dec     cx
        jnz     td_loop              ; get next timing

        OUTMSG  cache_time         ; display timing line
td_skp1:
        ret
timings_display endp


;
;    HOOK INTERRUPT 6
;       Save the old interrupt 6 vector and replace it with  
;       a new vector to the bad_op_handler.  Vectors are handled 
;       directly without using DOS.
;
;       Called with:    nothing
;                      
;       Returns:        vector hooked
;                       old vector stored at
;                         ds:[old_int6_seg]
;                         ds:[old_int6_off]
;
;       Regs used:      none

hook_int6 proc    near
; Point interrupt vector 6 at bad_op_handler, after copying the
; previous vector to [old_int6_off] / [old_int6_seg].  The table
; at segment 0 is edited directly with interrupts off; they are
; switched back on before returning.  ax and es are preserved.

        push    ax
        push    es
        xor     ax, ax
        mov     es, ax             ; es = 0, interrupt vector table
        cli                        ; disable interrupts

        mov     ax, es:[6*4]       ; save original offset
        mov     [old_int6_off], ax
        mov     ax, es:[6*4+2]     ; save original segment
        mov     [old_int6_seg], ax

        mov     word ptr es:[6*4], offset bad_op_handler
        mov     word ptr es:[6*4+2], seg bad_op_handler

        sti                        ; enable interrupts
        pop     es
        pop     ax
        ret
hook_int6 endp


;
;    RESTORE INTERRUPT 6
;       Restore the previously saved old interrupt 6 vector.
;       Vectors handled directly without using DOS.
;
;       Called with:    old vector stored at
;                         ds:[old_int6_seg]
;                         ds:[old_int6_off]
;                      
;       Returns:        vector restored
;
;       Regs used:      none

restore_int6 proc    near
; Write the vector saved in [old_int6_off] / [old_int6_seg] back
; into interrupt slot 6 of the table at segment 0.  Interrupts
; are off during the update and on at return.  ax and es are
; preserved.

        push    ax
        push    es
        xor     ax, ax
        mov     es, ax             ; es = 0, interrupt vector table
        cli                        ; disable interrupts

        mov     ax, [old_int6_off] ; original offset
        mov     es:[6*4], ax
        mov     ax, [old_int6_seg] ; original segment
        mov     es:[6*4+2], ax

        sti                        ; enable interrupts
        pop     es
        pop     ax
        ret
restore_int6 endp


;
;    BAD OFFSET INTERRUPT HANDLER
;       If a bad opcode occurs (80286 or later) will come here.
;       The saved BADOFF offset is used to goto the routine
;       previously stored in BADOFF.
;
;       In a few cases, it is also used for general protection
;       faults (interrupt 0Dh, see hook_intD).  A few instructions
;       (RDMSR & WRMSR) can raise one if not supported, so we'll
;       come here as well.
;
;       Called with:    cs:[badoff] previously set
;
;       Returns:        returns to address stored in badoff


bad_op_handler proc far
; Interrupt handler: overwrite the return offset in the interrupt
; frame with the word at cs:[badoff], then IRET.  Only the offset
; is replaced; the segment and flags words of the frame are left
; as pushed.  ax and bp are preserved.
;
; Stack after the two pushes below (bp = sp after PUSH BP):
;   [bp+0] saved bp   [bp+2] return offset   [bp+4] return segment

        push    bp
        mov     bp, sp
        push    ax
        mov     ax, cs:[badoff]    ; where execution should resume
        mov     [bp+2], ax         ; insert new return offset
        pop     ax
        pop     bp
        iret
bad_op_handler endp


;
;    HOOK INTERRUPT D
;       Save the current interrupt mask state, and mask off
;       interrupt D (General Protection Fault).  Also save
;       the old interrupt D vector and replace it with  a new 
;       vector to bad_op_handler.  Vectors are handled 
;       directly without using DOS.  
;
;       NOTE: This is only effective if in Real Mode.  If in 
;       V86 mode, the memory manager will not pass the fault 
;       into this routine.
;
;       Called with:    nothing
;                      
;       Returns:        vector hooked
;                       old vector stored at
;                         ds:[old_intD_seg]
;                         ds:[old_intD_off]
;                       interrupt mask stored at
;                         ds:[int_mask]
;
;       Regs used:      none

hook_intD proc    near
; Mask IRQ 5 at the interrupt controller (port 21h, bit 5) after
; saving the current mask byte in [int_mask], then point
; interrupt vector 0Dh at bad_op_handler, keeping the previous
; vector in [old_intD_off] / [old_intD_seg].  Interrupts are off
; during the update and on at return.  ax and es are preserved.

        push    ax
        push    es
        xor     ax, ax
        mov     es, ax             ; es = 0, interrupt vector table
        cli                        ; disable interrupts

        in      al, 21h            ; get current interrupt mask
        mov     [int_mask], al     ; save
        IODELAY
        or      al, 20h            ; set bit to disable hardware 
        out     21h, al            ;  IRQ 5 (interrupt D)

        mov     ax, es:[0Dh*4]     ; save original offset
        mov     [old_intD_off], ax
        mov     ax, es:[0Dh*4+2]   ; save original segment
        mov     [old_intD_seg], ax

        mov     word ptr es:[0Dh*4], offset bad_op_handler
        mov     word ptr es:[0Dh*4+2], seg bad_op_handler

        sti                        ; enable interrupts
        pop     es
        pop     ax
        ret
hook_intD endp


;
;    RESTORE INTERRUPT D
;       Restore the previously saved old interrupt D vector
;       and restore the interrupt mask to its prior state.
;       Vectors handled directly without using DOS.
;
;       Called with:    old vector stored at
;                         ds:[old_intD_seg]
;                         ds:[old_intD_off]
;                       interrupt mask stored at 
;                         ds:[int_mask]
;                      
;       Returns:        vector restored
;
;       Regs used:      none

restore_intD proc    near
; Write the vector saved in [old_intD_off] / [old_intD_seg] back
; into interrupt slot 0Dh, then put the IRQ 5 mask bit back the
; way [int_mask] recorded it.  Only bit 5 of the controller mask
; can change: the saved byte is OR-ed with 0DFh so every other
; bit is 1, and AND-ing that into the current mask clears bit 5
; only if it was clear when saved.  Interrupts are off during the
; update and on at return.  ax and es are preserved.

        push    ax
        push    es
        xor     ax, ax
        mov     es, ax             ; es = 0, interrupt vector table
        cli                        ; disable interrupts

        mov     ax, [old_intD_off] ; original offset
        mov     es:[0Dh*4], ax
        mov     ax, [old_intD_seg] ; original segment
        mov     es:[0Dh*4+2], ax

        in      al, 21h            ; get current interrupt mask
        mov     ah, [int_mask]     ; get previous mask
        IODELAY
        or      ah, 0DFh           ; set all but IRQ 5 bit
        and     al, ah             ; insert old IRQ 5 state
        out     21h, al            ;  IRQ 5 (interrupt D)

        sti                        ; enable interrupts
        pop     es
        pop     ax
        ret
restore_intD endp


;
;    DECW
;       Convert the hex number in ax into decimal 1 to 5 ascii 
;       characters and insert into [di].  Increment di ptr.  The 
;       leading zeros are suppressed.
;
;       Called with:    ax = input hex number 
;                       di = pointer where to store characters
;                       bl = 0 for left justification 
;                            1 for no justification
;
;       Returns:        word converted to ascii at [di]
;
;       Regs used:      bx
;
;       Subs called:    hex2ascii

decw    proc    near
; Write the value in AX as decimal text at [di].
;   bl = 0: left justified - leading zeros take no space, so one
;           to five characters are written
;   bl = 1: fixed field - a suppressed leading zero still steps
;           di, leaving that byte of the buffer untouched, so
;           the last digit always lands on [di+4]
; di ends one past the last digit written.  ax, cx and dx are
; preserved; bx is changed.

        push    ax
        push    cx
        push    dx
        test    ax, ax             ; check for zero
        jnz     decw_nonzero       ; jump if not

        mov     al, 4              ; if justify, make ax = 0
        mul     bl                 ;    no justify, ax = 4
        add     di, ax             ; move pointer
        mov     byte ptr [di], '0' ; put up ascii zero
        jmp     decw_done          ; done !

decw_nonzero:
        xor     cl, cl             ; cl = 0: still suppressing zeros
        mov     ch, bl             ; ch = justify flag
        mov     dx, ax             ; dx = value still to convert

        mov     bx, 10000
        call    decw_place         ; 10000's digit
        mov     bx, 1000
        call    decw_place         ; 1000's digit
        mov     bx, 100
        call    decw_place         ; 100's digit
        mov     bx, 10
        call    decw_place         ; 10's digit

        mov     al, dl             ; get 1's digit (remainder)
        call    hex2ascii          ; convert to ascii
        mov     byte ptr [di], bh  ; put 1's digit in output
decw_done:
        inc     di
        pop     dx
        pop     cx
        pop     ax
        ret
decw    endp

; DECW_PLACE - private helper for DECW: handle one decimal place.
;
;       Called with:    dx = value still to convert
;                       bx = place value (10000, 1000, 100 or 10)
;                       cl = 0 while leading zeros are suppressed
;                       ch = 0 for left justification
;                       di = output pointer
;
;       Returns:        dx = remainder after this place
;                       cl, di updated; ax, bx changed

decw_place proc near
        mov     ax, dx             ; get value so far
        xor     dx, dx             ; zero high word for divide
        div     bx                 ; al = digit, dx = remainder
        cmp     cl, 0              ; zero suppression active ?
        jne     decwp_emit         ; jump if not
        cmp     al, 0              ; leading zero ?
        je      decwp_skip         ; suppress it
decwp_emit:
        inc     cl                 ; no longer zero suppression
        call    hex2ascii          ; convert to ascii
        mov     byte ptr [di], bh  ; put digit in output
        inc     di
        ret
decwp_skip:
        cmp     ch, 0              ; left justify ?
        je      decwp_ret          ; yes - digit takes no space
        inc     di                 ; no - step over the position
decwp_ret:
        ret
decw_place endp


;
;   HEX2ASCII
;       Convert the hex number in al into two ascii characters
;
;       Called with:    al = input hex number
;
;       Returns         bx = converted digits in ascii
;
;       Regs Used:      al, bx

hex2ascii       proc    near
; Split AL into its two hex digits and return them as ascii:
;   bh = character for the low nibble
;   bl = character for the high nibble (al returns the same)
; Digits above 9 come out as upper case 'A' to 'F'.

        mov     bl, al             ; keep the whole byte
        and     al, 0fh            ; low nibble
        call    h2a_digit
        mov     bh, al

        mov     al, bl             ; upper nibble
        shr     al, 1
        shr     al, 1
        shr     al, 1
        shr     al, 1
        call    h2a_digit
        mov     bl, al             ; bx has two ascii bytes
        ret
hex2ascii       endp

; H2A_DIGIT - private helper for HEX2ASCII.
;       Called with:    al = value 0 to 15
;       Returns:        al = '0'-'9' or 'A'-'F'

h2a_digit       proc    near
        cmp     al, 10
        jb      h2a_decimal
        add     al, 'A' - 10       ; 10-15 -> 'A'-'F'
        ret
h2a_decimal:
        add     al, '0'            ; 0-9 -> '0'-'9'
        ret
h2a_digit       endp


