**********************************************************************
* Name:		TIM
* Stack:	( ob --> ? time )
* Desc:		Execution time measurement
**********************************************************************
ASSEMBLE
* One-nibble constant emitted ahead of the command name.
* NOTE(review): presumably the library command property field - confirm.
	CON(1)	8
RPL
xNAME TIM
::
* Argument check for the object to be timed (see the stack diagram)
  CK1

* The delay and scaling constants in the code below are tuned for the GX,
* so stop with a message when the GX? test fails
  GX? NOTcase :: "Not on SX" DO$EXIT ;

* Equal opportunity for all programs:
* bind one lambda holding a system time sized hex string (TIMEPART1
* overwrites its contents with the start time) and collect garbage
* before the measurement starts

  SysTime 1LAMBIND			( Just allocate correct size )
  GARBAGE

* Quote the measuring secondary below so that it is pushed instead of
* being executed here; it is copied with TOTEMPOB and evaluated at the
* end of TIM

  '

  ::
    CODE
* TIMEPART1 - start of the measurement.
* Waits until the keyboard loop below falls through, burns a settle
* delay, stores a synchronized start time into the TIM lambda and
* finally pops the object from the RPL stack and jumps into it.
TIMEPART1	GOSBVL	=SAVPTR

* First make sure executing TIM itself by a keypress doesn't
* disturb the measurement: repeat BITMAP/Debounce for as long as
* A[W] comes back nonzero (presumably: a key is still held down)

		GOSBVL	=DisableIntr
-		GOSBVL	=BITMAP
		GOSBVL	=Debounce
		?A#0	W
		GOYES	-
		GOSBVL	=AllowIntr
		LC(5)	#1A00		Good experimental delay on GX
-		C=C-1	A		to cool things down
		GONC	-

* Now synchronize to tick boundary to get best possible start time
* TimGetLam leaves D0 at the time field of the lambda,
* Synchronize leaves the time in C[W]

		GOSUB	TimGetLam
		GOSUB	Synchronize
		DAT0=C	13		Store the start time
		GOSBVL	=GETPTR
* Read the pointer in stack level 1, step D1 past it and jump through it.
* NOTE(review): D=D+1 A presumably credits the freed stack level to the
* free space count kept in D[A] - confirm.
		A=DAT1	A		Pop the program
		D1=D1+	5
		D=D+1	A
		PC=(A)			And evaluate it

* Use following exit instead to measure overhead
* (kept commented out; it continues with the next object of the runstream
* instead of jumping into the popped object)

OverHead
*		A=A+1	A
*		A=DAT0	A
*		D0=D0+	5
*		PC=(A)

* Get TIM lambda variable
*
* Follows the pointer stored at G_TEMPENV, then the pointer found 15
* nibbles into that environment, and finally skips 10 nibbles of the
* referenced hex string (presumably its prologue and length fields).
* Out:	D0 -> time field (callers read and write 13 nibbles there)
* Uses:	A[A], D0

TimGetLam	D0=(5)	=G_TEMPENV
		A=DAT0	A
		D0=A			->tempenv
		D0=D0+	15		->lam1
		A=DAT0	A
		D0=A			->hxs_time
		D0=D0+	10		->time
		RTN

* Synchronize code with ticking clock
*
* Runs with interrupts disabled and:
*  1. checks the RUN bit of the timer control nibble and compares the
*     CRC register contents obtained after reading nextirq with the
*     stored G_TIMECRC; either failure makes D[S] nonzero and the
*     line synchronization below is skipped
*  2. otherwise waits until the line counter has just changed
*  3. clears the low nibble of TIMER2, reads 8 nibbles of it into C and
*     calls GetTimeEnd with B[W]=nextirq
* Out:	C[W] as left by GetTimeEnd (used as the time by the callers),
*	interrupts allowed again, returns only when carry is clear
* Uses:	A, B, C, D[S], D[X], D1, P, hex mode
* Errors: jumps to FUBAR4 when GetTimeEnd comes back with carry set,
*	raises error #D02 when the synchronization attempts run out
* NOTE(review): D[X] serves as the attempt counter but is not initialised
* in this routine, so the number of attempts before synchfail depends on
* what the caller left in D - confirm this is intended.

Synchronize	GOSBVL	=DisableIntr
		SETHEX
		D1=(5)	=TIMERCTRL.2
		C=DAT1	1		[SRQ WKE INT RUN]
		D=0	S		Flag timer is valid
		?CBIT=1	0
		GOYES	+
		D=D+1	S		D[S]<>0 - timer not running
+		D1=(2)	=CRC
		C=0	A
		DAT1=C	4		Clear CRC for time verification
		D1=(5)	=G_NEXTIRQ
		C=DAT1	13		C[0-13]=nextirq
		D1=(5)	=CRC
		C=0	A
		C=DAT1	4		C[A]=nextirq crc (true)
		D1=(5)	=G_TIMECRC
		A=0	A
		A=DAT1	4		A[A]=nextirq crc
		?C=A	A
		GOYES	+
		D=D+1	S		D[S]<>0 - invalid nextirq
+		D1=(4)	=G_NEXTIRQ
		A=0	W
		A=DAT1	13
		B=A	W		B[W]=nextirq
		C=0	W
		P=	0
		?D#0	S
		GOYES	skipsynch	Timer is bad - ignore synching

* Now synchronize close to display update boundary

		D1=(5)	=LINECOUNT	even address #128

* Even address
* Spin for as long as two consecutive reads of the line counter agree
* in nibble P (P=0); leaving the loop means the counter just changed
* and C holds the new value

synchloop	A=DAT1	B		Wait until close to line change
		C=DAT1	B
		?A=C	P
		GOYES	synchloop
* Fixed delay after the change
		LA(2)	#1C		Experimental value
-		A=A-1	B
		GONC	-
* Loading ten zero nibbles has no use for the value itself here since A
* is reloaded below; it is part of the experimentally tuned delay
		LAHEX	0000000000	Experimental - tighter might fail
* Count this attempt in D[X]; a carry out of D[X] gives up
		D=D+1	X
		GOC	synchfail	Synch failure - error
* If the line counter no longer matches C the delay ran past another
* change, so start over
		A=DAT1	B
		?A#C	P
		GOYES	synchloop

skipsynch	D1=(5)	=TIMER2
		C=0	P
		DAT1=C	P		Clear low nibble to stabilize
		C=DAT1	8
		GOSBVL	=GetTimeEnd
		GOSBVL	=AllowIntr
		RTNNC
		GOVLNG	=FUBAR4		Warmstart - bad time

synchfail	GOSBVL	=AllowIntr
		LC(5)	#D02		"Invalid Time"
		GOVLNG	=GPErrjmpC
    ENDCODE

  ( Program gets evaluated in here )

    CODE
* TIMEPART2 - end of the measurement.
*  1. reads nextirq into B[W] and the 8 nibble TIMER2 value into R3
*  2. counts in A[A] how many passes of the wait loop it takes until the
*     low three nibbles of TIMER2 change ("loops"/"waits")
*  3. writes zero to TIMER2 ("Set interrupt") and runs a ladder of LC(2)
*     loads; the C[A] value found afterwards in the interrupt save
*     variable tells how far the ladder got ("frac")
*  4. converts the ticks with GetTimeEnd and replaces the start time in
*     the TIM lambda by the difference new time - old time
*  5. hands ticks and time to CLKUPD with interrupts disabled
*  6. leaves through Push2#Loop with R0[A]=frac and R1[A]=loops
TIMEPART2	GOSBVL	=SAVPTR
		A=0	W		Read current time registers
		C=0	W
		D0=(5)	=G_NEXTIRQ
		D1=(5)	=TIMER2
		A=DAT0	13
		C=DAT1	8
		B=A	W		B[W] = nextirq
		D0=(5)	=G_SAVE_C[A]	C[A] interrupt save variable address
		R3=C	W		R3[W] = ticks
		D=C	A
* A[A] starts at -1 so that the first pass of the loop makes it 0
		A=0	A
		A=A-1	A		Avoid getting a display update
* Even address
-		A=A+1	A		during C=C+1 sequence
		C=DAT1	A		[A] chosen to get a .14 tick loop
		?C=D	X		Experimentally counter = 0 - 6
		GOYES	-		35 cycle loop (30 in the last one)
* Even address
		C=0	W		e 12.25	Set interrupt
		DAT1=C	8		o 24
		C=DAT1	W		o 36
		C=DAT1	W		o 36
* 5 cycle units

		LC(2)	1		And keep increasing C[A]
		LC(2)	2		...
		LC(2)	3
		LC(2)	4		The number of opcodes needed is
		LC(2)	5		dependent on the possible values.
		LC(2)	6		Experimentally the limits are
		LC(2)	7			5 - 28
		LC(2)	8		but we don't really care to optimize
		LC(2)	9
		LC(2)	10
		LC(2)	11
		LC(2)	12
		LC(2)	13
		LC(2)	14		LC(2) is stable in GX so that
		LC(2)	15		it takes equal cycles for even
		LC(2)	16		and odd addresses
		LC(2)	17
		LC(2)	18
		LC(2)	19
		LC(2)	20
		LC(2)	21
		LC(2)	22
		LC(2)	23
		LC(2)	24
		LC(2)	25
		LC(2)	26
		LC(2)	27
		LC(2)	28
		LC(2)	29
		LC(2)	30
		LC(2)	31
		LC(2)	32
		LC(2)	33
		LC(2)	34
		LC(2)	35
		LC(2)	36
		LC(2)	37
		LC(2)	38
		LC(2)	39
* D0 still points to the interrupt save variable set up above
		C=DAT0	A		C[A]=interrupt C[A]
		R0=C.F	A		R0[A]=frac
		R1=A.F	A		R1[A]=loops

		C=R3	W		C[W]=ticks
		GOSBVL	=GetTimeEnd	C[W]=time
		R4=C	W		R4[W]=time

* TimGetLam only touches A[A] and D0, so C[W] still holds the new time
		GOSUB	TimGetLam	C[W]=new time
		A=DAT0	13		A[W]=old time
		C=C-A	W		C[W]=difference
		DAT0=C	13		Store back

* Fix time to correct value. Some ticks have been lost in the code above
* and in the generated interrupt - but can't really help it.
* R0[A] is parked in R2 around the calls below and restored afterwards
* (presumably because one of them uses R0 - confirm).

		A=R0	A
		R2=A	A		Save R0[A]
		GOSBVL	=DisableIntr
		GOSBVL	=getCLKON	Establish sCLKON
		C=R3	W
		D=C	W		D[W]=ticks
		C=R4	W		C[W]=time
		GOSBVL	=CLKUPD
		GOSBVL	=AllowIntr
		A=R2	A
		R0=A	A
		GOVLNG	=Push2#Loop	Push fractional ticks & dispwaits

    ENDCODE

    1GETLAM #>%					( #frac #waits %ticks )

* The ratio is 7 for the counters,
* so the combined count is #frac + 7 * #waits

    UNROT SEVEN #* #+

* And the unit is 0.02 ticks

* C[A] is 0.02 ticks in GX
* The scaled count is subtracted from the tick difference

    UNCOERCE % 0.02 %* %-

* The total overhead was measured by using the commented out code
* instead of evaluating the object at label 'OverHead'.
* The measured average from 100 runs was 3.41 ticks
* But - after a speedtest + ON-C the value will be higher
*	until TurnOff is done. Thus we calibrate by timing a known
*	result instead of a fixed amount each time.
*
* The calibration run reuses both machine code parts by jumping into
* them: %0 takes the place of the timed object, TIMEPART1 pops and
* evaluates it and TIMEPART2 pushes the counters. ROTDROP then removes
* the %0 which is back on the stack after its evaluation, and 1GETABND
* recalls the tick difference and abandons the lambda environment.

    %0
    CODE
	GOTO	TIMEPART1
    ENDCODE
    CODE
	GOTO	TIMEPART2
    ENDCODE
    ROTDROP 1GETABND #>%			( %ticks1 #frac #waits %ticks )
    UNROT SEVEN #* #+
    UNCOERCE % 0.02 %* %-			( %ticks1 %ticks2 )

* Execution time for %0 should be 0.39 ticks by executing a few hundred
* of them in sequence and taking the average
* Subtracting it from %ticks2 leaves the overhead alone

    % 0.39 %-

* And fix the error

    %-

* Format the result into ":ticks:time"
* Dividing the ticks by 8.192 gives milliseconds: values below 100 are
* given the unit ms as they are, larger ones are divided by 1000 and
* given the unit s. The time is rounded to two decimals before that.

    DUP % 8.192 %/				( %ticks %milliseconds )
    %2 RNDXY

  ( Choose units )

    DUP % 100 %>= ITE				( limit = 0.1s )
    :: % 1000 %/ DOEXT %1 tok_s umEND [;] ;
    DOEXT %1 CHR_m tok_s umP umEND [;]
    UM>U					( %ticks unit_seconds )
    SWAP DECOMP$ >TAG				( Tag with %ticks )
  ;

* Copy above subroutine to even address
* The machine code above contains loops marked "Even address", so the
* copy that gets executed must not land on the other parity.
* A character is copied with TOTEMPOB first and the code object below
* tests bit 0 of the pointer in stack level 1: GOYES only jumps to the
* next line, what matters is the carry handed to PushT/FLoop.
* NOTE(review): presumably the flag is TRUE for an odd address and the
* conditional extra TOTEMPOB of the character shifts where the final
* TOTEMPOB places the subroutine - confirm against the TEMPOB layout.

  CHR_X TOTEMPOB
  CODE
	A=DAT1	A
	?ABIT=1	0
	GOYES	+
+	GOVLNG	=PushT/FLoop
  ENDCODE
  IT TOTEMPOB DROP
  TOTEMPOB

* And evaluate it there

  COLA_EVAL
;

**********************************************************************
* Notes:
*
*	Note that in software terms we are talking about an accuracy
*	of a single C=C+1 A instruction. In reality the hardware errors
*	dominate long runs, somewhat in short runs too. Also note that
*	C=C+1 A was chosen precisely because 50 seems to be the maximum
*	value it can reach during one tick, thus providing nice 0.02 tick
*	scale. 
*
*	Fractional tick gaps due to display updates are not emulated,
*	so in reality there is some error close to even tick boundaries.
*	This could in principle be fixed by using some slower opcode
*	in combination with TIMER2 bit check (display update) but this
*	doesn't seem worth the trouble.
*
**********************************************************************

