home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
DOS/V Power Report 1998 January
/
VPR9801A.ISO
/
BENCH
/
pfm68621
/
PFM686.C
next >
Wrap
C/C++ Source or Header
|
1997-10-06
|
27KB
|
1,167 lines
/* pfmdpmi ver 2.1A DysanKeihin & (koji) at fext */
/* hgg02424@niftyserve.or.jp */
#include <stdio.h>
#include <dos.h>
#include <dpmi.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/*
#define PRINT_IMED
#define DEBUG
*/
/* Register blocks for int86(); in this file only the DEBUG-only DPMI probe in main() uses them. */
union REGS inregs,outregs;
/* Emit the x86 "cli" / "sti" instructions. Each macro already ends in ';'. */
#define cli asm volatile("cli");
#define sti asm volatile("sti");
/* Sample the time-stamp counter (through RDTSC()) into TickStart / TickStop. */
#define TickSet RDTSC(TickStart)
#define TickGet RDTSC(TickStop)
/* Forward declaration: the definition follows main(), which calls it. */
void EvaluateEcacheSize(long Sel, long Offset, long EcacheSize, long FillSize, double Freq);
/*
 * exagoge - print an error message on stderr and terminate the program.
 *
 * e_level  : status handed to exit().
 * err_mes1 : main message text.
 * err_mes2 : optional detail, printed in brackets after the message;
 *            NULL or an empty string means "no detail".
 *
 * Does not return.
 */
void exagoge(char e_level,char *err_mes1,char *err_mes2)
{
    if (err_mes2 == NULL || *err_mes2 == '\0')
        fprintf(stderr,"%s \n",err_mes1);
    else
        fprintf(stderr,"%s [%s]\n",err_mes1,err_mes2);
    exit(e_level);
}
/* ============ Detect CPUID ============== */
/*
 * cpuid(i_eax, r_eax, r_ebx, r_ecx, r_edx)
 *
 * Load i_eax into EAX, execute CPUID (emitted as the raw opcode bytes
 * 0F A2) and copy EAX/EBX/ECX/EDX into the four output lvalues.
 *
 * The template is written with explicit "\n\" continuations (the same
 * style as IOtickGet) so the string literal stays on one logical line
 * inside the #define instead of relying on multi-line string literals.
 */
#define cpuid(i_eax,r_eax,r_ebx,r_ecx,r_edx)\
asm volatile\
("\
movl %4,%%eax\n\
.byte 0x0f\n\
.byte 0xa2\n\
movl %%eax,%0\n\
movl %%ebx,%1\n\
movl %%ecx,%2\n\
movl %%edx,%3\n\
":"=g"(r_eax),"=g"(r_ebx),"=g"(r_ecx),"=g"(r_edx)\
:"g"(i_eax)\
:"ax","bx","cx","dx"\
)
#define MMXFLAG 0x00800000 /* Bit 23 of EDX returned by CPUID(1) */
char flag_p5=0; /* set to 1 by GetCPUID() for the models it runs the FPU divide check on */

/*
 * GetCPUID - build a one-line description of the processor.
 *
 * id_string : caller-supplied buffer that receives the text
 *             "|<MMX|CPU>:<type>[<vendor> Fam.. Mdl.. Stp..]|" optionally
 *             followed by a second line warning about a faulty FPU.
 *             It is set to "" when the CPU has no CPUID instruction.
 *
 * Returns 1 when CPUID is available (id_string filled in), 0 otherwise.
 * Side effect: sets the global flag_p5 for the models flagged below.
 */
long GetCPUID(char *id_string)
{
    /* (family,model) signature = CPUID(1).EAX & 0x0ff0 -> printable name */
    static const struct { unsigned long sig; const char *name; char p5; } models[] = {
        { 0x0000, " non ",  0 },
        { 0x0400, " 486DX", 0 },
        { 0x0410, " 486DX", 0 },
        { 0x0420, " 486SX", 0 },
        { 0x0430, "486DX2", 0 },
        { 0x0440, " 486SL", 0 },
        { 0x0450, "486SX2", 0 },
        { 0x0470, "WBEDX2", 0 },
        { 0x0480, " DX4 ",  0 },
        { 0x04e0, "Am5x86", 0 },
        { 0x0510, " P5 ",   1 },
        { 0x0520, " P54C ", 1 },
        { 0x0530, " P24T ", 1 },
        { 0x0540, " P55C ", 0 },
        { 0x0560, "AMD K6", 0 },
        { 0x0570, "P5note", 1 },
        { 0x0610, " P6 ",   0 },
        { 0x0630, "Pen-II", 0 }
    };
    volatile unsigned long i_eax,r_eax,r_edx;
    volatile unsigned long string_reg[4];
    char VendorString[32], p5_bug_string[64];
    const char *cputype, *cpu;
    unsigned long sig;
    long i,j;
    volatile double c;

    /* Try to set EFLAGS bit 21 (ID); if it sticks, CPUID is available.
       The original EFLAGS value is restored by the final popfl. */
    asm volatile
    ("\
pushfl\n\
pushfl\n\
popl %%eax\n\
orl $0x200000,%%eax\n\
pushl %%eax\n\
popfl\n\
pushfl\n\
popl %%eax\n\
popfl\n\
movl %%eax,%0\n\
":"=g"(r_eax)
    :
    :"ax"
    );
#ifdef DEBUG
    fprintf(stderr,"%lx \n",r_eax);
#endif
    if (!(r_eax & 0x200000)) /* bit 21 did not stick: no CPUID */
    {
        *id_string = '\0';
#ifdef DEBUG
        printf(" It doesen't have CPUID. \n");
#endif
        return(0);
    }
#ifdef DEBUG
    printf(" It has CPUID. \n");
#endif

    /* Leaf 0: the vendor string is EBX, EDX, ECX in that order, which the
       macro call below stores in string_reg[0], [1], [2]. */
    i_eax=0;
    cpuid(i_eax,string_reg[3],string_reg[0],string_reg[2],string_reg[1]);
    for(i=0; i<3; i++)
        for(j=0; j<4; j++)
            VendorString[i*4+j] = (char)( 0xff & (string_reg[i] >> (j*8)) );
    VendorString[12] = '\0';

    /* Leaf 1: EAX = family/model/stepping, EDX = feature flags.
       One call yields both, so it is not repeated later. */
    i_eax=1;
    cpuid(i_eax,r_eax,string_reg[0],string_reg[2],r_edx);

    sig = r_eax & 0x0ff0;
    cputype = " ??? ";
    for (i=0; i<(long)(sizeof(models)/sizeof(models[0])); i++)
    {
        if (models[i].sig == sig)
        {
            cputype = models[i].name;
            if (models[i].p5) flag_p5 = 1;
            break;
        }
    }
    /* Signature 0x042x is shared by the 486SX and the UMC part; only the
       vendor string tells them apart.  (Previously the UMC test always
       overwrote the 486SX name.) */
    if (sig == 0x0420 && strcmp(VendorString,"UMC UMC UMC ") == 0)
        cputype = " UMC ";

    /* Divide check on the flagged models: (1/c)*c must come back as 1. */
    p5_bug_string[0] = '\0';
    if( flag_p5 )
    {
        c = 824633702449.0;
        if( fabs (1.0 - (1.0/c)*c) > 0.00000000000001 )
            sprintf(p5_bug_string,"\n| This FPU is BUGGY version. |");
    }

    cpu = ( MMXFLAG & r_edx ) ? "MMX" : "CPU";
    sprintf(id_string,"|%s:%6s[%12s Fam%1ld Mdl%1ld Stp%2ld]|%s",cpu,
            cputype,VendorString,
            (long)(0x0f&(r_eax>>8)),(long)(0x0f&(r_eax>>4)),(long)(0x0f&r_eax),
            p5_bug_string );
    return(1);
}
/* ============ Measure Frequency ============== */
/* TickStart / TickStop: the two most recent RDTSC samples (see TickSet / TickGet).
   Tick: the difference last computed by ExactTick(). */
static unsigned long TickStart, TickStop, Tick;
/*
 * IOtickGet(counter_l): write 0x00 to port 0x43, then read port 0x40 twice
 * (first byte becomes the low half, second the high half) and store the
 * resulting 16-bit value in counter_l.
 * NOTE(review): presumably this latches and reads the count of the timer
 * that the "timer0(system)" remark below refers to - confirm.
 */
#define IOtickGet(counter_l)\
asm volatile\
("\
movb $0x00,%%al\n\
outb %%al,$0x43\n\
inb $0x40,%%al\n\
xchgb %%al,%%ah\n\
inb $0x40,%%al\n\
xchgb %%al,%%ah\n\
movw %%ax,%0\n\
":"=g"(counter_l)\
:\
:"ax"\
)
/*
 * IOtickRecover: write control byte 0x36 to port 0x43 followed by two zero
 * bytes to port 0x40.  Used before the measurement and again at the end of
 * main() ("Recover timer IC").
 */
#define IOtickRecover\
asm volatile\
("\
movb $0x36,%%al /*timer0(system)*/\n\
outb %%al,$0x43\n\
xorw %%ax,%%ax\n\
outb %%al,$0x40\n\
outb %%al,$0x40\n\
":\
:\
:"ax"\
)
/*
 * RDTSC(CounterLo): execute the opcode bytes 0F 31 and store EAX in
 * CounterLo.  EDX is listed as clobbered and its value is discarded, so
 * only the 32 bits delivered in EAX are kept.
 */
#define RDTSC(CounterLo)\
asm volatile\
("\
.byte 0x0f\n\
.byte 0x31\n\
movl %%eax,%0\n\
":"=g"(CounterLo)\
:\
:"ax","dx"\
)
/*
 * ExactTick - counter ticks elapsed between the TickSet and TickGet samples.
 *
 * Stores TickStop - TickStart in the global Tick and returns it.  The
 * subtraction is done in unsigned arithmetic, so a wrap of the counter
 * between the two samples is absorbed automatically.
 */
long ExactTick()
{
    Tick = TickStop - TickStart;
    return(Tick);
}
#define CMOS_ADDR 0x70 /* port written with the register index */
#define CMOS_SEC 0x00 /* register index read as the "seconds" value */
#define CMOS_DATA 0x71 /* port the selected register is read from */
#define COUNTER 2386360 /* Hz : count rate assumed for the value read by IOtickGet() */

/* Spin until the value read by IOtickGet() jumps upward, i.e. the
   down-counting timer has wrapped once. */
static void WaitTimerWrap(void)
{
    unsigned short NewCounter,PreviousCounter;

    IOtickGet(PreviousCounter);
    while(1)
    {
        IOtickGet(NewCounter);
        if ( NewCounter > PreviousCounter) break;
        PreviousCounter = NewCounter;
    }
}

/* Wait until the seconds register has read back the same value 10001 times
   in a row, then spin until it changes.  Takes the last value seen by the
   caller and returns the stable value observed before the change. */
static unsigned char WaitRtcSecondEdge(unsigned char sec)
{
    unsigned char sec1;
    long Vcount = 0;

    while(1)
    {
        outportb(CMOS_ADDR, CMOS_SEC);
        if ((sec1 = inportb(CMOS_DATA)) == sec) { if ( 10000 == Vcount++) break; }
        else Vcount = 0;
        sec = sec1;
    }
    while(1)
    {
        outportb(CMOS_ADDR, CMOS_SEC);
        if (sec != inportb(CMOS_DATA)) break;
    }
    return(sec);
}

/*
 * GetFrequency - measure the time-stamp-counter rate in MHz.
 *
 * Two independent measurements are taken:
 *   1. TSC ticks elapsed during 10 wraps of the 16-bit timer count
 *      (converted to Hz with COUNTER), and
 *   2. TSC ticks elapsed between two consecutive changes of the CMOS
 *      seconds register.
 * The second one is returned.  If the two disagree by more than 0.1 MHz a
 * warning block is printed on stdout.
 *
 * Terminates through exagoge() when two back-to-back RDTSC samples are
 * identical.
 */
double GetFrequency(void)
{
    unsigned long Diff, DelayRDTSC;
    int i, j;
    unsigned char sec;
    long CounterValSec;
    double PitHz;

    /* Cost of a back-to-back TickSet/TickGet pair; the first pass only
       warms things up. */
    for(j=0; j<2; j++)
    {
        TickSet;
        TickGet;
    }
    if (TickStart == TickStop )
        exagoge(1," Sorry, this CPU doesn't have RDTSC instruction.\n Use PFM586 or PFM486.","");
    DelayRDTSC = ExactTick();

    IOtickRecover;
    WaitTimerWrap(); /* synchronise to a wrap before timing */
    for(j=0; j<2; j++) /* only the second pass is used */
    {
        TickSet;
        for(i=0; i<10; i++) /* wait for 10 ticks */
            WaitTimerWrap();
        TickGet;
    }
    Diff = ExactTick()-DelayRDTSC;
#ifdef DEBUG
    printf("Delay:%lx\n",DelayRDTSC);
    printf("Diff :%lx\n",Diff);
#endif
    /* TSC rate implied by the timer: 10 wraps of a 16-bit count at COUNTER Hz */
    PitHz = COUNTER/(double)(10*0x10000)*(double)Diff;

    /* Check CMOS clock: time one full second, edge to edge */
    outportb(CMOS_ADDR, CMOS_SEC);
    sec = inportb(CMOS_DATA);
    sec = WaitRtcSecondEdge(sec);
    TickSet;
    sec = WaitRtcSecondEdge(sec);
    TickGet;
    CounterValSec = ExactTick();
#ifdef DEBUG
    fprintf(stdout,"%ld\n",CounterValSec);
    fprintf(stdout,"%lf\n",PitHz);
#endif
    /* fabs(), not abs(): abs() truncated the difference to an int, so the
       0.1 MHz tolerance was effectively 1 MHz. */
    if (fabs ( (double)CounterValSec/1000000. - PitHz/1000000 ) > 0.1 )
    {
        fprintf(stdout,"-------------------------------------------------\n");
        fprintf(stdout,"Either CMOS-RTC or 14.318MHz clock seems strange.\n");
        fprintf(stdout,"factor : :%5.4f\n",CounterValSec/PitHz);
    }
    return ((double)CounterValSec/1000000.);
}
/* ============ Memory Setup ============== */
/* Descriptor access-rights bytes (DPL bits are OR-ed in at run time). */
#define SegAttrConfExecutable 0x9a /* present, code segment, execute/read */
#define SegAttrDataRW 0x92 /* present, data segment, read/write */

/*
 * SegAttributeSet - replace the access-rights byte of selector Sel.
 *
 * Sel  : LDT selector whose descriptor is rewritten.
 * Attr : new access byte (one of the SegAttr* values); the current
 *        privilege level, taken from the low two bits of CS, is placed
 *        in bits 5-6 before it is stored.
 *
 * The descriptor is also forced to the 32-bit default size (byte 6,
 * bit 0x40).  Returns the previous access byte.
 *
 * Terminates through exagoge() if the descriptor cannot be read: writing
 * back an uninitialised buffer would install a garbage descriptor.
 */
long SegAttributeSet(long Sel,unsigned char Attr)
{
    long PrevAttr, s;
    unsigned char _buffer[8];
    long CPL;
#ifdef DEBUG
    long i;
#endif

    CPL = _my_cs() & 0x03;
    Attr |= ( CPL << 5 );
    s = __dpmi_get_descriptor(Sel, _buffer); /* DPMI 0.9 AX=000b */
    if ( s ) exagoge(-1,"Descriptor read Failure","DPMI(000b)");
#ifdef DEBUG
    printf("s:%lx \n",s);
    for (i=0; i<8; i++)
        printf("%2x ",_buffer[7-i]);
    printf("\n");
#endif
    PrevAttr = _buffer[5];
    _buffer[5] = Attr;
    _buffer[6] |= 0x40; /* default size : 32bit */
    /* The result of the write is left unchecked outside DEBUG builds,
       as before. */
    s = __dpmi_set_descriptor(Sel, _buffer); /* DPMI 0.9 AX=000c */
#ifdef DEBUG
    printf("s:%lx \n",s);
    s = __dpmi_get_descriptor(Sel, _buffer); /* DPMI 0.9 AX=000b */
    printf("s:%lx \n",s);
    for (i=0; i<8; i++)
        printf("%2x ",_buffer[7-i]);
    printf("\n");
#endif
    return(PrevAttr);
}
/*
 * AllocatePhysicalMem - map a physical address range and return a selector
 * that addresses it.
 *
 * PhyAddr : physical start address to map.
 * Size    : length of the range in bytes.
 *
 * Steps: allocate one LDT descriptor, map the range (DPMI 0800), try to
 * lock it (DPMI 0600, failure tolerated), set base and limit, touch the
 * range every 256 bytes and finally verify that the first and the last
 * byte can be written and read back.
 *
 * Returns the selector.  Terminates through exagoge() on any DPMI failure
 * other than the lock, or if the read-back check fails.
 */
long AllocatePhysicalMem(long PhyAddr, long Size)
{
    long Sel,LAddr;
    __dpmi_meminfo info;
    long s,i;
#ifdef DEBUG
    unsigned long base;
#endif

    Sel= __dpmi_allocate_ldt_descriptors(1); /* DPMI 0.9 AX=0000 */
    if ( Sel < 0 ) exagoge(-1,"Memory allocation Failure","DPMI(0000)");
#ifdef DEBUG
    printf("PhyMemSel: %lx \n",Sel);
#endif
    info.handle = 0;
    info.size = Size;
    info.address = PhyAddr;
    s = __dpmi_physical_address_mapping(&info); /* DPMI 0.9 AX=0800 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0800)");
    LAddr = info.address; /* linear address of the mapping */
#ifdef DEBUG
    printf("LAddr :%lx \n",info.address);
#endif
    s = __dpmi_lock_linear_region(&info); /* DPMI 0.9 AX=0600 */
#if 0
    /* Deliberately disabled: a failed lock is not treated as fatal here. */
    if ( s )
    {
        fprintf(stderr,"Lock Error : %lx -- %lx\n",PhyAddr,PhyAddr+Size);
        exagoge(-1,"Memory allocation Failure","DPMI(0600)");
    }
#endif
#ifdef DEBUG
    printf("s:%lx[__dpmi_lock_linear_region] \n",s);
    printf("handle:%lx \n",info.handle);
    printf("size :%lx \n",info.size);
    printf("addr :%lx \n",info.address);
#endif
    s = __dpmi_set_segment_base_address(Sel,LAddr); /* DPMI 0.9 AX=0007 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0007)");
    /* limit with the low 12 bits set, i.e. rounded up to a page boundary */
    s = __dpmi_set_segment_limit(Sel, Size|0xfff); /* DPMI 0.9 AX=0008 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0008)");
#ifdef DEBUG
    __dpmi_get_segment_base_address(Sel, &base);
    printf("Selector 0x%04lx : base=0x%lx, limit=0x%lx, flags=0x%x\n",
           Sel, base, __dpmi_get_segment_limit(Sel),
           __dpmi_get_descriptor_access_rights(Sel));
#endif
    /* touch the whole range, one dword every 256 bytes */
    for(i=0; i<Size; i+=256)
        _farpokel(Sel,i,0);
    /* Probe both ends with byte accesses.  The write at Size-1 used to be
       a 32-bit store, which reached 3 bytes past the requested range. */
    _farpokeb(Sel,0,0xAA);
    _farpokeb(Sel,Size-1,0xAA);
    if((_farpeekb(Sel,0) != 0xAA) || (_farpeekb(Sel,Size - 1) != 0xAA) )
    {
        fprintf(stdout,"Requested range : %08lX - %08lX \n",PhyAddr,PhyAddr+Size);
        exagoge(1,"Memory out of range.","");
    }
    return(Sel);
}
/*
 * AllocateMemBlock - allocate and lock a memory block and return a
 * selector that addresses it from offset 0.
 *
 * Size : block length in bytes.
 *
 * Steps: allocate one LDT descriptor, allocate the block (DPMI 0501),
 * lock it (DPMI 0600), set the selector's base and limit, then touch the
 * block with a zero dword every 256 bytes.
 *
 * Returns the selector.  Terminates through exagoge() on any DPMI failure.
 */
long AllocateMemBlock( long Size)
{
    long Sel,LAddr;
    __dpmi_meminfo info;
    long s=0,i;
#ifdef DEBUG
    unsigned long base;
#endif

    Sel= __dpmi_allocate_ldt_descriptors(1); /* DPMI 0.9 AX=0000 */
    if ( Sel < 0 ) exagoge(-1,"Memory allocation Failure","DPMI(0000)");
#ifdef DEBUG
    printf("MemBlockSel: %lx \n",Sel);
#endif
    info.handle = 0;
    info.size = Size;
    info.address = 0;
    s = __dpmi_allocate_memory(&info); /* DPMI 0.9 AX=0501 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0501)");
    LAddr = info.address; /* linear address of the block */
#ifdef DEBUG
    printf("LAddr :%lx \n",info.address);
#endif
    s = __dpmi_lock_linear_region(&info); /* DPMI 0.9 AX=0600 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0600)");
#ifdef DEBUG
    printf("s:%lx[__dpmi_lock_linear_region] \n",s);
    printf("handle:%lx \n",info.handle);
    printf("size :%lx \n",info.size);
    printf("addr :%lx \n",info.address);
#endif
    s = __dpmi_set_segment_base_address(Sel,LAddr); /* DPMI 0.9 AX=0007 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0007)");
    /* limit with the low 12 bits set, i.e. rounded up to a page boundary */
    s = __dpmi_set_segment_limit(Sel, Size | 0xfff); /* DPMI 0.9 AX=0008 */
    if ( s ) exagoge(-1,"Memory allocation Failure","DPMI(0008)");
#ifdef DEBUG
    __dpmi_get_segment_base_address(Sel, &base);
    printf("Selector 0x%04lx : base=0x%lx, limit=0x%lx, flags=0x%x\n",
           Sel, base, __dpmi_get_segment_limit(Sel),
           __dpmi_get_descriptor_access_rights(Sel));
#endif
    /* touch the whole block, one dword every 256 bytes */
    for(i=0; i<Size; i+=256)
        _farpokel(Sel,i,0);
    return(Sel);
}
/* ============ Cache, Main Read ============== */
/* fp4(val4): emit a 32-bit value as four bytes, least-significant byte first, through fp(). */
#define fp4(val4) fp((val4 & 0xff)); fp(((val4 >> 8) & 0xff)); fp(((val4 >> 16) & 0xff)); fp(((val4 >> 24) & 0xff));
/* fp(val): store one byte at Sel:(pp + Offset) with _farpokeb() and advance pp.
   Relies on Sel, Offset and pp being in scope at the point of use. */
#define fp(val) { _farpokeb(Sel, pp + Offset, val); pp++; }
/*
 * FillTestCode - assemble the read-timing routine into segment Sel.
 *
 * Sel        : selector of the buffer; it is switched to a read/write data
 *              segment first so the code bytes can be stored.
 * Offset     : start offset of the routine inside the segment.
 * EcacheSize : size of the timed region, counted in dwords.
 * FillSize   : when non-zero, FillSize*4 one-byte NOPs are placed before
 *              the timed region ("for main").
 *
 * The routine runs 4 passes (EDI counts them down).  Each pass executes the
 * NOP filler, reads the time-stamp counter, executes ReadBlock groups of
 * four 6-byte "add $0,reg" instructions (24 bytes = 6 dwords per group),
 * reads the counter again and leaves the difference in EAX.  It ends with
 * a far return, so EAX holds the tick count of the last pass.
 *
 * Returns ReadBlock, the number of 24-byte groups emitted.
 */
long FillTestCode(long Sel, long Offset, long EcacheSize, long FillSize )
{
    long ReadBlock, i, pp=0, BigLoopStartPoint;

    ReadBlock = EcacheSize / (24 / 4);
#if 0
    {
        long FillStartPoint = Offset + EcacheSize * 4;
        printf("Offset : %lx \n",Offset);
        printf("EcacheSize : %lx \n",EcacheSize);
        printf("FillStartPoint : %lx \n",FillStartPoint);
        printf("FillSize : %lx \n",FillSize);
        printf("\n");
    }
#endif
    SegAttributeSet(Sel,SegAttrDataRW);

    fp(0x1e);                       /* push %ds */
    fp(0x66); fp(0x8c); fp(0xc9);   /* movw %cs,%cx */
    fp(0x66); fp(0x8e); fp(0xd9);   /* movw %cx,%ds */
    fp(0xbf); fp4(0x04);            /* movl $4,%edi : 4 passes */
    BigLoopStartPoint = pp + Offset;

    /* filler for the main-memory test; emits nothing when FillSize is 0 */
    for (i=0; i<FillSize*4; i++)
    {
        fp(0x90);                   /* nop */
    }
    /* pad with NOPs until pp is a multiple of 4 */
    while ( ( pp % 4 ) != 0 )
    {
        fp(0x90);                   /* nop */
    }

    fp(0x0f); fp(0x31);             /* rdtsc */
    fp(0x89); fp(0xc6);             /* movl %eax,%esi */
    for(i=0; i<ReadBlock; i++)
    {
        fp(0x81); fp(0xc3); fp4(0x00);  /* 81 c3 00000000 : addl $0,%ebx */
        fp(0x81); fp(0xc0); fp4(0x00);  /* 81 c0 00000000 : addl $0,%eax */
        fp(0x81); fp(0xc1); fp4(0x00);  /* 81 c1 00000000 : addl $0,%ecx */
        fp(0x81); fp(0xc2); fp4(0x00);  /* 81 c2 00000000 : addl $0,%edx */
    }
    fp(0x0f); fp(0x31);             /* rdtsc */
    fp(0x29); fp(0xf0);             /* subl %esi,%eax : eax - esi => eax */
    fp(0x4f);                       /* decl %edi */
    fp(0x74); fp(0x07);             /* jz +7 : skip the 7-byte jump back */
    fp(0xb8);                       /* movl $BigLoopStartPoint,%eax */
    fp4( BigLoopStartPoint );
    fp(0xff); fp(0xe0);             /* jmp *%eax */
    fp(0x1f);                       /* pop %ds */
    fp(0xcb);                       /* far return */
    return(ReadBlock);
}
/*
 * CallTestCode - far-call the routine built by FillTestCode().
 *
 * Sel    : selector holding the routine; switched to an executable code
 *          segment before the call.
 * Offset : entry offset inside that segment.
 *
 * Returns the value the routine leaves in EAX (its tick count).
 */
long CallTestCode(long Sel, long Offset)
{
    long Count;
    struct
    {
        unsigned long Offset;
        unsigned long Selector;
    }JumpAdd; /* far pointer operand for lcall: offset first, then selector */

    SegAttributeSet(Sel,SegAttrConfExecutable);
    JumpAdd.Selector = Sel;
    JumpAdd.Offset = Offset;
    /* The called code changes CX, EDX, ESI, EDI and the flags in addition
       to EAX, and the asm reads JumpAdd through a pointer; everything is
       declared so the compiler neither keeps live values in those
       registers nor drops the stores to JumpAdd. */
    asm volatile
    ("\
movl %1,%%eax\n\
lcall (%%eax)\n\
movl %%eax,%0\n\
":"=g"(Count)
    :"g"(&JumpAdd)
    :"eax","ecx","edx","esi","edi","cc","memory");
    return(Count);
}
/*
 * EcacheRead - time the read test over EcacheSize dwords.
 *
 * Builds and runs the test routine for EcacheSize, then builds and runs it
 * again with a zero-sized timed region and subtracts that tick count as
 * overhead.  Freq is the clock rate in MHz.
 *
 * Returns the time per dword in nanoseconds.
 */
double EcacheRead(long Sel, long Offset, long EcacheSize, double Freq)
{
    long Blocks, Cycles, Overhead;
    double NsPerDword;

    Blocks = FillTestCode(Sel, Offset, EcacheSize, 0 );
    Cycles = CallTestCode(Sel, Offset);
#ifdef DEBUG
    fprintf(stdout,"CounterValER:%ld \n",Cycles);
#endif

    /* same routine with an empty timed region: pure call/measurement cost */
    FillTestCode(Sel, Offset, 0, 0 );
    Overhead = CallTestCode(Sel, Offset);
    Cycles -= Overhead;
#ifdef DEBUG
    fprintf(stdout,"CounterValER:%ld %ld\n",Cycles,Overhead);
#endif

    /* each block is 24 bytes = 6 dwords */
    NsPerDword = (double)(Cycles)/(double)(Blocks * 24 / 4 * Freq) * 1000.;
#ifdef PRINT_IMED
    fprintf(stdout,"CounterValER:%ld \n",Cycles);
    fprintf(stdout,"E-cache read %6.2f[ns/dword] :%5.2f[clocks]\n",NsPerDword,NsPerDword/(1000./Freq));
#endif
    return (NsPerDword);
}
/*
 * MainRead - time the read test with a filler run in front of it.
 *
 * Same procedure as EcacheRead(), except that the test routine is built
 * with FillSize, so FillSize*4 bytes of NOPs are executed before every
 * timed pass.  The overhead run uses an empty timed region and no filler.
 * Freq is the clock rate in MHz.
 *
 * Returns the time per dword in nanoseconds.
 */
double MainRead(long Sel, long Offset, long EcacheSize, long FillSize, double Freq)
{
    long CounterValMR, ReadBlockMR;
    double TimeMR;
    long Count;

    ReadBlockMR = FillTestCode(Sel, Offset, EcacheSize, FillSize );
    Count = CallTestCode(Sel, Offset);
    CounterValMR = Count;
#ifdef DEBUG
    fprintf(stdout,"CounterValMR:%ld \n",CounterValMR);
#endif
    /* call overhead of the read test */
    FillTestCode(Sel, Offset, 0, 0 );
    Count = CallTestCode(Sel, Offset);
    CounterValMR -= Count;
#ifdef DEBUG
    fprintf(stdout,"CounterValMR:%ld %ld\n",CounterValMR,Count);
#endif
    /* each block is 24 bytes = 6 dwords */
    TimeMR = (double)(CounterValMR)/(double)(ReadBlockMR * 24 / 4 * Freq) * 1000.;
#ifdef PRINT_IMED
    fprintf(stdout,"CounterValMR:%ld \n",CounterValMR);
    /* label used to read "E-cache read", copied from EcacheRead() */
    fprintf(stdout,"main read %6.2f[ns/dword] :%5.2f[clocks]\n",TimeMR,TimeMR/(1000./Freq));
#endif
    return (TimeMR);
}
/* ============ Cache, Main Write ============== */
/*
 * DoContinuousPush - run the routine built by SetCodeContinuousPush().
 *
 * CodePointer : address of the generated routine (near-called).
 * StackSel    : selector of the test buffer; switched to a read/write data
 *               segment and handed to the routine in EAX.
 *
 * Returns the value the routine leaves in EAX (its tick count).
 */
long DoContinuousPush(unsigned char *CodePointer, long StackSel)
{
    long Count;

    SegAttributeSet(StackSel,SegAttrDataRW);
    /* The routine uses ECX, ESI and EDI as well as EAX/EBX/EDX, changes
       the flags and writes to memory; declare all of it so the compiler
       does not keep live values there across the call. */
    asm volatile
    ("\
movl %1,%%ebx\n\
movl %2,%%eax\n\
call %%ebx\n\
movl %%eax,%0\n\
":"=g"(Count)
    :"g"(CodePointer),"g"(StackSel)
    :"ax","bx","cx","dx","si","di","cc","memory"
    );
    return(Count);
}
#ifdef fp
#undef fp
#endif
/* From here on fp() stores one byte through the local pointer pp and advances it. */
#define fp(val) { *pp++ = val; }
/*
 * SetCodeContinuousPush - assemble the write-timing routine into CodePointer.
 *
 * CodePointer     : buffer that receives the machine code.
 * StackStartPoint : offset loaded into ESP (and EDI) at the start of each pass.
 * PushUnit        : number of pushal instructions in the timed loop body.
 * PushLoops       : loop count loaded into ECX for the timed loop.
 * FillSize        : 0 selects the "cache" preparation (rep stosl downward
 *                   followed by rep lodsl over PushLoops*PushUnit*8 dwords);
 *                   non-zero selects the "main" preparation (rep stosl
 *                   upward over FillSize dwords from StackStartPoint).
 *
 * The generated routine expects the buffer selector in AX, loads it into
 * SS, ES and DS, runs 4 passes (counted in EBX) and leaves the tick count
 * of the last pass in EAX.  Before returning it restores SS from the value
 * _my_ss() has while this function runs, ESP from EBP, and pops EBP/ES/DS.
 */
void SetCodeContinuousPush(char *CodePointer, long StackStartPoint, long PushUnit, long PushLoops, long FillSize)
{ long i;
unsigned char *pp;
long EcacheSize_ , BigLoopStart, FillStartPoint;
EcacheSize_ = PushLoops * PushUnit * 8; /* dwords pushed per pass: each pushal stores 8 */
pp = CodePointer;
FillStartPoint = StackStartPoint;
#ifdef DEBUG
printf("StackStartPoint : %lx \n",StackStartPoint);
printf("EcacheSize_ : %lx \n",EcacheSize_);
printf("FillStartPoint : %lx \n",FillStartPoint);
printf("FillSize : %lx \n",FillSize);
printf("\n");
#endif
fp(0x1e); /* push %%ds */
fp(0x06); /* push %%es */
fp(0x55); /* push %%ebp */
fp(0x66); fp(0x8e); fp(0xd0); /* movw %ax,%ss */
fp(0x66); fp(0x8e); fp(0xc0); /* movw %%ax,%%es */
fp(0x66); fp(0x8e); fp(0xd8); /* movw %%ax,%%ds */
fp(0x89); fp(0xe5); /* movl %esp,%ebp : keep the caller's ESP */
fp(0xbb); fp4(0x04); /* movl $04,%%ebx : 4 Big loops*/
BigLoopStart = (long)pp; /* address the jump at the end of each pass returns to */
/* Big Loops Start */
/* load StackStartPoint to esp & edi */
fp(0xb9); /* mov imm32, %%ecx */
fp4( StackStartPoint );
fp(0x89); fp(0xcc); /* movl %%ecx,%%esp StackOffset */
fp(0x89); fp(0xcf); /* movl %%ecx,%%edi : same as StackOffset pointer */
if( FillSize == 0)
{ /* for cache */
fp(0xb9); /* mov imm32, %%ecx*/
fp4( EcacheSize_ );
fp(0xfd); /* std : direction flag set, string ops run downward */
fp(0xf3); fp(0xab); /* rep stosl */
fp(0xbe); /* mov imm32, %%esi */
fp4( StackStartPoint );
fp(0xb9); /* mov imm32, %%ecx*/
fp4( EcacheSize_ );
fp(0xf3); fp(0xad); /* rep lodsl (eax is affected) */
}
else
{ /* for Main */
fp(0xb9); /* mov imm32, %%ecx */
fp4( FillStartPoint );
fp(0x89); fp(0xcf); /* movl %%ecx,%%edi */
fp(0xb9); /* mov imm32, %%ecx*/
fp4( FillSize );
fp(0xfc); /* cld */
fp(0xf3); fp(0xab); /* rep stosl */
}
fp(0xfc); /* cld */
fp(0xb9); /* mov imm32, %%ecx : loop counter for the timed loop */
fp4( PushLoops );
while(1) /* auto align : pad with NOPs until the code address is a multiple of 4 */
{
if ( ( (long)pp % 4 ) == 0 ) break;
fp(0x90); /* Nop */
}
fp(0x0f); fp(0x31); /* RDTSC */
fp(0x89); fp(0xc6); /* movl %%eax, %%esi */
for (i=0; i<PushUnit; i++)
fp(0x60); /* pushal*/
fp(0xe2); /* loop rel8 : back to the head of the push codes (0xbe = -0x42 when PushUnit is 0x40) */
fp(0xfe - PushUnit); /* displacement = -(PushUnit + 2) : PushUnit one-byte pushal plus this 2-byte loop */
fp(0x0f); fp(0x31); /* RDTSC */
fp(0x29); fp(0xf0); /* subl %%esi,%%eax : eax - esi => eax */
fp(0x4b); /* dec %%ebx */
fp(0x74); fp(0x07); /* jz +07 : skip the 7-byte jump back after the last pass */
fp(0xb9); /* mov *BigLoopsStart,%%ecx */
fp4( BigLoopStart );
fp(0xff); fp(0xe1); /* jmp %%ecx */
/* Recover SS ESP */
fp(0x66); fp(0xb9); /* movw imm16,%%cx */
fp( _my_ss() & 0xff); /* LSB */
fp((_my_ss() >> 8) & 0xff); /* MSB */
fp(0x66); fp(0x8e); fp(0xd1); /* movw %cx,%ss */
fp(0x89); fp(0xec); /* movl %ebp,%esp */
fp(0x5d); /* pop %%ebp */
fp(0x07); /* pop %%es */
fp(0x1f); /* pop %%ds */
fp(0xc3); /* near ret */
}
/* Buffer that receives the generated write-test routine. */
static unsigned char CodePointer[512];
/* pushal instructions per iteration of the timed loop */
#define PushUnit 64

/*
 * EcacheWrite - time the write test over EcacheSize dwords.
 *
 * The generated routine pushes downward from Offset + EcacheSize*4 in
 * loops of PushUnit pushal instructions (8 dwords each).  A second run
 * with an empty loop body gives the loop and call overhead, which is
 * subtracted.  Freq is the clock rate in MHz.
 *
 * Returns the time per dword in nanoseconds.
 */
double EcacheWrite(long Sel, long Offset, long EcacheSize, double Freq)
{
    long StackTop = Offset + EcacheSize*4;
    long Loops = EcacheSize/PushUnit/8;
    long Cycles;
    double NsPerDword;

    SetCodeContinuousPush(CodePointer, StackTop, PushUnit, Loops, 0 );
    Cycles = DoContinuousPush(CodePointer, Sel);

    /* loop & call overhead of E-cache write: same loop, no pushes */
    SetCodeContinuousPush(CodePointer, StackTop, 0, Loops, 0 );
    Cycles -= DoContinuousPush(CodePointer, Sel);

    NsPerDword = ((double)Cycles )/(double)(EcacheSize * Freq)*1000. ;
#ifdef PRINT_IMED
    fprintf(stdout,"E-cache write %6.2f[ns/dword] :%5.2f[clocks]\n",NsPerDword,NsPerDword/(1000./Freq));
#endif
    return (NsPerDword);
}
#define MinMainSize 512 /* dwords, i.e. 2k bytes */

/*
 * MainWrite - time the write test with a fill run in front of it.
 *
 * The generated routine first stores FillSize dwords upward from the stack
 * start point and then pushes (MainSize + MinMainSize) dwords downward.
 * The overhead run repeats the same fill but pushes only MinMainSize
 * dwords (one loop of PushUnit pushal), so the difference covers MainSize
 * dwords.  Freq is the clock rate in MHz.
 *
 * Returns the time per dword in nanoseconds.
 */
double MainWrite(long Sel, long Offset, long MainSize, long FillSize, double Freq)
{
    long CounterValMW, Count;
    double TimeMW;
    long PushLoops, StackStartPoint;

    StackStartPoint = Offset + (MainSize+MinMainSize)*4;
    PushLoops = (MainSize+MinMainSize)/PushUnit/8;
    SetCodeContinuousPush(CodePointer, StackStartPoint, PushUnit, PushLoops, FillSize);
    Count = DoContinuousPush(CodePointer, Sel);
    CounterValMW = Count;
    /* loop & call overhead of the main write test */
    SetCodeContinuousPush(CodePointer, (Offset + MinMainSize*4), PushUnit, 1, FillSize );
    Count = DoContinuousPush(CodePointer, Sel);
#ifdef DEBUG
    printf("MW %ld : %5.4f \n",CounterValMW,(double)CounterValMW /(double)MainSize);
    printf("Count %ld : %5.4f \n",Count,(double)Count /(double)MinMainSize);
#endif
    CounterValMW -= Count;
    TimeMW = ((double)CounterValMW )/(double)(MainSize * Freq)*1000. ;
#ifdef PRINT_IMED
    /* label used to read "E-cache write", copied from EcacheWrite() */
    fprintf(stdout,"main write %6.2f[ns/dword] :%5.2f[clocks]\n",TimeMW,TimeMW/(1000./Freq));
#endif
    return (TimeMW);
}
/*
 * main - parse the command line, run the four measurements, print the table.
 *
 * Arguments:
 *   <number>        cache size in Kbytes (default 256; raised to 2 when
 *                   <= 1 and to the next even value when odd).
 *   <word with a/A> the following argument is a hexadecimal physical
 *                   start address; the test buffer is mapped there instead
 *                   of being allocated.
 *
 * Interrupts are disabled (cli) from before the frequency measurement
 * until the timer has been reprogrammed after the last test.
 */
int main(int argc,char **argv)
{
    long FillSize,EcacheSize,AllocSize,kbytes;
    char VendorString[128],CacheSizeString[128];
    double TimeER,TimeEW,TimeMR,TimeMW,Freq;
    long Sel, PhyAddr = -1; /* -1 : no physical address requested */

    fprintf(stdout,"\n-- pfm686 ver2.1A DysanKeihin & (koji) --\n");
#ifdef DEBUG
    /* dpmi test */
    inregs.x.ax=0x0400;
    int86(0x31,&inregs,&outregs);
    if ((outregs.x.bx & 1) == 0 ){ printf ("No DPMI installed!\n"); exit(1); }
    printf("DPMI test : %lx\n",(unsigned long)outregs.x.bx);
#endif
    CacheSizeString[0] = '\0';
    kbytes = 256;
    if (argc != 1)
    {
        int i;
        for (i=1; i<argc; i++)
        {
            if ( '0' <= argv[i][0] && argv[i][0] <= '9')
            {
                kbytes = atol(argv[i]);
                if ( (double)kbytes != atof(argv[i]))
                    exagoge(-1,"Sorry, cache size must be integer.",argv[i]);
                sprintf(CacheSizeString,"| Cache size : [%4ld]Kbytes |",kbytes);
            }
            if ( strchr(argv[i],'a') != NULL || strchr(argv[i],'A' ) != NULL)
            {
                unsigned long addr;
                /* ">=" : argv[argc] is NULL and must not reach sscanf() */
                if( i+1 >= argc ) exagoge(-1,"Please define start address.","");
                if( sscanf(argv[i+1],"%lx",&addr) != 1 )
                    exagoge(-1,"Please define start address.",argv[i+1]);
                PhyAddr = (long)addr;
                i++;
            }
        }
    }
    if ( kbytes <= 1 )
    {
        fprintf(stderr,"Cache size must be more than 2kbytes.\n");
        kbytes = 2;
    }
    if ( (kbytes & 1) == 1 )
    {
        fprintf(stderr,"Please define cache size with even value.\n");
        kbytes++;
    }
    /* if ( kbytes > 32 ) kbytes -=2; <-- pfm686 v1.2*/
    EcacheSize = kbytes*256;/* array size counted by long */
    if ( EcacheSize < 65500 ) AllocSize = 65500;
    else AllocSize = EcacheSize;
    /* FillSize = ( EcacheSize*2 > 1024*256 )? EcacheSize*2:1024*256; <-- 686 v1.2*/
    FillSize = AllocSize*2;
    if (FillSize < 1024*256) FillSize = 2048*256;
    if ( PhyAddr != -1 )
    {
        Sel = AllocatePhysicalMem(PhyAddr, (AllocSize + FillSize)*4+1024);
    }
    else
    {
        Sel = AllocateMemBlock( (AllocSize + FillSize)*4 + 1024);
    }
    /* cpuid */
#ifdef PRINT_IMED
    if ( GetCPUID(&VendorString[0]) ) fprintf(stdout,"%s\n",VendorString);
#endif
#ifdef DEBUG
    printf(" cli \n");
#endif
    cli;
    /* evaluate clock Frequency */
    Freq = GetFrequency();
#ifdef PRINT_IMED
    fprintf(stdout,"CPU Frequency %6.2f[MHz]\n",Freq);
#endif
    EvaluateEcacheSize(Sel, 0, EcacheSize, FillSize, Freq);
    /* e-cache read */
    TimeER = EcacheRead(Sel, 0, EcacheSize, Freq);
    /* E-cache write */
    TimeEW = EcacheWrite(Sel, 0, EcacheSize, Freq);
    /* main read */
    TimeMR = MainRead(Sel, 0, EcacheSize , FillSize, Freq);
    /* main write */
    TimeMW = MainWrite(Sel, 0, EcacheSize , FillSize, Freq);
    /* Recover timer IC */
    IOtickRecover;
    sti;
    /* print */
#ifndef PRINT_IMED
    if ( GetCPUID(&VendorString[0]) )
    {
        fprintf(stdout,"------------------------------------------\n");
        fprintf(stdout,"%s\n",VendorString);
    }
    if( PhyAddr != -1 )
    {
        fprintf(stdout,"| Cache fill %08lX - %08lX |\n",PhyAddr,PhyAddr+EcacheSize*4);
        fprintf(stdout,"| Dummy fill %08lX - %08lX |\n",PhyAddr+EcacheSize,PhyAddr+(AllocSize+FillSize)*4+1024);
    }
    if( CacheSizeString[0] != '\0' )
        fprintf(stdout,"%s\n",CacheSizeString);
    fprintf(stdout,"|----------------------------------------|\n");
    {
        /* 32 bytes per field: "%7.3f" of a large value no longer overruns */
        char st[5][32];
        sprintf(st[0],"%6.2f",Freq);
        sprintf(st[1],"%7.3f",TimeER);
        sprintf(st[2],"%7.3f",TimeEW);
        sprintf(st[3],"%7.3f",TimeMR);
        sprintf(st[4],"%7.3f",TimeMW);
        fprintf(stdout,"| %6s[MHz] | [ns/dword] |[CPUclocks]|\n",st[0]);
        fprintf(stdout,"|---------------+------------+-----------|\n");
        fprintf(stdout,"| E-cache read | %8s | %6.3f |\n",st[1],TimeER/(1000./Freq));
        fprintf(stdout,"| E-cache write | %8s | %6.3f |\n",st[2],TimeEW/(1000./Freq));
        fprintf(stdout,"| main read | %8s | %6.3f |\n",st[3],TimeMR/(1000./Freq));
        fprintf(stdout,"| main write | %8s | %6.3f |\n",st[4],TimeMW/(1000./Freq));
        fprintf(stdout,"------------------------------------------\n");
    }
#endif
    return 0;
}
#define MaxCacheSize 2048 /* Kbytes : last entry of the size table, ends the scan */

/*
 * EvaluateEcacheSize - estimate cache sizes from the read test.
 *
 * Runs the read test once with the caller's EcacheSize and FillSize as a
 * reference count, then for each table size (in Kbytes, starting from the
 * second entry) compares ticks-per-Kbyte with the previous size.  A ratio
 * above 1.5 is reported on stdout as a cache level ending at the previous
 * size.  The scan stops early once the reference ticks-per-Kbyte is less
 * than 1.2 times the current one.
 *
 * The per-Kbyte values are computed with integer division before the
 * ratios are taken.  Freq is not used.
 */
void EvaluateEcacheSize(long Sel, long Offset, long EcacheSize, long FillSize, double Freq)
{
    long Sizes[32]={2,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,1024,1536,MaxCacheSize};
    long Counts[32],MaxCacheCount,i,CacheLayer=0;

    /* reference run */
    FillTestCode(Sel, Offset, EcacheSize, FillSize );
    MaxCacheCount = CallTestCode(Sel, Offset);
#ifdef DEBUG
    printf("MaxCacheCount:%ld\n",MaxCacheCount);
#endif
    FillTestCode(Sel, 0, Sizes[0]*256, 0 ); /* Kbytes * 256 = dwords */
    Counts[0] = CallTestCode(Sel, 0);
    for (i=1; Sizes[i] !=MaxCacheSize; i++)
    {
        FillTestCode(Sel, 0, Sizes[i]*256, 0 );
        Counts[i] = CallTestCode(Sel, 0);
#ifdef DEBUG
        fprintf(stdout,"Size %ldKB Count:%ld rate:%f\n",Sizes[i],Counts[i]/Sizes[i],(double)(Counts[i]/Sizes[i])/(double)(Counts[i-1]/Sizes[i-1]));
#endif
        if( (double)(Counts[i]/Sizes[i])/(double)(Counts[i-1]/Sizes[i-1]) > 1.5)
        {
            CacheLayer++;
            /* both arguments are long, so %ld rather than %d */
            fprintf(stdout,"L%ld cache size %ldKB \n",CacheLayer,Sizes[i-1]);
        }
        if ( (double)(MaxCacheCount/(EcacheSize/256))/(double)(Counts[i]/Sizes[i]) < 1.2 ) break;
    }
}