/*++

Copyright (c) 1990  Microsoft Corporation

Module Name:

    p6.c

Abstract:

    Counted events for P6 processor

Author:

    Ken Reneris

Environment:

Notes:

Revision History:

--*/

#include "ntddk.h"
#include "..\..\pstat.h"
#include "stat.h"

//
// When ALLOC_DATA_PRAGMA is defined, the data defined below is placed in
// the "PAGE" data segment.
//

#ifdef ALLOC_DATA_PRAGMA
#pragma data_seg("PAGE")
#endif

//
// Official descriptions
//
// One string per event.  The hexadecimal suffix of each name is the event
// code used for the matching entry in P6Events below.
//

char    desc_0x03[] = "Number of store buffer blocks.";
char    desc_0x04[] = "Number of store buffer drain cycles.";
char    desc_0x05[] = "Number of misaligned data memory references.";
char    desc_0x06[] = "Number of segment register loads.";
char    desc_0x10[] = "Number of computational floating point operations "
                      "executed.";
char    desc_0x11[] = "Number of floating point exception cases handled by "
                      "microcode.";
char    desc_0x12[] = "Number of multiplies.";
char    desc_0x13[] = "Number of divides.";
char    desc_0x14[] = "Number of cycles the divider is busy.";
char    desc_0x21[] = "Number of L2 address strobes.";
char    desc_0x22[] = "Number of cycles in which the data bus is busy.";
char    desc_0x23[] = "Number of cycles in which the data bus is busy "
                      "transferring data from L2 to the processor.";
char    desc_0x24[] = "Number of lines allocated in the L2.";
char    desc_0x25[] = "Number of modified lines allocated in the L2.";
char    desc_0x26[] = "Number of lines removed from the L2 for any reason.";
char    desc_0x27[] = "Number of Modified lines removed from the L2 for any "
                      "reason.";
char    desc_0x28[] = "Number of L2 instruction fetches.";
char    desc_0x29[] = "Number of L2 data loads.";
char    desc_0x2A[] = "Number of L2 data stores.";
char    desc_0x2E[] = "Total number of L2 requests.";
char    desc_0x43[] = "Total number of all memory references, both cacheable "
                      "and non-cacheable.";
char    desc_0x45[] = "Number of total lines allocated in the DCU.";
char    desc_0x46[] = "Number of M state lines allocated in the DCU.";
char    desc_0x47[] = "Number of M state lines evicted from the DCU. This "
                      "includes evictions via snoop HITM, intervention "
                      "or replacement.";
char    desc_0x48[] = "Weighted number of cycles while a DCU miss is "
                      "outstanding.";
char    desc_0x60[] = "Number of bus requests outstanding.";
char    desc_0x61[] = "Number of bus clock cycles that this processor is "
                      "driving the BNR pin.";
char    desc_0x62[] = "Number of clocks in which DRDY is asserted.";
char    desc_0x63[] = "Number of clocks in which LOCK is asserted.";
char    desc_0x64[] = "Number of bus clock cycles that this processor is "
                      "receiving data.";
char    desc_0x65[] = "Number of Burst Read transactions.";
char    desc_0x66[] = "Number of Read For Ownership transactions.";
char    desc_0x67[] = "Number of Write Back transactions.";
char    desc_0x68[] = "Number of Instruction Fetch transactions.";
char    desc_0x69[] = "Number of Invalidate transactions.";
char    desc_0x6A[] = "Number of Partial Write transactions.";
char    desc_0x6B[] = "Number of Partial transactions.";
char    desc_0x6C[] = "Number of I/O transactions.";
char    desc_0x6D[] = "Number of Deferred transactions.";
char    desc_0x6E[] = "Number of Burst transactions.";
char    desc_0x6F[] = "Number of memory transactions.";
char    desc_0x70[] = "Total number of all transactions.";
char    desc_0x79[] = "Number of cycles for which the processor is not halted.";
char    desc_0x7A[] = "Number of bus clock cycles that this processor is "
                      "driving the HIT pin, including cycles due to "
                      "snoop stalls.";
char    desc_0x7B[] = "Number of bus clock cycles that this processor is "
                      "driving the HITM pin, including cycles due to "
                      "snoop stalls.";
char    desc_0x7E[] = "Number of clock cycles for which the bus is snoop "
                      "stalled.";
char    desc_0x80[] = "Total number of instruction fetches, both cacheable "
                      "and uncacheable.";
char    desc_0x81[] = "Total number of instruction fetch misses.";
char    desc_0x85[] = "Number of ITLB misses.";
char    desc_0x86[] = "The number of cycles that instruction fetch "
                      "pipestage is stalled (includes cache "
                      "misses, ITLB misses, ITLB faults and "
                      "Victim Cache evictions.)";
char    desc_0x87[] = "Number of cycles for which the instruction "
                      "length decoder is stalled.";
char    desc_0xA2[] = "Number of cycles for which there are resource related "
                      "stalls.";
char    desc_0xC0[] = "Number of instructions retired.";
char    desc_0xC1[] = "Number of computational floating point operations "
                      "retired.";
char    desc_0xC2[] = "Number of UOPs retired.";
char    desc_0xC4[] = "Number of branch instructions retired.";
char    desc_0xC5[] = "Number of mispredicted branches retired.";
char    desc_0xC6[] = "Number of processor cycles for which interrupts are "
                      "disabled.";
char    desc_0xC7[] = "Number of processor cycles for which interrupts are "
                      "disabled and interrupts are pending.";
char    desc_0xC8[] = "Number of hardware interrupts received.";
char    desc_0xC9[] = "Number of taken branches retired.";
char    desc_0xCA[] = "Number of taken mispredicted branches retired.";
char    desc_0xD0[] = "Number of instructions decoded.";
char    desc_0xD2[] = "Number of cycles or events for partial stalls.";
char    desc_0xE0[] = "Number of branch instructions decoded.";
char    desc_0xE2[] = "Number of branches that miss the BTB.";
char    desc_0xE4[] = "Number of bogus branches.";
char    desc_0xE6[] = "Number of times BACLEAR is asserted.";

//
// Suggested count used for events that are expected to occur rarely.
//

#define RARE        100

//
// P6Events - table of the countable P6 events.
//
// Every entry supplies six values, in this order:
//
//      event code, short token, suggested count, short description,
//      official event name, official description (one of desc_0xNN above)
//
// Entries are listed in ascending event code order, which is why some
// functional groups (floating point, bus, stalls, branches, ...) are split
// into several runs.  The table ends with an all zero/NULL entry.
//
// The two ruler comments below number the character positions of the short
// description strings.
//
// suggested counts are set to be around .1ms
//

//                                            1         2         3*        4
COUNTED_EVENTS P6Events[] = {
//                                   1234567890123456789012345678901234567890

    //
    // Memory Ordering
    //

    // LD_BLOCKS - Number of store buffer blocks.
    0x03,   "sbb",          1000,   "Store buffer blocks",
            "LD_BLOCKS", desc_0x03,

    // SB_DRAINS - Number of store buffer drain cycles.
    0x04,   "sbd",          RARE,   "Store buffer drain cycles",
            "SB_DRAINS", desc_0x04,

    // MISALIGN_MEM_REF - Number of misaligned data memory references.
    0x05,   "misalign",     1000,   "Misaligned data ref",
            "MISALIGN_MEM_REF", desc_0x05,

    //
    // Segment Register Loads
    //

    // SEGMENT_REG_LOADS - Number of segment register loads.
    0x06,   "segloads",     10000,  "Segment loads",
            "SEGMENT_REG_LOADS", desc_0x06,

    //
    // Floating Point
    //

    // FP_COMP_OPS_EXE - Number of computational floating point operations
    // executed.
    0x10,   "flops",        1000,   "FLOPs (computational) executed",
            "FP_COMP_OPS_EXE", desc_0x10,

    // FP_ASSIST - Number of floating point exception cases handled by
    // microcode.
    0x11,   "eflops",       RARE,   "FP exceptions handled by ucode",
            "FP_ASSIST", desc_0x11,

    // MUL - Number of multiplies.
    0x12,   "mul",          1000,   "Multiplies",
            "MUL", desc_0x12,

    // DIV - Number of divides.
    0x13,   "div",          1000,   "Divides",
            "DIV", desc_0x13,

    // CYCLES_DIV_BUSY - Number of cycles the divider is busy.
    0x14,   "divb",         10000,  "Divider busy cycles",
            "CYCLES_DIV_BUSY", desc_0x14,

    // see also 0xC1 below

    //
    // Secondary Cache (L2)
    //

    // L2_ADS - Number of L2 address strobes.
    0x21,   "l2astrobe",    1000,   "L2 address strobes",
            "L2_ADS", desc_0x21,

    // L2_DBUS_BUSY - Number of cycles in which the data bus was busy.
    0x22,   "l2busy",       10000,  "L2 data bus busy cycles",
            "L2_DBUS_BUSY", desc_0x22,

    // L2_DBUS_BUSY_RD - Number of cycles in which the data bus was busy
    // transferring data from L2 to processor.
    0x23,   "l2busyrd",     10000,  "L2 data bus to cpu busy cycles",
            "L2_DBUS_BUSY_RD", desc_0x23,

    // L2_LINES_IN - Number of lines allocated in the L2.
    0x24,   "l2all",        1000,   "L2 lines allocated",
            "L2_LINES_IN", desc_0x24,

    // L2_M_LINES_IN - Number of Modified lines allocated in the L2.
    0x25,   "l2m",          1000,   "L2 lines M state",
            "L2_M_LINES_IN", desc_0x25,

    // L2_LINES_OUT - Number of lines removed from the L2 for any reason.
    0x26,   "l2evict",      1000,   "L2 lines removed",
            "L2_LINES_OUT", desc_0x26,

    // L2_M_LINES_OUT - Number of Modified lines removed from the L2 for
    // any reason.
    0x27,   "l2mevict",     100,    "L2 lines M state removed",
            "L2_M_LINES_OUT", desc_0x27,

    // L2_IFETCH - L2 instruction fetches - "MESI" (0Fh)
    0x28,   "l2inst",       0,      "L2 instruction fetches",
            "L2_IFETCH", desc_0x28,

    // L2_LD - L2 data loads - "MESI" (0Fh)
    0x29,   "l2load",       0,      "L2 data loads",
            "L2_LD", desc_0x29,

    // L2_ST - L2 data stores - "MESI" (0Fh)
    0x2a,   "l2store",      0,      "L2 data stores",
            "L2_ST", desc_0x2A,

    // L2_RQSTS - Total number of L2 requests - "MESI" (0Fh)
    0x2e,   "l2req",        0,      "L2 requests (all)",
            "L2_RQSTS", desc_0x2E,

    //
    // Data Cache Unit (DCU)
    //

    // DATA_MEM_REFS - Total number of all memory references, both cacheable
    // and non-cacheable.
    0x43,   "memref",       10000,  "Data memory references",
            "DATA_MEM_REFS", desc_0x43,

    // DCU_LINES_IN - Number of total lines allocated in the DCU.
    0x45,   "dculines",     1000,   "DCU lines allocated",
            "DCU_LINES_IN", desc_0x45,

    // DCU_M_LINES_IN - Number of M state lines allocated in the DCU.
    0x46,   "dcumlines",    100,    "DCU M state lines allocated",
            "DCU_M_LINES_IN", desc_0x46,

    // DCU_M_LINES_OUT - Number of M state lines evicted from the DCU.
    // This includes evictions via snoop HITM, intervention or replacement.
    0x47,   "dcumevicted",  100,    "DCU M state lines evicted",
            "DCU_M_LINES_OUT", desc_0x47,

    // DCU_MISS_OUTSTANDING - Weighted number of cycles while a DCU miss is
    // outstanding.  Note - An access that also misses the L2 is short-changed
    // by 2 cycles.  i.e. - if it counts N cycles, should be N+2 cycles.
    // Count value not precise, but still useful.
    0x48,   "dcuout",       100000, "Weighted DCU misses outstd",
            "DCU_MISS_OUTSTANDING", desc_0x48,

    //
    // External Bus Logic (EBL)
    //

    // BUS_REQ_OUTSTANDING - Total number of bus requests outstanding.
    // Note - Counts only DCU full-line cacheable reads (not RFO's, writes,
    // ifetches or anything else).  Counts "waiting for bus" to "Complete"
    // (last data chunk received).
    0x60,   "bus",          1000,   "Bus requests outstanding",
            "BUS_REQ_OUTSTANDING", desc_0x60,

    // BUS_BNR_DRV - Number of bus clock cycles that this processor is driving
    // the corresponding pin.
    0x61,   "bnr",          0,      "Bus BNR pin drive cycles",
            "BUS_BNR_DRV", desc_0x61,

    // BUS_DRDY_CLOCKS - Number of clocks in which DRDY is asserted.
    // Note - UMSK = 0h counts bus clocks when PPP is driving DRDY.
    //        UMSK = 20h counts in processor clocks when any agent is
    //        driving DRDY.
    0x62,   "drdy",         0,      "Bus DRDY asserted clocks",
            "BUS_DRDY_CLOCKS", desc_0x62,

    // BUS_LOCK_CLOCKS - Number of clocks LOCK is asserted.
    // Note - always counts in processor clocks.
    0x63,   "lock",         0,      "Bus LOCK asserted clocks",
            "BUS_LOCK_CLOCKS", desc_0x63,

    // BUS_DATA_RCV - Number of bus clock cycles that this p6 is receiving
    // data.
    0x64,   "rdata",        10000,  "Bus clocks receiving data",
            "BUS_DATA_RCV", desc_0x64,

    // BUS_TRANS_BRD - Total number of Burst Read transactions.
    0x65,   "bread",        10000,  "Bus burst read transactions",
            "BUS_TRANS_BRD", desc_0x65,

    // BUS_TRANS_RFO - Total number of Read For Ownership transactions.
    0x66,   "owner",        1000,   "Bus read for ownership trans",
            "BUS_TRANS_RFO", desc_0x66,

    // BUS_TRANS_WB - Total number of Write Back transactions.
    0x67,   "writeback",    1000,   "Bus writeback transactions",
            "BUS_TRANS_WB", desc_0x67,

    // BUS_TRANS_IFETCH - Total number of instruction fetch transactions.
    0x68,   "binst",        10000,  "Bus instruction fetches",
            "BUS_TRANS_IFETCH", desc_0x68,

    // BUS_TRANS_INVAL - Total number of invalidate transactions.
    0x69,   "binvalid",     1000,   "Bus invalidate transactions",
            "BUS_TRANS_INVAL", desc_0x69,

    // BUS_TRANS_PWR - Total number of Partial Write transactions.
    0x6a,   "bpwrite",      1000,   "Bus partial write transactions",
            "BUS_TRANS_PWR", desc_0x6A,

    // BUS_TRANS_P - Total number of Partial transactions.
    0x6b,   "bptrans",      1000,   "Bus partial transactions",
            "BUS_TRANS_P", desc_0x6B,

    // BUS_TRANS_IO - Total number of IO transactions.
    0x6c,   "bio",          10000,  "Bus IO transactions",
            "BUS_TRANS_IO", desc_0x6C,

    // BUS_TRANS_DEF - Total number of deferred transactions.
    0x6d,   "bdeferred",    10000,  "Bus deferred transactions",
            "BUS_TRANS_DEF", desc_0x6D,

    // BUS_TRANS_BURST - Total number of Burst transactions.
    0x6e,   "bburst",       10000,  "Bus burst transactions (total)",
            "BUS_TRANS_BURST", desc_0x6E,

    // BUS_TRANS_MEM - Total number of memory transactions.
    0x6f,   "bmemory",      10000,  "Bus memory transactions (total)",
            "BUS_TRANS_MEM", desc_0x6F,

    // BUS_TRANS_ANY - Total number of all transactions.
    0x70,   "btrans",       10000,  "Bus all transactions",
            "BUS_TRANS_ANY", desc_0x70,

    // continued at 0x7a below

    //
    // Clocks
    //

    // CPU_CLK_UNHALTED - Number of cycles for which the processor is not
    // halted.
    0x79,   "nhalt",        100000, "CPU was not HALTED cycles",
            "CPU_CLK_UNHALTED", desc_0x79,

    //
    // External Bus Logic (EBL) (continued from 0x70 above)
    //

    // BUS_HIT_DRV - Number of bus clock cycles that this processor is driving
    // the corresponding pin.
    // Note - includes cycles due to snoop stalls
    0x7a,   "hit",          1000,   "Bus CPU drives HIT cycles",
            "BUS_HIT_DRV", desc_0x7A,

    // BUS_HITM_DRV - Number of bus clock cycles that this processor is driving
    // the corresponding pin.
    // Note - includes cycles due to snoop stalls
    0x7b,   "hitm",         1000,   "Bus CPU drives HITM cycles",
            "BUS_HITM_DRV", desc_0x7B,

    // BUS_SNOOP_STALL - Number of clock cycles for which the bus is snoop
    // stalled.
    0x7e,   "bsstall",      0,      "Bus snoop stalled cycles",
            "BUS_SNOOP_STALL", desc_0x7E,

    //
    // Instruction Fetch Unit (IFU)
    //

    // IFU_IFETCH - Total number of instruction fetches (cacheable and
    // uncacheable).
    0x80,   "ifetch",       100000, "Instruction fetches",
            "IFU_IFETCH", desc_0x80,

    // IFU_IFETCH_MISS - Total number of instruction fetch misses.
    0x81,   "imfetch",      10000,  "Instruction fetch Misses",
            "IFU_IFETCH_MISS", desc_0x81,

    // ITLB_MISS - Number of ITLB misses.
    0x85,   "itlbmiss",     100,    "Instruction TLB misses",
            "ITLB_MISS", desc_0x85,

    // IFU_MEM_STALL - The number of cycles that instruction fetch pipestage
    // is stalled (includes cache misses, ITLB misses, ITLB faults and
    // Victim Cache evictions).
    0x86,   "ifstall",      1000,   "Inst fetch stalled cycles",
            "IFU_MEM_STALL", desc_0x86,

    // ILD_STALL - Number of cycles for which the instruction length decoder
    // is stalled.
    0x87,   "ildstall",     1000,   "Inst len decoder stalled cycles",
            "ILD_STALL", desc_0x87,

    //
    // Stalls
    //

    // RESOURCE_STALLS - Number of cycles for which there are resource related
    // stalls.
    0xa2,   "rstall",       10000,  "Resource related stalls",
            "RESOURCE_STALLS", desc_0xA2,

    // see also 0xd2 below

    //
    // Instruction Decode and Retirement
    //

    // INST_RETIRED - Number of instructions retired.
    0xc0,   "instr",        100000, "Instructions retired",
            "INST_RETIRED", desc_0xC0,

    // continued at 0xc2 below

    //
    // Floating Point (continued from 0x14 above)
    //

    // FLOPS - Number of computational floating point operations retired.
    0xc1,   "fpr",          RARE,   "FP compute opers retired",
            "FLOPS", desc_0xC1,

    //
    // Instruction Decode and Retirement (continued from 0xc0 above)
    //

    // UOPS_RETIRED - Number of Uops retired.
    0xc2,   "ur",           100000, "UOPs retired",
            "UOPS_RETIRED", desc_0xC2,

    // see also 0xd0 below

    //
    // Branches
    //

    // BR_INST_RETIRED - Number of branch instructions that retire.
    0xc4,   "br",           10000,  "Branches retired",
            "BR_INST_RETIRED", desc_0xC4,

    // BR_MISS_PRED_RETIRED - Number of mispredicted branches that retire.
    0xc5,   "brm",          1000,   "Branch miss predictions retired",
            "BR_MISS_PRED_RETIRED", desc_0xC5,

    // continued at 0xc9 below

    //
    // Interrupts
    //

    // CYCLES_INT_MASKED - Number of processor cycles for which interrupts
    // are disabled.
    0xc6,   "intm",         10000,  "Interrupts masked cycles",
            "CYCLES_INT_MASKED", desc_0xC6,

    // CYCLES_INT_PENDING_AND_MASKED - Number of processor cycles for which
    // interrupts are disabled and interrupts are pending.
    0xc7,   "intmp",        1000,   "Int pending while masked cycles",
            "CYCLES_INT_PENDING_AND_MASKED", desc_0xC7,

    // HW_INT_RX - Number of hardware interrupts received.
    0xc8,   "int",          0,      "Hardware interrupts received",
            "HW_INT_RX", desc_0xC8,

    //
    // Branches (continued from 0xc5 above)
    //

    // BR_TAKEN_RETIRED - Number of taken branches that are retired.
    0xc9,   "brt",          10000,  "Taken branches retired",
            "BR_TAKEN_RETIRED", desc_0xC9,

    // BR_MISS_PRED_TAKEN_RET - Number of taken mispredicted branches that
    // are retired.
    0xca,   "brtm",         0,      "Taken branch miss pred retired",
            "BR_MISS_PRED_TAKEN_RET", desc_0xCA,

    // continued at 0xe0 below

    //
    // Instruction Decode and Retirement (continued from 0xc2 above)
    //

    // INST_DECODED - Number of instructions decoded.
    0xd0,   "idecode",      100000, "Instructions decoded",
            "INST_DECODED", desc_0xD0,

    //
    // Stalls (continued from 0xa2 above)
    //

    // PARTIAL_RAT_STALLS - Number of cycles or events for partial stalls.
    0xd2,   "pstall",       1000,   "Partial register stalls",
            "PARTIAL_RAT_STALLS", desc_0xD2,

    //
    // Branches (continued from 0xca above)
    //

    // BR_INST_DECODED - Number of branch instructions that are decoded.
    0xe0,   "ibdecode",     0,      "Branches decoded",
            "BR_INST_DECODED", desc_0xE0,

    // BTB_MISSES - Number of branches that miss the BTB.
    0xe2,   "btbmiss",      1000,   "BTB misses",
            "BTB_MISSES", desc_0xE2,

    // BR_BOGUS - Number of bogus branches.
    0xe4,   "brbogus",      1000,   "Bogus branches",
            "BR_BOGUS", desc_0xE4,

    // BACLEARS - Number of times BACLEAR is asserted.
    0xe6,   "baclear",      1000,   "BACLEARS Asserted",
            "BACLEARS", desc_0xE6,

    //
    // Terminator
    //

    0,      NULL,           0,      NULL,
            NULL, NULL
};