diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index cadcc64a8657..174bd9f911db 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -183,7 +183,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) spu->slb_replace = 0; spu_restart_dma(spu); - + spu->stats.slb_flt++; return 0; } @@ -332,6 +332,7 @@ spu_irq_class_2(int irq, void *data) if (stat & 0x10) /* SPU mailbox threshold */ spu->wbox_callback(spu); + spu->stats.class2_intr++; return stat ? IRQ_HANDLED : IRQ_NONE; } diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index f623d963fdc7..6d7bd60f5380 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -59,6 +59,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) spu_gang_add_ctx(gang, ctx); ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); + ctx->stats.execution_state = SPUCTX_UTIL_USER; + ctx->stats.tstamp = jiffies; atomic_inc(&nr_spu_contexts); goto out; diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 0f75c07e29d8..3a9e49a24ec0 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -33,7 +33,8 @@ * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. */ -static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr) +static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; unsigned long is_write; @@ -73,7 +74,8 @@ static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned goto bad_area; } ret = 0; - switch (handle_mm_fault(mm, vma, ea, is_write)) { + *flt = handle_mm_fault(mm, vma, ea, is_write); + switch (*flt) { case VM_FAULT_MINOR: current->min_flt++; break; @@ -153,6 +155,7 @@ int spufs_handle_class1(struct spu_context *ctx) { u64 ea, dsisr, access; unsigned long flags; + unsigned flt = 0; int ret; /* @@ -178,9 +181,13 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; + spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); + pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); + ctx->stats.hash_flt++; + /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -192,7 +199,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr); + ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); spu_acquire(ctx); /* @@ -201,11 +208,17 @@ int spufs_handle_class1(struct spu_context *ctx) * In case of unhandled error report the problem to user space. */ if (!ret) { + if (flt == VM_FAULT_MINOR) + ctx->stats.min_flt++; + else + ctx->stats.maj_flt++; + if (ctx->spu) ctx->ops->restart_dma(ctx); } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); return ret; } EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 2bb51ca51a6c..30f7b077f347 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2059,6 +2059,83 @@ static const struct file_operations spufs_tid_fops = { .release = single_release, }; +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spuctx_execution_state state) +{ + unsigned long time = ctx->stats.times[state]; + + if (ctx->stats.execution_state == state) + time += jiffies - ctx->stats.tstamp; + + return jiffies_to_msecs(time); +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + spu_acquire(ctx); + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.execution_state], + spufs_acct_time(ctx, SPUCTX_UTIL_USER), + spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), + spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), + spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + struct tree_descr spufs_dir_contents[] = { { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, @@ -2093,6 +2170,7 @@ struct tree_descr spufs_dir_contents[] = { { "dma_info", &spufs_dma_info_fops, 0444, }, { "proxydma_info", &spufs_proxydma_info_fops, 0444, }, { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; @@ -2117,6 +2195,7 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "phys-id", &spufs_id_ops, 0666, }, { "object-id", &spufs_object_id_ops, 0666, }, { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 4e0db6ae0d5e..c69f63dd5f0a 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -351,6 +351,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_SINGLE_STEP))); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + ((status >> SPU_STOP_STATUS_SHIFT) & 0x2100)) + ctx->stats.libassist++; + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9fc09306c9ae..bb16c22360d5 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -229,6 +229,10 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + spu->ctx = ctx; spu->flags = 0; ctx->spu = spu; @@ -275,6 +279,11 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); } /** @@ -400,6 +409,7 @@ static struct spu *find_victim(struct spu_context *ctx) } spu_remove_from_active_list(spu); spu_unbind_context(spu, victim); + victim->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); /* * We need to break out of the wait loop in spu_run @@ -425,6 +435,7 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); if (ctx->spu) return 0; @@ -492,6 +503,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (new || force) { spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); + ctx->stats.vol_ctx_switch++; spu_free(spu); if (new) wake_up(&new->stop_wq); @@ -521,6 +533,7 @@ void spu_deactivate(struct spu_context *ctx) } __spu_deactivate(ctx, 1, MAX_PRIO); + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** @@ -535,7 +548,10 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - __spu_deactivate(ctx, 0, MAX_PRIO); + if (__spu_deactivate(ctx, 0, MAX_PRIO)) + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); + else + spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); mutex_unlock(&ctx->state_mutex); } } @@ -564,6 +580,7 @@ static void spusched_tick(struct spu_context *ctx) __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; spu_free(spu); wake_up(&new->stop_wq); /* diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 7f5d0b2fdea3..cd2b54f6e378 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -40,6 +40,19 @@ enum { struct spu_context_ops; struct spu_gang; +/* + * This is the state for spu utilization reporting to userspace. + * Because this state is visible to userspace it must never change and needs + * to be kept strictly separate from any internal state kept by the kernel. + */ +enum spuctx_execution_state { + SPUCTX_UTIL_USER = 0, + SPUCTX_UTIL_SYSTEM, + SPUCTX_UTIL_IOWAIT, + SPUCTX_UTIL_LOADED, + SPUCTX_UTIL_MAX +}; + struct spu_context { struct spu *spu; /* pointer to a physical SPU */ struct spu_state csa; /* SPU context save area. */ @@ -87,6 +100,24 @@ struct spu_context { cpumask_t cpus_allowed; int policy; int prio; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spuctx_execution_state execution_state; + unsigned long tstamp; /* time of last ctx switch */ + unsigned long times[SPUCTX_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; }; struct spu_gang { @@ -256,4 +287,24 @@ struct spufs_coredump_reader { extern struct spufs_coredump_reader spufs_coredump_read[]; extern int spufs_coredump_num_notes; +/* + * This function is a little bit too large for an inline, but + * as fault.c is built into the kernel we can't move it out of + * line. + */ +static inline void spuctx_switch_state(struct spu_context *ctx, + enum spuctx_execution_state new_state) +{ + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + + if (ctx->stats.execution_state != new_state) { + unsigned long curtime = jiffies; + + ctx->stats.times[ctx->stats.execution_state] += + curtime - ctx->stats.tstamp; + ctx->stats.tstamp = curtime; + ctx->stats.execution_state = new_state; + } +} + #endif diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index 5f894b61e2d4..5957fcdda04c 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -156,6 +156,12 @@ struct spu { u64 shadow_int_mask_RW[3]; struct sys_device sysdev; + + struct { + /* protected by interrupt reentrancy */ + unsigned long long slb_flt; + unsigned long long class2_intr; + } stats; }; struct spu *spu_alloc(void);