From 95aa2041c654161d1b5c1eca5379d67d91ef1cf2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 17 Sep 2025 17:12:53 -0500 Subject: [PATCH 1/3] scsi: target: Fix LUN/device R/W and total command stats In commit 9cf2317b795d ("scsi: target: Move I/O path stats to per CPU") I saw we sometimes use %u and also misread the spec. As a result I thought all the stats were supposed to be 32-bit only. However, for the majority of cases we support currently, the spec specifies u64 bit stats. This patch converts the stats changed in the commit above to u64. Fixes: 9cf2317b795d ("scsi: target: Move I/O path stats to per CPU") Signed-off-by: Mike Christie Reviewed-by: Dmitry Bogdanov Link: https://patch.msgid.link/20250917221338.14813-2-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_stat.c | 24 ++++++++++++------------ include/target/target_core_base.h | 12 ++++++------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 6bdf2d8bd694..4fdc307ea38b 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -282,7 +282,7 @@ static ssize_t target_stat_lu_num_cmds_show(struct config_item *item, struct se_device *dev = to_stat_lu_dev(item); struct se_dev_io_stats *stats; unsigned int cpu; - u32 cmds = 0; + u64 cmds = 0; for_each_possible_cpu(cpu) { stats = per_cpu_ptr(dev->stats, cpu); @@ -290,7 +290,7 @@ static ssize_t target_stat_lu_num_cmds_show(struct config_item *item, } /* scsiLuNumCommands */ - return snprintf(page, PAGE_SIZE, "%u\n", cmds); + return snprintf(page, PAGE_SIZE, "%llu\n", cmds); } static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item, @@ -299,7 +299,7 @@ static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item, struct se_device *dev = to_stat_lu_dev(item); struct se_dev_io_stats *stats; unsigned int cpu; - u32 bytes = 0; + u64 bytes = 0; for_each_possible_cpu(cpu) { stats = per_cpu_ptr(dev->stats, cpu); @@ -307,7 +307,7 @@ static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item, } /* scsiLuReadMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20); + return snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); } static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item, @@ -316,7 +316,7 @@ static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item, struct se_device *dev = to_stat_lu_dev(item); struct se_dev_io_stats *stats; unsigned int cpu; - u32 bytes = 0; + u64 bytes = 0; for_each_possible_cpu(cpu) { stats = per_cpu_ptr(dev->stats, cpu); @@ -324,7 +324,7 @@ static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item, } /* scsiLuWrittenMegaBytes */ - return snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20); + return snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); } static ssize_t target_stat_lu_resets_show(struct config_item *item, char *page) @@ -1044,7 +1044,7 @@ static ssize_t target_stat_auth_num_cmds_show(struct config_item *item, struct se_dev_entry *deve; unsigned int cpu; ssize_t ret; - u32 cmds = 0; + u64 cmds = 0; rcu_read_lock(); deve = target_nacl_find_deve(nacl, lacl->mapped_lun); @@ -1059,7 +1059,7 @@ static ssize_t target_stat_auth_num_cmds_show(struct config_item *item, } /* scsiAuthIntrOutCommands */ - ret = snprintf(page, PAGE_SIZE, "%u\n", cmds); + ret = snprintf(page, PAGE_SIZE, "%llu\n", cmds); rcu_read_unlock(); return ret; } @@ -1073,7 +1073,7 @@ static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item, struct se_dev_entry *deve; unsigned int cpu; ssize_t ret; - u32 bytes = 0; + u64 bytes = 0; rcu_read_lock(); deve = target_nacl_find_deve(nacl, lacl->mapped_lun); @@ -1088,7 +1088,7 @@ static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item, } /* scsiAuthIntrReadMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20); + ret = snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); rcu_read_unlock(); return ret; } @@ -1102,7 +1102,7 @@ static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item, struct se_dev_entry *deve; unsigned int cpu; ssize_t ret; - u32 bytes = 0; + u64 bytes = 0; rcu_read_lock(); deve = target_nacl_find_deve(nacl, lacl->mapped_lun); @@ -1117,7 +1117,7 @@ static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item, } /* scsiAuthIntrWrittenMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%u\n", bytes >> 20); + ret = snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); rcu_read_unlock(); return ret; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c4d9116904aa..27e1f9d5f0c6 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -671,9 +671,9 @@ struct se_lun_acl { }; struct se_dev_entry_io_stats { - u32 total_cmds; - u32 read_bytes; - u32 write_bytes; + u64 total_cmds; + u64 read_bytes; + u64 write_bytes; }; struct se_dev_entry { @@ -806,9 +806,9 @@ struct se_device_queue { }; struct se_dev_io_stats { - u32 total_cmds; - u32 read_bytes; - u32 write_bytes; + u64 total_cmds; + u64 read_bytes; + u64 write_bytes; }; struct se_device { From ed6b97a79577db9bf9d7b21fd623f24f499e3acc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 17 Sep 2025 17:12:54 -0500 Subject: [PATCH 2/3] scsi: target: Create and use macro helpers for per-CPU stats Create some macros to reduce code duplication for when we handle per-CPU stats. Convert the existing LUN and auth cases. Note: This is similar to percpu_counters but they only support s64 since they are also used for non-stat counters where you need to handle/prevent rollover more gracefully. Our use is just for stats where the spec defines u64 use plus we already have some files exporting u64 values so it's not possible to directly use percpu_counters. Signed-off-by: Mike Christie Reviewed-by: Dmitry Bogdanov Link: https://patch.msgid.link/20250917221338.14813-3-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_stat.c | 187 +++++++++--------------------- 1 file changed, 56 insertions(+), 131 deletions(-) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 4fdc307ea38b..e29d43dacaf7 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -276,56 +276,39 @@ static ssize_t target_stat_lu_state_bit_show(struct config_item *item, return snprintf(page, PAGE_SIZE, "exposed\n"); } -static ssize_t target_stat_lu_num_cmds_show(struct config_item *item, - char *page) -{ - struct se_device *dev = to_stat_lu_dev(item); - struct se_dev_io_stats *stats; - unsigned int cpu; - u64 cmds = 0; - - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(dev->stats, cpu); - cmds += stats->total_cmds; - } - - /* scsiLuNumCommands */ - return snprintf(page, PAGE_SIZE, "%llu\n", cmds); +#define per_cpu_stat_snprintf(stats_struct, prefix, field, shift) \ +static ssize_t \ +per_cpu_stat_##prefix##_snprintf(struct stats_struct __percpu *per_cpu_stats, \ + char *page) \ +{ \ + struct stats_struct *stats; \ + unsigned int cpu; \ + u64 sum = 0; \ + \ + for_each_possible_cpu(cpu) { \ + stats = per_cpu_ptr(per_cpu_stats, cpu); \ + sum += stats->field; \ + } \ + \ + return snprintf(page, PAGE_SIZE, "%llu\n", sum >> shift); \ } -static ssize_t target_stat_lu_read_mbytes_show(struct config_item *item, - char *page) -{ - struct se_device *dev = to_stat_lu_dev(item); - struct se_dev_io_stats *stats; - unsigned int cpu; - u64 bytes = 0; +#define lu_show_per_cpu_stat(prefix, field, shift) \ +per_cpu_stat_snprintf(se_dev_io_stats, prefix, field, shift); \ +static ssize_t \ +target_stat_##prefix##_show(struct config_item *item, char *page) \ +{ \ + struct se_device *dev = to_stat_lu_dev(item); \ + \ + return per_cpu_stat_##prefix##_snprintf(dev->stats, page); \ +} \ - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(dev->stats, cpu); - bytes += stats->read_bytes; - } - - /* scsiLuReadMegaBytes */ - return snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); -} - -static ssize_t target_stat_lu_write_mbytes_show(struct config_item *item, - char *page) -{ - struct se_device *dev = to_stat_lu_dev(item); - struct se_dev_io_stats *stats; - unsigned int cpu; - u64 bytes = 0; - - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(dev->stats, cpu); - bytes += stats->write_bytes; - } - - /* scsiLuWrittenMegaBytes */ - return snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); -} +/* scsiLuNumCommands */ +lu_show_per_cpu_stat(lu_num_cmds, total_cmds, 0); +/* scsiLuReadMegaBytes */ +lu_show_per_cpu_stat(lu_read_mbytes, read_bytes, 20); +/* scsiLuWrittenMegaBytes */ +lu_show_per_cpu_stat(lu_write_mbytes, write_bytes, 20); static ssize_t target_stat_lu_resets_show(struct config_item *item, char *page) { @@ -1035,92 +1018,34 @@ static ssize_t target_stat_auth_att_count_show(struct config_item *item, return ret; } -static ssize_t target_stat_auth_num_cmds_show(struct config_item *item, - char *page) -{ - struct se_lun_acl *lacl = auth_to_lacl(item); - struct se_node_acl *nacl = lacl->se_lun_nacl; - struct se_dev_entry_io_stats *stats; - struct se_dev_entry *deve; - unsigned int cpu; - ssize_t ret; - u64 cmds = 0; - - rcu_read_lock(); - deve = target_nacl_find_deve(nacl, lacl->mapped_lun); - if (!deve) { - rcu_read_unlock(); - return -ENODEV; - } - - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(deve->stats, cpu); - cmds += stats->total_cmds; - } - - /* scsiAuthIntrOutCommands */ - ret = snprintf(page, PAGE_SIZE, "%llu\n", cmds); - rcu_read_unlock(); - return ret; +#define auth_show_per_cpu_stat(prefix, field, shift) \ +per_cpu_stat_snprintf(se_dev_entry_io_stats, prefix, field, shift); \ +static ssize_t \ +target_stat_##prefix##_show(struct config_item *item, char *page) \ +{ \ + struct se_lun_acl *lacl = auth_to_lacl(item); \ + struct se_node_acl *nacl = lacl->se_lun_nacl; \ + struct se_dev_entry *deve; \ + int ret; \ + \ + rcu_read_lock(); \ + deve = target_nacl_find_deve(nacl, lacl->mapped_lun); \ + if (!deve) { \ + rcu_read_unlock(); \ + return -ENODEV; \ + } \ + \ + ret = per_cpu_stat_##prefix##_snprintf(deve->stats, page); \ + rcu_read_unlock(); \ + return ret; \ } -static ssize_t target_stat_auth_read_mbytes_show(struct config_item *item, - char *page) -{ - struct se_lun_acl *lacl = auth_to_lacl(item); - struct se_node_acl *nacl = lacl->se_lun_nacl; - struct se_dev_entry_io_stats *stats; - struct se_dev_entry *deve; - unsigned int cpu; - ssize_t ret; - u64 bytes = 0; - - rcu_read_lock(); - deve = target_nacl_find_deve(nacl, lacl->mapped_lun); - if (!deve) { - rcu_read_unlock(); - return -ENODEV; - } - - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(deve->stats, cpu); - bytes += stats->read_bytes; - } - - /* scsiAuthIntrReadMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); - rcu_read_unlock(); - return ret; -} - -static ssize_t target_stat_auth_write_mbytes_show(struct config_item *item, - char *page) -{ - struct se_lun_acl *lacl = auth_to_lacl(item); - struct se_node_acl *nacl = lacl->se_lun_nacl; - struct se_dev_entry_io_stats *stats; - struct se_dev_entry *deve; - unsigned int cpu; - ssize_t ret; - u64 bytes = 0; - - rcu_read_lock(); - deve = target_nacl_find_deve(nacl, lacl->mapped_lun); - if (!deve) { - rcu_read_unlock(); - return -ENODEV; - } - - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(deve->stats, cpu); - bytes += stats->write_bytes; - } - - /* scsiAuthIntrWrittenMegaBytes */ - ret = snprintf(page, PAGE_SIZE, "%llu\n", bytes >> 20); - rcu_read_unlock(); - return ret; -} +/* scsiAuthIntrOutCommands */ +auth_show_per_cpu_stat(auth_num_cmds, total_cmds, 0); +/* scsiAuthIntrReadMegaBytes */ +auth_show_per_cpu_stat(auth_read_mbytes, read_bytes, 20); +/* scsiAuthIntrWrittenMegaBytes */ +auth_show_per_cpu_stat(auth_write_mbytes, write_bytes, 20); static ssize_t target_stat_auth_hs_num_cmds_show(struct config_item *item, char *page) From bbb490053173b737604a87af03f2113fb1c279a0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 17 Sep 2025 17:12:55 -0500 Subject: [PATCH 3/3] scsi: target: Move LUN stats to per-CPU The atomic use in the main I/O path is causing perf issues when using higher performance backend devices and multiple queues (more than 10 when using vhost-scsi) like with this fio workload: [global] bs=4K iodepth=128 direct=1 ioengine=libaio group_reporting time_based runtime=120 name=standard-iops rw=randread numjobs=16 cpus_allowed=0-15 To fix this issue, move the LUN stats to per CPU. Note: I forgot to include this patch with the delayed/ordered per CPU tracking and per device/device entry per CPU stats. With this patch you get the full 33% improvements when using fast backends, multiple queues and multiple IO submiters. Signed-off-by: Mike Christie Reviewed-by: Dmitry Bogdanov Link: https://patch.msgid.link/20250917221338.14813-4-michael.christie@oracle.com Signed-off-by: Martin K. Petersen --- drivers/target/target_core_device.c | 1 + drivers/target/target_core_fabric_configfs.c | 2 +- drivers/target/target_core_internal.h | 1 + drivers/target/target_core_stat.c | 67 +++++++------------- drivers/target/target_core_tpg.c | 23 ++++++- drivers/target/target_core_transport.c | 22 +++++-- include/target/target_core_base.h | 8 +-- 7 files changed, 65 insertions(+), 59 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 7bb711b24c0d..2d4a7c0c69ce 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -814,6 +814,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; dev->dev_attrib.submit_type = TARGET_FABRIC_DEFAULT_SUBMIT; + /* Skip allocating lun_stats since we can't export them. */ xcopy_lun = &dev->xcopy_lun; rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_shutdown_comp); diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index 7156a4dc1ca7..13159928e365 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -697,7 +697,7 @@ static void target_fabric_port_release(struct config_item *item) struct se_lun *lun = container_of(to_config_group(item), struct se_lun, lun_group); - kfree_rcu(lun, rcu_head); + call_rcu(&lun->rcu_head, target_tpg_free_lun); } static struct configfs_item_operations target_fabric_port_item_ops = { diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 20aab1f50565..763e6d26e187 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -125,6 +125,7 @@ void core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *, struct se_lun *); void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *); struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64); +void target_tpg_free_lun(struct rcu_head *head); int core_tpg_add_lun(struct se_portal_group *, struct se_lun *, bool, struct se_device *); void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *); diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index e29d43dacaf7..083205052be2 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -606,53 +606,30 @@ static ssize_t target_stat_tgt_port_port_index_show(struct config_item *item, return ret; } -static ssize_t target_stat_tgt_port_in_cmds_show(struct config_item *item, - char *page) -{ - struct se_lun *lun = to_stat_tgt_port(item); - struct se_device *dev; - ssize_t ret = -ENODEV; - - rcu_read_lock(); - dev = rcu_dereference(lun->lun_se_dev); - if (dev) - ret = snprintf(page, PAGE_SIZE, "%lu\n", - atomic_long_read(&lun->lun_stats.cmd_pdus)); - rcu_read_unlock(); - return ret; +#define tgt_port_show_per_cpu_stat(prefix, field, shift) \ +per_cpu_stat_snprintf(scsi_port_stats, prefix, field, shift); \ +static ssize_t \ +target_stat_##prefix##_show(struct config_item *item, char *page) \ +{ \ + struct se_lun *lun = to_stat_tgt_port(item); \ + struct se_device *dev; \ + int ret; \ + \ + rcu_read_lock(); \ + dev = rcu_dereference(lun->lun_se_dev); \ + if (!dev) { \ + rcu_read_unlock(); \ + return -ENODEV; \ + } \ + \ + ret = per_cpu_stat_##prefix##_snprintf(lun->lun_stats, page); \ + rcu_read_unlock(); \ + return ret; \ } -static ssize_t target_stat_tgt_port_write_mbytes_show(struct config_item *item, - char *page) -{ - struct se_lun *lun = to_stat_tgt_port(item); - struct se_device *dev; - ssize_t ret = -ENODEV; - - rcu_read_lock(); - dev = rcu_dereference(lun->lun_se_dev); - if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(atomic_long_read(&lun->lun_stats.rx_data_octets) >> 20)); - rcu_read_unlock(); - return ret; -} - -static ssize_t target_stat_tgt_port_read_mbytes_show(struct config_item *item, - char *page) -{ - struct se_lun *lun = to_stat_tgt_port(item); - struct se_device *dev; - ssize_t ret = -ENODEV; - - rcu_read_lock(); - dev = rcu_dereference(lun->lun_se_dev); - if (dev) - ret = snprintf(page, PAGE_SIZE, "%u\n", - (u32)(atomic_long_read(&lun->lun_stats.tx_data_octets) >> 20)); - rcu_read_unlock(); - return ret; -} +tgt_port_show_per_cpu_stat(tgt_port_in_cmds, cmd_pdus, 0); +tgt_port_show_per_cpu_stat(tgt_port_write_mbytes, rx_data_octets, 20); +tgt_port_show_per_cpu_stat(tgt_port_read_mbytes, tx_data_octets, 20); static ssize_t target_stat_tgt_port_hs_in_cmds_show(struct config_item *item, char *page) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0e429e5ef31..8b5ad50baa43 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -548,7 +548,7 @@ int core_tpg_register( ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0, true, g_lun0_dev); if (ret < 0) { - kfree(se_tpg->tpg_virt_lun0); + target_tpg_free_lun(&se_tpg->tpg_virt_lun0->rcu_head); return ret; } } @@ -595,7 +595,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) if (se_tpg->proto_id >= 0) { core_tpg_remove_lun(se_tpg, se_tpg->tpg_virt_lun0); - kfree_rcu(se_tpg->tpg_virt_lun0, rcu_head); + call_rcu(&se_tpg->tpg_virt_lun0->rcu_head, target_tpg_free_lun); } target_tpg_deregister_rtpi(se_tpg); @@ -615,6 +615,13 @@ struct se_lun *core_tpg_alloc_lun( pr_err("Unable to allocate se_lun memory\n"); return ERR_PTR(-ENOMEM); } + + lun->lun_stats = alloc_percpu(struct scsi_port_stats); + if (!lun->lun_stats) { + pr_err("Unable to allocate se_lun stats memory\n"); + goto free_lun; + } + lun->unpacked_lun = unpacked_lun; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_shutdown_comp); @@ -628,6 +635,18 @@ struct se_lun *core_tpg_alloc_lun( lun->lun_tpg = tpg; return lun; + +free_lun: + kfree(lun); + return ERR_PTR(-ENOMEM); +} + +void target_tpg_free_lun(struct rcu_head *head) +{ + struct se_lun *lun = container_of(head, struct se_lun, rcu_head); + + free_percpu(lun->lun_stats); + kfree(lun); } int core_tpg_add_lun( diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0a76bdfe5528..fca9b44288bc 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1571,7 +1571,12 @@ target_cmd_parse_cdb(struct se_cmd *cmd) return ret; cmd->se_cmd_flags |= SCF_SUPPORTED_SAM_OPCODE; - atomic_long_inc(&cmd->se_lun->lun_stats.cmd_pdus); + /* + * If this is the xcopy_lun then we won't have lun_stats since we + * can't export them. + */ + if (cmd->se_lun->lun_stats) + this_cpu_inc(cmd->se_lun->lun_stats->cmd_pdus); return 0; } EXPORT_SYMBOL(target_cmd_parse_cdb); @@ -2597,8 +2602,9 @@ queue_rsp: !(cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL)) goto queue_status; - atomic_long_add(cmd->data_length, - &cmd->se_lun->lun_stats.tx_data_octets); + if (cmd->se_lun->lun_stats) + this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets, + cmd->data_length); /* * Perform READ_STRIP of PI using software emulation when * backend had PI enabled, if the transport will not be @@ -2621,14 +2627,16 @@ queue_rsp: goto queue_full; break; case DMA_TO_DEVICE: - atomic_long_add(cmd->data_length, - &cmd->se_lun->lun_stats.rx_data_octets); + if (cmd->se_lun->lun_stats) + this_cpu_add(cmd->se_lun->lun_stats->rx_data_octets, + cmd->data_length); /* * Check if we need to send READ payload for BIDI-COMMAND */ if (cmd->se_cmd_flags & SCF_BIDI) { - atomic_long_add(cmd->data_length, - &cmd->se_lun->lun_stats.tx_data_octets); + if (cmd->se_lun->lun_stats) + this_cpu_add(cmd->se_lun->lun_stats->tx_data_octets, + cmd->data_length); ret = cmd->se_tfo->queue_data_in(cmd); if (ret) goto queue_full; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 27e1f9d5f0c6..372da2eadf54 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -744,9 +744,9 @@ struct se_port_stat_grps { }; struct scsi_port_stats { - atomic_long_t cmd_pdus; - atomic_long_t tx_data_octets; - atomic_long_t rx_data_octets; + u64 cmd_pdus; + u64 tx_data_octets; + u64 rx_data_octets; }; struct se_lun { @@ -773,7 +773,7 @@ struct se_lun { spinlock_t lun_tg_pt_gp_lock; struct se_portal_group *lun_tpg; - struct scsi_port_stats lun_stats; + struct scsi_port_stats __percpu *lun_stats; struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_shutdown_comp;