scsi: lpfc: Fix reusing an ndlp that is marked NLP_DROPPED during FLOGI
It's possible for an unstable link to repeatedly bounce, allowing a FLOGI retry, but then bounce again, forcing an abort of the FLOGI. Ensure that the initial reference count on the FLOGI ndlp is restored in this faulty link scenario.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://patch.msgid.link/20251106224639.139176-8-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
committed by
Martin K. Petersen
parent
0b8b15a0b7
commit
07caedc6a3
@@ -934,10 +934,15 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
|
|||||||
/* Check to see if link went down during discovery */
|
/* Check to see if link went down during discovery */
|
||||||
if (lpfc_els_chk_latt(vport)) {
|
if (lpfc_els_chk_latt(vport)) {
|
||||||
/* One additional decrement on node reference count to
|
/* One additional decrement on node reference count to
|
||||||
* trigger the release of the node
|
* trigger the release of the node. Make sure the ndlp
|
||||||
|
* is marked NLP_DROPPED.
|
||||||
*/
|
*/
|
||||||
if (!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD))
|
if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) &&
|
||||||
|
!test_bit(NLP_DROPPED, &ndlp->nlp_flag) &&
|
||||||
|
!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) {
|
||||||
|
set_bit(NLP_DROPPED, &ndlp->nlp_flag);
|
||||||
lpfc_nlp_put(ndlp);
|
lpfc_nlp_put(ndlp);
|
||||||
|
}
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -995,9 +1000,10 @@ stop_rr_fcf_flogi:
|
|||||||
IOERR_LOOP_OPEN_FAILURE)))
|
IOERR_LOOP_OPEN_FAILURE)))
|
||||||
lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
|
lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
|
||||||
"2858 FLOGI Status:x%x/x%x TMO"
|
"2858 FLOGI Status:x%x/x%x TMO"
|
||||||
":x%x Data x%lx x%x\n",
|
":x%x Data x%lx x%x x%lx x%x\n",
|
||||||
ulp_status, ulp_word4, tmo,
|
ulp_status, ulp_word4, tmo,
|
||||||
phba->hba_flag, phba->fcf.fcf_flag);
|
phba->hba_flag, phba->fcf.fcf_flag,
|
||||||
|
ndlp->nlp_flag, ndlp->fc4_xpt_flags);
|
||||||
|
|
||||||
/* Check for retry */
|
/* Check for retry */
|
||||||
if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
|
if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
|
||||||
@@ -1015,14 +1021,17 @@ stop_rr_fcf_flogi:
|
|||||||
* reference to trigger node release.
|
* reference to trigger node release.
|
||||||
*/
|
*/
|
||||||
if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) &&
|
if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) &&
|
||||||
!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD))
|
!test_bit(NLP_DROPPED, &ndlp->nlp_flag) &&
|
||||||
|
!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) {
|
||||||
|
set_bit(NLP_DROPPED, &ndlp->nlp_flag);
|
||||||
lpfc_nlp_put(ndlp);
|
lpfc_nlp_put(ndlp);
|
||||||
|
}
|
||||||
|
|
||||||
lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
|
lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
|
||||||
"0150 FLOGI Status:x%x/x%x "
|
"0150 FLOGI Status:x%x/x%x "
|
||||||
"xri x%x TMO:x%x refcnt %d\n",
|
"xri x%x iotag x%x TMO:x%x refcnt %d\n",
|
||||||
ulp_status, ulp_word4, cmdiocb->sli4_xritag,
|
ulp_status, ulp_word4, cmdiocb->sli4_xritag,
|
||||||
tmo, kref_read(&ndlp->kref));
|
cmdiocb->iotag, tmo, kref_read(&ndlp->kref));
|
||||||
|
|
||||||
/* If this is not a loop open failure, bail out */
|
/* If this is not a loop open failure, bail out */
|
||||||
if (!(ulp_status == IOSTAT_LOCAL_REJECT &&
|
if (!(ulp_status == IOSTAT_LOCAL_REJECT &&
|
||||||
@@ -1279,6 +1288,19 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
|
|||||||
uint32_t tmo, did;
|
uint32_t tmo, did;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
|
/* It's possible for lpfc to reissue a FLOGI on an ndlp that is marked
|
||||||
|
* NLP_DROPPED. This happens when the FLOGI completed with the XB bit
|
||||||
|
* set causing lpfc to reference the ndlp until the XRI_ABORTED CQE is
|
||||||
|
* issued. The time window for the XRI_ABORTED CQE can be as much as
|
||||||
|
* 2*2*RA_TOV allowing for ndlp reuse of this type when the link is
|
||||||
|
* cycling quickly. When true, restore the initial reference and remove
|
||||||
|
* the NLP_DROPPED flag as lpfc is retrying.
|
||||||
|
*/
|
||||||
|
if (test_and_clear_bit(NLP_DROPPED, &ndlp->nlp_flag)) {
|
||||||
|
if (!lpfc_nlp_get(ndlp))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
|
cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
|
||||||
elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
|
elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
|
||||||
ndlp->nlp_DID, ELS_CMD_FLOGI);
|
ndlp->nlp_DID, ELS_CMD_FLOGI);
|
||||||
|
|||||||
@@ -424,6 +424,7 @@ lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
|
|||||||
struct lpfc_nodelist *ndlp)
|
struct lpfc_nodelist *ndlp)
|
||||||
{
|
{
|
||||||
if (test_and_clear_bit(NLP_IN_RECOV_POST_DEV_LOSS, &ndlp->save_flags)) {
|
if (test_and_clear_bit(NLP_IN_RECOV_POST_DEV_LOSS, &ndlp->save_flags)) {
|
||||||
|
clear_bit(NLP_DROPPED, &ndlp->nlp_flag);
|
||||||
lpfc_nlp_get(ndlp);
|
lpfc_nlp_get(ndlp);
|
||||||
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE,
|
lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE,
|
||||||
"8438 Devloss timeout reversed on DID x%x "
|
"8438 Devloss timeout reversed on DID x%x "
|
||||||
@@ -566,6 +567,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
|
|||||||
return fcf_inuse;
|
return fcf_inuse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag))
|
||||||
lpfc_nlp_put(ndlp);
|
lpfc_nlp_put(ndlp);
|
||||||
return fcf_inuse;
|
return fcf_inuse;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user