diff --git a/libfreerdp/primitives/neon/prim_YUV_neon.c b/libfreerdp/primitives/neon/prim_YUV_neon.c
index 53ce9c5be..7cafde53a 100644
--- a/libfreerdp/primitives/neon/prim_YUV_neon.c
+++ b/libfreerdp/primitives/neon/prim_YUV_neon.c
@@ -156,15 +156,13 @@ static INLINE void neon_write_pixel(BYTE* pRGB, BYTE Y, BYTE U, BYTE V, const ui
 	pRGB[bPos] = b;
 }
 
-static INLINE pstatus_t neon_YUV420ToX_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pY[2],
-                                                  const BYTE* WINPR_RESTRICT pU,
-                                                  const BYTE* WINPR_RESTRICT pV,
-                                                  BYTE* WINPR_RESTRICT pRGB[2], size_t width,
-                                                  const uint8_t rPos, const uint8_t gPos,
-                                                  const uint8_t bPos, const uint8_t aPos)
+static INLINE void neon_YUV420ToX_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pY[2],
+                                             const BYTE* WINPR_RESTRICT pU,
+                                             const BYTE* WINPR_RESTRICT pV,
+                                             BYTE* WINPR_RESTRICT pRGB[2], size_t width,
+                                             const uint8_t rPos, const uint8_t gPos,
+                                             const uint8_t bPos, const uint8_t aPos)
 {
-	WINPR_ASSERT((width % 2) == 0);
-
 	UINT32 x = 0;
 
 	for (; x < width - width % 16; x += 16)
@@ -180,7 +178,7 @@ static INLINE pstatus_t neon_YUV420ToX_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pY[
 		neon_YuvToRgbPixel(&pRGB[1][4ULL * x], Y1, D, E, rPos, gPos, bPos, aPos);
 	}
 
-	for (; x < width; x += 2)
+	for (; x < width - width % 2; x += 2)
 	{
 		const BYTE U = pU[x / 2];
 		const BYTE V = pV[x / 2];
@@ -191,7 +189,49 @@ static INLINE pstatus_t neon_YUV420ToX_DOUBLE_ROW(const BYTE* WINPR_RESTRICT pY[
 		neon_write_pixel(&pRGB[1][4 * (1ULL + x)], pY[1][1ULL + x], U, V, rPos, gPos, bPos, aPos);
 	}
 
-	return PRIMITIVES_SUCCESS;
+	for (; x < width; x++)
+	{
+		const BYTE U = pU[x / 2];
+		const BYTE V = pV[x / 2];
+
+		neon_write_pixel(&pRGB[0][4 * x], pY[0][x], U, V, rPos, gPos, bPos, aPos);
+		neon_write_pixel(&pRGB[1][4 * x], pY[1][x], U, V, rPos, gPos, bPos, aPos);
+	}
+}
+/* Single-row variant: converts width pixels from pY/pU/pV into 4-byte pixels at pRGB. */
+static INLINE void neon_YUV420ToX_SINGLE_ROW(const BYTE* WINPR_RESTRICT pY,
+                                             const BYTE* WINPR_RESTRICT pU,
+                                             const BYTE* WINPR_RESTRICT pV,
+                                             BYTE* WINPR_RESTRICT pRGB, size_t width,
+                                             const uint8_t rPos, const uint8_t gPos,
+                                             const uint8_t bPos, const uint8_t aPos)
+{
+	UINT32 x = 0; /* shared cursor: each loop below resumes where the previous stopped */
+
+	for (; x < width - width % 16; x += 16) /* vector part: 16 pixels per iteration */
+	{
+		const uint8x16_t Y0raw = vld1q_u8(&pY[x]); /* 16 consecutive luma bytes */
+		const uint8x8x2_t Y0 = { { vget_low_u8(Y0raw), vget_high_u8(Y0raw) } }; /* low/high halves */
+		const int16x8x2_t D = loadUV(pU, x); /* loadUV is defined elsewhere - semantics: confirm */
+		const int16x8x2_t E = loadUV(pV, x); /* same helper applied to the V row */
+		neon_YuvToRgbPixel(&pRGB[4ULL * x], Y0, D, E, rPos, gPos, bPos, aPos); /* dst: 4 bytes/px */
+	}
+
+	for (; x < width - width % 2; x += 2) /* scalar tail: two pixels per iteration */
+	{
+		const BYTE U = pU[x / 2]; /* one chroma sample is shared by pixels x and x + 1 */
+		const BYTE V = pV[x / 2];
+
+		neon_write_pixel(&pRGB[4 * x], pY[x], U, V, rPos, gPos, bPos, aPos);
+		neon_write_pixel(&pRGB[4 * (1ULL + x)], pY[1ULL + x], U, V, rPos, gPos, bPos, aPos);
+	}
+	for (; x < width; x++) /* runs at most once: the last pixel when width is odd */
+	{
+		const BYTE U = pU[x / 2]; /* index (width - 1) / 2: chroma rows need (width + 1) / 2 samples */
+		const BYTE V = pV[x / 2];
+
+		neon_write_pixel(&pRGB[4 * x], pY[x], U, V, rPos, gPos, bPos, aPos);
+	}
 }
 
 static INLINE pstatus_t neon_YUV420ToX(const BYTE* WINPR_RESTRICT pSrc[3], const UINT32 srcStep[3],
@@ -202,20 +242,26 @@ static INLINE pstatus_t neon_YUV420ToX(const BYTE* WINPR_RESTRICT pSrc[3], const
 	const UINT32 nWidth = roi->width;
 	const UINT32 nHeight = roi->height;
 
-	WINPR_ASSERT((nHeight % 2) == 0);
-	for (UINT32 y = 0; y < nHeight; y += 2)
+	WINPR_ASSERT(nHeight > 0);
+	UINT32 y = 0;
+	for (; y < (nHeight - 1); y += 2)
 	{
 		const uint8_t* pY[2] = { pSrc[0] + y * srcStep[0], pSrc[0] + (1ULL + y) * srcStep[0] };
 		const uint8_t* pU = pSrc[1] + (y / 2) * srcStep[1];
 		const uint8_t* pV = pSrc[2] + (y / 2) * srcStep[2];
 		uint8_t* pRGB[2] = { pDst + y * dstStep, pDst + (1ULL + y) * dstStep };
 
-		const pstatus_t rc =
-		    neon_YUV420ToX_DOUBLE_ROW(pY, pU, pV, pRGB, nWidth, rPos, gPos, bPos, aPos);
-		if (rc != PRIMITIVES_SUCCESS)
-			return rc;
+		neon_YUV420ToX_DOUBLE_ROW(pY, pU, pV, pRGB, nWidth, rPos, gPos, bPos, aPos);
 	}
+	for (; y < nHeight; y++)
+	{
+		const uint8_t* pY = pSrc[0] + y * srcStep[0];
+		const uint8_t* pU = pSrc[1] + (y / 2) * srcStep[1];
+		const uint8_t* pV = pSrc[2] + (y / 2) * srcStep[2];
+		uint8_t* pRGB = pDst + y * dstStep;
 
+		neon_YUV420ToX_SINGLE_ROW(pY, pU, pV, pRGB, nWidth, rPos, gPos, bPos, aPos);
+	}
 	return PRIMITIVES_SUCCESS;
 }