Find All Links on Site Pages
This PnP PowerShell script scans all SharePoint site pages to identify and report hyperlinks. This is particularly useful for content audits, migration planning, and identifying broken or outdated links across your SharePoint site.
Purpose
This script helps with content migration and maintenance by:
- Identifying all hyperlinks within site pages
- Providing link destinations together with each link's type and source
- Supporting content audit and migration planning
- Enabling link validation and cleanup activities
Prerequisites
- PnP PowerShell module installed
- Site collection reader permissions (minimum)
- Connection to your SharePoint Online site
PowerShell Script
# Connect to your SharePoint site.
# $ClientId is passed to Connect-PnPOnline -ClientId; it is blank here and is
# presumably meant to be filled in with your app registration's client ID
# before running (confirm against your tenant setup).
$ClientId = ""
Connect-PnPOnline -Url "https://tenantName.sharepoint.com/sites/siteName" -Interactive -ClientId $ClientId

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "EXTRACTING ALL LINKS FROM ALL PAGES" -ForegroundColor Cyan
Write-Host "Including Quick Links, text hyperlinks, SPFx web parts, and custom controls" -ForegroundColor Yellow
Write-Host "========================================`n" -ForegroundColor Cyan

# Get all pages
Write-Host "Retrieving all pages from Site Pages library..." -ForegroundColor Yellow
try {
    # -ErrorAction Stop promotes non-terminating cmdlet errors to exceptions so
    # the catch block below actually runs when the library cannot be read.
    # @() keeps $allPages an array even when zero or one item comes back, so
    # .Count and foreach behave consistently.
    $allPages = @(Get-PnPListItem -List "SitePages" -PageSize 500 -ErrorAction Stop)
    Write-Host "Found $($allPages.Count) items to process`n" -ForegroundColor Green
}
catch {
    Write-Host "ERROR: Could not retrieve pages from Site Pages library" -ForegroundColor Red
    Write-Host $_.Exception.Message -ForegroundColor Red
    # Non-zero exit code so callers/schedulers can detect the failure.
    exit 1
}

# Accumulators shared with the rest of the script.
$allLinksData = @()      # one row per (page, link)
$errorLog = @()          # one row per page that failed to process
$pagesProcessed = 0      # items visited (includes skipped folders/non-pages)
$pagesWithLinks = 0
$totalLinksFound = 0
$foldersSkipped = 0
# ---------------------------------------------------------------------------
# Scan every item of the Site Pages library and collect the links it contains.
#   Reads : $allPages
#   Writes: $allLinksData (one row per page/link), $errorLog, and the counters
#           $pagesProcessed, $pagesWithLinks, $totalLinksFound, $foldersSkipped
# ---------------------------------------------------------------------------

# Link-extraction patterns, applied in this exact order (the order determines
# the order in which links are reported). Group = 0 takes the whole match,
# Group = 1 takes the first capture group.
$linkPatterns = @(
    @{ Group = 0; Regex = 'https?://[^\s<>"{}|\\^`\[\]]+' }                 #  1: absolute http/https URLs (text hyperlinks)
    @{ Group = 1; Regex = 'href="([^"]+)"' }                                #  2: href attribute in HTML
    @{ Group = 1; Regex = '"href"\s*:\s*"([^"]+)"' }                        #  3: href property in JSON (Quick Links)
    @{ Group = 1; Regex = '"url"\s*:\s*"([^"]+)"' }                         #  4: url property in JSON
    @{ Group = 1; Regex = '"Url"\s*:\s*"([^"]+)"' }                         #  5: Url with capital U
    @{ Group = 1; Regex = '"serverRelativeUrl"\s*:\s*"([^"]+)"' }           #  6: serverRelativeUrl
    @{ Group = 1; Regex = '"linkUrl"\s*:\s*"([^"]+)"' }                     #  7: linkUrl (custom web parts)
    @{ Group = 1; Regex = '"[fF]ileRef"\s*:\s*"([^"]+)"' }                  #  8: fileRef / FileRef (document links)
    @{ Group = 1; Regex = '"targetUrl"\s*:\s*"([^"]+)"' }                   #  9: targetUrl (Quick Links variant)
    @{ Group = 1; Regex = '"navigationUrl"\s*:\s*"([^"]+)"' }               # 10: navigationUrl
    @{ Group = 1; Regex = 'data-link-url="([^"]+)"' }                       # 11: data-link-url attribute
    @{ Group = 1; Regex = '(?:^|[/\s])([Ss]ite[Pp]ages/[^\s<>"]+\.aspx)' }  # 12: SitePages relative paths
    @{ Group = 1; Regex = '(?:^|["\s])(/[Ss]ites/[^\s<>"]+)' }              # 13: root-relative /sites/ paths
)

# Absolute URLs matching this pattern are reported as "Internal SharePoint";
# everything else is "External". Adjust the second alternative to your tenant.
$internalUrlPattern = 'sharepoint\.com|tranzversal'

# Characters stripped from the ends of an extracted link. '/' is stripped only
# from the END: removing a leading '/' would turn "/sites/x" into "sites/x" and
# make the '^/...' classification rules below impossible to match.
$leadingJunk  = [char[]]@('"', "'", ' ', ',', ';', '\')
$trailingJunk = [char[]]@('"', "'", ' ', ',', ';', '\', '/')

foreach ($page in $allPages) {
    $pageName = $page["FileLeafRef"]
    $pageUrl = $page["FileRef"]
    $fileType = $page["File_x0020_Type"]
    $contentType = $page["ContentType"]
    $pagesProcessed++

    # Progress indicator
    if ($pagesProcessed % 10 -eq 0) {
        Write-Host "Processed $pagesProcessed / $($allPages.Count) items..." -ForegroundColor Gray
    }

    # Skip folders (items without a file type are treated as folders too)
    if ($contentType -eq "Folder" -or [string]::IsNullOrEmpty($fileType)) {
        Write-Host " Skipping folder: $pageName" -ForegroundColor DarkGray
        $foldersSkipped++
        continue
    }

    # Skip non-page files
    if ($fileType -ne "aspx") {
        Write-Host " Skipping non-page file: $pageName (.$fileType)" -ForegroundColor DarkGray
        continue
    }

    try {
        # Get page content
        $file = Get-PnPFile -Url $pageUrl -AsListItem -ErrorAction Stop

        # Combine all content sources
        $allContent = @()
        $content = $file["CanvasContent1"]
        $layoutWebParts = $file["LayoutWebpartsContent"]
        if ($content) { $allContent += $content }
        if ($layoutWebParts) { $allContent += $layoutWebParts }

        # Also try to load the page object to catch SPFx web part properties.
        # Pages inside sub-folders must be addressed by their path relative to
        # the library, not just the leaf name, so derive it from FileRef.
        $pageIdentity = $pageName
        if ($pageUrl -match '/SitePages/(.+)$') {
            $pageIdentity = $Matches[1]
        }
        try {
            $pageObject = Get-PnPPage -Identity $pageIdentity -ErrorAction SilentlyContinue
            if ($pageObject) {
                foreach ($control in $pageObject.Controls) {
                    if ($control.PropertiesJson) {
                        $allContent += $control.PropertiesJson
                    }
                    if ($control.JsonProperties) {
                        $allContent += $control.JsonProperties
                    }
                }
            }
        }
        catch {
            # Best effort only: the list-item fields above are still scanned.
            Write-Verbose "Get-PnPPage failed for ${pageIdentity}: $($_.Exception.Message)"
        }

        $combinedContent = $allContent -join " "

        if ($combinedContent) {
            # 1) Collect raw candidates from every pattern, in table order.
            $foundLinks = New-Object 'System.Collections.Generic.List[string]'
            foreach ($pattern in $linkPatterns) {
                foreach ($hit in [regex]::Matches($combinedContent, $pattern.Regex)) {
                    $value = $hit.Groups[$pattern.Group].Value
                    if ($value) {
                        $foundLinks.Add($value)
                    }
                }
            }

            # 2) Filter and normalise BEFORE de-duplicating, so two raw strings
            #    that clean up to the same link are reported once per page and
            #    the counters only reflect links that are actually exported.
            $cleanLinks = New-Object 'System.Collections.Generic.List[string]'
            foreach ($raw in $foundLinks) {
                # Drop anchors, blanks, data:/javascript: URIs, JSON fragments,
                # literal "null" and anything too short to be a link.
                if (-not $raw -or $raw.Length -le 3) { continue }
                if ($raw -match '^(#|\s*$|data:|javascript:|\{|null$)') { continue }

                # Decode URL-encoded characters. WebUtility is always loaded,
                # unlike System.Web.HttpUtility which needs the System.Web
                # assembly to be present in the session.
                $decodedLink = [System.Net.WebUtility]::UrlDecode($raw)

                # Remove surrounding quotes, spaces, separators and backslashes
                # (and a trailing slash) while keeping any leading slash.
                $cleanedLink = $decodedLink.TrimStart($leadingJunk).TrimEnd($trailingJunk)

                # Skip bare file extensions, "null", and very short leftovers
                if ($cleanedLink -match '^\.[a-z]{3,4}$' -or $cleanedLink -eq 'null' -or $cleanedLink.Length -lt 4) {
                    continue
                }
                # Skip common non-link JSON values
                if ($cleanedLink -match '^(true|false|undefined|NaN)$') {
                    continue
                }
                $cleanLinks.Add($cleanedLink)
            }
            $uniqueLinks = @($cleanLinks | Select-Object -Unique)

            # 3) Classify each link and add it as a separate row.
            if ($uniqueLinks.Count -gt 0) {
                $pagesWithLinks++
                Write-Host "`nFOUND $($uniqueLinks.Count) links in: $pageName" -ForegroundColor Green
                Write-Host " Page location: $pageUrl" -ForegroundColor Gray

                foreach ($cleanedLink in $uniqueLinks) {
                    # Determine link type and source (first matching rule wins)
                    $linkType = "Unknown"
                    $linkSource = "Standard"
                    if ($cleanedLink -match '^https?://') {
                        $linkType = "Absolute URL"
                        # Check if it's an external link
                        if ($cleanedLink -notmatch $internalUrlPattern) {
                            $linkSource = "External"
                        } else {
                            $linkSource = "Internal SharePoint"
                        }
                    }
                    elseif ($cleanedLink -match '^/[Ss]ites/') {
                        $linkType = "Site-Relative URL"
                        $linkSource = "Internal SharePoint"
                    }
                    elseif ($cleanedLink -match '^/[Ss]ite[Pp]ages/') {
                        $linkType = "SitePages Path"
                        $linkSource = "Internal Page"
                    }
                    elseif ($cleanedLink -match '^[Ss]ite[Pp]ages/') {
                        $linkType = "Relative SitePages Path"
                        $linkSource = "Internal Page"
                    }
                    elseif ($cleanedLink -match '\.(docx|xlsx|pptx|pdf|doc|xls|ppt)$') {
                        $linkType = "Document Link"
                        $linkSource = "Document"
                    }
                    elseif ($cleanedLink -match '^/') {
                        $linkType = "Root-Relative URL"
                        $linkSource = "Internal"
                    }

                    # Display the link
                    Write-Host " -> $cleanedLink" -ForegroundColor Cyan

                    $allLinksData += [PSCustomObject]@{
                        PageName = $pageName
                        PagePath = $pageUrl
                        Link = $cleanedLink
                        LinkType = $linkType
                        LinkSource = $linkSource
                    }
                    $totalLinksFound++
                }
            }
        }
        else {
            Write-Host " No content in page: $pageName" -ForegroundColor DarkGray
        }
    }
    catch {
        $errorMessage = $_.Exception.Message
        # "Cannot index into a null array" is deliberately not logged; it is
        # presumably raised when Get-PnPFile returns no list item to index.
        if ($errorMessage -notlike "*Cannot index into a null array*") {
            Write-Host "`nERROR processing: $pageName" -ForegroundColor Red
            Write-Host " $errorMessage" -ForegroundColor Red
            $errorLog += [PSCustomObject]@{
                PageName = $pageName
                PagePath = $pageUrl
                ErrorMessage = $errorMessage
                ErrorTime = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
            }
        }
    }
}
# Generate output files
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"   # suffix that keeps each run's files distinct
$outputFolder = "C:\Temp"

# Create output folder if it doesn't exist
if (!(Test-Path $outputFolder)) {
    New-Item -Path $outputFolder -ItemType Directory -Force | Out-Null
}

# Start from a known state: without this, a $useCSV value left over from an
# earlier run in the same session would silently force (or prevent) CSV output.
$useCSV = $false

# Check if ImportExcel module is installed
if (-not (Get-Module -ListAvailable -Name ImportExcel)) {
    Write-Host "`nImportExcel module not found. Installing..." -ForegroundColor Yellow
    try {
        # -ErrorAction Stop makes installation failures reach the catch block;
        # otherwise a non-terminating error would be reported as success.
        Install-Module -Name ImportExcel -Scope CurrentUser -Force -AllowClobber -ErrorAction Stop
        Write-Host "ImportExcel module installed successfully!" -ForegroundColor Green
    }
    catch {
        Write-Host "Failed to install ImportExcel module. Falling back to CSV export." -ForegroundColor Red
        $useCSV = $true
    }
}

if (-not $useCSV) {
    try {
        Import-Module ImportExcel -ErrorAction Stop
    }
    catch {
        # An installed-but-unloadable module must also trigger the fallback,
        # otherwise every later Export-Excel call would fail.
        Write-Host "Failed to load ImportExcel module. Falling back to CSV export." -ForegroundColor Red
        $useCSV = $true
    }
}
# Export to Excel or CSV.
# Reads $allLinksData, $useCSV, $outputFolder and $timestamp; writes either a
# five-sheet workbook or two CSV files into $outputFolder.
if ($allLinksData.Count -gt 0) {
    # The per-page summary is exported by BOTH formats, so it is built before
    # branching (it used to exist only on the Excel path, leaving the CSV
    # summary export with nothing to write).
    $summary = $allLinksData | Group-Object -Property PageName | Select-Object @{
        Name='PageName'; Expression={$_.Name}
    }, @{
        Name='TotalLinks'; Expression={$_.Count}
    }, @{
        Name='PagePath'; Expression={($_.Group | Select-Object -First 1).PagePath}
    } | Sort-Object TotalLinks -Descending

    if (-not $useCSV) {
        # EXCEL OUTPUT
        $excelFile = Join-Path -Path $outputFolder -ChildPath "PageLinksReport_$timestamp.xlsx"

        # Formatting switches shared by every worksheet.
        $sheetStyle = @{
            AutoSize     = $true
            AutoFilter   = $true
            FreezeTopRow = $true
            BoldTopRow   = $true
        }

        # Denominator for the percentage columns below.
        $totalLinkRows = $allLinksData.Count

        # Sheet 1: All Links
        $allLinksData | Export-Excel -Path $excelFile -WorksheetName "All Links" @sheetStyle

        # Sheet 2: Summary by Page
        $summary | Export-Excel -Path $excelFile -WorksheetName "Summary by Page" @sheetStyle

        # Sheet 3: Summary by Link
        $linkSummary = $allLinksData | Group-Object -Property Link | Select-Object @{
            Name='Link'; Expression={$_.Name}
        }, @{
            Name='TimesUsed'; Expression={$_.Count}
        }, @{
            Name='PagesCount'; Expression={($_.Group.PageName | Select-Object -Unique).Count}
        }, @{
            Name='LinkType'; Expression={($_.Group | Select-Object -First 1).LinkType}
        }, @{
            Name='LinkSource'; Expression={($_.Group | Select-Object -First 1).LinkSource}
        }, @{
            Name='UsedOnPages'; Expression={($_.Group.PageName | Select-Object -Unique | Sort-Object) -join '; '}
        } | Sort-Object TimesUsed -Descending
        $linkSummary | Export-Excel -Path $excelFile -WorksheetName "Summary by URL" @sheetStyle

        # Sheet 4: Links by Source
        $sourceBreakdown = $allLinksData | Group-Object -Property LinkSource | Select-Object @{
            Name='Source'; Expression={$_.Name}
        }, @{
            Name='Count'; Expression={$_.Count}
        }, @{
            Name='Percentage'; Expression={[math]::Round(($_.Count / $totalLinkRows) * 100, 2)}
        } | Sort-Object Count -Descending
        $sourceBreakdown | Export-Excel -Path $excelFile -WorksheetName "Links by Source" @sheetStyle

        # Sheet 5: Links by Type
        $typeBreakdown = $allLinksData | Group-Object -Property LinkType | Select-Object @{
            Name='LinkType'; Expression={$_.Name}
        }, @{
            Name='Count'; Expression={$_.Count}
        }, @{
            Name='Percentage'; Expression={[math]::Round(($_.Count / $totalLinkRows) * 100, 2)}
        } | Sort-Object Count -Descending
        $typeBreakdown | Export-Excel -Path $excelFile -WorksheetName "Links by Type" @sheetStyle

        Write-Host "`n========================================" -ForegroundColor Cyan
        Write-Host "Excel report created with 5 sheets:" -ForegroundColor Green
        Write-Host " $excelFile" -ForegroundColor White
        Write-Host "`nSheets included:" -ForegroundColor Yellow
        Write-Host " 1. All Links - Complete link inventory" -ForegroundColor White
        Write-Host " 2. Summary by Page - Links per page" -ForegroundColor White
        Write-Host " 3. Summary by URL - Where each link is used" -ForegroundColor White
        Write-Host " 4. Links by Source - Internal vs External breakdown" -ForegroundColor White
        Write-Host " 5. Links by Type - Link type distribution" -ForegroundColor White
    }
    else {
        # CSV FALLBACK
        $linksFile = Join-Path -Path $outputFolder -ChildPath "AllPageLinks_$timestamp.csv"
        $allLinksData | Export-Csv -Path $linksFile -NoTypeInformation -Encoding UTF8
        Write-Host "`nAll links exported to: $linksFile" -ForegroundColor Green

        $summaryFile = Join-Path -Path $outputFolder -ChildPath "LinksSummaryByPage_$timestamp.csv"
        $summary | Export-Csv -Path $summaryFile -NoTypeInformation -Encoding UTF8
        Write-Host "Summary by page exported to: $summaryFile" -ForegroundColor Green
    }
}
else {
    Write-Host "`nNo links found in any pages!" -ForegroundColor Yellow
}
# Write the per-page error log next to the report, in the same format family
# (CSV when $useCSV is set, otherwise an Excel workbook with one "Errors" sheet).
if ($errorLog.Count -gt 0) {
    if ($useCSV) {
        $errorFile = "$outputFolder\ErrorLog_$timestamp.csv"
        $csvOptions = @{
            Path              = $errorFile
            NoTypeInformation = $true
            Encoding          = 'UTF8'
        }
        $errorLog | Export-Csv @csvOptions
    }
    else {
        $errorFile = "$outputFolder\ErrorLog_$timestamp.xlsx"
        $excelOptions = @{
            Path          = $errorFile
            WorksheetName = "Errors"
            AutoSize      = $true
            AutoFilter    = $true
            FreezeTopRow  = $true
            BoldTopRow    = $true
        }
        $errorLog | Export-Excel @excelOptions
    }

    Write-Host "Error log exported to: $errorFile" -ForegroundColor Yellow
}
# Final Summary Report: print the run's counters to the console.
$divider = "========================================"

# Items visited minus folders. Note that skipped non-.aspx files are still
# included in this figure, because only folders are counted separately.
$processedPages = $pagesProcessed - $foldersSkipped

# Error count is shown in red when any page failed, green otherwise.
$errorColor = if ($errorLog.Count -gt 0) { 'Red' } else { 'Green' }

Write-Host "`n$divider" -ForegroundColor Cyan
Write-Host "EXTRACTION COMPLETE" -ForegroundColor Green
Write-Host $divider -ForegroundColor Cyan
Write-Host "Total items scanned: $($allPages.Count)" -ForegroundColor White
Write-Host "Folders skipped: $foldersSkipped" -ForegroundColor DarkGray
Write-Host "Pages processed: $processedPages" -ForegroundColor White
Write-Host "Pages with links: $pagesWithLinks" -ForegroundColor Green
Write-Host "Total links found: $totalLinksFound" -ForegroundColor Green
Write-Host "Pages with errors: $($errorLog.Count)" -ForegroundColor $errorColor
Write-Host "$divider`n" -ForegroundColor Cyan
Usage Notes
- Update the site URL to match your SharePoint environment
- The script processes modern SharePoint pages (Site Pages library)
- Results are exported and formatted in Excel for further analysis
- Consider running during off-peak hours for large sites
- Important: When copying the script, replace `&lt;` with `<` and `&gt;` with `>` in the regex patterns for the script to work correctly
Output Information
- PageName: The name of the SharePoint page
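- PagePath: The path of the page within the site (the item's FileRef value)
- Link: The link found on the page, URL-decoded and trimmed
- LinkType: The kind of link (for example Absolute URL, Site-Relative URL, Document Link)
- LinkSource: Where the link points (for example Internal SharePoint, External, Document)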