The function below downloads all PDFs linked from a web page using Windows PowerShell.
Steps to recreate:
Copy this script into a file.
Rename the file so that it has the .ps1 extension.
Execute the script to download all the PDFs.
function Grab-PDFs {
    <#
    .SYNOPSIS
        Downloads every PDF linked from a web page into a folder chosen by the user.
    .DESCRIPTION
        Shows a folder-browser dialog, fetches the page at -Url, collects every
        anchor whose href ends in ".pdf", and saves each file into the selected
        folder. Each file name is prefixed with a running counter (1, 2, 3, ...)
        so that two links with the same leaf name do not overwrite each other.
        If the user cancels the folder dialog, a Retry/Cancel prompt is shown;
        choosing Cancel ends the function without downloading anything.
    .PARAMETER Url
        Address of the page to scan for PDF links. Defaults to the page the
        script was originally written for.
    .OUTPUTS
        The selected folder path, written to the pipeline after the downloads.
        The result of the final "Job Complete" message box is also written to
        the pipeline, because it is not suppressed.
    .NOTES
        Relies on the ParsedHtml property of the Invoke-WebRequest response;
        presumably this requires Windows PowerShell with the Internet Explorer
        engine available - TODO confirm for the target environment.
    #>
    param(
        [string]$Url = "http://localhost/data/ipindia.gov.in_journal-tm.html"
    )

    # Add-Type replaces the obsolete Assembly.LoadWithPartialName call.
    Add-Type -AssemblyName System.Windows.Forms
    [System.Windows.Forms.Application]::EnableVisualStyles()

    $browse = New-Object System.Windows.Forms.FolderBrowserDialog
    try {
        $browse.SelectedPath = ""
        $browse.ShowNewFolderButton = $false
        $browse.Description = "Select a directory"

        # Keep asking until a folder is chosen or the user gives up.
        while ($browse.ShowDialog() -ne [System.Windows.Forms.DialogResult]::OK) {
            $res = [System.Windows.Forms.MessageBox]::Show("You clicked Cancel. Would you like to try again or exit?", "Select a location", [System.Windows.Forms.MessageBoxButtons]::RetryCancel)
            if ($res -eq [System.Windows.Forms.DialogResult]::Cancel) {
                # Ends script; the finally block still disposes the dialog.
                return
            }
        }

        $targetDir = $browse.SelectedPath

        # Scrape the web page for links to PDFs.
        $psPage = Invoke-WebRequest $Url
        $urls = $psPage.ParsedHtml.getElementsByTagName("A") |
            Where-Object { $_.href -like "*.pdf" } |
            Select-Object -ExpandProperty href
        Write-Host $urls

        # foreach over @(...) runs zero times when no links were found, whereas
        # piping $null into ForEach-Object would run the body once with $null.
        $i = 1
        foreach ($pdfUrl in @($urls)) {
            $leaf = Split-Path -Path $pdfUrl -Leaf
            # Build the full destination path instead of changing the caller's
            # current directory with cd.
            $outFile = Join-Path -Path $targetDir -ChildPath "$i$leaf"
            Invoke-WebRequest -Uri $pdfUrl -OutFile $outFile
            $i = $i + 1
        }

        Write-Host "... PDF downloading is complete."
        [System.Windows.Forms.MessageBox]::Show("Your PDFs have been downloaded.", "Job Complete")

        $browse.SelectedPath
    }
    finally {
        # Runs on every exit path, including the early return above.
        $browse.Dispose()
    }
}

Grab-PDFs
No comments:
Post a Comment