Wednesday, March 7, 2018

Grab all PDFs from a web page in Windows PowerShell

The function below downloads every PDF linked from a web page using Windows PowerShell.

Steps to recreate:

Copy this script in a file

Rename the file so that it has the .ps1 extension

Execute the script, choose a destination folder when prompted, and all the PDFs will be downloaded into it

 

# Grab-PDFs
#
# Asks the user to pick a destination folder, scans a web page for anchors
# whose href ends in ".pdf", and downloads each one into that folder. Each file
# name is prefixed with a running counter (1, 2, 3, ...) so that two PDFs with
# the same leaf name do not overwrite each other.
#
# Parameters:
#   -Url  Page to scan for PDF links. Defaults to the address the script
#         originally had hard-coded.
#
# Output:
#   The selected folder path once the download pass has run; nothing when the
#   user cancels.
#
# Side effects:
#   Changes the current location to the selected folder and shows GUI dialogs.
#
# NOTE(review): relies on the ParsedHtml property of the Invoke-WebRequest
# response, which is only populated by Windows PowerShell (5.1 and earlier).
function Grab-PDFs {
    param(
        [string]$Url = "http://localhost/data/ipindia.gov.in_journal-tm.html"
    )

    # Add-Type replaces the obsolete Assembly.LoadWithPartialName call.
    Add-Type -AssemblyName System.Windows.Forms
    [System.Windows.Forms.Application]::EnableVisualStyles()

    $browse = New-Object System.Windows.Forms.FolderBrowserDialog
    $browse.SelectedPath = ""
    $browse.ShowNewFolderButton = $false
    $browse.Description = "Select a directory"

    try {
        # Keep prompting until a folder is chosen or the user gives up.
        while ($browse.ShowDialog() -ne [System.Windows.Forms.DialogResult]::OK) {
            $res = [System.Windows.Forms.MessageBox]::Show("You clicked Cancel. Would you like to try again or exit?", "Select a location", [System.Windows.Forms.MessageBoxButtons]::RetryCancel)
            if ($res -eq [System.Windows.Forms.DialogResult]::Cancel) {
                # Ends the function; the finally block still disposes the dialog.
                return
            }
        }

        # -LiteralPath keeps folder names containing [ ] or * from being
        # interpreted as wildcard patterns.
        Set-Location -LiteralPath $browse.SelectedPath

        # Scrape the web page for PDF links.
        $psPage = Invoke-WebRequest $Url

        # @(...) guarantees an array: piping $null into ForEach-Object would
        # still run the body once, attempting a download with an empty URL
        # when the page has no PDF links.
        $urls = @(
            $psPage.ParsedHtml.getElementsByTagName("A") |
                Where-Object { $_.href -like "*.pdf" } |
                Select-Object -ExpandProperty href
        )

        Write-Host $urls

        $i = 1
        foreach ($pdfUrl in $urls) {
            $path = Split-Path -Path $pdfUrl -Leaf
            Invoke-WebRequest -Uri $pdfUrl -OutFile "$i$path"
            $i++
        }

        Write-Host "... PDF downloading is complete."

        # Discard the DialogResult so the function's only output is the path.
        [System.Windows.Forms.MessageBox]::Show("Your PDFs have been downloaded.", "Job Complete") | Out-Null

        $browse.SelectedPath
    }
    finally {
        # Runs on success, on cancel, and on error, so the dialog never leaks.
        $browse.Dispose()
    }
}

Grab-PDFs

No comments:

Post a Comment