#!/usr/bin/env php 
<?php 
 
/** 
    * Bookmarks Checker 
    * 
    * Verify links in a Chrome or Firefox exported bookmarks file - single threaded dumb version. 
    * 
    * Usage:          php bookmarks_checker_prototype.php [file] 
    * 
    * @author         Martin Latter 
    * @copyright      Martin Latter 11/02/2016 
    * @version        0.06 
    * @license        GNU GPL version 3.0 (GPL v3); http://www.gnu.org/licenses/gpl.html 
    * @link           https://github.com/Tinram/Bookmarks-Checker.git 
*/ 
 
 
declare(strict_types = 1); 
 
define('DUB_EOL', PHP_EOL . PHP_EOL); 
define('DEFAULT_FILE', 'bookmarks.html'); 
 
ini_set('user_agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0'); 
 
 
/* filename */ 
if ( ! isset($_SERVER['argv'][1])) 
{ 
    if (file_exists(DEFAULT_FILE)) 
    { 
        $sFilename = DEFAULT_FILE; 
    } 
    else 
    { 
        $sUsage = 
            PHP_EOL . ' ' . 
            str_replace('_', ' ', ucwords(basename(__FILE__, '.php'), '_')) . 
            DUB_EOL . 
            "\tusage: " . basename(__FILE__) . ' [filename]' . 
            DUB_EOL; 
 
        die($sUsage); 
    } 
} 
else 
{ 
    $sFilename = $_SERVER['argv'][1]; 
} 
 
/* no such file */ 
if ( ! file_exists($sFilename)) 
{ 
    die(PHP_EOL . ' ' . $sFilename . ' does not exist in this directory!' . DUB_EOL); 
} 
 
 
$sHtml = file_get_contents($sFilename); 
 
$rxPattern = '/<a\s[^>]*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/siU'; /* avoid attributes: by chirp.com.au */ 
 
preg_match_all($rxPattern, $sHtml, $aMatches, PREG_SET_ORDER); 
 
 
$aLinks = []; 
 
foreach ($aMatches as $aLinkEntity) 
{ 
    $aLinks[] = [ $aLinkEntity[1], $aLinkEntity[2] ]; 
} 
 
echo PHP_EOL; 
 
 
if (empty($aLinks)) 
{ 
    die(' No links extracted from ' . $sFilename . DUB_EOL); 
} 
 
$fTS = microtime(true); 
 
checkLinks($aLinks); 
 
$fTE = microtime(true); 
 
echo PHP_EOL . ' URL parse time: ' . sprintf('%01.3f', $fTE - $fTS) . ' secs' . DUB_EOL; 
 
 
function checkLinks(array $aLinks) 
{ 
    $iCount = 0; 
    $iFails = 0; 
 
    echo ' ' . count($aLinks) . ' links being checked ...' . DUB_EOL; 
 
    foreach ($aLinks as $aLink) 
    { 
        $rFile = @fopen($aLink[0], 'r'); 
 
        if ($rFile) 
        { 
            fclose($rFile); 
        } 
        else 
        { 
            if ( ! $iFails) 
            { 
                echo ' failures: ' . DUB_EOL; 
            } 
 
            echo "\t" . $aLink[1] . '  |  ' . $aLink[0] . PHP_EOL; 
            $iFails++; 
        } 
 
        $iCount++; 
    } 
 
    echo PHP_EOL . ' ' . $iFails . ' links failed'; 
    echo PHP_EOL . ' ' . ($iCount - $iFails) . ' links verified' . PHP_EOL; 
} 
 
 |