| 
<?php
 namespace App;
 
 use Domain\ImgCountReport;
 use Domain\Page;
 use Domain\Report;
 use Domain\Site;
 use Infrastructure\Repository\PageRepository;
 use InvalidArgumentException;
 
 /**
  * Class ImgCountHandler.
  *
  * Recursive command that walks the pages of a site and counts the <img> tags
  * found on each processed page. Pages are loaded in batches through the
  * injected content loader, tracked in a PageRepository and finally summarised
  * in an ImgCountReport.
  *
  * @package App
  */
 class ImgCountHandler
 {
     /** @var Page $rootPage Page created from the corrected root URL passed to the constructor. */
     protected $rootPage;

     /** @var PageRepository $repository Storage of every page seen so far. */
     protected $repository;

     /** @var Site $site Site used to check and normalise URLs. */
     protected $site;

     /** @var int $maxDepth The maximum depth of recursion when processing site pages. */
     protected $maxDepth;

     /** @var ContentLoaderInterface Loader used to fetch the content of a batch of URLs. */
     private $contentLoader;

     /**
      * ImgCountHandler constructor.
      *
      * Creates the page repository, registers the root page in it and passes
      * the headers on to the content loader.
      *
      * @param Site                   $site     Site information.
      * @param string                 $rootUrl  Root URL for begin processing.
      * @param ContentLoaderInterface $loader   Content loader.
      * @param array                  $headers  CURL headers for content load.
      * @param int                    $maxDepth The maximum depth of recursion when processing site pages.
      */
     public function __construct(
         Site $site,
         string $rootUrl,
         ContentLoaderInterface $loader,
         array $headers = [],
         int $maxDepth = PHP_INT_MAX
     ) {
         $this->repository = new PageRepository();
         $this->rootPage   = new Page($site->correctUrl($rootUrl));
         $this->repository->store($this->rootPage);

         $this->maxDepth      = $maxDepth;
         $this->site          = $site;
         $this->contentLoader = $loader;
         $this->contentLoader->setHeaders($headers);
     }

     /**
      * Processes the given URL and the pages reachable from it, then builds the report.
      *
      * @param string $url URL the processing starts from.
      *
      * @return Report Report built from the page repository.
      */
     public function handle(string $url): Report
     {
         $this->pageProcessingRecursive([$url]);

         return new ImgCountReport($this->repository);
     }

     /**
      * Counts the opening <img tags in the given content.
      *
      * The pattern is case-insensitive and requires "<img" to be followed by
      * whitespace or by the end of the content, so a tag written without any
      * whitespace after its name (e.g. "<img/>") is not counted.
      *
      * @param string $content Page content to scan.
      *
      * @return int Number of matches; 0 when nothing matches or matching fails.
      */
     private function countImgTags(string &$content): int
     {
         // preg_match_all() returns the number of full matches, or false on failure.
         return (int) preg_match_all('/<img(?>\\s|$)/i', $content);
     }

     /**
      * Fills a page with its children and its <img> count.
      *
      * When the extracted URL list is null the page is recorded with no
      * children and zero images, an error message is printed and processing
      * of this page stops.
      *
      * @param Page   $page    Page being processed.
      * @param string $content Loaded content of the page.
      */
     private function pageProcessing(Page $page, string &$content): void
     {
         $childrenUrls = $this->correctUrls(UrlFilter::getInstance()->handle($content));

         if ($childrenUrls === null) {
             $page->setChildren([])->setImgCount(0);
             $this->echoErrorMsg($page);

             // Stop here: continuing would iterate over null and overwrite the values set above.
             return;
         }

         $children = [];

         /** @var string $url */
         foreach ($childrenUrls as $url) {
             // Reuse the page already known to the repository so each URL maps to one Page object.
             $child = $this->repository->get($url) ?? new Page($url);
             $this->repository->store($child);
             $children[] = $child;
         }

         $page->setChildren($children)
              ->setImgCount($this->countImgTags($content));
     }

     /**
      * Prints a message describing the last PCRE error for the given page.
      *
      * The message texts follow the PHP manual's descriptions of the PREG_*
      * error constants.
      *
      * @param Page $page Page whose content could not be parsed.
      */
     private function echoErrorMsg(Page $page): void
     {
         $messages = [
             PREG_NO_ERROR              => 'No errors.',
             PREG_INTERNAL_ERROR        => 'An internal PCRE error occurred.',
             PREG_BACKTRACK_LIMIT_ERROR => 'The backtrack limit was exhausted.',
             PREG_RECURSION_LIMIT_ERROR => 'The recursion limit was exhausted.',
             PREG_BAD_UTF8_ERROR        =>
                 'The error was caused by malformed UTF-8 data (only when running in UTF-8 mode).',
             PREG_BAD_UTF8_OFFSET_ERROR =>
                 'The offset does not correspond to the beginning of a valid UTF-8 code point '
                 . '(only when running in UTF-8 mode).',
             PREG_JIT_STACKLIMIT_ERROR  => 'The last PCRE function failed due to the JIT stack limit.',
         ];

         $errorMsg = $messages[preg_last_error()] ?? 'Unknown PCRE error.';

         echo "\nContent parsing error for URL \"", $page->getUrl(), '": ', $errorMsg, "\n";
     }

     /**
      * Keeps only the URLs that belong to the site and are not processed yet.
      *
      * Every kept URL is replaced by its corrected form. URLs outside the site,
      * URLs rejected with an InvalidArgumentException and URLs whose page is
      * already processed are dropped. The original order is preserved.
      *
      * @param array|null $urlList URLs to filter, or null.
      *
      * @return array|null Re-indexed list of corrected URLs, or null when null was given.
      */
     private function correctUrls(?array $urlList): ?array
     {
         if ($urlList === null) {
             return null;
         }

         $accepted = [];

         foreach ($urlList as $url) {
             if (!$this->site->isInhere($url)) {
                 continue;
             }

             try {
                 $correctedUrl = $this->site->correctUrl($url);
                 $knownPage    = $this->repository->get($correctedUrl) ?? new Page($correctedUrl);

                 if ($knownPage->isNotProcessed()) {
                     $accepted[] = $correctedUrl;
                 }
             } catch (InvalidArgumentException $e) {
                 // The URL is rejected: it is simply left out of the result.
                 continue;
             }
         }

         return $accepted;
     }

     /**
      * Loads and processes a batch of URLs, then recurses into their unprocessed children.
      *
      * The processing time stored on each page is the time spent processing
      * that page plus the load time of the whole batch it was loaded in.
      *
      * @param array $urlList URLs to process at this level.
      * @param int   $depth   Current recursion level, starting at 1.
      */
     private function pageProcessingRecursive(array $urlList, int $depth = 1): void
     {
         $start = microtime(true);

         $urlList = $this->correctUrls($urlList) ?? [];
         if ($urlList === []) {
             // Nothing left to load at this level.
             return;
         }

         $contentArray = $this->contentLoader->loadContent($urlList);
         $loadTime     = microtime(true) - $start;

         foreach ($contentArray as $url => $content) {
             $start = microtime(true);
             $page  = $this->repository->get($url) ?? new Page($url);
             if ($page->isNotProcessed()) {
                 $this->repository->store($page);
                 $this->pageProcessing($page, $content);
                 $page->setProcessingTime(microtime(true) - $start + $loadTime);
             }
             // Drop the content as soon as it has been handled.
             unset($contentArray[$url]);
         }

         /**
          * Check max depth level.
          * NOTE(review): the depth is incremented before the comparison, so children
          * are only processed when maxDepth is greater than the current depth + 1 -
          * confirm that this is the intended meaning of maxDepth.
          */
         if ($this->maxDepth <= ++$depth) {
             return;
         }

         foreach ($urlList as $url) {
             $parent = $this->repository->get($url);
             if ($parent === null) {
                 // The repository has no page for this URL, so there are no children to follow.
                 continue;
             }

             $pendingUrls = [];
             // NOTE(review): "?? []" only matters if getChildren() can return null - confirm.
             foreach ($parent->getChildren() ?? [] as $child) {
                 if ($child->isNotProcessed()) {
                     $pendingUrls[] = $child->getUrl();
                 }
             }

             $this->pageProcessingRecursive($pendingUrls, $depth);
         }
     }
 }
 
 |