<?php

namespace {

    /**
     * Bootstrap: pull in the bundled A-Parser API client that lives next to
     * this file, guarded by a constant so the include runs at most once.
     *
     * NOTE(review): the guard constant is named PHPEXCEL_ROOT, which looks like
     * a copy-paste from the PHPExcel library. If PHPExcel (or anything else)
     * defines that constant first, the client below is never loaded here -
     * confirm and consider a dedicated constant plus require_once.
     */
    if (defined('PHPEXCEL_ROOT') === false) {
        define('PHPEXCEL_ROOT', __DIR__ . '/');
        require PHPEXCEL_ROOT . 'A_Parser/aparser-api-php-client.php';
    }

    /**
     * Sends bulk search queries to an A-Parser HTTP API and scrapes the
     * resulting (or already known) pages for a price, a title and an optional
     * availability text using caller-supplied regular expressions.
     */
    class AparserService
    {
        /** Default A-Parser API endpoint (used when the constructor gets no URL). */
        const DEFAULT_API_URL = 'http://195.248.225.110:9091/API';

        /**
         * Default A-Parser API password.
         * NOTE(review): credentials should not live in source control; pass
         * them through the constructor from configuration instead.
         */
        const DEFAULT_API_PASSWORD = 'qwerty1';

        /** Pattern that captures the first "http:" URL inside a log line. */
        const LOG_URL_PATTERN = "/http\:(.[^\s]*)/";

        /** @var string A-Parser API endpoint. */
        private $apiUrl = self::DEFAULT_API_URL;

        /** @var string A-Parser API password. */
        private $apiPassword = self::DEFAULT_API_PASSWORD;

        /**
         * @param string $apiUrl      A-Parser API endpoint; defaults to the previously hard-coded URL.
         * @param string $apiPassword A-Parser API password; defaults to the previously hard-coded value.
         */
        public function __construct($apiUrl = self::DEFAULT_API_URL, $apiPassword = self::DEFAULT_API_PASSWORD)
        {
            $this->apiUrl = $apiUrl;
            $this->apiPassword = $apiPassword;
        }

        /**
         * Current Unix timestamp with microsecond precision.
         *
         * @return float
         */
        public function microtime_float()
        {
            return microtime(true);
        }

        /**
         * Runs a "bulkRequest" (parser "SE::Google") for every query and
         * returns one row per result reported by the API.
         *
         * @param array $query List of arrays with the keys 'query', 'item_id' and 'competitor_id'.
         *
         * @return array List of rows with the keys 'google_link', 'links', 'item_id',
         *               'competitor_id', 'success' and 'result'. Values the API did not
         *               provide are null ('links' falls back to '').
         *
         * @throws \Exception When the HTTP request fails or returns an empty body.
         */
        public function getParser($query)
        {
            $time_start = $this->microtime_float();

            $queries = $items = $competitors = array();
            foreach ($query as $one) {
                $queries[] = $one['query'];
                $items[] = $one['item_id'];
                $competitors[] = $one['competitor_id'];
            }

            $request = json_encode(array(
                'action' => 'bulkRequest',
                'data' => array(
                    'parser' => 'SE::Google',
                    'preset' => 'Use AntiGate',
                    'threads' => 200,
                    'rawResults' => 1,
                    'queries' => $queries,
                ),
                'password' => $this->apiPassword,
            ));

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $this->apiUrl);
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $request);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // CURLOPT_HTTPHEADER replaces the whole header list on every call, so it is
            // set exactly once. Content-Length is computed by cURL from CURLOPT_POSTFIELDS.
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: text/plain; charset=UTF-8'));

            $response = curl_exec($ch);
            // Release the handle before a possible throw so a failure does not leak it.
            curl_close($ch);

            if (!$response) {
                throw new \Exception("А-Парсер не работает");
            }

            $decoded = json_decode($response, true);
            $data = (is_array($decoded) && isset($decoded['data']) && is_array($decoded['data']))
                ? $decoded['data']
                : array();
            // A malformed or error response yields no rows instead of a fatal count() call.
            $rows = (isset($data['results']) && is_array($data['results'])) ? $data['results'] : array();
            $logs = (isset($data['logs']) && is_array($data['logs'])) ? $data['logs'] : array();

            $result = array();
            $total = count($rows);
            for ($i = 0; $i < $total; $i++) {
                $row = (isset($rows[$i]) && is_array($rows[$i])) ? $rows[$i] : array();

                $result[$i] = array(
                    'google_link' => $this->extractGoogleLink(isset($logs[$i]) ? $logs[$i] : null),
                    'links' => isset($row['serp'][0]) ? $row['serp'][0] : '',
                    // Results are matched to the input queries purely by position.
                    'item_id' => isset($items[$i]) ? $items[$i] : null,
                    'competitor_id' => isset($competitors[$i]) ? $competitors[$i] : null,
                    'success' => isset($row['info']['stats']['success']) ? $row['info']['stats']['success'] : null,
                    'result' => isset($row['totalcount']) ? $row['totalcount'] : null,
                );
            }

            // NOTE(review): the elapsed time is still printed to standard output to keep
            // the existing behaviour; consider moving it to a logger.
            $time = $this->microtime_float() - $time_start;
            print($time);

            return $result;
        }

        /**
         * Fetches a known product page and extracts price, title and availability.
         *
         * @param array  $query        Array with the key 'link' (page URL) and, optionally, 'google_link'.
         * @param string $h1_reg       Regex whose first capture group is the title.
         * @param string $price_reg    Regex whose first capture group is the price.
         * @param string $exist_regexr Optional regex whose first capture group is the availability text.
         *
         * @return array Keys 'price', 'h1', 'exist', 'link', 'google_link', 'success'. When price or
         *               title cannot be found, 'price' is '' and 'h1' is 'Товар пропал'.
         */
        public function parseExistLink($query, $h1_reg, $price_reg, $exist_regexr)
        {
            $page = $this->fetchPage($query['link']);
            // Pause between requests, as before, so target sites are not hit back-to-back.
            sleep(1);

            $fields = $this->extractProductFields($page, $h1_reg, $price_reg, $exist_regexr);
            if ($fields === null) {
                $fields = array('price' => '', 'h1' => 'Товар пропал', 'exist' => '');
            }

            return array(
                'price' => $fields['price'],
                'h1' => $fields['h1'],
                'exist' => $fields['exist'],
                'link' => $query['link'],
                'google_link' => isset($query['google_link']) ? $query['google_link'] : null,
                // 'success' is '1' in both the found and the not-found case (unchanged behaviour).
                'success' => '1',
            );
        }

        /**
         * Looks up new links through getParser() and scrapes every found page.
         *
         * @param array  $query        Passed to getParser() unchanged.
         * @param string $h1_reg       Regex whose first capture group is the title.
         * @param string $price_reg    Regex whose first capture group is the price.
         * @param string $exist_regexr Optional regex whose first capture group is the availability text.
         *
         * @return array One row per getParser() row, in the same order. Rows where price and title
         *               were found carry 'price', 'h1', 'exist', 'link', 'google_link', 'item_id',
         *               'competitor_id', 'result', 'link_status'; all other rows carry 'item_id',
         *               'competitor_id', 'google_link', 'link' (''), 'exist' (''), 'success',
         *               'result', 'link_status'. An empty array is returned when there are no rows.
         *
         * @throws \Exception Propagated from getParser().
         */
        public function parseNewLink($query, $h1_reg, $price_reg, $exist_regexr)
        {
            // Initialised up front so an empty search result returns array() rather than
            // an undefined variable.
            $result = array();

            foreach ($this->getParser($query) as $link_array) {
                $fields = null;
                if (!empty($link_array['links'])) {
                    $page = $this->fetchPage($link_array['links']);
                    $fields = $this->extractProductFields($page, $h1_reg, $price_reg, $exist_regexr);
                }

                if ($fields !== null) {
                    $result[] = array(
                        'price' => $fields['price'],
                        'h1' => $fields['h1'],
                        'exist' => $fields['exist'],
                        'link' => $link_array['links'],
                        'google_link' => $link_array['google_link'],
                        'item_id' => $link_array['item_id'],
                        'competitor_id' => $link_array['competitor_id'],
                        'result' => $link_array['result'],
                        'link_status' => 'new',
                    );
                    continue;
                }

                // No link, or the page did not yield both a price and a title.
                $result[] = array(
                    'item_id' => $link_array['item_id'],
                    'competitor_id' => $link_array['competitor_id'],
                    'google_link' => $link_array['google_link'],
                    'link' => '',
                    'exist' => '',
                    'success' => $link_array['success'],
                    'result' => $link_array['result'],
                    'link_status' => 'new',
                );
            }

            return $result;
        }

        /**
         * Returns the first "http:" URL found at the fixed log positions
         * [1][5][2] and then [1][4][2] of one log entry.
         *
         * @param mixed $logEntry One element of the API's 'logs' list (may be missing).
         *
         * @return string|null The matched URL, or null when neither position holds one.
         */
        private function extractGoogleLink($logEntry)
        {
            foreach (array(5, 4) as $position) {
                if (!isset($logEntry[1][$position][2]) || !is_scalar($logEntry[1][$position][2])) {
                    continue;
                }
                $matches = array();
                if (preg_match(self::LOG_URL_PATTERN, (string)$logEntry[1][$position][2], $matches)
                    && !empty($matches[0])
                ) {
                    return $matches[0];
                }
            }

            return null;
        }

        /**
         * Downloads a page with the crawler-like headers used for scraping.
         *
         * @param string $url Page URL.
         *
         * @return string Response body, or '' when the transfer failed.
         */
        private function fetchPage($url)
        {
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)");
            // CURLOPT_ENCODING sends "Accept-Encoding: deflate" AND makes cURL decode a
            // compressed response; a hand-written header would leave the body compressed
            // and the regexes below would never match.
            curl_setopt($ch, CURLOPT_ENCODING, 'deflate');
            curl_setopt($ch, CURLOPT_HTTPHEADER, array(
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*;q=0.8',
                'Accept-Language: ru,en-us;q=0.7,en;q=0.3',
                'Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7',
            ));

            $page = curl_exec($ch);
            curl_close($ch);

            return is_string($page) ? $page : '';
        }

        /**
         * Applies the caller-supplied regexes to a page.
         *
         * @param string $page         Page body.
         * @param string $h1_reg       Regex whose first capture group is the title.
         * @param string $price_reg    Regex whose first capture group is the price.
         * @param string $exist_regexr Optional regex whose first capture group is the availability text.
         *
         * @return array|null Array with 'price' (digits only), 'h1' and 'exist', or null when
         *                    the price or the title pattern did not match.
         */
        private function extractProductFields($page, $h1_reg, $price_reg, $exist_regexr)
        {
            $price = $h1 = $exist = array();
            preg_match($price_reg, $page, $price);
            preg_match($h1_reg, $page, $h1);
            if (!empty($exist_regexr)) {
                preg_match($exist_regexr, $page, $exist);
            }

            if (empty($price) || empty($h1)) {
                return null;
            }

            $existText = '';
            if (!empty($exist)) {
                $existText = $this->toUtf8(isset($exist[1]) ? $exist[1] : '');
            }

            return array(
                // Strip everything but digits (currency signs, spaces, separators).
                'price' => preg_replace("/[^0-9]/", '', isset($price[1]) ? $price[1] : ''),
                'h1' => $this->toUtf8(isset($h1[1]) ? $h1[1] : ''),
                'exist' => !empty($existText) ? $existText : '',
            );
        }

        /**
         * Converts scraped text to UTF-8.
         *
         * NOTE(review): the source-encoding list tries windows-1251 before utf-8; the list
         * is kept unchanged, but confirm that pages already in UTF-8 are not re-encoded.
         *
         * @param string $text Raw text taken from a page.
         *
         * @return string
         */
        private function toUtf8($text)
        {
            return mb_convert_encoding($text, 'utf-8', 'windows-1251, utf-8');
        }

    }
}