Commit d0a7acb3087a5413cc422f84f3575855ca3850ca
1 parent
4fd43c25
refactor CsvParser, add CustomCsvParser
Showing
4 changed files
with
95 additions
and
72 deletions
Show diff stats
backend/components/parsers/CsvParser.php
| 1 | <?php | 1 | <?php |
| 2 | /** | 2 | /** |
| 3 | - * Created by PhpStorm. | ||
| 4 | - * User: Cibermag | ||
| 5 | - * Date: 26.08.2015 | ||
| 6 | - * Time: 17:00 | ||
| 7 | - */ | ||
| 8 | 3 | ||
| 4 | + */ | ||
| 5 | +// @todo add exceptions | ||
| 9 | namespace backend\components\parsers; | 6 | namespace backend\components\parsers; |
| 10 | 7 | ||
| 11 | 8 | ||
| @@ -17,64 +14,70 @@ class CsvParser | @@ -17,64 +14,70 @@ class CsvParser | ||
| 17 | { | 14 | { |
| 18 | 15 | ||
| 19 | 16 | ||
| 20 | - /** @var bool */ | 17 | + /** @var bool |
| 18 | + имеет ли файл заголовок, который будет установлен ключами возвращаемого массива*/ | ||
| 21 | public $hasHeaderRow = false; | 19 | public $hasHeaderRow = false; |
| 20 | + /** @var array - массив с заголовком, | ||
| 21 | + * если не указан и установлено свойство $hasHeaderRow - будет определен автоматически */ | ||
| 22 | + public $keys; | ||
| 22 | 23 | ||
| 23 | - /** @var resource */ | 24 | + /** @var экземпляр SplFileObject читаемого файла */ |
| 24 | public $file; | 25 | public $file; |
| 25 | 26 | ||
| 26 | - /** @var out encoding charset */ | ||
| 27 | - public $out_charset = 'UTF-8'; | ||
| 28 | - /** @var out encoding charset */ | ||
| 29 | - public $in_charset = 'windows-1251'; | ||
| 30 | - /** @var int - first line for parsing */ | 27 | + /** @var int - первая строка с которой начинать парсить */ |
| 31 | public $first_line = 0; | 28 | public $first_line = 0; |
| 32 | - public $last_line = 10; | ||
| 33 | 29 | ||
| 34 | - /** @var int - first column for parsing */ | 30 | + /** @var int - последняя строка до которой парсить |
| 31 | + * если не указана, то парсинг происходит до конца файла*/ | ||
| 32 | + public $last_line = 0; | ||
| 33 | + | ||
| 34 | + /** @var int - первая колонка файла с которой начнется парсинг */ | ||
| 35 | public $first_column = 0; | 35 | public $first_column = 0; |
| 36 | 36 | ||
| 37 | - /** @var array - array of headers values */ | ||
| 38 | - public $keys; | 37 | + /** @var string - разделитель csv */ |
| 39 | public $delimiter = ';'; | 38 | public $delimiter = ';'; |
| 40 | - public $auto_detect_start_position = false; | 39 | + |
| 40 | + /** @var bool | ||
| 41 | + нужно ли искать автоматически первую значимую строку (не пустая строка) | ||
| 42 | + * иначе первая строка будет взята из атрибута $first_line */ | ||
| 43 | + public $auto_detect_first_line = false; | ||
| 44 | + | ||
| 45 | + /** @var int - количество значимых колонок, чтобы определить первую значимую строку | ||
| 46 | + * используется при автоопределении первой строки*/ | ||
| 41 | public $min_column_quantity = 5; | 47 | public $min_column_quantity = 5; |
| 42 | 48 | ||
| 43 | 49 | ||
| 50 | + /** | ||
| 51 | + метод устанавливает нужные настройки объекта SplFileObject для работы с csv | ||
| 52 | + */ | ||
| 44 | public function setup() | 53 | public function setup() |
| 45 | { | 54 | { |
| 46 | - | ||
| 47 | $this->file->setCsvControl($this->delimiter); | 55 | $this->file->setCsvControl($this->delimiter); |
| 48 | $this->file->setFlags(\SplFileObject::READ_CSV); | 56 | $this->file->setFlags(\SplFileObject::READ_CSV); |
| 49 | $this->file->setFlags(\SplFileObject::SKIP_EMPTY); | 57 | $this->file->setFlags(\SplFileObject::SKIP_EMPTY); |
| 50 | -// $this->file->setFlags(\SplFileObject::READ_AHEAD); | ||
| 51 | - | ||
| 52 | - if ($this->auto_detect_start_position) { | ||
| 53 | - $this->first_line = $this->detectStartPosition(); | 58 | + if ($this->auto_detect_first_line) { |
| 59 | + $this->shiftToFirstValuableLine(); | ||
| 54 | } | 60 | } |
| 55 | - // CustomVarDamp::dumpAndDie($this); | ||
| 56 | -// echo $this->file->key(); | ||
| 57 | -// $this->file->seek($this->first_line + 1); | ||
| 58 | -// echo $this->file->key(); | ||
| 59 | - | ||
| 60 | - | ||
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | - | ||
| 64 | - protected function detectStartPosition() | 63 | + /** |
| 64 | + * определяет первую значимую строку, | ||
| 65 | + * считывается файл пока в нем не встретится строка с непустыми колонками | ||
| 66 | + * в количестве указанном в атрибуте min_column_quantity | ||
| 67 | + * в результате выполнения курсор ресурса будет находиться на последней незначимой строке | ||
| 68 | + */ | ||
| 69 | + protected function shiftToFirstValuableLine() | ||
| 65 | { | 70 | { |
| 66 | - $first_line = 0; | ||
| 67 | - $find = false; | ||
| 68 | - while (!$find) { | ||
| 69 | 71 | ||
| 72 | + $finish = false; | ||
| 73 | + | ||
| 74 | + while (!$finish) { | ||
| 70 | $j = 0; | 75 | $j = 0; |
| 71 | $row = $this->readRow(); | 76 | $row = $this->readRow(); |
| 72 | - | ||
| 73 | if ($row === false) { | 77 | if ($row === false) { |
| 74 | continue; | 78 | continue; |
| 75 | } | 79 | } |
| 76 | 80 | ||
| 77 | - $first_line++; | ||
| 78 | for ($i = 1; $i <= count($row); $i++) { | 81 | for ($i = 1; $i <= count($row); $i++) { |
| 79 | 82 | ||
| 80 | if ($row[$i - 1] <> '') { | 83 | if ($row[$i - 1] <> '') { |
| @@ -82,30 +85,25 @@ class CsvParser | @@ -82,30 +85,25 @@ class CsvParser | ||
| 82 | } | 85 | } |
| 83 | 86 | ||
| 84 | if ($j >= $this->min_column_quantity) { | 87 | if ($j >= $this->min_column_quantity) { |
| 85 | - $find = true; | ||
| 86 | - break; | 88 | + break 2; |
| 87 | } | 89 | } |
| 88 | } | 90 | } |
| 89 | } | 91 | } |
| 90 | - | ||
| 91 | - return $first_line; | ||
| 92 | - | ||
| 93 | } | 92 | } |
| 94 | 93 | ||
| 95 | /** | 94 | /** |
| 96 | - * @return array | ||
| 97 | - * @throws InvalidFileException | 95 | + * @return array - итоговый двумерный массив с результатом парсинга |
| 96 | + * метод считывает с открытого файла данные построчно | ||
| 98 | */ | 97 | */ |
| 99 | public function read() | 98 | public function read() |
| 100 | { | 99 | { |
| 101 | - // @todo add comments | ||
| 102 | $return = []; | 100 | $return = []; |
| 103 | - //CustomVarDamp::dump(debug_print_backtrace(1,2)); | ||
| 104 | - $line = 0; | 101 | + |
| 102 | + $current_line = 0; | ||
| 105 | $this->keys = NULL; | 103 | $this->keys = NULL; |
| 106 | - CustomVarDamp::dump($this->file->key()); | 104 | + |
| 107 | while (($row = $this->readRow()) !== FALSE) { | 105 | while (($row = $this->readRow()) !== FALSE) { |
| 108 | - $line++; | 106 | + $current_line++; |
| 109 | 107 | ||
| 110 | if ($this->hasHeaderRow) { | 108 | if ($this->hasHeaderRow) { |
| 111 | if ($this->keys === NULL) { | 109 | if ($this->keys === NULL) { |
| @@ -114,25 +112,25 @@ class CsvParser | @@ -114,25 +112,25 @@ class CsvParser | ||
| 114 | 112 | ||
| 115 | if (count($this->keys) !== count($row)) { | 113 | if (count($this->keys) !== count($row)) { |
| 116 | // | 114 | // |
| 117 | - Yii::warning("Invalid columns detected on line #$line ."); | 115 | + Yii::warning("Invalid columns detected on line #$current_line ."); |
| 118 | return $return; | 116 | return $return; |
| 119 | } | 117 | } |
| 120 | 118 | ||
| 121 | $return[] = array_combine($this->keys, $row); | 119 | $return[] = array_combine($this->keys, $row); |
| 122 | } | 120 | } |
| 123 | - } else { | 121 | + } |
| 122 | + else | ||
| 123 | + { | ||
| 124 | $return[] = $row; | 124 | $return[] = $row; |
| 125 | } | 125 | } |
| 126 | - if(($this->last_line) && ($line > $this->last_line)){ | ||
| 127 | -// CustomVarDamp::dump($this->last_line); | ||
| 128 | -// CustomVarDamp::dump($line); | 126 | + // если у нас установлен лимит, при его достижении прекращаем парсинг |
| 127 | + if (($this->last_line) && ($current_line > $this->last_line)) { | ||
| 129 | break; | 128 | break; |
| 130 | } | 129 | } |
| 131 | 130 | ||
| 132 | } | 131 | } |
| 133 | 132 | ||
| 134 | $this->closeHandler(); | 133 | $this->closeHandler(); |
| 135 | - //CustomVarDamp::dumpAndDie($return); | ||
| 136 | return $return; | 134 | return $return; |
| 137 | } | 135 | } |
| 138 | 136 | ||
| @@ -142,20 +140,20 @@ class CsvParser | @@ -142,20 +140,20 @@ class CsvParser | ||
| 142 | $this->file = NULL; | 140 | $this->file = NULL; |
| 143 | } | 141 | } |
| 144 | 142 | ||
| 143 | + /** | ||
| 144 | + * @return array - одномерный массив результата парсинга строки | ||
| 145 | + */ | ||
| 145 | protected function readRow() | 146 | protected function readRow() |
| 146 | - // @todo add comments | ||
| 147 | { | 147 | { |
| 148 | 148 | ||
| 149 | $row = $this->file->fgetcsv(); | 149 | $row = $this->file->fgetcsv(); |
| 150 | - // | ||
| 151 | - if (is_array($row)) { | ||
| 152 | - // $row = array_slice( $row, $this->first_column ); | ||
| 153 | - $row = Encoder::encodeArray($this->in_charset, $this->out_charset, $row); | 150 | + if (is_array($row) && $this->first_column) { |
| 151 | + | ||
| 152 | + $row = array_slice($row, $this->first_column); | ||
| 153 | + | ||
| 154 | } | 154 | } |
| 155 | if (is_null($row)) | 155 | if (is_null($row)) |
| 156 | $row = false; | 156 | $row = false; |
| 157 | -// if ($this->keys !== NULL) | ||
| 158 | -// @$clear_arr[3] = ValueFilter::pricefilter($clear_arr[3]);{}{}{} | ||
| 159 | 157 | ||
| 160 | return $row; | 158 | return $row; |
| 161 | 159 |
| 1 | +<?php | ||
| 2 | +/** | ||
| 3 | + * Created by PhpStorm. | ||
| 4 | + * User: Cibermag | ||
| 5 | + * Date: 04.09.2015 | ||
| 6 | + * Time: 16:07 | ||
| 7 | + */ | ||
| 8 | + | ||
| 9 | +namespace backend\components\parsers; | ||
| 10 | + | ||
| 11 | + | ||
| 12 | +class CustomCsvParser extends CsvParser { | ||
| 13 | + | ||
| 14 | + protected function readRow() | ||
| 15 | + { | ||
| 16 | + | ||
| 17 | + $row = parent::readRow(); | ||
| 18 | + | ||
| 19 | + if (is_array($row)) { | ||
| 20 | + | ||
| 21 | + $row = Encoder::encodeArray( Encoder::$in_charset, Encoder::$out_charset, $row ); | ||
| 22 | + } | ||
| 23 | + | ||
| 24 | + return $row; | ||
| 25 | + | ||
| 26 | + } | ||
| 27 | + | ||
| 28 | +} | ||
| 0 | \ No newline at end of file | 29 | \ No newline at end of file |
backend/components/parsers/Encoder.php
| @@ -11,9 +11,10 @@ namespace backend\components\parsers; | @@ -11,9 +11,10 @@ namespace backend\components\parsers; | ||
| 11 | // @todo add comments | 11 | // @todo add comments |
| 12 | class Encoder | 12 | class Encoder |
| 13 | { | 13 | { |
| 14 | - public static $in_charset; | ||
| 15 | - public static $out_charset; | ||
| 16 | - | 14 | + /** @var out encoding charset */ |
| 15 | + public static $out_charset = 'UTF-8'; | ||
| 16 | + /** @var in encoding charset */ | ||
| 17 | + public static $in_charset = 'windows-1251'; | ||
| 17 | 18 | ||
| 18 | public static function encodeFile($in_charset, $out_charset, $filePath) | 19 | public static function encodeFile($in_charset, $out_charset, $filePath) |
| 19 | { | 20 | { |
backend/components/parsers/ParserHandler.php
| @@ -39,20 +39,16 @@ class ParserHandler { | @@ -39,20 +39,16 @@ class ParserHandler { | ||
| 39 | 39 | ||
| 40 | public function run(){ | 40 | public function run(){ |
| 41 | if ($this->extension = 'csv'){ | 41 | if ($this->extension = 'csv'){ |
| 42 | - $first_line = isset( $this->options->first_line )? $this->options->first_line : 0; | ||
| 43 | - $first_column = isset( $this->options->first_column )? $this->options->first_column : 0; | ||
| 44 | 42 | ||
| 45 | $csvParser = Yii::createObject([ | 43 | $csvParser = Yii::createObject([ |
| 46 | - 'class' => 'backend\components\parsers\CsvParser', | 44 | + 'class' => 'backend\components\parsers\CustomCsvParser', |
| 47 | 'file' => $this->fileObject, | 45 | 'file' => $this->fileObject, |
| 48 | - 'auto_detect_start_position' => true, | 46 | + 'auto_detect_first_line' => true, |
| 49 | ]); | 47 | ]); |
| 50 | - //CustomVarDamp::dumpAndDie($csvParser); | ||
| 51 | - // $csvParser = new CsvParser( ); | ||
| 52 | - $csvParser->setup( ); | ||
| 53 | 48 | ||
| 54 | -// CustomVarDamp::dumpAndDie($data); | ||
| 55 | - return $csvParser->read();;// | 49 | + $csvParser->setup(); |
| 50 | + | ||
| 51 | + return $csvParser->read(); | ||
| 56 | }; | 52 | }; |
| 57 | } | 53 | } |
| 58 | } | 54 | } |