Commit f6e54131c1adc6b153e334f17010c74745a6a83a
1 parent
735c416d
Fixed the keys and header issues and added handling of empty columns
Showing
2 changed files
with
128 additions
and
61 deletions
Show diff stats
lib/TableParser.php
| @@ -9,6 +9,8 @@ | @@ -9,6 +9,8 @@ | ||
| 9 | namespace yii\multiparser; | 9 | namespace yii\multiparser; |
| 10 | 10 | ||
| 11 | 11 | ||
| 12 | +use common\components\CustomVarDamp; | ||
| 13 | + | ||
| 12 | abstract class TableParser extends Parser { | 14 | abstract class TableParser extends Parser { |
| 13 | 15 | ||
| 14 | 16 | ||
| @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { | @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { | ||
| 107 | { | 109 | { |
| 108 | do { | 110 | do { |
| 109 | 111 | ||
| 110 | - $this->current_row_number ++; | 112 | + $this->current_row_number++; |
| 111 | $this->readRow(); | 113 | $this->readRow(); |
| 112 | 114 | ||
| 113 | } while( $this->isEmptyRow() ); | 115 | } while( $this->isEmptyRow() ); |
lib/XlsxParser.php
| @@ -7,6 +7,7 @@ | @@ -7,6 +7,7 @@ | ||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | namespace yii\multiparser; | 9 | namespace yii\multiparser; |
| 10 | + | ||
| 10 | use common\components\CustomVarDamp; | 11 | use common\components\CustomVarDamp; |
| 11 | 12 | ||
| 12 | 13 | ||
| @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; | @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; | ||
| 14 | * Class XlsxParser | 15 | * Class XlsxParser |
| 15 | * @package yii\multiparser | 16 | * @package yii\multiparser |
| 16 | */ | 17 | */ |
| 17 | -class XlsxParser extends TableParser { | 18 | +class XlsxParser extends TableParser |
| 19 | +{ | ||
| 18 | 20 | ||
| 19 | /** | 21 | /** |
| 20 | * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера | 22 | * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера |
| @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { | @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { | ||
| 39 | 41 | ||
| 40 | parent::setup(); | 42 | parent::setup(); |
| 41 | 43 | ||
| 42 | - if ( $this->path_for_extract_files == '' ) { | 44 | + if ($this->path_for_extract_files == '') { |
| 43 | $this->path_for_extract_files = sys_get_temp_dir(); | 45 | $this->path_for_extract_files = sys_get_temp_dir(); |
| 44 | } | 46 | } |
| 45 | } | 47 | } |
| @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { | @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { | ||
| 47 | 49 | ||
| 48 | public function read() | 50 | public function read() |
| 49 | { | 51 | { |
| 50 | - $this->extractFiles(); | 52 | + $this->extractFiles(); |
| 51 | $this->readSheets(); | 53 | $this->readSheets(); |
| 52 | $this->readStrings(); | 54 | $this->readStrings(); |
| 53 | - foreach ( $this->sheets_arr as $sheet ) { | 55 | + foreach ($this->sheets_arr as $sheet) { |
| 54 | //проходим по всем файлам из директории /xl/worksheets/ | 56 | //проходим по всем файлам из директории /xl/worksheets/ |
| 55 | $this->current_sheet = $sheet; | 57 | $this->current_sheet = $sheet; |
| 56 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; | 58 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; |
| 57 | - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) { | ||
| 58 | - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); | 59 | + if (file_exists($sheet_path) && is_readable($sheet_path)) { |
| 60 | + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator"); | ||
| 59 | $this->current_node = $xml->sheetData->row; | 61 | $this->current_node = $xml->sheetData->row; |
| 60 | $this->current_node->rewind(); | 62 | $this->current_node->rewind(); |
| 61 | - if ( $this->current_node->valid() ) { | 63 | + if ($this->current_node->valid()) { |
| 62 | parent::read(); | 64 | parent::read(); |
| 63 | } | 65 | } |
| 64 | } | 66 | } |
| @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { | @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { | ||
| 66 | 68 | ||
| 67 | $this->cleanUp(); | 69 | $this->cleanUp(); |
| 68 | 70 | ||
| 69 | - if ( $this->active_sheet ) { | 71 | + if ($this->active_sheet) { |
| 70 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив | 72 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив |
| 71 | - return $this->result[ $this->current_sheet ]; | ||
| 72 | - }else{ | 73 | + return $this->result[$this->current_sheet]; |
| 74 | + } else { | ||
| 73 | return $this->result; | 75 | return $this->result; |
| 74 | } | 76 | } |
| 75 | 77 | ||
| 76 | } | 78 | } |
| 77 | 79 | ||
| 78 | - protected function extractFiles () | 80 | + protected function extractFiles() |
| 79 | { | 81 | { |
| 80 | $this->path_for_extract_files = $this->path_for_extract_files . session_id(); | 82 | $this->path_for_extract_files = $this->path_for_extract_files . session_id(); |
| 81 | - if ( !file_exists($this->path_for_extract_files )) { | ||
| 82 | - if ( !mkdir( $this->path_for_extract_files ) ) | ||
| 83 | - { | ||
| 84 | - throw new \Exception( 'Ошибка создания временного каталога - ' . $this->path_for_extract_files ); | 83 | + if (!file_exists($this->path_for_extract_files)) { |
| 84 | + if (!mkdir($this->path_for_extract_files)) { | ||
| 85 | + throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files); | ||
| 85 | } | 86 | } |
| 86 | } | 87 | } |
| 87 | 88 | ||
| 88 | $zip = new \ZipArchive; | 89 | $zip = new \ZipArchive; |
| 89 | - if ( $zip->open( $this->file_path ) === TRUE ) { | ||
| 90 | - $zip->extractTo( $this->path_for_extract_files . '/' ); | 90 | + if ($zip->open($this->file_path) === TRUE) { |
| 91 | + $zip->extractTo($this->path_for_extract_files . '/'); | ||
| 91 | $zip->close(); | 92 | $zip->close(); |
| 92 | } else { | 93 | } else { |
| 93 | 94 | ||
| 94 | - throw new \Exception( 'Ошибка чтения xlsx файла' ); | 95 | + throw new \Exception('Ошибка чтения xlsx файла'); |
| 95 | } | 96 | } |
| 96 | unset($zip); | 97 | unset($zip); |
| 97 | } | 98 | } |
| 98 | 99 | ||
| 99 | - protected function readSheets () | 100 | + protected function readSheets() |
| 100 | { | 101 | { |
| 101 | - if ( $this->active_sheet ) { | ||
| 102 | - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; | 102 | + if ($this->active_sheet) { |
| 103 | + $this->sheets_arr[] = 'sheet' . $this->active_sheet; | ||
| 103 | return; | 104 | return; |
| 104 | } | 105 | } |
| 105 | 106 | ||
| 106 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' ); | ||
| 107 | - foreach ( $xml->sheets->children() as $sheet ) { | 107 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml'); |
| 108 | + foreach ($xml->sheets->children() as $sheet) { | ||
| 108 | $sheet_name = ''; | 109 | $sheet_name = ''; |
| 109 | $sheet_id = 0; | 110 | $sheet_id = 0; |
| 110 | $attr = $sheet->attributes(); | 111 | $attr = $sheet->attributes(); |
| 111 | - foreach ( $attr as $name => $value ) { | 112 | + foreach ($attr as $name => $value) { |
| 112 | if ($name == 'name') | 113 | if ($name == 'name') |
| 113 | $sheet_name = (string)$value; | 114 | $sheet_name = (string)$value; |
| 114 | 115 | ||
| @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { | @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { | ||
| 116 | $sheet_id = $value; | 117 | $sheet_id = $value; |
| 117 | 118 | ||
| 118 | } | 119 | } |
| 119 | - if ( $sheet_name && $sheet_id ) { | 120 | + if ($sheet_name && $sheet_id) { |
| 120 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; | 121 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; |
| 121 | } | 122 | } |
| 122 | // | 123 | // |
| 123 | } | 124 | } |
| 124 | } | 125 | } |
| 125 | 126 | ||
| 126 | - protected function readStrings () | 127 | + protected function readStrings() |
| 127 | { | 128 | { |
| 128 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/sharedStrings.xml' ); | ||
| 129 | - foreach ( $xml->children() as $item ) { | 129 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/sharedStrings.xml'); |
| 130 | + foreach ($xml->children() as $item) { | ||
| 130 | $this->strings_arr[] = (string)$item->t; | 131 | $this->strings_arr[] = (string)$item->t; |
| 131 | } | 132 | } |
| 132 | } | 133 | } |
| 133 | 134 | ||
| 134 | 135 | ||
| 135 | - | ||
| 136 | - // protected function readRow ( $item, $sheet , $current_row ) | ||
| 137 | - protected function readRow ( ) | 136 | + protected function readRow() |
| 138 | { | 137 | { |
| 139 | $this->row = []; | 138 | $this->row = []; |
| 140 | $node = $this->current_node->getChildren(); | 139 | $node = $this->current_node->getChildren(); |
| 141 | if ($node === NULL) { | 140 | if ($node === NULL) { |
| 142 | return; | 141 | return; |
| 143 | } | 142 | } |
| 144 | - //foreach ( $node as $child ) { | ||
| 145 | - for ( $node->rewind(); $node->valid(); $node->next() ) { | 143 | + |
| 144 | + for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) { | ||
| 146 | $child = $node->current(); | 145 | $child = $node->current(); |
| 147 | $attr = $child->attributes(); | 146 | $attr = $child->attributes(); |
| 148 | 147 | ||
| 149 | - if( isset($child->v) ) { | 148 | + // define the index of result array |
| 149 | + // $attr['r'] - contain the address of cells - A1, B1 ... | ||
| 150 | + if (isset($attr['r'])) { | ||
| 151 | + // override index | ||
| 152 | + $i = $this->convertCellToIndex( $attr['r'] ); | ||
| 153 | + | ||
| 154 | + if ( $this->keys !== Null ){ | ||
| 155 | + if( isset( $this->keys[$i] ) ){ | ||
| 156 | + //$i = $this->keys[$i]; | ||
| 157 | + } else { | ||
| 158 | + // keys are defined, but this column index is not among them, so skip the cell | ||
| 159 | + continue; | ||
| 160 | + } | ||
| 161 | + } | ||
| 162 | + } | ||
| 163 | + // define the value of result array | ||
| 164 | + if (isset($child->v)) { | ||
| 150 | $value = (string)$child->v; | 165 | $value = (string)$child->v; |
| 151 | - }else{ | 166 | + |
| 167 | + if ( isset($attr['t']) ) | ||
| 168 | + // it's not a value it's a string, so fetch it from string array | ||
| 169 | + $value = $this->strings_arr[$value]; | ||
| 170 | + | ||
| 171 | + } else { | ||
| 152 | $value = ''; | 172 | $value = ''; |
| 153 | } | 173 | } |
| 154 | - if ( isset( $attr['t'] ) ) { | ||
| 155 | - $this->row[] = $this->strings_arr[ $value ]; | ||
| 156 | - }else{ | ||
| 157 | - $this->row[] = $value; | ||
| 158 | - } | 174 | + |
| 175 | + // set | ||
| 176 | + $this->row[$i] = $value; | ||
| 159 | 177 | ||
| 160 | } | 178 | } |
| 161 | - // дополним ряд пустыми значениями если у нас ключей больше чем значений | ||
| 162 | - if ( $this->has_header_row && ( count( $this->keys ) > count( $this->row ) ) ) { | ||
| 163 | - $extra_coloumn = count( $this->keys ) - count( $this->row ); | ||
| 164 | - for ( $i = 1; $i <= $extra_coloumn; $i++ ) { | ||
| 165 | - $this->row[] = ''; | 179 | +// // fill the row with empty values for the keys that were missed in the previous step |
| 180 | + // only for 'has_header_row = true' mode | ||
| 181 | + if ( $this->has_header_row && $this->keys !== Null ) { | ||
| 182 | + $extra_column = count( $this->keys ) - count( $this->row ); | ||
| 183 | + if ( $extra_column ) { | ||
| 184 | + foreach ( $this->keys as $key => $key ) { | ||
| 185 | + | ||
| 186 | + if ( isset( $this->row[$key] ) ) { | ||
| 187 | + continue; | ||
| 188 | + } | ||
| 189 | + $this->row[$key] = ''; | ||
| 190 | + } | ||
| 166 | } | 191 | } |
| 192 | + | ||
| 167 | } | 193 | } |
| 194 | + ksort( $this->row ); | ||
| 168 | $this->current_node->next(); | 195 | $this->current_node->next(); |
| 169 | } | 196 | } |
| 170 | 197 | ||
| 171 | - protected function isEmptyRow(){ | 198 | + protected function isEmptyRow() |
| 199 | + { | ||
| 172 | 200 | ||
| 173 | $is_empty = false; | 201 | $is_empty = false; |
| 174 | 202 | ||
| 175 | - if ( !count( $this->row ) || !$this->current_node->valid() ) { | 203 | + if (!count($this->row) || !$this->current_node->valid()) { |
| 176 | return true; | 204 | return true; |
| 177 | } | 205 | } |
| 178 | 206 | ||
| 179 | $j = 0; | 207 | $j = 0; |
| 180 | - for ($i = 1; $i <= count( $this->row ); $i++) { | 208 | + for ($i = 1; $i <= count($this->row); $i++) { |
| 181 | 209 | ||
| 182 | - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { | 210 | + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) { |
| 183 | $j++; | 211 | $j++; |
| 184 | } | 212 | } |
| 185 | 213 | ||
| 186 | - if ( $j >= $this->min_column_quantity ) { | 214 | + if ($j >= $this->min_column_quantity) { |
| 187 | $is_empty = true; | 215 | $is_empty = true; |
| 188 | break; | 216 | break; |
| 189 | } | 217 | } |
| @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { | @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { | ||
| 192 | return $is_empty; | 220 | return $is_empty; |
| 193 | } | 221 | } |
| 194 | 222 | ||
| 195 | - protected function isEmptyColumn( $val ){ | 223 | + protected function isEmptyColumn($val) |
| 224 | + { | ||
| 196 | return $val == ''; | 225 | return $val == ''; |
| 197 | } | 226 | } |
| 198 | 227 | ||
| 199 | - protected function setResult( ){ | ||
| 200 | - $this->result[ $this->current_sheet ][] = $this->row; | 228 | + protected function setResult() |
| 229 | + { | ||
| 230 | + $this->result[$this->current_sheet][] = $this->row; | ||
| 201 | } | 231 | } |
| 202 | 232 | ||
| 203 | - protected function deleteExtractFiles () | 233 | + protected function deleteExtractFiles() |
| 204 | { | 234 | { |
| 205 | - $this->removeDir( $this->path_for_extract_files ); | 235 | + $this->removeDir($this->path_for_extract_files); |
| 206 | 236 | ||
| 207 | } | 237 | } |
| 208 | 238 | ||
| 209 | - protected function removeDir($dir) { | 239 | + protected function removeDir($dir) |
| 240 | + { | ||
| 210 | if (is_dir($dir)) { | 241 | if (is_dir($dir)) { |
| 211 | $objects = scandir($dir); | 242 | $objects = scandir($dir); |
| 212 | foreach ($objects as $object) { | 243 | foreach ($objects as $object) { |
| 213 | if ($object != "." && $object != "..") { | 244 | if ($object != "." && $object != "..") { |
| 214 | - if (filetype($dir."/".$object) == "dir") | ||
| 215 | - $this->removeDir($dir."/".$object); | 245 | + if (filetype($dir . "/" . $object) == "dir") |
| 246 | + $this->removeDir($dir . "/" . $object); | ||
| 216 | else | 247 | else |
| 217 | - unlink($dir."/".$object); | 248 | + unlink($dir . "/" . $object); |
| 218 | } | 249 | } |
| 219 | } | 250 | } |
| 220 | reset($objects); | 251 | reset($objects); |
| @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { | @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { | ||
| 223 | } | 254 | } |
| 224 | 255 | ||
| 225 | 256 | ||
| 257 | + /** | ||
| 258 | + * @param $cell_address - string with address like A1, B1 ... | ||
| 259 | + * @return int - integer index | ||
| 260 | + * Constraint: only the first letter of the address is examined, so 'Z' is the last column that converts correctly; | ||
| 261 | + * an address starting with 'A' (including 'AA', 'AB', ...) or with a character outside 'A'..'Z' returns index 0 | ||
| 262 | + */ | ||
| 263 | + protected function convertCellToIndex($cell_address) | ||
| 264 | + { | ||
| 265 | + $index = 0; | ||
| 266 | + | ||
| 267 | + $address_letter = substr($cell_address, 0, 1); | ||
| 268 | + $address_arr = range('A', 'Z'); | ||
| 269 | + | ||
| 270 | + if ( $search_value = array_search( $address_letter, $address_arr ) ) | ||
| 271 | + $index = $search_value; | ||
| 272 | + | ||
| 273 | + return $index; | ||
| 274 | + | ||
| 275 | + } | ||
| 276 | +// @todo - переписать родительский метод в универсальной манере а не переопределять его | ||
| 277 | + protected function setKeysFromHeader(){ | ||
| 278 | + if ( $this->has_header_row ) { | ||
| 279 | + | ||
| 280 | + if ($this->keys === NULL) { | ||
| 281 | + $this->keys = $this->row; | ||
| 282 | + return true; | ||
| 283 | + } | ||
| 284 | + } | ||
| 285 | + return false; | ||
| 286 | + } | ||
| 226 | protected function cleanUp() | 287 | protected function cleanUp() |
| 227 | { | 288 | { |
| 228 | parent::cleanUp(); | 289 | parent::cleanUp(); |
| @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { | @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { | ||
| 230 | unset($this->sheets_arr); | 291 | unset($this->sheets_arr); |
| 231 | unset($this->current_node); | 292 | unset($this->current_node); |
| 232 | 293 | ||
| 233 | - $this->deleteExtractFiles(); | ||
| 234 | 294 | ||
| 235 | } | 295 | } |
| 236 | 296 | ||
| 297 | + function __destruct() | ||
| 298 | + { | ||
| 299 | + $this->deleteExtractFiles(); | ||
| 300 | + } | ||
| 301 | + | ||
| 237 | 302 | ||
| 238 | } | 303 | } |
| 239 | \ No newline at end of file | 304 | \ No newline at end of file |