Commit f6e54131c1adc6b153e334f17010c74745a6a83a
1 parent
735c416d
Fixed the keys and header issue; added handling of empty columns
Showing
2 changed files
with
128 additions
and
61 deletions
Show diff stats
lib/TableParser.php
| ... | ... | @@ -9,6 +9,8 @@ |
| 9 | 9 | namespace yii\multiparser; |
| 10 | 10 | |
| 11 | 11 | |
| 12 | +use common\components\CustomVarDamp; | |
| 13 | + | |
| 12 | 14 | abstract class TableParser extends Parser { |
| 13 | 15 | |
| 14 | 16 | |
| ... | ... | @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { |
| 107 | 109 | { |
| 108 | 110 | do { |
| 109 | 111 | |
| 110 | - $this->current_row_number ++; | |
| 112 | + $this->current_row_number++; | |
| 111 | 113 | $this->readRow(); |
| 112 | 114 | |
| 113 | 115 | } while( $this->isEmptyRow() ); | ... | ... |
lib/XlsxParser.php
| ... | ... | @@ -7,6 +7,7 @@ |
| 7 | 7 | */ |
| 8 | 8 | |
| 9 | 9 | namespace yii\multiparser; |
| 10 | + | |
| 10 | 11 | use common\components\CustomVarDamp; |
| 11 | 12 | |
| 12 | 13 | |
| ... | ... | @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; |
| 14 | 15 | * Class XlsxParser |
| 15 | 16 | * @package yii\multiparser |
| 16 | 17 | */ |
| 17 | -class XlsxParser extends TableParser { | |
| 18 | +class XlsxParser extends TableParser | |
| 19 | +{ | |
| 18 | 20 | |
| 19 | 21 | /** |
| 20 | 22 | * @var string - путь, куда будут распаковываться файлы; если не указано - во временный каталог сервера |
| ... | ... | @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { |
| 39 | 41 | |
| 40 | 42 | parent::setup(); |
| 41 | 43 | |
| 42 | - if ( $this->path_for_extract_files == '' ) { | |
| 44 | + if ($this->path_for_extract_files == '') { | |
| 43 | 45 | $this->path_for_extract_files = sys_get_temp_dir(); |
| 44 | 46 | } |
| 45 | 47 | } |
| ... | ... | @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { |
| 47 | 49 | |
| 48 | 50 | public function read() |
| 49 | 51 | { |
| 50 | - $this->extractFiles(); | |
| 52 | + $this->extractFiles(); | |
| 51 | 53 | $this->readSheets(); |
| 52 | 54 | $this->readStrings(); |
| 53 | - foreach ( $this->sheets_arr as $sheet ) { | |
| 55 | + foreach ($this->sheets_arr as $sheet) { | |
| 54 | 56 | //проходим по всем файлам из директории /xl/worksheets/ |
| 55 | 57 | $this->current_sheet = $sheet; |
| 56 | 58 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; |
| 57 | - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) { | |
| 58 | - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); | |
| 59 | + if (file_exists($sheet_path) && is_readable($sheet_path)) { | |
| 60 | + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator"); | |
| 59 | 61 | $this->current_node = $xml->sheetData->row; |
| 60 | 62 | $this->current_node->rewind(); |
| 61 | - if ( $this->current_node->valid() ) { | |
| 63 | + if ($this->current_node->valid()) { | |
| 62 | 64 | parent::read(); |
| 63 | 65 | } |
| 64 | 66 | } |
| ... | ... | @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { |
| 66 | 68 | |
| 67 | 69 | $this->cleanUp(); |
| 68 | 70 | |
| 69 | - if ( $this->active_sheet ) { | |
| 71 | + if ($this->active_sheet) { | |
| 70 | 72 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив |
| 71 | - return $this->result[ $this->current_sheet ]; | |
| 72 | - }else{ | |
| 73 | + return $this->result[$this->current_sheet]; | |
| 74 | + } else { | |
| 73 | 75 | return $this->result; |
| 74 | 76 | } |
| 75 | 77 | |
| 76 | 78 | } |
| 77 | 79 | |
| 78 | - protected function extractFiles () | |
| 80 | + protected function extractFiles() | |
| 79 | 81 | { |
| 80 | 82 | $this->path_for_extract_files = $this->path_for_extract_files . session_id(); |
| 81 | - if ( !file_exists($this->path_for_extract_files )) { | |
| 82 | - if ( !mkdir( $this->path_for_extract_files ) ) | |
| 83 | - { | |
| 84 | - throw new \Exception( 'Ошибка создания временного каталога - ' . $this->path_for_extract_files ); | |
| 83 | + if (!file_exists($this->path_for_extract_files)) { | |
| 84 | + if (!mkdir($this->path_for_extract_files)) { | |
| 85 | + throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files); | |
| 85 | 86 | } |
| 86 | 87 | } |
| 87 | 88 | |
| 88 | 89 | $zip = new \ZipArchive; |
| 89 | - if ( $zip->open( $this->file_path ) === TRUE ) { | |
| 90 | - $zip->extractTo( $this->path_for_extract_files . '/' ); | |
| 90 | + if ($zip->open($this->file_path) === TRUE) { | |
| 91 | + $zip->extractTo($this->path_for_extract_files . '/'); | |
| 91 | 92 | $zip->close(); |
| 92 | 93 | } else { |
| 93 | 94 | |
| 94 | - throw new \Exception( 'Ошибка чтения xlsx файла' ); | |
| 95 | + throw new \Exception('Ошибка чтения xlsx файла'); | |
| 95 | 96 | } |
| 96 | 97 | unset($zip); |
| 97 | 98 | } |
| 98 | 99 | |
| 99 | - protected function readSheets () | |
| 100 | + protected function readSheets() | |
| 100 | 101 | { |
| 101 | - if ( $this->active_sheet ) { | |
| 102 | - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; | |
| 102 | + if ($this->active_sheet) { | |
| 103 | + $this->sheets_arr[] = 'sheet' . $this->active_sheet; | |
| 103 | 104 | return; |
| 104 | 105 | } |
| 105 | 106 | |
| 106 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' ); | |
| 107 | - foreach ( $xml->sheets->children() as $sheet ) { | |
| 107 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml'); | |
| 108 | + foreach ($xml->sheets->children() as $sheet) { | |
| 108 | 109 | $sheet_name = ''; |
| 109 | 110 | $sheet_id = 0; |
| 110 | 111 | $attr = $sheet->attributes(); |
| 111 | - foreach ( $attr as $name => $value ) { | |
| 112 | + foreach ($attr as $name => $value) { | |
| 112 | 113 | if ($name == 'name') |
| 113 | 114 | $sheet_name = (string)$value; |
| 114 | 115 | |
| ... | ... | @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { |
| 116 | 117 | $sheet_id = $value; |
| 117 | 118 | |
| 118 | 119 | } |
| 119 | - if ( $sheet_name && $sheet_id ) { | |
| 120 | + if ($sheet_name && $sheet_id) { | |
| 120 | 121 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; |
| 121 | 122 | } |
| 122 | 123 | // |
| 123 | 124 | } |
| 124 | 125 | } |
| 125 | 126 | |
| 126 | - protected function readStrings () | |
| 127 | + protected function readStrings() | |
| 127 | 128 | { |
| 128 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/sharedStrings.xml' ); | |
| 129 | - foreach ( $xml->children() as $item ) { | |
| 129 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/sharedStrings.xml'); | |
| 130 | + foreach ($xml->children() as $item) { | |
| 130 | 131 | $this->strings_arr[] = (string)$item->t; |
| 131 | 132 | } |
| 132 | 133 | } |
| 133 | 134 | |
| 134 | 135 | |
| 135 | - | |
| 136 | - // protected function readRow ( $item, $sheet , $current_row ) | |
| 137 | - protected function readRow ( ) | |
| 136 | + protected function readRow() | |
| 138 | 137 | { |
| 139 | 138 | $this->row = []; |
| 140 | 139 | $node = $this->current_node->getChildren(); |
| 141 | 140 | if ($node === NULL) { |
| 142 | 141 | return; |
| 143 | 142 | } |
| 144 | - //foreach ( $node as $child ) { | |
| 145 | - for ( $node->rewind(); $node->valid(); $node->next() ) { | |
| 143 | + | |
| 144 | + for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) { | |
| 146 | 145 | $child = $node->current(); |
| 147 | 146 | $attr = $child->attributes(); |
| 148 | 147 | |
| 149 | - if( isset($child->v) ) { | |
| 148 | + // define the index of result array | |
| 149 | + // $attr['r'] - contain the address of cells - A1, B1 ... | |
| 150 | + if (isset($attr['r'])) { | |
| 151 | + // override index | |
| 152 | + $i = $this->convertCellToIndex( $attr['r'] ); | |
| 153 | + | |
| 154 | + if ( $this->keys !== Null ){ | |
| 155 | + if( isset( $this->keys[$i] ) ){ | |
| 156 | + //$i = $this->keys[$i]; | |
| 157 | + } else { | |
| 158 | + // we have a keys, but this one we didn't find, so skip it | |
| 159 | + continue; | |
| 160 | + } | |
| 161 | + } | |
| 162 | + } | |
| 163 | + // define the value of result array | |
| 164 | + if (isset($child->v)) { | |
| 150 | 165 | $value = (string)$child->v; |
| 151 | - }else{ | |
| 166 | + | |
| 167 | + if ( isset($attr['t']) ) | |
| 168 | + // it's not a value it's a string, so fetch it from string array | |
| 169 | + $value = $this->strings_arr[$value]; | |
| 170 | + | |
| 171 | + } else { | |
| 152 | 172 | $value = ''; |
| 153 | 173 | } |
| 154 | - if ( isset( $attr['t'] ) ) { | |
| 155 | - $this->row[] = $this->strings_arr[ $value ]; | |
| 156 | - }else{ | |
| 157 | - $this->row[] = $value; | |
| 158 | - } | |
| 174 | + | |
| 175 | + // set | |
| 176 | + $this->row[$i] = $value; | |
| 159 | 177 | |
| 160 | 178 | } |
| 161 | - // дополним ряд пустыми значениями если у нас ключей больше чем значений | |
| 162 | - if ( $this->has_header_row && ( count( $this->keys ) > count( $this->row ) ) ) { | |
| 163 | - $extra_coloumn = count( $this->keys ) - count( $this->row ); | |
| 164 | - for ( $i = 1; $i <= $extra_coloumn; $i++ ) { | |
| 165 | - $this->row[] = ''; | |
| 179 | +// // fill the row with empty values for the keys that were missed in the previous step | |
| 180 | + // only for 'has_header_row = true' mode | |
| 181 | + if ( $this->has_header_row && $this->keys !== Null ) { | |
| 182 | + $extra_column = count( $this->keys ) - count( $this->row ); | |
| 183 | + if ( $extra_column ) { | |
| 184 | + foreach ( $this->keys as $key => $key ) { | |
| 185 | + | |
| 186 | + if ( isset( $this->row[$key] ) ) { | |
| 187 | + continue; | |
| 188 | + } | |
| 189 | + $this->row[$key] = ''; | |
| 190 | + } | |
| 166 | 191 | } |
| 192 | + | |
| 167 | 193 | } |
| 194 | + ksort( $this->row ); | |
| 168 | 195 | $this->current_node->next(); |
| 169 | 196 | } |
| 170 | 197 | |
| 171 | - protected function isEmptyRow(){ | |
| 198 | + protected function isEmptyRow() | |
| 199 | + { | |
| 172 | 200 | |
| 173 | 201 | $is_empty = false; |
| 174 | 202 | |
| 175 | - if ( !count( $this->row ) || !$this->current_node->valid() ) { | |
| 203 | + if (!count($this->row) || !$this->current_node->valid()) { | |
| 176 | 204 | return true; |
| 177 | 205 | } |
| 178 | 206 | |
| 179 | 207 | $j = 0; |
| 180 | - for ($i = 1; $i <= count( $this->row ); $i++) { | |
| 208 | + for ($i = 1; $i <= count($this->row); $i++) { | |
| 181 | 209 | |
| 182 | - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { | |
| 210 | + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) { | |
| 183 | 211 | $j++; |
| 184 | 212 | } |
| 185 | 213 | |
| 186 | - if ( $j >= $this->min_column_quantity ) { | |
| 214 | + if ($j >= $this->min_column_quantity) { | |
| 187 | 215 | $is_empty = true; |
| 188 | 216 | break; |
| 189 | 217 | } |
| ... | ... | @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { |
| 192 | 220 | return $is_empty; |
| 193 | 221 | } |
| 194 | 222 | |
| 195 | - protected function isEmptyColumn( $val ){ | |
| 223 | + protected function isEmptyColumn($val) | |
| 224 | + { | |
| 196 | 225 | return $val == ''; |
| 197 | 226 | } |
| 198 | 227 | |
| 199 | - protected function setResult( ){ | |
| 200 | - $this->result[ $this->current_sheet ][] = $this->row; | |
| 228 | + protected function setResult() | |
| 229 | + { | |
| 230 | + $this->result[$this->current_sheet][] = $this->row; | |
| 201 | 231 | } |
| 202 | 232 | |
| 203 | - protected function deleteExtractFiles () | |
| 233 | + protected function deleteExtractFiles() | |
| 204 | 234 | { |
| 205 | - $this->removeDir( $this->path_for_extract_files ); | |
| 235 | + $this->removeDir($this->path_for_extract_files); | |
| 206 | 236 | |
| 207 | 237 | } |
| 208 | 238 | |
| 209 | - protected function removeDir($dir) { | |
| 239 | + protected function removeDir($dir) | |
| 240 | + { | |
| 210 | 241 | if (is_dir($dir)) { |
| 211 | 242 | $objects = scandir($dir); |
| 212 | 243 | foreach ($objects as $object) { |
| 213 | 244 | if ($object != "." && $object != "..") { |
| 214 | - if (filetype($dir."/".$object) == "dir") | |
| 215 | - $this->removeDir($dir."/".$object); | |
| 245 | + if (filetype($dir . "/" . $object) == "dir") | |
| 246 | + $this->removeDir($dir . "/" . $object); | |
| 216 | 247 | else |
| 217 | - unlink($dir."/".$object); | |
| 248 | + unlink($dir . "/" . $object); | |
| 218 | 249 | } |
| 219 | 250 | } |
| 220 | 251 | reset($objects); |
| ... | ... | @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { |
| 223 | 254 | } |
| 224 | 255 | |
| 225 | 256 | |
| 257 | + /** | |
| 258 | + * @param $cell_address - string with address like A1, B1 ... | |
| 259 | + * @return int - integer index | |
| 260 | + * Limitation: only the first letter of the address is examined, so 'Z' is the last | |
| 261 | + * column that converts correctly; a multi-letter address such as 'AA1' returns index 0 | |
| 262 | + */ | |
| 263 | + protected function convertCellToIndex($cell_address) | |
| 264 | + { | |
| 265 | + $index = 0; | |
| 266 | + | |
| 267 | + $address_letter = substr($cell_address, 0, 1); | |
| 268 | + $address_arr = range('A', 'Z'); | |
| 269 | + | |
| 270 | + if ( $search_value = array_search( $address_letter, $address_arr ) ) | |
| 271 | + $index = $search_value; | |
| 272 | + | |
| 273 | + return $index; | |
| 274 | + | |
| 275 | + } | |
| 276 | +// @todo - переписать родительский метод в универсальной манере а не переопределять его | |
| 277 | + protected function setKeysFromHeader(){ | |
| 278 | + if ( $this->has_header_row ) { | |
| 279 | + | |
| 280 | + if ($this->keys === NULL) { | |
| 281 | + $this->keys = $this->row; | |
| 282 | + return true; | |
| 283 | + } | |
| 284 | + } | |
| 285 | + return false; | |
| 286 | + } | |
| 226 | 287 | protected function cleanUp() |
| 227 | 288 | { |
| 228 | 289 | parent::cleanUp(); |
| ... | ... | @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { |
| 230 | 291 | unset($this->sheets_arr); |
| 231 | 292 | unset($this->current_node); |
| 232 | 293 | |
| 233 | - $this->deleteExtractFiles(); | |
| 234 | 294 | |
| 235 | 295 | } |
| 236 | 296 | |
| 297 | + function __destruct() | |
| 298 | + { | |
| 299 | + $this->deleteExtractFiles(); | |
| 300 | + } | |
| 301 | + | |
| 237 | 302 | |
| 238 | 303 | } |
| 239 | 304 | \ No newline at end of file | ... | ... |