Commit f6e54131c1adc6b153e334f17010c74745a6a83a
1 parent
735c416d
Fixed keys and header issues; handle rows with empty columns
Showing
2 changed files
with
128 additions
and
61 deletions
Show diff stats
lib/TableParser.php
@@ -9,6 +9,8 @@ | @@ -9,6 +9,8 @@ | ||
9 | namespace yii\multiparser; | 9 | namespace yii\multiparser; |
10 | 10 | ||
11 | 11 | ||
12 | +use common\components\CustomVarDamp; | ||
13 | + | ||
12 | abstract class TableParser extends Parser { | 14 | abstract class TableParser extends Parser { |
13 | 15 | ||
14 | 16 | ||
@@ -107,7 +109,7 @@ abstract class TableParser extends Parser { | @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { | ||
107 | { | 109 | { |
108 | do { | 110 | do { |
109 | 111 | ||
110 | - $this->current_row_number ++; | 112 | + $this->current_row_number++; |
111 | $this->readRow(); | 113 | $this->readRow(); |
112 | 114 | ||
113 | } while( $this->isEmptyRow() ); | 115 | } while( $this->isEmptyRow() ); |
lib/XlsxParser.php
@@ -7,6 +7,7 @@ | @@ -7,6 +7,7 @@ | ||
7 | */ | 7 | */ |
8 | 8 | ||
9 | namespace yii\multiparser; | 9 | namespace yii\multiparser; |
10 | + | ||
10 | use common\components\CustomVarDamp; | 11 | use common\components\CustomVarDamp; |
11 | 12 | ||
12 | 13 | ||
@@ -14,7 +15,8 @@ use common\components\CustomVarDamp; | @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; | ||
14 | * Class XlsxParser | 15 | * Class XlsxParser |
15 | * @package yii\multiparser | 16 | * @package yii\multiparser |
16 | */ | 17 | */ |
17 | -class XlsxParser extends TableParser { | 18 | +class XlsxParser extends TableParser |
19 | +{ | ||
18 | 20 | ||
19 | /** | 21 | /** |
20 | * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера | 22 | * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера |
@@ -39,7 +41,7 @@ class XlsxParser extends TableParser { | @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { | ||
39 | 41 | ||
40 | parent::setup(); | 42 | parent::setup(); |
41 | 43 | ||
42 | - if ( $this->path_for_extract_files == '' ) { | 44 | + if ($this->path_for_extract_files == '') { |
43 | $this->path_for_extract_files = sys_get_temp_dir(); | 45 | $this->path_for_extract_files = sys_get_temp_dir(); |
44 | } | 46 | } |
45 | } | 47 | } |
@@ -47,18 +49,18 @@ class XlsxParser extends TableParser { | @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { | ||
47 | 49 | ||
48 | public function read() | 50 | public function read() |
49 | { | 51 | { |
50 | - $this->extractFiles(); | 52 | + $this->extractFiles(); |
51 | $this->readSheets(); | 53 | $this->readSheets(); |
52 | $this->readStrings(); | 54 | $this->readStrings(); |
53 | - foreach ( $this->sheets_arr as $sheet ) { | 55 | + foreach ($this->sheets_arr as $sheet) { |
54 | //проходим по всем файлам из директории /xl/worksheets/ | 56 | //проходим по всем файлам из директории /xl/worksheets/ |
55 | $this->current_sheet = $sheet; | 57 | $this->current_sheet = $sheet; |
56 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; | 58 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; |
57 | - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) { | ||
58 | - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); | 59 | + if (file_exists($sheet_path) && is_readable($sheet_path)) { |
60 | + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator"); | ||
59 | $this->current_node = $xml->sheetData->row; | 61 | $this->current_node = $xml->sheetData->row; |
60 | $this->current_node->rewind(); | 62 | $this->current_node->rewind(); |
61 | - if ( $this->current_node->valid() ) { | 63 | + if ($this->current_node->valid()) { |
62 | parent::read(); | 64 | parent::read(); |
63 | } | 65 | } |
64 | } | 66 | } |
@@ -66,49 +68,48 @@ class XlsxParser extends TableParser { | @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { | ||
66 | 68 | ||
67 | $this->cleanUp(); | 69 | $this->cleanUp(); |
68 | 70 | ||
69 | - if ( $this->active_sheet ) { | 71 | + if ($this->active_sheet) { |
70 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив | 72 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив |
71 | - return $this->result[ $this->current_sheet ]; | ||
72 | - }else{ | 73 | + return $this->result[$this->current_sheet]; |
74 | + } else { | ||
73 | return $this->result; | 75 | return $this->result; |
74 | } | 76 | } |
75 | 77 | ||
76 | } | 78 | } |
77 | 79 | ||
/**
 * Unpacks the xlsx archive into a dedicated working directory.
 *
 * The working directory is "<base path>/<session id>" and is written back to
 * $this->path_for_extract_files, which the other readers of this class use
 * to locate the extracted xl/ parts.
 *
 * @throws \Exception when the directory cannot be created, or the archive
 *                    cannot be opened or extracted
 */
protected function extractFiles()
{
    // session_id() yields an empty string when no session is active; fall back
    // to a unique name so the archive is never unpacked directly into the base
    // directory (which would later be removed together with the extracted files).
    $suffix = session_id();
    if (!is_string($suffix) || $suffix === '') {
        $suffix = uniqid('xlsx_', true);
    }

    $base = (string)$this->path_for_extract_files;
    if ($base === '') {
        $base = sys_get_temp_dir();
    }
    // The base path may or may not end with a separator (sys_get_temp_dir()
    // typically does not), so normalise before appending the sub-directory.
    $this->path_for_extract_files = rtrim($base, '/\\') . DIRECTORY_SEPARATOR . $suffix;

    // Re-check is_dir() after a failed mkdir(): another process may have
    // created the directory in the meantime.
    if (!is_dir($this->path_for_extract_files)
        && !mkdir($this->path_for_extract_files, 0777, true)
        && !is_dir($this->path_for_extract_files)
    ) {
        throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files);
    }

    $zip = new \ZipArchive;
    if ($zip->open($this->file_path) !== true) {
        throw new \Exception('Ошибка чтения xlsx файла');
    }

    // extractTo() reports failure through its return value, not an exception.
    $extracted = $zip->extractTo($this->path_for_extract_files . '/');
    $zip->close();

    if (!$extracted) {
        throw new \Exception('Ошибка распаковки xlsx файла');
    }
}
98 | 99 | ||
99 | - protected function readSheets () | 100 | + protected function readSheets() |
100 | { | 101 | { |
101 | - if ( $this->active_sheet ) { | ||
102 | - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; | 102 | + if ($this->active_sheet) { |
103 | + $this->sheets_arr[] = 'sheet' . $this->active_sheet; | ||
103 | return; | 104 | return; |
104 | } | 105 | } |
105 | 106 | ||
106 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' ); | ||
107 | - foreach ( $xml->sheets->children() as $sheet ) { | 107 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml'); |
108 | + foreach ($xml->sheets->children() as $sheet) { | ||
108 | $sheet_name = ''; | 109 | $sheet_name = ''; |
109 | $sheet_id = 0; | 110 | $sheet_id = 0; |
110 | $attr = $sheet->attributes(); | 111 | $attr = $sheet->attributes(); |
111 | - foreach ( $attr as $name => $value ) { | 112 | + foreach ($attr as $name => $value) { |
112 | if ($name == 'name') | 113 | if ($name == 'name') |
113 | $sheet_name = (string)$value; | 114 | $sheet_name = (string)$value; |
114 | 115 | ||
@@ -116,74 +117,101 @@ class XlsxParser extends TableParser { | @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { | ||
116 | $sheet_id = $value; | 117 | $sheet_id = $value; |
117 | 118 | ||
118 | } | 119 | } |
119 | - if ( $sheet_name && $sheet_id ) { | 120 | + if ($sheet_name && $sheet_id) { |
120 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; | 121 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; |
121 | } | 122 | } |
122 | // | 123 | // |
123 | } | 124 | } |
124 | } | 125 | } |
125 | 126 | ||
/**
 * Loads the workbook's shared string table into $this->strings_arr.
 *
 * Entries are appended in document order, so the position of a string in
 * $this->strings_arr matches the index that cells use to reference it.
 *
 * @throws \Exception when xl/sharedStrings.xml exists but cannot be parsed
 */
protected function readStrings()
{
    $strings_path = $this->path_for_extract_files . '/xl/sharedStrings.xml';

    // The shared strings part is optional: a workbook without text cells
    // does not contain it, and there is simply nothing to load.
    if (!is_file($strings_path) || !is_readable($strings_path)) {
        return;
    }

    $xml = simplexml_load_file($strings_path);
    if ($xml === false) {
        throw new \Exception('Ошибка чтения xlsx файла');
    }

    foreach ($xml->children() as $item) {
        // Plain entry: <si><t>text</t></si>
        $text = (string)$item->t;

        // Rich-text entry: <si><r><t>part</t></r><r><t>part</t></r></si>;
        // the visible text is the concatenation of all runs.
        foreach ($item->r as $run) {
            $text .= (string)$run->t;
        }

        $this->strings_arr[] = $text;
    }
}
133 | 134 | ||
134 | 135 | ||
135 | - | ||
136 | - // protected function readRow ( $item, $sheet , $current_row ) | ||
/**
 * Reads the cells of the current <row> node into $this->row and advances
 * $this->current_node to the next row.
 *
 * The result is indexed by zero-based column position (taken from the cell
 * address when present, otherwise from the running counter), so gaps left by
 * empty cells are preserved. When keys are defined, cells whose column has
 * no key are skipped; in header mode every keyed column missing from the row
 * is filled with an empty string. The row is finally sorted by column index.
 */
protected function readRow()
{
    $this->row = [];
    $node = $this->current_node->getChildren();
    if ($node === null) {
        return;
    }

    for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) {
        $child = $node->current();
        $attr = $child->attributes();

        // $attr['r'] holds the cell address (A1, B1, ...). When present it
        // overrides the running counter, because empty cells are not
        // necessarily stored in the sheet XML.
        if (isset($attr['r'])) {
            $i = $this->convertCellToIndex((string)$attr['r']);

            // Keys are known but none is defined for this column - skip it.
            if ($this->keys !== null && !isset($this->keys[$i])) {
                continue;
            }
        }

        $type = isset($attr['t']) ? (string)$attr['t'] : '';
        $value = '';

        if ($type === 'inlineStr') {
            // Inline string: the text lives in <is><t> and there is no <v>.
            $value = (string)$child->is->t;
        } elseif (isset($child->v)) {
            $value = (string)$child->v;

            // Only t="s" marks <v> as an index into the shared string table;
            // other cell types (formula strings, booleans, errors, numbers)
            // carry their value in <v> directly.
            if ($type === 's') {
                $value = isset($this->strings_arr[$value]) ? $this->strings_arr[$value] : '';
            }
        }

        $this->row[$i] = $value;
    }

    // Header mode only: add an empty value for every keyed column the row
    // did not provide. Iterating over array_keys() keeps the column index
    // and the header text apart (the previous loop bound both to one variable).
    if ($this->has_header_row && $this->keys !== null) {
        foreach (array_keys($this->keys) as $index) {
            if (!isset($this->row[$index])) {
                $this->row[$index] = '';
            }
        }
    }

    ksort($this->row);
    $this->current_node->next();
}
170 | 197 | ||
171 | - protected function isEmptyRow(){ | 198 | + protected function isEmptyRow() |
199 | + { | ||
172 | 200 | ||
173 | $is_empty = false; | 201 | $is_empty = false; |
174 | 202 | ||
175 | - if ( !count( $this->row ) || !$this->current_node->valid() ) { | 203 | + if (!count($this->row) || !$this->current_node->valid()) { |
176 | return true; | 204 | return true; |
177 | } | 205 | } |
178 | 206 | ||
179 | $j = 0; | 207 | $j = 0; |
180 | - for ($i = 1; $i <= count( $this->row ); $i++) { | 208 | + for ($i = 1; $i <= count($this->row); $i++) { |
181 | 209 | ||
182 | - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { | 210 | + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) { |
183 | $j++; | 211 | $j++; |
184 | } | 212 | } |
185 | 213 | ||
186 | - if ( $j >= $this->min_column_quantity ) { | 214 | + if ($j >= $this->min_column_quantity) { |
187 | $is_empty = true; | 215 | $is_empty = true; |
188 | break; | 216 | break; |
189 | } | 217 | } |
@@ -192,29 +220,32 @@ class XlsxParser extends TableParser { | @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { | ||
192 | return $is_empty; | 220 | return $is_empty; |
193 | } | 221 | } |
194 | 222 | ||
/**
 * Tells whether a single cell value counts as empty.
 *
 * @param mixed $val cell value to inspect
 * @return bool true when the value loosely equals the empty string
 */
protected function isEmptyColumn($val)
{
    // Loose comparison on purpose: it is the check this parser has always used.
    $is_blank = ('' == $val);

    return $is_blank;
}
198 | 227 | ||
/**
 * Appends the row that was just read to the result set of the sheet
 * currently being processed.
 */
protected function setResult()
{
    $sheet = $this->current_sheet;
    $this->result[$sheet][] = $this->row;
}
202 | 232 | ||
/**
 * Removes the directory the archive was extracted into by handing
 * $this->path_for_extract_files to removeDir().
 */
protected function deleteExtractFiles()
{
    $extract_dir = $this->path_for_extract_files;
    $this->removeDir($extract_dir);
}
208 | 238 | ||
209 | - protected function removeDir($dir) { | 239 | + protected function removeDir($dir) |
240 | + { | ||
210 | if (is_dir($dir)) { | 241 | if (is_dir($dir)) { |
211 | $objects = scandir($dir); | 242 | $objects = scandir($dir); |
212 | foreach ($objects as $object) { | 243 | foreach ($objects as $object) { |
213 | if ($object != "." && $object != "..") { | 244 | if ($object != "." && $object != "..") { |
214 | - if (filetype($dir."/".$object) == "dir") | ||
215 | - $this->removeDir($dir."/".$object); | 245 | + if (filetype($dir . "/" . $object) == "dir") |
246 | + $this->removeDir($dir . "/" . $object); | ||
216 | else | 247 | else |
217 | - unlink($dir."/".$object); | 248 | + unlink($dir . "/" . $object); |
218 | } | 249 | } |
219 | } | 250 | } |
220 | reset($objects); | 251 | reset($objects); |
@@ -223,6 +254,36 @@ class XlsxParser extends TableParser { | @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { | ||
223 | } | 254 | } |
224 | 255 | ||
225 | 256 | ||
/**
 * Converts a cell address into a zero-based column index.
 *
 * All leading letters of the address are used, so multi-letter columns are
 * supported: A1 => 0, Z1 => 25, AA1 => 26, AB1 => 27, BA1 => 52.
 * Letter case is ignored and an optional leading "$" is accepted.
 *
 * @param string $cell_address cell address such as A1, B7 or AA12
 * @return int zero-based column index; 0 when the address has no column letters
 */
protected function convertCellToIndex($cell_address)
{
    if (!preg_match('/^\$?([A-Za-z]+)/', (string)$cell_address, $matches)) {
        return 0;
    }

    // Column letters form a bijective base-26 number (A = 1 ... Z = 26).
    $position = 0;
    foreach (str_split(strtoupper($matches[1])) as $letter) {
        $position = $position * 26 + (ord($letter) - ord('A') + 1);
    }

    // Shift from the 1-based column number to a 0-based array index.
    return $position - 1;
}
// @todo - rewrite the parent method in a generic way instead of overriding it here
/**
 * Uses the row that was just read as the key set, but only in header mode
 * and only while no keys have been assigned yet.
 *
 * @return bool true when the current row was taken as keys, false otherwise
 */
protected function setKeysFromHeader()
{
    // Nothing to do without a header row, or when keys already exist.
    if (!$this->has_header_row || $this->keys !== null) {
        return false;
    }

    $this->keys = $this->row;

    return true;
}
226 | protected function cleanUp() | 287 | protected function cleanUp() |
227 | { | 288 | { |
228 | parent::cleanUp(); | 289 | parent::cleanUp(); |
@@ -230,9 +291,13 @@ class XlsxParser extends TableParser { | @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { | ||
230 | unset($this->sheets_arr); | 291 | unset($this->sheets_arr); |
231 | unset($this->current_node); | 292 | unset($this->current_node); |
232 | 293 | ||
233 | - $this->deleteExtractFiles(); | ||
234 | 294 | ||
235 | } | 295 | } |
236 | 296 | ||
/**
 * Removes the extracted archive files when the parser is destroyed.
 *
 * Deletion is skipped when the stored path is empty, is not a directory, or
 * resolves to the system temp directory itself - the value setup() assigns
 * by default before any archive has been extracted - so destroying a parser
 * that never extracted anything cannot wipe the shared temp directory.
 *
 * NOTE(review): a custom base directory configured by the caller cannot be
 * told apart from an extraction directory here; confirm callers do not
 * destroy the parser before read() when a custom path is set.
 */
public function __destruct()
{
    $dir = $this->path_for_extract_files;
    if (!is_string($dir) || $dir === '' || !is_dir($dir)) {
        return;
    }

    $resolved = realpath($dir);
    if ($resolved === false || $resolved === realpath(sys_get_temp_dir())) {
        return;
    }

    $this->deleteExtractFiles();
}
301 | + | ||
237 | 302 | ||
238 | } | 303 | } |
239 | \ No newline at end of file | 304 | \ No newline at end of file |