Commit f6e54131c1adc6b153e334f17010c74745a6a83a
1 parent
735c416d
Fixed the keys and header issues and added handling of empty columns
Showing
2 changed files
with
128 additions
and
61 deletions
Show diff stats
lib/TableParser.php
... | ... | @@ -9,6 +9,8 @@ |
9 | 9 | namespace yii\multiparser; |
10 | 10 | |
11 | 11 | |
12 | +use common\components\CustomVarDamp; | |
13 | + | |
12 | 14 | abstract class TableParser extends Parser { |
13 | 15 | |
14 | 16 | |
... | ... | @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { |
107 | 109 | { |
108 | 110 | do { |
109 | 111 | |
110 | - $this->current_row_number ++; | |
112 | + $this->current_row_number++; | |
111 | 113 | $this->readRow(); |
112 | 114 | |
113 | 115 | } while( $this->isEmptyRow() ); | ... | ... |
lib/XlsxParser.php
... | ... | @@ -7,6 +7,7 @@ |
7 | 7 | */ |
8 | 8 | |
9 | 9 | namespace yii\multiparser; |
10 | + | |
10 | 11 | use common\components\CustomVarDamp; |
11 | 12 | |
12 | 13 | |
... | ... | @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; |
14 | 15 | * Class XlsxParser |
15 | 16 | * @package yii\multiparser |
16 | 17 | */ |
17 | -class XlsxParser extends TableParser { | |
18 | +class XlsxParser extends TableParser | |
19 | +{ | |
18 | 20 | |
19 | 21 | /** |
20 | 22 | * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера |
... | ... | @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { |
39 | 41 | |
40 | 42 | parent::setup(); |
41 | 43 | |
42 | - if ( $this->path_for_extract_files == '' ) { | |
44 | + if ($this->path_for_extract_files == '') { | |
43 | 45 | $this->path_for_extract_files = sys_get_temp_dir(); |
44 | 46 | } |
45 | 47 | } |
... | ... | @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { |
47 | 49 | |
48 | 50 | public function read() |
49 | 51 | { |
50 | - $this->extractFiles(); | |
52 | + $this->extractFiles(); | |
51 | 53 | $this->readSheets(); |
52 | 54 | $this->readStrings(); |
53 | - foreach ( $this->sheets_arr as $sheet ) { | |
55 | + foreach ($this->sheets_arr as $sheet) { | |
54 | 56 | //проходим по всем файлам из директории /xl/worksheets/ |
55 | 57 | $this->current_sheet = $sheet; |
56 | 58 | $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; |
57 | - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) { | |
58 | - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); | |
59 | + if (file_exists($sheet_path) && is_readable($sheet_path)) { | |
60 | + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator"); | |
59 | 61 | $this->current_node = $xml->sheetData->row; |
60 | 62 | $this->current_node->rewind(); |
61 | - if ( $this->current_node->valid() ) { | |
63 | + if ($this->current_node->valid()) { | |
62 | 64 | parent::read(); |
63 | 65 | } |
64 | 66 | } |
... | ... | @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { |
66 | 68 | |
67 | 69 | $this->cleanUp(); |
68 | 70 | |
69 | - if ( $this->active_sheet ) { | |
71 | + if ($this->active_sheet) { | |
70 | 72 | // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив |
71 | - return $this->result[ $this->current_sheet ]; | |
72 | - }else{ | |
73 | + return $this->result[$this->current_sheet]; | |
74 | + } else { | |
73 | 75 | return $this->result; |
74 | 76 | } |
75 | 77 | |
76 | 78 | } |
77 | 79 | |
/**
 * Unpacks the xlsx archive at $this->file_path into a dedicated working directory.
 *
 * The working directory is created as a sub-directory of the configured
 * $this->path_for_extract_files; on return that property holds the full
 * path of the sub-directory.
 *
 * @throws \Exception when the working directory cannot be created or the
 *                    archive cannot be opened or extracted
 */
protected function extractFiles()
{
    // The sub-directory is named after the session id. Without an active
    // session session_id() yields an empty string, which would make the
    // working directory identical to the base directory, so fall back to a
    // unique name in that case.
    $dir_name = session_id();
    if ($dir_name === false || $dir_name === '') {
        $dir_name = uniqid('xlsx_', true);
    }

    $base_dir = (string)$this->path_for_extract_files;
    if ($base_dir === '') {
        $base_dir = sys_get_temp_dir();
    }

    // Join with exactly one separator: sys_get_temp_dir() has no trailing
    // slash, while a configured path may or may not have one.
    $this->path_for_extract_files = rtrim($base_dir, '/\\') . DIRECTORY_SEPARATOR . $dir_name;

    if (!is_dir($this->path_for_extract_files) && !mkdir($this->path_for_extract_files, 0777, true)) {
        throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files);
    }

    $zip = new \ZipArchive;
    if ($zip->open($this->file_path) !== true) {
        throw new \Exception('Ошибка чтения xlsx файла');
    }

    // extractTo() reports failure through its return value only.
    $extracted = $zip->extractTo($this->path_for_extract_files . '/');
    $zip->close();
    unset($zip);

    if (!$extracted) {
        throw new \Exception('Ошибка чтения xlsx файла');
    }
}
98 | 99 | |
99 | - protected function readSheets () | |
100 | + protected function readSheets() | |
100 | 101 | { |
101 | - if ( $this->active_sheet ) { | |
102 | - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; | |
102 | + if ($this->active_sheet) { | |
103 | + $this->sheets_arr[] = 'sheet' . $this->active_sheet; | |
103 | 104 | return; |
104 | 105 | } |
105 | 106 | |
106 | - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' ); | |
107 | - foreach ( $xml->sheets->children() as $sheet ) { | |
107 | + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml'); | |
108 | + foreach ($xml->sheets->children() as $sheet) { | |
108 | 109 | $sheet_name = ''; |
109 | 110 | $sheet_id = 0; |
110 | 111 | $attr = $sheet->attributes(); |
111 | - foreach ( $attr as $name => $value ) { | |
112 | + foreach ($attr as $name => $value) { | |
112 | 113 | if ($name == 'name') |
113 | 114 | $sheet_name = (string)$value; |
114 | 115 | |
... | ... | @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { |
116 | 117 | $sheet_id = $value; |
117 | 118 | |
118 | 119 | } |
119 | - if ( $sheet_name && $sheet_id ) { | |
120 | + if ($sheet_name && $sheet_id) { | |
120 | 121 | $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; |
121 | 122 | } |
122 | 123 | // |
123 | 124 | } |
124 | 125 | } |
125 | 126 | |
/**
 * Loads the shared-string table (xl/sharedStrings.xml) into $this->strings_arr.
 *
 * Each child item contributes one entry, in document order. Plain items carry
 * their text in <t>; rich-text items split it over several <r> runs, whose
 * <t> parts are concatenated.
 *
 * @throws \Exception when the shared-strings part exists but cannot be parsed
 */
protected function readStrings()
{
    $strings_path = $this->path_for_extract_files . '/xl/sharedStrings.xml';

    // NOTE(review): a workbook with no text cells presumably ships without
    // this part; treat its absence as "no shared strings" instead of failing.
    if (!is_file($strings_path)) {
        return;
    }

    $xml = simplexml_load_file($strings_path);
    if ($xml === false) {
        throw new \Exception('Ошибка чтения xlsx файла');
    }

    foreach ($xml->children() as $item) {
        if (isset($item->t)) {
            $text = (string)$item->t;
        } else {
            // rich text: glue the runs back together
            $text = '';
            foreach ($item->r as $run) {
                $text .= (string)$run->t;
            }
        }
        $this->strings_arr[] = $text;
    }
}
133 | 134 | |
134 | 135 | |
135 | - | |
136 | - // protected function readRow ( $item, $sheet , $current_row ) | |
/**
 * Reads the cells of the current row node into $this->row and moves
 * $this->current_node to the next row.
 *
 * Cells are stored under the zero-based column index derived from their
 * reference attribute "r" (A1, B1 ...); a cell without that attribute keeps
 * the running position. Once header keys are known, cells in columns that
 * have no key are skipped. In header mode every key column that the row
 * lacks is filled with an empty string. The row is finally sorted by index.
 */
protected function readRow()
{
    $this->row = [];
    $node = $this->current_node->getChildren();
    if ($node === null) {
        return;
    }

    for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) {
        $child = $node->current();
        $attr = $child->attributes();

        // Column index: the cell reference overrides the running counter.
        if (isset($attr['r'])) {
            $i = $this->convertCellToIndex((string)$attr['r']);

            // keys are known, but there is none for this column - skip the cell
            if ($this->keys !== null && !isset($this->keys[$i])) {
                continue;
            }
        }

        // Cell value. Only t="s" means "<v> is an index into the shared
        // strings"; other types (str, b, e, n) keep their literal <v>, and
        // inline strings live in <is><t> rather than <v>.
        $type = isset($attr['t']) ? (string)$attr['t'] : '';
        if ($type === 'inlineStr') {
            $value = isset($child->is->t) ? (string)$child->is->t : '';
        } elseif (isset($child->v)) {
            $value = (string)$child->v;
            if ($type === 's') {
                $value = isset($this->strings_arr[$value]) ? $this->strings_arr[$value] : '';
            }
        } else {
            $value = '';
        }

        $this->row[$i] = $value;
    }

    // Header mode: make sure every key column is present in the row.
    if ($this->has_header_row && $this->keys !== null) {
        foreach (array_keys($this->keys) as $index) {
            if (!isset($this->row[$index])) {
                $this->row[$index] = '';
            }
        }
    }

    ksort($this->row);
    $this->current_node->next();
}
170 | 197 | |
171 | - protected function isEmptyRow(){ | |
198 | + protected function isEmptyRow() | |
199 | + { | |
172 | 200 | |
173 | 201 | $is_empty = false; |
174 | 202 | |
175 | - if ( !count( $this->row ) || !$this->current_node->valid() ) { | |
203 | + if (!count($this->row) || !$this->current_node->valid()) { | |
176 | 204 | return true; |
177 | 205 | } |
178 | 206 | |
179 | 207 | $j = 0; |
180 | - for ($i = 1; $i <= count( $this->row ); $i++) { | |
208 | + for ($i = 1; $i <= count($this->row); $i++) { | |
181 | 209 | |
182 | - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { | |
210 | + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) { | |
183 | 211 | $j++; |
184 | 212 | } |
185 | 213 | |
186 | - if ( $j >= $this->min_column_quantity ) { | |
214 | + if ($j >= $this->min_column_quantity) { | |
187 | 215 | $is_empty = true; |
188 | 216 | break; |
189 | 217 | } |
... | ... | @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { |
192 | 220 | return $is_empty; |
193 | 221 | } |
194 | 222 | |
/**
 * Tells whether a single cell value counts as empty.
 *
 * @param mixed $val cell value to check
 * @return bool true when the value loosely equals the empty string
 */
protected function isEmptyColumn($val)
{
    $is_blank = ('' == $val);

    return $is_blank;
}
198 | 227 | |
/**
 * Appends the current row to the result list of the sheet being read.
 */
protected function setResult()
{
    $sheet = $this->current_sheet;
    $this->result[$sheet][] = $this->row;
}
202 | 232 | |
/**
 * Removes the directory stored in $this->path_for_extract_files
 * by handing it to removeDir().
 */
protected function deleteExtractFiles()
{
    $extract_dir = $this->path_for_extract_files;
    $this->removeDir($extract_dir);
}
208 | 238 | |
209 | - protected function removeDir($dir) { | |
239 | + protected function removeDir($dir) | |
240 | + { | |
210 | 241 | if (is_dir($dir)) { |
211 | 242 | $objects = scandir($dir); |
212 | 243 | foreach ($objects as $object) { |
213 | 244 | if ($object != "." && $object != "..") { |
214 | - if (filetype($dir."/".$object) == "dir") | |
215 | - $this->removeDir($dir."/".$object); | |
245 | + if (filetype($dir . "/" . $object) == "dir") | |
246 | + $this->removeDir($dir . "/" . $object); | |
216 | 247 | else |
217 | - unlink($dir."/".$object); | |
248 | + unlink($dir . "/" . $object); | |
218 | 249 | } |
219 | 250 | } |
220 | 251 | reset($objects); |
... | ... | @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { |
223 | 254 | } |
224 | 255 | |
225 | 256 | |
/**
 * Converts a cell address (A1, B7, AA3 ...) into a zero-based column index.
 *
 * The leading column letters are read as a bijective base-26 number, so
 * A => 0, Z => 25, AA => 26, AB => 27 and so on. An optional leading "$"
 * (absolute reference) is ignored. An address that does not start with
 * column letters yields 0.
 *
 * @param string $cell_address address of a cell, e.g. "A1"
 * @return int zero-based column index
 */
protected function convertCellToIndex($cell_address)
{
    if (!preg_match('/^\$?([A-Za-z]+)/', (string)$cell_address, $matches)) {
        return 0;
    }

    $letters = strtoupper($matches[1]);
    $column_number = 0;
    $length = strlen($letters);
    for ($pos = 0; $pos < $length; $pos++) {
        // 'A' is digit 1, 'Z' is digit 26
        $column_number = $column_number * 26 + (ord($letters[$pos]) - ord('A') + 1);
    }

    return $column_number - 1;
}
// @todo - rewrite the parent method in a generic way instead of overriding it here
/**
 * Takes the current row as the header keys, once.
 *
 * @return bool true when the keys were set from the current row by this call,
 *              false when header mode is off or the keys are already set
 */
protected function setKeysFromHeader()
{
    $keys_pending = $this->has_header_row && $this->keys === null;
    if (!$keys_pending) {
        return false;
    }

    $this->keys = $this->row;

    return true;
}
226 | 287 | protected function cleanUp() |
227 | 288 | { |
228 | 289 | parent::cleanUp(); |
... | ... | @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { |
230 | 291 | unset($this->sheets_arr); |
231 | 292 | unset($this->current_node); |
232 | 293 | |
233 | - $this->deleteExtractFiles(); | |
234 | 294 | |
235 | 295 | } |
236 | 296 | |
/**
 * Deletes the extracted workbook files when the parser object is destroyed.
 *
 * The directory is removed only if it looks like an unpacked xlsx package.
 * Before extraction has run, $this->path_for_extract_files still points at
 * the configured base directory (setup() defaults it to the system temp
 * directory), and that directory must never be wiped.
 */
public function __destruct()
{
    $dir = $this->path_for_extract_files;
    if (!is_string($dir) || $dir === '') {
        return;
    }

    // Markers of an unpacked package: the content-types part or the xl/ folder.
    if (is_file($dir . '/[Content_Types].xml') || is_dir($dir . '/xl')) {
        $this->deleteExtractFiles();
    }
}
301 | + | |
237 | 302 | |
238 | 303 | } |
239 | 304 | \ No newline at end of file | ... | ... |