Commit f6e54131c1adc6b153e334f17010c74745a6a83a

Authored by Mihail
1 parent 735c416d

Fixed the keys and header issue; handle empty columns

Showing 2 changed files with 128 additions and 61 deletions   Show diff stats
lib/TableParser.php
@@ -9,6 +9,8 @@ @@ -9,6 +9,8 @@
9 namespace yii\multiparser; 9 namespace yii\multiparser;
10 10
11 11
  12 +use common\components\CustomVarDamp;
  13 +
12 abstract class TableParser extends Parser { 14 abstract class TableParser extends Parser {
13 15
14 16
@@ -107,7 +109,7 @@ abstract class TableParser extends Parser { @@ -107,7 +109,7 @@ abstract class TableParser extends Parser {
107 { 109 {
108 do { 110 do {
109 111
110 - $this->current_row_number ++; 112 + $this->current_row_number++;
111 $this->readRow(); 113 $this->readRow();
112 114
113 } while( $this->isEmptyRow() ); 115 } while( $this->isEmptyRow() );
lib/XlsxParser.php
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 */ 7 */
8 8
9 namespace yii\multiparser; 9 namespace yii\multiparser;
  10 +
10 use common\components\CustomVarDamp; 11 use common\components\CustomVarDamp;
11 12
12 13
@@ -14,7 +15,8 @@ use common\components\CustomVarDamp; @@ -14,7 +15,8 @@ use common\components\CustomVarDamp;
14 * Class XlsxParser 15 * Class XlsxParser
15 * @package yii\multiparser 16 * @package yii\multiparser
16 */ 17 */
17 -class XlsxParser extends TableParser { 18 +class XlsxParser extends TableParser
  19 +{
18 20
19 /** 21 /**
20 * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера 22 * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера
@@ -39,7 +41,7 @@ class XlsxParser extends TableParser { @@ -39,7 +41,7 @@ class XlsxParser extends TableParser {
39 41
40 parent::setup(); 42 parent::setup();
41 43
42 - if ( $this->path_for_extract_files == '' ) { 44 + if ($this->path_for_extract_files == '') {
43 $this->path_for_extract_files = sys_get_temp_dir(); 45 $this->path_for_extract_files = sys_get_temp_dir();
44 } 46 }
45 } 47 }
@@ -47,18 +49,18 @@ class XlsxParser extends TableParser { @@ -47,18 +49,18 @@ class XlsxParser extends TableParser {
47 49
48 public function read() 50 public function read()
49 { 51 {
50 - $this->extractFiles(); 52 + $this->extractFiles();
51 $this->readSheets(); 53 $this->readSheets();
52 $this->readStrings(); 54 $this->readStrings();
53 - foreach ( $this->sheets_arr as $sheet ) { 55 + foreach ($this->sheets_arr as $sheet) {
54 //проходим по всем файлам из директории /xl/worksheets/ 56 //проходим по всем файлам из директории /xl/worksheets/
55 $this->current_sheet = $sheet; 57 $this->current_sheet = $sheet;
56 $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; 58 $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml';
57 - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) {  
58 - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); 59 + if (file_exists($sheet_path) && is_readable($sheet_path)) {
  60 + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator");
59 $this->current_node = $xml->sheetData->row; 61 $this->current_node = $xml->sheetData->row;
60 $this->current_node->rewind(); 62 $this->current_node->rewind();
61 - if ( $this->current_node->valid() ) { 63 + if ($this->current_node->valid()) {
62 parent::read(); 64 parent::read();
63 } 65 }
64 } 66 }
@@ -66,49 +68,48 @@ class XlsxParser extends TableParser { @@ -66,49 +68,48 @@ class XlsxParser extends TableParser {
66 68
67 $this->cleanUp(); 69 $this->cleanUp();
68 70
69 - if ( $this->active_sheet ) { 71 + if ($this->active_sheet) {
70 // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив 72 // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив
71 - return $this->result[ $this->current_sheet ];  
72 - }else{ 73 + return $this->result[$this->current_sheet];
  74 + } else {
73 return $this->result; 75 return $this->result;
74 } 76 }
75 77
76 } 78 }
77 79
78 - protected function extractFiles () 80 + protected function extractFiles()
79 { 81 {
80 $this->path_for_extract_files = $this->path_for_extract_files . session_id(); 82 $this->path_for_extract_files = $this->path_for_extract_files . session_id();
81 - if ( !file_exists($this->path_for_extract_files )) {  
82 - if ( !mkdir( $this->path_for_extract_files ) )  
83 - {  
84 - throw new \Exception( 'Ошибка создания временного каталога - ' . $this->path_for_extract_files ); 83 + if (!file_exists($this->path_for_extract_files)) {
  84 + if (!mkdir($this->path_for_extract_files)) {
  85 + throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files);
85 } 86 }
86 } 87 }
87 88
88 $zip = new \ZipArchive; 89 $zip = new \ZipArchive;
89 - if ( $zip->open( $this->file_path ) === TRUE ) {  
90 - $zip->extractTo( $this->path_for_extract_files . '/' ); 90 + if ($zip->open($this->file_path) === TRUE) {
  91 + $zip->extractTo($this->path_for_extract_files . '/');
91 $zip->close(); 92 $zip->close();
92 } else { 93 } else {
93 94
94 - throw new \Exception( 'Ошибка чтения xlsx файла' ); 95 + throw new \Exception('Ошибка чтения xlsx файла');
95 } 96 }
96 unset($zip); 97 unset($zip);
97 } 98 }
98 99
99 - protected function readSheets () 100 + protected function readSheets()
100 { 101 {
101 - if ( $this->active_sheet ) {  
102 - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; 102 + if ($this->active_sheet) {
  103 + $this->sheets_arr[] = 'sheet' . $this->active_sheet;
103 return; 104 return;
104 } 105 }
105 106
106 - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' );  
107 - foreach ( $xml->sheets->children() as $sheet ) { 107 + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml');
  108 + foreach ($xml->sheets->children() as $sheet) {
108 $sheet_name = ''; 109 $sheet_name = '';
109 $sheet_id = 0; 110 $sheet_id = 0;
110 $attr = $sheet->attributes(); 111 $attr = $sheet->attributes();
111 - foreach ( $attr as $name => $value ) { 112 + foreach ($attr as $name => $value) {
112 if ($name == 'name') 113 if ($name == 'name')
113 $sheet_name = (string)$value; 114 $sheet_name = (string)$value;
114 115
@@ -116,74 +117,101 @@ class XlsxParser extends TableParser { @@ -116,74 +117,101 @@ class XlsxParser extends TableParser {
116 $sheet_id = $value; 117 $sheet_id = $value;
117 118
118 } 119 }
119 - if ( $sheet_name && $sheet_id ) { 120 + if ($sheet_name && $sheet_id) {
120 $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; 121 $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id;
121 } 122 }
122 // 123 //
123 } 124 }
124 } 125 }
125 126
126 - protected function readStrings () 127 + protected function readStrings()
127 { 128 {
128 - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/sharedStrings.xml' );  
129 - foreach ( $xml->children() as $item ) { 129 + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/sharedStrings.xml');
  130 + foreach ($xml->children() as $item) {
130 $this->strings_arr[] = (string)$item->t; 131 $this->strings_arr[] = (string)$item->t;
131 } 132 }
132 } 133 }
133 134
134 135
135 -  
136 - // protected function readRow ( $item, $sheet , $current_row )  
137 - protected function readRow ( ) 136 + protected function readRow()
138 { 137 {
139 $this->row = []; 138 $this->row = [];
140 $node = $this->current_node->getChildren(); 139 $node = $this->current_node->getChildren();
141 if ($node === NULL) { 140 if ($node === NULL) {
142 return; 141 return;
143 } 142 }
144 - //foreach ( $node as $child ) {  
145 - for ( $node->rewind(); $node->valid(); $node->next() ) { 143 +
  144 + for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) {
146 $child = $node->current(); 145 $child = $node->current();
147 $attr = $child->attributes(); 146 $attr = $child->attributes();
148 147
149 - if( isset($child->v) ) { 148 + // determine the index in the result array
  149 + // $attr['r'] contains the cell address - A1, B1 ...
  150 + if (isset($attr['r'])) {
  151 + // override index
  152 + $i = $this->convertCellToIndex( $attr['r'] );
  153 +
  154 + if ( $this->keys !== Null ){
  155 + if( isset( $this->keys[$i] ) ){
  156 + //$i = $this->keys[$i];
  157 + } else {
  158 + // keys are defined, but there is no key for this column index, so skip the cell
  159 + continue;
  160 + }
  161 + }
  162 + }
  163 + // define the value of result array
  164 + if (isset($child->v)) {
150 $value = (string)$child->v; 165 $value = (string)$child->v;
151 - }else{ 166 +
  167 + if ( isset($attr['t']) )
  168 + // the cell has a type attribute, so its value is treated as an index into the strings array - fetch the string
  169 + $value = $this->strings_arr[$value];
  170 +
  171 + } else {
152 $value = ''; 172 $value = '';
153 } 173 }
154 - if ( isset( $attr['t'] ) ) {  
155 - $this->row[] = $this->strings_arr[ $value ];  
156 - }else{  
157 - $this->row[] = $value;  
158 - } 174 +
  175 + // set
  176 + $this->row[$i] = $value;
159 177
160 } 178 }
161 - // дополним ряд пустыми значениями если у нас ключей больше чем значений  
162 - if ( $this->has_header_row && ( count( $this->keys ) > count( $this->row ) ) ) {  
163 - $extra_coloumn = count( $this->keys ) - count( $this->row );  
164 - for ( $i = 1; $i <= $extra_coloumn; $i++ ) {  
165 - $this->row[] = ''; 179 +// fill the row with empty values for the keys that were missed in the previous step
  180 + // only for 'has_header_row = true' mode
  181 + if ( $this->has_header_row && $this->keys !== Null ) {
  182 + $extra_column = count( $this->keys ) - count( $this->row );
  183 + if ( $extra_column ) {
  184 + foreach ( $this->keys as $key => $key ) {
  185 +
  186 + if ( isset( $this->row[$key] ) ) {
  187 + continue;
  188 + }
  189 + $this->row[$key] = '';
  190 + }
166 } 191 }
  192 +
167 } 193 }
  194 + ksort( $this->row );
168 $this->current_node->next(); 195 $this->current_node->next();
169 } 196 }
170 197
171 - protected function isEmptyRow(){ 198 + protected function isEmptyRow()
  199 + {
172 200
173 $is_empty = false; 201 $is_empty = false;
174 202
175 - if ( !count( $this->row ) || !$this->current_node->valid() ) { 203 + if (!count($this->row) || !$this->current_node->valid()) {
176 return true; 204 return true;
177 } 205 }
178 206
179 $j = 0; 207 $j = 0;
180 - for ($i = 1; $i <= count( $this->row ); $i++) { 208 + for ($i = 1; $i <= count($this->row); $i++) {
181 209
182 - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { 210 + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) {
183 $j++; 211 $j++;
184 } 212 }
185 213
186 - if ( $j >= $this->min_column_quantity ) { 214 + if ($j >= $this->min_column_quantity) {
187 $is_empty = true; 215 $is_empty = true;
188 break; 216 break;
189 } 217 }
@@ -192,29 +220,32 @@ class XlsxParser extends TableParser { @@ -192,29 +220,32 @@ class XlsxParser extends TableParser {
192 return $is_empty; 220 return $is_empty;
193 } 221 }
194 222
195 - protected function isEmptyColumn( $val ){ 223 + protected function isEmptyColumn($val)
  224 + {
196 return $val == ''; 225 return $val == '';
197 } 226 }
198 227
199 - protected function setResult( ){  
200 - $this->result[ $this->current_sheet ][] = $this->row; 228 + protected function setResult()
  229 + {
  230 + $this->result[$this->current_sheet][] = $this->row;
201 } 231 }
202 232
203 - protected function deleteExtractFiles () 233 + protected function deleteExtractFiles()
204 { 234 {
205 - $this->removeDir( $this->path_for_extract_files ); 235 + $this->removeDir($this->path_for_extract_files);
206 236
207 } 237 }
208 238
209 - protected function removeDir($dir) { 239 + protected function removeDir($dir)
  240 + {
210 if (is_dir($dir)) { 241 if (is_dir($dir)) {
211 $objects = scandir($dir); 242 $objects = scandir($dir);
212 foreach ($objects as $object) { 243 foreach ($objects as $object) {
213 if ($object != "." && $object != "..") { 244 if ($object != "." && $object != "..") {
214 - if (filetype($dir."/".$object) == "dir")  
215 - $this->removeDir($dir."/".$object); 245 + if (filetype($dir . "/" . $object) == "dir")
  246 + $this->removeDir($dir . "/" . $object);
216 else 247 else
217 - unlink($dir."/".$object); 248 + unlink($dir . "/" . $object);
218 } 249 }
219 } 250 }
220 reset($objects); 251 reset($objects);
@@ -223,6 +254,36 @@ class XlsxParser extends TableParser { @@ -223,6 +254,36 @@ class XlsxParser extends TableParser {
223 } 254 }
224 255
225 256
  257 + /**
  258 + * @param $cell_address - string with address like A1, B1 ...
  259 + * @return int - integer index
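  260 + * this method has a constraint - only the first letter of the address is used, so 'Z' is the last column that converts correctly;
  261 + * a multi-letter column ('AA', 'AB', 'BA' ...) returns the index of its first letter ('AA' -> 0, 'BA' -> 1)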
  262 + */
  263 + protected function convertCellToIndex($cell_address)
  264 + {
  265 + $index = 0;
  266 +
  267 + $address_letter = substr($cell_address, 0, 1);
  268 + $address_arr = range('A', 'Z');
  269 +
  270 + if ( $search_value = array_search( $address_letter, $address_arr ) )
  271 + $index = $search_value;
  272 +
  273 + return $index;
  274 +
  275 + }
  276 +// @todo - переписать родительский метод в универсальной манере а не переопределять его
  277 + protected function setKeysFromHeader(){
  278 + if ( $this->has_header_row ) {
  279 +
  280 + if ($this->keys === NULL) {
  281 + $this->keys = $this->row;
  282 + return true;
  283 + }
  284 + }
  285 + return false;
  286 + }
226 protected function cleanUp() 287 protected function cleanUp()
227 { 288 {
228 parent::cleanUp(); 289 parent::cleanUp();
@@ -230,9 +291,13 @@ class XlsxParser extends TableParser { @@ -230,9 +291,13 @@ class XlsxParser extends TableParser {
230 unset($this->sheets_arr); 291 unset($this->sheets_arr);
231 unset($this->current_node); 292 unset($this->current_node);
232 293
233 - $this->deleteExtractFiles();  
234 294
235 } 295 }
236 296
  297 + function __destruct()
  298 + {
  299 + $this->deleteExtractFiles();
  300 + }
  301 +
237 302
238 } 303 }
239 \ No newline at end of file 304 \ No newline at end of file