Commit f6e54131c1adc6b153e334f17010c74745a6a83a

Authored by Mihail
1 parent 735c416d

Fixed the keys and header issue; added handling of rows with empty columns

Showing 2 changed files with 128 additions and 61 deletions   Show diff stats
lib/TableParser.php
... ... @@ -9,6 +9,8 @@
9 9 namespace yii\multiparser;
10 10  
11 11  
  12 +use common\components\CustomVarDamp;
  13 +
12 14 abstract class TableParser extends Parser {
13 15  
14 16  
... ... @@ -107,7 +109,7 @@ abstract class TableParser extends Parser {
107 109 {
108 110 do {
109 111  
110   - $this->current_row_number ++;
  112 + $this->current_row_number++;
111 113 $this->readRow();
112 114  
113 115 } while( $this->isEmptyRow() );
... ...
lib/XlsxParser.php
... ... @@ -7,6 +7,7 @@
7 7 */
8 8  
9 9 namespace yii\multiparser;
  10 +
10 11 use common\components\CustomVarDamp;
11 12  
12 13  
... ... @@ -14,7 +15,8 @@ use common\components\CustomVarDamp;
14 15 * Class XlsxParser
15 16 * @package yii\multiparser
16 17 */
17   -class XlsxParser extends TableParser {
  18 +class XlsxParser extends TableParser
  19 +{
18 20  
19 21 /**
20 22 * @var string - path the archive files are extracted to; if not set, the server's temporary directory is used
... ... @@ -39,7 +41,7 @@ class XlsxParser extends TableParser {
39 41  
40 42 parent::setup();
41 43  
42   - if ( $this->path_for_extract_files == '' ) {
  44 + if ($this->path_for_extract_files == '') {
43 45 $this->path_for_extract_files = sys_get_temp_dir();
44 46 }
45 47 }
... ... @@ -47,18 +49,18 @@ class XlsxParser extends TableParser {
47 49  
48 50 public function read()
49 51 {
50   - $this->extractFiles();
  52 + $this->extractFiles();
51 53 $this->readSheets();
52 54 $this->readStrings();
53   - foreach ( $this->sheets_arr as $sheet ) {
  55 + foreach ($this->sheets_arr as $sheet) {
54 56 //проходим по всем файлам из директории /xl/worksheets/
55 57 $this->current_sheet = $sheet;
56 58 $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml';
57   - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) {
58   - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" );
  59 + if (file_exists($sheet_path) && is_readable($sheet_path)) {
  60 + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator");
59 61 $this->current_node = $xml->sheetData->row;
60 62 $this->current_node->rewind();
61   - if ( $this->current_node->valid() ) {
  63 + if ($this->current_node->valid()) {
62 64 parent::read();
63 65 }
64 66 }
... ... @@ -66,49 +68,48 @@ class XlsxParser extends TableParser {
66 68  
67 69 $this->cleanUp();
68 70  
69   - if ( $this->active_sheet ) {
  71 + if ($this->active_sheet) {
70 72 // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив
71   - return $this->result[ $this->current_sheet ];
72   - }else{
  73 + return $this->result[$this->current_sheet];
  74 + } else {
73 75 return $this->result;
74 76 }
75 77  
76 78 }
77 79  
78   - protected function extractFiles ()
  80 + protected function extractFiles()
79 81 {
80 82 $this->path_for_extract_files = $this->path_for_extract_files . session_id();
81   - if ( !file_exists($this->path_for_extract_files )) {
82   - if ( !mkdir( $this->path_for_extract_files ) )
83   - {
84   - throw new \Exception( 'Ошибка создания временного каталога - ' . $this->path_for_extract_files );
  83 + if (!file_exists($this->path_for_extract_files)) {
  84 + if (!mkdir($this->path_for_extract_files)) {
  85 + throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files);
85 86 }
86 87 }
87 88  
88 89 $zip = new \ZipArchive;
89   - if ( $zip->open( $this->file_path ) === TRUE ) {
90   - $zip->extractTo( $this->path_for_extract_files . '/' );
  90 + if ($zip->open($this->file_path) === TRUE) {
  91 + $zip->extractTo($this->path_for_extract_files . '/');
91 92 $zip->close();
92 93 } else {
93 94  
94   - throw new \Exception( 'Ошибка чтения xlsx файла' );
  95 + throw new \Exception('Ошибка чтения xlsx файла');
95 96 }
96 97 unset($zip);
97 98 }
98 99  
99   - protected function readSheets ()
  100 + protected function readSheets()
100 101 {
101   - if ( $this->active_sheet ) {
102   - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet;
  102 + if ($this->active_sheet) {
  103 + $this->sheets_arr[] = 'sheet' . $this->active_sheet;
103 104 return;
104 105 }
105 106  
106   - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' );
107   - foreach ( $xml->sheets->children() as $sheet ) {
  107 + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml');
  108 + foreach ($xml->sheets->children() as $sheet) {
108 109 $sheet_name = '';
109 110 $sheet_id = 0;
110 111 $attr = $sheet->attributes();
111   - foreach ( $attr as $name => $value ) {
  112 + foreach ($attr as $name => $value) {
112 113 if ($name == 'name')
113 114 $sheet_name = (string)$value;
114 115  
... ... @@ -116,74 +117,101 @@ class XlsxParser extends TableParser {
116 117 $sheet_id = $value;
117 118  
118 119 }
119   - if ( $sheet_name && $sheet_id ) {
  120 + if ($sheet_name && $sheet_id) {
120 121 $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id;
121 122 }
122 123 //
123 124 }
124 125 }
125 126  
126   - protected function readStrings ()
  127 + protected function readStrings()
127 128 {
128   - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/sharedStrings.xml' );
129   - foreach ( $xml->children() as $item ) {
  129 + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/sharedStrings.xml');
  130 + foreach ($xml->children() as $item) {
130 131 $this->strings_arr[] = (string)$item->t;
131 132 }
132 133 }
133 134  
134 135  
135   -
136   - // protected function readRow ( $item, $sheet , $current_row )
137   - protected function readRow ( )
  136 + protected function readRow()
138 137 {
139 138 $this->row = [];
140 139 $node = $this->current_node->getChildren();
141 140 if ($node === NULL) {
142 141 return;
143 142 }
144   - //foreach ( $node as $child ) {
145   - for ( $node->rewind(); $node->valid(); $node->next() ) {
  143 +
  144 + for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) {
146 145 $child = $node->current();
147 146 $attr = $child->attributes();
148 147  
149   - if( isset($child->v) ) {
  148 + // define the index of result array
  149 + // $attr['r'] - contain the address of cells - A1, B1 ...
  150 + if (isset($attr['r'])) {
  151 + // override index
  152 + $i = $this->convertCellToIndex( $attr['r'] );
  153 +
  154 + if ( $this->keys !== Null ){
  155 + if( isset( $this->keys[$i] ) ){
  156 + //$i = $this->keys[$i];
  157 + } else {
  158 + // we have a keys, but this one we didn't find, so skip it
  159 + continue;
  160 + }
  161 + }
  162 + }
  163 + // define the value of result array
  164 + if (isset($child->v)) {
150 165 $value = (string)$child->v;
151   - }else{
  166 +
  167 + if ( isset($attr['t']) )
  168 + // it's not a value it's a string, so fetch it from string array
  169 + $value = $this->strings_arr[$value];
  170 +
  171 + } else {
152 172 $value = '';
153 173 }
154   - if ( isset( $attr['t'] ) ) {
155   - $this->row[] = $this->strings_arr[ $value ];
156   - }else{
157   - $this->row[] = $value;
158   - }
  174 +
  175 + // set
  176 + $this->row[$i] = $value;
159 177  
160 178 }
161   - // дополним ряд пустыми значениями если у нас ключей больше чем значений
162   - if ( $this->has_header_row && ( count( $this->keys ) > count( $this->row ) ) ) {
163   - $extra_coloumn = count( $this->keys ) - count( $this->row );
164   - for ( $i = 1; $i <= $extra_coloumn; $i++ ) {
165   - $this->row[] = '';
  179 +// // fill the row by empty values for keys that we are missed in previous step
  180 + // only for 'has_header_row = true' mode
  181 + if ( $this->has_header_row && $this->keys !== Null ) {
  182 + $extra_column = count( $this->keys ) - count( $this->row );
  183 + if ( $extra_column ) {
  184 + foreach ( $this->keys as $key => $key ) {
  185 +
  186 + if ( isset( $this->row[$key] ) ) {
  187 + continue;
  188 + }
  189 + $this->row[$key] = '';
  190 + }
166 191 }
  192 +
167 193 }
  194 + ksort( $this->row );
168 195 $this->current_node->next();
169 196 }
170 197  
171   - protected function isEmptyRow(){
  198 + protected function isEmptyRow()
  199 + {
172 200  
173 201 $is_empty = false;
174 202  
175   - if ( !count( $this->row ) || !$this->current_node->valid() ) {
  203 + if (!count($this->row) || !$this->current_node->valid()) {
176 204 return true;
177 205 }
178 206  
179 207 $j = 0;
180   - for ($i = 1; $i <= count( $this->row ); $i++) {
  208 + for ($i = 1; $i <= count($this->row); $i++) {
181 209  
182   - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) {
  210 + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) {
183 211 $j++;
184 212 }
185 213  
186   - if ( $j >= $this->min_column_quantity ) {
  214 + if ($j >= $this->min_column_quantity) {
187 215 $is_empty = true;
188 216 break;
189 217 }
... ... @@ -192,29 +220,32 @@ class XlsxParser extends TableParser {
192 220 return $is_empty;
193 221 }
194 222  
195   - protected function isEmptyColumn( $val ){
  223 + protected function isEmptyColumn($val)
  224 + {
196 225 return $val == '';
197 226 }
198 227  
199   - protected function setResult( ){
200   - $this->result[ $this->current_sheet ][] = $this->row;
  228 + protected function setResult()
  229 + {
  230 + $this->result[$this->current_sheet][] = $this->row;
201 231 }
202 232  
203   - protected function deleteExtractFiles ()
  233 + protected function deleteExtractFiles()
204 234 {
205   - $this->removeDir( $this->path_for_extract_files );
  235 + $this->removeDir($this->path_for_extract_files);
206 236  
207 237 }
208 238  
209   - protected function removeDir($dir) {
  239 + protected function removeDir($dir)
  240 + {
210 241 if (is_dir($dir)) {
211 242 $objects = scandir($dir);
212 243 foreach ($objects as $object) {
213 244 if ($object != "." && $object != "..") {
214   - if (filetype($dir."/".$object) == "dir")
215   - $this->removeDir($dir."/".$object);
  245 + if (filetype($dir . "/" . $object) == "dir")
  246 + $this->removeDir($dir . "/" . $object);
216 247 else
217   - unlink($dir."/".$object);
  248 + unlink($dir . "/" . $object);
218 249 }
219 250 }
220 251 reset($objects);
... ... @@ -223,6 +254,36 @@ class XlsxParser extends TableParser {
223 254 }
224 255  
225 256  
  257 + /**
  258 + * @param $cell_address - string with address like A1, B1 ...
  259 + * @return int - integer index
  260 + * this method has a constraint - 'Z' - it's a last column to convert,
  261 + * column with 'AA..' address and bigger - return index = 0
  262 + */
  263 + protected function convertCellToIndex($cell_address)
  264 + {
  265 + $index = 0;
  266 +
  267 + $address_letter = substr($cell_address, 0, 1);
  268 + $address_arr = range('A', 'Z');
  269 +
  270 + if ( $search_value = array_search( $address_letter, $address_arr ) )
  271 + $index = $search_value;
  272 +
  273 + return $index;
  274 +
  275 + }
  276 +// @todo - переписать родительский метод в универсальной манере а не переопределять его
  277 + protected function setKeysFromHeader(){
  278 + if ( $this->has_header_row ) {
  279 +
  280 + if ($this->keys === NULL) {
  281 + $this->keys = $this->row;
  282 + return true;
  283 + }
  284 + }
  285 + return false;
  286 + }
226 287 protected function cleanUp()
227 288 {
228 289 parent::cleanUp();
... ... @@ -230,9 +291,13 @@ class XlsxParser extends TableParser {
230 291 unset($this->sheets_arr);
231 292 unset($this->current_node);
232 293  
233   - $this->deleteExtractFiles();
234 294  
235 295 }
236 296  
  297 + function __destruct()
  298 + {
  299 + $this->deleteExtractFiles();
  300 + }
  301 +
237 302  
238 303 }
239 304 \ No newline at end of file
... ...