From f6e54131c1adc6b153e334f17010c74745a6a83a Mon Sep 17 00:00:00 2001 From: Mihail Date: Wed, 18 Nov 2015 14:39:25 +0200 Subject: [PATCH] fixed keys and header issue and handle with empty columns --- lib/TableParser.php | 4 +++- lib/XlsxParser.php | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------ 2 files changed, 128 insertions(+), 61 deletions(-) diff --git a/lib/TableParser.php b/lib/TableParser.php index 36f15e2..6694bbf 100644 --- a/lib/TableParser.php +++ b/lib/TableParser.php @@ -9,6 +9,8 @@ namespace yii\multiparser; +use common\components\CustomVarDamp; + abstract class TableParser extends Parser { @@ -107,7 +109,7 @@ abstract class TableParser extends Parser { { do { - $this->current_row_number ++; + $this->current_row_number++; $this->readRow(); } while( $this->isEmptyRow() ); diff --git a/lib/XlsxParser.php b/lib/XlsxParser.php index 2b7213c..fc48c53 100644 --- a/lib/XlsxParser.php +++ b/lib/XlsxParser.php @@ -7,6 +7,7 @@ */ namespace yii\multiparser; + use common\components\CustomVarDamp; @@ -14,7 +15,8 @@ use common\components\CustomVarDamp; * Class XlsxParser * @package yii\multiparser */ -class XlsxParser extends TableParser { +class XlsxParser extends TableParser +{ /** * @var string - путь куда будут распаковываться файлы, если не указанно - во временный каталог сервера @@ -39,7 +41,7 @@ class XlsxParser extends TableParser { parent::setup(); - if ( $this->path_for_extract_files == '' ) { + if ($this->path_for_extract_files == '') { $this->path_for_extract_files = sys_get_temp_dir(); } } @@ -47,18 +49,18 @@ class XlsxParser extends TableParser { public function read() { - $this->extractFiles(); + $this->extractFiles(); $this->readSheets(); $this->readStrings(); - foreach ( $this->sheets_arr as $sheet ) { + foreach ($this->sheets_arr as $sheet) { //проходим по всем файлам из директории /xl/worksheets/ $this->current_sheet = $sheet; $sheet_path = $this->path_for_extract_files . '/xl/worksheets/' . $sheet . '.xml'; - if ( file_exists( $sheet_path ) && is_readable( $sheet_path ) ) { - $xml = simplexml_load_file( $sheet_path, "SimpleXMLIterator" ); + if (file_exists($sheet_path) && is_readable($sheet_path)) { + $xml = simplexml_load_file($sheet_path, "SimpleXMLIterator"); $this->current_node = $xml->sheetData->row; $this->current_node->rewind(); - if ( $this->current_node->valid() ) { + if ($this->current_node->valid()) { parent::read(); } } @@ -66,49 +68,48 @@ class XlsxParser extends TableParser { $this->cleanUp(); - if ( $this->active_sheet ) { + if ($this->active_sheet) { // в настройках указан конкретный лист с которого будем производить чтение, поэтому и возвращаем подмассив - return $this->result[ $this->current_sheet ]; - }else{ + return $this->result[$this->current_sheet]; + } else { return $this->result; } } - protected function extractFiles () + protected function extractFiles() { $this->path_for_extract_files = $this->path_for_extract_files . session_id(); - if ( !file_exists($this->path_for_extract_files )) { - if ( !mkdir( $this->path_for_extract_files ) ) - { - throw new \Exception( 'Ошибка создания временного каталога - ' . $this->path_for_extract_files ); + if (!file_exists($this->path_for_extract_files)) { + if (!mkdir($this->path_for_extract_files)) { + throw new \Exception('Ошибка создания временного каталога - ' . $this->path_for_extract_files); } } $zip = new \ZipArchive; - if ( $zip->open( $this->file_path ) === TRUE ) { - $zip->extractTo( $this->path_for_extract_files . '/' ); + if ($zip->open($this->file_path) === TRUE) { + $zip->extractTo($this->path_for_extract_files . '/'); $zip->close(); } else { - throw new \Exception( 'Ошибка чтения xlsx файла' ); + throw new \Exception('Ошибка чтения xlsx файла'); } unset($zip); } - protected function readSheets () + protected function readSheets() { - if ( $this->active_sheet ) { - $this->sheets_arr[ ] = 'sheet' . $this->active_sheet; + if ($this->active_sheet) { + $this->sheets_arr[] = 'sheet' . $this->active_sheet; return; } - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/workbook.xml' ); - foreach ( $xml->sheets->children() as $sheet ) { + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/workbook.xml'); + foreach ($xml->sheets->children() as $sheet) { $sheet_name = ''; $sheet_id = 0; $attr = $sheet->attributes(); - foreach ( $attr as $name => $value ) { + foreach ($attr as $name => $value) { if ($name == 'name') $sheet_name = (string)$value; @@ -116,74 +117,101 @@ class XlsxParser extends TableParser { $sheet_id = $value; } - if ( $sheet_name && $sheet_id ) { + if ($sheet_name && $sheet_id) { $this->sheets_arr[$sheet_name] = 'Sheet' . $sheet_id; } // } } - protected function readStrings () + protected function readStrings() { - $xml = simplexml_load_file( $this->path_for_extract_files . '/xl/sharedStrings.xml' ); - foreach ( $xml->children() as $item ) { + $xml = simplexml_load_file($this->path_for_extract_files . '/xl/sharedStrings.xml'); + foreach ($xml->children() as $item) { $this->strings_arr[] = (string)$item->t; } } - - // protected function readRow ( $item, $sheet , $current_row ) - protected function readRow ( ) + protected function readRow() { $this->row = []; $node = $this->current_node->getChildren(); if ($node === NULL) { return; } - //foreach ( $node as $child ) { - for ( $node->rewind(); $node->valid(); $node->next() ) { + + for ($node->rewind(), $i = 0; $node->valid(); $node->next(), $i++) { $child = $node->current(); $attr = $child->attributes(); - if( isset($child->v) ) { + // define the index of result array + // $attr['r'] - contain the address of cells - A1, B1 ... + if (isset($attr['r'])) { + // override index + $i = $this->convertCellToIndex( $attr['r'] ); + + if ( $this->keys !== Null ){ + if( isset( $this->keys[$i] ) ){ + //$i = $this->keys[$i]; + } else { + // we have a keys, but this one we didn't find, so skip it + continue; + } + } + } + // define the value of result array + if (isset($child->v)) { $value = (string)$child->v; - }else{ + + if ( isset($attr['t']) ) + // it's not a value it's a string, so fetch it from string array + $value = $this->strings_arr[$value]; + + } else { $value = ''; } - if ( isset( $attr['t'] ) ) { - $this->row[] = $this->strings_arr[ $value ]; - }else{ - $this->row[] = $value; - } + + // set + $this->row[$i] = $value; } - // дополним ряд пустыми значениями если у нас ключей больше чем значений - if ( $this->has_header_row && ( count( $this->keys ) > count( $this->row ) ) ) { - $extra_coloumn = count( $this->keys ) - count( $this->row ); - for ( $i = 1; $i <= $extra_coloumn; $i++ ) { - $this->row[] = ''; +// // fill the row by empty values for keys that we are missed in previous step + // only for 'has_header_row = true' mode + if ( $this->has_header_row && $this->keys !== Null ) { + $extra_column = count( $this->keys ) - count( $this->row ); + if ( $extra_column ) { + foreach ( $this->keys as $key => $key ) { + + if ( isset( $this->row[$key] ) ) { + continue; + } + $this->row[$key] = ''; + } } + } + ksort( $this->row ); $this->current_node->next(); } - protected function isEmptyRow(){ + protected function isEmptyRow() + { $is_empty = false; - if ( !count( $this->row ) || !$this->current_node->valid() ) { + if (!count($this->row) || !$this->current_node->valid()) { return true; } $j = 0; - for ($i = 1; $i <= count( $this->row ); $i++) { + for ($i = 1; $i <= count($this->row); $i++) { - if ( isset($this->row[$i - 1]) && $this->isEmptyColumn( $this->row[$i - 1] ) ) { + if (isset($this->row[$i - 1]) && $this->isEmptyColumn($this->row[$i - 1])) { $j++; } - if ( $j >= $this->min_column_quantity ) { + if ($j >= $this->min_column_quantity) { $is_empty = true; break; } @@ -192,29 +220,32 @@ class XlsxParser extends TableParser { return $is_empty; } - protected function isEmptyColumn( $val ){ + protected function isEmptyColumn($val) + { return $val == ''; } - protected function setResult( ){ - $this->result[ $this->current_sheet ][] = $this->row; + protected function setResult() + { + $this->result[$this->current_sheet][] = $this->row; } - protected function deleteExtractFiles () + protected function deleteExtractFiles() { - $this->removeDir( $this->path_for_extract_files ); + $this->removeDir($this->path_for_extract_files); } - protected function removeDir($dir) { + protected function removeDir($dir) + { if (is_dir($dir)) { $objects = scandir($dir); foreach ($objects as $object) { if ($object != "." && $object != "..") { - if (filetype($dir."/".$object) == "dir") - $this->removeDir($dir."/".$object); + if (filetype($dir . "/" . $object) == "dir") + $this->removeDir($dir . "/" . $object); else - unlink($dir."/".$object); + unlink($dir . "/" . $object); } } reset($objects); @@ -223,6 +254,36 @@ class XlsxParser extends TableParser { } + /** + * @param $cell_address - string with address like A1, B1 ... + * @return int - integer index + * this method has a constraint - 'Z' - it's a last column to convert, + * column with 'AA..' address and bigger - return index = 0 + */ + protected function convertCellToIndex($cell_address) + { + $index = 0; + + $address_letter = substr($cell_address, 0, 1); + $address_arr = range('A', 'Z'); + + if ( $search_value = array_search( $address_letter, $address_arr ) ) + $index = $search_value; + + return $index; + + } +// @todo - переписать родительский метод в универсальной манере а не переопределять его + protected function setKeysFromHeader(){ + if ( $this->has_header_row ) { + + if ($this->keys === NULL) { + $this->keys = $this->row; + return true; + } + } + return false; + } protected function cleanUp() { parent::cleanUp(); @@ -230,9 +291,13 @@ class XlsxParser extends TableParser { unset($this->sheets_arr); unset($this->current_node); - $this->deleteExtractFiles(); } + function __destruct() + { + $this->deleteExtractFiles(); + } + } \ No newline at end of file -- libgit2 0.21.4