CsvBulkLoader.php
7.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
<?php
/**
* Utility class to facilitate complex CSV-imports by defining column-mappings
* and custom converters.
*
* Uses the fgetcsv() function to process CSV input. Accepts a file path as
* input.
*
* @see http://tools.ietf.org/html/rfc4180
*
* @package framework
* @subpackage bulkloading
*
* @todo Support for deleting existing records not matched in the import
* (through relation checks)
*/
class CsvBulkLoader extends BulkLoader {

    /**
     * Delimiter character (Default: comma).
     *
     * @var string
     */
    public $delimiter = ',';

    /**
     * Enclosure character (Default: doublequote)
     *
     * @var string
     */
    public $enclosure = '"';

    /**
     * Identifies whether the CSV has a header row.
     *
     * @var boolean
     */
    public $hasHeaderRow = true;

    /**
     * @inheritDoc
     */
    public function preview($filepath) {
        return $this->processAll($filepath, true);
    }

    /**
     * Parse the CSV file at the given path and run every row through
     * {@link processRecord()}.
     *
     * @param string $filepath Path handed to the CSVParser
     * @param boolean $preview When true, rows are processed without writing
     *
     * @return BulkLoader_Result
     */
    protected function processAll($filepath, $preview = false) {
        $results = new BulkLoader_Result();

        $csv = new CSVParser(
            $filepath,
            $this->delimiter,
            $this->enclosure
        );

        // ColumnMap has two uses, depending on whether hasHeaderRow is set
        if($this->columnMap) {
            $map = array();

            // if the map goes to a callback, use the same key value as the map
            // value, rather than function name as multiple keys may use the
            // same callback
            foreach($this->columnMap as $k => $v) {
                if(strpos($v, "->") === 0) {
                    $map[$k] = $k;
                } else {
                    $map[$k] = $v;
                }
            }

            if($this->hasHeaderRow) {
                // the file carries its own header: translate its column names
                $csv->mapColumns($map);
            } else {
                // no header in the file: the map supplies the column names
                $csv->provideHeaderRow($map);
            }
        }

        foreach($csv as $row) {
            $this->processRecord($row, $this->columnMap, $results, $preview);
        }

        return $results;
    }

    /**
     * Import a single CSV row: find or create the target object, resolve any
     * relations, copy the column values across and (unless previewing) write
     * the object.
     *
     * @todo Better messages for relation checks and duplicate detection
     * Note that $columnMap is only forwarded to findExistingObject(); the
     * column lookups below read $this->columnMap directly.
     *
     * @param array $record Column name => value for one row
     * @param array $columnMap
     * @param BulkLoader_Result $results Collects created/updated objects
     * @param boolean $preview When true, nothing is written
     *
     * @return int ID of the processed object
     */
    protected function processRecord($record, $columnMap, &$results, $preview = false) {
        $class = $this->objectClass;

        // find existing object, or create new one
        $existingObj = $this->findExistingObject($record, $columnMap);
        $obj = ($existingObj) ? $existingObj : new $class();

        // first run: find/create any relations and store them on the object
        // we can't combine runs, as other columns might rely on the relation being present
        foreach($record as $fieldName => $val) {
            // don't bother querying if value is not set
            if($this->isNullValue($val)) continue;

            // checking for existing relations
            if(isset($this->relationCallbacks[$fieldName])) {
                // trigger custom search method for finding a relation based on the given value
                // and write it back to the relation (or create a new object)
                $relationName = $this->relationCallbacks[$fieldName]['relationname'];
                $callback = $this->relationCallbacks[$fieldName]['callback'];

                // reset per column so a relation resolved for an earlier
                // column is never reused when no callback is available
                $relationObj = null;
                if($this->hasMethod($callback)) {
                    $relationObj = $this->{$callback}($obj, $val, $record);
                } elseif($obj->hasMethod($callback)) {
                    $relationObj = $obj->{$callback}($val, $record);
                }

                if(!$relationObj || !$relationObj->exists()) {
                    $relationClass = $obj->has_one($relationName);
                    $relationObj = new $relationClass();
                    // write if we aren't previewing
                    if(!$preview) $relationObj->write();
                }

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if(!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            } elseif(strpos($fieldName, '.') !== false) {
                // we have a relation column with dot notation;
                // only the relation part is needed in this run
                list($relationName) = explode('.', $fieldName);

                // always gives us a component (either empty or existing)
                $relationObj = $obj->getComponent($relationName);
                if(!$preview) $relationObj->write();

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if(!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            }
        }

        // second run: save data (skipped entirely when previewing)
        if(!$preview) {
            foreach($record as $fieldName => $val) {
                // look up the mapping to see if this needs to map to callback
                $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

                if($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                    // "->method" entries are handled by a method on this loader
                    $funcName = substr($this->columnMap[$fieldName], 2);
                    $this->$funcName($obj, $val, $record);
                } elseif($obj->hasMethod("import{$fieldName}")) {
                    $obj->{"import{$fieldName}"}($val, $record);
                } else {
                    $obj->update(array($fieldName => $val));
                }
            }

            // write record
            $obj->write();
        }

        // @todo better message support
        $message = '';

        // save to results
        if($existingObj) {
            $results->addUpdated($obj, $message);
        } else {
            $results->addCreated($obj, $message);
        }

        $objID = $obj->ID;

        $obj->destroy();

        // memory usage
        unset($existingObj);
        unset($obj);

        return $objID;
    }

    /**
     * Find an existing object based on one or more uniqueness columns
     * specified via {@link self::$duplicateChecks}.
     *
     * Each check is either a string (a column looked up in $record and
     * compared against the field of the same name) or an array with a
     * 'callback' key naming a method on this loader or on the object class.
     * The first check that yields a record wins.
     *
     * @param array $record CSV data column
     *
     * @return mixed The matching record, or false when no check matched
     */
    public function findExistingObject($record) {
        $SNG_objectClass = singleton($this->objectClass);

        // checking for existing records (only if not already found)
        foreach($this->duplicateChecks as $fieldName => $duplicateCheck) {
            // reset per check so a non-fatal error handler can never leave
            // an undefined or stale value behind
            $existingRecord = null;

            if(is_string($duplicateCheck)) {
                // Look the value up under the raw column name; escaping is
                // only applied to what goes into the SQL fragment.
                // Note: empty() also skips the check for values such as "0".
                if(empty($record[$duplicateCheck])) {
                    // skip current duplicate check if field value is empty
                    continue;
                }

                $SQL_fieldName = Convert::raw2sql($duplicateCheck);
                $SQL_fieldValue = Convert::raw2sql($record[$duplicateCheck]);
                $existingRecord = DataObject::get_one($this->objectClass, "\"$SQL_fieldName\" = '{$SQL_fieldValue}'");
            } elseif(is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                $callback = $duplicateCheck['callback'];
                // a column missing from this row is passed to the callback as null
                $value = isset($record[$fieldName]) ? $record[$fieldName] : null;

                if($this->hasMethod($callback)) {
                    $existingRecord = $this->{$callback}($value, $record);
                } elseif($SNG_objectClass->hasMethod($callback)) {
                    $existingRecord = $SNG_objectClass->{$callback}($value, $record);
                } else {
                    user_error("CsvBulkLoader::findExistingObject():"
                        . " {$callback} not found on importer or object class.", E_USER_ERROR);
                }
            } else {
                user_error('CsvBulkLoader::findExistingObject(): Wrong format for $duplicateChecks', E_USER_ERROR);
            }

            if($existingRecord) {
                return $existingRecord;
            }
        }

        return false;
    }

    /**
     * Determine whether any loaded files should be parsed with a
     * header-row (otherwise we rely on {@link self::$columnMap}).
     *
     * NOTE(review): isset() is true for any non-null property value, so if
     * the parent class declares $columnMap with a non-null default (e.g. an
     * empty array) this method always returns true - confirm against
     * BulkLoader before relying on a false result.
     *
     * @return boolean
     */
    public function hasHeaderRow() {
        return ($this->hasHeaderRow || isset($this->columnMap));
    }
}