7 from cStringIO import StringIO
9 from StringIO import StringIO
13 from openerp.osv import orm, fields
14 from openerp.tools.translate import _
# Maximum depth to which one2many sub-fields are expanded by
# ir_import.get_fields() (used as the default for its ``depth`` param)
FIELDS_RECURSION_LIMIT = 2
# Number of raw bytes of the uploaded file echoed back to the client as
# ``preview`` when parse_preview() fails, so the user can eyeball the
# problem area
ERROR_PREVIEW_BYTES = 200
_logger = logging.getLogger(__name__)
class ir_import(orm.TransientModel):
    """ Transient (wizard) model backing file imports: holds the
    uploaded file and its metadata, and drives CSV parsing, preview
    and the actual import into ``res_model``.
    """
    _name = 'base_import.import'
    # allow imports to survive for 12h in case user is slow
    _transient_max_hours = 12.0

    # NOTE(review): the ``_columns = {`` opener and its closing brace
    # fall in lines elided from this extract; only the column
    # definitions below are visible.
        # technical name of the model the file will be imported into
        'res_model': fields.char('Model', size=64),
        # raw binary contents of the uploaded file (per the help text,
        # NOT base64-encoded)
        'file': fields.binary(
            'File', help="File to check and/or import, raw binary (not base64)"),
        'file_name': fields.char('File Name', size=None),
        'file_type': fields.char('File Type', size=None),
    def get_fields(self, cr, uid, model, context=None,
                   depth=FIELDS_RECURSION_LIMIT):
        """ Recursively get fields for the provided model (through
        fields_get) and filter them according to importability.

        The output format is a list of ``Field``, with ``Field``
        defined as a dict bearing the attributes below (part of the
        original description is elided from this extract):

        .. attribute:: id (str)

            A non-unique identifier for the field, used to compute
            the span of the ``required`` attribute: if multiple
            ``required`` fields have the same id, only one of them
            needs to be provided.

        .. attribute:: name (str)

            The field's logical (OpenERP) name within the scope of
            its parent.

        .. attribute:: string (str)

            The field's human-readable name (``@string``)

        .. attribute:: required (bool)

            Whether the field is marked as required in the
            model. Clients must provide non-empty import values
            for all required fields or the import will error out.

        .. attribute:: fields (list(Field))

            The current field's subfields. The database and
            external identifiers for m2o and m2m fields; a
            filtered and transformed fields_get for o2m fields (to
            a variable depth defined by ``depth``).

            Fields with no sub-fields will have an empty list of
            subfields.

        :param str model: name of the model to get fields from
        :param int depth: depth of recursion into o2m fields
        """
        # NOTE(review): construction of the seed ``fields`` list (the
        # implicit external-id entry) is elided from this extract; only
        # its ``string`` key is visible here.
                'string': _("External ID"),
        fields_got = self.pool[model].fields_get(cr, uid, context=context)
        for name, field in fields_got.iteritems():
            # an empty string means the field is deprecated, @deprecated must
            # be absent or False to mean not-deprecated
            if field.get('deprecated', False) is not False:
                # [body elided in this extract -- presumably skips the
                # deprecated field; TODO confirm against upstream]
            if field.get('readonly'):
                states = field.get('states')
                # [a guard for missing/empty ``states`` is elided here]
                # states = {state: [(attr, value), (attr2, value2)], state2:...}
                # keep a readonly field only if some state explicitly
                # flips ``readonly`` back to False (i.e. it is writable
                # in at least one state)
                if not any(attr == 'readonly' and value is False
                           for attr, value in itertools.chain.from_iterable(
                               states.itervalues())):
                    # [body elided in this extract -- presumably skips
                    # the always-readonly field]
            # NOTE(review): construction of the per-field dict ``f`` is
            # partially elided; only these entries are visible.
                'string': field['string'],
                # Y U NO ALWAYS HAS REQUIRED
                'required': bool(field.get('required')),
            if field['type'] in ('many2many', 'many2one'):
                # relational fields expose both identifier pseudo-columns
                # as subfields (list literal delimiters elided here)
                    dict(f, name='id', string=_("External ID")),
                    dict(f, name='.id', string=_("Database ID")),
            elif field['type'] == 'one2many' and depth:
                # recurse into the o2m comodel, spending one unit of the
                # ``depth`` budget so recursion terminates
                f['fields'] = self.get_fields(
                    cr, uid, field['relation'], context=context, depth=depth-1)
        # [accumulation of ``f`` and the final return are elided]
    # TODO: cache on model?

    def _read_csv(self, record, options):
        """ Returns a CSV-parsed iterator of all non-empty lines in the
        file

        :param browse_record record: wizard record whose ``file`` field
                                     holds the raw CSV bytes
        :param dict options: CSV options: ``quoting``, ``separator``
                             and optionally ``encoding`` (utf-8 default)
        :throws csv.Error: if an error is detected during CSV parsing
        :throws UnicodeDecodeError: if ``options.encoding`` is incorrect
        """
        csv_iterator = csv.reader(
            StringIO(record.file),
            # py2 csv wants bytestring dialect parameters, hence str()
            quotechar=str(options['quoting']),
            delimiter=str(options['separator']))
        # blank lines parse as empty (falsy) row lists -- drop them
        csv_nonempty = itertools.ifilter(None, csv_iterator)
        # TODO: guess encoding with chardet? Or https://github.com/aadsm/jschardet
        encoding = options.get('encoding', 'utf-8')
        # lazily decode every cell to unicode; decode errors thus only
        # surface when the returned iterator is consumed
        return itertools.imap(
            lambda row: [item.decode(encoding) for item in row],
        # [the iterable argument (csv_nonempty) and the closing paren
        # are elided from this extract]
    def _match_header(self, header, fields, options):
        """ Attempts to match a given header to a field of the
        imported model.

        :param str header: header name from the CSV file
        :param fields: importable-fields tree (as built by get_fields)
        :param dict options:
        :returns: an empty list if the header couldn't be matched, or
                  all the fields to traverse
        """
        # NOTE(review): the loop over ``fields`` is elided from this
        # extract; ``field`` below is presumably its loop variable.
            # FIXME: should match all translations & original
            # TODO: use string distance (levenshtein? hamming?)
            if header == field['name'] \
                    or header.lower() == field['string'].lower():
                # [single-field match return elided]

        if '/' not in header:
            # [no-match (empty list) return elided]

        # relational field path
        # [initialization of ``traversal`` / ``subfields`` elided]
        # Iteratively dive into fields tree
        for section in header.split('/'):
            # Strip section in case spaces are added around '/' for
            # readability of paths
            match = self._match_header(section.strip(), subfields, options)
            # Any match failure, exit
            if not match: return []
            # prep subfields for next iteration within match[0]
            # [extraction of ``field`` from ``match`` is elided]
            subfields = field['fields']
            traversal.append(field)
        # [final return of ``traversal`` elided from this extract]
    def _match_headers(self, rows, fields, options):
        """ Attempts to match the imported model's fields to the
        titles of the parsed CSV file, if the file is supposed to have
        headers.

        Will consume the first line of the ``rows`` iterator.

        Returns a pair of (None, None) if headers were not requested
        or the list of headers and a dict mapping cell indices
        to key paths in the ``fields`` tree

        :param Iterator rows:
        :param fields: importable-fields tree (as built by get_fields)
        :param dict options:
        :rtype: (None, None) | (list(str), dict(int: list(str)))
        """
        if not options.get('headers'):
            # [early (None, None) return elided from this extract]
        # [extraction of the ``headers`` row from ``rows`` is elided]
        return headers, dict(
            # each cell index maps to the matched field-name path, or
            # None when the header matched no importable field
            (index, [field['name'] for field in self._match_header(header, fields, options)] or None)
            for index, header in enumerate(headers)
        # [closing parenthesis elided from this extract]
    def parse_preview(self, cr, uid, id, options, count=10, context=None):
        """ Generates a preview of the uploaded files, and performs
        fields-matching between the import's file data and the model's
        importable fields.

        If the headers are not requested (not options.headers),
        ``matches`` and ``headers`` are both ``False``.

        :param id: identifier of the import
        :param int count: number of preview lines to generate
        :param options: format-specific options.
                        CSV: {encoding, quoting, separator, headers}
        :type options: {str, str, str, bool}
        :returns: {fields, matches, headers, preview} | {error, preview}
        :rtype: {dict(str: dict(...)), dict(int, list(str)), list(str), list(list(str))} | {str, str}
        """
        (record,) = self.browse(cr, uid, [id], context=context)
        fields = self.get_fields(cr, uid, record.res_model, context=context)
        # NOTE(review): a ``try:`` opener is elided from this extract;
        # the matching handler is partially visible below.
            rows = self._read_csv(record, options)
            headers, matches = self._match_headers(rows, fields, options)
            # Match should have consumed the first row (iif headers), get
            # the ``count`` next rows for preview
            preview = list(itertools.islice(rows, count))
            assert preview, "CSV file seems to have no content"
            # [opening of the success-result dict is elided; falsy
            # matches/headers are normalized to False for the client]
                'matches': matches or False,
                'headers': headers or False,
        # [the ``except`` clause itself is elided from this extract]
            # Due to lazy generators, UnicodeDecodeError (for
            # instance) may only be raised when serializing the
            # preview to a list in the return.
            _logger.debug("Error during CSV parsing preview", exc_info=True)
            # [opening of the error-result dict is elided]
                # iso-8859-1 ensures decoding will always succeed,
                # even if it yields non-printable characters. This is
                # in case of UnicodeDecodeError (or csv.Error
                # compounded with UnicodeDecodeError)
                'preview': record.file[:ERROR_PREVIEW_BYTES]
                                .decode( 'iso-8859-1'),
    def _convert_import_data(self, record, fields, options, context=None):
        """ Extracts the input browse_record and fields list (with
        ``False``-y placeholders for fields to *not* import) into a
        format Model.import_data can use: a fields list without holes
        and the precisely matching data matrix

        :param browse_record record:
        :param list(str|bool) fields:
        :returns: (data, fields)
        :rtype: (list(list(str)), list(str))
        :raises ValueError: in case the import data could not be converted
        """
        # Get indices for non-empty fields
        indices = [index for index, field in enumerate(fields) if field]
        # [the guard condition (no field selected at all) is elided]
            raise ValueError(_("You must configure at least one field to import"))
        # If only one index, itemgetter will return an atom rather
        # than a tuple -- special-case it so rows stay lists
        if len(indices) == 1: mapper = lambda row: [row[indices[0]]]
        else: mapper = operator.itemgetter(*indices)
        # Get only list of actually imported fields
        import_fields = filter(None, fields)

        rows_to_import = self._read_csv(record, options)
        if options.get('headers'):
            # skip the title row, it holds headers rather than data
            rows_to_import = itertools.islice(
                rows_to_import, 1, None)
        # [opening of the ``data`` list comprehension is elided]
            row for row in itertools.imap(mapper, rows_to_import)
            # don't try inserting completely empty rows (e.g. from
            # filtering out o2m fields)
        # [the filter condition and closing bracket are elided]
        return data, import_fields
    def do(self, cr, uid, id, fields, options, dryrun=False, context=None):
        """ Actual execution of the import

        :param id: identifier of the import record
        :param fields: import mapping: maps each column to a field,
                       ``False`` for the columns to ignore
        :type fields: list(str|bool)
        :param dict options: format-specific parsing options
        :param bool dryrun: performs all import operations (and
                            validations) but rollbacks writes, allows
                            getting as much errors as possible without
                            the risk of clobbering the database.
        :returns: A list of errors. If the list is empty the import
                  executed fully and correctly. If the list is
                  non-empty it contains dicts with 3 keys ``type`` the
                  type of error (``error|warning``); ``message`` the
                  error message associated with the error (a string)
                  and ``record`` the data which failed to import (or
                  ``false`` if that data isn't available or provided)
        :rtype: list({type, message, record})
        """
        # savepoint lets this method undo everything written by the
        # load() below without aborting the caller's whole transaction
        cr.execute('SAVEPOINT import')

        (record,) = self.browse(cr, uid, [id], context=context)
        # NOTE(review): a ``try:`` opener is elided from this extract.
            data, import_fields = self._convert_import_data(
                record, fields, options, context=context)
        except ValueError, e:
            # [the error-dict return is partially elided; only its
            # ``message`` entry is visible]
                'message': unicode(e),

        _logger.info('importing %d rows...', len(data))
        import_result = self.pool[record.res_model].load(
            cr, uid, import_fields, data, context=context)
        # [dryrun/error branching around the savepoint is elided]
        # If transaction aborted, RELEASE SAVEPOINT is going to raise
        # an InternalError (ROLLBACK should work, maybe). Ignore that.
        # TODO: to handle multiple errors, create savepoint around
        #       write and release it in case of write error (after
        #       adding error to errors array) => can keep on trying to
        #       import stuff, and rollback at the end if there is any
        #       error in the results.
            cr.execute('ROLLBACK TO SAVEPOINT import')
        # [branch structure and a ``try:`` opener are elided here]
            cr.execute('RELEASE SAVEPOINT import')
        except psycopg2.InternalError:
            # [handler body elided -- the error is deliberately
            # ignored, per the comment above]

        return import_result['messages']