try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

import csv
import itertools
import logging
import operator

import psycopg2

from openerp.osv import orm, fields
from openerp.tools.translate import _

FIELDS_RECURSION_LIMIT = 2
ERROR_PREVIEW_BYTES = 200

_logger = logging.getLogger(__name__)


class ir_import(orm.TransientModel):
    _name = 'base_import.import'

    # allow imports to survive for 12h in case the user is slow
    _transient_max_hours = 12.0

    _columns = {
        'res_model': fields.char('Model', size=64),
        'file': fields.binary(
            'File', help="File to check and/or import, raw binary (not base64)"),
        'file_name': fields.char('File Name', size=None),
        'file_type': fields.char('File Type', size=None),
    }
    def get_fields(self, cr, uid, model, context=None,
                   depth=FIELDS_RECURSION_LIMIT):
        """ Recursively get fields for the provided model (through
        fields_get) and filter them according to importability.

        The output format is a list of ``Field``, with ``Field``
        defined as:

        .. class:: Field

            .. attribute:: id (str)

                A non-unique identifier for the field, used to compute
                the span of the ``required`` attribute: if multiple
                ``required`` fields have the same id, only one of them
                is necessary.

            .. attribute:: name (str)

                The field's logical (OpenERP) name within the scope of
                its parent.

            .. attribute:: string (str)

                The field's human-readable name (``@string``)

            .. attribute:: required (bool)

                Whether the field is marked as required in the
                model. Clients must provide non-empty import values
                for all required fields or the import will error out.

            .. attribute:: fields (list(Field))

                The current field's subfields. The database and
                external identifiers for m2o and m2m fields; a
                filtered and transformed fields_get for o2m fields (to
                a variable depth defined by ``depth``).

                Fields with no sub-fields will have an empty list of
                sub-fields.

        :param str model: name of the model to get fields from
        :param int depth: depth of recursion into o2m fields
        """
        fields = [{
            'id': 'id',
            'name': 'id',
            'string': _("External ID"),
            'required': False,
            'fields': [],
        }]
        fields_got = self.pool[model].fields_get(cr, uid, context=context)
        for name, field in fields_got.iteritems():
            # an empty string means the field is deprecated, @deprecated must
            # be absent or False to mean not-deprecated
            if field.get('deprecated', False) is not False:
                continue
            if field.get('readonly'):
                states = field.get('states')
                if not states:
                    continue
                # states = {state: [(attr, value), (attr2, value2)], state2:...}
                if not any(attr == 'readonly' and value is False
                           for attr, value in itertools.chain.from_iterable(
                               states.itervalues())):
                    continue

            f = {
                'id': name,
                'name': name,
                'string': field['string'],
                # fields_get does not always return a 'required' key
                'required': bool(field.get('required')),
                'fields': [],
            }
            if field['type'] in ('many2many', 'many2one'):
                f['fields'] = [
                    dict(f, name='id', string=_("External ID")),
                    dict(f, name='.id', string=_("Database ID")),
                ]
            elif field['type'] == 'one2many' and depth:
                f['fields'] = self.get_fields(
                    cr, uid, field['relation'], context=context, depth=depth-1)
                if self.pool['res.users'].has_group(cr, uid, 'base.group_no_one'):
                    f['fields'].append({
                        'id': '.id',
                        'name': '.id',
                        'string': _("Database ID"),
                        'required': False,
                        'fields': [],
                    })

            fields.append(f)

        # TODO: cache on model?
        return fields
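
    # Illustrative sketch (not from the original source): for a hypothetical
    # model with a required char field 'name' and an o2m field 'child_ids',
    # get_fields would return roughly:
    #
    #   [{'id': 'id', 'name': 'id', 'string': u'External ID',
    #     'required': False, 'fields': []},
    #    {'id': 'name', 'name': 'name', 'string': u'Name',
    #     'required': True, 'fields': []},
    #    {'id': 'child_ids', 'name': 'child_ids', 'string': u'Children',
    #     'required': False, 'fields': [...]}]  # recursed up to ``depth``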

    def _read_csv(self, record, options):
        """ Returns a CSV-parsed iterator of all non-empty lines in the file

        :throws csv.Error: if an error is detected during CSV parsing
        :throws UnicodeDecodeError: if ``options.encoding`` is incorrect
        """
        csv_iterator = csv.reader(
            StringIO(record.file),
            quotechar=str(options['quoting']),
            delimiter=str(options['separator']))
        csv_nonempty = itertools.ifilter(None, csv_iterator)
        # TODO: guess encoding with chardet? Or https://github.com/aadsm/jschardet
        encoding = options.get('encoding', 'utf-8')
        return itertools.imap(
            lambda row: [item.decode(encoding) for item in row],
            csv_nonempty)
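
    # Hypothetical usage sketch, with the CSV options documented on
    # parse_preview below:
    #
    #   options = {'quoting': '"', 'separator': ',', 'encoding': 'utf-8'}
    #   for row in self._read_csv(record, options):
    #       # each row is a list of unicode cells; empty lines are skipped
    #       ...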

    def _match_header(self, header, fields, options):
        """ Attempts to match a given header to a field of the
        imported model.

        :param str header: header name from the CSV file
        :param fields: fields tree, as returned by :meth:`get_fields`
        :param dict options: parsing options
        :returns: an empty list if the header couldn't be matched, or
                  all the fields to traverse
        :rtype: list(Field)
        """
        for field in fields:
            # FIXME: should match all translations & original
            # TODO: use string distance (levenshtein? hamming?)
            if header == field['name'] \
                    or header.lower() == field['string'].lower():
                return [field]

        if '/' not in header:
            return []

        # relational field path
        traversal = []
        subfields = fields
        # Iteratively dive into fields tree
        for section in header.split('/'):
            # Strip section in case spaces are added around '/' for
            # readability of paths
            match = self._match_header(section.strip(), subfields, options)
            # Any match failure, exit
            if not match:
                return []
            # prep subfields for next iteration within match[0]
            field = match[0]
            subfields = field['fields']
            traversal.append(field)
        return traversal
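
    # Sketch of the path matching, assuming a hypothetical fields tree in
    # which field 'line_ids' has a subfield 'product_id'; matching is by
    # technical name or case-insensitive human-readable string:
    #
    #   self._match_header('line_ids / product_id', fields, options)
    #   # => [{'name': 'line_ids', ...}, {'name': 'product_id', ...}]
    #   self._match_header('no_such_column', fields, options)
    #   # => []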

    def _match_headers(self, rows, fields, options):
        """ Attempts to match the imported model's fields to the
        titles of the parsed CSV file, if the file is supposed to have
        headers.

        Will consume the first line of the ``rows`` iterator.

        Returns a pair of (None, None) if headers were not requested
        or the list of headers and a dict mapping cell indices
        to key paths in the ``fields`` tree.

        :param Iterator rows:
        :param fields: fields tree, as returned by :meth:`get_fields`
        :param dict options: parsing options
        :rtype: (None, None) | (list(str), dict(int: list(str)))
        """
        if not options.get('headers'):
            return None, None

        headers = next(rows)
        return headers, dict(
            (index, [field['name'] for field in self._match_header(header, fields, options)] or None)
            for index, header in enumerate(headers)
        )
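
    # Hypothetical sketch: for a CSV whose first row is ['name', 'bogus']
    # and options['headers'] set, _match_headers would return:
    #
    #   (['name', 'bogus'], {0: ['name'], 1: None})
    #
    # unmatched columns map to None so the client can ask the user to map
    # them manually (or ignore them).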

    def parse_preview(self, cr, uid, id, options, count=10, context=None):
        """ Generates a preview of the uploaded files, and performs
        fields-matching between the import's file data and the model's
        columns.

        If the headers are not requested (not options.headers),
        ``matches`` and ``headers`` are both ``False``.

        :param id: identifier of the import
        :param int count: number of preview lines to generate
        :param options: format-specific options.
                        CSV: {encoding, quoting, separator, headers}
        :type options: {str, str, str, bool}
        :returns: {fields, matches, headers, preview} | {error, preview}
        :rtype: {dict(str: dict(...)), dict(int, list(str)), list(str), list(list(str))} | {str, str}
        """
        (record,) = self.browse(cr, uid, [id], context=context)
        fields = self.get_fields(cr, uid, record.res_model, context=context)

        try:
            rows = self._read_csv(record, options)
            headers, matches = self._match_headers(rows, fields, options)
            # Matching should have consumed the first row (iff headers); get
            # the ``count`` next rows for the preview
            preview = list(itertools.islice(rows, count))
            assert preview, "CSV file seems to have no content"
            return {
                'fields': fields,
                'matches': matches or False,
                'headers': headers or False,
                'preview': preview,
            }
        except Exception as e:
            # Due to lazy generators, UnicodeDecodeError (for
            # instance) may only be raised when serializing the
            # preview to a list in the return.
            _logger.debug("Error during CSV parsing preview", exc_info=True)
            return {
                'error': str(e),
                # iso-8859-1 ensures decoding will always succeed,
                # even if it yields non-printable characters. This is
                # in case of UnicodeDecodeError (or csv.Error
                # compounded with UnicodeDecodeError)
                'preview': record.file[:ERROR_PREVIEW_BYTES]
                           .decode('iso-8859-1'),
            }
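
    # Hypothetical call sketch (values are illustrative): after uploading a
    # CSV into a base_import.import record, a 5-line preview could be
    # requested with:
    #
    #   result = self.parse_preview(cr, uid, import_id, {
    #       'quoting': '"', 'separator': ',',
    #       'encoding': 'utf-8', 'headers': True,
    #   }, count=5)
    #   # => {'fields': ..., 'matches': ..., 'headers': ..., 'preview': ...}
    #   #    or {'error': ..., 'preview': ...} on failure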

    def _convert_import_data(self, record, fields, options, context=None):
        """ Extracts the input browse_record and fields list (with
        ``False``-y placeholders for fields to *not* import) into a
        format Model.import_data can use: a fields list without holes
        and the precisely matching data matrix.

        :param browse_record record:
        :param list(str|bool) fields:
        :returns: (data, fields)
        :rtype: (list(list(str)), list(str))
        :raises ValueError: in case the import data could not be converted
        """
        # Get indices for non-empty fields
        indices = [index for index, field in enumerate(fields) if field]
        if not indices:
            raise ValueError(_("You must configure at least one field to import"))
        # If only one index, itemgetter will return an atom rather
        # than a tuple
        if len(indices) == 1:
            mapper = lambda row: [row[indices[0]]]
        else:
            mapper = operator.itemgetter(*indices)
        # Get only the list of actually imported fields
        import_fields = filter(None, fields)

        rows_to_import = self._read_csv(record, options)
        if options.get('headers'):
            rows_to_import = itertools.islice(
                rows_to_import, 1, None)
        data = [
            row for row in itertools.imap(mapper, rows_to_import)
            # don't try inserting completely empty rows (e.g. from
            # filtering out o2m fields)
            if any(row)
        ]

        return data, import_fields
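
    # Illustrative example of the hole-removal, with made-up values:
    #
    #   fields = ['name', False, 'phone']       # second column is ignored
    #   row    = ['Foo', 'junk', '+32-123']
    #   # indices -> [0, 2]; mapper(row) -> ('Foo', '+32-123')
    #   # import_fields -> ['name', 'phone']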

    def do(self, cr, uid, id, fields, options, dryrun=False, context=None):
        """ Actual execution of the import

        :param fields: import mapping: maps each column to a field,
                       ``False`` for the columns to ignore
        :type fields: list(str|bool)
        :param dict options: parsing options
        :param bool dryrun: performs all import operations (and
                            validations) but rolls back writes, which
                            allows getting as many errors as possible
                            without the risk of clobbering the database.
        :returns: A list of errors. If the list is empty the import
                  executed fully and correctly. If the list is
                  non-empty it contains dicts with 3 keys: ``type``, the
                  type of error (``error|warning``); ``message``, the
                  error message associated with the error (a string);
                  and ``record``, the data which failed to import (or
                  ``false`` if that data isn't available or provided).
        :rtype: list({type, message, record})
        """
        cr.execute('SAVEPOINT import')

        (record,) = self.browse(cr, uid, [id], context=context)
        try:
            data, import_fields = self._convert_import_data(
                record, fields, options, context=context)
        except ValueError as e:
            return [{
                'type': 'error',
                'message': unicode(e),
                'record': False,
            }]

        _logger.info('importing %d rows...', len(data))
        import_result = self.pool[record.res_model].load(
            cr, uid, import_fields, data, context=context)

        # If the transaction was aborted, RELEASE SAVEPOINT is going to
        # raise an InternalError (ROLLBACK should work, maybe). Ignore that.
        # TODO: to handle multiple errors, create a savepoint around each
        #       write and release it in case of write error (after
        #       adding the error to the errors array) => can keep on trying
        #       to import stuff, and roll back at the end if there is any
        #       error in the results.
        try:
            if dryrun:
                cr.execute('ROLLBACK TO SAVEPOINT import')
            else:
                cr.execute('RELEASE SAVEPOINT import')
        except psycopg2.InternalError:
            pass

        return import_result['messages']
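
    # Hypothetical end-to-end sketch (identifiers are illustrative): a dry
    # run validates the whole file without committing anything:
    #
    #   messages = self.do(cr, uid, import_id,
    #                      ['name', False, 'phone'],
    #                      {'quoting': '"', 'separator': ',',
    #                       'encoding': 'utf-8', 'headers': True},
    #                      dryrun=True)
    #   if not messages:
    #       ...  # importable as-is; re-run with dryrun=False to commit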