Viewing file:
crashdb.py (19.99 KB) -rw-r--r--Select action/file-type:

(
+) |

(
+) |

(
+) |
Code (
+) |
Session (
+) |

(
+) |
SDB (
+) |

(
+) |

(
+) |

(
+) |

(
+) |

(
+) |
'''Abstract crash database interface.'''
# Copyright (C) 2007 - 2009 Canonical Ltd.
# Author: Martin Pitt <martin.pitt@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version. See http://www.gnu.org/copyleft/gpl.html for
# the full text of the license.
import os, os.path, datetime, sys
from exceptions import Exception
from packaging_impl import impl as packaging
def _u(str):
'''Convert str to an unicode if it isn't already.'''
if type(str) == type(''):
return str.decode('utf-8', 'ignore')
return str
class CrashDatabase:
def __init__(self, auth_file, bugpattern_baseurl, options):
'''Initialize crash database connection.
You need to specify an implementation specific file with the
authentication credentials for retracing access for download() and
update(). For upload() and get_comment_url() you can use None.
options is a dictionary with additional settings from crashdb.conf; see
get_crashdb() for details.
'''
self.auth_file = auth_file
self.options = options
self.bugpattern_baseurl = bugpattern_baseurl
self.duplicate_db = None
def get_bugpattern_baseurl(self):
'''Return the base URL for bug patterns.
See apport.report.Report.search_bug_patterns() for details. If this
function returns None, bug patterns are disabled.
'''
return self.bugpattern_baseurl
#
# API for duplicate detection
#
# Tests are in apport/crashdb_impl/memory.py.
def init_duplicate_db(self, path):
'''Initialize duplicate database.
path specifies an SQLite database. It will be created if it does not
exist yet.
'''
import sqlite3 as dbapi2
assert dbapi2.paramstyle == 'qmark', \
'this module assumes qmark dbapi parameter style'
init = not os.path.exists(path) or path == ':memory:' or \
os.path.getsize(path) == 0
self.duplicate_db = dbapi2.connect(path, timeout=7200)
if init:
cur = self.duplicate_db.cursor()
cur.execute('''CREATE TABLE crashes (
signature VARCHAR(255) NOT NULL,
crash_id INTEGER NOT NULL,
fixed_version VARCHAR(50),
last_change TIMESTAMP)''')
self.duplicate_db.commit()
# verify integrity
cur = self.duplicate_db.cursor()
cur.execute('PRAGMA integrity_check')
result = cur.fetchall()
if result != [('ok',)]:
raise SystemError('Corrupt duplicate db:' + str(result))
def check_duplicate(self, id, report=None):
'''Check whether a crash is already known.
If the crash is new, it will be added to the duplicate database and the
function returns None. If the crash is already known, the function
returns a pair (crash_id, fixed_version), where fixed_version might be
None if the crash is not fixed in the latest version yet. Depending on
whether the version in report is smaller than/equal to the fixed
version or larger, this calls close_duplicate() or mark_regression().
If the report does not have a valid crash signature, this function does
nothing and just returns None.
By default, the report gets download()ed, but for performance reasons
it can be explicitly passed to this function if it is already available.
'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
if not report:
report = self.download(id)
self._mark_dup_checked(id, report)
if 'DuplicateSignature' in report:
sig = report['DuplicateSignature']
else:
sig = report.crash_signature()
if not sig:
return None
existing = self._duplicate_search_signature(sig, id)
# sort existing in ascending order, with unfixed last, so that
# version comparisons find the closest fix first
def cmp(x, y):
if x == y:
return 0
if x == '':
if y == None:
return -1
else:
return 1
if y == '':
if x == None:
return 1
else:
return -1
if x == None:
return 1
if y == None:
return -1
return packaging.compare_versions(x, y)
existing.sort(cmp, lambda k: k[1])
if existing:
# update status of existing master bugs
for (ex_id, _) in existing:
self._duplicate_db_sync_status(ex_id)
existing = self._duplicate_search_signature(sig, id)
existing.sort(cmp, lambda k: k[1])
if not existing:
# add a new entry
cur = self.duplicate_db.cursor()
cur.execute('INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)', (_u(sig), id, None))
self.duplicate_db.commit()
return None
try:
report_package_version = report['Package'].split()[1]
except (KeyError, IndexError):
report_package_version = None
# search the newest fixed id or an unfixed id to check whether there is
# a regression (crash happening on a later version than the latest
# fixed one)
for (ex_id, ex_ver) in existing:
if not ex_ver or \
not report_package_version or \
packaging.compare_versions(report_package_version, ex_ver) < 0:
self.close_duplicate(id, ex_id)
break
else:
# regression, mark it as such in the crash db
self.mark_regression(id, ex_id)
# create a new record
cur = self.duplicate_db.cursor()
cur.execute('INSERT INTO crashes VALUES (?, ?, ?, CURRENT_TIMESTAMP)', (_u(sig), id, None))
self.duplicate_db.commit()
# we now track this as a new crash
return None
return (ex_id, ex_ver)
def duplicate_db_fixed(self, id, version):
'''Mark given crash ID as fixed in the duplicate database.
version specifies the package version the crash was fixed in (None for
'still unfixed').
'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
cur = self.duplicate_db.cursor()
n = cur.execute('UPDATE crashes SET fixed_version = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
(version, id))
assert n.rowcount == 1
self.duplicate_db.commit()
def duplicate_db_remove(self, id):
'''Remove crash from the duplicate database.
This happens when a report got rejected or manually duplicated.
'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
cur = self.duplicate_db.cursor()
cur.execute('DELETE FROM crashes WHERE crash_id = ?', [id])
self.duplicate_db.commit()
def duplicate_db_change_master_id(self, old_id, new_id):
'''Change a crash ID.'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
cur = self.duplicate_db.cursor()
cur.execute('UPDATE crashes SET crash_id = ?, last_change = CURRENT_TIMESTAMP WHERE crash_id = ?',
[new_id, old_id])
self.duplicate_db.commit()
def _duplicate_search_signature(self, sig, id):
'''Look up signature in the duplicate db.
Return [(id, fixed_version)] tuple list.
There might be several matches if a crash has been reintroduced in a
later version.
id is the bug we are looking to find a duplicate for. The result will
never contain id, to avoid marking a bug as a duplicate of itself if a
bug is reprocessed more than once.
'''
cur = self.duplicate_db.cursor()
cur.execute('SELECT crash_id, fixed_version FROM crashes WHERE signature = ? AND crash_id <> ?', [_u(sig), id])
return cur.fetchall()
def _duplicate_db_dump(self, with_timestamps=False):
'''Return the entire duplicate database as a dictionary.
The returned dictionary maps "signature" to (crash_id, fixed_version)
pairs.
If with_timestamps is True, then the map will contain triples
(crash_id, fixed_version, last_change) instead.
This is mainly useful for debugging and test suites.
'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
dump = {}
cur = self.duplicate_db.cursor()
cur.execute('SELECT * FROM crashes')
for (sig, id, ver, last_change) in cur:
if with_timestamps:
dump[sig] = (id, ver, last_change)
else:
dump[sig] = (id, ver)
return dump
def _duplicate_db_sync_status(self, id):
'''Update the duplicate db to the reality of the report in the crash db.
This uses get_fixed_version() to get the status of the given crash.
An invalid ID gets removed from the duplicate db, and a crash which got
fixed is marked as such in the database.
'''
assert self.duplicate_db, 'init_duplicate_db() needs to be called before'
cur = self.duplicate_db.cursor()
cur.execute('SELECT fixed_version FROM crashes WHERE crash_id = ?', [id])
db_fixed_version = cur.fetchone()
if not db_fixed_version:
return
db_fixed_version = db_fixed_version[0]
real_fixed_version = self.get_fixed_version(id)
# crash got rejected
if real_fixed_version == 'invalid':
print('DEBUG: bug %i was invalidated, removing from database' % id)
self.duplicate_db_remove(id)
return
# crash got fixed
if not db_fixed_version and real_fixed_version:
print('DEBUG: bug %i got fixed in version %s, updating database' % (id, real_fixed_version))
self.duplicate_db_fixed(id, real_fixed_version)
return
# crash got reopened
if db_fixed_version and not real_fixed_version:
print('DEBUG: bug %i got reopened, dropping fixed version %s from database' % (id, db_fixed_version))
self.duplicate_db_fixed(id, real_fixed_version)
return
#
# Abstract functions that need to be implemented by subclasses
#
def upload(self, report, progress_callback = None):
'''Upload given problem report return a handle for it.
This should happen noninteractively.
If the implementation supports it, and a function progress_callback is
passed, that is called repeatedly with two arguments: the number of
bytes already sent, and the total number of bytes to send. This can be
used to provide a proper upload progress indication on frontends.
This method can raise a NeedsCredentials exception in case of failure.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_comment_url(self, report, handle):
'''Return an URL that should be opened after report has been uploaded
and upload() returned handle.
Should return None if no URL should be opened (anonymous filing without
user comments); in that case this function should do whichever
interactive steps it wants to perform.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def download(self, id):
'''Download the problem report from given ID and return a Report.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def update(self, id, report, comment, change_description=False,
attachment_comment=None, key_filter=None):
'''Update the given report ID with all data from report.
This creates a text comment with the "short" data (see
ProblemReport.write_mime()), and creates attachments for all the
bulk/binary data.
If change_description is True, and the crash db implementation supports
it, the short data will be put into the description instead (like in a
new bug).
comment will be added to the "short" data. If attachment_comment is
given, it will be added to the attachment uploads.
If key_filter is a list or set, then only those keys will be added.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def update_traces(self, id, report, comment=''):
'''Update the given report ID for retracing results.
This updates Stacktrace, ThreadStacktrace, StacktraceTop,
and StacktraceSource. You can also supply an additional comment.
'''
self.update(id, report, comment, key_filter=['Stacktrace',
'ThreadStacktrace', 'StacktraceSource', 'StacktraceTop'])
def set_credentials(self, username, password):
'''Set username and password.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_distro_release(self, id):
'''Get 'DistroRelease: <release>' from the report ID.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_unretraced(self):
'''Return set of crash IDs which have not been retraced yet.
This should only include crashes which match the current host
architecture.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_dup_unchecked(self):
'''Return set of crash IDs which need duplicate checking.
This is mainly useful for crashes of scripting languages such as
Python, since they do not need to be retraced. It should not return
bugs that are covered by get_unretraced().
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_unfixed(self):
'''Return an ID set of all crashes which are not yet fixed.
The list must not contain bugs which were rejected or duplicate.
This function should make sure that the returned list is correct. If
there are any errors with connecting to the crash database, it should
raise an exception (preferably IOError).
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_fixed_version(self, id):
'''Return the package version that fixes a given crash.
Return None if the crash is not yet fixed, or an empty string if the
crash is fixed, but it cannot be determined by which version. Return
'invalid' if the crash report got invalidated, such as closed a
duplicate or rejected.
This function should make sure that the returned result is correct. If
there are any errors with connecting to the crash database, it should
raise an exception (preferably IOError).
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def get_affected_packages(self, id):
'''Return list of affected source packages for given ID.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def is_reporter(self, id):
'''Check whether the user is the reporter of given ID.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def can_update(self, id):
'''Check whether the user is eligible to update a report.
A user should add additional information to an existing ID if (s)he is
the reporter or subscribed, the bug is open, not a duplicate, etc. The
exact policy and checks should be done according to the particular
implementation.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def duplicate_of(self, id):
'''Return master ID for a duplicate bug.
If the bug is not a duplicate, return None.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def close_duplicate(self, id, master):
'''Mark a crash id as duplicate of given master ID.
If master is None, id gets un-duplicated.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_regression(self, id, master):
'''Mark a crash id as reintroducing an earlier crash which is
already marked as fixed (having ID 'master').'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_retraced(self, id):
'''Mark crash id as retraced.'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def mark_retrace_failed(self, id, invalid_msg=None):
'''Mark crash id as 'failed to retrace'.
If invalid_msg is given, the bug should be closed as invalid with given
message, otherwise just marked as a failed retrace.
This can be a no-op if you are not interested in this.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
def _mark_dup_checked(self, id, report):
'''Mark crash id as checked for being a duplicate
This is an internal method that should not be called from outside.
'''
raise NotImplementedError('this method must be implemented by a concrete subclass')
#
# factory
#
def get_crashdb(auth_file, name = None, conf = None):
'''Return a CrashDatabase object for the given crash db name.
This reads the configuration file 'conf'.
If name is None, it defaults to the 'default' value in conf.
If conf is None, it defaults to the environment variable
APPORT_CRASHDB_CONF; if that does not exist, the hardcoded default is
/etc/apport/crashdb.conf. This Python syntax file needs to specify:
- A string variable 'default', giving a default value for 'name' if that is
None.
- A dictionary 'databases' which maps names to crash db configuration
dictionaries. These need to have at least the keys 'impl' (Python module
in apport.crashdb_impl which contains a concrete 'CrashDatabase' class
implementation for that crash db type) and 'bug_pattern_url', which
specifies an URL for bug patterns (or None if those are not used for that
crash db).
'''
if not conf:
conf = os.environ.get('APPORT_CRASHDB_CONF', '/etc/apport/crashdb.conf')
settings = {}
execfile(conf, settings)
# Load third parties crashdb.conf
confdDir = conf + '.d'
if os.path.isdir(confdDir):
for cf in os.listdir(confdDir):
cfpath = os.path.join(confdDir, cf)
if os.path.isfile(cfpath) and cf.endswith('.conf'):
try:
execfile(cfpath, settings['databases'])
except Exception as e:
# ignore broken files
sys.stderr.write('Invalid file %s: %s\n' % (cfpath, str(e)))
pass
if not name:
name = settings['default']
db = settings['databases'][name]
bug_pattern_url = db.get('bug_pattern_url')
if not bug_pattern_url:
# fall back to default database's
bug_pattern_url = settings['databases'][settings['default']].get('bug_pattern_url')
m = __import__('apport.crashdb_impl.' + db['impl'], globals(), locals(), ['CrashDatabase'])
return m.CrashDatabase(auth_file, bug_pattern_url, db)
class NeedsCredentials(Exception):
'''This may be raised when unable to log in to the crashdb.'''
pass