Commit 5a0d3d39 authored by Matěj Laitl's avatar Matěj Laitl
Browse files

SqlScanResultProcessor: don't accidentally delete tracks, defensive rewrite

This fixes data-loss that I can trigger every time by toggling "Local
Files & USB Mass Storage Backend" in Config -> Plugins, restarting
Amarok and triggering collection update / rescan.

In theory, more things such as cloning changing disk could trigger this
problem, from SqlScanResultProcessor::deleteDeletedDirectories() comment:

We need to match directories by their (absolute) path, otherwise following
scenario triggers statistics loss (bug 298275):

1. user relocates collection to different filesystem, but clones path structure
   or toggles MassStorageDeviceHandler enabled in Config -> plugins.
2. collectionscanner knows nothing about directory ids, so it doesn't detect
   any track changes and emits a bunch of skipped (unchanged) dirs with no
   tracks.
3. SqlRegistry::getDirectory() called from there returns a different directory id
   than in the past.
4. deleteDeletedDirectories() is called, and if it operates on directory ids,
   it happily removes _all_ directories, taking tracks with it.
5. Tracks disappear from the UI until full rescan, stats, lyrics, labels are
   lost forever.

Also add a handful of asserts, ScanResultProcessor is very complicated
and small error or corner-case in logic may result in horrible data
losses.

Reporters of linked bugs, please try to reproduce your data-loss with
this patch applied and report back in both cases. In negative case,
please reopen and attach full updated amarok --debug log.

After this patch, only (statistics, lyrics and labels)-loss operations
should be:
 a) moving track out of mounted collection folders [by design]
 b) changing both metadata and url from outside of a track not tagged
    by amarok_afttagger [we can do nothing about this]

BUG: 298275
FIXED-IN: 2.6
parent bd771b95
......@@ -6,6 +6,10 @@ VERSION 2.6-RC2
CHANGES:
* Show all audio and video files in file browser. (BR 303253)
BUGFIXES:
* Don't lose statistics, labels and lyrics upon collection scan in some
circumstances, e.g. when disk holding collection is swapped. (BR 298275)
VERSION 2.6-RC1
CHANGES:
* Remove codec install support. It's long ago been implemented in phonon.
......
......@@ -71,10 +71,16 @@ SqlScanResultProcessor::unblockUpdates()
/**
 * Register one directory reported by the collection scanner: update its mtime
 * in the database, remember its id for later track commits and record it as
 * "found" so deleteDeletedDirectories() won't remove it.
 *
 * Defect fixed: the scraped diff left both the old and the new revision of the
 * body interleaved (dirId computed twice, two conflicting m_foundDirectories
 * inserts); only the new-revision lines are kept here.
 */
void
SqlScanResultProcessor::commitDirectory( CollectionScanner::Directory *directory )
{
    QString path = directory->path();
    // a bit of paranoia: the scanner should emit each directory path at most once
    if( m_foundDirectories.contains( path ) )
        warning() << "commitDirectory(): duplicate directory path" << path << "in"
                  << "collectionscanner output. This shouldn't happen.";

    // getDirectory() updates the directory entry mtime:
    int dirId = m_collection->registry()->getDirectory( path, directory->mtime() );
    m_directoryIds.insert( directory, dirId );
    // keyed by absolute path, not by id, so deleteDeletedDirectories() can match
    // directories whose id changed without a real on-disk change (bug 298275)
    m_foundDirectories.insert( path, dirId );

    ScanResultProcessor::commitDirectory( directory );
}
......@@ -85,8 +91,8 @@ SqlScanResultProcessor::commitAlbum( CollectionScanner::Album *album )
// debug() << "SRP::commitAlbum on"<<album->name()<< "artist"<<album->artist();
// --- get or create the album
KSharedPtr<Meta::SqlAlbum> metaAlbum;
metaAlbum = KSharedPtr<Meta::SqlAlbum>::staticCast( m_collection->getAlbum( album->name(), album->artist() ) );
Meta::SqlAlbumPtr metaAlbum;
metaAlbum = Meta::SqlAlbumPtr::staticCast( m_collection->getAlbum( album->name(), album->artist() ) );
m_albumIds.insert( album, metaAlbum->id() );
// --- add all tracks
......@@ -120,16 +126,20 @@ void
SqlScanResultProcessor::commitTrack( CollectionScanner::Track *track,
CollectionScanner::Album *srcAlbum )
{
Q_ASSERT( track );
Q_ASSERT( srcAlbum );
Q_ASSERT( m_directoryIds.contains( track->directory() ) );
int directoryId = m_directoryIds.value( track->directory() );
Q_ASSERT( m_albumIds.contains( srcAlbum ) );
int albumId = m_albumIds.value( srcAlbum );
QString uid = track->uniqueid();
if( uid.isEmpty() )
{
warning() << "got track with no unique id from the scanner, not adding";
m_lastErrors.append( QString("Not adding track %1 because it has no unique id").
warning() << "commitTrack(): got track with empty unique id from the scanner,"
<< "not adding it";
m_lastErrors.append( QString( "Not adding track %1 because it has no unique id." ).
arg(track->path()) );
return;
}
......@@ -140,56 +150,64 @@ SqlScanResultProcessor::commitTrack( CollectionScanner::Track *track,
if( m_foundTracks.contains( uid ) )
{
warning() << "track"<<track->path()<<"with uid"<<uid<<"already committed. There seems to be a duplicate uid.";
m_lastErrors.append( QString("Track %1 with uid %2 already committed. There seems to be a duplicate uid.").
arg(track->path(), uid) );
const UrlEntry old = m_urlsCache.value( m_uidCache.value( uid ) );
QString text = QString( "When commiting track %1 with uid %2 we detected that the "
"same uid is already commited. This means that you most probably have "
"duplicates in your collection folders. The offending track is %3." ).arg(
track->path(), uid, old.path );
warning() << "commitTrack():" << text.toLocal8Bit().data();
m_lastErrors.append( text );
return;
}
m_foundTracks.insert( uid );
// --- find an existing track by uid
KSharedPtr<Meta::SqlTrack> metaTrack;
Meta::SqlTrackPtr metaTrack;
// find an existing track by uid
if( m_uidCache.contains( uid ) )
{
UrlEntry entry = m_urlsCache.value( m_uidCache.value( uid ) );
urlsCacheRemove( entry.id ); // remove the old cache entry
int urlId = m_uidCache.value( uid );
Q_ASSERT( m_urlsCache.contains( urlId ) );
UrlEntry entry = m_urlsCache.value( urlId );
entry.path = track->path();
entry.directoryId = directoryId;
urlsCacheInsert( entry );
urlsCacheInsert( entry ); // removes the previous entry (by id) first
// check if there is an older track at the same position.
if( m_pathCache.contains( track->path() ) )
{
const UrlEntry &otherEntry = m_urlsCache.value( m_pathCache.value( track->path() ) );
if( entry.id != otherEntry.id )
{
removeTrack( otherEntry.id, otherEntry.uid );
urlsCacheRemove( otherEntry.id );
}
}
metaTrack = KSharedPtr<Meta::SqlTrack>::staticCast( m_collection->trackForUrl( uid ) );
metaTrack = Meta::SqlTrackPtr::staticCast( m_collection->trackForUrl( uid ) );
Q_ASSERT( metaTrack->urlId() == entry.id );
}
// --- find an existing track by path or create a new one
if( !metaTrack )
// find an existing track by path
else if( m_pathCache.contains( track->path() ) )
{
int urlId = m_pathCache.value( track->path() );
Q_ASSERT( m_urlsCache.contains( urlId ) );
UrlEntry entry = m_urlsCache.value( urlId );
entry.uid = uid;
entry.directoryId = directoryId;
urlsCacheInsert( entry ); // removes the previous entry (by id) first
metaTrack = Meta::SqlTrackPtr::staticCast( m_collection->registry()->getTrack( urlId ) );
Q_ASSERT( metaTrack->urlId() == entry.id );
}
// create a new one
else
{
static int autoDecrementId = -1;
UrlEntry entry;
entry.id = -1;
entry.id = autoDecrementId--;
entry.path = track->path();
if( m_pathCache.contains( track->path() ) )
{
UrlEntry entry = m_urlsCache.value( m_pathCache.value( track->path() ) );
urlsCacheRemove( entry.id ); // remove the old cache entry
}
entry.uid = uid;
entry.directoryId = directoryId;
urlsCacheInsert( entry ); // and insert it again (or new)
urlsCacheInsert( entry );
metaTrack = KSharedPtr<Meta::SqlTrack>::staticCast( m_collection->getTrack( deviceId, rpath, directoryId, uid ) );
metaTrack = Meta::SqlTrackPtr::staticCast( m_collection->getTrack( deviceId, rpath, directoryId, uid ) );
}
if( !metaTrack )
{
warning() << "Something went wrong when importing track"<<track->path();
QString text = QString( "Something went wrong when importing track %1, metaTrack "
"is null while it shouldn't be." ).arg( track->path() );
warning() << "commitTrack():" << text.toLocal8Bit().data();
m_lastErrors.append( text );
return;
}
......@@ -292,11 +310,10 @@ SqlScanResultProcessor::commitTrack( CollectionScanner::Track *track,
metaTrack->setWriteFile( true );
}
void
SqlScanResultProcessor::deleteDeletedDirectories()
{
DEBUG_BLOCK
SqlStorage *storage = m_collection->sqlStorage();
// -- get a list of all mounted device ids
......@@ -304,21 +321,50 @@ SqlScanResultProcessor::deleteDeletedDirectories()
QString deviceIds;
foreach( int id, idList )
{
if ( !deviceIds.isEmpty() ) deviceIds += ',';
if ( !deviceIds.isEmpty() )
deviceIds += ',';
deviceIds += QString::number( id );
}
// -- get all (mounted) directories
QString query = QString( "SELECT id FROM directories "
"WHERE deviceid IN (%1);").arg( deviceIds );
QString query = QString( "SELECT id, deviceid, dir FROM directories "
"WHERE deviceid IN (%1)" ).arg( deviceIds );
QStringList res = storage->query( query );
// -- check if they have been found during the scan
for( int i = 0; i < res.count(); )
{
int dirId = res.at(i++).toInt();
if( !m_foundDirectories.contains( dirId ) )
int dirId = res.at( i++ ).toInt();
int deviceId = res.at( i++ ).toInt();
QString dir = res.at( i++ );
/* we need to match directories by their (absolute) path, otherwise following
* scenario triggers statistics loss (bug 298275):
*
* 1. user relocates collection to different filesystem, but clones path structure
* or toggles MassStorageDeviceHandler enabled in Config -> plugins.
* 2. collectionscanner knows nothing about directory ids, so it doesn't detect
* any track changes and emits a bunch of skipped (unchanged) dirs with no
* tracks.
* 3. SqlRegistry::getDirectory() called from there returns a different directory id
*    than in the past.
* 4. deleteDeletedDirectories() is called, and if it operates on directory ids,
* it happily removes _all_ directories, taking tracks with it.
* 5. Tracks disappear from the UI until full rescan, stats, lyrics, labels are
* lost forever.
*/
QString path = m_collection->mountPointManager()->getAbsolutePath( deviceId, dir );
bool deleteThisDir = false;
if( !m_foundDirectories.contains( path ) )
deleteThisDir = true;
else if( m_foundDirectories.value( path ) != dirId )
{
int newDirId = m_foundDirectories.value( path );
// as a safety measure, we don't delete the old dir if relocation fails
deleteThisDir = relocateTracksToNewDirectory( dirId, newDirId );
}
if( deleteThisDir )
{
deleteDeletedTracks( dirId );
query = QString( "DELETE FROM directories WHERE id = %1;" ).arg( dirId );
......@@ -330,6 +376,7 @@ SqlScanResultProcessor::deleteDeletedDirectories()
// Convenience overload: resolve the scanner directory to its database id and
// delegate to deleteDeletedTracks( int directoryId ).
void
SqlScanResultProcessor::deleteDeletedTracks( CollectionScanner::Directory *directory )
{
// the directory must have gone through commitDirectory(), which fills m_directoryIds;
// a miss here would silently map to id 0 and delete the wrong tracks
Q_ASSERT( m_directoryIds.contains( directory ) );
int directoryId = m_directoryIds.value( directory );
deleteDeletedTracks( directoryId );
}
......@@ -343,30 +390,80 @@ SqlScanResultProcessor::deleteDeletedTracks( int directoryId )
// -- check if the tracks have been found during the scan
foreach( int urlId, urlIds )
{
QString uid = m_urlsCache.value( urlId ).uid;
Q_ASSERT( m_urlsCache.contains( urlId ) );
const UrlEntry &entry = m_urlsCache[ urlId ];
Q_ASSERT( entry.directoryId == directoryId );
if( !m_foundTracks.contains( entry.uid ) )
{
removeTrack( entry );
urlsCacheRemove( entry );
}
}
}
/**
 * Directory id has changed from oldDirId to newDirId without an actual change
 * in the absolute directory path or contents; move every cached url entry of
 * oldDirId over to newDirId, updating both the tracks and the urls cache.
 *
 * @return true if all tracks were relocated and the old dir can be deleted
 *         without losses, false otherwise.
 *
 * Defect fixed: the scraped diff spliced removed lines of the old
 * deleteDeletedTracks() loop (the stray "if( !m_foundTracks...)" branch and its
 * removeTrack/urlsCacheRemove calls) into the middle of this new function;
 * they are dropped here to restore the committed code.
 */
bool
SqlScanResultProcessor::relocateTracksToNewDirectory( int oldDirId, int newDirId )
{
    QList<int> urlIds = m_directoryCache.values( oldDirId );
    if( urlIds.isEmpty() )
        return true; // nothing to do

    MountPointManager *manager = m_collection->mountPointManager();
    SqlRegistry *reg = m_collection->registry();
    SqlStorage *storage = m_collection->sqlStorage();

    // sanity checking, not strictly needed, but imagine new device appearing in the
    // middle of the scan, so rather prevent db corruption:
    QStringList res = storage->query( QString( "SELECT deviceid FROM directories "
                                               "WHERE id = %1" ).arg( newDirId ) );
    if( res.count() != 1 )
    {
        warning() << "relocateTracksToNewDirectory(): no or multiple entries when"
                  << "quering directory with id" << newDirId;
        return false;
    }
    int newDirDeviceId = res.at( 0 ).toInt();

    foreach( int urlId, urlIds )
    {
        Q_ASSERT( m_urlsCache.contains( urlId ) );
        UrlEntry entry = m_urlsCache.value( urlId );
        Meta::SqlTrackPtr track = Meta::SqlTrackPtr::staticCast( reg->getTrack( urlId ) );
        Q_ASSERT( track );
        // not strictly needed, but we want to sanity check it to prevent corrupt db
        int deviceId = manager->getIdForUrl( entry.path );
        if( newDirDeviceId != deviceId )
        {
            warning() << "relocateTracksToNewDirectory(): device id from newDirId ("
                      << res.at( 0 ).toInt() << ") and device id from mountPointManager ("
                      << deviceId << ") don't match!";
            return false;
        }
        QString rpath = manager->getRelativePath( deviceId, entry.path );
        track->setUrl( deviceId, rpath, newDirId );
        entry.directoryId = newDirId;
        urlsCacheInsert( entry ); // removes the previous entry (by id) first
    }
    return true;
}
void
SqlScanResultProcessor::removeTrack( int urlId, const QString uid )
SqlScanResultProcessor::removeTrack( const UrlEntry &entry )
{
debug() << "deleteTrack" << uid <<"url id"<< urlId;
if( m_collection->registry()->m_uidMap.contains( uid ) )
static_cast<Meta::SqlTrack*>(const_cast<Meta::Track*>(m_collection->registry()->m_uidMap.value( uid ).data()))->remove();
debug() << "removeTrack(" << entry << ")";
SqlRegistry *reg = m_collection->registry();
if( reg->m_uidMap.contains( entry.uid ) )
Meta::SqlTrackPtr::staticCast( reg->m_uidMap[ entry.uid ] )->remove();
else
m_collection->registry()->removeTrack( urlId, uid );
reg->removeTrack( entry.id, entry.uid );
}
void
SqlScanResultProcessor::urlsCacheInit()
{
DEBUG_BLOCK
SqlStorage *storage = m_collection->sqlStorage();
QString query = QString( "SELECT id, deviceid, rpath, directory, uniqueid FROM urls;");
......@@ -380,13 +477,11 @@ SqlScanResultProcessor::urlsCacheInit()
int directoryId = res.at(i++).toInt();
QString uid = res.at(i++);
if( !directoryId && !rpath.isEmpty() )
{
warning() << "Found urls entry without directory. A phantom track. Removing"<<rpath;
removeTrack( id, uid );
}
QString path = m_collection->mountPointManager()->getAbsolutePath( deviceId, rpath );
QString path;
if( deviceId )
path = m_collection->mountPointManager()->getAbsolutePath( deviceId, rpath );
else
path = rpath;
UrlEntry entry;
entry.id = id;
......@@ -394,22 +489,44 @@ SqlScanResultProcessor::urlsCacheInit()
entry.directoryId = directoryId;
entry.uid = uid;
if( !directoryId )
{
warning() << "Found urls entry without directory. A phantom track. Removing" << path;
removeTrack( entry );
continue;
}
urlsCacheInsert( entry );
}
}
void
SqlScanResultProcessor::urlsCacheInsert( UrlEntry entry )
SqlScanResultProcessor::urlsCacheInsert( const UrlEntry &entry )
{
if( !m_urlsCache.contains( entry.id ) )
urlsCacheRemove( entry.id );
// this case is normal operation
if( m_urlsCache.contains( entry.id ) )
urlsCacheRemove( m_urlsCache[ entry.id ] );
if( !entry.path.isEmpty() && m_pathCache.contains( entry.path ) ) {
// no idea how this can happen, but we clean it up
debug() << "Duplicate path in database:"<<entry.path;
removeTrack( entry.id, entry.uid ); // this will now delete the statistics, too
entry.path.clear();
// following cases shouldn't normally happen:
if( m_uidCache.contains( entry.uid ) )
{
int oldId = m_uidCache.value( entry.uid );
Q_ASSERT( m_urlsCache.contains( oldId ) );
const UrlEntry &old = m_urlsCache[ oldId ];
warning() << "urlsCacheInsert(): found duplicate in uid. old" << old
<< "will be hidden by the new one in the cache:" << entry;
}
if( m_pathCache.contains( entry.path ) )
{
int oldId = m_pathCache.value( entry.path );
Q_ASSERT( m_urlsCache.contains( oldId ) );
const UrlEntry &old = m_urlsCache[ oldId ];
warning() << "urlsCacheInsert(): found duplicate in path. old" << old
<< "will be hidden by the new one in the cache:" << entry;
}
// this will signify error in this class:
Q_ASSERT( !m_directoryCache.contains( entry.directoryId, entry.id ) );
m_urlsCache.insert( entry.id, entry );
m_uidCache.insert( entry.uid, entry.id );
......@@ -418,14 +535,21 @@ SqlScanResultProcessor::urlsCacheInsert( UrlEntry entry )
}
void
SqlScanResultProcessor::urlsCacheRemove( int id )
SqlScanResultProcessor::urlsCacheRemove( const UrlEntry &entry )
{
if( !m_urlsCache.contains( id ) )
if( !m_urlsCache.contains( entry.id ) )
return;
const UrlEntry &entry = m_urlsCache.value( id );
m_uidCache.remove( entry.uid );
m_pathCache.remove( entry.path );
m_directoryCache.remove( entry.directoryId, id );
m_urlsCache.remove( id );
m_directoryCache.remove( entry.directoryId, entry.id );
m_urlsCache.remove( entry.id );
}
// Debug-stream formatter for UrlEntry: prints id, path, directoryId and uid as
// one "Entry(...)" token; used by the warning()/debug() calls in this class.
QDebug
operator<<( QDebug dbg, const SqlScanResultProcessor::UrlEntry &entry )
{
dbg.nospace() << "Entry(id=" << entry.id << ", path=" << entry.path << ", dirId="
<< entry.directoryId << ", uid=" << entry.uid << ")";
// space() restores the default space-separated mode for subsequent operands
return dbg.space();
}
......@@ -77,18 +77,6 @@ class AMAROK_SQLCOLLECTION_EXPORT_TESTS SqlScanResultProcessor : public ScanResu
void deleteDeletedTracks( int directoryId );
private:
void removeTrack( int urlId, const QString uid );
Collections::SqlCollection* m_collection;
/** Contains all found directories with the directory id and the path */
QHash<int, CollectionScanner::Directory*> m_foundDirectories;
/** Contains all found tracks with the unique id */
QSet<QString> m_foundTracks;
QHash<CollectionScanner::Directory*, int> m_directoryIds;
QHash<CollectionScanner::Album*, int> m_albumIds;
// to speed up the scanning we buffer the whole urls table
struct UrlEntry
{
......@@ -97,10 +85,41 @@ class AMAROK_SQLCOLLECTION_EXPORT_TESTS SqlScanResultProcessor : public ScanResu
int directoryId;
QString uid;
};
friend QDebug operator<<( QDebug, const UrlEntry& );
/**
* Directory id has changed from @param oldDirId to @param newDirId without
* actual change in the absolute directory path or contents. Try to relocate
* tracks to the new directory, updating necessary fields.
* @return true if all tracks were successfully relocated and the old dir can be
* deleted without losses, false otherwise.
*/
bool relocateTracksToNewDirectory( int oldDirId, int newDirId );
/**
* Remove track from the database. Does not touch the cache - you should probably
* call urlsCacheRemove( entry )
*/
void removeTrack( const UrlEntry &entry );
Collections::SqlCollection* m_collection;
/** Contains all found directories with their absolute path and id */
QHash<QString, int> m_foundDirectories;
/** Contains all found tracks with the unique id */
QSet<QString> m_foundTracks;
QHash<CollectionScanner::Directory*, int> m_directoryIds;
QHash<CollectionScanner::Album*, int> m_albumIds;
void urlsCacheInit();
void urlsCacheInsert( UrlEntry entry );
void urlsCacheRemove( int id );
/**
* Inserts @param entry into caches. If an entry with same url id already exists
* in cache, it will be replaced. Duplicates in uidUrl and path are _not_ avoided,
* m_uidCache and m_pathCache will point to most recently added entry.
*/
void urlsCacheInsert( const UrlEntry &entry );
void urlsCacheRemove( const UrlEntry &entry );
/// maps UrlEntry id to UrlEntry
QHash<int, UrlEntry> m_urlsCache;
......@@ -108,9 +127,10 @@ class AMAROK_SQLCOLLECTION_EXPORT_TESTS SqlScanResultProcessor : public ScanResu
QHash<QString, int> m_uidCache;
/// maps path to UrlEntry id
QHash<QString, int> m_pathCache;
/// maps directory if to UrlEntry id
/// maps directory id to UrlEntry id
QMultiHash<int, int> m_directoryCache;
};
QDebug operator<<( QDebug dbg, const SqlScanResultProcessor::UrlEntry &entry );
#endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment