Check MD5 only when it is needed (fixes #101)

This makes grive check md5 sums only when:
1) a local rename is supposed (when there are a new file and a deleted file of the same size)
2) local ctime is changed, but file size isn't
pull/126/head
Vitaliy Filippov 2016-09-28 01:19:13 +03:00
parent 59d02a65cb
commit dd77c99872
11 changed files with 128 additions and 55 deletions

View File

@ -89,6 +89,8 @@ Grive uses cmake to build. Basic install sequence is
- ignore regexp does not persist anymore (note that Grive will still track it to not
accidentally delete remote files when changing ignore regexp)
- added options to limit upload and download speed
- faster upload of new and changed files. now Grive uploads files without first calculating
md5 checksum when file is created locally or when its size changes.
### Grive2 v0.5

View File

@ -36,7 +36,8 @@ Entry::Entry( ) :
m_is_dir ( true ),
m_resource_id ( "folder:root" ),
m_change_stamp ( -1 ),
m_is_removed ( false )
m_is_removed ( false ),
m_size ( 0 )
{
}
@ -65,6 +66,11 @@ std::string Entry::MD5() const
return m_md5 ;
}
u64_t Entry::Size() const
{
return m_size ;
}
DateTime Entry::MTime() const
{
return m_mtime ;

View File

@ -19,6 +19,7 @@
#pragma once
#include "util/Types.hh"
#include "util/DateTime.hh"
#include "util/FileSystem.hh"
@ -44,6 +45,7 @@ public :
bool IsDir() const ;
std::string MD5() const ;
DateTime MTime() const ;
u64_t Size() const ;
std::string Name() const ;
@ -80,6 +82,7 @@ protected :
DateTime m_mtime ;
bool m_is_removed ;
u64_t m_size ;
} ;
} // end of namespace gr

View File

@ -47,6 +47,7 @@ namespace gr {
Resource::Resource( const fs::path& root_folder ) :
m_name ( root_folder.string() ),
m_kind ( "folder" ),
m_size ( 0 ),
m_id ( "folder:root" ),
m_href ( "root" ),
m_is_editable( true ),
@ -60,6 +61,7 @@ Resource::Resource( const fs::path& root_folder ) :
Resource::Resource( const std::string& name, const std::string& kind ) :
m_name ( name ),
m_kind ( kind ),
m_size ( 0 ),
m_is_editable( true ),
m_parent ( 0 ),
m_state ( unknown ),
@ -149,6 +151,7 @@ void Resource::AssignIDs( const Entry& remote )
m_content = remote.ContentSrc() ;
m_is_editable = remote.IsEditable() ;
m_etag = remote.ETag() ;
m_md5 = remote.MD5() ;
}
}
@ -193,7 +196,7 @@ void Resource::FromRemoteFile( const Entry& remote )
m_state = local_deleted ;
}
}
// remote checksum unknown, assume the file is not changed in remote
else if ( remote.MD5().empty() )
{
@ -201,16 +204,9 @@ void Resource::FromRemoteFile( const Entry& remote )
Path(), log::verbose ) ;
m_state = sync ;
}
// if checksum is equal, no need to compare the mtime
else if ( remote.MD5() == m_md5 )
{
Log( "file %1% is already in sync", Path(), log::verbose ) ;
m_state = sync ;
}
// use mtime to check which one is more recent
else
else if ( remote.Size() != m_size || remote.MD5() != GetMD5() )
{
assert( m_state != unknown ) ;
@ -230,6 +226,13 @@ void Resource::FromRemoteFile( const Entry& remote )
else
Trace( "file %1% state is %2%", m_name, m_state ) ;
}
// if checksum is equal, no need to compare the mtime
else
{
Log( "file %1% is already in sync", Path(), log::verbose ) ;
m_state = sync ;
}
}
void Resource::FromDeleted( Val& state )
@ -242,6 +245,8 @@ void Resource::FromDeleted( Val& state )
m_md5 = state["md5"];
if ( state.Has( "srv_time" ) )
m_mtime.Assign( state[ "srv_time" ].U64(), 0 ) ;
if ( state.Has( "size" ) )
m_size = state[ "size" ].U64();
m_state = both_deleted;
}
@ -259,7 +264,7 @@ void Resource::FromLocal( Val& state )
bool is_dir;
try
{
os::Stat( path, &m_ctime, NULL, &is_dir ) ;
os::Stat( path, &m_ctime, (off64_t*)&m_size, &is_dir ) ;
}
catch ( os::Error &e )
{
@ -287,9 +292,9 @@ void Resource::FromLocal( Val& state )
{
if ( !is_dir )
{
m_md5 = crypt::MD5::Get( path );
// File is changed locally. TODO: Detect conflicts
is_changed = !state.Has( "md5" ) || m_md5 != state["md5"].Str();
is_changed = ( state.Has( "size" ) && m_size != state["size"].U64() ) ||
!state.Has( "md5" ) || GetMD5() != state["md5"].Str();
}
else
is_changed = true;
@ -479,30 +484,33 @@ void Resource::Sync( Syncer *syncer, ResourceTree *res_tree, const Val& options
}
}
void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options )
bool Resource::CheckRename( Syncer* syncer, ResourceTree *res_tree )
{
assert( !IsRoot() || m_state == sync ) ; // root is always sync
assert( IsRoot() || !syncer || m_parent->IsFolder() ) ;
assert( IsRoot() || m_parent->m_state != remote_deleted ) ;
assert( IsRoot() || m_parent->m_state != local_deleted ) ;
const fs::path path = Path() ;
// Detect renames
if ( !IsFolder() && ( m_state == local_new || m_state == local_deleted ||
m_state == remote_new || m_state == remote_deleted ) )
if ( !IsFolder() && ( m_state == local_new || m_state == remote_new ) )
{
details::MD5Range moved = res_tree->FindByMD5( m_md5 );
bool is_local = m_state == local_new || m_state == local_deleted;
State other;
if ( m_state == local_new )
other = local_deleted;
else if ( m_state == local_deleted )
other = local_new;
else if ( m_state == remote_new )
other = remote_deleted;
else
other = remote_new;
bool is_local = m_state == local_new;
State other = is_local ? local_deleted : remote_deleted;
if ( is_local )
{
// First check size index for locally added files
details::SizeRange moved = res_tree->FindBySize( m_size );
bool found = false;
for ( details::SizeMap::iterator i = moved.first ; i != moved.second; i++ )
{
Resource *m = *i;
if ( m->m_state == other )
{
found = true;
break;
}
}
if ( !found )
{
// Don't check md5 sums if there are no deleted files with same size
return false;
}
}
details::MD5Range moved = res_tree->FindByMD5( GetMD5() );
for ( details::MD5Map::iterator i = moved.first ; i != moved.second; i++ )
{
Resource *m = *i;
@ -530,10 +538,25 @@ void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& opti
}
from->m_state = both_deleted;
to->m_state = sync;
return;
return true;
}
}
}
return false;
}
void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options )
{
assert( !IsRoot() || m_state == sync ) ; // root is always sync
assert( IsRoot() || !syncer || m_parent->IsFolder() ) ;
assert( IsRoot() || m_parent->m_state != remote_deleted ) ;
assert( IsRoot() || m_parent->m_state != local_deleted ) ;
const fs::path path = Path() ;
// Detect renames
if ( CheckRename( syncer, res_tree ) )
return;
switch ( m_state )
{
@ -688,6 +711,7 @@ void Resource::SetIndex( bool re_stat )
if ( !is_dir )
{
m_json->Set( "md5", Val( m_md5 ) );
m_json->Set( "size", Val( m_size ) );
m_json->Del( "tree" );
}
else
@ -695,6 +719,7 @@ void Resource::SetIndex( bool re_stat )
// add tree item if it does not exist
m_json->Item( "tree" );
m_json->Del( "md5" );
m_json->Del( "size" );
}
}
@ -731,11 +756,28 @@ std::string Resource::StateStr() const
return ss.str() ;
}
u64_t Resource::Size() const
{
return m_size ;
}
std::string Resource::MD5() const
{
return m_md5 ;
}
std::string Resource::GetMD5()
{
if ( m_md5.empty() && !IsFolder() && m_local_exists )
{
// MD5 checksum is calculated lazily and only when really needed:
// 1) when a local rename is supposed (when there are a new file and a deleted file of the same size)
// 2) when local ctime is changed, but file size isn't
m_md5 = crypt::MD5::Get( Path() );
}
return m_md5 ;
}
bool Resource::IsRoot() const
{
// Root entry does not show up in file feeds, so we check for empty parent (and self-href)

View File

@ -19,6 +19,7 @@
#pragma once
#include "util/Types.hh"
#include "util/DateTime.hh"
#include "util/Exception.hh"
#include "util/FileSystem.hh"
@ -108,7 +109,9 @@ public :
bool IsInRootTree() const ;
bool IsRoot() const ;
bool HasID() const ;
u64_t Size() const;
std::string MD5() const ;
std::string GetMD5() ;
void FromRemote( const Entry& remote ) ;
void FromDeleted( Val& state ) ;
@ -141,6 +144,7 @@ private :
void DeleteIndex() ;
void SetIndex( bool ) ;
bool CheckRename( Syncer* syncer, ResourceTree *res_tree ) ;
void SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options ) ;
private :
@ -149,6 +153,7 @@ private :
std::string m_md5 ;
DateTime m_mtime ;
DateTime m_ctime ;
u64_t m_size ;
std::string m_id ;
std::string m_href ;

View File

@ -105,7 +105,13 @@ MD5Range ResourceTree::FindByMD5( const std::string& md5 )
return MD5Range( map.end(), map.end() ) ;
}
/// Reinsert should be called when the ID/HREF were updated
SizeRange ResourceTree::FindBySize( u64_t size )
{
SizeMap& map = m_set.get<BySize>() ;
return map.equal_range( size );
}
/// Reinsert should be called when the ID/HREF/MD5 were updated
bool ResourceTree::ReInsert( Resource *coll )
{
Set& s = m_set.get<ByIdentity>() ;

View File

@ -36,19 +36,23 @@ namespace details
struct ByMD5 {} ;
struct ByHref {} ;
struct ByIdentity {} ;
struct BySize {} ;
typedef multi_index_container<
Resource*,
indexed_by<
hashed_non_unique<tag<ByHref>, const_mem_fun<Resource, std::string, &Resource::SelfHref> >,
hashed_non_unique<tag<ByMD5>, const_mem_fun<Resource, std::string, &Resource::MD5> >,
hashed_non_unique<tag<BySize>, const_mem_fun<Resource, u64_t, &Resource::Size> >,
hashed_unique<tag<ByIdentity>, identity<Resource*> >
>
> Folders ;
typedef Folders::index<ByMD5>::type MD5Map ;
typedef Folders::index<ByHref>::type HrefMap ;
typedef Folders::index<BySize>::type SizeMap ;
typedef Folders::index<ByIdentity>::type Set ;
typedef std::pair<SizeMap::iterator, SizeMap::iterator> SizeRange ;
typedef std::pair<MD5Map::iterator, MD5Map::iterator> MD5Range ;
}
@ -70,6 +74,7 @@ public :
Resource* FindByHref( const std::string& href ) ;
const Resource* FindByHref( const std::string& href ) const ;
details::MD5Range FindByMD5( const std::string& md5 ) ;
details::SizeRange FindBySize( u64_t size ) ;
bool ReInsert( Resource *coll ) ;

View File

@ -104,20 +104,21 @@ void State::FromLocal( const fs::path& p, Resource* folder, Val& tree )
{
// if the Resource object of the child already exists, it should
// have been so no need to do anything here
Resource *c = folder->FindChild( fname ) ;
Resource *c = folder->FindChild( fname ), *c2 = c ;
if ( !c )
{
c = new Resource( fname, "" ) ;
folder->AddChild( c ) ;
m_res.Insert( c ) ;
c2 = new Resource( fname, "" ) ;
folder->AddChild( c2 ) ;
}
leftover.erase( fname );
Val& rec = tree.Item( fname );
if ( m_force )
rec.Del( "srv_time" );
c->FromLocal( rec ) ;
if ( c->IsFolder() )
FromLocal( *i, c, rec.Item( "tree" ) ) ;
c2->FromLocal( rec ) ;
if ( !c )
m_res.Insert( c2 ) ;
if ( c2->IsFolder() )
FromLocal( *i, c2, rec.Item( "tree" ) ) ;
}
}
@ -129,17 +130,18 @@ void State::FromLocal( const fs::path& p, Resource* folder, Val& tree )
else
{
// Restore state of locally deleted files
Resource *c = folder->FindChild( i->first ) ;
Resource *c = folder->FindChild( i->first ), *c2 ;
if ( !c )
{
c = new Resource( i->first, i->second.Has( "tree" ) ? "folder" : "file" ) ;
folder->AddChild( c ) ;
m_res.Insert( c ) ;
c2 = new Resource( i->first, i->second.Has( "tree" ) ? "folder" : "file" ) ;
folder->AddChild( c2 ) ;
}
Val& rec = tree.Item( i->first );
if ( m_force || m_ign_changed )
rec.Del( "srv_time" );
c->FromDeleted( rec );
c2->FromDeleted( rec );
if ( !c )
m_res.Insert( c2 ) ;
}
}
}

View File

@ -44,6 +44,7 @@ void Entry2::Update( const Val& item )
// changestamp only appears in change feed entries
m_change_stamp = is_chg ? item["id"].Int() : -1 ;
m_is_removed = is_chg && item["deleted"].Bool() ;
m_size = 0 ;
const Val& file = is_chg && !m_is_removed ? item["file"] : item;
@ -75,6 +76,7 @@ void Entry2::Update( const Val& item )
else
{
m_md5 = file["md5Checksum"] ;
m_size = file["fileSize"].U64() ;
m_content_src = file["downloadUrl"] ;
// convert to lower case for easy comparison
std::transform( m_md5.begin(), m_md5.end(), m_md5.begin(), tolower ) ;

View File

@ -44,7 +44,7 @@ void Stat( const fs::path& filename, DateTime *t, off_t *size, bool *is_dir )
Stat( filename.string(), t, size, is_dir ) ;
}
void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir )
void Stat( const std::string& filename, DateTime *t, off64_t *size, bool *is_dir )
{
struct stat s = {} ;
if ( ::stat( filename.c_str(), &s ) != 0 )
@ -65,10 +65,10 @@ void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir )
*t = DateTime( s.st_ctim.tv_sec, s.st_ctim.tv_nsec);
#endif
}
if (size)
if ( size )
*size = s.st_size;
if (is_dir)
*is_dir = S_ISDIR(s.st_mode) ? true : false;
if ( is_dir )
*is_dir = S_ISDIR( s.st_mode ) ? true : false;
}
void SetFileTime( const fs::path& filename, const DateTime& t )

View File

@ -33,8 +33,8 @@ namespace os
{
struct Error : virtual Exception {} ;
void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir ) ;
void Stat( const fs::path& filename, DateTime *t, off_t *size, bool *is_dir ) ;
void Stat( const std::string& filename, DateTime *t, off64_t *size, bool *is_dir ) ;
void Stat( const fs::path& filename, DateTime *t, off64_t *size, bool *is_dir ) ;
void SetFileTime( const std::string& filename, const DateTime& t ) ;
void SetFileTime( const fs::path& filename, const DateTime& t ) ;