mirror of https://github.com/vitalif/grive2
Check MD5 only when it is needed (fixes #101)
This makes grive check MD5 sums only when: 1) a local rename is suspected (there is a new file and a deleted file of the same size); 2) the local ctime has changed, but the file size hasn't.
pull/126/head
parent
59d02a65cb
commit
dd77c99872
|
@ -89,6 +89,8 @@ Grive uses cmake to build. Basic install sequence is
|
|||
- ignore regexp does not persist anymore (note that Grive will still track it to not
|
||||
accidentally delete remote files when changing ignore regexp)
|
||||
- added options to limit upload and download speed
|
||||
- faster upload of new and changed files. now Grive uploads files without first calculating
|
||||
md5 checksum when file is created locally or when its size changes.
|
||||
|
||||
### Grive2 v0.5
|
||||
|
||||
|
|
|
@ -36,7 +36,8 @@ Entry::Entry( ) :
|
|||
m_is_dir ( true ),
|
||||
m_resource_id ( "folder:root" ),
|
||||
m_change_stamp ( -1 ),
|
||||
m_is_removed ( false )
|
||||
m_is_removed ( false ),
|
||||
m_size ( 0 )
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -65,6 +66,11 @@ std::string Entry::MD5() const
|
|||
return m_md5 ;
|
||||
}
|
||||
|
||||
// Returns the size stored in m_size (the default constructor initialises it to 0).
u64_t Entry::Size() const
|
||||
{
|
||||
return m_size ;
|
||||
}
|
||||
|
||||
DateTime Entry::MTime() const
|
||||
{
|
||||
return m_mtime ;
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "util/Types.hh"
|
||||
#include "util/DateTime.hh"
|
||||
#include "util/FileSystem.hh"
|
||||
|
||||
|
@ -44,6 +45,7 @@ public :
|
|||
bool IsDir() const ;
|
||||
std::string MD5() const ;
|
||||
DateTime MTime() const ;
|
||||
u64_t Size() const ;
|
||||
|
||||
std::string Name() const ;
|
||||
|
||||
|
@ -80,6 +82,7 @@ protected :
|
|||
|
||||
DateTime m_mtime ;
|
||||
bool m_is_removed ;
|
||||
u64_t m_size ;
|
||||
} ;
|
||||
|
||||
} // end of namespace gr
|
||||
|
|
|
@ -47,6 +47,7 @@ namespace gr {
|
|||
Resource::Resource( const fs::path& root_folder ) :
|
||||
m_name ( root_folder.string() ),
|
||||
m_kind ( "folder" ),
|
||||
m_size ( 0 ),
|
||||
m_id ( "folder:root" ),
|
||||
m_href ( "root" ),
|
||||
m_is_editable( true ),
|
||||
|
@ -60,6 +61,7 @@ Resource::Resource( const fs::path& root_folder ) :
|
|||
Resource::Resource( const std::string& name, const std::string& kind ) :
|
||||
m_name ( name ),
|
||||
m_kind ( kind ),
|
||||
m_size ( 0 ),
|
||||
m_is_editable( true ),
|
||||
m_parent ( 0 ),
|
||||
m_state ( unknown ),
|
||||
|
@ -149,6 +151,7 @@ void Resource::AssignIDs( const Entry& remote )
|
|||
m_content = remote.ContentSrc() ;
|
||||
m_is_editable = remote.IsEditable() ;
|
||||
m_etag = remote.ETag() ;
|
||||
m_md5 = remote.MD5() ;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -193,7 +196,7 @@ void Resource::FromRemoteFile( const Entry& remote )
|
|||
m_state = local_deleted ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// remote checksum unknown, assume the file is not changed in remote
|
||||
else if ( remote.MD5().empty() )
|
||||
{
|
||||
|
@ -201,16 +204,9 @@ void Resource::FromRemoteFile( const Entry& remote )
|
|||
Path(), log::verbose ) ;
|
||||
m_state = sync ;
|
||||
}
|
||||
|
||||
// if checksum is equal, no need to compare the mtime
|
||||
else if ( remote.MD5() == m_md5 )
|
||||
{
|
||||
Log( "file %1% is already in sync", Path(), log::verbose ) ;
|
||||
m_state = sync ;
|
||||
}
|
||||
|
||||
// use mtime to check which one is more recent
|
||||
else
|
||||
else if ( remote.Size() != m_size || remote.MD5() != GetMD5() )
|
||||
{
|
||||
assert( m_state != unknown ) ;
|
||||
|
||||
|
@ -230,6 +226,13 @@ void Resource::FromRemoteFile( const Entry& remote )
|
|||
else
|
||||
Trace( "file %1% state is %2%", m_name, m_state ) ;
|
||||
}
|
||||
|
||||
// if checksum is equal, no need to compare the mtime
|
||||
else
|
||||
{
|
||||
Log( "file %1% is already in sync", Path(), log::verbose ) ;
|
||||
m_state = sync ;
|
||||
}
|
||||
}
|
||||
|
||||
void Resource::FromDeleted( Val& state )
|
||||
|
@ -242,6 +245,8 @@ void Resource::FromDeleted( Val& state )
|
|||
m_md5 = state["md5"];
|
||||
if ( state.Has( "srv_time" ) )
|
||||
m_mtime.Assign( state[ "srv_time" ].U64(), 0 ) ;
|
||||
if ( state.Has( "size" ) )
|
||||
m_size = state[ "size" ].U64();
|
||||
m_state = both_deleted;
|
||||
}
|
||||
|
||||
|
@ -259,7 +264,7 @@ void Resource::FromLocal( Val& state )
|
|||
bool is_dir;
|
||||
try
|
||||
{
|
||||
os::Stat( path, &m_ctime, NULL, &is_dir ) ;
|
||||
os::Stat( path, &m_ctime, (off64_t*)&m_size, &is_dir ) ;
|
||||
}
|
||||
catch ( os::Error &e )
|
||||
{
|
||||
|
@ -287,9 +292,9 @@ void Resource::FromLocal( Val& state )
|
|||
{
|
||||
if ( !is_dir )
|
||||
{
|
||||
m_md5 = crypt::MD5::Get( path );
|
||||
// File is changed locally. TODO: Detect conflicts
|
||||
is_changed = !state.Has( "md5" ) || m_md5 != state["md5"].Str();
|
||||
is_changed = ( state.Has( "size" ) && m_size != state["size"].U64() ) ||
|
||||
!state.Has( "md5" ) || GetMD5() != state["md5"].Str();
|
||||
}
|
||||
else
|
||||
is_changed = true;
|
||||
|
@ -479,30 +484,33 @@ void Resource::Sync( Syncer *syncer, ResourceTree *res_tree, const Val& options
|
|||
}
|
||||
}
|
||||
|
||||
void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options )
|
||||
bool Resource::CheckRename( Syncer* syncer, ResourceTree *res_tree )
|
||||
{
|
||||
assert( !IsRoot() || m_state == sync ) ; // root is always sync
|
||||
assert( IsRoot() || !syncer || m_parent->IsFolder() ) ;
|
||||
assert( IsRoot() || m_parent->m_state != remote_deleted ) ;
|
||||
assert( IsRoot() || m_parent->m_state != local_deleted ) ;
|
||||
|
||||
const fs::path path = Path() ;
|
||||
|
||||
// Detect renames
|
||||
if ( !IsFolder() && ( m_state == local_new || m_state == local_deleted ||
|
||||
m_state == remote_new || m_state == remote_deleted ) )
|
||||
if ( !IsFolder() && ( m_state == local_new || m_state == remote_new ) )
|
||||
{
|
||||
details::MD5Range moved = res_tree->FindByMD5( m_md5 );
|
||||
bool is_local = m_state == local_new || m_state == local_deleted;
|
||||
State other;
|
||||
if ( m_state == local_new )
|
||||
other = local_deleted;
|
||||
else if ( m_state == local_deleted )
|
||||
other = local_new;
|
||||
else if ( m_state == remote_new )
|
||||
other = remote_deleted;
|
||||
else
|
||||
other = remote_new;
|
||||
bool is_local = m_state == local_new;
|
||||
State other = is_local ? local_deleted : remote_deleted;
|
||||
if ( is_local )
|
||||
{
|
||||
// First check size index for locally added files
|
||||
details::SizeRange moved = res_tree->FindBySize( m_size );
|
||||
bool found = false;
|
||||
for ( details::SizeMap::iterator i = moved.first ; i != moved.second; i++ )
|
||||
{
|
||||
Resource *m = *i;
|
||||
if ( m->m_state == other )
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( !found )
|
||||
{
|
||||
// Don't check md5 sums if there are no deleted files with same size
|
||||
return false;
|
||||
}
|
||||
}
|
||||
details::MD5Range moved = res_tree->FindByMD5( GetMD5() );
|
||||
for ( details::MD5Map::iterator i = moved.first ; i != moved.second; i++ )
|
||||
{
|
||||
Resource *m = *i;
|
||||
|
@ -530,10 +538,25 @@ void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& opti
|
|||
}
|
||||
from->m_state = both_deleted;
|
||||
to->m_state = sync;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Resource::SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options )
|
||||
{
|
||||
assert( !IsRoot() || m_state == sync ) ; // root is always sync
|
||||
assert( IsRoot() || !syncer || m_parent->IsFolder() ) ;
|
||||
assert( IsRoot() || m_parent->m_state != remote_deleted ) ;
|
||||
assert( IsRoot() || m_parent->m_state != local_deleted ) ;
|
||||
|
||||
const fs::path path = Path() ;
|
||||
|
||||
// Detect renames
|
||||
if ( CheckRename( syncer, res_tree ) )
|
||||
return;
|
||||
|
||||
switch ( m_state )
|
||||
{
|
||||
|
@ -688,6 +711,7 @@ void Resource::SetIndex( bool re_stat )
|
|||
if ( !is_dir )
|
||||
{
|
||||
m_json->Set( "md5", Val( m_md5 ) );
|
||||
m_json->Set( "size", Val( m_size ) );
|
||||
m_json->Del( "tree" );
|
||||
}
|
||||
else
|
||||
|
@ -695,6 +719,7 @@ void Resource::SetIndex( bool re_stat )
|
|||
// add tree item if it does not exist
|
||||
m_json->Item( "tree" );
|
||||
m_json->Del( "md5" );
|
||||
m_json->Del( "size" );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -731,11 +756,28 @@ std::string Resource::StateStr() const
|
|||
return ss.str() ;
|
||||
}
|
||||
|
||||
// Returns the size currently stored in m_size.
// This accessor is also the key extractor of the BySize index in ResourceTree.
u64_t Resource::Size() const
|
||||
{
|
||||
return m_size ;
|
||||
}
|
||||
|
||||
// Returns the MD5 checksum currently stored in m_md5 without computing it;
// the result may be empty. Use GetMD5() to calculate the checksum on demand.
// This accessor is also the key extractor of the ByMD5 index in ResourceTree.
std::string Resource::MD5() const
|
||||
{
|
||||
return m_md5 ;
|
||||
}
|
||||
|
||||
// Returns the MD5 checksum of this resource, calculating it on demand.
// The checksum is computed from the file at Path() and stored in m_md5 only
// when m_md5 is empty, the resource is not a folder and m_local_exists is set;
// in every other case the stored value is returned unchanged (possibly empty).
// NOTE(review): m_md5 is the key of the ByMD5 index (via Resource::MD5), so
// filling it in here for a resource that is already in a ResourceTree may
// require ResourceTree::ReInsert() -- confirm against callers.
std::string Resource::GetMD5()
|
||||
{
|
||||
if ( m_md5.empty() && !IsFolder() && m_local_exists )
|
||||
{
|
||||
// MD5 checksum is calculated lazily and only when really needed:
|
||||
// 1) when a local rename is supposed (when there are a new file and a deleted file of the same size)
|
||||
// 2) when local ctime is changed, but file size isn't
|
||||
m_md5 = crypt::MD5::Get( Path() );
|
||||
}
|
||||
return m_md5 ;
|
||||
}
|
||||
|
||||
bool Resource::IsRoot() const
|
||||
{
|
||||
// Root entry does not show up in file feeds, so we check for empty parent (and self-href)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "util/Types.hh"
|
||||
#include "util/DateTime.hh"
|
||||
#include "util/Exception.hh"
|
||||
#include "util/FileSystem.hh"
|
||||
|
@ -108,7 +109,9 @@ public :
|
|||
bool IsInRootTree() const ;
|
||||
bool IsRoot() const ;
|
||||
bool HasID() const ;
|
||||
u64_t Size() const;
|
||||
std::string MD5() const ;
|
||||
std::string GetMD5() ;
|
||||
|
||||
void FromRemote( const Entry& remote ) ;
|
||||
void FromDeleted( Val& state ) ;
|
||||
|
@ -141,6 +144,7 @@ private :
|
|||
void DeleteIndex() ;
|
||||
void SetIndex( bool ) ;
|
||||
|
||||
bool CheckRename( Syncer* syncer, ResourceTree *res_tree ) ;
|
||||
void SyncSelf( Syncer* syncer, ResourceTree *res_tree, const Val& options ) ;
|
||||
|
||||
private :
|
||||
|
@ -149,6 +153,7 @@ private :
|
|||
std::string m_md5 ;
|
||||
DateTime m_mtime ;
|
||||
DateTime m_ctime ;
|
||||
u64_t m_size ;
|
||||
|
||||
std::string m_id ;
|
||||
std::string m_href ;
|
||||
|
|
|
@ -105,7 +105,13 @@ MD5Range ResourceTree::FindByMD5( const std::string& md5 )
|
|||
return MD5Range( map.end(), map.end() ) ;
|
||||
}
|
||||
|
||||
/// Reinsert should be called when the ID/HREF were updated
|
||||
// Looks up resources by size in the BySize index of m_set.
// Returns the pair of iterators produced by equal_range(), delimiting the
// resources whose Size() equals `size`.
SizeRange ResourceTree::FindBySize( u64_t size )
|
||||
{
|
||||
SizeMap& map = m_set.get<BySize>() ;
|
||||
return map.equal_range( size );
|
||||
}
|
||||
|
||||
/// Reinsert should be called when the ID/HREF/MD5 were updated
|
||||
bool ResourceTree::ReInsert( Resource *coll )
|
||||
{
|
||||
Set& s = m_set.get<ByIdentity>() ;
|
||||
|
|
|
@ -36,19 +36,23 @@ namespace details
|
|||
struct ByMD5 {} ;
|
||||
struct ByHref {} ;
|
||||
struct ByIdentity {} ;
|
||||
struct BySize {} ;
|
||||
|
||||
typedef multi_index_container<
|
||||
Resource*,
|
||||
indexed_by<
|
||||
hashed_non_unique<tag<ByHref>, const_mem_fun<Resource, std::string, &Resource::SelfHref> >,
|
||||
hashed_non_unique<tag<ByMD5>, const_mem_fun<Resource, std::string, &Resource::MD5> >,
|
||||
hashed_non_unique<tag<BySize>, const_mem_fun<Resource, u64_t, &Resource::Size> >,
|
||||
hashed_unique<tag<ByIdentity>, identity<Resource*> >
|
||||
>
|
||||
> Folders ;
|
||||
|
||||
typedef Folders::index<ByMD5>::type MD5Map ;
|
||||
typedef Folders::index<ByHref>::type HrefMap ;
|
||||
typedef Folders::index<BySize>::type SizeMap ;
|
||||
typedef Folders::index<ByIdentity>::type Set ;
|
||||
typedef std::pair<SizeMap::iterator, SizeMap::iterator> SizeRange ;
|
||||
typedef std::pair<MD5Map::iterator, MD5Map::iterator> MD5Range ;
|
||||
}
|
||||
|
||||
|
@ -70,6 +74,7 @@ public :
|
|||
Resource* FindByHref( const std::string& href ) ;
|
||||
const Resource* FindByHref( const std::string& href ) const ;
|
||||
details::MD5Range FindByMD5( const std::string& md5 ) ;
|
||||
details::SizeRange FindBySize( u64_t size ) ;
|
||||
|
||||
bool ReInsert( Resource *coll ) ;
|
||||
|
||||
|
|
|
@ -104,20 +104,21 @@ void State::FromLocal( const fs::path& p, Resource* folder, Val& tree )
|
|||
{
|
||||
// if the Resource object of the child already exists, it should
|
||||
// have been so no need to do anything here
|
||||
Resource *c = folder->FindChild( fname ) ;
|
||||
Resource *c = folder->FindChild( fname ), *c2 = c ;
|
||||
if ( !c )
|
||||
{
|
||||
c = new Resource( fname, "" ) ;
|
||||
folder->AddChild( c ) ;
|
||||
m_res.Insert( c ) ;
|
||||
c2 = new Resource( fname, "" ) ;
|
||||
folder->AddChild( c2 ) ;
|
||||
}
|
||||
leftover.erase( fname );
|
||||
Val& rec = tree.Item( fname );
|
||||
if ( m_force )
|
||||
rec.Del( "srv_time" );
|
||||
c->FromLocal( rec ) ;
|
||||
if ( c->IsFolder() )
|
||||
FromLocal( *i, c, rec.Item( "tree" ) ) ;
|
||||
c2->FromLocal( rec ) ;
|
||||
if ( !c )
|
||||
m_res.Insert( c2 ) ;
|
||||
if ( c2->IsFolder() )
|
||||
FromLocal( *i, c2, rec.Item( "tree" ) ) ;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -129,17 +130,18 @@ void State::FromLocal( const fs::path& p, Resource* folder, Val& tree )
|
|||
else
|
||||
{
|
||||
// Restore state of locally deleted files
|
||||
Resource *c = folder->FindChild( i->first ) ;
|
||||
Resource *c = folder->FindChild( i->first ), *c2 ;
|
||||
if ( !c )
|
||||
{
|
||||
c = new Resource( i->first, i->second.Has( "tree" ) ? "folder" : "file" ) ;
|
||||
folder->AddChild( c ) ;
|
||||
m_res.Insert( c ) ;
|
||||
c2 = new Resource( i->first, i->second.Has( "tree" ) ? "folder" : "file" ) ;
|
||||
folder->AddChild( c2 ) ;
|
||||
}
|
||||
Val& rec = tree.Item( i->first );
|
||||
if ( m_force || m_ign_changed )
|
||||
rec.Del( "srv_time" );
|
||||
c->FromDeleted( rec );
|
||||
c2->FromDeleted( rec );
|
||||
if ( !c )
|
||||
m_res.Insert( c2 ) ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@ void Entry2::Update( const Val& item )
|
|||
// changestamp only appears in change feed entries
|
||||
m_change_stamp = is_chg ? item["id"].Int() : -1 ;
|
||||
m_is_removed = is_chg && item["deleted"].Bool() ;
|
||||
m_size = 0 ;
|
||||
|
||||
const Val& file = is_chg && !m_is_removed ? item["file"] : item;
|
||||
|
||||
|
@ -75,6 +76,7 @@ void Entry2::Update( const Val& item )
|
|||
else
|
||||
{
|
||||
m_md5 = file["md5Checksum"] ;
|
||||
m_size = file["fileSize"].U64() ;
|
||||
m_content_src = file["downloadUrl"] ;
|
||||
// convert to lower case for easy comparison
|
||||
std::transform( m_md5.begin(), m_md5.end(), m_md5.begin(), tolower ) ;
|
||||
|
|
|
@ -44,7 +44,7 @@ void Stat( const fs::path& filename, DateTime *t, off_t *size, bool *is_dir )
|
|||
Stat( filename.string(), t, size, is_dir ) ;
|
||||
}
|
||||
|
||||
void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir )
|
||||
void Stat( const std::string& filename, DateTime *t, off64_t *size, bool *is_dir )
|
||||
{
|
||||
struct stat s = {} ;
|
||||
if ( ::stat( filename.c_str(), &s ) != 0 )
|
||||
|
@ -65,10 +65,10 @@ void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir )
|
|||
*t = DateTime( s.st_ctim.tv_sec, s.st_ctim.tv_nsec);
|
||||
#endif
|
||||
}
|
||||
if (size)
|
||||
if ( size )
|
||||
*size = s.st_size;
|
||||
if (is_dir)
|
||||
*is_dir = S_ISDIR(s.st_mode) ? true : false;
|
||||
if ( is_dir )
|
||||
*is_dir = S_ISDIR( s.st_mode ) ? true : false;
|
||||
}
|
||||
|
||||
void SetFileTime( const fs::path& filename, const DateTime& t )
|
||||
|
|
|
@ -33,8 +33,8 @@ namespace os
|
|||
{
|
||||
struct Error : virtual Exception {} ;
|
||||
|
||||
void Stat( const std::string& filename, DateTime *t, off_t *size, bool *is_dir ) ;
|
||||
void Stat( const fs::path& filename, DateTime *t, off_t *size, bool *is_dir ) ;
|
||||
void Stat( const std::string& filename, DateTime *t, off64_t *size, bool *is_dir ) ;
|
||||
void Stat( const fs::path& filename, DateTime *t, off64_t *size, bool *is_dir ) ;
|
||||
|
||||
void SetFileTime( const std::string& filename, const DateTime& t ) ;
|
||||
void SetFileTime( const fs::path& filename, const DateTime& t ) ;
|
||||
|
|
Loading…
Reference in New Issue