/* grive: an GPL program to sync a local directory with Google Drive Copyright (C) 2012 Wan Wai Ho This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "Resource.hh" #include "CommonUri.hh" #include "Entry.hh" #include "http/Agent.hh" #include "http/Download.hh" #include "http/Header.hh" // #include "http/ResponseLog.hh" #include "http/StringResponse.hh" #include "http/XmlResponse.hh" #include "json/Val.hh" #include "util/CArray.hh" #include "util/Crypt.hh" #include "util/log/Log.hh" #include "util/OS.hh" #include "util/File.hh" #include "xml/Node.hh" #include "xml/NodeSet.hh" #include "xml/String.hh" #include "xml/TreeBuilder.hh" #include #include #include // for debugging #include namespace gr { namespace v1 { // hard coded XML file const std::string xml_meta = "\n" "" "" "%2%" "" ; /// default constructor creates the root folder Resource::Resource(const fs::path& root_folder) : m_name ( root_folder.string() ), m_kind ( "folder" ), m_id ( "folder:root" ), m_href ( root_href ), m_create ( root_create ), m_parent ( 0 ), m_state ( sync ) { } Resource::Resource( const std::string& name, const std::string& kind ) : m_name ( name ), m_kind ( kind ), m_parent ( 0 ), m_state ( unknown ) { } void Resource::SetState( State new_state ) { // only the new and delete states need to be set recursively assert( new_state == remote_new || new_state == remote_deleted || new_state == local_new || new_state == local_deleted ) ; m_state = new_state ; std::for_each( m_child.begin(), m_child.end(), boost::bind( &Resource::SetState, _1, new_state ) ) ; } void Resource::FromRemoteFolder( const Entry& remote, const DateTime& last_sync ) { fs::path path = Path() ; if ( remote.CreateLink().empty() ) Log( "folder %1% is read-only", path, log::verbose ) ; // already sync if ( fs::is_directory( path ) ) { Log( "folder %1% is in sync", path, log::verbose ) ; m_state = sync ; } // remote file created after last sync, so remote is newer else if ( remote.MTime() > last_sync ) { if ( fs::exists( path ) ) { // TODO: handle type change Log( "%1% changed from folder to file", path, log::verbose ) ; m_state = sync ; } else { // make all children as remote_new, if any Log( "folder %1% is created in remote", path, log::verbose ) ; SetState( remote_new ) ; } } else { if ( fs::exists( path ) ) { // TODO: handle type chage Log( "%1% changed from file to folder", path, log::verbose ) ; m_state = sync ; } else { Log( "folder %1% is deleted in local", path, log::verbose ) ; SetState( local_deleted ) ; } } } /// Update the state according to information (i.e. Entry) from remote. This function /// compares the modification time and checksum of both copies and determine which /// one is newer. void Resource::FromRemote( const Entry& remote, const DateTime& last_sync ) { // sync folder if ( remote.Kind() == "folder" && IsFolder() ) FromRemoteFolder( remote, last_sync ) ; else FromRemoteFile( remote, last_sync ) ; AssignIDs( remote ) ; assert( m_state != unknown ) ; if ( m_state == remote_new || m_state == remote_changed ) { m_md5 = remote.MD5() ; m_mtime = remote.MTime() ; } } void Resource::AssignIDs( const Entry& remote ) { // the IDs from change feed entries are different if ( !remote.IsChange() ) { m_id = remote.ResourceID() ; m_href = remote.SelfHref() ; m_edit = remote.EditLink() ; m_create = remote.CreateLink() ; m_content = remote.ContentSrc() ; m_etag = remote.ETag() ; } } void Resource::FromRemoteFile( const Entry& remote, const DateTime& last_sync ) { assert( m_parent != 0 ) ; fs::path path = Path() ; // recursively create/delete folder if ( m_parent->m_state == remote_new || m_parent->m_state == remote_deleted || m_parent->m_state == local_new || m_parent->m_state == local_deleted ) { Log( "file %1% parent %2% recursively in %3% (%4%)", path, ( m_parent->m_state == remote_new || m_parent->m_state == local_new ) ? "created" : "deleted", ( m_parent->m_state == remote_new || m_parent->m_state == remote_deleted ) ? "remote" : "local", m_parent->m_state, log::verbose ) ; m_state = m_parent->m_state ; } // local not exists else if ( !fs::exists( path ) ) { Trace( "file %1% change stamp = %2%", Path(), remote.ChangeStamp() ) ; if ( remote.MTime() > last_sync || remote.ChangeStamp() > 0 ) { Log( "file %1% is created in remote (change %2%)", path, remote.ChangeStamp(), log::verbose ) ; m_state = remote_new ; } else { Log( "file %1% is deleted in local", path, log::verbose ) ; m_state = local_deleted ; } } // remote checksum unknown, assume the file is not changed in remote else if ( remote.MD5().empty() ) { Log( "file %1% has unknown checksum in remote. assuned in sync", Path(), log::verbose ) ; m_state = sync ; } // if checksum is equal, no need to compare the mtime else if ( remote.MD5() == m_md5 ) { Log( "file %1% is already in sync", Path(), log::verbose ) ; m_state = sync ; } // use mtime to check which one is more recent else { assert( m_state != unknown ) ; // if remote is modified if ( remote.MTime() > m_mtime ) { Log( "file %1% is changed in remote", path, log::verbose ) ; m_state = remote_changed ; } // remote also has the file, so it's not new in local else if ( m_state == local_new || m_state == remote_deleted ) { Log( "file %1% is changed in local", path, log::verbose ) ; m_state = local_changed ; } else Trace( "file %1% state is %2%", m_name, m_state ) ; } } /// Update the resource with the attributes of local file or directory. This /// function will propulate the fields in m_entry. void Resource::FromLocal( const DateTime& last_sync ) { fs::path path = Path() ; //assert( fs::exists( path ) ) ; // root folder is always in sync if ( !IsRoot() ) { m_mtime = os::FileCTime( path ) ; // follow parent recursively if ( m_parent->m_state == local_new || m_parent->m_state == local_deleted ) m_state = local_new ; // if the file is not created after last sync, assume file is // remote_deleted first, it will be updated to sync/remote_changed // in FromRemote() else m_state = ( m_mtime > last_sync ? local_new : remote_deleted ) ; m_name = path.filename().string() ; //m_kind = fs::is_directory(path) ? "folder" : "file" ; m_md5 = IsFolder() ? "" : crypt::MD5::Get( path ) ; } assert( m_state != unknown ) ; } std::string Resource::SelfHref() const { return m_href ; } std::string Resource::Name() const { return m_name ; } std::string Resource::ResourceID() const { return m_id ; } const Resource* Resource::Parent() const { assert( m_parent == 0 || m_parent->IsFolder() ) ; return m_parent ; } Resource* Resource::Parent() { assert( m_parent == 0 || m_parent->IsFolder() ) ; return m_parent ; } void Resource::AddChild( Resource *child ) { assert( child != 0 ) ; assert( child->m_parent == 0 || child->m_parent == this ) ; assert( child != this ) ; child->m_parent = this ; m_child.push_back( child ) ; } bool Resource::IsFolder() const { return m_kind == "folder" ; } fs::path Resource::Path() const { assert( m_parent != this ) ; assert( m_parent == 0 || m_parent->IsFolder() ) ; return m_parent != 0 ? (m_parent->Path() / m_name) : m_name ; } bool Resource::IsInRootTree() const { assert( m_parent == 0 || m_parent->IsFolder() ) ; return m_parent == 0 ? (SelfHref() == root_href) : m_parent->IsInRootTree() ; } Resource* Resource::FindChild( const std::string& name ) { for ( std::vector::iterator i = m_child.begin() ; i != m_child.end() ; ++i ) { assert( (*i)->m_parent == this ) ; if ( (*i)->m_name == name ) return *i ; } return 0 ; } // try to change the state to "sync" void Resource::Sync( http::Agent *http, DateTime& sync_time, const Val& options ) { assert( m_state != unknown ) ; assert( !IsRoot() || m_state == sync ) ; // root folder is already synced SyncSelf( http, options ) ; // we want the server sync time, so we will take the server time of the last file uploaded to store as the sync time // m_mtime is updated to server modified time when the file is uploaded sync_time = std::max(sync_time, m_mtime); // if myself is deleted, no need to do the childrens if ( m_state != local_deleted && m_state != remote_deleted ) std::for_each( m_child.begin(), m_child.end(), boost::bind( &Resource::Sync, _1, http, boost::ref(sync_time), options ) ) ; } void Resource::SyncSelf( http::Agent* http, const Val& options ) { assert( !IsRoot() || m_state == sync ) ; // root is always sync assert( IsRoot() || http == 0 || fs::is_directory( m_parent->Path() ) ) ; assert( IsRoot() || m_parent->m_state != remote_deleted ) ; assert( IsRoot() || m_parent->m_state != local_deleted ) ; const fs::path path = Path() ; switch ( m_state ) { case local_new : Log( "sync %1% doesn't exist in server, uploading", path, log::info ) ; if ( http != 0 && Create( http ) ) m_state = sync ; break ; case local_deleted : Log( "sync %1% deleted in local. deleting remote", path, log::info ) ; if ( http != 0 ) DeleteRemote( http ) ; break ; case local_changed : Log( "sync %1% changed in local. uploading", path, log::info ) ; if ( http != 0 && EditContent( http, options["new-rev"].Bool() ) ) m_state = sync ; break ; case remote_new : Log( "sync %1% created in remote. creating local", path, log::info ) ; if ( http != 0 ) { if ( IsFolder() ) fs::create_directories( path ) ; else Download( http, path ) ; m_state = sync ; } break ; case remote_changed : assert( !IsFolder() ) ; Log( "sync %1% changed in remote. downloading", path, log::info ) ; if ( http != 0 ) { Download( http, path ) ; m_state = sync ; } break ; case remote_deleted : Log( "sync %1% deleted in remote. deleting local", path, log::info ) ; if ( http != 0 ) DeleteLocal() ; break ; case sync : Log( "sync %1% already in sync", path, log::verbose ) ; break ; // shouldn't go here case unknown : assert( false ) ; break ; default : break ; } } /// this function doesn't really remove the local file. it renames it. void Resource::DeleteLocal() { static const boost::format trash_file( "%1%-%2%" ) ; assert( m_parent != 0 ) ; fs::path parent = m_parent->Path() ; fs::path dest = ".trash" / parent / Name() ; std::size_t idx = 1 ; while ( fs::exists( dest ) && idx != 0 ) dest = ".trash" / parent / (boost::format(trash_file) % Name() % idx++).str() ; // wrap around! just remove the file if ( idx == 0 ) fs::remove_all( Path() ) ; else { fs::create_directories( dest.parent_path() ) ; fs::rename( Path(), dest ) ; } } void Resource::DeleteRemote( http::Agent *http ) { assert( http != 0 ) ; http::StringResponse str ; try { http::Header hdr ; hdr.Add( "If-Match: " + m_etag ) ; // doesn't know why, but an update before deleting seems to work always http::XmlResponse xml ; http->Get( m_href, &xml, hdr ) ; AssignIDs( Entry( xml.Response() ) ) ; http->Custom( "DELETE", m_href, &str, hdr ) ; } catch ( Exception& e ) { // don't rethrow here. there are some cases that I don't know why // the delete will fail. Trace( "Exception %1% %2%", boost::diagnostic_information(e), str.Response() ) ; } } void Resource::Download( http::Agent* http, const fs::path& file ) const { assert( http != 0 ) ; http::Download dl( file.string(), http::Download::NoChecksum() ) ; long r = http->Get( m_content, &dl, http::Header() ) ; if ( r <= 400 ) { if ( m_mtime != DateTime() ) os::SetFileTime( file, m_mtime ) ; else Log( "encountered zero date time after downloading %1%", file, log::warning ) ; } } bool Resource::EditContent( http::Agent* http, bool new_rev ) { assert( http != 0 ) ; assert( m_parent != 0 ) ; assert( m_parent->m_state == sync ) ; // upload link missing means that file is read only if ( m_edit.empty() ) { Log( "Cannot upload %1%: file read-only. %2%", m_name, m_state, log::warning ) ; return false ; } return Upload( http, m_edit + (new_rev ? "?new-revision=true" : ""), false ) ; } bool Resource::Create( http::Agent* http ) { assert( http != 0 ) ; assert( m_parent != 0 ) ; assert( m_parent->IsFolder() ) ; assert( m_parent->m_state == sync ) ; if ( IsFolder() ) { std::string uri = feed_base ; if ( !m_parent->IsRoot() ) uri += ("/" + http->Escape(m_parent->m_id) + "/contents") ; std::string meta = (boost::format( xml_meta ) % "folder" % xml::Escape(m_name) ).str() ; http::Header hdr ; hdr.Add( "Content-Type: application/atom+xml" ) ; http::XmlResponse xml ; // http::ResponseLog log( "create", ".xml", &xml ) ; http->Post( uri, meta, &xml, hdr ) ; AssignIDs( Entry( xml.Response() ) ) ; return true ; } else if ( !m_parent->m_create.empty() ) { return Upload( http, m_parent->m_create + "?convert=false", true ) ; } else { Log( "parent of %1% does not exist: cannot upload", Name(), log::warning ) ; return false ; } } bool Resource::Upload( http::Agent* http, const std::string& link, bool post) { assert( http != 0 ) ; File file( Path() ) ; std::ostringstream xcontent_len ; xcontent_len << "X-Upload-Content-Length: " << file.Size() ; http::Header hdr ; hdr.Add( "Content-Type: application/atom+xml" ) ; hdr.Add( "X-Upload-Content-Type: application/octet-stream" ) ; hdr.Add( xcontent_len.str() ) ; hdr.Add( "If-Match: " + m_etag ) ; hdr.Add( "Expect:" ) ; std::string meta = (boost::format( xml_meta ) % m_kind % xml::Escape(m_name) ).str() ; bool retrying=false; while ( true ) { if ( retrying ) { file.Seek( 0, SEEK_SET ); os::Sleep( 5 ); } try { http::StringResponse str ; if ( post ) http->Post( link, meta, &str, hdr ) ; else http->Put( link, meta, &str, hdr ) ; } catch ( Error &e ) { std::string const *info = boost::get_error_info(e); if ( info && (*info == "XML_Parse") ) { Log( "Error parsing pre-upload response XML, retrying whole upload in 5s", log::warning ); retrying = true; continue; } else { throw e; } } http::Header uphdr ; uphdr.Add( "Expect:" ) ; uphdr.Add( "Accept:" ) ; // the content upload URL is in the "Location" HTTP header std::string uplink = http->RedirLocation() ; http::XmlResponse xml ; long http_code = 0; try { http_code = http->Put( uplink, &file, &xml, uphdr ) ; } catch ( Error &e ) { std::string const *info = boost::get_error_info(e); if ( info && (*info == "XML_Parse") ) { Log( "Error parsing response XML, retrying whole upload in 5s", log::warning ); retrying = true; continue; } else { throw e; } } if ( http_code == 410 || http_code == 412 ) { Log( "request failed with %1%, retrying whole upload in 5s", http_code, log::warning ) ; retrying = true; continue; } if ( retrying ) Log( "upload succeeded on retry", log::warning ); Entry responseEntry = Entry( xml.Response() ); AssignIDs( responseEntry ) ; m_mtime = responseEntry.MTime(); break; } return true ; } Resource::iterator Resource::begin() const { return m_child.begin() ; } Resource::iterator Resource::end() const { return m_child.end() ; } std::size_t Resource::size() const { return m_child.size() ; } std::ostream& operator<<( std::ostream& os, Resource::State s ) { static const char *state[] = { "sync", "local_new", "local_changed", "local_deleted", "remote_new", "remote_changed", "remote_deleted" } ; assert( s >= 0 && s < Count(state) ) ; return os << state[s] ; } std::string Resource::StateStr() const { std::ostringstream ss ; ss << m_state ; return ss.str() ; } std::string Resource::MD5() const { return m_md5 ; } bool Resource::IsRoot() const { return m_parent == 0 ; } bool Resource::HasID() const { return !m_href.empty() && !m_id.empty() ; } } } // end of namespace