1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9  """Directory cache implementation. 
 10   
 11  This module contains the implementation of a cache that uses individual 
 12  files stored in a dedicated cache directory to store the cached contents. 
 13  The cache class is L{directory_cache_t} which can be passed to the C{cache} 
 14  argument of the L{parse()} function. 
 15  """ 
 16   
 17  import os, os.path, gzip, md5 
 18  import cPickle 
 19  import declarations_cache 
 20   
 22      """Entry of the index table in the directory cache index. 
 23   
 24      Each cached header file (i.e. each *.cache file) has a corresponding 
 25      index_entry_t object. This object is used to determine whether the 
 26      cache file with the declarations is still valid or not. 
 27   
 28      This class is a helper class for the directory_cache_t class. 
 29      """ 
 30       
 31 -    def __init__( self, filesigs, configsig ): 
  32          """Constructor. 
 33   
 34          filesigs is a list of tuples (fileid, sig)... 
 35          configsig is the signature of the configuration object. 
 36          """ 
 37          self.filesigs = filesigs 
 38          self.configsig = configsig 
  39   
 40 -    def __getstate__(self): 
  41          return (self.filesigs, self.configsig) 
  42   
 43 -    def __setstate__(self, state): 
  44          self.filesigs, self.configsig = state 
   45    
 46   
 48      """Cache class that stores its data as multiple files inside a directory. 
 49   
 50      The cache stores one index file called "index.dat" which is always 
 51      read by the cache when the cache object is created. Each header file 
 52      will have its corresponding *.cache file that stores the declarations 
 53      found in the header file. The index file is used to determine whether 
 54      a *.cache file is still valid or not (by checking if one of the dependent 
 55      files (i.e. the header file itself and all included files) have been 
 56      modified since the last run). 
 57      """ 
 58   
 59 -    def __init__( self, dir="cache", compression=False, md5_sigs=True ): 
  60          """Constructor. 
 61   
 62          dir is the cache directory (it is created if it does not exist). 
 63          If compression is set to True the cache files will be compressed 
 64          using gzip. 
 65          md5_sigs determines whether file modifications is checked by 
 66          computing a md5 digest or by checking the modification date. 
 67          """ 
 68          declarations_cache.cache_base_t.__init__(self) 
 69   
 70           
 71          self.__dir = os.path.abspath(dir) 
 72   
 73           
 74          self.__compression = compression 
 75   
 76           
 77           
 78           
 79          self.__md5_sigs = md5_sigs 
 80   
 81           
 82          self.__filename_rep = filename_repository_t(self.__md5_sigs) 
 83   
 84           
 85           
 86           
 87          self.__index = {} 
 88   
 89           
 90          self.__modified_flag = False 
 91   
 92           
 93          if os.path.isfile(self.__dir): 
 94              raise ValueError, "Cannot use %s as cache directory. There is already a file with that name."%self.__dir 
 95   
 96           
 97          if os.path.isdir(self.__dir): 
 98              self._load() 
 99          else: 
100               
101              os.mkdir(self.__dir) 
 102   
104          """Save the index table to disk.""" 
105   
106          self._save()       
 107   
108   
109 -    def update(self, source_file, configuration, declarations, included_files): 
 110          """Replace a cache entry by a new value. 
111   
112          @param source_file: Header file name. 
113          @type source_file: str 
114          @param configuration: Configuration  object. 
115          @type configuration: L{config_t} 
116          @param declarations: Declarations contained in the header file. 
117          @type declarations: picklable object 
118          @param included_files: Dependent files 
119          @type included_files: list of str 
120          """ 
121           
122          source_file = os.path.normpath(source_file) 
123          included_files = map(lambda p: os.path.normpath(p), included_files) 
124   
125           
126           
127          dependent_files = {} 
128          for name in [source_file]+included_files: 
129              dependent_files[name] = 1 
130          dependent_files = dependent_files.keys() 
131   
132          key = self._create_cache_key(source_file) 
133           
134           
135           
136          self._remove_entry(source_file, key) 
137   
138           
139   
140           
141          filesigs = [] 
142          for filename in dependent_files: 
143              id_,sig = self.__filename_rep.acquire_filename(filename) 
144              filesigs.append((id_,sig)) 
145   
146          configsig = self._create_config_signature(configuration) 
147          entry = index_entry_t(filesigs, configsig) 
148          self.__index[key] = entry 
149          self.__modified_flag = True 
150   
151           
152          cachefilename = self._create_cache_filename(source_file) 
153          self._write_file(cachefilename, declarations) 
 154   
155          
157          """Return the cached declarations or None. 
158   
159          @param source_file: Header file name 
160          @type source_file: str 
161          @param configuration: Configuration object 
162          @type configuration: L{config_t} 
163          @return: Cached declarations or None 
164          """ 
165   
166           
167          key = self._create_cache_key(source_file) 
168          entry = self.__index.get(key) 
169          if entry==None: 
170   
171              return None 
172   
173           
174           
175           
176           
177           
178           
179           
180           
181           
182   
183           
184          configsig = self._create_config_signature(configuration) 
185          if configsig!=entry.configsig: 
186   
187              return None 
188   
189           
190          for id_, sig in entry.filesigs: 
191              if self.__filename_rep.is_file_modified(id_, sig): 
192   
193                  return None 
194   
195           
196          cachefilename = self._create_cache_filename(source_file) 
197          decls = self._read_file(cachefilename) 
198   
199   
200          return decls 
 201   
203          """Load the cache. 
204   
205          Loads the file index.dat which contains the index table and 
206          the file name repository. 
207   
208          This method is called by the constructor. 
209          """ 
210   
211          indexfilename = os.path.join(self.__dir, "index.dat") 
212          if os.path.exists(indexfilename): 
213              data = self._read_file(indexfilename) 
214              self.__index = data[0] 
215              self.__filename_rep = data[1] 
216              if self.__filename_rep._md5_sigs!=self.__md5_sigs: 
217                  print "CACHE: Warning: md5_sigs stored in the cache is set to %s."%self.__filename_rep._md5_sigs 
218                  print "       Please remove the cache to change this setting." 
219                  self.__md5_sigs = self.__filename_rep._md5_sigs 
220          else: 
221              self.__index = {} 
222              self.__filename_rep = filename_repository_t(self.__md5_sigs) 
223               
224          self.__modified_flag = False 
 225   
227          """Save the cache index if it was modified. 
228   
229          Saves the index table and the file name repository in the file 
230          index.dat. 
231          """ 
232          if self.__modified_flag: 
233              self.__filename_rep.update_id_counter() 
234              indexfilename = os.path.join(self.__dir, "index.dat") 
235              self._write_file(indexfilename, (self.__index,self.__filename_rep)) 
236              self.__modified_flag = False 
 237   
239          """Read a Python object from a cache file. 
240   
241          Reads a pickled object from disk and returns it. 
242   
243          @param filename: Name of the file that should be read. 
244          @type filename: str 
245          @returns: Unpickled file contents 
246          """ 
247          if self.__compression: 
248              f = gzip.GzipFile(filename, "rb") 
249          else: 
250              f = file(filename, "rb") 
251          res = cPickle.load(f) 
252          f.close() 
253          return res 
 254   
256          """Write a data item into a file. 
257   
258          The data object is written to a file using the pickle mechanism. 
259   
260          @param filename: Output file name 
261          @type filename: str 
262          @param data: A Python object that will be pickled 
263          @type data: picklable object 
264          """ 
265          if self.__compression: 
266              f = gzip.GzipFile(filename, "wb") 
267          else: 
268              f = file(filename, "wb") 
269          cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL) 
270          f.close()         
 271           
272 -    def _remove_entry(self, source_file, key): 
 273          """Remove an entry from the cache. 
274   
275          source_file is the name of the header and key is its corresponding 
276          cache key (obtained by a call to L{_create_cache_key()}). 
277          The entry is removed from the index table, any referenced file 
278          name is released and the cache file is deleted. 
279   
280          If key references a non-existing entry, the method returns 
281          immediately. 
282   
283          @param source_file: Header file name 
284          @type source_file: str 
285          @param key: Key value for the specified header file 
286          @type key: hashable object 
287          """ 
288           
289          entry = self.__index.get(key) 
290          if entry==None: 
291              return 
292   
293           
294          for id_, sig in entry.filesigs: 
295              self.__filename_rep.release_filename(id_) 
296   
297           
298          del self.__index[key] 
299          self.__modified_flag = True 
300   
301           
302          cachefilename = self._create_cache_filename(source_file) 
303          try: 
304              os.remove(cachefilename) 
305          except OSError, e: 
306              print "Could not remove cache file (%s)"%e 
 307   
308   
310          """Return the cache key for a header file. 
311   
312          @param source_file: Header file name 
313          @type source_file: str 
314          @returns: Key for the given header file 
315          @rtype: str 
316          """ 
317          path, name = os.path.split(source_file) 
318          return name+str(hash(path)) 
 319   
321          """Return the cache file name for a header file. 
322   
323          @param source_file: Header file name 
324          @type source_file: str 
325          @returns: Cache file name (*.cache) 
326          @rtype: str 
327          """ 
328          res = self._create_cache_key(source_file)+".cache" 
329          return os.path.join(self.__dir, res) 
 330   
 350   
351   
352           
353   
355      """This is a record stored in the filename_repository_t class. 
356   
357      The class is an internal class used in the implementation of the 
358      filename_repository_t class and it just serves as a container for 
359      the file name and the reference count. 
360      """ 
361   
362 -    def __init__( self, filename ): 
 363          """Constructor. 
364   
365          The reference count is initially set to 0. 
366          """ 
367           
368          self.filename = filename 
369           
370          self.refcount = 0 
371           
372           
373           
374           
375           
376          self.sig_valid = False 
377          self.signature = None 
 378   
379 -    def __getstate__(self): 
 380           
381          return (self.filename, self.refcount) 
 382   
383 -    def __setstate__(self, state): 
 384          self.filename, self.refcount = state 
385          self.sig_valid = False 
386          self.signature = None 
 387   
388 -    def inc_ref_count(self): 
 389          """Increase the reference count by 1.""" 
390          self.refcount += 1 
 391   
392 -    def dec_ref_count(self): 
 393          """Decrease the reference count by 1 and return the new count.""" 
394          self.refcount -= 1 
395          return self.refcount 
  396   
397   
399      """File name repository. 
400   
401      This class stores file names and can check whether a file has been 
402      modified or not since a previous call. 
403      A file name is stored by calling acquire_filename() which returns 
404      an ID and a signature of the file. The signature can later be used 
405      to check if the file was modified by calling is_file_modified(). 
406      If the file name is no longer required release_filename() should be 
407      called so that the entry can be removed from the repository. 
408      """ 
409   
411          """Constructor. 
412          """ 
413           
414           
415           
416           
417          self._md5_sigs = md5_sigs 
418   
419           
420          self.__id_lut = {} 
421   
422           
423           
424           
425           
426           
427          self.__entries = {} 
428   
429           
430          self.__next_id = 1 
 431   
433          """Acquire a file name and return its id and its signature. 
434          """ 
435          id_ = self.__id_lut.get(name) 
436           
437          if id_==None: 
438               
439              id_ = self.__next_id 
440              self.__next_id += 1 
441              self.__id_lut[name] = id_ 
442              entry = filename_entry_t(name) 
443              self.__entries[id_] = entry 
444          else: 
445               
446              entry = self.__entries[id_] 
447   
448          entry.inc_ref_count() 
449          return id_, self._get_signature(entry) 
 450               
452          """Release a file name. 
453          """ 
454          entry = self.__entries.get(id_) 
455          if entry==None: 
456              raise ValueError, "Invalid filename id (%d)"%id_ 
457   
458           
459          if entry.dec_ref_count()==0: 
460              del self.__entries[id_] 
461              del self.__id_lut[entry.filename] 
 462   
464          """Check if the file referred to by id_ has been modified. 
465          """ 
466          entry = self.__entries.get(id_) 
467          if entry==None: 
468              raise ValueError, "Invalid filename id_ (%d)"%id_ 
469   
470           
471          if entry.sig_valid: 
472               
473              filesig = entry.signature 
474          else: 
475               
476              filesig = self._get_signature(entry) 
477              entry.signature = filesig 
478              entry.sig_valid = True 
479   
480          return filesig!=signature 
 481   
483          """Update the id_ counter so that it doesn't grow forever. 
484          """ 
485          if len(self.__entries)==0: 
486              self.__next_id = 1 
487          else: 
488              self.__next_id = max(self.__entries.keys())+1 
 489   
491          """Return the signature of the file stored in entry. 
492          """ 
493          if self._md5_sigs: 
494               
495              if not os.path.exists(entry.filename): 
496                  return None 
497              try: 
498                  f = file(entry.filename) 
499              except IOError, e: 
500                  print "Cannot determine md5 digest:",e 
501                  return None 
502              data = f.read() 
503              f.close() 
504              return md5.new(data).digest() 
505          else: 
506               
507              try: 
508                  return os.path.getmtime(entry.filename) 
509              except OSError, e: 
510                  return None 
 511   
513          """Dump contents for debugging/testing. 
514          """ 
515   
516          print 70*"-" 
517          print "ID lookup table:" 
518          for name in self.__id_lut: 
519              id_ = self.__id_lut[name] 
520              print "  %s -> %d"%(name, id_) 
521   
522          print 70*"-" 
523          print "%-4s %-60s %s"%("ID", "Filename", "Refcount") 
524          print 70*"-" 
525          for id_ in self.__entries: 
526              entry = self.__entries[id_] 
527              print "%04d %-60s %d"%(id_, entry.filename, entry.refcount) 
  528