#!/usr/bin/env python
#
# readahead-blocks-digest: digests the raw readahead info
# dumped in the kernel log, giving information about accessed
# file sections and process executions.
#
# Home page and updates:
# http://free-electrons.com/community/tools/readahead/
#
# Usage: readahead-blocks-digest [dump_file]
#        (dump_file defaults to 'edgy-log' in the current directory)
#
# Version 0.1
# Copyright (C) 2007 Free Electrons
# Author: Michael Opdenacker
# Home page: http://free-electrons.com/community/tools
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 675 Mass Ave, Cambridge, MA 02139, USA.

import sys, os, re

##########################################################
# Shared state and settings
##########################################################

# Maps an inode number to the path of the regular file carrying it.
# NOTE: the key is the inode number alone, so files on different
# filesystems sharing an inode number overwrite each other.
inode_path = dict()

# Maps a file name to its sorted, non-overlapping list of
# [start, end] byte ranges that were read.
file_blocks = dict()

# Names of the files that were read, in first-read order.
read_files = []

block_size = 4096

# Directory patterns (regular expressions matched at the start of the
# path) that are never scanned by default.
DEFAULT_IGNORE_LIST = ('/home', '/data', '/root1', '/root2')

# Log file digested when no dump file is given on the command line.
DEFAULT_DUMP_FILE = 'edgy-log'

##########################################################
# Common routines
##########################################################


def get_file_dev_major(filename):
    """Return the major number of the device holding 'filename'."""
    return os.major(os.stat(filename).st_dev)


def get_file_dev_minor(filename):
    """Return the minor number of the device holding 'filename'."""
    return os.minor(os.stat(filename).st_dev)


def skip_dir(root, dir, ignore_list):
    """Tell whether the directory 'dir' found in 'root' must be skipped.

    A directory is skipped when it sits on a virtual or ram filesystem
    (device major number 0), when it cannot be stat'ed, or when its path
    starts with one of the regular expressions in 'ignore_list'.
    """
    path = os.path.join(root, dir)
    try:
        if get_file_dev_major(path) == 0:
            return True
    except OSError:
        # The directory vanished or is not accessible: nothing to scan.
        return True
    for pattern in ignore_list:
        if re.match('^' + pattern, path):
            return True
    return False


def get_dir_inodes(dirname, ignore_list=None):
    """Record the inode of every regular file found below 'dirname'.

    Scans all the files in a directory (typically a mount point) and
    fills the global 'inode_path' dictionary, to be able to retrieve a
    file name from an inode number.

    dirname     -- directory to start scanning from
    ignore_list -- directory patterns handed to skip_dir();
                   DEFAULT_IGNORE_LIST is used when omitted
    """
    if ignore_list is None:
        ignore_list = DEFAULT_IGNORE_LIST
    for root, dirs, files in os.walk(dirname, topdown=True):
        # Prune the directories to ignore. The list is rebuilt and then
        # assigned in place: removing entries from 'dirs' while looping
        # over it would leave the entry following each removal unchecked.
        kept = []
        for subdir in dirs:
            if skip_dir(root, subdir, ignore_list):
                print('Skipping ' + os.path.join(root, subdir))
            else:
                kept.append(subdir)
        dirs[:] = kept
        for name in files:
            file_path = os.path.join(root, name)
            # Ignore symlinks and other non regular files
            # (named pipes, device files...)
            if os.path.islink(file_path) or not os.path.isfile(file_path):
                continue
            try:
                inode = int(os.stat(file_path).st_ino)
            except OSError:
                # The file disappeared while the tree was being scanned
                continue
            inode_path[inode] = file_path

##########################################################


def record_reads(filename, start, end):
    """Record that bytes [start, end] of 'filename' were read.

    The range is inserted in the block list kept for the file in the
    global 'file_blocks' dictionary. The list stays sorted by start
    offset, and ranges which overlap or touch each other are merged.
    """
    newblock = [start, end]
    blocks = file_blocks.setdefault(filename, [])
    # Insert before the first block starting after the new one,
    # or at the end when there is no such block.
    for i, block in enumerate(blocks):
        if start < block[0]:
            blocks.insert(i, newblock)
            break
    else:
        blocks.append(newblock)
    # Post process the list to eliminate any overlap after insertion.
    # The index only moves forward when no merge happened, as a merged
    # block may overlap the following one too.
    i = 0
    while i < len(blocks) - 1:
        current = blocks[i]
        following = blocks[i + 1]
        if current[1] >= following[0]:
            blocks[i:i + 2] = [[current[0], max(current[1], following[1])]]
        else:
            i += 1

##########################################################


def get_readsize(filename):
    """Return the number of bytes actually read from 'filename'.

    Raises KeyError when no read was recorded for the file.
    """
    return sum(block[1] - block[0] for block in file_blocks[filename])

##########################################################


def round_down(n, divider):
    """Return the greatest multiple of 'divider' lower than or equal to n."""
    # Floor division keeps the result an integer on every interpreter
    return (n // divider) * divider


def round_up(n, divider):
    """Return the smallest multiple of 'divider' greater than or equal to n."""
    down = round_down(n, divider)
    if down == n:
        return n
    return down + divider

##########################################################


def blocks_to_str(blocks):
    """Turn a block list into a string for use by the readahead program.

    Each block contributes ' start end'; an empty list gives ''.
    """
    return ''.join(' ' + str(block[0]) + ' ' + str(block[1])
                   for block in blocks)

##########################################################


def parse_rainfo_line(line):
    """Parse one line of the kernel log dump.

    Returns an (event_type, inode, offset, size) tuple, or None for
    lines which do not have 7 words, do not start with 'RAINFO', or
    carry non numeric fields (different or incorrect lines).
    """
    columns = line.split()
    if len(columns) != 7 or columns[0] != 'RAINFO':
        return None
    try:
        return (columns[1], int(columns[2]),
                int(columns[5]), int(columns[6]))
    except ValueError:
        return None

##########################################################
# Main program
##########################################################


def main(argv=None):
    """Digest a readahead dump and write the readahead_file_* reports.

    argv -- command line (defaults to sys.argv); argv[1], when present,
            is the dump file to read instead of DEFAULT_DUMP_FILE.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) > 1:
        dump_file = argv[1]
    else:
        dump_file = DEFAULT_DUMP_FILE

    get_dir_inodes('/')

    # Set mirror of 'read_files', for cheap membership tests
    seen_files = set(read_files)

    with open(dump_file, 'r') as input_fid:
        with open('readahead_file_debug', 'w') as debug_fid:
            for line in input_fid:
                entry = parse_rainfo_line(line)
                if entry is None:
                    continue
                event_type, inode, offset, size = entry
                if inode not in inode_path:
                    print('Skipped inode: ' + str(inode))
                    continue
                filename = inode_path[inode]
                filesize = os.path.getsize(filename)
                # Reduce the size so that it doesn't go beyond the file
                # size. The libc tries to read entire pages indeed,
                # even if there are much less bytes to read.
                if offset + size > filesize:
                    size = filesize - offset
                # Ignore attempts to read 0 bytes. Also happens as a
                # result of the previous manipulation of 'size' when we
                # are at the end of the file.
                if size <= 0:
                    continue
                # Keep track of the file read order
                if filename not in seen_files:
                    seen_files.add(filename)
                    read_files.append(filename)
                # Temporary fix: ignore filenames with spaces
                if ' ' not in filename:
                    record_reads(filename, offset, offset + size)
                # NOTE(review): the original layout was lost; the debug
                # entry is assumed to be written for every non-empty
                # read -- confirm.
                debug_fid.write(event_type + ' ' + filename + ' '
                                + str(offset) + ' ' + str(size) + '\n')

    total_size = 0
    read_size = 0
    output_fid = open('readahead_file_blocks', 'w')
    whole_fid = open('readahead_file_whole', 'w')
    stat_fid = open('readahead_file_stats', 'w')
    with output_fid:
        with whole_fid:
            with stat_fid:
                for filename in read_files:
                    # Don't forget we removed files with spaces
                    # in their names
                    if filename not in file_blocks:
                        continue
                    output_fid.write(filename
                                     + blocks_to_str(file_blocks[filename])
                                     + '\n')
                    file_size = os.path.getsize(filename)
                    total_size += file_size
                    file_read_size = get_readsize(filename)
                    read_size += file_read_size
                    stat_fid.write(filename + ' : ' + str(file_read_size)
                                   + ' / ' + str(file_size))
                    whole_fid.write(filename + ' 0 ' + str(file_size) + '\n')
                    if file_size != 0:
                        stat_fid.write(
                            ' (' + str((file_read_size * 100) // file_size)
                            + '%)')
                    stat_fid.write('\n')
                stat_fid.write('Total file size : ' + str(total_size) + '\n')
                stat_fid.write('Total read size : ' + str(read_size))
                # No percentage when nothing was read: avoids dividing
                # by zero
                if total_size != 0:
                    stat_fid.write(
                        '(' + str((read_size * 100) // total_size) + '%)')
                stat_fid.write('\n')


if __name__ == '__main__':
    sys.exit(main())