Read Apache log files line by line: fileStream.readLine()

I wanted to read access log files from an Adobe AIR application. As these files can get very big, loading the entire file into memory is not an option. Unfortunately, FileStream does not help much here, because it has no readLine() method like Java’s BufferedReader.readLine().

Googling reveals a nice implementation by Sandeep Gupta: FileStreamWithLineReader. While it works fine for pure ASCII files, it is buggy for files containing multibyte characters like öäü. Also, I’m no fan of extending classes and prefer to encapsulate the FileStream instead. FileStreamLineReader is slower than FileStreamWithLineReader, but it works for all characters.

Here’s an example of how to use the class:

// Read access.log one line at a time instead of loading the whole file.
var accessLog:File = new File("access.log")
var logStream:FileStreamLineReader = new FileStreamLineReader()
logStream.open(accessLog, FileMode.READ)
while(logStream.lineAvailable) {
	var log:String = logStream.readUTFLine()
}
// Release the underlying file handle once all lines have been consumed.
logStream.close()

package 
{
import flash.filesystem.File;
import flash.filesystem.FileStream;
import flash.utils.ByteArray;


/**
 * Reads a file line by line through an encapsulated FileStream, without
 * loading the whole file into memory.
 *
 * Lines may be terminated by "\n" or "\r\n"; the terminator is never part of
 * the returned string. A final line without a terminator is returned as well.
 * The reader expects the stream to be opened synchronously (it relies on
 * FileStream.bytesAvailable reporting the bytes remaining in the file).
 */
public class FileStreamLineReader {
	
	private var _buffer:ByteArray
	private var _bufferSize:uint
	private var _fileStream:FileStream
	// Byte offset of the first unread line. Number (not int) because
	// FileStream.position is a Number and log files can exceed 2 GiB.
	private var _lastLineEndPosition:Number = 0
	private var _lineAvailable:Boolean = false
	
	/**
	 * @param bufferSize Initial number of bytes read per attempt. The chunk is
	 *                   doubled until it contains a complete line. A value of 0
	 *                   is raised to 1.
	 */
	public function FileStreamLineReader(bufferSize:uint=512) {
		_bufferSize = Math.max(1, bufferSize)
	}
	
	/**
	 * Opens the given file and positions the reader at its first line.
	 *
	 * @param file     The file to read.
	 * @param fileMode A FileMode constant, e.g. FileMode.READ.
	 */
	public function open(file:File, fileMode:String):void {
		_buffer = new ByteArray()
		_fileStream = new FileStream()
		_fileStream.open(file, fileMode)
		// Reset the read state so that a reader instance can be reused.
		_lastLineEndPosition = 0
		_lineAvailable = _fileStream.bytesAvailable > 0
	}
		
	/**
	 * True while there is at least one more unread byte, i.e. another call to
	 * readUTFLine() / readMultiByteLine() will return a line from the file.
	 */
	public function get lineAvailable():Boolean {
		return _lineAvailable
	}
	
	/**
	 * Reads the next line, decoding it as UTF-8.
	 *
	 * @return The line without its terminator.
	 */
	public function readUTFLine():String {
		return readMultiByteLine("utf-8")
	}
	
	/**
	 * Reads the next line, decoding it with the given character set.
	 *
	 * @param charSet Character set name as accepted by ByteArray.readMultiByte().
	 * @return The line without its "\n" or "\r\n" terminator. An empty string
	 *         is returned once the end of the file has been reached.
	 */
	public function readMultiByteLine(charSet:String):String {
		var toReturn:String = readLine(charSet)
		
		// readLine() only cuts at "\n"; drop the "\r" of a Windows "\r\n" terminator.
		if(toReturn != null && toReturn.charCodeAt(toReturn.length - 1) == 13) {
			return toReturn.substr(0, toReturn.length - 1)
		}
		
		return toReturn
	}
	
	/**
	 * Reads from the current line start up to (not including) the next "\n".
	 * The chunk size is doubled and the read restarted from the line start
	 * until the chunk contains a newline or reaches the end of the file.
	 */
	private function readLine(charSet:String):String {
		const lineStart:Number = _lastLineEndPosition
		var chunkSize:uint = _bufferSize
		var decoded:String = ""
		do {
			_fileStream.position = lineStart
			_buffer.clear()
			var bytesToRead:uint = Math.min(chunkSize, _fileStream.bytesAvailable)
			if(bytesToRead == 0) {
				// Nothing left to read. Do not call readBytes() here: a length
				// of 0 means "read all available data" in the FileStream API.
				break
			}
			_fileStream.readBytes(_buffer, 0, bytesToRead)
			_buffer.position = 0
			// A multibyte character cut off at the end of the chunk may decode
			// to garbage, but only after the last complete character; it is
			// either behind the newline we find or re-read with a larger chunk.
			decoded = _buffer.readMultiByte(bytesToRead, charSet)
			var newlineIndex:int = decoded.indexOf('\n')
			if(newlineIndex != -1) {
				// Re-encode the line INCLUDING its terminator to learn how many
				// bytes it occupies in the file. This works for "\n" and "\r\n"
				// alike and for character sets with multi-byte terminators.
				// Assumes the file content is valid in charSet, so that the
				// re-encoded length equals the on-disk length.
				_buffer.clear()
				_buffer.writeMultiByte(decoded.substr(0, newlineIndex + 1), charSet)
				_lastLineEndPosition = lineStart + _buffer.length
				_buffer.clear()
				// position + bytesAvailable is the total file size at this point.
				_lineAvailable = _lastLineEndPosition < _fileStream.position + _fileStream.bytesAvailable
				return decoded.substr(0, newlineIndex)
			}
			chunkSize *= 2
		} while(_fileStream.bytesAvailable > 0)
		
		// End of file reached without a newline: whatever was decoded is the
		// final, unterminated line (or "" if nothing was left).
		_buffer.clear()
		_lastLineEndPosition = _fileStream.position + _fileStream.bytesAvailable
		_lineAvailable = false
		return decoded
	}
	
	/**
	 * Closes the underlying FileStream. Does nothing if open() was never called.
	 */
	public function close():void {
		if(_fileStream != null) {
			_fileStream.close()
		}
	}

}
}

Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Connecting to %s

Follow

Get every new post delivered to your Inbox.