{ Created: 2001-11-20 by Berend de Boer See http://www.pobox.com/~berend/delphi/ for updates. Low level object that can generate XML and keeps some state so it can check if you made a mistake. XML generation is streaming, so you cannot change it once it is written. } unit xml_writer; interface uses SysUtils, string_stack, smart_mem, utf_util; const { the only known XML entities } QuoteAmp = '&'; QuoteLt = '<'; QuoteGt = '>'; QuoteQuote = '"'; QuoteApos = '''; type { supported encodings } TXMLEncoding = (encUnspecified, encISO_8859_1, encUTF_16, encUTF_8); type TXMLWriter = class protected { state } FEncoding: TXMLEncoding; { encoding used } FHeaderless: Boolean; { create XML header or not } FInitialCapacity: Integer; { capacity for XML string } FNewLineWritten: Boolean; { we've ended the current string with a new line } FStream: TSmartMemoryStream; { where the XML is stored } FTagStack: TStringStack; { we keep track of tag nesting } FIsStartTagClosed: Boolean; procedure AssertLastTagClosed; procedure Indent; function IsValidAttributeName(const AAttributeName: WideString): Boolean; { returns True if AAttributeName is according to the XML specs } function IsValidTagName(const ATagName: WideString): Boolean; { returns True if AAttributeName is according to the XML specs } function LastTag: WideString; function MakeValidAttributeValue(const AValue: WideString): WideString; { make AValue a valid contents for an attribute, for example any '"' occuring in the string is prefixed } public constructor Create(AInitialCapacity: Integer); constructor CreateFragment(AInitialCapacity: Integer); constructor CreateWithEncoding(AInitialCapacity: Integer; AEncoding: TXMLEncoding); destructor Destroy; override; procedure AddData(const Data: WideString); { write data in the current tag } procedure AddEntity(const EntityName: WideString); { write an entity } procedure AddRaw(const Data: WideString); { write uninterpreted block of data, client has to make sure this is valid } procedure AddTag(const Tag, Data: WideString); { equal to StartTag, AddData and StopTag } function AsLatin1: String; { return XML document in ISO-8859-1 format } function AsString: WideString; { return XML document as text } function AsUTF8: UTF8String; { return XML document in UTF8 format } procedure Clear; { start with a new, empty document or fragment } function IsTagStarted: Boolean; procedure NewLine; procedure SetAttribute(const Attribute, Value: WideString); procedure SetDocType(const RootTag, SystemId: WideString); procedure StartTag(const Tag: WideString); procedure StopTag; { if no data written, tag is closed with /> } public { utility functions to format date/time } function XMLDate(dt: TDateTime): WideString; { our way of writing dates } function XMLDateTime(dt: TDateTime): WideString; { our way of writing datetimes } function XMLTime(dt: TDateTime): WideString; { our way of writing times } end; function ToXMLAttributeValue(const s: WideString): WideString; function ToXMLData(const s: WideString): WideString; implementation { utility functions } const PartQuoteAmp = 'amp;'; PartQuoteLt = 'lt;'; PartQuoteGt = 'gt;'; PartQuoteQuote = 'quot;'; function ToXMLAttributeValue(const s: WideString): WideString; var i: integer; begin Result := s; i := 1; while i <= length(Result) do begin case Result[i] of '"': begin Result[i] := '&'; Insert(PartQuoteQuote, Result, i+1); i := i + length(QuoteQuote); end; '<': begin Result[i] := '&'; Insert(PartQuoteLt, Result, i+1); i := i + length(QuoteLt); end; '&': begin Result[i] := '&'; Insert(PartQuoteAmp, Result, i+1); i := i + length(QuoteAmp); end; else Inc(i); end; end; end; function ToXMLData(const s: WideString): WideString; var i: integer; begin Result := s; i := 1; while i <= length(Result) do begin case Result[i] of '<': begin Result[i] := '&'; Insert(PartQuoteLt, Result, i+1); i := i + length(QuoteLt); end; '&': begin Result[i] := '&'; Insert(PartQuoteAmp, Result, i+1); i := i + length(QuoteAmp); end; else Inc(i); end; end; end; { TXMLWriter } constructor TXMLWriter.Create(AInitialCapacity: Integer); begin CreateWithEncoding(AInitialCapacity, encUnspecified); end; constructor TXMLWriter.CreateFragment(AInitialCapacity: Integer); begin {require} Assert(AInitialCapacity >= 0, 'require: positive capacity.'); FTagStack := TStringStack.Create; FHeaderless := True; FInitialCapacity := AInitialCapacity; FEncoding := encUnspecified; Clear; end; constructor TXMLWriter.CreateWithEncoding(AInitialCapacity: Integer; AEncoding: TXMLEncoding); begin {require} Assert(AInitialCapacity >= 0, 'require: positive capacity.'); FTagStack := TStringStack.Create; FInitialCapacity := AInitialCapacity; FEncoding := AEncoding; Clear; end; destructor TXMLWriter.Destroy; begin FTagStack.Free; FStream.Free; inherited; end; procedure TXMLWriter.AddData(const Data: WideString); begin AddRaw(ToXMLData(Data)); end; procedure TXMLWriter.AddEntity(const EntityName: WideString); begin AddRaw('&' + EntityName + ';'); end; procedure TXMLWriter.AddRaw(const Data: WideString); begin AssertLastTagClosed; FStream.WriteString(Data); end; procedure TXMLWriter.AddTag(const Tag, Data: WideString); begin StartTag(Tag); AddData(Data); StopTag; end; procedure TXMLWriter.AssertLastTagClosed; begin {require} // we are at the beginning of a new line if not FIsStartTagClosed then begin FSTream.WriteString('>'); end; FIsStartTagClosed := True; {ensure} Assert(FIsStartTagClosed, 'ensure: start tag ends with >.'); end; function TXMLWriter.AsLatin1: String; begin Result := AsString; end; function TXMLWriter.AsString: WideString; begin {require} Assert(not IsTagStarted, 'require: no tag has been started.'); if not FNewLineWritten then NewLine; Result := FStream.DataString; end; function TXMLWriter.AsUTF8: UTF8String; begin Result := UTF8Encode(AsString); end; procedure TXMLWriter.Clear; begin FTagStack.Clear; FSTream.Free; FStream := TSmartMemoryStream.Create; // set initial size so it doesn't grow unless really needed FStream.Size := FInitialCapacity; FIsStartTagClosed := True; if not FHeaderless then begin FStream.WriteString(' encUnspecified then begin FStream.WriteString (' encoding="'); case FEncoding of encISO_8859_1: FStream.WriteString('ISO-8859-1'); encUTF_8: FStream.WriteString('UTF-8'); encUTF_16: FStream.WriteString('UTF-16'); end; FStream.WriteString('"'); end; FStream.WriteString('?>'); NewLine; end else begin FNewLineWritten := True; end; end; procedure TXMLWriter.Indent; begin FStream.WriteString(StringOfChar(' ', 2*FTagStack.Count)); FNewLineWritten := False; end; function TXMLWriter.IsTagStarted: Boolean; begin Result := FTagStack.Count > 0; end; function TXMLWriter.IsValidAttributeName( const AAttributeName: WideString): Boolean; { a too simple check for invalid characters, more or less correct for ANSI names. } const InvalidFirstChar = [#0..#255] - ['a'..'z','A'..'Z','_',':']; InvalidOtherChar = InvalidFirstChar - ['.','-','0'..'9',#$B7]; // [#0..#$40,#$5B..#$60,#$7B..#$FF]; //InvalidOtherChar = [#0..#$2C,#$5B..#$60,#$7B..#$B6,#$B8..#$FF]; //ValidNameOtherChar:WideString = ['a'..'z','A'..'Z','_',':','0'..'9','.','-']; var i: integer; c: Char; begin Result := AAttributeName <> ''; if Result then begin if Ord(AAttributeName[1]) < 256 then begin c := Char(Ord(AAttributeName[1])); Result := not (c in InvalidFirstChar); end else Result := True; if Result then begin for i := 2 to length(AAttributeName) do begin if Ord(AAttributeName[i]) < 256 then begin c := Char(Ord(AAttributeName[1])); Result := not (c in InvalidOtherChar); if not Result then break; end; end; end; end; end; function TXMLWriter.IsValidTagName( const ATagName: WideString): Boolean; { not entirely correct implementation as some names are correct but it still returns False, but that occurs only with names using characters outside the american alphabet } begin Result := IsValidAttributeName(ATagName); end; function TXMLWriter.LastTag: WideString; begin {require} Assert(not FTagStack.IsEmpty, 'require: tag_is_started.'); Result := FTagStack.Top; end; function TXMLWriter.MakeValidAttributeValue( const AValue: WideString): WideString; begin Result := ToXMLAttributeValue(AValue); end; procedure TXMLWriter.NewLine; const CRLF = #13#10; begin FStream.WriteString(CRLF); FNewLineWritten := True; end; procedure TXMLWriter.SetAttribute(const Attribute, Value: WideString); begin {require} Assert(IsValidAttributeName(Attribute), Format('require: valid attribute name (wrong: %s).', [Attribute])); Assert(IsTagStarted, 'require: there should be a tag to set attributes.'); Assert(not FIsStartTagClosed, 'require: start tag''s closing > not already written.'); FStream.WriteString(' '); FStream.WriteString(Attribute); FStream.WriteString('="'); FStream.WriteString(MakeValidAttributeValue(Value)); FStream.WriteString('"'); end; procedure TXMLWriter.SetDocType(const RootTag, SystemId: WideString); begin {require} Assert(FTagStack.IsEmpty, 'require: start of document.'); FSTream.WriteString(Format('', [RootTag, SystemId])); NewLine; end; procedure TXMLWriter.StartTag(const Tag: WideString); begin {require} Assert(IsValidTagName(Tag), Format('require: valid tag name (wrong: %s).', [Tag])); // cursor is at the beginning of a new line AssertLastTagClosed; if not FNewLineWritten then NewLine; Indent; FStream.WriteString('<'); FStream.WriteString(Tag); FIsStartTagClosed := False; FTagStack.Push(Tag); end; procedure TXMLWriter.StopTag; var TagName: WideString; begin {require} Assert(IsTagStarted, 'require: tag_is_started.'); TagName := LastTag; FTagStack.Pop; if FIsStartTagCLosed then begin if FNewLineWritten then Indent; FStream.WriteString(''); end else begin FStream.WriteString('/>'); FIsStartTagClosed := True; end; NewLine; {ensure} Assert(FIsStartTagClosed, 'ensure: start tag ends with >.'); end; function TXMLWriter.XMLDate(dt: TDateTime): WideString; begin Result := FormatDateTime('yyyy-mm-dd', dt); end; function TXMLWriter.XMLDateTime(dt: TDateTime): WideString; var p: Integer; begin Result := FormatDateTime('yyyy-mm-dd hh:nn:ss', dt); p := Pos(' ', Result); if p > 0 then Result[p] := 'T'; end; function TXMLWriter.XMLTime(dt: TDateTime): WideString; begin {require} // date part is 0 or 1 if dt >= 1 then Result := '24:00:00' else Result := FormatDateTime('hh:nn:ss', dt); end; end.