{ Created: 2001-11-20 by Berend de Boer <berend@pobox.com>

  See http://www.pobox.com/~berend/delphi/ for updates.


  Low level object that can generate XML and keeps some state so it
  can check if you made a mistake. XML generation is streaming, so you
  cannot change it once it is written.

}


unit xml_writer;

interface

uses
  SysUtils,
  string_stack,
  smart_mem,
  utf_util;


const { the only known XML entities }
  QuoteAmp = '&amp;';
  QuoteLt = '&lt;';
  QuoteGt = '&gt;';
  QuoteQuote = '&quot;';
  QuoteApos = '&apos;';

type { supported encodings }
  TXMLEncoding = (encUnspecified, encISO_8859_1, encUTF_16, encUTF_8);

type
  TXMLWriter = class
  protected { state }
    FEncoding: TXMLEncoding;
      { encoding used }
    FHeaderless: Boolean;
      { create XML header or not }
    FInitialCapacity: Integer;
      { capacity for XML string }
    FNewLineWritten: Boolean;
      { we've ended the current string with a new line }
    FStream: TSmartMemoryStream;
      { where the XML is stored }
    FTagStack: TStringStack;
      { we keep track of tag nesting }
    FIsStartTagClosed: Boolean;

    procedure AssertLastTagClosed;
    procedure Indent;
    function  IsValidAttributeName(const AAttributeName: WideString): Boolean;
      { returns True if AAttributeName is according to the XML specs }

    function  IsValidTagName(const ATagName: WideString): Boolean;
      { returns True if AAttributeName is according to the XML specs }

    function  LastTag: WideString;

    function  MakeValidAttributeValue(const AValue: WideString): WideString;
      { make AValue a valid contents for an attribute, for example any '"'
        occuring in the string is prefixed }

  public
    constructor Create(AInitialCapacity: Integer);
    constructor CreateFragment(AInitialCapacity: Integer);
    constructor CreateWithEncoding(AInitialCapacity: Integer; AEncoding: TXMLEncoding);
    destructor Destroy; override;

    procedure AddData(const Data: WideString);
      { write data in the current tag }

    procedure AddEntity(const EntityName: WideString);
      { write an entity }

    procedure AddRaw(const Data: WideString);
      { write uninterpreted block of data, client has to make sure this is valid }

    procedure AddTag(const Tag, Data: WideString);
      { equal to StartTag, AddData and StopTag }

    function  AsLatin1: String;
      { return XML document in ISO-8859-1 format }

    function  AsString: WideString;
      { return XML document as text }

    function  AsUTF8: UTF8String;
      { return XML document in UTF8 format }

    procedure Clear;
      { start with a new, empty document or fragment }

    function IsTagStarted: Boolean;

    procedure NewLine;
    procedure SetAttribute(const Attribute, Value: WideString);
    procedure SetDocType(const RootTag, SystemId: WideString);
    procedure StartTag(const Tag: WideString);

    procedure StopTag;
      { if no data written, tag is closed with /> }

  public { utility functions to format date/time }
    function XMLDate(dt: TDateTime): WideString;
      { our way of writing dates }

    function XMLDateTime(dt: TDateTime): WideString;
      { our way of writing datetimes }

    function XMLTime(dt: TDateTime): WideString;
      { our way of writing times }
  end;


function ToXMLAttributeValue(const s: WideString): WideString;
function ToXMLData(const s: WideString): WideString;


implementation


{ utility functions }

const
  PartQuoteAmp = 'amp;';
  PartQuoteLt = 'lt;';
  PartQuoteGt = 'gt;';
  PartQuoteQuote = 'quot;';

function ToXMLAttributeValue(const s: WideString): WideString;
var
  i: integer;
begin
  Result := s;
  i := 1;
  while i <= length(Result) do
  begin
    case Result[i] of
      '"':
        begin
          Result[i] := '&';
          Insert(PartQuoteQuote, Result, i+1);
          i := i + length(QuoteQuote);
        end;
      '<':
        begin
          Result[i] := '&';
          Insert(PartQuoteLt, Result, i+1);
          i := i + length(QuoteLt);
        end;
      '&':
        begin
          Result[i] := '&';
          Insert(PartQuoteAmp, Result, i+1);
          i := i + length(QuoteAmp);
        end;
    else
      Inc(i);
    end;
  end;
end;


function ToXMLData(const s: WideString): WideString;
var
  i: integer;
begin
  Result := s;
  i := 1;
  while i <= length(Result) do
  begin
    case Result[i] of
      '<':
        begin
          Result[i] := '&';
          Insert(PartQuoteLt, Result, i+1);
          i := i + length(QuoteLt);
        end;
      '&':
        begin
          Result[i] := '&';
          Insert(PartQuoteAmp, Result, i+1);
          i := i + length(QuoteAmp);
        end;
    else
      Inc(i);
    end;
  end;
end;


{ TXMLWriter }

constructor TXMLWriter.Create(AInitialCapacity: Integer);
begin
  CreateWithEncoding(AInitialCapacity, encUnspecified);
end;


constructor TXMLWriter.CreateFragment(AInitialCapacity: Integer);
begin
{require}
  Assert(AInitialCapacity >= 0, 'require: positive capacity.');

  FTagStack := TStringStack.Create;
  FHeaderless := True;
  FInitialCapacity := AInitialCapacity;
  FEncoding := encUnspecified;
  Clear;
end;


constructor TXMLWriter.CreateWithEncoding(AInitialCapacity: Integer; AEncoding: TXMLEncoding);
begin
{require}
  Assert(AInitialCapacity >= 0, 'require: positive capacity.');

  FTagStack := TStringStack.Create;
  FInitialCapacity := AInitialCapacity;
  FEncoding := AEncoding;
  Clear;
end;


destructor TXMLWriter.Destroy;
begin
  FTagStack.Free;
  FStream.Free;
  inherited;
end;


procedure TXMLWriter.AddData(const Data: WideString);
begin
  AddRaw(ToXMLData(Data));
end;


procedure TXMLWriter.AddEntity(const EntityName: WideString);
begin
  AddRaw('&' + EntityName + ';');
end;


procedure TXMLWriter.AddRaw(const Data: WideString);
begin
  AssertLastTagClosed;
  FStream.WriteString(Data);
end;


procedure TXMLWriter.AddTag(const Tag, Data: WideString);
begin
  StartTag(Tag);
  AddData(Data);
  StopTag;
end;


procedure TXMLWriter.AssertLastTagClosed;
begin
{require}
  // we are at the beginning of a new line

  if not FIsStartTagClosed then
  begin
    FSTream.WriteString('>');
  end;
  FIsStartTagClosed := True;

{ensure}
  Assert(FIsStartTagClosed, 'ensure: start tag ends with >.');
end;


function TXMLWriter.AsLatin1: String;
begin
  Result := AsString;
end;


function TXMLWriter.AsString: WideString;
begin
{require}
  Assert(not IsTagStarted, 'require: no tag has been started.');

  if not FNewLineWritten then
    NewLine;
  Result := FStream.DataString;
end;


function TXMLWriter.AsUTF8: UTF8String;
begin
  Result := UTF8Encode(AsString);
end;


procedure TXMLWriter.Clear;
begin
  FTagStack.Clear;
  FSTream.Free;
  FStream := TSmartMemoryStream.Create;
  // set initial size so it doesn't grow unless really needed
  FStream.Size := FInitialCapacity;
  FIsStartTagClosed := True;
  if not FHeaderless then
  begin
    FStream.WriteString('<?xml version="1.0"');
    if FEncoding <> encUnspecified then
    begin
      FStream.WriteString (' encoding="');
      case FEncoding of
        encISO_8859_1: FStream.WriteString('ISO-8859-1');
        encUTF_8: FStream.WriteString('UTF-8');
        encUTF_16: FStream.WriteString('UTF-16');
      end;
      FStream.WriteString('"');
    end;
    FStream.WriteString('?>');
    NewLine;
  end
  else
  begin
    FNewLineWritten := True;
  end;
end;


procedure TXMLWriter.Indent;
begin
  FStream.WriteString(StringOfChar(' ', 2*FTagStack.Count));
  FNewLineWritten := False;
end;


function TXMLWriter.IsTagStarted: Boolean;
begin
  Result := FTagStack.Count > 0;
end;


function TXMLWriter.IsValidAttributeName(
  const AAttributeName: WideString): Boolean;
{ a too simple check for invalid characters, more or less correct
  for ANSI names. }
const
  InvalidFirstChar = [#0..#255] - ['a'..'z','A'..'Z','_',':'];
  InvalidOtherChar = InvalidFirstChar - ['.','-','0'..'9',#$B7];
//    [#0..#$40,#$5B..#$60,#$7B..#$FF];
  //InvalidOtherChar = [#0..#$2C,#$5B..#$60,#$7B..#$B6,#$B8..#$FF];
  //ValidNameOtherChar:WideString = ['a'..'z','A'..'Z','_',':','0'..'9','.','-'];
var
  i: integer;
  c: Char;
begin
  Result := AAttributeName <> '';
  if Result then
  begin
    if Ord(AAttributeName[1]) < 256 then
    begin
      c := Char(Ord(AAttributeName[1]));
      Result := not (c in InvalidFirstChar);
    end
    else
      Result := True;
    if Result then
    begin
      for i := 2 to length(AAttributeName) do
      begin
        if Ord(AAttributeName[i]) < 256 then
        begin
          c := Char(Ord(AAttributeName[1]));
          Result := not (c in InvalidOtherChar);
          if not Result then
            break;
        end;
      end;
    end;
  end;
end;


function TXMLWriter.IsValidTagName(
  const ATagName: WideString): Boolean;
{ not entirely correct implementation as some names are correct
  but it still returns False, but that occurs only with names using
  characters outside the american alphabet }
begin
  Result := IsValidAttributeName(ATagName);
end;


function TXMLWriter.LastTag: WideString;
begin
{require}
  Assert(not FTagStack.IsEmpty, 'require: tag_is_started.');

  Result := FTagStack.Top;
end;


function TXMLWriter.MakeValidAttributeValue(
  const AValue: WideString): WideString;
begin
  Result := ToXMLAttributeValue(AValue);
end;


procedure TXMLWriter.NewLine;
const
  CRLF = #13#10;
begin
  FStream.WriteString(CRLF);
  FNewLineWritten := True;
end;


procedure TXMLWriter.SetAttribute(const Attribute, Value: WideString);
begin
{require}
  Assert(IsValidAttributeName(Attribute), Format('require: valid attribute name (wrong: %s).', [Attribute]));
  Assert(IsTagStarted, 'require: there should be a tag to set attributes.');
  Assert(not FIsStartTagClosed, 'require: start tag''s closing > not already written.');

  FStream.WriteString(' ');
  FStream.WriteString(Attribute);
  FStream.WriteString('="');
  FStream.WriteString(MakeValidAttributeValue(Value));
  FStream.WriteString('"');
end;


procedure TXMLWriter.SetDocType(const RootTag, SystemId: WideString);
begin
{require}
  Assert(FTagStack.IsEmpty, 'require: start of document.');

  FSTream.WriteString(Format('<!DOCTYPE %s SYSTEM "%s">', [RootTag, SystemId]));
  NewLine;
end;


procedure TXMLWriter.StartTag(const Tag: WideString);
begin
{require}
  Assert(IsValidTagName(Tag), Format('require: valid tag name (wrong: %s).', [Tag]));
  // cursor is at the beginning of a new line

  AssertLastTagClosed;
  if not FNewLineWritten then
    NewLine;
  Indent;
  FStream.WriteString('<');
  FStream.WriteString(Tag);
  FIsStartTagClosed := False;
  FTagStack.Push(Tag);
end;


procedure TXMLWriter.StopTag;
var
  TagName: WideString;
begin
{require}
  Assert(IsTagStarted, 'require: tag_is_started.');

  TagName := LastTag;
  FTagStack.Pop;
  if FIsStartTagCLosed then
  begin
    if FNewLineWritten then
      Indent;
    FStream.WriteString('</');
    FStream.WriteString(TagName);
    FStream.WriteString('>');
  end
  else
  begin
    FStream.WriteString('/>');
    FIsStartTagClosed := True;
  end;
  NewLine;

{ensure}
  Assert(FIsStartTagClosed, 'ensure: start tag ends with >.');
end;


function TXMLWriter.XMLDate(dt: TDateTime): WideString;
begin
  Result := FormatDateTime('yyyy-mm-dd', dt);
end;


function TXMLWriter.XMLDateTime(dt: TDateTime): WideString;
var
  p: Integer;
begin
  Result := FormatDateTime('yyyy-mm-dd hh:nn:ss', dt);
  p := Pos(' ', Result);
  if p > 0 then
    Result[p] := 'T';
end;


function TXMLWriter.XMLTime(dt: TDateTime): WideString;
begin
{require}
  // date part is 0 or 1

  if dt >= 1 then
    Result := '24:00:00'
  else
    Result := FormatDateTime('hh:nn:ss', dt);
end;


end.
