(* Copyright (C) 1992, Digital Equipment Corporation           *)
(* All rights reserved.                                        *)
(* See the file COPYRIGHT for a full description.              *)

(* File: String.m3                                             *)
(* Last modified on Mon Jun  8 14:16:05 PDT 1992 by kalsow     *)
(*      modified on Wed Nov 28 02:23:29 1990 by muller         *)

UNSAFE MODULE String;

IMPORT MBuf, Text, TextF, Word, Module, Convert;

CONST
  NO_TEXT    = -1; (* textID *)
  C_RESERVED = -2; (* internal string class *)
  MIN_BUFFER = 8192 - 2 * BYTESIZE (INTEGER); (* leave room for headers *)

TYPE
  StrPtr = UNTRACED REF CHAR;
  Buffer = UNTRACED REF ARRAY OF CHAR;

REVEAL
  T = UNTRACED BRANDED REF RECORD
    prefix : T         := NIL;
    suffix : T         := NIL;
    start  : StrPtr    := NIL;
    length : INTEGER   := 0;
    class  : INTEGER   := 0;
    hash   : INTEGER   := 0;
    textID : INTEGER   := NO_TEXT;
    uid    : INTEGER   := NO_TEXT;
    next   : T         := NIL;
  END;
  (* There are three variants of a String.T:
       (textID # NO_TEXT) => the whole string is in texts[textID]
       (start # NIL)      => the characters in [start..start+length)
       ELSE               => prefix & suffix
  *)

CONST
  Digits = ARRAY [0..9] OF CHAR {'0','1','2','3','4','5','6','7','8','9'};

VAR
  nextChar  : CARDINAL := 0;
  nTexts    : INTEGER := 0;
  hashTable := NEW (REF ARRAY OF T, 2048);
  texts     := NEW (REF ARRAY OF TEXT, 256);
  chars     := NEW (Buffer, MIN_BUFFER);
  int_cache := ARRAY [0..31] OF T {NIL, ..};
  next_t    : T := NIL;
  i_suffix  : T := NIL;
  m_suffix  : T := NIL;
  nStrings  : INTEGER := 0;

(*-------------------------------------------------------------- exported ---*)

PROCEDURE Add (x: TEXT): T =
  VAR t: T;  my_id: INTEGER;
  BEGIN
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    my_id := InternText (x); (* speculative *)
    next_t.textID := my_id;
    next_t.start  := NIL;
    next_t.length := Text.Length (x);
    next_t.prefix := NIL;
    next_t.suffix := NIL;
    t := Intern ();
    IF (t.textID # my_id) THEN (* we already had this text *) DEC (nTexts) END;
    RETURN t;
  END Add;

PROCEDURE AddInt (i: INTEGER): T =
  BEGIN
    IF (i < 0) OR (LAST (int_cache) < i) THEN RETURN NewInt (i) END;
    IF int_cache [i] = NIL THEN int_cache [i] := NewInt (i) END;
    RETURN int_cache [i];
  END AddInt;

PROCEDURE FromStr (READONLY buf: ARRAY OF CHAR;  length: INTEGER): T =
  VAR t: T;
  BEGIN
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    length := MIN (length, NUMBER (buf));
    next_t.textID := NO_TEXT;
    next_t.start  := ADR (buf [0]);
    next_t.length := length;
    next_t.prefix := NIL;
    next_t.suffix := NIL;
    t := Intern ();
    IF (next_t = NIL) THEN (* a new string! *)
      IF (t.textID = NO_TEXT) THEN (* it's using the new characters! *)
        IF (nextChar + length > LAST (chars^)) THEN
          chars := NEW (Buffer, MAX (MIN_BUFFER, length+1));
          nextChar := 0;
        END;
        SUBARRAY (chars^, nextChar, length) := SUBARRAY (buf, 0, length);
        chars [nextChar + length] := '\000';
        t.start := ADR (chars [nextChar]);
        INC (nextChar, length+1)
      ELSE (* there's a text version of the string *)
        t.start  := NIL;
      END;
    END;
    RETURN t;
  END FromStr;

PROCEDURE Concat (a, b: T): T =
  BEGIN
    IF (a = NIL) OR (a.length = 0) THEN RETURN b END;
    IF (b = NIL) OR (b.length = 0) THEN RETURN a END;
    IF (next_t = NIL) THEN next_t := NEW (T) END;
    next_t.textID := NO_TEXT;
    next_t.start  := NIL;
    next_t.length := a.length + b.length;
    next_t.prefix := a;
    next_t.suffix := b;
    RETURN Intern ();
  END Concat;

PROCEDURE Unique (root: T): T =
  VAR suffix: T;  j: INTEGER;  counter := Module.CurrentCounter ();
  BEGIN
    (* bump the counter *)
    j := LAST (counter);
    WHILE (counter[j] = '9') DO  counter[j] := '0'; DEC (j);  END;
    counter[j] := VAL (ORD (counter[j]) + 1, CHAR);
    Module.SetCurrentCounter (counter);

    (* get the right suffix: 'I' or 'M' *)
    IF Module.IsInterface () THEN
      IF i_suffix = NIL THEN i_suffix := Add ("I") END;
      suffix := i_suffix;
    ELSE
      IF m_suffix = NIL THEN m_suffix := Add ("M") END;
      suffix := m_suffix;
    END;

    RETURN Concat (Concat (root, Module.CurrentName ()),
                   Concat (FromStr (counter, NUMBER (counter)), suffix));
  END Unique;

PROCEDURE ToText (t: T): TEXT =
  VAR x: TEXT;
  BEGIN
    IF (t = NIL) THEN RETURN NIL END;
    IF (t.textID = NO_TEXT) THEN
      x := TextF.New (t.length);
      Flatten (t, x^, 0);
      t.textID := InternText (x);
    END;
    RETURN texts [t.textID];
  END ToText;

PROCEDURE Put (wr: MBuf.T;  t: T) =
  VAR txt: TEXT;  p: StrPtr;
  BEGIN
    IF (t = NIL) THEN
      (* done *)
    ELSIF (t.textID # NO_TEXT) THEN
      txt := texts[t.textID];
      FOR i := 0 TO t.length-1 DO EmitChar (wr, txt[i]) END;
    ELSIF (t.start # NIL) THEN
      p := t.start;
      FOR i := 0 TO t.length-1 DO
        EmitChar (wr, p^);
        INC (p, ADRSIZE (CHAR));
      END;
    ELSE
      Put (wr, t.prefix);
      Put (wr, t.suffix);
    END;
  END Put;

PROCEDURE PutChars (wr: MBuf.T;  t: T) =
  VAR txt: TEXT;  p: StrPtr;  n := 0;
  BEGIN
    IF (t = NIL) THEN
      (* done *)
    ELSIF (t.textID # NO_TEXT) THEN
      txt := texts[t.textID];
      FOR i := 0 TO t.length-1 DO EmitCharLiteral (wr, txt[i], i, n) END;
    ELSIF (t.start # NIL) THEN
      p := t.start;
      FOR i := 0 TO t.length-1 DO
        EmitCharLiteral (wr, p^, i, n);
        INC (p, ADRSIZE (CHAR));
      END;
    ELSE
      PutChars (wr, t.prefix);
      IF (Length (t.prefix) > 0) THEN MBuf.PutText (wr, ",\n") END;
      PutChars (wr, t.suffix);
    END;
  END PutChars;

PROCEDURE PutStack (wr: MBuf.T; s: Stack) =
  BEGIN
    FOR i := 0 TO s.top - 1 DO Put (wr, s.stk[i]); END;
  END PutStack;

PROCEDURE SetClass (t: T;  class: CARDINAL) =
  BEGIN
    t.class := class;
  END SetClass;

PROCEDURE GetClass (t: T): INTEGER =
  BEGIN
    IF (t.class < 0)
      THEN RETURN 0
      ELSE RETURN t.class;
    END;
  END GetClass;

PROCEDURE Length (t: T): INTEGER =
  BEGIN
    IF (t = NIL)
      THEN RETURN 0;
      ELSE RETURN t.length;
    END;
  END Length;

PROCEDURE GetUID (t: T): INTEGER =
  BEGIN
    RETURN t.uid;
  END GetUID;

PROCEDURE SetUID (t: T;  uid: INTEGER) =
  BEGIN
    t.uid := uid;
  END SetUID;

PROCEDURE Hash (t: T): INTEGER =
  BEGIN
    IF (t = NIL)
      THEN RETURN 953;
      ELSE RETURN t.hash;
    END;
  END Hash;

PROCEDURE IsReservedC (t: T): BOOLEAN =
  BEGIN
    RETURN (t # NIL) AND (t.class = C_RESERVED);
  END IsReservedC;

PROCEDURE FileTail (t: T): T =
  VAR x, len: INTEGER;  txt: TEXT;
  BEGIN
    IF (t = NIL) THEN RETURN NIL END;
    txt := ToText (t);

    (* search for the last slash in the string *)
    len := t.length;
    x := len - 1;
    WHILE (x >= 0) AND (txt[x] # '/') DO DEC (x) END;

    IF (x < 0) THEN (* no slashes *) RETURN t END;

    (* else,  build and return the new tail *)
    RETURN FromStr (SUBARRAY (txt^, x+1, len-x-1),  len);
  END FileTail;

(*-------------------------------------------------------------- internal ---*)

PROCEDURE NewInt (i: INTEGER): T =
  <*FATAL Convert.Failed*>
  VAR len: INTEGER;  buf: ARRAY [0..BITSIZE(INTEGER)-1] OF CHAR;
  BEGIN
    len := Convert.FromInt (buf, i);
    RETURN FromStr (buf, len);
  END NewInt;

PROCEDURE InternText (x: TEXT): INTEGER =
  VAR n := nTexts;
  BEGIN
    IF (n > LAST (texts^)) THEN ExpandTexts () END;
    texts[n] := x;
    INC (nTexts);
    RETURN n;
  END InternText;

PROCEDURE ExpandTexts () =
  VAR new := NEW (REF ARRAY OF TEXT, 2 * NUMBER (texts^));
  BEGIN
    FOR i := 0 TO LAST (texts^) DO new[i] := texts[i] END;
    texts := new;
  END ExpandTexts;

PROCEDURE Intern (): T =
  VAR hash, bucket: INTEGER; t: T;
  BEGIN
    (* search the hash table *)
    next_t.hash := 0;
    hash := InternHash (next_t, 0);
    bucket := hash MOD NUMBER (hashTable^);
    t := hashTable[bucket];
    WHILE (t # NIL) DO
      IF (t.hash = hash) AND Equal (t, next_t) THEN
        (* we found a hit! *)
        IF (t.textID = NO_TEXT) THEN
          (* remember the new TEXT if it exists *)
          t.textID := next_t.textID;
        END;
        RETURN t;
      END;
      t := t.next;
    END;

    (* we didn't find the string => add it to the hash table *)
    t := next_t;
    t.hash := hash;
    t.next := hashTable [bucket];
    hashTable [bucket] := t;
    next_t := NIL; (* since we've used it! *)
    INC (nStrings);
    IF (nStrings > 2 * NUMBER (hashTable^)) THEN ExpandHashTable () END;
    RETURN t;
  END Intern;

PROCEDURE ExpandHashTable () =
  VAR
    n_old := NUMBER (hashTable^);
    n_new := n_old * 2 + 7;
    new   := NEW (REF ARRAY OF T, n_new);
    t, u  : T;
    x     : INTEGER;
  BEGIN
    FOR i := 0 TO n_new - 1 DO new[i] := NIL END;

    FOR i := 0 TO n_old - 1 DO
      t := hashTable [i];
      WHILE (t # NIL) DO
        u := t.next;
        x := t.hash MOD n_new;
        t.next := new [x];
        new [x] := t;
        t := u;
      END;
    END;

    hashTable := new;
  END ExpandHashTable;

PROCEDURE InternHash (t: T;  hash: INTEGER): INTEGER =
  VAR p: StrPtr;  txt: TEXT;
  BEGIN
    IF (t = NIL) THEN RETURN 0 END;
    IF (hash = 0) AND (t.hash # 0) THEN RETURN t.hash END;

    IF (t.textID # NO_TEXT) THEN
      txt := texts [t.textID];
      FOR i := 0 TO t.length - 1 DO
        hash := 2 * hash + ORD (txt[i]);
      END;
    ELSIF (t.start # NIL) THEN
      p := t.start;
      FOR i := 0 TO t.length - 1 DO
        hash := 2 * hash + ORD (p^);
        INC (p, ADRSIZE (CHAR));
      END;
    ELSE (* a concatentation *)
      hash := InternHash (t.prefix, hash);
      hash := InternHash (t.suffix, hash);
    END;

    RETURN hash;
  END InternHash;

PROCEDURE Equal (a, b: T): BOOLEAN =
  VAR
    a_len := a.length;
    b_len := b.length;
    a_ptr : StrPtr;
    b_ptr : StrPtr;
    a_buf : ARRAY [0..49] OF CHAR;
    b_buf : ARRAY [0..49] OF CHAR;
    a_txt : TEXT := NIL;
    b_txt : TEXT := NIL;
  BEGIN
    IF (a_len # b_len) THEN RETURN FALSE END;

    IF (a.textID # NO_TEXT) THEN
      a_ptr := ADR (texts[a.textID][0]);
    ELSIF (a.start # NIL) THEN
      a_ptr := a.start;
    ELSIF (a_len < NUMBER (a_buf)) THEN
      Flatten (a, a_buf, 0);
      a_ptr := ADR (a_buf[0]);
    ELSE
      a_txt := TextF.New (a_len);
      Flatten (a, a_txt^, 0);
      a_ptr := ADR (a_txt[0]);
    END;

    IF (b.textID # NO_TEXT) THEN
      b_ptr := ADR (texts[b.textID][0]);
    ELSIF (b.start # NIL) THEN
      b_ptr := b.start;
    ELSIF (b_len < NUMBER (b_buf)) THEN
      Flatten (b, b_buf, 0);
      b_ptr := ADR (b_buf[0]);
    ELSE
      b_txt := TextF.New (b_len);
      Flatten (b, b_txt^, 0);
      b_ptr := ADR (b_txt[0]);
    END;

    FOR i := 0 TO a_len-1 DO
      IF (a_ptr^ # b_ptr^) THEN
        (* not equal! *)
        IF (a_txt # NIL) THEN a.textID := InternText (a_txt) END;
        IF (b_txt # NIL) THEN b.textID := InternText (b_txt) END;
        RETURN FALSE;
      END;
      INC (a_ptr, ADRSIZE (CHAR));
      INC (b_ptr, ADRSIZE (CHAR));
    END;

    (* intern any new texts *)
    IF (a_txt = NIL) AND (b_txt = NIL) THEN
      (* ok *)
    ELSIF (a_txt = NIL) THEN
      IF (a.textID # NO_TEXT)
        THEN b.textID := a.textID;
        ELSE b.textID := InternText (b_txt); a.textID := b.textID;
      END;
    ELSIF (b_txt = NIL) THEN
      IF (b.textID # NO_TEXT)
        THEN a.textID := b.textID;
        ELSE a.textID := InternText (a_txt); b.textID := a.textID;
      END;
    ELSE (* both are texts are new *)
      a.textID := InternText (a_txt);
      b.textID := a.textID;
    END;

    RETURN TRUE;
  END Equal;

PROCEDURE Flatten (t: T;  VAR buf: ARRAY OF CHAR;  start: INTEGER) =
  CONST N = 1024;
  VAR txt: TEXT;  p: UNTRACED REF ARRAY [0..N-1] OF CHAR;  len: INTEGER;
  BEGIN
    WHILE (t # NIL) DO
      IF (t.textID # NO_TEXT) THEN
        txt := texts [t.textID];
        len := t.length;
        SUBARRAY (buf, start, len) := SUBARRAY (txt^, 0, len);
        t := NIL;
      ELSIF (t.start # NIL) THEN
        p := ADR (t.start^);
        len := t.length;
        WHILE (len >= N) DO
          SUBARRAY (buf, start, N) := p^;
          INC (p, ADRSIZE (p^));
          INC (start, N);
          DEC (len, N);
        END;
        IF (len > 0) THEN
          SUBARRAY (buf, start, len) := SUBARRAY (p^, 0, len);
        END;
        t := NIL;
      ELSE
        Flatten (t.suffix, buf, start + Length (t.prefix));
        t := t.prefix;
      END;
    END;
  END Flatten;

PROCEDURE EmitCharLiteral (wr: MBuf.T;  c: CHAR; i: INTEGER; VAR n: INTEGER) =
  BEGIN
    IF (i > 0) THEN  MBuf.PutChar (wr, ',')  END;
    IF (n >= 20) THEN  MBuf.PutChar (wr, '\n'); n := 0  END;
    MBuf.PutChar (wr, '\'');
    EmitChar (wr, c);
    MBuf.PutChar (wr, '\'');
    INC (n);
  END EmitCharLiteral;

PROCEDURE EmitChar (wr: MBuf.T;  c: CHAR) =
  VAR i: INTEGER;
  BEGIN
    IF (c < ' ') OR (c = '\"') OR (c = '\'') OR ('~' < c) OR (c = '\\') THEN
      i := Word.And (ORD (c), 255);
      MBuf.PutChar (wr, '\\');
      MBuf.PutChar (wr, Digits[i DIV 64]);  i := Word.And (i, 63);
      MBuf.PutChar (wr, Digits[i DIV 8]);   i := Word.And (i, 7);
      MBuf.PutChar (wr, Digits[i]);
    ELSE (* simple graphic character *)
      MBuf.PutChar (wr, c);
    END;
  END EmitChar;

(*-------------------------------------------------------- initialization ---*)

CONST
  RW = ARRAY OF TEXT { "asm", "auto", "break", "case", "char", "const",
                       "continue", "default", "do", "double", "else", "enum",
                       "extern", "float", "for", "goto", "if", "int", "long",
                       "register", "return", "short", "signed", "sizeof",
                       "static", "struct", "switch", "typedef", "union",
                       "unsigned", "void", "volatile", "while",
                       (*** gcc 2.0 hacks ***)
                       "inline", "typeof" };

PROCEDURE Initialize () =
  VAR t: T;
  BEGIN
    FOR i := 0 TO LAST (hashTable^) DO
      hashTable[i] := NIL;
    END;
    FOR i := 0 TO LAST (RW) DO
      t := Add (RW [i]);
      <* ASSERT t.class = 0 *>
      t.class := C_RESERVED;
    END;
  END Initialize;

PROCEDURE Reset () =
  VAR t: T;
  BEGIN
    FOR i := FIRST (hashTable^) TO LAST (hashTable^) DO
      t := hashTable[i];
      WHILE (t # NIL) DO t.uid := NO_TEXT;  t := t.next END;
    END;
  END Reset;

BEGIN
END String.
