cProtoBufProtoParser.pas 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270
  1. {******************************************************************************}
  2. { }
  3. { Library: Fundamentals 4.00 }
  4. { File name: cProtoBufProtoParser.pas }
  5. { File version: 0.03 }
  6. { Description: Protocol Buffer proto file parser. }
  7. { }
  8. { Copyright: Copyright (c) 2012-2013, David J Butler }
  9. { All rights reserved. }
  10. { This file is licensed under the BSD License. }
  11. { See http://www.opensource.org/licenses/bsd-license.php }
  12. { Redistribution and use in source and binary forms, with }
  13. { or without modification, are permitted provided that }
  14. { the following conditions are met: }
  15. { Redistributions of source code must retain the above }
  16. { copyright notice, this list of conditions and the }
  17. { following disclaimer. }
  18. { THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND }
  19. { CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED }
  20. { WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED }
  21. { WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A }
  22. { PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL }
  23. { THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, }
  24. { INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR }
  25. { CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, }
  26. { PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF }
  27. { USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) }
  28. { HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER }
  29. { IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING }
  30. { NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE }
  31. { USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE }
  32. { POSSIBILITY OF SUCH DAMAGE. }
  33. { }
  34. { Home page: http://fundementals.sourceforge.net }
  35. { Forum: http://sourceforge.net/forum/forum.php?forum_id=2117 }
  36. { E-mail: fundamentals.library@gmail.com }
  37. { }
  38. { Revision history: }
  39. { }
  40. { 2012/04/15 0.01 Lexer and basic parsing. }
  41. { 2012/04/17 0.02 Line comments, complex types, recursive declarations. }
  42. { 2012/04/25 0.03 Allow enums definitions at root level. }
  43. { 2012/04/26 0.03 Parse and use imported packages. }
  44. { Parser keeps track of line number. }
  45. { }
  46. {******************************************************************************}
  47. {$INCLUDE cProtoBuf.inc}
  48. unit cProtoBufProtoParser;
  49. interface
  50. uses
  51. { System }
  52. SysUtils,
  53. { Fundamentals }
  54. cUtils,
  55. cProtoBufProtoNodes;
  type
    { Kinds of token the lexer can report. }
    { Declaration order is significant: ExpectIdentifier compares a token }
    { ordinally against the pptKeywordFirst..pptKeywordLast range below. }
    TpbProtoParserToken = (
        // no token / end of input / comment
        pptNone, pptEndOfText, pptLineComment,
        // single character symbols
        pptSemiColon, pptOpenCurly, pptCloseCurly, pptEqualSign,
        pptOpenSquare, pptCloseSquare,
        pptOpenParenthesis, pptCloseParenthesis, pptComma,
        // literals and identifiers
        pptLiteralInteger, pptLiteralFloat, pptLiteralString, pptIdentifier,
        // keywords inside the pptKeywordFirst..pptKeywordLast range
        pptMessage, pptRequired, pptOptional, pptRepeated,
        pptDouble, pptFloat, pptInt32, pptInt64, pptUInt32, pptUInt64,
        pptSInt32, pptSInt64, pptFixed32, pptFixed64, pptSFixed32, pptSFixed64,
        pptBool, pptString, pptBytes,
        pptDefault, pptEnum, pptImport, pptPackage, pptOption,
        pptExtend, pptService, pptPacked, pptExtensions,
        // keywords outside that range (not accepted as identifiers)
        pptTrue, pptFalse, pptTo, pptMax
      );

  const
    { Bounds of the keyword tokens that ExpectIdentifier also accepts as }
    { an identifier. }
    pptKeywordFirst = pptMessage;
    pptKeywordLast  = pptExtensions;
  type
    { Exception class raised for all lexer and parser errors in this unit. }
    EpbProtoParser = class(Exception);

    { Parser state flags; the set is cleared by ResetParser. }
    // NOTE(review): ppsfPackageIdStatement presumably records that a package
    // statement was seen - confirm in ParsePackageIdStatement.
    TpbProtoParserStateFlag = (ppsfPackageIdStatement);
    TpbProtoParserStateFlags = set of TpbProtoParserStateFlag;

    { Lexer and recursive descent parser for Protocol Buffer .proto text. }
    { Input is supplied with SetTextBuf, SetTextStr or SetFileName. }
    TpbProtoParser = class
    protected
      // source location
      FProtoPath    : String;
      FFileName     : String;      // name as passed to SetFileName ('' for text input)
      FFileNameUsed : String;      // result of FindProtoFile(FFileName)
      FFileNameName : String;      // file name part of FFileNameUsed

      // input buffer
      FBufPtr       : PAnsiChar;   // start of the text
      FBufSize      : Integer;     // number of characters in the text
      FBufPos       : Integer;     // zero based read position
      FBufStrRef    : AnsiString;  // holds the text when FBufPtr points into a string
      FLineNr       : Integer;     // line counter, set to 1 by ResetParser

      FNodeFactory  : TpbProtoNodeFactory;

      // current token and its decoded value
      FToken        : TpbProtoParserToken;
      FTokenStr     : AnsiString;
      FTokenInt     : Int64;
      FTokenFloat   : Extended;

      FStateFlags   : TpbProtoParserStateFlags;

      procedure ResetParser;

      { character level helpers }
      function  EndOfText: Boolean;
      function  SkipChar: Boolean;
      function  SkipCh(const C: AnsiCharSet): Boolean;
      function  ExtractChar(var C: AnsiChar): Boolean;
      function  SkipAllCh(const C: AnsiCharSet): Boolean;
      function  SkipWhiteSpace: Boolean;
      function  SkipToStr(const S: AnsiString; const CaseSensitive: Boolean): Boolean;
      function  ExtractAllCh(const C: AnsiCharSet): AnsiString;
      function  ExtractTo(const C: AnsiCharSet; var S: AnsiString; const SkipDelim: Boolean): AnsiChar;

      { lexer }
      function  GetNextToken_IdentifierOrKeword: TpbProtoParserToken;
      function  GetNextToken_Number: TpbProtoParserToken;
      function  GetNextToken_String: TpbProtoParserToken;
      function  GetNextToken_LineComment: TpbProtoParserToken;
      function  GetNextToken: TpbProtoParserToken;

      { token helpers }
      function  SkipToken(const Token: TpbProtoParserToken): Boolean;
      procedure ExpectToken(const Token: TpbProtoParserToken; const TokenExpected: String);
      procedure ExpectDelimiter;
      procedure ExpectEqualSign;
      function  ExpectIdentifier: AnsiString;
      function  ExpectLiteralInteger: LongInt;
      function  ExpectLiteralFloat: Extended;
      function  ExpectLiteralString: AnsiString;
      function  ExpectLiteralBoolean: Boolean;

      { grammar }
      function  ParseFieldCardinality: TpbProtoFieldCardinality;
      function  ExpectFieldCardinality: TpbProtoFieldCardinality;
      function  ParseFieldBaseType: TpbProtoFieldBaseType;
      function  ExpectFieldType(const AField: TpbProtoField): TpbProtoFieldType;
      procedure ParseEnumValue(const P: TpbProtoPackage; const AParentNode: TpbProtoNode; const E: TpbProtoEnum);
      function  ParseEnum(const P: TpbProtoPackage; const AParentNode: TpbProtoNode): TpbProtoEnum;
      function  ExpectLiteral(const P: TpbProtoPackage; const AParentNode: TpbProtoNode): TpbProtoLiteral;
      procedure ParseFieldOptions(const P: TpbProtoPackage; const M: TpbProtoMessage; const F: TpbProtoField);
      procedure ParseField(const P: TpbProtoPackage; const M: TpbProtoMessage);
      procedure ParseMessageExtensions(const P: TpbProtoPackage; const M: TpbProtoMessage);
      procedure ParseMessageEntry(const P: TpbProtoPackage; const M: TpbProtoMessage);
      function  ParseMessageDeclaration(const P: TpbProtoPackage; const AParent: TpbProtoNode): TpbProtoMessage;
      procedure ParsePackageOption(const APackage: TpbProtoPackage);
      procedure ParseImportStatement(const APackage: TpbProtoPackage);
      procedure ParsePackageIdStatement(const APackage: TpbProtoPackage);
      procedure InitPackage(const P: TpbProtoPackage);
      function  FindProtoFile(const AFileName: String): String;
      procedure ProcessImport(const APackage: TpbProtoPackage; const AFileName: AnsiString);

    public
      property  ProtoPath: String read FProtoPath write FProtoPath;

      procedure SetTextBuf(const Buf; const BufSize: Integer);
      procedure SetTextStr(const S: AnsiString);
      procedure SetFileName(const AFileName: String);

      function  Parse(const ANodeFactory: TpbProtoNodeFactory): TpbProtoPackage;
    end;
  181. implementation
  182. uses
  183. { System }
  184. Classes,
  185. { Fundamentals }
  186. cStrings;
  187. { TpbProtoParser }
  type
    { Associates the exact (case sensitive) spelling of a keyword with its token. }
    TKeywordMap = record
      Keyword : AnsiString;
      Token   : TpbProtoParserToken;
    end;

  const
    KeywordMapEntries = 32;

    { Lookup table searched linearly by GetNextToken_IdentifierOrKeword. }
    KeywordMap : array[0..KeywordMapEntries - 1] of TKeywordMap = (
        (Keyword : 'message';    Token : pptMessage),
        (Keyword : 'required';   Token : pptRequired),
        (Keyword : 'optional';   Token : pptOptional),
        (Keyword : 'repeated';   Token : pptRepeated),
        (Keyword : 'double';     Token : pptDouble),
        (Keyword : 'float';      Token : pptFloat),
        (Keyword : 'int32';      Token : pptInt32),
        (Keyword : 'int64';      Token : pptInt64),
        (Keyword : 'uint32';     Token : pptUInt32),
        (Keyword : 'uint64';     Token : pptUInt64),
        (Keyword : 'sint32';     Token : pptSInt32),
        (Keyword : 'sint64';     Token : pptSInt64),
        (Keyword : 'fixed32';    Token : pptFixed32),
        (Keyword : 'fixed64';    Token : pptFixed64),
        (Keyword : 'sfixed32';   Token : pptSFixed32),
        (Keyword : 'sfixed64';   Token : pptSFixed64),
        (Keyword : 'bool';       Token : pptBool),
        (Keyword : 'string';     Token : pptString),
        (Keyword : 'bytes';      Token : pptBytes),
        (Keyword : 'default';    Token : pptDefault),
        (Keyword : 'enum';       Token : pptEnum),
        (Keyword : 'import';     Token : pptImport),
        (Keyword : 'package';    Token : pptPackage),
        (Keyword : 'option';     Token : pptOption),
        (Keyword : 'extend';     Token : pptExtend),
        (Keyword : 'service';    Token : pptService),
        (Keyword : 'packed';     Token : pptPacked),
        (Keyword : 'extensions'; Token : pptExtensions),
        (Keyword : 'true';       Token : pptTrue),
        (Keyword : 'false';      Token : pptFalse),
        (Keyword : 'to';         Token : pptTo),
        (Keyword : 'max';        Token : pptMax)
      );
  { Uses the BufSize bytes at Buf as the text to parse. }
  { Only the address of Buf is stored, the bytes are not copied. }
  { Raises EpbProtoParser when BufSize is negative. }
  procedure TpbProtoParser.SetTextBuf(const Buf; const BufSize: Integer);
  begin
    // text does not come from a file
    FFileName := '';
    if BufSize >= 0 then
      begin
        FBufPtr  := @Buf;
        FBufSize := BufSize;
        FBufPos  := 0;
        // drop any string held by an earlier SetTextStr / SetFileName
        FBufStrRef := '';
      end
    else
      raise EpbProtoParser.Create('Invalid parameter');
  end;
  { Uses the string S as the text to parse. }
  { A reference to S is kept in FBufStrRef and FBufPtr points at its }
  { characters. }
  procedure TpbProtoParser.SetTextStr(const S: AnsiString);
  begin
    // text does not come from a file
    FFileName  := '';
    FBufStrRef := S;
    FBufPtr    := PAnsiChar(FBufStrRef);
    FBufSize   := Length(FBufStrRef);
    FBufPos    := 0;
  end;
  { Loads the .proto file AFileName as the text to parse. }
  { The file is located with FindProtoFile and read completely into memory; }
  { the stream is closed again before this method returns. }
  { Exceptions raised while opening or reading the file are propagated. }
  procedure TpbProtoParser.SetFileName(const AFileName: String);
  var F : TFileStream;
      L : Integer;
      S : AnsiString;
      E : TSearchRec;
  begin
    FFileName := AFileName;
    FFileNameUsed := FindProtoFile(FFileName);
    if FindFirst(FFileNameUsed, faAnyFile, E) = 0 then
      begin
        // get file name from file system to preserve case
        FFileNameName := E.Name;
        FindClose(E);
      end
    else
      FFileNameName := ExtractFileName(FFileNameUsed);
    // Open for reading while still allowing other readers: fmOpenRead on its
    // own requests exclusive access, which fails when another process (or an
    // editor) already has the file open.
    F := TFileStream.Create(FFileNameUsed, fmOpenRead or fmShareDenyWrite);
    try
      L := F.Size;
      SetLength(S, L);
      if L > 0 then
        F.ReadBuffer(S[1], L);
    finally
      F.Free;
    end;
    FBufStrRef := S;
    FBufSize := Length(S);
    FBufPtr := PAnsiChar(FBufStrRef);
    FBufPos := 0;
  end;
  { Returns the lexer to the start of the text: position 0, line 1, }
  { no current token and no state flags. }
  procedure TpbProtoParser.ResetParser;
  begin
    FStateFlags := [];
    FToken      := pptNone;
    FLineNr     := 1;
    FBufPos     := 0;
  end;
  { True once the read position has reached or passed the end of the text. }
  function TpbProtoParser.EndOfText: Boolean;
  begin
    Result := not (FBufPos < FBufSize);
  end;
  { Advances the read position by one character. }
  { Returns False, without moving, when already at the end of the text. }
  function TpbProtoParser.SkipChar: Boolean;
  begin
    Result := not EndOfText;
    if Result then
      Inc(FBufPos);
  end;
  { Skips the current character if it is a member of C. }
  { Returns True when a character was skipped. }
  function TpbProtoParser.SkipCh(const C: AnsiCharSet): Boolean;
  var Cur : PAnsiChar;
  begin
    Result := False;
    // nothing left to look at
    if FBufPos >= FBufSize then
      exit;
    Cur := FBufPtr;
    Inc(Cur, FBufPos);
    if Cur^ in C then
      begin
        Inc(FBufPos);
        Result := True;
      end;
  end;
  { Reads the current character into C and advances past it. }
  { At the end of the text C is set to #0 and False is returned. }
  function TpbProtoParser.ExtractChar(var C: AnsiChar): Boolean;
  var Cur : PAnsiChar;
  begin
    Result := FBufPos < FBufSize;
    if Result then
      begin
        Cur := FBufPtr;
        Inc(Cur, FBufPos);
        C := Cur^;
        Inc(FBufPos);
      end
    else
      C := #0;
  end;
  { Skips the run of consecutive characters that are members of C. }
  { Returns True when at least one character was skipped. }
  function TpbProtoParser.SkipAllCh(const C: AnsiCharSet): Boolean;
  var Start : Integer;
      Cur   : PAnsiChar;
  begin
    Start := FBufPos;
    Cur := FBufPtr;
    Inc(Cur, Start);
    while FBufPos < FBufSize do
      begin
        // stop at the first character outside the set
        if not (Cur^ in C) then
          break;
        Inc(Cur);
        Inc(FBufPos);
      end;
    Result := FBufPos > Start;
  end;
  { Skips white space (characters #1..#32) and keeps FLineNr up to date. }
  { A line break is CR LF, a lone CR or a lone LF; each counts as one line. }
  { Counting only CR would leave FLineNr stuck for LF-only (Unix) files. }
  { Returns True when at least one character was skipped. }
  function TpbProtoParser.SkipWhiteSpace: Boolean;
  var P : PAnsiChar;
      C : AnsiChar;
  begin
    Result := False;
    while FBufPos < FBufSize do
      begin
        P := FBufPtr;
        Inc(P, FBufPos);
        C := P^;
        if not (C in [#1..#32]) then
          break;
        Inc(FBufPos);
        Result := True;
        case C of
          #13 :
            begin
              Inc(FLineNr);
              // CR LF is a single line break: swallow the LF without
              // counting it again
              if FBufPos < FBufSize then
                begin
                  Inc(P);
                  if P^ = #10 then
                    Inc(FBufPos);
                end;
            end;
          #10 : Inc(FLineNr);
        end;
      end;
  end;
  { Advances the read position to the next occurrence of S. }
  { When S does not occur, the position stops at the first offset where }
  { fewer than Length(S) characters remain. }
  { The result is True when at least one character was skipped; it does not }
  { by itself tell whether S was found. }
  function TpbProtoParser.SkipToStr(const S: AnsiString; const CaseSensitive: Boolean): Boolean;
  var Remaining, PatLen, Skipped : Integer;
      Cur   : PAnsiChar;
      Found : Boolean;
  begin
    PatLen := Length(S);
    Remaining := FBufSize - FBufPos;
    Cur := FBufPtr;
    Inc(Cur, FBufPos);
    Skipped := 0;
    while Remaining >= PatLen do
      begin
        if CaseSensitive then
          Found := SysUtils.CompareMem(PAnsiChar(S), Cur, PatLen)
        else
          Found := AnsiStrLIComp(PAnsiChar(S), Cur, PatLen) = 0;
        if Found then
          break;
        Inc(Cur);
        Inc(Skipped);
        Dec(Remaining);
      end;
    Inc(FBufPos, Skipped);
    Result := Skipped > 0;
  end;
  { Returns the run of consecutive characters that are members of C and }
  { advances past it. The result is empty when the current character is }
  { not in C or the end of the text has been reached. }
  function TpbProtoParser.ExtractAllCh(const C: AnsiCharSet): AnsiString;
  var Start, Cur  : PAnsiChar;
      Left, Count : Integer;
  begin
    Start := FBufPtr;
    Inc(Start, FBufPos);
    Cur := Start;
    Count := 0;
    Left := FBufSize - FBufPos;
    while Left > 0 do
      begin
        if not (Cur^ in C) then
          break;
        Inc(Count);
        Inc(Cur);
        Dec(Left);
      end;
    // copy the matched run out of the buffer
    SetLength(Result, Count);
    if Count > 0 then
      Move(Start^, Result[1], Count);
    Inc(FBufPos, Count);
  end;
  { Copies into S every character up to, but excluding, the first }
  { character that is a member of C, and advances past the copied text. }
  { When a delimiter was found and SkipDelim is True the delimiter is }
  { skipped as well. }
  { Returns the last character examined: the delimiter when one was found, }
  { otherwise the final character of the text, or #0 when no characters }
  { remained. }
  function TpbProtoParser.ExtractTo(const C: AnsiCharSet; var S: AnsiString; const SkipDelim: Boolean): AnsiChar;
  var Start, Cur  : PAnsiChar;
      Left, Count : Integer;
      Hit         : Boolean;
  begin
    Start := FBufPtr;
    Inc(Start, FBufPos);
    Cur := Start;
    Count := 0;
    Hit := False;
    Result := #0;
    Left := FBufSize - FBufPos;
    while Left > 0 do
      begin
        Result := Cur^;
        Hit := Result in C;
        if Hit then
          break;
        Inc(Count);
        Inc(Cur);
        Dec(Left);
      end;
    SetLength(S, Count);
    if Count > 0 then
      Move(Start^, S[1], Count);
    Inc(FBufPos, Count);
    if Hit and SkipDelim then
      Inc(FBufPos);
  end;
  { Lexes a run of letters, digits, '_' and '.' into FTokenStr. }
  { Returns the keyword token when the text matches a KeywordMap entry }
  { exactly (case sensitive), otherwise pptIdentifier. }
  function TpbProtoParser.GetNextToken_IdentifierOrKeword: TpbProtoParserToken;
  var Idx : Integer;
  begin
    FTokenStr := ExtractAllCh(['A'..'Z', 'a'..'z', '_', '0'..'9', '.']);
    Result := pptIdentifier;
    for Idx := Low(KeywordMap) to High(KeywordMap) do
      if FTokenStr = KeywordMap[Idx].Keyword then
        begin
          Result := KeywordMap[Idx].Token;
          break;
        end;
  end;
  { Lexes a numeric literal. }
  { Returns pptLiteralInteger with the value in FTokenInt when the text is }
  { a valid 64 bit integer, otherwise pptLiteralFloat with the value in }
  { FTokenFloat. Raises EpbProtoParser when the text is neither. }
  function TpbProtoParser.GetNextToken_Number: TpbProtoParserToken;
  var
    S, N : AnsiString;
    I    : Int64;
    F    : Extended;
  begin
    // '+' is part of the set: GetNextToken routes a leading '+' here, and a
    // signed exponent such as 1e+5 must stay in one token.
    S := ExtractAllCh(['+', '-', '0'..'9', '.', 'e', 'E']);
    // drop a redundant leading plus sign before converting
    N := S;
    if Length(N) >= 2 then
      if (N[1] = '+') and (N[2] in ['0'..'9', '.']) then
        Delete(N, 1, 1);
    if TryStringToInt64A(N, I) then
      begin
        Result := pptLiteralInteger;
        FTokenInt := I;
        exit;
      end;
    if TryStringToFloatA(N, F) then
      begin
        Result := pptLiteralFloat;
        FTokenFloat := F;
        exit;
      end;
    // TODO: hex values e.g. 0xFFFFFFFF, -0x7FFFFFFF
    raise EpbProtoParser.CreateFmt('Invalid numeric value (%s)', [S]);
  end;
  const
    SErr_StringNotTerminated = 'String literal not terminated';

  { Lexes a double quoted string literal; the decoded text is stored in }
  { FTokenStr. }
  { A doubled quote inside the literal stands for a single quote character. }
  { Backslash escapes: \0 \a \b \t \n \v \f \r map to their control }
  { characters; any other escaped character stands for itself. }
  { Raises EpbProtoParser when the text ends before the closing quote. }
  function TpbProtoParser.GetNextToken_String: TpbProtoParserToken;
  var
    S, T : AnsiString;
    F    : Boolean;
    C, D : AnsiChar;
  begin
    // opening quote
    SkipChar;
    S := '';
    F := False;
    repeat
      C := ExtractTo(['"', '\'], T, True);
      S := S + T;
      case C of
        '"' :
          begin
            if SkipCh(['"']) then // escaped quote
              S := S + '"'
            else
              F := True;
          end;
        '\' : // escape character
          begin
            if not ExtractChar(C) then
              raise EpbProtoParser.Create(SErr_StringNotTerminated);
            case C of
              '0' : D := #0;
              'a' : D := #7;
              'b' : D := #8;
              't' : D := #9;
              'n' : D := #10;
              'v' : D := #11;
              'f' : D := #12;
              'r' : D := #13;
              // TODO: hex format e.g. \xfe
              //       unicode escaping in string, e.g. "\u1234"
              //       utf8 escaping in string, e.g. "\341\210\264"
            else
              D := C;
            end;
            S := S + D;
          end;
      else
        // ExtractTo ran out of text without finding a quote or backslash
        raise EpbProtoParser.Create(SErr_StringNotTerminated);
      end;
    until F;
    // literal string decoded
    FTokenStr := S;
    Result := pptLiteralString;
  end;
  { Lexes a "//" line comment; the comment text is stored in FTokenStr. }
  { The terminating line break (CR LF, CR or LF) is consumed and counted in }
  { FLineNr, so line numbers stay correct after a comment. }
  { Raises EpbProtoParser when the '/' is not followed by a second '/'. }
  function TpbProtoParser.GetNextToken_LineComment: TpbProtoParserToken;
  begin
    SkipChar;
    if not SkipCh(['/']) then
      raise EpbProtoParser.Create('Unexpected token (/)');
    // ExtractTo returns #13 or #10 only when that delimiter was actually
    // found (and skipped); a comment running to the end of the text returns
    // its last character instead.
    case ExtractTo([#13, #10], FTokenStr, True) of
      #13 :
        begin
          Inc(FLineNr);
          // CR LF is a single line break
          SkipCh([#10]);
        end;
      #10 : Inc(FLineNr);
    end;
    Result := pptLineComment;
  end;
  { Reads the next token from the text, stores it in FToken and returns it. }
  { White space and line comments are skipped. At the end of the text }
  { pptEndOfText is returned. }
  { Raises EpbProtoParser for a character that cannot start a token. }
  function TpbProtoParser.GetNextToken: TpbProtoParserToken;
  var Ch  : AnsiChar;
      Cur : PAnsiChar;
  begin
    repeat
      FTokenStr := '';
      SkipWhiteSpace;
      if EndOfText then
        begin
          FToken := pptEndOfText;
          Result := pptEndOfText;
          exit;
        end;
      Cur := FBufPtr;
      Inc(Cur, FBufPos);
      Ch := Cur^;
      // single character tokens
      Result := pptNone;
      case Ch of
        ';' : Result := pptSemiColon;
        '{' : Result := pptOpenCurly;
        '}' : Result := pptCloseCurly;
        '=' : Result := pptEqualSign;
        '[' : Result := pptOpenSquare;
        ']' : Result := pptCloseSquare;
        '(' : Result := pptOpenParenthesis;
        ')' : Result := pptCloseParenthesis;
        ',' : Result := pptComma;
      end;
      if Result <> pptNone then
        begin
          SkipChar;
          FToken := Result;
          exit;
        end;
      // multi character tokens, selected by their first character
      case Ch of
        'A'..'Z',
        'a'..'z',
        '_'      : Result := GetNextToken_IdentifierOrKeword;
        '+', '-',
        '0'..'9' : Result := GetNextToken_Number;
        '"'      : Result := GetNextToken_String;
        '/'      : Result := GetNextToken_LineComment;
      else
        raise EpbProtoParser.CreateFmt('Unexpected input character (%d)', [Ord(Ch)]);
      end;
    // comments are not reported to the parser
    until Result <> pptLineComment;
    FToken := Result;
  end;
  { Consumes the current token when it equals Token. }
  { Returns True when the token was consumed. }
  function TpbProtoParser.SkipToken(const Token: TpbProtoParserToken): Boolean;
  begin
    if FToken = Token then
      begin
        GetNextToken;
        Result := True;
      end
    else
      Result := False;
  end;
  { Consumes the current token, which must equal Token. }
  { Otherwise raises EpbProtoParser, naming TokenExpected in the message. }
  procedure TpbProtoParser.ExpectToken(const Token: TpbProtoParserToken; const TokenExpected: String);
  begin
    if FToken = Token then
      GetNextToken
    else
      raise EpbProtoParser.CreateFmt('%s expected', [TokenExpected]);
  end;
  { Consumes the statement delimiter; raises EpbProtoParser when the }
  { current token is not a semicolon. }
  procedure TpbProtoParser.ExpectDelimiter;
  const
    DelimiterText = ';';
  begin
    ExpectToken(pptSemiColon, DelimiterText);
  end;
  { Consumes an equal sign; raises EpbProtoParser when the current token }
  { is anything else. }
  procedure TpbProtoParser.ExpectEqualSign;
  const
    EqualSignText = '=';
  begin
    ExpectToken(pptEqualSign, EqualSignText);
  end;
  { Consumes an identifier and returns its text. }
  { Keyword tokens in the pptKeywordFirst..pptKeywordLast range are accepted }
  { as identifiers too. Raises EpbProtoParser for any other token. }
  function TpbProtoParser.ExpectIdentifier: AnsiString;
  begin
    if not (FToken in [pptIdentifier, pptKeywordFirst..pptKeywordLast]) then
      raise EpbProtoParser.Create('Identifier expected');
    // take the text before GetNextToken resets FTokenStr
    Result := FTokenStr;
    GetNextToken;
  end;
  { Consumes an integer literal and returns its value. }
  { Raises EpbProtoParser when the current token is not an integer literal, }
  { or when its value does not fit the 32 bit result. The lexer holds the }
  { value as Int64; returning it unchecked would silently drop the upper }
  { bits, so an out of range tag such as 4294967297 would be seen as 1. }
  function TpbProtoParser.ExpectLiteralInteger: LongInt;
  begin
    if FToken <> pptLiteralInteger then
      raise EpbProtoParser.Create('Integer literal expected');
    if (FTokenInt < Low(LongInt)) or (FTokenInt > High(LongInt)) then
      raise EpbProtoParser.CreateFmt('Integer literal out of range (%d)', [FTokenInt]);
    Result := LongInt(FTokenInt);
    GetNextToken;
  end;
  { Consumes a float literal and returns its value. }
  { Raises EpbProtoParser when the current token is not a float literal. }
  function TpbProtoParser.ExpectLiteralFloat: Extended;
  begin
    if FToken = pptLiteralFloat then
      begin
        Result := FTokenFloat;
        GetNextToken;
      end
    else
      raise EpbProtoParser.Create('Float literal expected');
  end;
  { Consumes a string literal and returns its decoded text. }
  { Raises EpbProtoParser when the current token is not a string literal. }
  function TpbProtoParser.ExpectLiteralString: AnsiString;
  begin
    if FToken = pptLiteralString then
      begin
        // take the text before GetNextToken resets FTokenStr
        Result := FTokenStr;
        GetNextToken;
      end
    else
      raise EpbProtoParser.Create('String literal expected');
  end;
  { Consumes a "true" or "false" keyword and returns the matching Boolean. }
  { Raises EpbProtoParser for any other token. }
  function TpbProtoParser.ExpectLiteralBoolean: Boolean;
  begin
    case FToken of
      pptTrue  : Result := True;
      pptFalse : Result := False;
    else
      raise EpbProtoParser.Create('Boolean literal expected');
    end;
    GetNextToken;
  end;
  { Consumes a required / optional / repeated keyword when it is the }
  { current token and returns the matching cardinality. }
  { Returns pfcNone, consuming nothing, for any other token. }
  function TpbProtoParser.ParseFieldCardinality: TpbProtoFieldCardinality;
  begin
    case FToken of
      pptRequired : Result := pfcRequired;
      pptOptional : Result := pfcOptional;
      pptRepeated : Result := pfcRepeated;
    else
      begin
        Result := pfcNone;
        exit;
      end;
    end;
    GetNextToken;
  end;
  { Like ParseFieldCardinality, but a cardinality keyword is mandatory: }
  { raises EpbProtoParser when none is present. }
  function TpbProtoParser.ExpectFieldCardinality: TpbProtoFieldCardinality;
  var Cardinality : TpbProtoFieldCardinality;
  begin
    Cardinality := ParseFieldCardinality;
    if Cardinality = pfcNone then
      raise EpbProtoParser.Create('Field cardinality expected');
    Result := Cardinality;
  end;
  { Consumes a built-in scalar type keyword when it is the current token }
  { and returns the matching base type. }
  { Returns pftNone, consuming nothing, for any other token. }
  function TpbProtoParser.ParseFieldBaseType: TpbProtoFieldBaseType;
  begin
    case FToken of
      pptDouble   : Result := pftDouble;
      pptFloat    : Result := pftFloat;
      pptInt32    : Result := pftInt32;
      pptInt64    : Result := pftInt64;
      pptUInt32   : Result := pftUInt32;
      pptUInt64   : Result := pftUInt64;
      pptSInt32   : Result := pftSInt32;
      pptSInt64   : Result := pftSInt64;
      pptFixed32  : Result := pftFixed32;
      pptFixed64  : Result := pftFixed64;
      pptSFixed32 : Result := pftSFixed32;
      pptSFixed64 : Result := pftSFixed64;
      pptBool     : Result := pftBool;
      pptString   : Result := pftString;
      pptBytes    : Result := pftBytes;
    else
      begin
        Result := pftNone;
        exit;
      end;
    end;
    GetNextToken;
  end;
  { Parses the type of AField: either a built-in scalar type keyword or an }
  { identifier naming another type. }
  { Returns a new field type node created by the node factory; the node is }
  { freed again when parsing fails. }
  { Raises EpbProtoParser when the current token is neither. }
  function TpbProtoParser.ExpectFieldType(const AField: TpbProtoField): TpbProtoFieldType;
  var Base : TpbProtoFieldBaseType;
  begin
    Result := FNodeFactory.CreateFieldType(AField);
    try
      Base := ParseFieldBaseType;
      if Base = pftNone then
        begin
          // not a built-in type: must be a named type
          if FToken <> pptIdentifier then
            raise EpbProtoParser.Create('Field type expected');
          Result.BaseType := pftIdentifier;
          Result.IdenStr := FTokenStr;
          GetNextToken;
        end
      else
        Result.BaseType := Base;
    except
      Result.Free;
      raise;
    end;
  end;
  { Parses one enum entry of the form  Name = Integer ;  and adds it to E. }
  { The value node is freed again when parsing fails. }
  procedure TpbProtoParser.ParseEnumValue(const P: TpbProtoPackage; const AParentNode: TpbProtoNode;
            const E: TpbProtoEnum);
  var Entry : TpbProtoEnumValue;
  begin
    Entry := FNodeFactory.CreateEnumValue(E);
    try
      Entry.Name := ExpectIdentifier;
      ExpectEqualSign;
      Entry.Value := ExpectLiteralInteger;
      ExpectDelimiter;
    except
      Entry.Free;
      raise;
    end;
    // only a completely parsed entry is handed to the enum
    E.Add(Entry);
  end;
  { Parses an enum declaration; the current token must be the enum keyword. }
  { Returns the new enum node; it is not attached to a parent here, the }
  { caller does that. The node is freed again when parsing fails. }
  function TpbProtoParser.ParseEnum(const P: TpbProtoPackage; const AParentNode: TpbProtoNode): TpbProtoEnum;
  begin
    Assert(FToken = pptEnum);
    GetNextToken;
    Result := FNodeFactory.CreateEnum(AParentNode);
    try
      Result.Name := ExpectIdentifier;
      ExpectToken(pptOpenCurly, '{');
      // entries up to the closing brace; stop at end of text so the
      // missing brace is reported below
      while (FToken <> pptCloseCurly) and (FToken <> pptEndOfText) do
        ParseEnumValue(P, AParentNode, Result);
      ExpectToken(pptCloseCurly, '}');
    except
      Result.Free;
      raise;
    end;
  end;
  { Parses a literal value: integer, float, string, boolean or identifier. }
  { Returns a new literal node created by the node factory; the node is }
  { freed again when parsing fails. }
  { Raises EpbProtoParser when the current token is not a literal. }
  function TpbProtoParser.ExpectLiteral(const P: TpbProtoPackage; const AParentNode: TpbProtoNode): TpbProtoLiteral;
  var L : TpbProtoLiteral;
  begin
    L := FNodeFactory.CreateLiteral(AParentNode);
    try
      case FToken of
        pptLiteralInteger :
          begin
            L.LiteralType := pltInteger;
            // Read the lexer's Int64 value directly: going through
            // ExpectLiteralInteger would narrow it to LongInt and lose
            // 64 bit values such as int64 defaults.
            // NOTE(review): assumes LiteralInt is declared wide enough to
            // hold an Int64 - confirm in cProtoBufProtoNodes.
            L.LiteralInt := FTokenInt;
            GetNextToken;
          end;
        pptLiteralFloat :
          begin
            L.LiteralType := pltFloat;
            L.LiteralFloat := ExpectLiteralFloat;
          end;
        pptLiteralString :
          begin
            L.LiteralType := pltString;
            L.LiteralStr := ExpectLiteralString;
          end;
        pptTrue, pptFalse :
          begin
            L.LiteralType := pltBoolean;
            L.LiteralBool := FToken = pptTrue;
            GetNextToken;
          end;
        pptIdentifier :
          begin
            L.LiteralType := pltIdentifier;
            L.LiteralIden := ExpectIdentifier;
          end;
      else
        raise EpbProtoParser.Create('Invalid literal value');
      end;
    except
      L.Free;
      raise;
    end;
    Result := L;
  end;
  { Parses the bracketed option list(s) that follow a field's tag, e.g. }
  {   [packed = true, default = 5] }
  { The current token must be the opening square bracket. Options are }
  { separated by commas and several bracketed lists may follow each other. }
  { "packed" and "default" are stored in dedicated properties of F; every }
  { other option is added to F as a generic option. }
  procedure TpbProtoParser.ParseFieldOptions(const P: TpbProtoPackage; const M: TpbProtoMessage; const F: TpbProtoField);

    { Parses  Name = Literal  or  (Name) = Literal  and adds it to F. }
    procedure ParseGenericOption;
    var Opt : TpbProtoOption;
    begin
      Opt := TpbProtoOption.Create(FNodeFactory);
      try
        // a custom option has its name in parentheses
        Opt.Custom := SkipToken(pptOpenParenthesis);
        Opt.Name := ExpectIdentifier;
        if Opt.Custom then
          ExpectToken(pptCloseParenthesis, ')');
        ExpectEqualSign;
        Opt.Value := ExpectLiteral(P, Opt);
      except
        Opt.Free;
        raise;
      end;
      F.AddOption(Opt);
    end;

  begin
    Assert(FToken = pptOpenSquare);
    GetNextToken;
    repeat
      // one bracketed list
      repeat
        if SkipToken(pptPacked) then
          begin
            ExpectEqualSign;
            F.OptionPacked := ExpectLiteralBoolean;
          end
        else
        if SkipToken(pptDefault) then
          begin
            ExpectEqualSign;
            F.DefaultValue := ExpectLiteral(P, F);
          end
        else
          // unknown option
          ParseGenericOption;
      until not SkipToken(pptComma);
      ExpectToken(pptCloseSquare, ']');
    until not SkipToken(pptOpenSquare);
  end;
  const
    { Largest tag id accepted for a field. }
    pbMaxTagID = 536870911;

  { Parses one field declaration of message M: }
  {   cardinality type name = tag [options] ; }
  { and adds the field to M. }
  { Raises EpbProtoParser when the tag is outside 1..pbMaxTagID, lies in }
  { the 19000..19999 range, or is already used by another field of M. }
  { The field node is freed again when parsing fails. }
  procedure TpbProtoParser.ParseField(const P: TpbProtoPackage; const M: TpbProtoMessage);
  var Field : TpbProtoField;
  begin
    Field := FNodeFactory.CreateField(M);
    try
      Field.Cardinality := ExpectFieldCardinality;
      Field.FieldType := ExpectFieldType(Field);
      Field.Name := ExpectIdentifier;
      ExpectEqualSign;
      Field.TagID := ExpectLiteralInteger;
      // tag validation
      if (Field.TagID <= 0) or (Field.TagID > pbMaxTagID) then
        raise EpbProtoParser.CreateFmt('TagID out of range (%d)', [Field.TagID]);
      if (Field.TagID >= 19000) and (Field.TagID <= 19999) then
        raise EpbProtoParser.CreateFmt('TagID reserved (%d)', [Field.TagID]);
      // optional [ ... ] option list
      if FToken = pptOpenSquare then
        ParseFieldOptions(P, M, Field);
      ExpectDelimiter;
      if Assigned(M.GetFieldByTagID(Field.TagID)) then
        raise EpbProtoParser.CreateFmt('Duplicate TagID (%d)', [Field.TagID]);
    except
      Field.Free;
      raise;
    end;
    M.AddField(Field);
  end;
  { Parses an extensions statement of message M: }
  {   extensions Min to Max ; }
  { where Max is an integer or the keyword "max" (stored as pbMaxTagID). }
  { The current token must be the extensions keyword. }
  procedure TpbProtoParser.ParseMessageExtensions(const P: TpbProtoPackage; const M: TpbProtoMessage);
  begin
    Assert(FToken = pptExtensions);
    GetNextToken;
    M.ExtensionsMin := ExpectLiteralInteger;
    ExpectToken(pptTo, 'to');
    if SkipToken(pptMax) then
      M.ExtensionsMax := pbMaxTagID
    else
      M.ExtensionsMax := ExpectLiteralInteger;
    // Consume the terminating ';'. Left in place, the message loop would
    // hand it to ParseField, which fails with 'Field cardinality expected'.
    ExpectDelimiter;
  end;
  // Parses a single entry of a message body, dispatching on the current token:
  //   enum        -> nested enum, added to M
  //   message     -> nested message, added to M
  //   extensions  -> extension range of M
  //   ';'         -> empty statement, skipped
  //   otherwise   -> treated as a field declaration
  procedure TpbProtoParser.ParseMessageEntry(const P: TpbProtoPackage; const M: TpbProtoMessage);
  begin
    case FToken of
      pptEnum       : M.AddEnum(ParseEnum(P, M));
      pptMessage    : M.AddMessage(ParseMessageDeclaration(P, M));
      pptExtensions : ParseMessageExtensions(P, M);
      // Skip stray ';' (empty statement), matching the top-level loop in Parse.
      // Otherwise a ';' following a nested declaration would be parsed as a field.
      pptSemiColon  : GetNextToken;
    else
      ParseField(P, M);
    end;
  end;
  // Parses a message declaration and returns the new message node, created
  // through the node factory with AParent as its parent.
  //
  // example:
  //   message Open { ...entries... }
  //
  // Expects the current token to be pptMessage on entry. Entries are parsed
  // until a closing curly bracket or the end of the text is reached; the
  // closing bracket is then required. On any exception the partially built
  // message is freed and the exception is re-raised.
  function TpbProtoParser.ParseMessageDeclaration(const P: TpbProtoPackage; const AParent: TpbProtoNode): TpbProtoMessage;
  begin
    Assert(FToken = pptMessage);
    GetNextToken;
    Result := FNodeFactory.CreateMessage(AParent);
    try
      Result.Name := ExpectIdentifier;
      ExpectToken(pptOpenCurly, '{');
      // stop on '}' or on end of text (the latter fails in ExpectToken below)
      while (FToken <> pptCloseCurly) and (FToken <> pptEndOfText) do
        ParseMessageEntry(P, Result);
      ExpectToken(pptCloseCurly, '}');
    except
      Result.Free;
      raise;
    end;
  end;
  // Parses a package-level option statement and adds it to APackage.
  //
  // example:
  //   option optimize_for = SPEED;
  //
  // An option whose name is wrapped in parentheses is flagged as Custom.
  // Expects the current token to be pptOption on entry. The option object is
  // freed if parsing fails before it has been handed to the package.
  procedure TpbProtoParser.ParsePackageOption(const APackage: TpbProtoPackage);
  var
    Opt : TpbProtoOption;
  begin
    Assert(FToken = pptOption);
    GetNextToken;

    Opt := TpbProtoOption.Create(FNodeFactory);
    try
      // a leading '(' marks a custom option; it must be closed after the name
      Opt.Custom := SkipToken(pptOpenParenthesis);
      Opt.Name := ExpectIdentifier;
      if Opt.Custom then
        ExpectToken(pptCloseParenthesis, ')');

      ExpectEqualSign;
      Opt.Value := ExpectLiteral(APackage, Opt);
      ExpectDelimiter;
    except
      Opt.Free;
      raise;
    end;

    APackage.AddOption(Opt);
  end;
  // Parses an import statement, records the imported name on APackage and
  // then processes the imported file.
  //
  // example:
  //   import "myproject/other_protos.proto";
  //
  // Expects the current token to be pptImport on entry. The import name is
  // registered before the delimiter is checked; the imported file itself is
  // processed only after the statement has been read completely.
  procedure TpbProtoParser.ParseImportStatement(const APackage: TpbProtoPackage);
  var
    ImportName : AnsiString;
  begin
    Assert(FToken = pptImport);
    GetNextToken;

    ImportName := ExpectLiteralString;
    APackage.AddImport(ImportName);
    ExpectDelimiter;

    ProcessImport(APackage, ImportName);
  end;
  // Parses the package identifier statement and stores the name on APackage.
  //
  // example:
  //   package foo.bar;
  //
  // Expects the current token to be pptPackage on entry. Only one package
  // statement is accepted per parse: the ppsfPackageIdStatement state flag
  // records that one has been seen, and a second one raises EpbProtoParser.
  procedure TpbProtoParser.ParsePackageIdStatement(const APackage: TpbProtoPackage);
  begin
    Assert(FToken = pptPackage);
    GetNextToken;

    if ppsfPackageIdStatement in FStateFlags then
      raise EpbProtoParser.Create('Duplicate package declaration');
    FStateFlags := FStateFlags + [ppsfPackageIdStatement];

    APackage.Name := ExpectIdentifier;
    ExpectDelimiter;
  end;
  // Parses the whole input and returns the resulting package node.
  //
  // ANodeFactory is required; it is stored in FNodeFactory and used to create
  // every node. The parser state is reset before parsing starts.
  //
  // Top-level statements handled: package, import, option, message, enum and
  // stray ';'. Any other token raises 'Unexpected token'.
  //
  // Any exception raised while parsing is re-raised as EpbProtoParser with the
  // message prefixed by the file name and line number. The partially built
  // package is freed first.
  function TpbProtoParser.Parse(const ANodeFactory: TpbProtoNodeFactory): TpbProtoPackage;
  begin
    if ANodeFactory = nil then
      raise EpbProtoParser.Create('Node factory required');
    FNodeFactory := ANodeFactory;
    ResetParser;
    try
      // prime the token stream before the package node is created
      GetNextToken;
      Result := FNodeFactory.CreatePackage;
      try
        InitPackage(Result);
        while FToken <> pptEndOfText do
          begin
            case FToken of
              pptSemiColon : GetNextToken;
              pptPackage   : ParsePackageIdStatement(Result);
              pptImport    : ParseImportStatement(Result);
              pptOption    : ParsePackageOption(Result);
              pptEnum      : Result.AddEnum(ParseEnum(Result, Result));
              pptMessage   : Result.AddMessage(ParseMessageDeclaration(Result, Result));
            else
              raise EpbProtoParser.Create('Unexpected token');
            end;
          end;
      except
        Result.Free;
        raise;
      end;
    except
      // decorate every failure with its source position
      on E: Exception do
        raise EpbProtoParser.CreateFmt('%s(%d): %s', [FFileNameName, FLineNr, E.Message]);
    end;
  end;
  // Initialises a freshly created package node from the parser's file name.
  //
  // When a file name is set, P.FileName receives FFileNameName and P.Name
  // receives the same value with a trailing '.proto' extension removed, as a
  // default package name. When no file name is set, P.Name is cleared and
  // P.FileName is left untouched.
  //
  // NOTE(review): the guard tests FFileName while the value used is
  // FFileNameName - presumably both are set together by SetFileName; confirm.
  procedure TpbProtoParser.InitPackage(const P: TpbProtoPackage);
  var S : String;
  begin
    if FFileName <> '' then
      begin
        // set file name
        S := FFileNameName;
        {$IFDEF StringIsUnicode}
        P.FileName := UTF8Encode(S);
        {$ELSE}
        P.FileName := S;
        {$ENDIF}
        // derive default package name from file name;
        // the extension is matched case-insensitively so that names such as
        // 'Foo.PROTO' or 'Foo.Proto' are stripped as well
        if SameText(ExtractFileExt(S), '.proto') then
          S := ChangeFileExt(S, '');
        {$IFDEF StringIsUnicode}
        P.Name := UTF8Encode(S);
        {$ELSE}
        P.Name := S;
        {$ENDIF}
      end
    else
      P.Name := '';
  end;
  // Resolves the location of a .proto file.
  //
  // Lookup order:
  //   1. AFileName as given, if such a file exists;
  //   2. AFileName relative to FProtoPath, if FProtoPath is set and that
  //      file exists;
  //   3. otherwise AFileName is returned unchanged.
  //
  // Raises EpbProtoParser when AFileName is empty.
  function TpbProtoParser.FindProtoFile(const AFileName: String): String;
  var
    Candidate : String;
  begin
    if AFileName = '' then
      raise EpbProtoParser.Create('Filename not specified');

    // fall-back result; also the answer when the file exists as given
    Result := AFileName;
    if FileExists(AFileName) then
      Exit;
    if FProtoPath = '' then
      Exit;

    Candidate := IncludeTrailingPathDelimiter(FProtoPath) + AFileName;
    if FileExists(Candidate) then
      Result := Candidate;
  end;
  // Parses an imported .proto file with a separate parser instance and
  // attaches the resulting package to APackage as an imported package.
  //
  // The file is located with FindProtoFile. The nested parser uses this
  // parser's proto path and node factory, and is always freed, even when
  // parsing raises.
  procedure TpbProtoParser.ProcessImport(const APackage: TpbProtoPackage; const AFileName: AnsiString);
  var
    ImportPath : String;
    SubParser  : TpbProtoParser;
  begin
    ImportPath := FindProtoFile(ToStringA(AFileName));
    SubParser := TpbProtoParser.Create;
    try
      SubParser.ProtoPath := FProtoPath;
      SubParser.SetFileName(ImportPath);
      APackage.AddImportedPackage(SubParser.Parse(FNodeFactory));
    finally
      SubParser.Free;
    end;
  end;
  1154. end.