// lang-css.js

// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


/**
 * @fileoverview
 * Registers a language handler for CSS.
 *
 *
 * To use, include prettify.js and this file in your HTML page.
 * Then put your code in an HTML tag like
 *      <pre class="prettyprint lang-css"></pre>
 *
 *
 * http://www.w3.org/TR/CSS21/grammar.html Section G2 defines the lexical
 * grammar.  This scheme does not recognize keywords containing escapes.
 *
 * @author mikesamuel@gmail.com
 */

// This file is a call to a function defined in prettify.js which defines a
// lexical scanner for CSS and maps tokens to styles.

// The call to PR['registerLangHandler'] is quoted so that Closure Compiler
// will not rename the call, which lets this language extension be
// compiled/minified separately from the others.  Other symbols defined in
// prettify.js are similarly quoted.

// The call is structured thus:
// PR['registerLangHandler'](
//    PR['createSimpleLexer'](
//        shortcutPatterns,
//        fallThroughPatterns),
//    [languageId0, ..., languageIdN])

// Language IDs
// =============
// The language IDs are typically the file extensions of source files for
// that language so that users can syntax highlight arbitrary files based
// on just the extension.  This is heuristic, but works pretty well in
// practice.

// Patterns
// ========
// Lexers are typically implemented as a set of regular expressions.
// The SimpleLexer function takes regular expressions, styles, and some
// pragma-info and produces a lexer.  A token description looks like
//   [STYLE_NAME, /regular-expression/, pragmas]

// Initially, simple lexer's inner loop looked like:

//    while sourceCode is not empty:
//      try each regular expression in order until one matches
//      remove the matched portion from sourceCode

// This was really slow for large files because some JS interpreters
// do a buffer copy on the matched portion which is O(n*n)

// The current loop now looks like

//    1. use js-modules/combinePrefixPatterns.js to
//       combine all regular expressions into one
//    2. use a single global regular expression match to extract all tokens
//    3. for each token try regular expressions in order until one matches it
//       and classify it using the associated style

// This is a lot more efficient but it does mean that lookahead and lookbehind
// can't be used across boundaries to classify tokens.

// Sometimes we need lookahead and lookbehind and sometimes we want to handle
// embedded language -- JavaScript or CSS embedded in HTML, or inline assembly
// in C.

// If a particular pattern has a numbered group, and its style pattern starts
// with "lang-" as in
//    ['lang-js', /<script>(.*?)<\/script>/]
// then the token classification step breaks the token into pieces.
// Group 1 is re-parsed using the language handler for "lang-js", and the
// surrounding portions are reclassified using the current language handler.
// This mechanism gives us lookahead, lookbehind, and language embedding.

// Shortcut Patterns
// =================
// A shortcut pattern is one that is tried before other patterns if the first
// character in the token is in the string of characters.
// This very effectively lets us make quick correct decisions for common token
// types.

// All other patterns are fall-through patterns.


// Registers the lexer for the 'css' language id.
// The comments inline below refer to productions in the CSS specification's
// lexical grammar.  See link above.
PR['registerLangHandler'](
    PR['createSimpleLexer'](
        // Shortcut patterns.
        [
         // The space production <s>
         [PR['PR_PLAIN'],       /^[ \t\r\n\f]+/, null, ' \t\r\n\f']
        ],
        // Fall-through patterns.
        [
         // Quoted strings.  <string1> and <string2>
         [PR['PR_STRING'],
          /^\"(?:[^\n\r\f\\\"]|\\(?:\r\n?|\n|\f)|\\[\s\S])*\"/, null],
         [PR['PR_STRING'],
          /^\'(?:[^\n\r\f\\\']|\\(?:\r\n?|\n|\f)|\\[\s\S])*\'/, null],
         // An unquoted URL.  Group 1 (the text between the parentheses) is
         // handed to the 'css-str' handler; 'url(' and ')' are re-lexed by
         // this handler.
         ['lang-css-str', /^url\(([^\)\"\']+)\)/i],
         // Keywords.  The lookahead keeps a keyword from matching when it is
         // only the prefix of a longer identifier.
         [PR['PR_KEYWORD'],
          /^(?:url|rgb|\!important|@import|@page|@media|@charset|inherit)(?=[^\-\w]|$)/i,
          null],
         // A property name -- an identifier followed by a colon.
         // Group 1 (the identifier) is handed to the 'css-kw' handler.
         // Both <nmstart> and <nmchar> accept a hex escape introduced by a
         // single backslash.
         ['lang-css-kw', /^(-?(?:[_a-z]|(?:\\[0-9a-f]+ ?))(?:[_a-z0-9\-]|\\[0-9a-f]+ ?)*)\s*:/i],
         // A C style block comment.  The <comment> production.
         [PR['PR_COMMENT'], /^\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\//],
         // Escaping text spans
         [PR['PR_COMMENT'], /^(?:<!--|-->)/],
         // A number possibly containing a suffix.
         // The fractional alternative is tried first: JavaScript alternation
         // is ordered rather than longest-match, so trying \d+ first would
         // split "1.5em" into "1" and ".5em".
         [PR['PR_LITERAL'], /^(?:\d*\.\d+|\d+)(?:%|[a-z]+)?/i],
         // A hex color
         [PR['PR_LITERAL'], /^#(?:[0-9a-f]{3}){1,2}\b/i],
         // An identifier.  As with property names, <nmchar> accepts a hex
         // escape introduced by a single backslash.
         [PR['PR_PLAIN'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\[\da-f]+ ?)*/i],
         // A run of punctuation
         [PR['PR_PUNCTUATION'], /^[^\s\w\'\"]+/]
        ]),
    ['css']);
// Above we use embedded languages to highlight property names (identifiers
// followed by a colon) differently from identifiers in values.
// This handler, registered under the 'css-kw' language id, styles every
// identifier it is given as a keyword.
PR['registerLangHandler'](
    PR['createSimpleLexer']([],
        [
         // An identifier: an optional leading '-', a name-start character
         // (or hex escape), then any run of name characters or hex escapes.
         // A hex escape is a single backslash followed by hex digits and an
         // optional trailing space.
         [PR['PR_KEYWORD'],
          /^-?(?:[_a-z]|(?:\\[\da-f]+ ?))(?:[_a-z\d\-]|\\[\da-f]+ ?)*/i]
        ]),
    ['css-kw']);
// Unquoted URL literals such as url(http://foo/img.png) should have their
// content colored as string content.  The URL production above delegates
// to the 'css-str' handler registered here to do so.
(function () {
  // This handler needs no shortcut patterns.
  var shortcutPatterns = [];
  // Any run of characters other than ')' and quote marks is string content.
  var fallThroughPatterns = [
    [PR['PR_STRING'], /^[^\)\"\']+/]
  ];
  var urlContentLexer =
      PR['createSimpleLexer'](shortcutPatterns, fallThroughPatterns);
  PR['registerLangHandler'](urlContentLexer, ['css-str']);
})();