001/*
002// $Id: ParseRegion.java 482 2012-01-05 23:27:27Z jhyde $
003//
004// Licensed to Julian Hyde under one or more contributor license
005// agreements. See the NOTICE file distributed with this work for
006// additional information regarding copyright ownership.
007//
008// Julian Hyde licenses this file to you under the Apache License,
009// Version 2.0 (the "License"); you may not use this file except in
010// compliance with the License. You may obtain a copy of the License at:
011//
012// http://www.apache.org/licenses/LICENSE-2.0
013//
014// Unless required by applicable law or agreed to in writing, software
015// distributed under the License is distributed on an "AS IS" BASIS,
016// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017// See the License for the specific language governing permissions and
018// limitations under the License.
019*/
020package org.olap4j.mdx;
021
/**
 * Region of parser source code.
 *
 * <p>The main purpose of a ParseRegion is to give detailed locations in
 * error messages and warnings from the parsing and validation process.
 *
 * <p>A region has a start and end line number and column number. A region is
 * a point if the start and end positions are the same.
 *
 * <p>The line and column number are one-based, because that is what end-users
 * understand.
 *
 * <p>A region's end-points are inclusive. For example, in the code
 *
 * <blockquote><pre>SELECT FROM [Sales]</pre></blockquote>
 *
 * the <code>SELECT</code> token has region [1:1, 1:6].
 *
 * <p>Regions are immutable.
 *
 * @version $Id: ParseRegion.java 482 2012-01-05 23:27:27Z jhyde $
 * @author jhyde
 */
public class ParseRegion {
    private final int startLine;
    private final int startColumn;
    private final int endLine;
    private final int endColumn;

    /**
     * Creates a ParseRegion.
     *
     * <p>All lines and columns are 1-based and inclusive. For example, the
     * token "select" in "select from [Sales]" has a region [1:1, 1:6].
     *
     * <p>The end position must not precede the start position; this is
     * checked with assertions only.
     *
     * @param startLine Line of the beginning of the region
     * @param startColumn Column of the beginning of the region
     * @param endLine Line of the end of the region
     * @param endColumn Column of the end of the region
     */
    public ParseRegion(
        int startLine,
        int startColumn,
        int endLine,
        int endColumn)
    {
        assert endLine >= startLine;
        assert endLine > startLine || endColumn >= startColumn;
        this.startLine = startLine;
        this.startColumn = startColumn;
        this.endLine = endLine;
        this.endColumn = endColumn;
    }

    /**
     * Creates a point ParseRegion, whose start and end coincide.
     *
     * <p>All lines and columns are 1-based.
     *
     * @param line Line of the beginning and end of the region
     * @param column Column of the beginning and end of the region
     */
    public ParseRegion(
        int line,
        int column)
    {
        this(line, column, line, column);
    }

    /**
     * Return starting line number (1-based).
     *
     * @return 1-based starting line number
     */
    public int getStartLine() {
        return startLine;
    }

    /**
     * Return starting column number (1-based).
     *
     * @return 1-based starting column number
     */
    public int getStartColumn() {
        return startColumn;
    }

    /**
     * Return ending line number (1-based).
     *
     * @return 1-based ending line number
     */
    public int getEndLine() {
        return endLine;
    }

    /**
     * Return ending column number (1-based).
     *
     * @return 1-based ending column number
     */
    public int getEndColumn() {
        return endColumn;
    }

    /**
     * Returns a string representation of this ParseRegion.
     *
     * <p>Regions are of the form
     * <code>[startLine:startColumn, endLine:endColumn]</code>, or
     * <code>[startLine:startColumn]</code> for point regions.
     *
     * @return string representation of this ParseRegion
     */
    @Override
    public String toString() {
        return "[" + startLine + ":" + startColumn
            + ((isPoint())
                ? ""
                : ", " + endLine + ":" + endColumn)
            + "]";
    }

    /**
     * Returns whether this region has the same start and end point.
     *
     * @return whether this region has the same start and end point
     */
    public boolean isPoint() {
        return endLine == startLine && endColumn == startColumn;
    }

    /**
     * Returns a hash code derived from the four coordinates, consistent
     * with {@link #equals(Object)}.
     *
     * @return hash code of this region
     */
    @Override
    public int hashCode() {
        return startLine
            ^ (startColumn << 2)
            ^ (endLine << 4)
            ^ (endColumn << 8);
    }

    /**
     * Returns whether the other object is a ParseRegion with the same start
     * and end coordinates.
     *
     * @param obj Object to compare with
     * @return whether the regions have identical coordinates
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof ParseRegion) {
            final ParseRegion that = (ParseRegion) obj;
            return this.startLine == that.startLine
                && this.startColumn == that.startColumn
                && this.endLine == that.endLine
                && this.endColumn == that.endColumn;
        } else {
            return false;
        }
    }

    /**
     * Combines this region with a collection of other regions to create a
     * region which spans from the earliest start point to the latest end
     * point among this region and the given regions.
     *
     * <p>Null elements of the collection are ignored.
     *
     * @param regions Collection of source code regions
     * @return region which represents the span of the given regions
     */
    public ParseRegion plusAll(Iterable<ParseRegion> regions)
    {
        return sum(
            regions,
            getStartLine(),
            getStartColumn(),
            getEndLine(),
            getEndColumn());
    }

    /**
     * Combines the parser positions of a list of nodes to create a position
     * which spans from the beginning of the first to the end of the last.
     *
     * <p>Null elements are ignored. The collection must contain at least one
     * non-null region; otherwise the computed start lies after the computed
     * end, which the constructor rejects when assertions are enabled.
     *
     * @param nodes Collection of parse tree nodes
     * @return region which represents the span of the given nodes
     */
    public static ParseRegion sum(
        Iterable<ParseRegion> nodes)
    {
        // Seed with "later than anything" / "earlier than anything" so that
        // the first non-null region replaces both end-points.
        return sum(nodes, Integer.MAX_VALUE, Integer.MAX_VALUE, -1, -1);
    }

    /**
     * Widens an initial span so that it covers every non-null region in a
     * collection.
     *
     * @param regions Regions to cover; null elements are skipped
     * @param startLine Initial start line
     * @param startColumn Initial start column
     * @param endLine Initial end line
     * @param endColumn Initial end column
     * @return region from the smallest start to the largest end
     */
    private static ParseRegion sum(
        Iterable<ParseRegion> regions,
        int startLine,
        int startColumn,
        int endLine,
        int endColumn)
    {
        int testLine;
        int testColumn;
        for (ParseRegion region : regions) {
            if (region == null) {
                continue;
            }
            // Positions are compared lexicographically: line, then column.
            testLine = region.getStartLine();
            testColumn = region.getStartColumn();
            if ((testLine < startLine)
                || ((testLine == startLine) && (testColumn < startColumn)))
            {
                startLine = testLine;
                startColumn = testColumn;
            }

            testLine = region.getEndLine();
            testColumn = region.getEndColumn();
            if ((testLine > endLine)
                || ((testLine == endLine) && (testColumn > endColumn)))
            {
                endLine = testLine;
                endColumn = testColumn;
            }
        }
        return new ParseRegion(startLine, startColumn, endLine, endColumn);
    }

    /**
     * Looks for one or two carets in an MDX string, and if present, converts
     * them into a parser position.
     *
     * <p>Examples:
     *
     * <ul>
     * <li>findPos("xxx^yyy") yields {"xxxyyy", position 3, line 1 column 4}
     * <li>findPos("xxxyyy") yields {"xxxyyy", null}
     * <li>findPos("xxx^yy^y") yields {"xxxyyy", position 3, line 1 column 4
     * through line 1 column 5}
     * </ul>
     *
     * <p>Only the first two carets are interpreted; any further carets are
     * left in the returned source.
     *
     * @param code Source code
     * @return object containing source code annotated with region
     */
    public static RegionAndSource findPos(String code)
    {
        int firstCaret = code.indexOf('^');
        if (firstCaret < 0) {
            return new RegionAndSource(code, null);
        }
        int secondCaret = code.indexOf('^', firstCaret + 1);
        if (secondCaret < 0) {
            String codeSansCaret =
                code.substring(0, firstCaret)
                + code.substring(firstCaret + 1);
            int [] start = indexToLineCol(code, firstCaret);
            return new RegionAndSource(
                codeSansCaret,
                new ParseRegion(start[0], start[1]));
        } else {
            String codeSansCaret =
                code.substring(0, firstCaret)
                + code.substring(firstCaret + 1, secondCaret)
                + code.substring(secondCaret + 1);

            // Nothing precedes the first caret, so its offset is the same
            // in the original and in the caret-free text.
            int [] start = indexToLineCol(code, firstCaret);

            // In the caret-free text, the character that followed the second
            // caret sits at (secondCaret - 1), because the first caret has
            // been removed. The region is inclusive, so it ends one
            // character earlier. Computing the position in the caret-free
            // text keeps the column right even when the two carets are on
            // different lines (where the first caret does not shift the
            // columns of the last line).
            int [] end = indexToLineCol(codeSansCaret, secondCaret - 2);
            return new RegionAndSource(
                codeSansCaret,
                new ParseRegion(start[0], start[1], end[0], end[1]));
        }
    }

    /**
     * Returns the (1-based) line and column corresponding to a particular
     * (0-based) offset in a string.
     *
     * <p>Lines may be terminated by "\r\n", "\r" or "\n". An offset which
     * falls on a line terminator is reported as belonging to the line which
     * that terminator ends.
     *
     * <p>Converse of {@link #lineColToIndex(String, int, int)}.
     *
     * @param code Source code
     * @param i Offset within source code
     * @return 2-element array containing line and column
     */
    private static int [] indexToLineCol(String code, int i) {
        int line = 0;
        int j = 0;
        while (true) {
            // Find the next line terminator at or after j.
            String s;
            int rn = code.indexOf("\r\n", j);
            int r = code.indexOf("\r", j);
            int n = code.indexOf("\n", j);
            int prevj = j;
            if ((r < 0) && (n < 0)) {
                assert rn < 0;
                s = null;
                j = -1;
            } else if ((rn >= 0) && (rn < n) && (rn <= r)) {
                s = "\r\n";
                j = rn;
            } else if ((r >= 0) && ((n < 0) || (r < n))) {
                // Lone "\r", including the case where no "\n" follows at all.
                s = "\r";
                j = r;
            } else {
                s = "\n";
                j = n;
            }
            // Stop if there are no more terminators, or if the offset lies
            // before the end of the next terminator (i.e. on the current
            // line). s is non-null whenever j >= 0.
            if ((j < 0) || (i < j + s.length())) {
                return new int[] { line + 1, i - prevj + 1 };
            }
            j += s.length();
            ++line;
        }
    }

    /**
     * Finds the position (0-based) in a string which corresponds to a given
     * line and column (1-based).
     *
     * <p>Lines are located by searching for "\n" only, so "\n" and "\r\n"
     * line endings are supported but a lone "\r" is not treated as a line
     * break here. If <code>line</code> exceeds the number of lines, the
     * search restarts from the beginning of the string, so the result is
     * only meaningful for line numbers that exist in the source.
     *
     * <p>Converse of {@link #indexToLineCol(String, int)}.
     *
     * @param code Source code
     * @param line Line number
     * @param column Column number
     * @return Offset within source code
     */
    private static int lineColToIndex(String code, int line, int column)
    {
        --line;
        --column;
        int i = 0;
        while (line-- > 0) {
            // Works on linux where line ending is "\n";
            // also works on windows where line ending is "\r\n".
            // Even works if they supply linux strings on windows.
            i = code.indexOf("\n", i)
                + "\n".length();
        }
        return i + column;
    }

    /**
     * Generates a string of the source code annotated with caret symbols ("^")
     * at the beginning and end of the region.
     *
     * <p>For example, for the region <code>(1, 9, 1, 11)</code> and source
     * <code>"values (foo)"</code>,
     * yields the string <code>"values (^foo^)"</code>.
     *
     * <p>For a point region, a single caret is inserted.
     *
     * @param source Source code
     * @return Source code annotated with position
     */
    public String annotate(String source) {
        return addCarets(source, startLine, startColumn, endLine, endColumn);
    }

    /**
     * Converts a string to a string with one or two carets in it. For example,
     * <code>addCarets("values (foo)", 1, 9, 1, 11)</code> yields "values
     * (^foo^)".
     *
     * <p>The first caret is placed before the start position; the second,
     * if the region is not a point, is placed after the (inclusive) end
     * position.
     *
     * @param sql Source code
     * @param line Line number
     * @param col Column number
     * @param endLine Line number of end of region
     * @param endCol Column number of end of region
     * @return String annotated with region
     */
    private static String addCarets(
        String sql,
        int line,
        int col,
        int endLine,
        int endCol)
    {
        String sqlWithCarets;
        int cut = lineColToIndex(sql, line, col);
        sqlWithCarets = sql.substring(0, cut) + "^"
            + sql.substring(cut);
        if ((col != endCol) || (line != endLine)) {
            cut = lineColToIndex(sqlWithCarets, endLine, endCol + 1);
            if (line == endLine) {
                // The first caret shifts columns only on its own line; on a
                // later line the offset found above is already correct.
                ++cut;
            }
            if (cut < sqlWithCarets.length()) {
                sqlWithCarets =
                    sqlWithCarets.substring(0, cut)
                    + "^" + sqlWithCarets.substring(cut);
            } else {
                sqlWithCarets += "^";
            }
        }
        return sqlWithCarets;
    }

    /**
     * Combination of a region within an MDX statement with the source text
     * of the whole MDX statement.
     *
     * <p>Useful for reporting errors. For example, the error in the statement
     *
     * <blockquote>
     * <pre>
     * SELECT {<b><i>[Measures].[Units In Stock]</i></b>} ON COLUMNS
     * FROM [Sales]
     * </pre>
     * </blockquote>
     *
     * has source
     * "SELECT {[Measures].[Units In Stock]} ON COLUMNS\nFROM [Sales]" and
     * region [1:9, 1:35].
     */
    public static class RegionAndSource {
        /** Source MDX code, without caret markers. */
        public final String source;

        /** Region within the source; may be null if none was marked. */
        public final ParseRegion region;

        /**
         * Creates a RegionAndSource.
         *
         * @param source Source MDX code
         * @param region Coordinates of region within MDX code
         */
        public RegionAndSource(String source, ParseRegion region) {
            this.source = source;
            this.region = region;
        }
    }
}
442
443// End ParseRegion.java