'''
Functions to slice up a fixed-column-width ASCII grid.
:any:`slice_grids` splits up lines according to a header row with ``|`` separators.
:any:`merged_cells` iterates over this grid and returns merge areas.
Columns are merged if there is something different from ``|`` or space below
the separator in the header row.
Rows are merged by prefixing the cells with ``{``. The symbols must be in the
same text column.
'''
import dataclasses as dc
import textwrap
# Public names of this module: the two entry-point functions and the
# dataclasses they return / yield.
__all__ = [
'slice_grids',
'SlicedGrid',
'merged_cells',
'MCell',
]
# Sample grid texts for testing. Nothing in this module reads them.
# Judging by the names, the first is meant to provoke overlapping merge areas
# and the second adjacent row-merge ranges -- presumably; verify against the
# tests that use them.
# NOTE(review): the column alignment inside these literals looks collapsed
# (e.g. every '{' of _adj_row_merge sits in the same column). Merging depends
# on exact text columns, so confirm the spacing against the upstream source
# before relying on these fixtures.
_overlapping_merge = '''
| | |
abc {de fgh
{jk {lm nop
{rstuvw xyz
'''
_adj_row_merge = '''
| |
{abc
{def
{ghi
{jkl
{mno
{pqr
'''
[docs]
@dc.dataclass
class SlicedGrid:
    '''One sliced ASCII grid, optionally carrying named subgrids.

    Fields:

    * ``column_heads``: the header texts found between the ``|`` splitters
      (the splitters themselves are not included).
    * ``body_lines``: the grid body, split without any interpretation.
      Indexed by row, then by column; every string is the cell text
      INCLUDING the text column of the separator that precedes it.
    * ``subgrids``: further ``SlicedGrid`` instances, keyed by name.
    '''

    column_heads: list[str] = dc.field(default_factory=list)
    body_lines: list[list[str]] = dc.field(default_factory=list)
    subgrids: dict[str, "SlicedGrid"] = dc.field(default_factory=dict)
[docs]
def slice_grids(grid_text) -> SlicedGrid:
    '''Slice a grid text, and any subgrids following it, by their header rows.

    The text is dedented first; blank lines around each grid are dropped.
    The first (nonempty) row of a grid is its header and is split at the
    ``|`` characters. The first column may or may not start with a ``|``.

    A grid can be followed by subgrids. A subgrid starts with a separate
    line of the form ``:subgridid:``, where ``subgridid`` can be any text.
    The first ``:`` MUST be on the same indentation level as the first
    ``|`` (which is required).

    Returns a SlicedGrid with the properties:

    * column_heads: the parts of the header row, separators not included.
    * body_lines: the rows below the header; each row is a list of strings,
      each string being one grid "cell" including the preceding separator
      column. Joining a row without separator gives back the text line.
    * subgrids: dictionary of the subgrids, keyed by subgridid.

    Raises ValueError if the line where a subgrid id is expected is not
    enclosed in ``:``.
    '''
    pending = textwrap.dedent(grid_text).splitlines()
    main_grid, pending = _slice_grid(pending)
    while pending:
        # The first pending line is the id line of the next subgrid.
        marker, *rest = pending
        marker = marker.strip()
        if not marker.startswith(":") or not marker.endswith(":"):
            raise ValueError(f"Subgrid id is invalid: '{marker}'")
        main_grid.subgrids[marker[1:-1]], pending = _slice_grid(rest)
    return main_grid
def _slice_grid(lines) -> tuple[SlicedGrid,list[str]]:
    '''Slice one grid off the front of ``lines``.

    ``lines`` is a list of text lines (dedenting is the caller's job); the
    list is not modified. The grid extends up to, but not including, the
    first subgrid marker: a line that starts with ``:`` and, ignoring
    trailing whitespace, ends with ``:``. Blank lines before and after the
    grid are dropped.

    The first remaining line is the header and is split at ``|`` characters.
    The first column can start with a ``|`` or not; if it does not, every
    body line is padded with one leading space so that the first cell has a
    "separator" column like all the others.

    Returns a tuple ``(grid, remainder)``:

    * ``grid`` is a SlicedGrid with

      * column_heads: the parts of the header line, separators not included.
      * body_lines: list of the following lines; each item is a list of
        strings, each string being one grid "cell" including the preceding
        separator column, padded with spaces to the column width. The last
        column is widened to hold the longest line.

      If there are no nonblank lines, ``grid`` is an empty SlicedGrid.
    * ``remainder`` is a new list with the lines from the subgrid marker
      onwards (marker included), or ``[]`` if there is no marker.
    '''
    # Locate the first subgrid marker. Trailing whitespace is ignored so that
    # an invisible blank after the closing ':' does not turn the marker into
    # an ordinary body row.
    for n, line in enumerate(lines):
        if line.startswith(":") and line.rstrip().endswith(":"):
            break
    else:
        n = len(lines)
    remain = lines[n:]

    # Skip blank lines around the grid by moving indices instead of popping,
    # which leaves the caller's list untouched.
    start, stop = 0, n
    while start < stop and not lines[start].strip():
        start += 1
    while stop > start and not lines[stop - 1].strip():
        stop -= 1
    if start == stop:
        return SlicedGrid(), remain

    header = lines[start]
    rows = lines[start + 1:stop]

    column_heads = header.split('|')
    if not column_heads[0]:
        # Header starts with '|': drop the empty part in front of it.
        column_heads.pop(0)
    else:
        # No leading '|': pad the body so the first cell gets a separator
        # column as well.
        rows = [' ' + row for row in rows]

    # Each cell spans its header text plus one separator column.
    widths = [len(part) + 1 for part in column_heads]
    # If any line is longer than the header, the last column absorbs it.
    longest = max(map(len, rows), default=0)
    widths[-1] = max(widths[-1], longest - sum(widths[:-1]))

    body_lines = []
    for row in rows:
        cells = []
        pos = 0
        for width in widths:
            # Short lines are padded so every cell has its full width.
            cells.append(row[pos:pos + width].ljust(width))
            pos += width
        body_lines.append(cells)
    return SlicedGrid(column_heads=column_heads, body_lines=body_lines), remain
[docs]
@dc.dataclass
class MCell:
    '''A single (possibly merged) cell area of a grid.'''

    row: int = 0
    """Row index of the cell, 0-based"""
    col: int = 0
    """Column index of the cell, 0-based"""
    text: str = ""
    """Text of the merged area"""
    rowspan: int = 1
    """Number of rows spanned, 1 or more"""
    colspan: int = 1
    """Number of columns spanned, 1 or more"""
[docs]
def merged_cells(sliced_grid):
    '''Generator: walk over a sliced grid and yield its merge areas.

    Merge rules:

    * Row merge: a cell whose first non-blank character is '{' takes in the
      cells below it for as long as they have a '{' in exactly the same text
      column, with nothing but blanks in front of it.
    * Column merge: afterwards, every row of the area extends to the right
      while the next cell starts neither with a space nor with '|'. The
      widest row decides the column span of the whole area.

    Every yielded MCell carries:

    * row, col: position of the cell (int, 0-based)
    * rowspan, colspan: number of spanned rows/columns, at least 1
    * text: the text of the merged area, one line per spanned row, as
      sliced from the grid; the leading '{' is not included and "ragged"
      line breaks are kept.

    Cells are visited row by row. The input grid is not modified.

    Raises ValueError if merge areas overlap (which should rarely happen by
    accident).

    Note: two row-merge ranges directly above each other need their '{' in
    different text columns.
    '''
    def reach_right(cells, start):
        # Number of cells, counted from ``start``, that run together.
        # An empty or already consumed (None) neighbour stops the run.
        span = 1
        for neighbour in cells[start + 1:]:
            if not neighbour or neighbour.startswith((' ', '|')):
                break
            span += 1
        return span

    # Work on a private copy; consumed cells are replaced by None.
    grid = [list(cells) for cells in sliced_grid.body_lines]

    for row, cells in enumerate(grid):
        for col, raw in enumerate(cells):
            if raw is None:
                # Belongs to an area that was already yielded.
                continue

            # --- row span ---
            content = raw[1:]  # without the separator column
            if content.lstrip().startswith('{'):
                brace_end = content.index('{') + 1
            else:
                brace_end = 0
            rowspan = 1
            if brace_end:
                for lower in grid[row + 1:]:
                    try:
                        below = lower[col][1:]
                    except IndexError:
                        break
                    aligned = below[brace_end - 1:brace_end] == '{'
                    blank_before = not below[:brace_end - 1].strip()
                    if not (aligned and blank_before):
                        break
                    rowspan += 1
                # The prefix has done its job: cut everything in front of
                # the '{' so that each cell now starts with the '{' itself.
                for r in range(row, row + rowspan):
                    grid[r][col] = grid[r][col][brace_end:]
            area_rows = range(row, row + rowspan)

            # --- column span: the widest row wins ---
            colspan = max(reach_right(grid[r], col) for r in area_rows)

            # --- collect the text ---
            # The first character of each joined row is either the separator
            # column or the '{'; it is cut off in both cases.
            try:
                pieces = [
                    ''.join(grid[r][col:col + colspan])[1:]
                    for r in area_rows
                ]
            except TypeError as e:
                # A None entry inside the area: it was consumed before.
                raise ValueError('Overlapping merge areas') from e
            yield MCell(
                row=row,
                col=col,
                text='\n'.join(pieces),
                rowspan=rowspan,
                colspan=colspan,
            )

            # --- mark the area as consumed (in place; rows may be short) ---
            for r in area_rows:
                target = grid[r]
                stop = min(col + colspan, len(target))
                target[col:stop] = [None] * (stop - col)