# Source code for ascii_designer.ascii_slice

'''
Functions to slice up a fixed-column-width ASCII grid.

:any:`slice_grids` splits up lines according to a header row with ``|`` separators.

:any:`merged_cells` iterates over this grid and returns merge areas.

Columns are merged if there is something different from ``|`` or space below 
the separator in the header row.

Rows are merged by prefixing the cells with ``{``. The symbols must be in the 
same text column.
'''
import dataclasses as dc
import textwrap

# Names exported by ``from ascii_designer.ascii_slice import *``.
__all__ = [
    "slice_grids",
    "SlicedGrid",
    "merged_cells",
    "MCell",
]

# Sample grids for testing.

# Two row-merge areas that collide: the ``{jk``/``{rstuvw`` merge extends into
# the second column, where the ``{de``/``{lm`` merge already claimed a cell.
_overlapping_merge = '''
    |   |   |   
     abc {de fgh
     {jk {lm nop
     {rstuvw xyz
    '''
# Row-merge ranges stacked directly above each other in the same grid column;
# they are kept apart by indenting the ``{`` differently.
_adj_row_merge = '''
    |    |   
     {abc
     {def
      {ghi
      {jkl
     {mno
     {pqr
     '''
     
@dc.dataclass
class SlicedGrid:
    """One ASCII grid cut into columns along its header row.

    All fields default to empty containers, so ``SlicedGrid()`` represents
    an empty grid.
    """

    column_heads: list[str] = dc.field(default_factory=list)
    """Header texts between (not including) the ``|`` splitters"""

    body_lines: list[list[str]] = dc.field(default_factory=list)
    """Naively split grid cells as a list of rows, each row a list of strings.
    Each string is the cell text including the PRECEDING separator's column."""

    subgrids: dict[str, "SlicedGrid"] = dc.field(default_factory=dict)
    """Nested grids, keyed by subgrid id"""
def slice_grids(grid_text: str) -> SlicedGrid:
    '''Slice a grid (and any subgrids following it) up by its header row.

    The text is dedented first. For each grid, empty lines before/after are
    removed, and the first remaining row is split by ``|`` characters. The
    first column can contain a ``|`` character or not.

    A grid can be followed by subgrids. Start of a subgrid is indicated by the
    text ``:subgridid:`` on a separate line, where ``subgridid`` can be any
    text. The first ``:`` MUST be on the same indentation level as the first
    ``|`` (which is required).

    Returns a SlicedGrid with properties:

    * column_heads: the split up parts of the first line (not including the
      separators).
    * body_lines: list of following lines; each item is a list of strings,
      where each string is the grid "cell" including the preceding separator
      column. I.e. if you join the cell list without separator, you regain the
      text line.
    * subgrids: dictionary of subgrids, keyed by subgridid. A later subgrid
      with the same id replaces an earlier one.

    Raises ValueError if the line that should introduce a subgrid is not of
    the form ``:subgridid:``.
    '''
    lines = textwrap.dedent(grid_text).splitlines()
    grid, lines = _slice_grid(lines)
    # Whatever _slice_grid hands back starts with a subgrid marker line.
    while lines:
        idline = lines.pop(0).strip()
        if not (idline.startswith(":") and idline.endswith(":")):
            raise ValueError(f"Subgrid id is invalid: '{idline}'")
        subgrid_id = idline[1:-1]
        grid.subgrids[subgrid_id], lines = _slice_grid(lines)
    return grid
def _slice_grid(lines: list[str]) -> tuple[SlicedGrid, list[str]]:
    '''Slice a single grid off the front of ``lines``.

    The grid ends before the first line that both starts and ends with ``:``
    (a subgrid marker). Empty lines before/after the grid are removed. No
    dedenting happens here. ``lines`` itself is never modified.

    The first row is split by ``|`` characters. The first column can contain a
    ``|`` character or not.

    Returns ``(grid, remainder)``. ``grid`` is a SlicedGrid with properties:

    * column_heads: the split up parts of the first line (not including the
      separators).
    * body_lines: list of following lines; each item is a list of strings,
      where each string is the grid "cell" including the preceding separator
      column. I.e. if you join the cell list without separator, you regain the
      text line (padded with spaces to the full grid width).

    ``remainder`` are the lines following the grid, starting with the subgrid
    marker line; empty if there is none.
    '''
    # Split at the first subgrid marker. Both branches produce a fresh list,
    # so the pops below cannot alter the caller's list.
    for n, line in enumerate(lines):
        if line.startswith(":") and line.endswith(":"):
            lines, remain = lines[:n], lines[n:]
            break
    else:
        lines, remain = list(lines), []

    # remove leading and trailing whitespace lines
    while lines and not lines[0].strip():
        lines.pop(0)
    while lines and not lines[-1].strip():
        lines.pop()
    if not lines:
        return SlicedGrid(), remain

    header, *body = lines
    column_heads = header.split('|')
    if not column_heads[0]:
        # first | is there
        column_heads.pop(0)
    else:
        # no first |, add padding to first column
        body = [' ' + line for line in body]

    # Each cell includes the separator column preceding it, hence the +1.
    widths = [len(part) + 1 for part in column_heads]
    # if any line is longer than the header, adjust width of last column.
    maxlen = max((len(line) for line in body), default=0)
    widths[-1] = max(widths[-1], maxlen - sum(widths[:-1]))

    body_lines = []
    for line in body:
        cells = []
        start = 0
        for w in widths:
            # Short lines are padded so every cell has its full width.
            cells.append(line[start:start + w].ljust(w))
            start += w
        body_lines.append(cells)
    return SlicedGrid(column_heads=column_heads, body_lines=body_lines), remain
@dc.dataclass
class MCell:
    """A single, possibly merged, cell area of a sliced grid."""

    row: int = 0
    """Cell's row, counting from 0"""

    col: int = 0
    """Cell's column, counting from 0"""

    text: str = ""
    """Merged-area text"""

    rowspan: int = 1
    """Spanned rows, at least 1"""

    colspan: int = 1
    """Spanned columns, at least 1"""
def merged_cells(sliced_grid):
    '''Generator: takes the sliced grid, and returns merged cells one by one.

    Cells are merged by the following logic:

    * If the first character of a (stripped) cell is '{', cells of the
      following row(s) are merged while they also start with '{' in the same
      column.
    * Then, columns are merged if the following (column's) cell starts neither
      with space nor with '|'.

    Yields MCell instances with:

    * row, col: cell position (int, 0-based)
    * rowspan, colspan: spanned rows/cols, at least 1
    * text: merged area text, as sliced out from the text editor; not
      including the leading '{'; "ragged" linebreaks retained.

    Iteration order is row-wise.

    Merge areas must not overlap; a ValueError is raised if they do. (However
    this should rarely happen by accident.)

    Note: If you need two row-merge ranges above each other, indent the '{'
    differently.
    '''
    # Copy every row: cells get rewritten / blanked out while iterating, and
    # the caller's grid must stay untouched.
    body_lines = [cells[:] for cells in sliced_grid.body_lines]
    for row, line in enumerate(body_lines):
        for col, cell in enumerate(line):
            if cell is None:
                # part of previously merged cell
                continue
            # The first character is the separator column; ignore it here.
            cell = cell[1:]

            # calculate row span
            rowspan = 1
            if cell.lstrip().startswith('{'):
                ofs = cell.index('{') + 1
                while True:
                    try:
                        cell_below = body_lines[row + rowspan][col][1:]
                    except IndexError:
                        break
                    # no aligned { or not empty before
                    if cell_below[ofs - 1:ofs] != '{' or cell_below[:ofs - 1].strip():
                        break
                    rowspan += 1
                # Cut everything before the '{' in all merged rows. The '{'
                # itself now sits in the first position and is dropped by the
                # [1:] when the text is collected below.
                for row2 in range(row, row + rowspan):
                    body_lines[row2][col] = body_lines[row2][col][ofs:]

            # calculate column span: widest span over all merged rows
            colspan = 1
            for row2 in range(row, row + rowspan):
                span = 1
                while True:
                    try:
                        cell_right = body_lines[row2][col + span]
                    except IndexError:
                        break
                    # "not cell" also covers the "already-merged" case.
                    if not cell_right or cell_right.startswith((' ', '|')):
                        break
                    span += 1
                colspan = max(colspan, span)

            # All clear. Collect text.
            try:
                mrows = [
                    ''.join(body_lines[row2][col:col + colspan])[1:]
                    for row2 in range(row, row + rowspan)
                ]
            except TypeError as e:
                # caught a None entry
                raise ValueError('Overlapping merge areas') from e
            text = '\n'.join(mrows)
            yield MCell(row=row, col=col, text=text, rowspan=rowspan, colspan=colspan)

            # white-out merge area
            for row2 in range(row, row + rowspan):
                for col2 in range(col, col + colspan):
                    try:
                        body_lines[row2][col2] = None
                    except IndexError:
                        pass