Skip to content

filereader

FileReader

Utility to ease reading different kinds of source files.

Supports different sources from which to read the data:

  • The source can be a path to a file, either as a string or as a pathlib.Path instance. The file itself must be UTF-8 encoded.

  • Alternatively the source can be an already opened file object, including a StringIO or BytesIO object. The file can contain either Unicode text or UTF-8 encoded bytes.

  • The third option is to give the source as Unicode text directly. This requires setting accept_text=True when creating the reader.

In all cases bytes are automatically decoded to Unicode and a possible BOM is removed.

Source code in src/robot/utils/filereader.py
class FileReader:  # FIXME: Rename to SourceReader
    """Uniform reader for source data given as a path, a file object or text.

    The source can be any of the following:

    - A path to a file, given either as a string or as a ``pathlib.Path``
      instance. The file is opened in binary mode and its content is
      decoded as UTF-8.

    - An already opened file object, for example a ``StringIO`` or
      ``BytesIO``. Its content may be Unicode text or UTF-8 encoded bytes.
      The reader does not close such a file when used as a context manager.

    - Unicode text itself. This is only considered when the reader is
      created with ``accept_text=True``.

    Regardless of the source, bytes are decoded to Unicode, a leading BOM
    is stripped and ``\\r\\n`` line endings are converted to ``\\n``.
    """

    def __init__(self, source: Source, accept_text: bool = False):
        # ``_opened`` records whether this reader created the file object
        # and is therefore responsible for closing it in ``__exit__``.
        self.file, self._opened = self._get_file(source, accept_text)

    def _get_file(self, source: Source, accept_text: bool) -> 'tuple[TextIO, bool]':
        """Return ``(file, opened)`` for the given source.

        ``opened`` is true when the file object was created here (a file
        opened from a path or a ``StringIO`` wrapping text) and false when
        the source itself is used as the file object.
        """
        path = self._get_path(source, accept_text)
        if path:
            # Binary mode: decoding and BOM handling is done in `_decode`.
            return open(path, 'rb'), True
        if is_string(source):
            return StringIO(source), True
        return source, False

    def _get_path(self, source: Source, accept_text: bool):
        """Return the source as a path string, or ``None`` if it is not a path.

        Path-like objects are always paths and non-strings never are.
        Strings are always paths unless ``accept_text`` is set, in which
        case a string is a path only if it has no newline and is either
        an absolute path or points to something that exists.
        """
        if is_pathlike(source):
            return str(source)
        if not is_string(source):
            return None
        if accept_text:
            if '\n' in source:
                return None
            candidate = Path(source)
            try:
                if not (candidate.is_absolute() or candidate.exists()):
                    return None
            except OSError:    # Can happen on Windows w/ Python < 3.10.
                return None
        return source

    @property
    def name(self) -> str:
        """Name of the underlying file, or a placeholder if it has none."""
        placeholder = '<in-memory file>'
        return getattr(self.file, 'name', placeholder)

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Files passed in by the caller are left open for the caller.
        if not self._opened:
            return
        self.file.close()

    def read(self) -> str:
        """Read the whole content and return it as normalized text."""
        raw = self.file.read()
        return self._decode(raw)

    def readlines(self) -> 'Iterator[str]':
        """Yield the content line by line as normalized text.

        A BOM is removed only from the first line.
        """
        for index, line in enumerate(self.file.readlines()):
            yield self._decode(line, remove_bom=index == 0)

    def _decode(self, content: 'str|bytes', remove_bom: bool = True) -> str:
        """Decode bytes as UTF-8, optionally drop a BOM, normalize CRLF to LF."""
        text = content.decode('UTF-8') if is_bytes(content) else content
        if remove_bom and text.startswith('\ufeff'):
            text = text[1:]
        return text.replace('\r\n', '\n') if '\r\n' in text else text