Utility to ease reading different kinds of source files.
Supports several kinds of sources from which the data can be read:
- The source can be a path to a file, either as a string or as a
  `pathlib.Path` instance. The file itself must be UTF-8 encoded.
- Alternatively, the source can be an already opened file object,
  including a StringIO or BytesIO object. The file can contain either
  Unicode text or UTF-8 encoded bytes.
- The third option is to give the source as Unicode text directly.
  This requires setting `accept_text=True` when creating the reader.
In all cases bytes are automatically decoded to Unicode and possible
BOM removed.
Source code in src/robot/utils/filereader.py
class FileReader:    # FIXME: Rename to SourceReader
    """Reader that hides the differences between supported source kinds.

    A source can be any of the following:

    - A path to a file, given either as a string or as a ``pathlib.Path``
      instance. The file is opened in binary mode and its content is
      decoded as UTF-8.
    - An already opened file object, including a StringIO or BytesIO
      object, containing either Unicode text or UTF-8 encoded bytes.
      Such objects are not closed by the reader.
    - Unicode text itself. This is only recognized when the reader is
      created with ``accept_text=True``.

    Regardless of the source, bytes are decoded to Unicode, a possible
    BOM is removed and ``\\r\\n`` line endings are converted to ``\\n``.

    The reader can be used as a context manager. On exit it closes the
    underlying file only if the reader itself created it.
    """

    def __init__(self, source: Source, accept_text: bool = False):
        self.file, self._opened = self._get_file(source, accept_text)

    def _get_file(self, source: Source, accept_text: bool) -> 'tuple[TextIO, bool]':
        # The returned flag tells whether this reader created the file
        # object and is thus responsible for closing it.
        path = self._get_path(source, accept_text)
        if path:
            return open(path, 'rb'), True
        if is_string(source):
            return StringIO(source), True
        return source, False

    def _get_path(self, source: Source, accept_text: bool):
        # Returns the path to open, or None if the source is not a path.
        if is_pathlike(source):
            return str(source)
        if not is_string(source):
            return None
        # Without `accept_text` every string is considered to be a path.
        if not accept_text:
            return source
        # Multiline strings are always treated as text.
        if '\n' in source:
            return None
        candidate = Path(source)
        try:
            if candidate.is_absolute() or candidate.exists():
                return source
        except OSError:    # Can happen on Windows w/ Python < 3.10.
            pass
        return None

    @property
    def name(self) -> str:
        # Objects without a `name` attribute get a generic placeholder.
        return getattr(self.file, 'name', '<in-memory file>')

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Files given to the reader by the caller are left open.
        if not self._opened:
            return
        self.file.close()

    def read(self) -> str:
        raw = self.file.read()
        return self._decode(raw)

    def readlines(self) -> 'Iterator[str]':
        # A BOM can only occur at the start of the very first line.
        for index, line in enumerate(self.file.readlines()):
            yield self._decode(line, remove_bom=index == 0)

    def _decode(self, content: 'str|bytes', remove_bom: bool = True) -> str:
        text = content.decode('UTF-8') if is_bytes(content) else content
        if remove_bom and text.startswith('\ufeff'):
            text = text[1:]
        if '\r\n' in text:
            text = text.replace('\r\n', '\n')
        return text