Source code for episuite.data

from pathlib import Path
from typing import BinaryIO, Optional, Union

import pandas as pd
import pkg_resources
import requests
from appdirs import AppDirs
from tqdm.auto import tqdm

import episuite


def get_cache_dir_file(filename: Optional[Union[str, Path]] = None) -> Path:
    """Return the user cache directory, or the path of a file inside it.

    The cache directory is resolved through ``AppDirs`` using the
    application name, author and version exposed by the ``episuite``
    package, and it is created (including parents) if it does not exist.

    :param filename: optional file name (or relative path) to append to
                     the cache directory
    :returns: the cache directory when ``filename`` is ``None``,
              otherwise the cache directory joined with ``filename``
    """
    app_dirs = AppDirs(episuite.__appname__,
                       episuite.__author__,
                       version=episuite.__version__)
    cache_root = Path(app_dirs.user_cache_dir)
    cache_root.mkdir(parents=True, exist_ok=True)

    if filename is None:
        return cache_root
    return cache_root / Path(filename)
def load_from_cache(url: str, filename: Union[str, Path],
                    desc: Optional[str] = None,
                    show_progress: bool = True,
                    invalidate: bool = False) -> Path:
    """Return the cached copy of a remote file, downloading it if needed.

    The file is first downloaded to a temporary ``.part`` file next to
    the final location and only moved into place once the download has
    finished. A failed or interrupted download therefore never leaves a
    truncated file behind that would later be served as a valid cache
    entry, and a previously cached copy is kept until its replacement
    has been fully downloaded.

    :param url: the url to download from
    :param filename: name of the file inside the cache directory
    :param desc: progress bar description
    :param show_progress: whether to show progress or not
    :param invalidate: when ``True``, download the file again even if a
                       cached copy already exists
    :returns: path of the cached file
    """
    filename_output = get_cache_dir_file() / Path(filename)

    # Cache hit: reuse the existing file unless a refresh was requested.
    if filename_output.exists() and not invalidate:
        return filename_output

    partial_output = filename_output.with_name(filename_output.name + ".part")
    try:
        with partial_output.open(mode="wb") as fhandle:
            download_remote(url, fhandle, desc, show_progress)
        # Only a complete download is published under the final name;
        # replace() also overwrites a stale copy when invalidating.
        partial_output.replace(filename_output)
    finally:
        # After a successful replace() the partial file no longer exists,
        # so this only removes leftovers from a failed download.
        if partial_output.exists():
            partial_output.unlink()

    return filename_output
def download_remote(url: str, stream: BinaryIO,
                    desc: Optional[str] = None,
                    show_progress: bool = True) -> None:
    """Download data from a remote URL, optionally showing the progress.

    :param url: the url to download from
    :param stream: buffered IO object
    :param desc: progress bar description
    :param show_progress: whether to show progress or not
    :raises requests.HTTPError: if the server answers with an error
                                status (4xx/5xx)
    """
    # Using the response as a context manager releases the streamed
    # connection even when writing to ``stream`` fails midway.
    with requests.get(url, stream=True) as resp:
        # Without this check the body of an error page (404, 500, ...)
        # would be written to ``stream`` as if it were the requested data.
        resp.raise_for_status()

        # A missing content-length means the size is unknown; passing
        # None (rather than 0) lets tqdm show a plain byte counter.
        content_length = resp.headers.get('content-length')
        total = int(content_length) if content_length is not None else None

        with tqdm(desc=desc, total=total, unit='iB', unit_scale=True,
                  unit_divisor=1024, disable=not show_progress) as bar:
            for data in resp.iter_content(chunk_size=1024):
                size = stream.write(data)
                bar.update(size)
def admissions_sample() -> pd.DataFrame:
    """Load the bundled sample of ICU hospitalization admissions.

    The data is based on the COVID-19 outbreak in Porto
    Alegre/RS/Brazil and contains the three columns described below.

    DATE_START
        When the patient enters the ICU.

    DATE_END
        When the patient left the ICU by an outcome.

    OUTCOME
        Outcome when the patient left the ICU (DATE_END).

    :returns: sample data w/ admission
    """
    date_columns = ["DATE_START", "DATE_END"]
    # Resolve the CSV shipped inside the package, relative to this module.
    csv_path = pkg_resources.resource_filename(
        __name__, "sample_data/admission_sample.csv")
    return pd.read_csv(csv_path, parse_dates=date_columns)