pyCEURspt API Documentation

`bibtex`

`BibTexConverter`

Convert volumes and papers to corresponding bibtex entries

Source code in ceurspt/bibtex.py

class BibTexConverter:
    """
    Render CEUR-WS volumes and papers as bibtex text
    """

    @classmethod
    def convert_volume(cls, volume: Volume) -> str:
        """
        convert the given volume and all of its papers

        The proceedings record is emitted first; every paper record that
        follows carries a crossref to the id of the proceedings record.

        Args:
            volume(Volume): the volume to convert

        Returns:
            str: the text returned by bibtexparser.dumps
        """
        database = BibDatabase()
        volume_entry = ProceedingsEntry.from_volume(volume)
        database.entries.append(volume_entry.to_bibtex_record())
        database.entries.extend(
            InProceedingsEntry.from_paper(paper).to_bibtex_record(
                crossref=volume_entry.get_id()
            )
            for paper in volume.papers
        )
        return bibtexparser.dumps(database)

    @classmethod
    def convert_paper(cls, paper: Paper) -> str:
        """
        convert the given paper to a single stand-alone entry

        Args:
            paper(Paper): the paper to convert

        Returns:
            str: the text returned by bibtexparser.dumps
        """
        database = BibDatabase()
        paper_entry = InProceedingsEntry.from_paper(paper)
        database.entries.append(paper_entry.to_bibtex_record())
        return bibtexparser.dumps(database)

`convert_paper(paper)` `classmethod`

convert given paper to biblatex entry

Source code in ceurspt/bibtex.py

@classmethod
def convert_paper(cls, paper: Paper) -> str:
    """
    convert the given paper to a single stand-alone entry

    Args:
        paper(Paper): the paper to convert

    Returns:
        str: the text returned by bibtexparser.dumps
    """
    database = BibDatabase()
    paper_entry = InProceedingsEntry.from_paper(paper)
    database.entries.append(paper_entry.to_bibtex_record())
    return bibtexparser.dumps(database)

`convert_volume(volume)` `classmethod`

convert given volume to biblatex entry

Source code in ceurspt/bibtex.py

@classmethod
def convert_volume(cls, volume: Volume) -> str:
    """
    convert the given volume and all of its papers

    The proceedings record is emitted first; every paper record that
    follows carries a crossref to the id of the proceedings record.

    Args:
        volume(Volume): the volume to convert

    Returns:
        str: the text returned by bibtexparser.dumps
    """
    database = BibDatabase()
    volume_entry = ProceedingsEntry.from_volume(volume)
    database.entries.append(volume_entry.to_bibtex_record())
    database.entries.extend(
        InProceedingsEntry.from_paper(paper).to_bibtex_record(
            crossref=volume_entry.get_id()
        )
        for paper in volume.papers
    )
    return bibtexparser.dumps(database)

`InProceedingsEntry` `dataclass`

see https://ftp.mpi-inf.mpg.de/pub/tex/mirror/ftp.dante.de/pub/tex/macros/latex/contrib/biblatex/doc/biblatex.pdf

Source code in ceurspt/bibtex.py

@dataclass
class InProceedingsEntry:
    """
    inproceedings record for a single paper

    The attribute names are entry fields, see
    https://ftp.mpi-inf.mpg.de/pub/tex/mirror/ftp.dante.de/pub/tex/macros/latex/contrib/biblatex/doc/biblatex.pdf
    """

    title: str
    author: Union[str, List[str]]
    booktitle: str
    year: str
    date: str

    editor: Optional[Union[str, List[str]]] = None
    subtitle: Optional[str] = None
    titleaddon: Optional[str] = None
    maintitle: Optional[str] = None

    mainsubtitle: Optional[str] = None
    maintitleaddon: Optional[str] = None
    booksubtitle: Optional[str] = None
    booktitleaddon: Optional[str] = None

    eventtitle: Optional[str] = None
    eventtitleaddon: Optional[str] = None
    eventdate: Optional[str] = None
    venue: Optional[str] = None
    language: Optional[str] = "english"

    volume: Optional[str] = None
    part: Optional[str] = None
    volumes: Optional[str] = None
    series: Optional[str] = "CEUR Workshop Proceedings"
    number: Optional[str] = None
    note: Optional[str] = None
    organization: Optional[str] = None

    publisher: Optional[str] = None
    location: Optional[str] = "Aachen"
    month: Optional[str] = None
    isbn: Optional[str] = None
    eid: Optional[str] = None
    chapter: Optional[str] = None
    pages: Optional[str] = None
    addendum: Optional[str] = None

    pubstate: Optional[str] = None
    doi: Optional[str] = None
    eprint: Optional[str] = None
    eprintclass: Optional[str] = None
    eprinttype: Optional[str] = None
    url: Optional[str] = None
    urldate: Optional[str] = None

    @classmethod
    def from_paper(cls, paper: Paper) -> "InProceedingsEntry":
        """
        create an entry from the merged dict of the given paper

        Fields whose source value is missing are set to None instead of
        raising; to_bibtex_record() drops None and empty values.

        Args:
            paper(Paper): the paper to convert

        Returns:
            InProceedingsEntry: the entry, remembering the paper for get_id()
        """
        record = paper.getMergedDict()
        # "spt.volume" may be missing or explicitly None
        volume_info = record.get("spt.volume") or {}
        date_str = volume_info.get("date")
        pub_date = datetime.fromisoformat(date_str) if date_str else None
        authors = record.get("cvb.authors", None)
        if authors is not None:
            if isinstance(authors, str):
                authors = authors.replace(",", " and ")
        elif "dblp.authors" in record:
            authors = " and ".join(
                [
                    author_record.get("label")
                    for author_record in record.get("dblp.authors")
                ]
            )
        volume_number = volume_info.get("number")
        pdf_url = record.get("spt.pdfUrl", None)
        in_proceedings = cls(
            title=record.get("spt.title", None),
            author=authors,
            year=str(pub_date.year) if pub_date else None,
            date=pub_date.date().isoformat() if pub_date else None,
            booktitle=volume_info.get("title", None),
            url=str(pdf_url) if pdf_url else None,
            # avoid the literal string "None" for an unknown volume number
            volume=str(volume_number) if volume_number is not None else None,
        )
        if hasattr(paper, "vm") and isinstance(paper.vm, Volume):
            volume_record = paper.vm.getMergedDict()
            in_proceedings.eventtitle = volume_record.get("wd.eventLabel", None)
            in_proceedings.eventdate = volume_record.get("wd.startDate", None)
            venue_parts = (
                volume_record.get("wd.locationLabel", None),
                volume_record.get("wd.countryLabel", None),
            )
            in_proceedings.venue = ",".join(
                part for part in venue_parts if part is not None
            )
            # the key may be present with a None value
            editors = volume_record.get("cvb.editors") or ""
            in_proceedings.editor = editors.replace(",", " and")
        # name-mangled to _InProceedingsEntry__paper; not a dataclass field,
        # so asdict() does not include it
        in_proceedings.__paper = paper
        return in_proceedings

    def to_bibtex_record(self, crossref: Optional[str] = None) -> dict:
        """
        Convert to bibtex compatible dict

        Args:
            crossref: bibtex key of the proceedings. If set the proceeding specific fields are excluded.

        Returns:
            dict: ENTRYTYPE, ID and every field that is neither None nor ""
        """
        proceedings_keys = [
            "series",
            "location",
            "eventtitle",
            "venue",
            "volume",
            "editor",
            "eventdate",
        ]
        record_fields = {k: v for k, v in asdict(self).items() if v not in [None, ""]}
        if crossref is not None:
            record_fields = {
                k: v for k, v in record_fields.items() if k not in proceedings_keys
            }
            record_fields["crossref"] = crossref
        record = {
            "ENTRYTYPE": "inproceedings",
            "ID": f"ceur-ws:{self.get_id()}",
            **record_fields,
        }
        return record

    def get_id(self) -> Optional[str]:
        """
        get the id of my paper with "/" replaced by ":"

        Returns:
            Optional[str]: the id, or None if no paper is attached or the
                paper has no "spt.id"
        """
        # entries built directly (not via from_paper) have no paper attached
        paper = getattr(self, "_InProceedingsEntry__paper", None)
        if paper is None:
            return None
        try:
            spt_id = paper.getMergedDict().get("spt.id", None)
        except KeyError:
            return None
        if spt_id is None:
            return None
        return spt_id.replace("/", ":")

`to_bibtex_record(crossref=None)`

Convert to a bibtex-compatible dict. Args: crossref: bibtex key of the proceedings; if set, the proceedings-specific fields are excluded.

Source code in ceurspt/bibtex.py

def to_bibtex_record(self, crossref: Optional[str] = None) -> dict:
    """
    Build the bibtex compatible dict for this entry

    Fields that are None or "" are left out.

    Args:
        crossref: bibtex key of the proceedings. If set, the proceedings
            specific fields are left out and a "crossref" field is added.

    Returns:
        dict: ENTRYTYPE, ID and the remaining fields
    """
    # fields that belong to the proceedings record when cross-referencing
    skipped = (
        ()
        if crossref is None
        else (
            "series",
            "location",
            "eventtitle",
            "venue",
            "volume",
            "editor",
            "eventdate",
        )
    )
    fields = {
        name: value
        for name, value in asdict(self).items()
        if value not in (None, "") and name not in skipped
    }
    if crossref is not None:
        fields["crossref"] = crossref
    return {
        "ENTRYTYPE": "inproceedings",
        "ID": f"ceur-ws:{self.get_id()}",
        **fields,
    }

`ProceedingsEntry` `dataclass`

see https://ftp.mpi-inf.mpg.de/pub/tex/mirror/ftp.dante.de/pub/tex/macros/latex/contrib/biblatex/doc/biblatex.pdf

Source code in ceurspt/bibtex.py

@dataclass
class ProceedingsEntry:
    """
    proceedings record for a whole volume

    The attribute names are entry fields, see
    https://ftp.mpi-inf.mpg.de/pub/tex/mirror/ftp.dante.de/pub/tex/macros/latex/contrib/biblatex/doc/biblatex.pdf
    """

    title: str
    year: str
    date: str

    editor: Optional[Union[str, List[str]]] = None
    subtitle: Optional[str] = None
    titleaddon: Optional[str] = None
    maintitle: Optional[str] = None
    mainsubtitle: Optional[str] = None
    maintitleaddon: Optional[str] = None
    eventtitle: Optional[str] = None
    eventtitleaddon: Optional[str] = None
    eventdate: Optional[str] = None
    venue: Optional[str] = None
    language: Optional[str] = "english"
    volume: Optional[str] = None
    part: Optional[str] = None
    volumes: Optional[str] = None
    series: Optional[str] = "CEUR Workshop Proceedings"

    number: Optional[str] = None
    note: Optional[str] = None
    organization: Optional[str] = None
    publisher: Optional[str] = None
    location: Optional[str] = "Aachen"
    month: Optional[str] = None
    isbn: Optional[str] = None

    eid: Optional[str] = None
    chapter: Optional[str] = None
    pages: Optional[str] = None
    pagetotal: Optional[str] = None
    addendum: Optional[str] = None
    pubstate: Optional[str] = None
    doi: Optional[str] = None
    eprint: Optional[str] = None

    eprintclass: Optional[str] = None
    eprinttype: Optional[str] = None
    url: Optional[str] = None
    urldate: Optional[str] = None

    @classmethod
    def from_volume(cls, volume: Volume) -> "ProceedingsEntry":
        """
        Convert given volume to ProceedingsEntry

        Args:
            volume(Volume): the volume to convert

        Returns:
            ProceedingsEntry: the entry; date and year are None when the
                merged dict has no "wd.publication_date"
        """
        record = volume.getMergedDict()
        pub_date_str = record.get("wd.publication_date")
        pub_date = (
            datetime.fromisoformat(pub_date_str) if pub_date_str is not None else None
        )
        venue_parts = (
            record.get("wd.locationLabel", None),
            record.get("wd.countryLabel", None),
        )
        proceeding = cls(
            title=volume.title,
            # guard like year: a missing publication date must not raise
            date=pub_date.date().isoformat() if pub_date else None,
            year=str(pub_date.year) if pub_date else None,
            url=record.get("spt.url"),
            eventtitle=record.get("wd.eventLabel", None),
            eventdate=record.get("wd.startDate", None),
            venue=",".join(part for part in venue_parts if part is not None),
            volume=str(volume.number),
            # the key may be present with a None value
            editor=(record.get("cvb.editors") or "").replace(",", " and"),
        )
        # name-mangled to _ProceedingsEntry__volume; not a dataclass field
        proceeding.__volume = volume
        return proceeding

    def to_bibtex_record(self) -> dict:
        """
        Convert to bibtex compatible dict

        Returns:
            dict: ENTRYTYPE, ID and every field that is neither None nor ""
        """
        record = {
            "ENTRYTYPE": "proceedings",
            "ID": self.get_id(),
            **{k: v for k, v in asdict(self).items() if v not in [None, ""]},
        }
        return record

    def get_id(self) -> str:
        """
        get the bibtex key of this entry

        Returns:
            str: "ceur-ws:Vol-" followed by my volume field
        """
        entry_id = None
        # NOTE(review): from_volume stores the volume under the name-mangled
        # attribute _ProceedingsEntry__volume, while hasattr() below tests the
        # literal name "__volume", so this branch never runs and the key is
        # always the "ceur-ws:Vol-..." fallback. Enabling the acronym branch
        # would change the emitted keys - confirm the intended key scheme
        # before fixing the attribute name.
        if hasattr(self, "__volume") and isinstance(self.__volume, Volume):
            entry_id = self.__volume.acronym.replace(" ", "_")
        if entry_id is None:
            entry_id = f"ceur-ws:Vol-{self.volume}"
        return entry_id

`from_volume(volume)` `classmethod`

Convert given volume to ProceedingsEntry

Source code in ceurspt/bibtex.py

@classmethod
def from_volume(cls, volume: Volume) -> "ProceedingsEntry":
    """
    Convert given volume to ProceedingsEntry

    Args:
        volume(Volume): the volume to convert

    Returns:
        ProceedingsEntry: the entry; date and year are None when the
            merged dict has no "wd.publication_date"
    """
    record = volume.getMergedDict()
    pub_date_str = record.get("wd.publication_date")
    pub_date = (
        datetime.fromisoformat(pub_date_str) if pub_date_str is not None else None
    )
    venue_parts = (
        record.get("wd.locationLabel", None),
        record.get("wd.countryLabel", None),
    )
    proceeding = cls(
        title=volume.title,
        # guard like year: a missing publication date must not raise
        date=pub_date.date().isoformat() if pub_date else None,
        year=str(pub_date.year) if pub_date else None,
        url=record.get("spt.url"),
        eventtitle=record.get("wd.eventLabel", None),
        eventdate=record.get("wd.startDate", None),
        venue=",".join(part for part in venue_parts if part is not None),
        volume=str(volume.number),
        # the key may be present with a None value
        editor=(record.get("cvb.editors") or "").replace(",", " and"),
    )
    # remember the source volume; not a dataclass field
    proceeding.__volume = volume
    return proceeding

`ceurws`

Created on 2023-03-18

@author: wf

`JsonCacheManager`

a json based cache manager

Source code in ceurspt/ceurws.py

class JsonCacheManager:
    """
    a json based cache manager

    Lists of dicts are kept as <name>.json files in the ".ceurws" directory
    below the user's home directory; a list that has no local file is read
    from <base_url>/<name>.json instead.
    """

    def __init__(self, base_url: str = "http://cvb.bitplan.com"):
        """
        constructor

        Args:
            base_url(str): the base url to use for the json provider
        """
        self.base_url = base_url

    def json_path(self, lod_name: str) -> str:
        """
        get the json path for the given list of dicts name

        The cache directory is created if it does not exist yet.

        Args:
            lod_name(str): the name of the list of dicts cache to read

        Returns:
            str: the path to the list of dict cache
        """
        root_path = f"{Path.home()}/.ceurws"
        os.makedirs(root_path, exist_ok=True)
        return f"{root_path}/{lod_name}.json"

    def load_lod(self, lod_name: str) -> list:
        """
        load my list of dicts

        Args:
            lod_name(str): the name of the list of dicts cache to read

        Returns:
            list: the list of dicts

        Raises:
            Exception: if reading or parsing fails; the original error is
                chained as the cause
        """
        json_path = self.json_path(lod_name)
        if os.path.isfile(json_path):
            try:
                # read bytes: store() writes orjson's UTF-8 output, so the
                # file must not be decoded with the platform default encoding
                with open(json_path, "rb") as json_file:
                    lod = orjson.loads(json_file.read())
            except Exception as ex:
                msg = f"Could not read {lod_name} from {json_path} due to {str(ex)}"
                raise Exception(msg) from ex
        else:
            url = f"{self.base_url}/{lod_name}.json"
            try:
                with urllib.request.urlopen(url) as source:
                    lod = orjson.loads(source.read())
            except Exception as ex:
                msg = f"Could not read {lod_name} from {url} due to {str(ex)}"
                raise Exception(msg) from ex
        return lod

    def store(self, lod_name: str, lod: list):
        """
        store my list of dicts

        Args:
            lod_name(str): the name of the list of dicts cache to write
            lod(list): the list of dicts to write
        """
        with open(self.json_path(lod_name), "wb") as json_file:
            json_file.write(orjson.dumps(lod))

`__init__(base_url='http://cvb.bitplan.com')`

constructor

base_url(str): the base url to use for the json provider

Source code in ceurspt/ceurws.py

def __init__(self, base_url: str = "http://cvb.bitplan.com"):
    """
    constructor

    Args:
        base_url(str): the base url to use for the json provider
    """
    # only remembered here; load_lod uses it to build the download url
    self.base_url = base_url

`json_path(lod_name)`

get the json path for the given list of dicts name

Parameters:

Name	Type	Description	Default
`lod_name(str)`		the name of the list of dicts cache to read	required

Returns:

Name	Type	Description
`str`	`str`	the path to the list of dict cache

Source code in ceurspt/ceurws.py

def json_path(self, lod_name: str) -> str:
    """
    get the path of the json cache file for the given list of dicts name

    The ".ceurws" directory below the user's home directory is created
    if it does not exist yet.

    Args:
        lod_name(str): the name of the list of dicts cache

    Returns:
        str: the path of the json file for that name
    """
    cache_dir = f"{Path.home()}/.ceurws"
    os.makedirs(cache_dir, exist_ok=True)
    return f"{cache_dir}/{lod_name}.json"

`load_lod(lod_name)`

load my list of dicts

Parameters:

Name	Type	Description	Default
`lod_name(str)`		the name of the list of dicts cache to read	required

Returns:

Name	Type	Description
`list`	`list`	the list of dicts

Source code in ceurspt/ceurws.py

def load_lod(self, lod_name: str) -> list:
    """
    load my list of dicts

    The local json cache file is used if it exists, otherwise the list is
    read from <base_url>/<lod_name>.json.

    Args:
        lod_name(str): the name of the list of dicts cache to read

    Returns:
        list: the list of dicts

    Raises:
        Exception: if reading or parsing fails; the original error is
            chained as the cause
    """
    json_path = self.json_path(lod_name)
    if os.path.isfile(json_path):
        try:
            # read bytes: store() writes orjson's UTF-8 output, so the
            # file must not be decoded with the platform default encoding
            with open(json_path, "rb") as json_file:
                lod = orjson.loads(json_file.read())
        except Exception as ex:
            msg = f"Could not read {lod_name} from {json_path} due to {str(ex)}"
            raise Exception(msg) from ex
    else:
        url = f"{self.base_url}/{lod_name}.json"
        try:
            with urllib.request.urlopen(url) as source:
                lod = orjson.loads(source.read())
        except Exception as ex:
            msg = f"Could not read {lod_name} from {url} due to {str(ex)}"
            raise Exception(msg) from ex
    return lod

`store(lod_name, lod)`

store my list of dicts

Parameters:

Name	Type	Description	Default
`lod_name(str)`		the name of the list of dicts cache to write	required
`lod(list)`		the list of dicts to write	required

Source code in ceurspt/ceurws.py

def store(self, lod_name: str, lod: list):
    """
    write the given list of dicts to its json cache file

    Args:
        lod_name(str): the name of the list of dicts cache to write
        lod(list): the list of dicts to write
    """
    target_path = self.json_path(lod_name)
    with open(target_path, "wb") as target_file:
        # orjson.dumps returns bytes, hence the binary mode
        target_file.write(orjson.dumps(lod))

`Paper` `dataclass`

Bases: Paper

a CEUR-WS Paper with its behavior

Source code in ceurspt/ceurws.py

class Paper(ceurspt.ceurws_base.Paper):
    """
    a CEUR-WS Paper with it's behavior
    """

    def getBasePath(self) -> Optional[str]:
        """
        get the base path to my files

        Returns:
            Optional[str]: my volume's base path joined with my pdfUrl
                (ceur-ws.org prefix and ".pdf" removed), or None if I have
                no pdfUrl or the corresponding PDF file does not exist
        """
        if not self.pdfUrl:
            return None
        relative_path = self.pdfUrl.replace("https://ceur-ws.org/", "").replace(
            ".pdf", ""
        )
        candidate = f"{self.volume.vm.base_path}/{relative_path}"
        return candidate if os.path.isfile(f"{candidate}.pdf") else None

    def getContentPathByPostfix(self, postfix: str):
        """
        get the path of the content file with the given postfix

        Args:
            postfix(str): the postfix appended to my base path

        Returns:
            str: the path if I have a base path and the file exists,
                otherwise None
        """
        base_path = self.getBasePath()
        if base_path is not None:
            candidate = f"{base_path}{postfix}"
            if os.path.isfile(candidate):
                return candidate
        return None

    def getContentByPostfix(self, postfix: str) -> str:
        """
        read the content file with the given postfix

        Args:
            postfix(str): the postfix of the content file to read

        Returns:
            str: the file content, or None if there is no such file
        """
        content_path = self.getContentPathByPostfix(postfix)
        if not content_path:
            return None
        with open(content_path, "r") as content_file:
            return content_file.read()

    def getText(self) -> str:
        """
        get the plain text content of this paper

        Returns:
            str: the content of my "-content.txt" file as returned by
                getContentByPostfix
        """
        return self.getContentByPostfix("-content.txt")

    def getPdf(self) -> Optional[str]:
        """
        get the PDF file for this paper

        Returns:
            Optional[str]: the path of my PDF file, or None if getBasePath()
                found no PDF file for this paper
        """
        base_path = self.getBasePath()
        if base_path is None:
            # formatting None would yield the bogus path "None.pdf"
            return None
        return f"{base_path}.pdf"

    def getMergedDict(self) -> dict:
        """
        get the merged dict for this paper

        The result combines the version info, my own dataclass fields
        (prefix "spt."), my record in pm.paper_records_by_path (prefix "cvb.")
        and my record in pm.paper_dblp_by_path (prefix "dblp."). The two
        record lookups use my pdfUrl without the ceur-ws.org prefix as key.

        Returns:
            dict: the merged dict
        """
        m_dict = {
            "version.version": Version.version,
            "version.cm_url": Version.cm_url,
            "spt.html_url": f"/{self.id}.html",
        }
        for key, value in dataclasses.asdict(self).items():
            m_dict[f"spt.{key}"] = value
        if not self.pdfUrl:
            # no lookup key without a pdfUrl (getBasePath guards the same way)
            return m_dict
        pdf_name = self.pdfUrl.replace("https://ceur-ws.org/", "")
        record_sources = (
            ("cvb", self.pm.paper_records_by_path),
            ("dblp", self.pm.paper_dblp_by_path),
        )
        for prefix, records_by_path in record_sources:
            if pdf_name in records_by_path:
                for key, value in records_by_path[pdf_name].items():
                    m_dict[f"{prefix}.{key}"] = value
        return m_dict

    def as_wb_dict(self) -> dict:
        """
        get a dict with labels, descriptions and claims for this paper

        The structure follows the wikibase-cli create-entity example:
        wb create-entity '{"labels":{"en":"a label","fr":"un label"},"descriptions":{"en":"some description","fr":"une description"},"claims":{"P1775":["Q3576110","Q12206942"],"P2002":"bulgroz"}}'

        Returns:
            dict: the dict with one P50 (author) claim per author that has a
                wikidata id and one P2093 (author name string) claim per
                author without, each with a P1545 (series ordinal) qualifier
        """
        author_claims = []
        author_name_claims = []
        wb = {
            "labels": {"en": self.title},
            "descriptions": {
                "en": f"scientific paper published in CEUR-WS Volume {self.volume.number}"
            },
            "claims": {
                # P31  :instance of  Q13442814:scholarly article
                "P31": "Q13442814",
                #  P1433: published in
                "P1433": self.volume.wikidataid,
                # P1476:title
                "P1476": {"text": self.title, "language": "en"},
                # P407 :language of work or name  Q1860:English
                "P407": "Q1860",
                #  P953 :full work available at URL
                "P953": self.pdfUrl,
                # P50: author, P1545: series ordinal
                "P50": author_claims,
                # P2093: author name string, P1545: series ordinal
                "P2093": author_name_claims,
            },
        }
        for ordinal, author in enumerate(self.getAuthors(), start=1):
            qualifiers = {"P1545": f"{ordinal}"}
            if author.wikidata_id:
                author_claims.append(
                    {"value": author.wikidata_id, "qualifiers": qualifiers}
                )
            else:
                author_name_claims.append(
                    {"value": author.name, "qualifiers": qualifiers}
                )
        return wb

    def as_wbi_cli_text(self, qid: str) -> str:
        """
        Generate one 'wb add-claim' command line per claim value of this paper.

        Args:
            qid (str): The QID of the Wikibase item to which the claims will be added.

        Returns:
            str: the 'wb add-claim' commands separated by newlines.
        """
        claims = self.as_wb_dict()["claims"]
        cli_commands = []
        for prop, value in claims.items():
            # a list holds several claim values for the same property,
            # anything else is a single claim value
            claim_values = value if isinstance(value, list) else [value]
            for claim_value in claim_values:
                # JSON encode and escape the double quotes, since the
                # value is wrapped in double quotes on the command line
                escaped = json.dumps(claim_value).replace('"', '\\"')
                cli_commands.append(f'wb add-claim {qid} {prop} "{escaped}"')
        return "\n".join(cli_commands)

    def as_quickstatements(self) -> str:
        """
        return my quickstatements

        The text starts with a CREATE command followed by LAST| statements
        for instance of, published in, label, description, title, language,
        full work URL and publication date. One author statement per author
        is appended: P50 if the author has a wikidata id, otherwise P2093,
        each with a P1545 series ordinal.

        Returns:
            str: the quickstatements text for this paper
        """
        # NOTE(review): m_dict is not used below - confirm whether the call
        # is still needed
        m_dict = self.getMergedDict()
        # the volume date is expected in YYYY-MM-DD format
        paper_date_str = self.volume.date
        paper_date = datetime.strptime(paper_date_str, "%Y-%m-%d")
        qs_date = f"+{paper_date.isoformat(sep='T',timespec='auto')}Z/11"
        qs = f"""# created by {__file__}
CREATE
# P31  :instance of  Q13442814:scholarly article
LAST|P31|Q13442814
# P1433: published in 
LAST|P1433|{self.volume.wikidataid}
# english label
LAST|Len|"{self.title}"
# english description
LAST|Den|"scientific paper published in CEUR-WS Volume {self.volume.number}"
# P1476:title
LAST|P1476|en:"{self.title}"
# P407 :language of work or name  Q1860:English
LAST|P407|Q1860
# P953 :full work available at URL
LAST|P953|"{self.pdfUrl}"
# P577 :publication date
LAST|P577|{qs_date}
"""
        # @TODO pages ...
        authors = self.getAuthors()
        # the series ordinal is the 1-based position in the author list
        for index, author in enumerate(authors):
            if not author.wikidata_id:
                qs += f"""# P2093: author name string, P1545: series ordinal
LAST|P2093|"{author.name}"|P1545|"{index+1}"
"""
            else:
                qs += f"""# P50: author, P1545: series ordinal
LAST|P50|{author.wikidata_id}|P1545|"{index+1}"       
"""
            pass
        return qs

    def as_smw_markup(self) -> str:
        """
        return my semantic mediawiki markup

        The markup consists of a Paper template call followed by a section
        with an embedded pdf tag and my plain text in a pre block.

        Note: sets self.authors (and self.dblpUrl when a dblp publication id
        is available) as a side effect.

        Returns:
            str: the smw markup for this paper

        Raises:
            KeyError: if my merged dict has no "cvb.authors" entry
        """
        m_dict = self.getMergedDict()
        # copy the merged values to attributes so that the loop below picks them up
        self.authors = m_dict["cvb.authors"]
        if "dblp.dblp_publication_id" in m_dict:
            self.dblpUrl = m_dict["dblp.dblp_publication_id"]
        # the quadruple braces render as the "{{" that opens the template call
        markup = f"""=Paper=
{{{{Paper
|id={self.id}
|storemode=property
|title={self.title}
|pdfUrl={self.pdfUrl}
|volume=Vol-{self.volume.number}
"""
        # optional template parameters: only emitted if set and not empty
        for attr in ["authors", "wikidataid", "dblpUrl"]:
            if hasattr(self, attr):
                value = getattr(self, attr)
                if value:
                    markup += f"|{attr}={value}\n"
        # close the template call, then append the pdf tag and the plain text
        markup += f"""}}}}
=={self.title}==
<pdf width="1500px">{self.pdfUrl}</pdf>
<pre>
{self.getText()}
</pre>
        """
        return markup

    def getAuthorIndex(self, name: str, authors: typing.List[str]):
        """
        get the position of the given name in the given list of author names

        An entry matches if the name starts with it, ignoring case.

        Args:
            name(str): the name to look up
            authors(List[str]): the author names to search

        Returns:
            int: the index of the first match, or len(authors) + 1 if there
                is none
        """
        lowered_name = name.lower()
        position = 0
        while position < len(authors):
            if lowered_name.startswith(authors[position].lower()):
                return position
            position += 1
        # no match: use a position behind the end of the list
        return len(authors) + 1

    def getAuthors(self) -> typing.List[Scholar]:
        """
        get my authors

        If my merged dict has "dblp.authors" records, one Scholar is created
        per record and the scholars are sorted by the position of their
        label in the comma separated "cvb.authors" names. Otherwise one
        Scholar is created per "cvb.authors" name.

        Returns:
            list: a list of Scholars
        """
        m_dict = self.getMergedDict()
        # a missing or empty "cvb.authors" yields no names instead of a
        # KeyError; names are stripped so that "A, B" and "A,B" behave alike,
        # and empty names are dropped because every label starts with ""
        raw_names = m_dict.get("cvb.authors") or ""
        author_names = [
            author_name.strip()
            for author_name in raw_names.split(",")
            if author_name.strip()
        ]
        if "dblp.authors" in m_dict:
            authors = []
            for dblp_author_record in m_dict["dblp.authors"]:
                author = DataClassUtil.dataclass_from_dict(Scholar, dblp_author_record)
                authors.append(author)
                author.index = self.getAuthorIndex(author.label, author_names)
                if author.index < len(author_names):
                    author.name = author_names[author.index]
                else:
                    # no matching name: fall back to the dblp label
                    author.name = author.label
            sorted_authors = sorted(authors, key=lambda author: author.index)
        else:
            sorted_authors = []
            for author_name in author_names:
                scholar = Scholar(dblp_author_id=None, label=author_name)
                scholar.name = author_name
                sorted_authors.append(scholar)
        return sorted_authors

    def getAuthorBar(self):
        """
        show the authors of this paper

        Returns:
            str: one span per author containing the author label followed by
                the icon links created by Volume.create_icon_list; the span
                is styled red if the author has none of the wikidata, dblp,
                gnd and orcid ids
        """
        fragments = []
        for author in self.getAuthors():
            # (src, title, link, valid) per icon
            icon_specs = (
                (
                    "/static/icons/32px-dblp-icon.png",
                    "dblp",
                    f"{author.dblp_author_id}",
                    author.dblp_author_id,
                ),
                (
                    "/static/icons/32px-ORCID-icon.png",
                    "ORCID",
                    f"https://orcid.org/{author.orcid_id}",
                    author.orcid_id,
                ),
                (
                    "/static/icons/32px-DNB.svg.png",
                    "DNB",
                    f"https://d-nb.info/gnd/{author.gnd_id}",
                    author.gnd_id,
                ),
                (
                    "/static/icons/32px-Scholia_logo.svg.png",
                    "Author@scholia",
                    f"https://scholia.toolforge.org/author/{author.wikidata_id}",
                    author.wikidata_id,
                ),
                (
                    "/static/icons/32px-Wikidata_Query_Service_Favicon_wbg.svg.png",
                    "Author@wikidata",
                    f"https://www.wikidata.org/wiki/{author.wikidata_id}",
                    author.wikidata_id,
                ),
            )
            icon_list = [
                {"src": src, "title": title, "link": link, "valid": valid}
                for src, title, link, valid in icon_specs
            ]
            soup = BeautifulSoup("<html></html>", "html.parser")
            link_tags = Volume.create_icon_list(soup, icon_list)
            has_any_id = (
                author.wikidata_id
                or author.dblp_author_id
                or author.gnd_id
                or author.orcid_id
            )
            style = "" if has_any_id else "color:red"
            fragments.append(f"""<span style="{style}">{author.label}""")
            fragments.extend(str(link_tag) for link_tag in link_tags)
            fragments.append("</span>")
        return "".join(fragments)

    def paperLinkParts(
        self, inc: int = 0
    ) -> typing.Tuple[typing.Optional[str], typing.Optional[str]]:
        """
        a relative paper link

        Args:
            inc(int): > 0 links to the next paper, < 0 to the previous
                paper, 0 to this paper

        Returns:
            tuple: href and link text, both None if there is no such paper
        """
        if inc > 0:
            presymbol = "⫸"
            postsymbol = ""
            paper = self.next()
        elif inc < 0:
            presymbol = ""
            postsymbol = "⫷"
            paper = self.prev()
        else:
            presymbol = ""
            postsymbol = ""
            paper = self
        href = None
        text = None
        if paper:
            href = f"/{paper.id}.html"
            text = f"{presymbol}{paper.id}{postsymbol}"
        return href, text

    def paperScrollLinks(self) -> str:
        """
        get the paper scroll links

        Returns:
            str: the anchors for the previous, the current and the next
                paper in this order; papers without a href are left out
        """
        anchors = []
        for inc in (-1, 0, 1):
            href, text = self.paperLinkParts(inc)
            if not href:
                continue
            anchors.append(f"""<a href="{href}">{text}</a>""")
        return "".join(anchors)

    def prev(self) -> "Paper":
        """
        get the previous paper in this volume

        Returns:
            Paper: the result of next() with an increment of -1
        """
        return self.next(inc=-1)

    def next(self, inc: int = 1) -> "Paper":
        """
        get the paper that is inc positions away from me in my volume

        Args:
            inc(int): the increment +1 = next, -1 = prev

        Returns:
            Paper: the paper at my paper_index + inc, or None if I have no
                volume or that position is outside the volume's papers
        """
        vol = self.volume
        if not vol:
            return None
        target_index = self.paper_index + inc
        if 0 <= target_index < len(vol.papers):
            return vol.papers[target_index]
        return None

    def getIconBar(self, soup):
        """
        get my icon bar

        Parameters:
            soup: The BeautifulSoup object to use for creating new tags.

        Returns:
            the result of Volume.create_icon_bar for my icon list
        """
        pdf_name = self.pdfUrl.replace("https://ceur-ws.org/", "").replace(".pdf", "")
        # (icon source, title, link postfix, needs a content file)
        icon_specs = [
            ("/static/icons/32px-text-icon.png", "plain text", ".txt", True),
            ("/static/icons/32px-PDF_icon.svg.png", "original pdf", ".pdf", True),
            (
                "/static/icons/32px-Cermine-Icon.png",
                "Cermine metadata",
                ".cermine",
                True,
            ),
            ("/static/icons/32px-GROBID-icon.png", "GROBID metadata", ".grobid", True),
            # @TODO - add check for existing wikidata entry
            (
                "/static/icons/32px-QuickStatements-icon.png",
                "Quickstatements",
                ".qs",
                False,
            ),
            ("/static/icons/32px-SMW-icon.png", "SMW markup", ".smw", False),
            (
                "/static/icons/32px-wbjson-icon.png",
                "wikibase CLI JSON metadata",
                ".wbjson",
                False,
            ),
            (
                "/static/icons/32px-Wikibase_logo.svg.png",
                "wikibase CLI",
                ".wbcli",
                False,
            ),
            (
                "/static/icons/32px-JSON_vector_logo.svg.png",
                "JSON metadata",
                ".json",
                False,
            ),
            ("/static/icons/32px-YAML_Logo.svg.png", "YAML metadata", ".yaml", False),
        ]
        icon_list = []
        for src, title, postfix, file_backed in icon_specs:
            # file backed icons are valid only if the content file exists,
            # all other icons are always valid
            valid = self.getContentPathByPostfix(postfix) if file_backed else True
            icon_list.append(
                {
                    "src": src,
                    "title": title,
                    "link": f"/{pdf_name}{postfix}",
                    "valid": valid,
                }
            )
        return Volume.create_icon_bar(soup, icon_list=icon_list)

    def asHtml(self):
        """
        return an html response for this paper
        """
        soup = BeautifulSoup("<html></html>", "html.parser")
        icon_bar = self.getIconBar(soup)
        author_bar = self.getAuthorBar()
        content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-type" content="text/html;charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
<title>{self.id} - {self.title}</title>
</head>
<body>
<table style="border: 0; border-spacing: 0; border-collapse: collapse; width: 95%">
<tbody><tr>
<td style="text-align: left; vertical-align: middle">
<a href="http://ceur-ws.org/"><div id="CEURWSLOGO"></div></a>
</td>
<td style="text-align: right; vertical-align: middle">
<div style="float:left" id="CEURCCBY"></div>
{Volume.volLink(self.volume.number,-1)}
<span class="CEURVOLNR">{Volume.volLink(self.volume.number)}</span>
{Volume.volLink(self.volume.number,+1)}<br>
<span class="CEURURN">urn:nbn:de:0074-{self.volume.number}-0</span>
<p class="unobtrusive copyright" style="text-align: justify">Copyright &copy; {self.volume.date[:4]} for
the individual papers by the papers' authors. 
Copyright &copy; <span class="CEURPUBYEAR">{self.volume.date[:4]}</span> for the volume
as a collection by its editors.
This volume and its papers are published under the
Creative Commons License Attribution 4.0 International
<A HREF="https://creativecommons.org/licenses/by/4.0/">(<span class="CEURLIC">CC BY 4.0</span>)</A>.</p>
</td>
</tr>
</tbody></table>
{str(icon_bar)}
<hr/>
{self.paperScrollLinks()}
<hr/>
{str(author_bar)}
<hr/>
<h1>{self.title}<h1>
<embed src="{self.pdfUrl}" style="width:100vw;height:100vh" type="application/pdf">
<body>
</body>
        """
        return content

`asHtml()`

return an html response for this paper

Source code in ceurspt/ceurws.py

    def asHtml(self):
        """
        return an html response for this paper
        """
        soup = BeautifulSoup("<html></html>", "html.parser")
        icon_bar = self.getIconBar(soup)
        author_bar = self.getAuthorBar()
        content = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-type" content="text/html;charset=utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
<title>{self.id} - {self.title}</title>
</head>
<body>
<table style="border: 0; border-spacing: 0; border-collapse: collapse; width: 95%">
<tbody><tr>
<td style="text-align: left; vertical-align: middle">
<a href="http://ceur-ws.org/"><div id="CEURWSLOGO"></div></a>
</td>
<td style="text-align: right; vertical-align: middle">
<div style="float:left" id="CEURCCBY"></div>
{Volume.volLink(self.volume.number,-1)}
<span class="CEURVOLNR">{Volume.volLink(self.volume.number)}</span>
{Volume.volLink(self.volume.number,+1)}<br>
<span class="CEURURN">urn:nbn:de:0074-{self.volume.number}-0</span>
<p class="unobtrusive copyright" style="text-align: justify">Copyright &copy; {self.volume.date[:4]} for
the individual papers by the papers' authors. 
Copyright &copy; <span class="CEURPUBYEAR">{self.volume.date[:4]}</span> for the volume
as a collection by its editors.
This volume and its papers are published under the
Creative Commons License Attribution 4.0 International
<A HREF="https://creativecommons.org/licenses/by/4.0/">(<span class="CEURLIC">CC BY 4.0</span>)</A>.</p>
</td>
</tr>
</tbody></table>
{str(icon_bar)}
<hr/>
{self.paperScrollLinks()}
<hr/>
{str(author_bar)}
<hr/>
<h1>{self.title}<h1>
<embed src="{self.pdfUrl}" style="width:100vw;height:100vh" type="application/pdf">
<body>
</body>
        """
        return content

`as_quickstatements()`

return my quickstatements

Source code in ceurspt/ceurws.py

    def as_quickstatements(self) -> str:
        """
        return my quickstatements
        """
        m_dict = self.getMergedDict()
        paper_date_str = self.volume.date
        paper_date = datetime.strptime(paper_date_str, "%Y-%m-%d")
        qs_date = f"+{paper_date.isoformat(sep='T',timespec='auto')}Z/11"
        qs = f"""# created by {__file__}
CREATE
# P31  :instance of  Q13442814:scholarly article
LAST|P31|Q13442814
# P1433: published in 
LAST|P1433|{self.volume.wikidataid}
# english label
LAST|Len|"{self.title}"
# english description
LAST|Den|"scientific paper published in CEUR-WS Volume {self.volume.number}"
# P1476:title
LAST|P1476|en:"{self.title}"
# P407 :language of work or name  Q1860:English
LAST|P407|Q1860
# P953 :full work available at URL
LAST|P953|"{self.pdfUrl}"
# P577 :publication date
LAST|P577|{qs_date}
"""
        # @TODO pages ...
        authors = self.getAuthors()
        for index, author in enumerate(authors):
            if not author.wikidata_id:
                qs += f"""# P2093: author name string, P1545: series ordinal
LAST|P2093|"{author.name}"|P1545|"{index+1}"
"""
            else:
                qs += f"""# P50: author, P1545: series ordinal
LAST|P50|{author.wikidata_id}|P1545|"{index+1}"       
"""
            pass
        return qs

`as_smw_markup()`

return my semantic mediawiki markup

Returns:

Name	Type	Description
`str`	`str`	the smw markup for this paper

Source code in ceurspt/ceurws.py

    def as_smw_markup(self) -> str:
        """
        return my semantic mediawiki markup

        Returns:
            str: the smw markup for this paper
        """
        m_dict = self.getMergedDict()
        self.authors = m_dict["cvb.authors"]
        if "dblp.dblp_publication_id" in m_dict:
            self.dblpUrl = m_dict["dblp.dblp_publication_id"]
        markup = f"""=Paper=
{{{{Paper
|id={self.id}
|storemode=property
|title={self.title}
|pdfUrl={self.pdfUrl}
|volume=Vol-{self.volume.number}
"""
        for attr in ["authors", "wikidataid", "dblpUrl"]:
            if hasattr(self, attr):
                value = getattr(self, attr)
                if value:
                    markup += f"|{attr}={value}\n"
        markup += f"""}}}}
=={self.title}==
<pdf width="1500px">{self.pdfUrl}</pdf>
<pre>
{self.getText()}
</pre>
        """
        return markup

`as_wb_dict()`

wb create-entity '{"labels":{"en":"a label","fr":"un label"},"descriptions":{"en":"some description","fr":"une description"},"claims":{"P1775":["Q3576110","Q12206942"],"P2002":"bulgroz"}}'

Source code in ceurspt/ceurws.py

def as_wb_dict(self) -> dict:
    """
    get my metadata as a dict with labels, descriptions and claims

    The structure follows the JSON argument of the wikibase CLI, e.g.
    wb create-entity '{"labels":{"en":"a label","fr":"un label"},"descriptions":{"en":"some description","fr":"une description"},"claims":{"P1775":["Q3576110","Q12206942"],"P2002":"bulgroz"}}'

    Returns:
        dict: the labels, descriptions and claims of this paper
    """
    labels = {"en": self.title}
    descriptions = {
        "en": f"scientific paper published in CEUR-WS Volume {self.volume.number}"
    }
    claims = {
        # P31  :instance of  Q13442814:scholarly article
        "P31": "Q13442814",
        #  P1433: published in
        "P1433": self.volume.wikidataid,
        # P1476:title
        "P1476": {"text": self.title, "language": "en"},
        # P407 :language of work or name  Q1860:English
        "P407": "Q1860",
        #  P953 :full work available at URL
        "P953": self.pdfUrl,
    }
    entity = {"labels": labels, "descriptions": descriptions, "claims": claims}
    linked_authors = []
    named_authors = []
    for ordinal, author in enumerate(self.getAuthors(), start=1):
        # P1545: series ordinal
        qualifiers = {"P1545": f"{ordinal}"}
        if author.wikidata_id:
            linked_authors.append(
                {"value": author.wikidata_id, "qualifiers": qualifiers}
            )
        else:
            named_authors.append({"value": author.name, "qualifiers": qualifiers})
    # P50: author - authors with a wikidata id
    claims["P50"] = linked_authors
    # P2093: author name string - authors without a wikidata id
    claims["P2093"] = named_authors
    return entity

`as_wbi_cli_text(qid)`

Generates a series of Wikibase CLI command strings to add claims to the entity represented by this paper, based on the provided QID.

Parameters:

Name	Type	Description	Default
`qid`	`str`	The QID of the Wikibase item to which the claims will be added.	required

Returns:

Name	Type	Description
`str`	`str`	A string containing all the 'wb add-claim' commands separated by newlines.

Source code in ceurspt/ceurws.py

def as_wbi_cli_text(self, qid: str) -> str:
    """
    Generates a series of Wikibase CLI command strings to add claims to the entity
    represented by this paper, based on the provided QID.

    Each claim value of as_wb_dict() is serialized to JSON and passed as
    one double quoted shell argument. List valued claims yield one
    command per list entry, an empty list yields no command.

    Args:
        qid (str): The QID of the Wikibase item to which the claims will be added.

    Returns:
        str: A string containing all the 'wb add-claim' commands separated by newlines.
    """

    def shell_double_quoted(value) -> str:
        """
        serialize the value to JSON and wrap it in shell double quotes
        """
        payload = json.dumps(value)
        # the backslash has to be escaped first - otherwise the
        # backslashes added for the other characters would be doubled
        for special in ("\\", '"', "$", "`"):
            payload = payload.replace(special, "\\" + special)
        return f'"{payload}"'

    cli_commands = []
    for prop, value in self.as_wb_dict()["claims"].items():
        # a list holds several values for the same property,
        # everything else is a single value
        values = value if isinstance(value, list) else [value]
        for val in values:
            cli_commands.append(
                f"wb add-claim {qid} {prop} {shell_double_quoted(val)}"
            )
    return "\n".join(cli_commands)

`getAuthorBar()`

show the authors of this paper

Source code in ceurspt/ceurws.py

def getAuthorBar(self):
    """
    show the authors of this paper

    Each author is rendered as a span with the author label followed by
    the icon links Volume.create_icon_list delivers for the author ids.
    An author without any id is shown in red.

    Returns:
        str: the html markup of all author spans
    """
    fragments = []
    for scholar in self.getAuthors():
        icon_list = [
            {
                "src": "/static/icons/32px-dblp-icon.png",
                "title": "dblp",
                "link": f"{scholar.dblp_author_id}",
                "valid": scholar.dblp_author_id,
            },
            {
                "src": "/static/icons/32px-ORCID-icon.png",
                "title": "ORCID",
                "link": f"https://orcid.org/{scholar.orcid_id}",
                "valid": scholar.orcid_id,
            },
            {
                "src": "/static/icons/32px-DNB.svg.png",
                "title": "DNB",
                "link": f"https://d-nb.info/gnd/{scholar.gnd_id}",
                "valid": scholar.gnd_id,
            },
            {
                "src": "/static/icons/32px-Scholia_logo.svg.png",
                "title": "Author@scholia",
                "link": f"https://scholia.toolforge.org/author/{scholar.wikidata_id}",
                "valid": scholar.wikidata_id,
            },
            {
                "src": "/static/icons/32px-Wikidata_Query_Service_Favicon_wbg.svg.png",
                "title": "Author@wikidata",
                "link": f"https://www.wikidata.org/wiki/{scholar.wikidata_id}",
                "valid": scholar.wikidata_id,
            },
        ]
        soup = BeautifulSoup("<html></html>", "html.parser")
        link_tags = Volume.create_icon_list(soup, icon_list)
        has_identifier = (
            scholar.wikidata_id
            or scholar.dblp_author_id
            or scholar.gnd_id
            or scholar.orcid_id
        )
        # highlight authors for which no identifier is known
        style = "" if has_identifier else "color:red"
        fragments.append(f"""<span style="{style}">{scholar.label}""")
        fragments.extend(str(link_tag) for link_tag in link_tags)
        fragments.append("</span>")
    return "".join(fragments)

`getAuthorIndex(name, authors)`

get the author index

Source code in ceurspt/ceurws.py

def getAuthorIndex(self, name: str, authors: typing.List[str]):
    """
    get the author index

    Args:
        name(str): the name to look up
        authors(list): the author names to search in

    Returns:
        int: the position of the first author name that is a
            case-insensitive prefix of name or len(authors) + 1 if
            there is no such author name
    """
    matches = (
        position
        for position, candidate in enumerate(authors)
        if name.lower().startswith(candidate.lower())
    )
    found = next(matches, None)
    if found is None:
        # if not found put at end
        return len(authors) + 1
    return found

`getAuthors()`

get my authors

Returns:

Name	Type	Description
`list`	`List[Scholar]`	a list of Scholars

Source code in ceurspt/ceurws.py

def getAuthors(self) -> typing.List[Scholar]:
    """
    get my authors

    The author names are taken from the comma separated "cvb.authors"
    entry of my merged dict. If the merged dict has "dblp.authors"
    records the scholars are created from these records and sorted by
    the position of their name in the author names, otherwise one
    scholar per author name is created.

    Returns:
        list: a list of Scholars
    """
    m_dict = self.getMergedDict()
    # strip the blanks around the names - otherwise the blank following
    # a comma prevents the prefix match in getAuthorIndex and ends up in
    # the author names - and ignore empty entries
    author_names = [
        author_name.strip()
        for author_name in m_dict["cvb.authors"].split(",")
        if author_name.strip()
    ]
    if "dblp.authors" in m_dict:
        authors = []
        for dblp_author_record in m_dict["dblp.authors"]:
            author = DataClassUtil.dataclass_from_dict(Scholar, dblp_author_record)
            authors.append(author)
            author.index = self.getAuthorIndex(author.label, author_names)
            if author.index < len(author_names):
                author.name = author_names[author.index]
            else:
                # no matching author name - fall back to the dblp label
                author.name = author.label
        sorted_authors = sorted(authors, key=lambda author: author.index)
    else:
        sorted_authors = []
        for author_name in author_names:
            scholar = Scholar(dblp_author_id=None, label=author_name)
            scholar.name = author_name
            sorted_authors.append(scholar)
    return sorted_authors

`getBasePath()`

get the base path to my files

Source code in ceurspt/ceurws.py

def getBasePath(self) -> Optional[str]:
    """
    get the base path to my files

    Returns:
        str: the path of my pdf file below the base path of the volume
            manager without the ".pdf" postfix - None if I have no
            pdfUrl or the pdf file does not exist
    """
    if not self.pdfUrl:
        return None
    relative_path = self.pdfUrl.replace("https://ceur-ws.org/", "").replace(
        ".pdf", ""
    )
    candidate = f"{self.volume.vm.base_path}/{relative_path}"
    return candidate if os.path.isfile(f"{candidate}.pdf") else None

`getContentByPostfix(postfix)`

get the content for the given postfix

Parameters:

Name	Type	Description	Default
`postfix(str)`		the postfix to read	required

Returns:

Name	Type	Description
`str`	`str`	the content

Source code in ceurspt/ceurws.py

def getContentByPostfix(self, postfix: str) -> Optional[str]:
    """
    get the content for the given postfix

    Args:
        postfix(str): the postfix to read

    Returns:
        str: the content of the file or None if there is no file for
            the given postfix
    """
    text_path = self.getContentPathByPostfix(postfix)
    if not text_path:
        return None
    # read with an explicit encoding so that the result does not depend
    # on the locale of the machine
    with open(text_path, "r", encoding="utf-8") as text_file:
        return text_file.read()

`getContentPathByPostfix(postfix)`

get the content path for the given postfix

Parameters:

Name	Type	Description	Default
`postfix(str)`		the postfix to read	required

Returns:

Name	Type	Description
`str`		the content path

Source code in ceurspt/ceurws.py

def getContentPathByPostfix(self, postfix: str):
    """
    get the content path for the given postfix

    Args:
        postfix(str): the postfix to read

    Returns:
        str: my base path extended by the postfix or None if there is
            no base path or no file at that path
    """
    stem = self.getBasePath()
    if stem is not None:
        candidate = f"{stem}{postfix}"
        if os.path.isfile(candidate):
            return candidate
    return None

`getIconBar(soup)`

get my icon bar

Parameters:

Name	Type	Description	Default
`soup`		The BeautifulSoup object to use for creating new tags.	required

Source code in ceurspt/ceurws.py

def getIconBar(self, soup):
    """
    get my icon bar

    Parameters:
        soup: The BeautifulSoup object to use for creating new tags.

    Returns:
        the result of Volume.create_icon_bar for my icon list
    """
    pdf_name = self.pdfUrl.replace("https://ceur-ws.org/", "").replace(".pdf", "")
    # (icon source, title, link postfix, needs a content file)
    icon_specs = [
        ("/static/icons/32px-text-icon.png", "plain text", ".txt", True),
        ("/static/icons/32px-PDF_icon.svg.png", "original pdf", ".pdf", True),
        ("/static/icons/32px-Cermine-Icon.png", "Cermine metadata", ".cermine", True),
        ("/static/icons/32px-GROBID-icon.png", "GROBID metadata", ".grobid", True),
        # @TODO - add check for existing wikidata entry
        (
            "/static/icons/32px-QuickStatements-icon.png",
            "Quickstatements",
            ".qs",
            False,
        ),
        ("/static/icons/32px-SMW-icon.png", "SMW markup", ".smw", False),
        (
            "/static/icons/32px-wbjson-icon.png",
            "wikibase CLI JSON metadata",
            ".wbjson",
            False,
        ),
        ("/static/icons/32px-Wikibase_logo.svg.png", "wikibase CLI", ".wbcli", False),
        (
            "/static/icons/32px-JSON_vector_logo.svg.png",
            "JSON metadata",
            ".json",
            False,
        ),
        ("/static/icons/32px-YAML_Logo.svg.png", "YAML metadata", ".yaml", False),
    ]
    icon_list = []
    for src, title, postfix, file_backed in icon_specs:
        # file backed icons are valid only if the content file exists,
        # all other icons are always valid
        valid = self.getContentPathByPostfix(postfix) if file_backed else True
        icon_list.append(
            {
                "src": src,
                "title": title,
                "link": f"/{pdf_name}{postfix}",
                "valid": valid,
            }
        )
    return Volume.create_icon_bar(soup, icon_list=icon_list)

`getMergedDict()`

get the merged dict for this paper

Source code in ceurspt/ceurws.py

def getMergedDict(self) -> dict:
    """
    get the merged dict for this paper

    The keys are prefixed by their origin: "version." for the version
    info, "spt." for my own fields, "cvb." for my record in
    pm.paper_records_by_path and "dblp." for my record in
    pm.paper_dblp_by_path. The last two are only present if the paper
    manager has a record for my pdf path.

    Returns:
        dict: the merged dict
    """
    merged = {
        "version.version": Version.version,
        "version.cm_url": Version.cm_url,
        "spt.html_url": f"/{self.id}.html",
    }
    merged.update(
        {f"spt.{key}": value for key, value in dataclasses.asdict(self).items()}
    )
    # the paper manager keeps its records by the relative pdf path
    pdf_name = self.pdfUrl.replace("https://ceur-ws.org/", "")
    paper_records = self.pm.paper_records_by_path
    if pdf_name in paper_records:
        merged.update(
            {f"cvb.{key}": value for key, value in paper_records[pdf_name].items()}
        )
    dblp_records = self.pm.paper_dblp_by_path
    if pdf_name in dblp_records:
        merged.update(
            {f"dblp.{key}": value for key, value in dblp_records[pdf_name].items()}
        )
    return merged

`getPdf()`

get the PDF file for this paper

Source code in ceurspt/ceurws.py

def getPdf(self):
    """
    get the PDF file for this paper

    Returns:
        str: my base path with the ".pdf" postfix appended - note that
            a missing base path (None) results in "None.pdf"
    """
    return f"{self.getBasePath()}.pdf"

`getText()`

get the plain text content of this paper

Source code in ceurspt/ceurws.py

def getText(self) -> str:
    """
    get the plain text content of this paper

    Returns:
        str: the content getContentByPostfix delivers for the
            "-content.txt" postfix
    """
    return self.getContentByPostfix("-content.txt")

`next(inc=1)`

get the next paper in this volume with the given increment

Parameters:

Name	Type	Description	Default
`inc`	`int`	the increment +1 = next, -1 = prev	`1`

Source code in ceurspt/ceurws.py

def next(self, inc: int = 1) -> "Paper":
    """
    get the next paper in this volume with the given increment

    Args:
        inc(int): the increment +1 = next, -1 = prev

    Returns:
        Paper: the paper at my paper_index shifted by inc or None if
            there is no volume or the shifted index is out of range
    """
    volume = self.volume
    if not volume:
        return None
    target_index = self.paper_index + inc
    if 0 <= target_index < len(volume.papers):
        return volume.papers[target_index]
    return None

`paperLinkParts(inc=0)`

a relative paper link

Source code in ceurspt/ceurws.py

def paperLinkParts(
    self, inc: int = 0
) -> typing.Tuple[Optional[str], Optional[str]]:
    """
    get the href and the link text of a relative paper link

    Only the sign of inc is evaluated: the link always points to the
    direct neighbour (or to this paper), never further away.

    Args:
        inc(int): > 0 selects the next paper, < 0 the previous paper
            and 0 this paper itself

    Returns:
        tuple: (href, text) of the link or (None, None) if there is
            no such paper
    """
    if inc > 0:
        presymbol, postsymbol = "⫸", ""
        paper = self.next()
    elif inc < 0:
        presymbol, postsymbol = "", "⫷"
        paper = self.prev()
    else:
        presymbol, postsymbol = "", ""
        paper = self
    if not paper:
        # next()/prev() deliver None at the borders of the volume
        return None, None
    href = f"/{paper.id}.html"
    text = f"{presymbol}{paper.id}{postsymbol}"
    return href, text

`paperScrollLinks()`

get the paper scroll links

Source code in ceurspt/ceurws.py

def paperScrollLinks(self) -> str:
    """
    get the paper scroll links

    Returns:
        str: the concatenated anchors for the previous paper, this
            paper and the next paper - neighbours without a link
            target are left out
    """
    link_parts = (self.paperLinkParts(step) for step in (-1, 0, 1))
    anchors = (
        f"""<a href="{href}">{text}</a>""" for href, text in link_parts if href
    )
    return "".join(anchors)

`prev()`

get the previous paper in this volume

Source code in ceurspt/ceurws.py

def prev(self) -> "Paper":
    """
    get the previous paper in this volume

    Returns:
        Paper: whatever next() delivers for an increment of -1
    """
    previous_paper = self.next(-1)
    return previous_paper

`PaperManager`

Bases: JsonCacheManager

manage all papers

Source code in ceurspt/ceurws.py

class PaperManager(JsonCacheManager):
    """
    manage all papers

    The papers and their metadata records are kept in lookup tables by
    paper id and by relative pdf path ("Vol-<number>/<pdf name>").
    """

    def __init__(self, base_url: str):
        """
        constructor

        Args:
            base_url(str): the url of the RESTFul metadata service
        """
        JsonCacheManager.__init__(self, base_url)
        # the lookup tables are (re)filled by getPapers
        self.papers_by_id: Dict[str, Paper] = dict()
        self.papers_by_path: Dict[str, Paper] = dict()
        self.paper_records_by_path: Dict[str, dict] = dict()
        self.paper_dblp_by_path: Dict[str, dict] = dict()

    def getPaper(self, number: int, pdf_name: str):
        """
        get the paper with the given number and pdf name

        Args:
            number(int): the number of the volume the paper is part of
            pdf_name(str): the pdf name of the paper

        Returns:
            Paper: the paper or None if the paper is not found
        """
        try:
            paper = self.papers_by_path[f"Vol-{number}/{pdf_name}.pdf"]
        except KeyError:
            return None
        # make sure the paper refers back to this manager
        paper.pm = self
        return paper

    def get_volume_papers(self, number: int) -> List[Paper]:
        """
        Get all papers of given volume number
        Args:
            number(int): the number of the volume the papers are part of
        Returns:
            list of papers
        """
        volume_prefix = f"Vol-{number}/"
        volume_papers = []
        for pdf_path, paper in self.papers_by_path.items():
            if pdf_path.startswith(volume_prefix):
                volume_papers.append(paper)
        return volume_papers

    def getPapers(self, vm: VolumeManager, verbose: bool = False):
        """
        get all papers

        Loads the "papers" and "papers_dblp" lists of dicts, creates a
        Paper per paper record, adds it to its volume and refills my
        lookup tables.

        Args:
            vm: VolumeManager
            verbose(bool): if True show verbose loading information
        """
        papers_profiler = Profiler("Loading papers ...", profile=verbose)
        paper_lod = self.load_lod("papers")
        papers_profiler.time(f"{len(paper_lod)} papers")
        dblp_profiler = Profiler("Loading dblp paper metadata ...", profile=verbose)
        paper_dblp_lod = self.load_lod("papers_dblp")
        dblp_profiler.time(f"{len(paper_dblp_lod)} dblp indexed papers")
        link_profiler = Profiler("Linking papers and volumes...", profile=verbose)
        self.papers_by_id = {}
        self.paper_records_by_path = {}
        self.papers_by_path = {}
        for record in paper_lod:
            pdf_name = record["pdf_name"]
            vol_number = record["vol_number"]
            volume = vm.getVolume(vol_number)
            pdf_path = f"Vol-{vol_number}/{pdf_name}"
            pdf_url = f"https://ceur-ws.org/{pdf_path}"
            try:
                # the authors are not passed on - they stay available via
                # the paper record kept in paper_records_by_path
                paper = Paper(
                    id=record["id"],
                    title=record["title"],
                    pdfUrl=pdf_url,
                    volume=volume,
                )
                paper.pm = self
                if volume:
                    volume.addPaper(paper)
                self.papers_by_id[record["id"]] = paper
                self.papers_by_path[pdf_path] = paper
                self.paper_records_by_path[pdf_path] = record
            except Exception as ex:
                # a failing paper is reported and skipped so that the
                # remaining papers are still loaded
                print(
                    f"handling of Paper for pdfUrl '{pdf_url}' failed with {str(ex)}",
                    flush=True,
                )
        self.paper_dblp_by_path = {}
        for dblp_record in paper_dblp_lod:
            pdf_id = dblp_record["pdf_id"]
            # keyed like the pdf paths of the papers
            self.paper_dblp_by_path[f"{pdf_id}.pdf"] = dblp_record
        link_profiler.time(f"{len(self.papers_by_path)} papers linked to volumes")

`__init__(base_url)`

constructor

Parameters:

Name	Type	Description	Default
`base_url(str)`		the url of the RESTFul metadata service	required

Source code in ceurspt/ceurws.py

def __init__(self, base_url: str):
    """
    constructor

    Args:
        base_url(str): the url of the RESTFul metadata service
    """
    JsonCacheManager.__init__(self, base_url)
    # the lookup tables start empty
    self.papers_by_id: Dict[str, Paper] = dict()
    self.papers_by_path: Dict[str, Paper] = dict()
    self.paper_records_by_path: Dict[str, dict] = dict()
    self.paper_dblp_by_path: Dict[str, dict] = dict()

`getPaper(number, pdf_name)`

get the paper with the given number and pdf name

Parameters:

Name	Type	Description	Default
`number(int)`		the number of the volume the paper is part of	required
`pdf_name(str)`		the pdf name of the paper	required

Returns:

Name	Type	Description
`Paper`		the paper or None if the paper is not found

Source code in ceurspt/ceurws.py

def getPaper(self, number: int, pdf_name: str):
    """
    get the paper with the given number and pdf name

    Args:
        number(int): the number of the volume the paper is part of
        pdf_name(str): the pdf name of the paper

    Returns:
        Paper: the paper or None if the paper is not found
    """
    try:
        paper = self.papers_by_path[f"Vol-{number}/{pdf_name}.pdf"]
    except KeyError:
        return None
    # make sure the paper refers back to this manager
    paper.pm = self
    return paper

`getPapers(vm, verbose=False)`

get all papers

Parameters:

Name	Type	Description	Default
`vm`	`VolumeManager`	VolumeManager	required
`verbose`	`bool`	if True show verbose loading information	`False`

Source code in ceurspt/ceurws.py

def getPapers(self, vm: VolumeManager, verbose: bool = False):
    """
    get all papers

    Loads the "papers" and "papers_dblp" lists of dicts, creates a
    Paper per paper record, adds it to its volume and refills my
    lookup tables.

    Args:
        vm: VolumeManager
        verbose(bool): if True show verbose loading information
    """
    papers_profiler = Profiler("Loading papers ...", profile=verbose)
    paper_lod = self.load_lod("papers")
    papers_profiler.time(f"{len(paper_lod)} papers")
    dblp_profiler = Profiler("Loading dblp paper metadata ...", profile=verbose)
    paper_dblp_lod = self.load_lod("papers_dblp")
    dblp_profiler.time(f"{len(paper_dblp_lod)} dblp indexed papers")
    link_profiler = Profiler("Linking papers and volumes...", profile=verbose)
    self.papers_by_id = {}
    self.paper_records_by_path = {}
    self.papers_by_path = {}
    for record in paper_lod:
        pdf_name = record["pdf_name"]
        vol_number = record["vol_number"]
        volume = vm.getVolume(vol_number)
        pdf_path = f"Vol-{vol_number}/{pdf_name}"
        pdf_url = f"https://ceur-ws.org/{pdf_path}"
        try:
            # the authors are not passed on - they stay available via
            # the paper record kept in paper_records_by_path
            paper = Paper(
                id=record["id"],
                title=record["title"],
                pdfUrl=pdf_url,
                volume=volume,
            )
            paper.pm = self
            if volume:
                volume.addPaper(paper)
            self.papers_by_id[record["id"]] = paper
            self.papers_by_path[pdf_path] = paper
            self.paper_records_by_path[pdf_path] = record
        except Exception as ex:
            # a failing paper is reported and skipped so that the
            # remaining papers are still loaded
            print(
                f"handling of Paper for pdfUrl '{pdf_url}' failed with {str(ex)}",
                flush=True,
            )
    self.paper_dblp_by_path = {}
    for dblp_record in paper_dblp_lod:
        pdf_id = dblp_record["pdf_id"]
        # keyed like the pdf paths of the papers
        self.paper_dblp_by_path[f"{pdf_id}.pdf"] = dblp_record
    link_profiler.time(f"{len(self.papers_by_path)} papers linked to volumes")

`get_volume_papers(number)`

Get all papers of the given volume number.

Args: `number(int)`: the number of the volume the papers are part of

Returns: list of papers

Source code in ceurspt/ceurws.py

def get_volume_papers(self, number: int) -> List[Paper]:
    """
    Get all papers of given volume number

    Args:
        number(int): the number of the volume the papers are part of

    Returns:
        list of papers whose pdf path starts with "Vol-<number>/"
    """
    volume_prefix = f"Vol-{number}/"
    volume_papers = []
    for pdf_path, paper in self.papers_by_path.items():
        if pdf_path.startswith(volume_prefix):
            volume_papers.append(paper)
    return volume_papers

`Scholar` `dataclass`

Bases: DblpScholar

a scholar

Source code in ceurspt/ceurws.py

class Scholar(ceurspt.models.dblp.DblpScholar):
    """
    a scholar

    Extends DblpScholar without declaring any members of its own.
    """

`Volume`

Bases: Volume

a CEUR-WS Volume with its behavior

Source code in ceurspt/ceurws.py

class Volume(ceurspt.ceurws_base.Volume):
    """
    a CEUR-WS Volume with it's behavior
    """

    def __init__(self, **kwargs):
        """
        construct me from the keyword arguments of my base Volume class
        and start with an empty list of papers
        """
        super().__init__(**kwargs)
        # filled later via addPaper
        self.papers = []

    def getMergedDict(self) -> dict:
        """
        merge my own fields and my volume record into one flat dict

        Key prefixes:
            version.: version information
            spt.: my own dataclass fields and the relative html url
            cvb.: volume record keys that do not contain a dot
        Volume record keys that already contain a dot are taken over as is.

        Returns:
            dict: the merged dict
        """
        merged = {
            "version.version": Version.version,
            "version.cm_url": Version.cm_url,
            "spt.html_url": f"/Vol-{self.number}.html",
        }
        merged.update(
            {f"spt.{name}": val for name, val in dataclasses.asdict(self).items()}
        )
        record = self.vm.getVolumeRecord(self.number)
        for name, val in record.items():
            # keys with a dot already carry their own namespace prefix
            target = f"{name}" if "." in name else f"cvb.{name}"
            merged[target] = val
        return merged

    @classmethod
    def volLinkParts(cls, number: int, inc: int = 0):
        """
        get the parts of a relative volume link

        Args:
            number(int): the volume number
            inc(int): the relative increment

        Returns:
            tuple: the href and the link text for volume number+inc; the
            text is decorated with an arrow symbol on the left for a
            positive and on the right for a negative increment
        """
        target = number + inc
        if inc > 0:
            text = f"⫸Vol-{target}"
        elif inc < 0:
            text = f"Vol-{target}⫷"
        else:
            text = f"Vol-{target}"
        return f"/Vol-{target}.html", text

    @classmethod
    def volLink(cls, number: int, inc: int = 0) -> str:
        """
        get a relative volume link

        Args:
            number(int): the volume number
            inc(int): the relative increment

        Returns(str):
            a relative volume link as an html anchor, or an empty string
            if the volume number or the target volume number+inc is not
            positive
        """
        # volume numbers start at 1: besides the given number the link
        # target must be checked, otherwise volLink(1, -1) points to Vol-0
        if number <= 0 or number + inc <= 0:
            return ""
        href, text = cls.volLinkParts(number, inc)
        return f"""<a href="{href}">{text}</a>"""

    @classmethod
    def volLink_soup_tag(
        cls, soup, number: int, inc: int = 0
    ) -> typing.Optional["Tag"]:
        """
        get a relative volume link as a soup tag

        Args:
            soup(BeautifulSoup): the soup
            number(int): the volume number
            inc(int): the relative increment

        Returns(Tag):
            a new <a> tag linking to volume number+inc, or None if the
            volume number or the target volume number is not positive
        """
        # same validity rule as volLink; callers such as
        # add_volume_navigation test the result before inserting it
        if number <= 0 or number + inc <= 0:
            return None
        href, text = cls.volLinkParts(number, inc)
        link = soup.new_tag("a", href=href)
        link.string = text
        return link

    @classmethod
    def create_icon_list(
        cls, soup: BeautifulSoup, icon_list: typing.List[typing.Dict[str, typing.Any]]
    ) -> typing.List["Tag"]:
        """
        create a list of icons

        Args:
            soup: The BeautifulSoup object to use for creating new tags.
            icon_list: The list of icons to create. Each icon is represented as a
                dictionary with the following keys:
                    - src (str): The URL of the icon image file.
                    - title (str): The title text to use as a tooltip for the icon.
                    - link (str): The URL to link to when the icon is clicked.
                    - valid (optional): if present and falsy the icon is
                      rendered in grayscale; a missing key counts as valid.

        Returns:
            a list of link_tags, one <a> tag wrapping an <img> tag per icon
        """
        link_tags = []
        for icon_data in icon_list:
            # the link opens in a new tab
            link_tag = soup.new_tag("a")
            link_tag["href"] = icon_data["link"]
            link_tag["target"] = "_blank"
            # "valid" is optional so that icons built from the three
            # documented keys do not raise a KeyError
            if not icon_data.get("valid", True):
                link_tag["style"] = "filter: grayscale(1);"

            icon_tag = soup.new_tag("img")
            icon_tag["src"] = icon_data["src"]
            icon_tag["title"] = icon_data["title"]

            link_tag.append(icon_tag)
            link_tags.append(link_tag)
        return link_tags

    @classmethod
    def create_icon_bar(
        cls,
        soup: BeautifulSoup,
        icon_list: typing.List[typing.Dict[str, str]],
        class_name: str = "icon_list",
    ) -> "Tag":
        """
        Build the <div> that holds a horizontal rule followed by the icon links.

        Args:
            soup: The BeautifulSoup object to use for creating new tags.
            icon_list: The icons to show, in the dictionary format expected by
                create_icon_list (src, title, link and valid per icon).
            class_name: The name of the CSS class to apply to the <div> tag.

        Returns:
            Tag: The new <div> tag with the specified class name and list of icons.
        """
        bar = soup.new_tag("div")
        bar["class"] = [class_name]
        # the rule comes first, the icon links follow in list order
        bar.append(soup.new_tag("hr"))
        for icon_link in cls.create_icon_list(soup, icon_list):
            bar.append(icon_link)
        return bar

    def getIconBar(self, soup):
        """
        build my icon bar

        Reads my volume record, stores the event and event series entries
        (with the Wikidata entity URL prefix removed) in my attributes
        wd_event and wd_event_series and creates one icon per external
        resource and metadata export.

        Parameters:
            soup: The BeautifulSoup object to use for creating new tags.

        Returns:
            the tag created by Volume.create_icon_bar
        """
        record = self.vm.getVolumeRecord(self.number)
        entity_prefix = "http://www.wikidata.org/entity/"
        for wd_key, attr in (
            ("wd.event", "wd_event"),
            ("wd.eventSeries", "wd_event_series"),
        ):
            wd_id = record[wd_key] if wd_key in record else None
            if wd_id:
                wd_id = wd_id.replace(entity_prefix, "")
            setattr(self, attr, wd_id)
        # one row per icon: (src, title, link, valid)
        icon_rows = [
            ("/static/icons/32px-dblp-icon.png", "dblp",
             f"https://dblp.org/rec/{self.dblp}", self.dblp),
            ("/static/icons/32px-DNB.svg.png", "k10plus/DNB",
             f"https://opac.k10plus.de/DB=2.299/PPNSET?PPN={self.k10plus}", self.k10plus),
            ("/static/icons/32px-Scholia_logo.svg.png", "Proceedings@scholia",
             f"https://scholia.toolforge.org/venue/{self.wikidataid}", self.wikidataid),
            ("/static/icons/32px-EventIcon.png", "Event@scholia",
             f"https://scholia.toolforge.org/event/{self.wd_event}", self.wd_event),
            ("/static/icons/32px-EventSeriesIcon.png", "EventSeries@scholia",
             f"https://scholia.toolforge.org/event-series/{self.wd_event_series}", self.wd_event_series),
            ("/static/icons/32px-Wikidata_Query_Service_Favicon_wbg.svg.png", "Proceedings@wikidata",
             f"https://www.wikidata.org/wiki/{self.wikidataid}", self.wikidataid),
            ("/static/icons/32px-EventIcon.png", "Event@wikidata",
             f"https://www.wikidata.org/wiki/{self.wd_event}", self.wd_event),
            ("/static/icons/32px-EventSeriesIcon.png", "EventSeries@wikidata",
             f"https://www.wikidata.org/wiki/{self.wd_event_series}", self.wd_event_series),
            ("/static/icons/32px-SMW-icon.png", "SMW markup",
             f"/Vol-{self.number}.smw", True),
            ("/static/icons/32px-JSON_vector_logo.svg.png", "JSON metadata",
             f"/Vol-{self.number}.json", True),
            ("/static/icons/32px-YAML_Logo.svg.png", "YML metadata",
             f"/Vol-{self.number}.yaml", True),
        ]
        icon_list = [
            {"src": src, "title": title, "link": link, "valid": valid}
            for src, title, link, valid in icon_rows
        ]
        return Volume.create_icon_bar(soup, icon_list=icon_list)

    def addPaper(self, paper: "Paper"):
        """
        append the given paper to my papers and store its zero-based
        position in the list as paper.paper_index
        """
        # @TODO fixme to use LinkML generated code
        papers = self.papers
        papers.append(paper)
        paper.paper_index = len(papers) - 1

    def fix_element_tag(self, element, tag: str = "href", ext: str = ".pdf"):
        """
        fix the given attribute of the given element in place so that it
        points to this server

        Args:
            element: the element (soup tag) to fix
            tag(str): the name of the attribute to fix
            ext(str): the extension that replaces ".pdf" in pdf links
        """
        org_tag_value = element.get(tag)
        if org_tag_value is None:
            # e.g. <a name="..."> anchors carry no href - nothing to fix
            return
        value = org_tag_value.replace("http://ceur-ws.org/", "/")
        for file in ("ceur-ws.css", "CEUR-WS-logo.png"):
            value = value.replace(f"../{file}", f"/static/{file}")
        if ".pdf" in value:
            # NOTE(review): the volume prefix is also put in front of
            # absolute pdf links - confirm that only volume-relative pdf
            # links occur in the index pages
            value = value.replace(".pdf", ext)
            value = f"/Vol-{self.number}/{value}"
        element[tag] = value

    def add_volume_navigation(self, soup: BeautifulSoup):
        """
        Surround the volume number of the page with links to the previous
        and the next volume. Pages without a span of class CEURVOLNR are
        left untouched.
        Args:
            soup: index page
        """
        vol_tag = soup.find("span", class_="CEURVOLNR")
        if not vol_tag:
            return
        # previous volume goes in front of the number, next volume behind it
        for inc, insert in ((-1, vol_tag.insert_before), (+1, vol_tag.insert_after)):
            nav_link = Volume.volLink_soup_tag(soup, self.number, inc)
            if nav_link:
                insert(nav_link)

    def get_empty_volume_page(self, content_html: typing.Optional[str] = None) -> str:
        """
        Get empty volume page

        Args:
            content_html(str): html markup to show in the page body below
                the header table; None leaves the body empty

        Returns:
            str: the prettified html of the page including the volume
            navigation links
        """
        # without this the default None would show up as the text "None"
        if content_html is None:
            content_html = ""
        # NOTE(review): the CEURURN below is a fixed literal and does not
        # depend on self.number - confirm whether it should be per volume
        html_page = f"""
            <!DOCTYPE html>
            <!-- CEURVERSION=2020-07-09 -->
            <html lang="en">
            <head>
            <meta http-equiv="Content-type" content="text/html;charset=utf-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
            </head>
            <!--CEURLANG=eng -->
            <body>

            <table style="border: 0; border-spacing: 0; border-collapse: collapse; width: 95%">
            <tbody><tr>
            <td style="text-align: left; vertical-align: middle">
            <a href="http://ceur-ws.org/"><div id="CEURWSLOGO"></div></a>
            </td>
            <td style="text-align: right; vertical-align: middle">
            <div style="float:left" id="CEURCCBY"></div>
            <span class="CEURVOLNR">Vol-{self.number}</span> <br>
            <span class="CEURURN">urn:nbn:de:0074-3365-4</span>
            </td>
            </tr>
            </tbody></table>
            {content_html}
            </body></html>
        """
        soup = BeautifulSoup(html_page, "html.parser")
        self.add_volume_navigation(soup)
        content = soup.prettify(formatter="html")
        return content

    def getHtml(self, ext: str = ".pdf", fixLinks: bool = True) -> str:
        """
        get my HTML content

        Reads index.html from my volume directory. With fixLinks the links
        and image sources are rewritten via fix_element_tag, the volume
        navigation is added and my icon bar is inserted in front of the
        first <hr>. If anything fails an empty volume page showing the
        error message is returned instead.

        Args:
            ext(str): the extension to use for pdf page details
            fixLinks(bool): if True fix the links

        Returns:
            str: the html content
        """
        index_path = f"{self.vol_dir}/index.html"
        try:
            with open(index_path, "r") as index_html:
                content = index_html.read()
            if fixLinks:
                soup = BeautifulSoup(content, "html.parser")
                # href=True / src=True skip tags without the attribute
                # (e.g. <a name="...">) which can not be fixed
                for element in soup.find_all(["link", "a"], href=True):
                    self.fix_element_tag(element, tag="href", ext=ext)
                # html images are <img> tags; "image" alone never matched them
                for element in soup.find_all(["img", "image"], src=True):
                    self.fix_element_tag(element, tag="src", ext=ext)
                self.add_volume_navigation(soup)
                first_hr = soup.find("hr")
                if first_hr:
                    icon_bar = self.getIconBar(soup)
                    first_hr.insert_before(icon_bar)
                content = soup.prettify(formatter="html")
            return content
        except Exception as ex:
            # deliberately broad: any failure is shown on a placeholder page
            err_html = f"""<span style="color:red">reading {index_path} for Volume {self.number} failed: {str(ex)}</span>"""
            content = self.get_empty_volume_page(err_html)
            return content

    def as_smw_markup(self) -> str:
        """
        render me as a semantic mediawiki Volume template call

        The dblp and k10plus parameters are only emitted when set.

        Returns:
            str: the smw markup for this volume
        """
        lines = [
            "=Volume=",
            "{{Volume",
            f"|number={self.number}",
            "|storemode=property",
            f"|wikidataid={self.wikidataid}",
            f"|title={self.title}",
            f"|acronym={self.acronym}",
            f"|url={self.url}",
            f"|date={self.date}",
        ]
        for attr in ("dblp", "k10plus"):
            value = getattr(self, attr)
            if value:
                lines.append(f"|{attr}={value}")
        # NOTE(review): the urn is a fixed literal and does not depend on
        # self.number - confirm whether it should be per volume
        lines.append("|urn=urn:nbn:de:0074-1155-8")
        lines.append("}}")
        return "\n".join(lines)

`addPaper(paper)`

add the given paper

Source code in ceurspt/ceurws.py

def addPaper(self, paper: "Paper"):
    """
    append the given paper to my papers and store its zero-based
    position in the list as paper.paper_index
    """
    # @TODO fixme to use LinkML generated code
    papers = self.papers
    papers.append(paper)
    paper.paper_index = len(papers) - 1

`add_volume_navigation(soup)`

Add navigation links around the volume number to jump to the previous and the next volume. Args: soup: the index page.

Source code in ceurspt/ceurws.py

def add_volume_navigation(self, soup: BeautifulSoup):
    """
    Surround the volume number of the page with links to the previous
    and the next volume. Pages without a span of class CEURVOLNR are
    left untouched.
    Args:
        soup: index page
    """
    vol_tag = soup.find("span", class_="CEURVOLNR")
    if not vol_tag:
        return
    # previous volume goes in front of the number, next volume behind it
    for inc, insert in ((-1, vol_tag.insert_before), (+1, vol_tag.insert_after)):
        nav_link = Volume.volLink_soup_tag(soup, self.number, inc)
        if nav_link:
            insert(nav_link)

`as_smw_markup()`

return my semantic mediawiki markup

Returns:

Name	Type	Description
`str`	`str`	the smw markup for this volume

Source code in ceurspt/ceurws.py

    def as_smw_markup(self) -> str:
        """
        render me as a semantic mediawiki Volume template call

        The dblp and k10plus parameters are only emitted when set.

        Returns:
            str: the smw markup for this volume
        """
        lines = [
            "=Volume=",
            "{{Volume",
            f"|number={self.number}",
            "|storemode=property",
            f"|wikidataid={self.wikidataid}",
            f"|title={self.title}",
            f"|acronym={self.acronym}",
            f"|url={self.url}",
            f"|date={self.date}",
        ]
        for attr in ("dblp", "k10plus"):
            value = getattr(self, attr)
            if value:
                lines.append(f"|{attr}={value}")
        # NOTE(review): the urn is a fixed literal and does not depend on
        # self.number - confirm whether it should be per volume
        lines.append("|urn=urn:nbn:de:0074-1155-8")
        lines.append("}}")
        return "\n".join(lines)

`create_icon_bar(soup, icon_list, class_name='icon_list')` `classmethod`

Creates a new `<div>` tag with the specified class name and list of icons.

Parameters:

Name	Type	Description	Default
`soup`	`BeautifulSoup`	The BeautifulSoup object to use for creating new tags.	required
`icon_list`	`List[Dict[str, str]]`	The list of icons to add to the tag. Each icon is represented as a dictionary with the following keys: - src (str): The URL of the icon image file. - title (str): The title text to use as a tooltip for the icon. - link (str): The URL to link to when the icon is clicked.	required
`class_name`	`str`	The name of the CSS class to apply to the tag.	`'icon_list'`

Returns:

Name	Type	Description
`Tag`	`Tag`	The new tag with the specified class name and list of icons.

Source code in ceurspt/ceurws.py

@classmethod
def create_icon_bar(
    cls,
    soup: BeautifulSoup,
    icon_list: typing.List[typing.Dict[str, str]],
    class_name: str = "icon_list",
) -> "Tag":
    """
    Build the <div> that holds a horizontal rule followed by the icon links.

    Args:
        soup: The BeautifulSoup object to use for creating new tags.
        icon_list: The icons to show, in the dictionary format expected by
            create_icon_list (src, title, link and valid per icon).
        class_name: The name of the CSS class to apply to the <div> tag.

    Returns:
        Tag: The new <div> tag with the specified class name and list of icons.
    """
    bar = soup.new_tag("div")
    bar["class"] = [class_name]
    # the rule comes first, the icon links follow in list order
    bar.append(soup.new_tag("hr"))
    for icon_link in cls.create_icon_list(soup, icon_list):
        bar.append(icon_link)
    return bar

`create_icon_list(soup, icon_list)` `classmethod`

create a list of icons

Parameters:

Name	Type	Description	Default
`soup`	`BeautifulSoup`	The BeautifulSoup object to use for creating new tags.	required
`icon_list`	`List[Dict[str, str]]`	The list of icons to add to the tag. Each icon is represented as a dictionary with the following keys: - src (str): The URL of the icon image file. - title (str): The title text to use as a tooltip for the icon. - link (str): The URL to link to when the icon is clicked.	required

Returns:

Type	Description
`List[Tag]`	a list of link_tags

Source code in ceurspt/ceurws.py

@classmethod
def create_icon_list(
    cls, soup: BeautifulSoup, icon_list: typing.List[typing.Dict[str, typing.Any]]
) -> typing.List["Tag"]:
    """
    create a list of icons

    Args:
        soup: The BeautifulSoup object to use for creating new tags.
        icon_list: The list of icons to create. Each icon is represented as a
            dictionary with the following keys:
                - src (str): The URL of the icon image file.
                - title (str): The title text to use as a tooltip for the icon.
                - link (str): The URL to link to when the icon is clicked.
                - valid (optional): if present and falsy the icon is
                  rendered in grayscale; a missing key counts as valid.

    Returns:
        a list of link_tags, one <a> tag wrapping an <img> tag per icon
    """
    link_tags = []
    for icon_data in icon_list:
        # the link opens in a new tab
        link_tag = soup.new_tag("a")
        link_tag["href"] = icon_data["link"]
        link_tag["target"] = "_blank"
        # "valid" is optional so that icons built from the three
        # documented keys do not raise a KeyError
        if not icon_data.get("valid", True):
            link_tag["style"] = "filter: grayscale(1);"

        icon_tag = soup.new_tag("img")
        icon_tag["src"] = icon_data["src"]
        icon_tag["title"] = icon_data["title"]

        link_tag.append(icon_tag)
        link_tags.append(link_tag)
    return link_tags

`fix_element_tag(element, tag='href', ext='.pdf')`

fix the given element tag

Parameters:

Name	Type	Description	Default
`tag(str)`		the tag to fix	required
`ext(str)`		the extension	required

Source code in ceurspt/ceurws.py

def fix_element_tag(self, element, tag: str = "href", ext: str = ".pdf"):
    """
    fix the given attribute of the given element in place so that it
    points to this server

    Args:
        element: the element (soup tag) to fix
        tag(str): the name of the attribute to fix
        ext(str): the extension that replaces ".pdf" in pdf links
    """
    org_tag_value = element.get(tag)
    if org_tag_value is None:
        # e.g. <a name="..."> anchors carry no href - nothing to fix
        return
    value = org_tag_value.replace("http://ceur-ws.org/", "/")
    for file in ("ceur-ws.css", "CEUR-WS-logo.png"):
        value = value.replace(f"../{file}", f"/static/{file}")
    if ".pdf" in value:
        # NOTE(review): the volume prefix is also put in front of
        # absolute pdf links - confirm that only volume-relative pdf
        # links occur in the index pages
        value = value.replace(".pdf", ext)
        value = f"/Vol-{self.number}/{value}"
    element[tag] = value

`getHtml(ext='.pdf', fixLinks=True)`

get my HTML content

Parameters:

Name	Type	Description	Default
`ext(str)`		the extension to use for pdf page details	required
`fixLinks(bool)`		if True fix the links	required

Source code in ceurspt/ceurws.py

def getHtml(self, ext: str = ".pdf", fixLinks: bool = True) -> str:
    """
    get my HTML content

    Reads index.html from my volume directory. With fixLinks the links
    and image sources are rewritten via fix_element_tag, the volume
    navigation is added and my icon bar is inserted in front of the
    first <hr>. If anything fails an empty volume page showing the
    error message is returned instead.

    Args:
        ext(str): the extension to use for pdf page details
        fixLinks(bool): if True fix the links

    Returns:
        str: the html content
    """
    index_path = f"{self.vol_dir}/index.html"
    try:
        with open(index_path, "r") as index_html:
            content = index_html.read()
        if fixLinks:
            soup = BeautifulSoup(content, "html.parser")
            # href=True / src=True skip tags without the attribute
            # (e.g. <a name="...">) which can not be fixed
            for element in soup.find_all(["link", "a"], href=True):
                self.fix_element_tag(element, tag="href", ext=ext)
            # html images are <img> tags; "image" alone never matched them
            for element in soup.find_all(["img", "image"], src=True):
                self.fix_element_tag(element, tag="src", ext=ext)
            self.add_volume_navigation(soup)
            first_hr = soup.find("hr")
            if first_hr:
                icon_bar = self.getIconBar(soup)
                first_hr.insert_before(icon_bar)
            content = soup.prettify(formatter="html")
        return content
    except Exception as ex:
        # deliberately broad: any failure is shown on a placeholder page
        err_html = f"""<span style="color:red">reading {index_path} for Volume {self.number} failed: {str(ex)}</span>"""
        content = self.get_empty_volume_page(err_html)
        return content

`getIconBar(soup)`

get my icon bar

Parameters:

Name	Type	Description	Default
`soup`		The BeautifulSoup object to use for creating new tags.	required

Source code in ceurspt/ceurws.py

def getIconBar(self, soup):
    """
    build my icon bar

    Reads my volume record, stores the event and event series entries
    (with the Wikidata entity URL prefix removed) in my attributes
    wd_event and wd_event_series and creates one icon per external
    resource and metadata export.

    Parameters:
        soup: The BeautifulSoup object to use for creating new tags.

    Returns:
        the tag created by Volume.create_icon_bar
    """
    record = self.vm.getVolumeRecord(self.number)
    entity_prefix = "http://www.wikidata.org/entity/"
    for wd_key, attr in (
        ("wd.event", "wd_event"),
        ("wd.eventSeries", "wd_event_series"),
    ):
        wd_id = record[wd_key] if wd_key in record else None
        if wd_id:
            wd_id = wd_id.replace(entity_prefix, "")
        setattr(self, attr, wd_id)
    # one row per icon: (src, title, link, valid)
    icon_rows = [
        ("/static/icons/32px-dblp-icon.png", "dblp",
         f"https://dblp.org/rec/{self.dblp}", self.dblp),
        ("/static/icons/32px-DNB.svg.png", "k10plus/DNB",
         f"https://opac.k10plus.de/DB=2.299/PPNSET?PPN={self.k10plus}", self.k10plus),
        ("/static/icons/32px-Scholia_logo.svg.png", "Proceedings@scholia",
         f"https://scholia.toolforge.org/venue/{self.wikidataid}", self.wikidataid),
        ("/static/icons/32px-EventIcon.png", "Event@scholia",
         f"https://scholia.toolforge.org/event/{self.wd_event}", self.wd_event),
        ("/static/icons/32px-EventSeriesIcon.png", "EventSeries@scholia",
         f"https://scholia.toolforge.org/event-series/{self.wd_event_series}", self.wd_event_series),
        ("/static/icons/32px-Wikidata_Query_Service_Favicon_wbg.svg.png", "Proceedings@wikidata",
         f"https://www.wikidata.org/wiki/{self.wikidataid}", self.wikidataid),
        ("/static/icons/32px-EventIcon.png", "Event@wikidata",
         f"https://www.wikidata.org/wiki/{self.wd_event}", self.wd_event),
        ("/static/icons/32px-EventSeriesIcon.png", "EventSeries@wikidata",
         f"https://www.wikidata.org/wiki/{self.wd_event_series}", self.wd_event_series),
        ("/static/icons/32px-SMW-icon.png", "SMW markup",
         f"/Vol-{self.number}.smw", True),
        ("/static/icons/32px-JSON_vector_logo.svg.png", "JSON metadata",
         f"/Vol-{self.number}.json", True),
        ("/static/icons/32px-YAML_Logo.svg.png", "YML metadata",
         f"/Vol-{self.number}.yaml", True),
    ]
    icon_list = [
        {"src": src, "title": title, "link": link, "valid": valid}
        for src, title, link, valid in icon_rows
    ]
    return Volume.create_icon_bar(soup, icon_list=icon_list)

`getMergedDict()`

get my merged dict

Returns:

Type	Description
`dict`	dict

Source code in ceurspt/ceurws.py

def getMergedDict(self) -> dict:
    """
    merge my own fields and my volume record into one flat dict

    Key prefixes:
        version.: version information
        spt.: my own dataclass fields and the relative html url
        cvb.: volume record keys that do not contain a dot
    Volume record keys that already contain a dot are taken over as is.

    Returns:
        dict: the merged dict
    """
    merged = {
        "version.version": Version.version,
        "version.cm_url": Version.cm_url,
        "spt.html_url": f"/Vol-{self.number}.html",
    }
    merged.update(
        {f"spt.{name}": val for name, val in dataclasses.asdict(self).items()}
    )
    record = self.vm.getVolumeRecord(self.number)
    for name, val in record.items():
        # keys with a dot already carry their own namespace prefix
        target = f"{name}" if "." in name else f"cvb.{name}"
        merged[target] = val
    return merged

`get_empty_volume_page(content_html=None)`

Get empty volume page

Source code in ceurspt/ceurws.py

def get_empty_volume_page(self, content_html: typing.Optional[str] = None) -> str:
    """
    Get empty volume page

    Args:
        content_html(str): html markup to show in the page body below
            the header table; None leaves the body empty

    Returns:
        str: the prettified html of the page including the volume
        navigation links
    """
    # without this the default None would show up as the text "None"
    if content_html is None:
        content_html = ""
    # NOTE(review): the CEURURN below is a fixed literal and does not
    # depend on self.number - confirm whether it should be per volume
    html_page = f"""
        <!DOCTYPE html>
        <!-- CEURVERSION=2020-07-09 -->
        <html lang="en">
        <head>
        <meta http-equiv="Content-type" content="text/html;charset=utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
        </head>
        <!--CEURLANG=eng -->
        <body>

        <table style="border: 0; border-spacing: 0; border-collapse: collapse; width: 95%">
        <tbody><tr>
        <td style="text-align: left; vertical-align: middle">
        <a href="http://ceur-ws.org/"><div id="CEURWSLOGO"></div></a>
        </td>
        <td style="text-align: right; vertical-align: middle">
        <div style="float:left" id="CEURCCBY"></div>
        <span class="CEURVOLNR">Vol-{self.number}</span> <br>
        <span class="CEURURN">urn:nbn:de:0074-3365-4</span>
        </td>
        </tr>
        </tbody></table>
        {content_html}
        </body></html>
    """
    soup = BeautifulSoup(html_page, "html.parser")
    self.add_volume_navigation(soup)
    content = soup.prettify(formatter="html")
    return content

`volLink(number, inc=0)` `classmethod`

get a relative volume link

Parameters:

Name	Type	Description	Default
`number(int)`		the volume number	required
`inc(int)`		the relative increment	required

Returns(str): a relative volume link

Source code in ceurspt/ceurws.py

@classmethod
def volLink(cls, number: int, inc: int = 0) -> str:
    """
    get a relative volume link

    Args:
        number(int): the volume number
        inc(int): the relative increment

    Returns(str):
        a relative volume link as an html anchor, or an empty string
        if the volume number or the target volume number+inc is not
        positive
    """
    # volume numbers start at 1: besides the given number the link
    # target must be checked, otherwise volLink(1, -1) points to Vol-0
    if number <= 0 or number + inc <= 0:
        return ""
    href, text = cls.volLinkParts(number, inc)
    return f"""<a href="{href}">{text}</a>"""

`volLinkParts(number, inc=0)` `classmethod`

a relative volume link

Source code in ceurspt/ceurws.py

@classmethod
def volLinkParts(cls, number: int, inc: int = 0):
    """
    get the parts of a relative volume link

    Args:
        number(int): the volume number
        inc(int): the relative increment

    Returns:
        tuple: the href and the link text for volume number+inc; the
        text is decorated with an arrow symbol on the left for a
        positive and on the right for a negative increment
    """
    target = number + inc
    if inc > 0:
        text = f"⫸Vol-{target}"
    elif inc < 0:
        text = f"Vol-{target}⫷"
    else:
        text = f"Vol-{target}"
    return f"/Vol-{target}.html", text

`volLink_soup_tag(soup, number, inc=0)` `classmethod`

get a relative volume link as a soup tag

Parameters:

Name	Description	Default
`soup(BeautifulSoup)`	the soup	required
`number(int)`	the volume number	required
`inc(int)`	the relative increment	required

Returns (Tag): a relative volume link as a soup `<a>` tag

Source code in ceurspt/ceurws.py

@classmethod
def volLink_soup_tag(
    cls, soup, number: int, inc: int = 0
) -> typing.Optional["Tag"]:
    """
    get a relative volume link as a soup tag

    Args:
        soup(BeautifulSoup): the soup
        number(int): the volume number
        inc(int): the relative increment

    Returns(Tag):
        a new <a> tag linking to volume number+inc, or None if the
        volume number or the target volume number is not positive
    """
    # same validity rule as volLink; callers such as
    # add_volume_navigation test the result before inserting it
    if number <= 0 or number + inc <= 0:
        return None
    href, text = cls.volLinkParts(number, inc)
    link = soup.new_tag("a", href=href)
    link.string = text
    return link

`VolumeManager`

Bases: JsonCacheManager

manage all volumes

Source code in ceurspt/ceurws.py

class VolumeManager(JsonCacheManager):
    """
    manage all volumes
    """

    def __init__(self, base_path: str, base_url: str):
        """
        initialize me with the given base_path

        Args:
            base_path(str): the path to my files
            base_url(str): the url of the RESTFul metadata service
        """
        JsonCacheManager.__init__(self, base_url=base_url)
        self.base_path = base_path
        self.volumes_by_number: Dict[int, Volume] = {}
        self.volume_records_by_number: Dict[int, dict] = {}

    def head_table_html(self) -> str:
        """
        get the static html table shown at the top of the index page

        Returns:
            str: the html markup of the head table
        """
        html = """<table width="97%" cellspacing="5" cellpadding="0" border="0">
<tbody><tr>
<td valign="middle" align="left">
<div id="CEURWSLOGO"></div>
<!--<img alt="[25years CEUR-WS]" style="padding:4px; float:left;"  width="550" src="./CEUR-WS-logo-originals/2020/CEUR-WS-25anniversary.png"> -->
</td>
<td valign="middle" align="justify">
<font size="-2" face="ARIAL,HELVETICA,VERDANA" color="#363636">

<img alt="[OpenAccess]" style="padding:6px; float:left;" src="/static/OpenAccesslogo_200x313.png" width="18">
CEUR Workshop Proceedings (CEUR-WS.org) is a
<a href="https://ceur-ws.org/CEURWS-VALUES.html">free</a>
<a href="http://www.sherpa.ac.uk/romeo/issn/1613-0073/">open-access</a>
publication service
at <a href="http://sunsite.informatik.rwth-aachen.de">Sun SITE Central Europe</a>
operated under the umbrella of
 <a href="http://www-i5.informatik.rwth-aachen.de">RWTH Aachen University</a>.
CEUR-WS.org is a recognized ISSN publication series,
<a href="https://ceur-ws.org/issn-1613-0073.html">ISSN 1613-0073</a> (<a href="https://portal.issn.org/resource/ISSN/1613-0073?format=json">json</a>).
CEUR-WS.org is hosted at http://SunSITE.Informatik.RWTH-Aachen.DE/Publications/CEUR-WS/.
This service is provided by
the <b><a href="https://ceur-ws.org/CEURWS-TEAM.html">CEUR-WS.org Team</a></b>.
See end of the page for contact details and <a href="https://ceur-ws.org/#IMPRESSUM">Impressum</a>.
</font>
</td>
</tr>
</tbody></table>"""
        return html

    def index_html(
        self, upper: Optional[int] = None, lower: Optional[int] = None
    ) -> str:
        """
        return an index going from the given upper volume number down to the given lower volume number

        Args:
            upper(int): upper volume number to start with - if None the number
                of the last volume in my volume dict is used
            lower(int): lower volume number to end with - if None 1 is used

        Returns:
            html code for index
        """
        html = f"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
   "https://www.w3.org/TR/html4/loose.dtd">
<html>
  <head> 
    <meta http-equiv="Content-Type" content="Type=text/html;charset=utf-8">
    <meta name="description" content="CEUR-WS.org provides free online scientific papers">
    <meta name="keywords" content="open access, open archive, free scientific paper, workshop proceedings, online publishing, computer science, information systems" >

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- automatically refresh daily-->
    <meta http-equiv="expires" content="86400">
    <link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
    <link rel="icon" type="image/x-icon" href="/static/favicon.ico">
    <title>CEUR-WS.org - CEUR Workshop Proceedings (free, open-access publishing, computer science/information systems)</title>
    <link rel="shortcut icon" href="/static/ceur-ws.ico">
  </head>
  <body>
     {self.head_table_html()}
     <div>
"""
        # the volumes in insertion order of my dict, reversed
        # NOTE(review): the early break below assumes the dict was filled
        # in ascending volume number order - confirm against the data source
        volumes = list(self.volumes_by_number.values())
        volumes.reverse()
        # make sure upper and lower values are valid
        if upper is None:
            # without any volumes there is no top number; the loop is empty anyway
            upper = volumes[0].number if volumes else 0
        if lower is None:
            lower = 1
        # loop over the reversed list
        for vol in volumes:
            vol_number = vol.number
            if vol_number > upper:
                continue
            if vol_number < lower:
                break
            if isinstance(vol.title, str):
                vol_title = escape(vol.title)
            else:
                vol_title = "Title missing (Might be one of the empty volumes)"
            html += f"""       <div style='bgcolor:#DCDBD7'>
         <b><a name='Vol-{vol_number}'>Vol-{vol_number}</a></b>
         <a href='/Vol-{vol_number}.html'>{vol_title}</a>
       </div>
"""
        html += """    </div>
  </body>
</html>"""
        return html

    def getVolume(self, number: int):
        """
        get my volume by volume number

        Args:
            number(int): the volume to get

        Returns:
            the volume or None if the number is not known
        """
        return self.volumes_by_number.get(number)

    def getVolumeRecord(self, number: int):
        """
        get my volume record by volume number

        Args:
            number(int): the number of the volume to get the record for

        Returns:
            the volume record or None if the number is not known
        """
        return self.volume_records_by_number.get(number)

    def getVolumes(self, verbose: bool = False):
        """
        get my volumes

        loads the "volumes" and "proceedings" lists of dicts, creates
        a Volume per volume record and copies selected proceedings
        values to the matching volume

        Args:
            verbose(bool): if True show verbose loading information
        """
        profiler = Profiler("Loading volumes", profile=verbose)
        volume_lod = self.load_lod("volumes")
        proceedings_lod = self.load_lod("proceedings")
        self.volumes_by_number = {}
        self.volume_records_by_number = {}
        for volume_record in volume_lod:
            vol_number = volume_record["number"]
            self.volume_records_by_number[vol_number] = volume_record
            title = volume_record["title"]
            pub_date_str = volume_record["pubDate"]
            if pub_date_str:
                pub_date = datetime.fromisoformat(pub_date_str).date()
            else:
                pub_date = None
            acronym = volume_record["acronym"]
            volume = Volume(
                number=vol_number, title=title, date=pub_date, acronym=acronym
            )
            volume.vm = self
            volume.number = int(volume.number)
            vol_dir = f"{self.base_path}/Vol-{vol_number}"
            # only remember the directory if it exists locally
            if os.path.isdir(vol_dir):
                volume.vol_dir = vol_dir
            else:
                volume.vol_dir = None
            self.volumes_by_number[vol_number] = volume
        # proceedings key -> volume attribute to set
        map_pairs = [
            ("item", "wikidataid"),
            ("itemDescription", "description"),
            ("dblpProceedingsId", "dblp"),
            ("described_at_URL", "url"),
            ("ppnId", "k10plus"),
            ("URN_NBN", "urn"),
        ]
        for proc_record in proceedings_lod:
            number = proc_record["sVolume"]
            if not number:
                print(f"Warning: {proc_record} has no volume number")
                continue
            volume_record = self.volume_records_by_number.get(number)
            volume = self.volumes_by_number.get(number)
            if volume_record is None or volume is None:
                # a proceedings entry for a volume missing from the volumes
                # list must not abort the whole loading process
                print(f"Warning: {proc_record} refers to unknown volume {number}")
                continue
            # keep all proceedings values in the record with a "wd." prefix
            for key, value in proc_record.items():
                volume_record[f"wd.{key}"] = value
            for wd_id, attr in map_pairs:
                wd_key = f"wd.{wd_id}"
                if wd_key in volume_record:
                    value = volume_record[wd_key]
                    if isinstance(value, str):
                        # strip the wikidata url prefixes
                        value = value.replace("http://www.wikidata.org/entity/", "")
                        value = value.replace("https://www.wikidata.org/wiki/", "")
                    setattr(volume, attr, value)
        msg = f"{len(self.volumes_by_number)} volumes"
        profiler.time(msg)

`__init__(base_path, base_url)`

initialize me with the given base_path

Parameters:

Name	Type	Description	Default
`base_path(str)`		the path to my files	required
`base_url(str)`		the url of the RESTFul metadata service	required

Source code in ceurspt/ceurws.py

def __init__(self, base_path: str, base_url: str):
    """
    construct me for the given base_path and base_url

    Args:
        base_path(str): the path to my files
        base_url(str): the url of the RESTFul metadata service
    """
    JsonCacheManager.__init__(self, base_url=base_url)
    # both lookups start empty
    self.volume_records_by_number: Dict[int, dict] = {}
    self.volumes_by_number: Dict[int, Volume] = {}
    self.base_path = base_path

`getVolume(number)`

get my volume by volume number

Parameters:

Name	Type	Description	Default
`number(int)`		the volume to get	required

Source code in ceurspt/ceurws.py

def getVolume(self, number: int):
    """
    look up my volume for the given volume number

    Args:
        number(int): the volume to get

    Returns:
        the volume stored for the number or None if there is none
    """
    return self.volumes_by_number.get(number)

`getVolumes(verbose=False)`

get my volumes

Parameters:

Name	Type	Description	Default
`verbose(bool)`		if True show verbose loading information	`False`

Source code in ceurspt/ceurws.py

def getVolumes(self, verbose: bool = False):
    """
    get my volumes

    loads the "volumes" and "proceedings" lists of dicts, creates
    a Volume per volume record and copies selected proceedings
    values to the matching volume

    Args:
        verbose(bool): if True show verbose loading information
    """
    profiler = Profiler("Loading volumes", profile=verbose)
    volume_lod = self.load_lod("volumes")
    proceedings_lod = self.load_lod("proceedings")
    self.volumes_by_number = {}
    self.volume_records_by_number = {}
    for volume_record in volume_lod:
        vol_number = volume_record["number"]
        self.volume_records_by_number[vol_number] = volume_record
        title = volume_record["title"]
        pub_date_str = volume_record["pubDate"]
        if pub_date_str:
            pub_date = datetime.fromisoformat(pub_date_str).date()
        else:
            pub_date = None
        acronym = volume_record["acronym"]
        volume = Volume(
            number=vol_number, title=title, date=pub_date, acronym=acronym
        )
        volume.vm = self
        volume.number = int(volume.number)
        vol_dir = f"{self.base_path}/Vol-{vol_number}"
        # only remember the directory if it exists locally
        if os.path.isdir(vol_dir):
            volume.vol_dir = vol_dir
        else:
            volume.vol_dir = None
        self.volumes_by_number[vol_number] = volume
    # proceedings key -> volume attribute to set
    map_pairs = [
        ("item", "wikidataid"),
        ("itemDescription", "description"),
        ("dblpProceedingsId", "dblp"),
        ("described_at_URL", "url"),
        ("ppnId", "k10plus"),
        ("URN_NBN", "urn"),
    ]
    for proc_record in proceedings_lod:
        number = proc_record["sVolume"]
        if not number:
            print(f"Warning: {proc_record} has no volume number")
            continue
        volume_record = self.volume_records_by_number.get(number)
        volume = self.volumes_by_number.get(number)
        if volume_record is None or volume is None:
            # a proceedings entry for a volume missing from the volumes
            # list must not abort the whole loading process
            print(f"Warning: {proc_record} refers to unknown volume {number}")
            continue
        # keep all proceedings values in the record with a "wd." prefix
        for key, value in proc_record.items():
            volume_record[f"wd.{key}"] = value
        for wd_id, attr in map_pairs:
            wd_key = f"wd.{wd_id}"
            if wd_key in volume_record:
                value = volume_record[wd_key]
                if isinstance(value, str):
                    # strip the wikidata url prefixes
                    value = value.replace("http://www.wikidata.org/entity/", "")
                    value = value.replace("https://www.wikidata.org/wiki/", "")
                setattr(volume, attr, value)
    msg = f"{len(self.volumes_by_number)} volumes"
    profiler.time(msg)

`head_table_html()`

Source code in ceurspt/ceurws.py

    def head_table_html(self) -> str:
        """ """
        html = """<table width="97%" cellspacing="5" cellpadding="0" border="0">
<tbody><tr>
<td valign="middle" align="left">
<div id="CEURWSLOGO"></div>
<!--<img alt="[25years CEUR-WS]" style="padding:4px; float:left;"  width="550" src="./CEUR-WS-logo-originals/2020/CEUR-WS-25anniversary.png"> -->
</td>
<td valign="middle" align="justify">
<font size="-2" face="ARIAL,HELVETICA,VERDANA" color="#363636">

<img alt="[OpenAccess]" style="padding:6px; float:left;" src="/static/OpenAccesslogo_200x313.png" width="18">
CEUR Workshop Proceedings (CEUR-WS.org) is a
<a href="https://ceur-ws.org/CEURWS-VALUES.html">free</a>
<a href="http://www.sherpa.ac.uk/romeo/issn/1613-0073/">open-access</a>
publication service
at <a href="http://sunsite.informatik.rwth-aachen.de">Sun SITE Central Europe</a>
operated under the umbrella of
 <a href="http://www-i5.informatik.rwth-aachen.de">RWTH Aachen University</a>.
CEUR-WS.org is a recognized ISSN publication series,
<a href="https://ceur-ws.org/issn-1613-0073.html">ISSN 1613-0073</a> (<a href="https://portal.issn.org/resource/ISSN/1613-0073?format=json">json</a>).
CEUR-WS.org is hosted at http://SunSITE.Informatik.RWTH-Aachen.DE/Publications/CEUR-WS/.
This service is provided by
the <b><a href="https://ceur-ws.org/CEURWS-TEAM.html">CEUR-WS.org Team</a></b>.
See end of the page for contact details and <a href="https://ceur-ws.org/#IMPRESSUM">Impressum</a>.
</font>
</td>
</tr>
</tbody></table>"""
        return html

`index_html(upper=None, lower=None)`

return an index going from the given upper volume number down to the given lower volume number

Parameters:

Name	Type	Description	Default
`upper(int)`		upper volume number to start with	`None`
`lower(int)`		lower volume number to end with	`None`

Returns:

Type	Description
`str`	html code for index

Source code in ceurspt/ceurws.py

    def index_html(
        self, upper: Optional[int] = None, lower: Optional[int] = None
    ) -> str:
        """
        return an index going from the given upper volume number down to the given lower volume number

        Args:
            upper(int): upper volume number to start with
            lower(int): lower volume number to end with

        Returns:
            html code for index
        """
        html = f"""<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
   "https://www.w3.org/TR/html4/loose.dtd">
<html>
  <head> 
    <meta http-equiv="Content-Type" content="Type=text/html;charset=utf-8">
    <meta name="description" content="CEUR-WS.org provides free online scientific papers">
    <meta name="keywords" content="open access, open archive, free scientific paper, workshop proceedings, online publishing, computer science, information systems" >

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <!-- automatically refresh daily-->
    <meta http-equiv="expires" content="86400">
    <link rel="stylesheet" type="text/css" href="/static/ceur-ws.css">
    <link rel="icon" type="image/x-icon" href="/static/favicon.ico">
    <title>CEUR-WS.org - CEUR Workshop Proceedings (free, open-access publishing, computer science/information systems)</title>
    <link rel="shortcut icon" href="/static/ceur-ws.ico">
  </head>
  <body>
     {self.head_table_html()}
     <div>
"""
        # prepare the indexing
        # get the volumes as a list from 1 - top e.g. 3365
        volumes = list(self.volumes_by_number.values())
        # reverse the list
        volumes.reverse()
        # make sure upper and lower values are valid
        if upper is None:
            upper = volumes[0].number
        if lower is None:
            lower = 1
        # loop over the reversed list
        for vol_index in range(len(volumes)):
            vol = volumes[vol_index]
            vol_number = vol.number
            if vol_number > upper:
                continue
            if vol_number < lower:
                break
            if isinstance(vol.title, str):
                vol_title = escape(vol.title)
            else:
                vol_title = "Title missing (Might be one of the empty volumes)"
            if vol_title is None:
                pass
            html += f"""       <div style='bgcolor:#DCDBD7'>
         <b><a name='Vol-{vol_number}'>Vol-{vol_number}</a></b>
         <a href='/Vol-{vol_number}.html'>{vol_title}</a>
       </div>
"""
        html += """    </div>
  </body>
</html>"""
        return html

`ceurws_base`

`Boolean`

Bases: Bool

A binary (true or false) value

Source code in ceurspt/ceurws_base.py

class Boolean(Bool):
    """
    A binary (true or false) value
    """

    type_name = "boolean"
    type_class_curie = "xsd:boolean"
    type_class_uri = XSD.boolean
    type_model_uri = CEURWSSCHEMA.Boolean

`Date`

Bases: XSDDate

a date (year, month and day) in an idealized calendar

Source code in ceurspt/ceurws_base.py

class Date(XSDDate):
    """
    a date (year, month and day) in an idealized calendar
    """

    type_name = "date"
    type_class_curie = "xsd:date"
    type_class_uri = XSD.date
    type_model_uri = CEURWSSCHEMA.Date

`DateOrDatetime`

Bases: str

Either a date or a datetime

Source code in ceurspt/ceurws_base.py

class DateOrDatetime(str):
    """
    Either a date or a datetime
    """

    type_name = "date_or_datetime"
    type_class_curie = "linkml:DateOrDatetime"
    type_class_uri = LINKML.DateOrDatetime
    type_model_uri = CEURWSSCHEMA.DateOrDatetime

`Datetime`

Bases: XSDDateTime

The combination of a date and time

Source code in ceurspt/ceurws_base.py

class Datetime(XSDDateTime):
    """
    The combination of a date and time
    """

    type_name = "datetime"
    type_class_curie = "xsd:dateTime"
    type_class_uri = XSD.dateTime
    type_model_uri = CEURWSSCHEMA.Datetime

`Double`

Bases: float

A real number that conforms to the xsd:double specification

Source code in ceurspt/ceurws_base.py

class Double(float):
    """
    A real number that conforms to the xsd:double specification
    """

    type_name = "double"
    type_class_curie = "xsd:double"
    type_class_uri = XSD.double
    type_model_uri = CEURWSSCHEMA.Double

`Float`

Bases: float

A real number that conforms to the xsd:float specification

Source code in ceurspt/ceurws_base.py

class Float(float):
    """
    A real number that conforms to the xsd:float specification
    """

    type_name = "float"
    type_class_curie = "xsd:float"
    type_class_uri = XSD.float
    type_model_uri = CEURWSSCHEMA.Float

`Integer`

Bases: int

An integer

Source code in ceurspt/ceurws_base.py

class Integer(int):
    """
    An integer
    """

    type_name = "integer"
    type_class_curie = "xsd:integer"
    type_class_uri = XSD.integer
    type_model_uri = CEURWSSCHEMA.Integer

`Ncname`

Bases: NCName

Prefix part of CURIE

Source code in ceurspt/ceurws_base.py

class Ncname(NCName):
    """
    Prefix part of CURIE
    """

    type_name = "ncname"
    type_class_curie = "xsd:string"
    type_class_uri = XSD.string
    type_model_uri = CEURWSSCHEMA.Ncname

`Nodeidentifier`

Bases: NodeIdentifier

A URI, CURIE or BNODE that represents a node in a model.

Source code in ceurspt/ceurws_base.py

class Nodeidentifier(NodeIdentifier):
    """
    A URI, CURIE or BNODE that represents a node in a model.
    """

    type_name = "nodeidentifier"
    type_class_curie = "shex:nonLiteral"
    type_class_uri = SHEX.nonLiteral
    type_model_uri = CEURWSSCHEMA.Nodeidentifier

`Objectidentifier`

Bases: ElementIdentifier

A URI or CURIE that represents an object in the model.

Source code in ceurspt/ceurws_base.py

class Objectidentifier(ElementIdentifier):
    """
    A URI or CURIE that represents an object in the model.
    """

    type_name = "objectidentifier"
    type_class_curie = "shex:iri"
    type_class_uri = SHEX.iri
    type_model_uri = CEURWSSCHEMA.Objectidentifier

`Paper` `dataclass`

Bases: YAMLRoot

A paper is e.g. a scholarly article

Source code in ceurspt/ceurws_base.py

@dataclass
class Paper(YAMLRoot):
    """
    A paper is e.g. a scholarly article
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_name: ClassVar[str] = "Paper"
    class_class_curie: ClassVar[str] = "CeurwsSchema:Paper"
    class_class_uri: ClassVar[URIRef] = CEURWSSCHEMA.Paper
    class_model_uri: ClassVar[URIRef] = CEURWSSCHEMA.Paper

    description: Optional[str] = None
    id: Optional[str] = None
    wikidataid: Optional[str] = None
    title: Optional[str] = None
    pdfUrl: Optional[Union[str, URI]] = None
    volume: Optional[Union[dict, Volume]] = None
    session: Optional[Union[dict, Session]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """
        coerce my slot values to their declared types
        """
        # plain string slots
        for slot in ("description", "id", "wikidataid", "title"):
            value = getattr(self, slot)
            if not (value is None or isinstance(value, str)):
                setattr(self, slot, str(value))

        if not (self.pdfUrl is None or isinstance(self.pdfUrl, URI)):
            self.pdfUrl = URI(self.pdfUrl)

        # nested objects are rebuilt from their dict form
        if not (self.volume is None or isinstance(self.volume, Volume)):
            self.volume = Volume(**as_dict(self.volume))

        if not (self.session is None or isinstance(self.session, Session)):
            self.session = Session(**as_dict(self.session))

        super().__post_init__(**kwargs)

`Session` `dataclass`

Bases: YAMLRoot

A Session is a collection of papers as part of a Volume

Source code in ceurspt/ceurws_base.py

@dataclass
class Session(YAMLRoot):
    """
    A Session is a collection of papers as part of a Volume
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_name: ClassVar[str] = "Session"
    class_class_curie: ClassVar[str] = "CeurwsSchema:Session"
    class_class_uri: ClassVar[URIRef] = CEURWSSCHEMA.Session
    class_model_uri: ClassVar[URIRef] = CEURWSSCHEMA.Session

    title: Optional[str] = None
    volume: Optional[Union[dict, Volume]] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """
        coerce my slot values to their declared types
        """
        if not (self.title is None or isinstance(self.title, str)):
            self.title = str(self.title)

        # a volume given in dict form is rebuilt as a Volume
        if not (self.volume is None or isinstance(self.volume, Volume)):
            self.volume = Volume(**as_dict(self.volume))

        super().__post_init__(**kwargs)

`String`

Bases: str

A character string

Source code in ceurspt/ceurws_base.py

class String(str):
    """
    A character string
    """

    type_name = "string"
    type_class_curie = "xsd:string"
    type_class_uri = XSD.string
    type_model_uri = CEURWSSCHEMA.String

`Time`

Bases: XSDTime

A time object represents a (local) time of day, independent of any particular day

Source code in ceurspt/ceurws_base.py

class Time(XSDTime):
    """
    A time object represents a (local) time of day, independent of any particular day
    """

    type_name = "time"
    # NOTE(review): the class URI is xsd:dateTime rather than xsd:time -
    # presumably taken over from the schema generator; confirm before changing
    type_class_curie = "xsd:dateTime"
    type_class_uri = XSD.dateTime
    type_model_uri = CEURWSSCHEMA.Time

`Uri`

Bases: URI

a complete URI

Source code in ceurspt/ceurws_base.py

class Uri(URI):
    """
    a complete URI
    """

    type_name = "uri"
    type_class_curie = "xsd:anyURI"
    type_class_uri = XSD.anyURI
    type_model_uri = CEURWSSCHEMA.Uri

`Uriorcurie`

Bases: URIorCURIE

a URI or a CURIE

Source code in ceurspt/ceurws_base.py

class Uriorcurie(URIorCURIE):
    """
    a URI or a CURIE
    """

    type_name = "uriorcurie"
    type_class_curie = "xsd:anyURI"
    type_class_uri = XSD.anyURI
    type_model_uri = CEURWSSCHEMA.Uriorcurie

`Volume` `dataclass`

Bases: YAMLRoot

A Volume is a collection of papers mostly documenting the results of an academic event

Source code in ceurspt/ceurws_base.py

@dataclass
class Volume(YAMLRoot):
    """
    A Volume is a collection of papers mostly documenting the results of an academic event
    """

    _inherited_slots: ClassVar[List[str]] = []

    class_name: ClassVar[str] = "Volume"
    class_class_curie: ClassVar[str] = "CeurwsSchema:Volume"
    class_class_uri: ClassVar[URIRef] = CEURWSSCHEMA.Volume
    class_model_uri: ClassVar[URIRef] = CEURWSSCHEMA.Volume

    number: Optional[float] = None
    acronym: Optional[str] = None
    wikidataid: Optional[str] = None
    title: Optional[str] = None
    description: Optional[str] = None
    url: Optional[str] = None
    date: Optional[Union[str, XSDDate]] = None
    dblp: Optional[str] = None
    k10plus: Optional[str] = None
    urn: Optional[str] = None

    def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):
        """
        coerce my slot values to their declared types
        """
        if not (self.number is None or isinstance(self.number, float)):
            self.number = float(self.number)

        # string slots declared before the date slot
        for slot in ("acronym", "wikidataid", "title", "description", "url"):
            value = getattr(self, slot)
            if not (value is None or isinstance(value, str)):
                setattr(self, slot, str(value))

        if not (self.date is None or isinstance(self.date, XSDDate)):
            self.date = XSDDate(self.date)

        # string slots declared after the date slot
        for slot in ("dblp", "k10plus", "urn"):
            value = getattr(self, slot)
            if not (value is None or isinstance(value, str)):
                setattr(self, slot, str(value))

        super().__post_init__(**kwargs)

`dataclass_util`

Created on 30.03.2023

@author: wf

`DataClassUtil`

https://gist.github.com/gatopeich/1efd3e1e4269e1e98fae9983bb914f22

https://stackoverflow.com/a/54769644/1497139

Source code in ceurspt/dataclass_util.py

class DataClassUtil:
    """
    helper to create (nested) dataclass instances from dicts

    https://gist.github.com/gatopeich/1efd3e1e4269e1e98fae9983bb914f22

    https://stackoverflow.com/a/54769644/1497139
    """

    @classmethod
    def dataclass_from_dict(cls, klass, d):
        """
        recursively convert the given value to an instance of the given class

        Args:
            klass: the target type - a dataclass or the type of a field
            d: the value to convert - a dict of field values for a dataclass

        Returns:
            an instance of klass, or d unchanged if the conversion fails,
            e.g. because klass is not a dataclass
        """
        try:
            fieldtypes = {f.name: f.type for f in dataclasses.fields(klass)}
            return klass(**{f: cls.dataclass_from_dict(fieldtypes[f], d[f]) for f in d})
        except Exception:
            # Not a dataclass field - only regular errors trigger the fallback,
            # KeyboardInterrupt and SystemExit are passed on
            return d

`profiler`

Created on 27.03.2023

@author: wf

`Profiler`

simple profiler

Source code in ceurspt/profiler.py

class Profiler:
    """
    simple profiler that reports the time elapsed since its construction
    """

    def __init__(self, msg, profile=True):
        """
        remember the given msg and profile flag and take the start time

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
        """
        self.msg, self.profile = msg, profile
        self.starttime = time.time()
        if self.profile:
            print(f"Starting {msg} ...", flush=True)

    def time(self, extraMsg=""):
        """
        get the time elapsed since my start and print it if profile is active

        Args:
            extraMsg(str): text to append to my message in the output

        Returns:
            float: the elapsed time in seconds
        """
        duration = time.time() - self.starttime
        if not self.profile:
            return duration
        print(f"{self.msg}{extraMsg} took {duration:5.1f} s", flush=True)
        return duration

`__init__(msg, profile=True)`

construct me with the given msg and profile active flag

Parameters:

Name	Type	Description	Default
`msg(str)`		the message to show if profiling is active	required
`profile(bool)`		True if messages should be shown	`True`

Source code in ceurspt/profiler.py

def __init__(self, msg, profile=True):
    """
    remember the given msg and profile flag and take the start time

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
    """
    self.msg, self.profile = msg, profile
    self.starttime = time.time()
    if self.profile:
        print(f"Starting {msg} ...", flush=True)

`time(extraMsg='')`

time the action and print if profile is active

Source code in ceurspt/profiler.py

def time(self, extraMsg=""):
    """
    get the time elapsed since my start and print it if profile is active

    Args:
        extraMsg(str): text to append to my message in the output

    Returns:
        float: the elapsed time in seconds
    """
    duration = time.time() - self.starttime
    if not self.profile:
        return duration
    print(f"{self.msg}{extraMsg} took {duration:5.1f} s", flush=True)
    return duration

`spt_cmd`

Created on 2023-03-17

@author: wf

`CeurSptCmd`

command line interface for CEUR Single Point of Truth

Source code in ceurspt/spt_cmd.py

class CeurSptCmd:
    """
    command line interface for CEUR Single Point of Truth
    """

    def get_arg_parser(self, description: str, version_msg) -> ArgumentParser:
        """
        Setup command line argument parser

        Args:
            description(str): the description
            version_msg(str): the version message

        Returns:
            ArgumentParser: the argument parser
        """
        script_path = Path(__file__)
        # default volume directory: "ceur-ws" two levels above this file
        base_path = f"{script_path.parent.parent}/ceur-ws"
        base_url = "http://cvb.bitplan.com"
        parser = ArgumentParser(
            description=description, formatter_class=RawDescriptionHelpFormatter
        )
        parser.add_argument(
            "-a",
            "--about",
            help="show about info [default: %(default)s]",
            action="store_true",
        )
        parser.add_argument(
            "-b",
            "--basepath",
            help="the base path to the ceur-ws volumes [default: %(default)s]",
            default=base_path,
        )
        parser.add_argument(
            "-bu",
            "--baseurl",
            help="the base url to use for the RESTFul metadata service [default: %(default)s]",
            default=base_url,
        )
        parser.add_argument(
            "-d",
            "--debug",
            dest="debug",
            action="store_true",
            help="show debug info [default: %(default)s]",
        )
        parser.add_argument(
            "-rc",
            "--recreate",
            action="store_true",
            help="reload caches e.g. volume table",
        )

        parser.add_argument(
            "-v",
            "--verbose",
            action="store_true",
            help="show verbose infos e.g. on startup [default: %(default)s]",
        )
        parser.add_argument(
            "--host",
            default=self.get_default_host(),
            help="the host to serve / listen from [default: %(default)s]",
        )
        parser.add_argument(
            "--port",
            type=int,
            default=9990,
            help="the port to serve from [default: %(default)s]",
        )
        parser.add_argument(
            "-s",
            "--serve",
            action="store_true",
            help="start webserver [default: %(default)s]",
        )
        parser.add_argument("-V", "--version", action="version", version=version_msg)
        return parser

    def get_default_host(self) -> str:
        """
        get the default host as the fully qualifying hostname
        of the computer the server runs on

        Returns:
            str: the hostname
        """
        host = socket.getfqdn()
        # work around https://github.com/python/cpython/issues/79345
        if (
            host
            == "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa"
        ):
            host = "localhost"  # host="127.0.0.1"
        return host

    def recreate(self, args: Namespace):
        """
        recreate the caches

        loads each list of dicts via the JsonCacheManager and stores it
        again, reporting the time needed for both steps

        Args:
            args(Arguments): command line arguments
        """
        jcm = JsonCacheManager(base_url=args.baseurl)
        for lod_name in [
            "volumes",
            "papers",
            "proceedings",
            "authors_dblp",
            "papers_dblp",
        ]:
            profiler = Profiler(f"read {lod_name} ...", profile=True)
            lod = jcm.load_lod(lod_name)
            profiler.time(f" read {len(lod)} {lod_name}")
            # start the store profiler before storing so that the
            # reported time covers the store operation
            profiler = Profiler(f"store {lod_name} ...", profile=True)
            jcm.store(lod_name, lod)
            profiler.time(f" store {len(lod)} {lod_name}")

    def start(self, args: Namespace):
        """
        load the volumes and papers and run the webserver

        Args:
            args(Arguments): command line arguments
        """
        vm = VolumeManager(base_path=args.basepath, base_url=args.baseurl)
        vm.getVolumes(args.verbose)
        pm = PaperManager(base_url=args.baseurl)
        pm.getPapers(vm, args.verbose)
        ws = WebServer(vm, pm)
        uvicorn.run(ws.app, host=args.host, port=args.port)

`get_arg_parser(description, version_msg)`

Setup command line argument parser

Parameters:

Name	Type	Description	Default
`description(str)`		the description	required
`version_msg(str)`		the version message	required

Returns:

Name	Type	Description
`ArgumentParser`	`ArgumentParser`	the argument parser

Source code in ceurspt/spt_cmd.py

def get_arg_parser(self, description: str, version_msg) -> ArgumentParser:
    """
    Create the command line argument parser with all supported options.

    Args:
        description(str): the description
        version_msg(str): the version message

    Returns:
        ArgumentParser: the argument parser
    """
    # the default volumes directory is a sibling of this script's package directory
    default_base_path = f"{Path(__file__).parent.parent}/ceur-ws"
    default_base_url = "http://cvb.bitplan.com"
    # (option strings, keyword arguments) pairs in the order of registration
    option_specs = [
        (
            ("-a", "--about"),
            {
                "action": "store_true",
                "help": "show about info [default: %(default)s]",
            },
        ),
        (
            ("-b", "--basepath"),
            {
                "default": default_base_path,
                "help": "the base path to the ceur-ws volumes [default: %(default)s]",
            },
        ),
        (
            ("-bu", "--baseurl"),
            {
                "default": default_base_url,
                "help": "the base url to use for the RESTFul metadata service [default: %(default)s]",
            },
        ),
        (
            ("-d", "--debug"),
            {
                "dest": "debug",
                "action": "store_true",
                "help": "show debug info [default: %(default)s]",
            },
        ),
        (
            ("-rc", "--recreate"),
            {
                "action": "store_true",
                "help": "reload caches e.g. volume table",
            },
        ),
        (
            ("-v", "--verbose"),
            {
                "action": "store_true",
                "help": "show verbose infos e.g. on startup [default: %(default)s]",
            },
        ),
        (
            ("--host",),
            {
                "default": self.get_default_host(),
                "help": "the host to serve / listen from [default: %(default)s]",
            },
        ),
        (
            ("--port",),
            {
                "type": int,
                "default": 9990,
                "help": "the port to serve from [default: %(default)s]",
            },
        ),
        (
            ("-s", "--serve"),
            {
                "action": "store_true",
                "help": "start webserver [default: %(default)s]",
            },
        ),
        (
            ("-V", "--version"),
            {
                "action": "version",
                "version": version_msg,
            },
        ),
    ]
    arg_parser = ArgumentParser(
        formatter_class=RawDescriptionHelpFormatter, description=description
    )
    for option_strings, keywords in option_specs:
        arg_parser.add_argument(*option_strings, **keywords)
    return arg_parser

`get_default_host()`

get the default host as the fully qualifying hostname of the computer the server runs on

Returns:

Name	Type	Description
`str`	`str`	the hostname

Source code in ceurspt/spt_cmd.py

def get_default_host(self) -> str:
    """
    Determine the default host name to serve from.

    The fully qualified host name reported by ``socket.getfqdn()`` is
    used, unless it equals the ip6.arpa name handled as a workaround for
    https://github.com/python/cpython/issues/79345, in which case
    "localhost" is used instead.

    Returns:
        str: the hostname
    """
    # the name that is replaced by "localhost" (alternative: "127.0.0.1")
    ip6_arpa_name = (
        "1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa"
    )
    fqdn = socket.getfqdn()
    return "localhost" if fqdn == ip6_arpa_name else fqdn

`recreate(args)`

recreate the caches

Parameters:

Name	Type	Description	Default
`args(Arguments)`		command line arguments	required

Source code in ceurspt/spt_cmd.py

def recreate(self, args: Namespace):
    """
    recreate the caches

    Each list of dicts is loaded via the JsonCacheManager and then
    stored again; both steps are timed with a Profiler.

    Args:
        args(Namespace): command line arguments - args.baseurl is used
            as the base url of the JsonCacheManager
    """
    jcm = JsonCacheManager(base_url=args.baseurl)
    for lod_name in [
        "volumes",
        "papers",
        "proceedings",
        "authors_dblp",
        "papers_dblp",
    ]:
        profiler = Profiler(f"read {lod_name} ...", profile=True)
        lod = jcm.load_lod(lod_name)
        profiler.time(f" read {len(lod)} {lod_name}")
        # create the store profiler BEFORE storing - otherwise the
        # reported time would not cover the store operation at all
        profiler = Profiler(f"store {lod_name} ...", profile=True)
        jcm.store(lod_name, lod)
        profiler.time(f" store {len(lod)} {lod_name}")

`start(args)`

Parameters:

Name	Type	Description	Default
`args(Arguments)`		command line arguments	required

Source code in ceurspt/spt_cmd.py

def start(self, args: Namespace):
    """
    Load volumes and papers and run the webserver with uvicorn.

    Args:
        args(Namespace): command line arguments - basepath, baseurl,
            verbose, host and port are used
    """
    volume_manager = VolumeManager(base_path=args.basepath, base_url=args.baseurl)
    volume_manager.getVolumes(args.verbose)
    paper_manager = PaperManager(base_url=args.baseurl)
    # the papers are fetched with the help of the volume manager
    paper_manager.getPapers(volume_manager, args.verbose)
    web_server = WebServer(volume_manager, paper_manager)
    uvicorn.run(web_server.app, host=args.host, port=args.port)

`main(argv=None)`

main program.

Source code in ceurspt/spt_cmd.py

def main(argv=None):  # IGNORE:C0111
    """
    main program.

    Parses the command line, optionally shows the about information and
    then either recreates the caches or starts the webserver.

    Args:
        argv(list): the command line arguments without the program name;
            sys.argv[1:] is used if None is given

    Returns:
        int: 1 on keyboard interrupt, 2 if an exception was handled;
            None if the selected actions completed
    """

    if argv is None:
        argv = sys.argv[1:]

    program_name = "ceurspt"
    program_version = f"v{Version.version}"
    program_build_date = str(Version.date)
    program_version_message = f"{program_name} ({program_version},{program_build_date})"

    args = None
    try:
        spt_cmd = CeurSptCmd()
        parser = spt_cmd.get_arg_parser(
            description=Version.license, version_msg=program_version_message
        )
        args = parser.parse_args(argv)
        # without any argument there is nothing to do - show the usage
        if len(argv) < 1:
            parser.print_usage()
            sys.exit(1)
        if args.about:
            print(program_version_message)
            print(f"see {Version.doc_url}")
            webbrowser.open(Version.doc_url)
        # recreate takes precedence over serve
        if args.recreate:
            spt_cmd.recreate(args)
        elif args.serve:
            spt_cmd.start(args)

    except KeyboardInterrupt:
        ###
        # handle keyboard interrupt
        # ###
        return 1
    except Exception as e:
        if DEBUG:
            # re-raise unchanged so that the original traceback is shown
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        # terminate the hint with a newline so that following output
        # does not continue on the same line
        sys.stderr.write(indent + "  for help use --help\n")
        if args is None:
            print("args could not be parsed")
        elif args.debug:
            print(traceback.format_exc())
        return 2

`version`

Created on 2022-09-11

@author: wf

`Version`

Bases: object

Version handling for VolumeBrowser

Source code in ceurspt/version.py

class Version(object):
    """
    Version and metadata information of the
    CEUR-WS Single Point of Truth server
    """

    name = ""
    version = ceurspt.__version__
    date = "2023-03-17"
    updated = "2023-12-29"
    # a plain string - a trailing comma here would turn the description
    # into a 1-tuple and garble the longDescription below
    description = "CEUR-WS Single Point of Truth RestFUL server"

    authors = "Tim Holzheim, Wolfgang Fahl"

    doc_url = "https://github.com/ceurws/ceur-spt"
    chat_url = "https://github.com/ceurws/ceur-spt/discussions"
    cm_url = "https://github.com/ceurws/ceur-spt"

    # no placeholders needed - therefore a plain (non f-) string
    license = """Copyright 2023 contributors. All rights reserved.

  Licensed under the Apache License 2.0
  https://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied."""
    longDescription = f"""{name} version {version}
{description}

  Created by {authors} on {date} last updated {updated}"""

`webserver`

Created on 2023-03-17

@author: wf

`WebServer`

the webserver

Source code in ceurspt/webserver.py

class WebServer:
    """
    the webserver

    Wraps a FastAPI application serving volumes and papers
    in several formats (html, pdf, json, yaml, bibtex, ...)
    """

    def __init__(
        self, vm: VolumeManager, pm: PaperManager, static_directory: str = "static"
    ):
        """
        constructor

        Args:
            vm(VolumeManager): the volume manager to use
            pm(PaperManager): the paper manager to use
            static_directory(str): the directory for static html files to use
        """
        self.app = FastAPI()
        # https://fastapi.tiangolo.com/tutorial/static-files/
        self.app.mount(
            "/static", StaticFiles(directory=static_directory), name="static"
        )
        self.vm = vm
        self.pm = pm

        # NOTE(review): self.getPaper is called with its default
        # exceptionOnFail=True in all handlers below, so it raises a 404
        # HTTPException for unknown papers and the "else" branches that
        # follow "if paper:" are only reached if that default changes

        @self.app.get("/index.html/{upper:int}/{lower:int}")
        async def index_html(upper: Optional[int], lower: Optional[int]):
            """
            get the volume index for the given upper and lower limits
            """
            content = self.vm.index_html(upper=upper, lower=lower)
            return HTMLResponse(content)

        @self.app.get("/index.html")
        async def full_index_html():
            """
            get the volume index without limits
            """
            return await index_html(upper=None, lower=None)

        @self.app.get("/Vol-{number:int}/{pdf_name:str}.pdf")
        async def paperPdf(number: int, pdf_name: str):
            """
            get the PDF for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            pdf = paper.getPdf()
            return FileResponse(pdf)

        @self.app.get("/Vol-{number:int}/{pdf_name}.json")
        async def paperJson(number: int, pdf_name: str):
            """
            get the json response for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.getMergedDict()
            return paper_dict

        @self.app.get("/Vol-{number:int}/{pdf_name}.wbjson")
        async def paperWikibaseCliJson(number: int, pdf_name: str):
            """
            get the json response to the wikibase-cli for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.as_wb_dict()
            return paper_dict

        @self.app.get("/Vol-{number:int}/{pdf_name}/{qid}.wbcli")
        async def paperWikibaseCli(number: int, pdf_name, qid: str):
            """
            get the plain text response to the wikibase-cli
            for the given paper and qid
            """
            paper = self.getPaper(number, pdf_name)
            paper_cli_text = paper.as_wbi_cli_text(qid)
            return PlainTextResponse(paper_cli_text)

        @self.app.get("/Vol-{number:int}/{pdf_name}.html")
        async def paperHtml(number: int, pdf_name: str):
            """
            get the html response for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            content = paper.asHtml()
            return HTMLResponse(content=content)

        @self.app.get("/Vol-{number:int}/{pdf_name}.txt")
        async def paperText(number: int, pdf_name: str):
            """
            get the text for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            text = paper.getText()
            return PlainTextResponse(text)

        @self.app.get("/Vol-{number:int}/{pdf_name}.smw")
        async def paperSMW(number: int, pdf_name: str):
            """
            Get semantic media wiki markup of the given paper
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                markup = paper.as_smw_markup()
            else:
                markup = f"""{{{{Paper
|id=Vol-{number}/{pdf_name}
|volume=Vol-{number}
}}}}"""
            return PlainTextResponse(markup)

        @self.app.get("/Vol-{number:int}/{pdf_name}.qs")
        async def paperQuickStatementns(number: int, pdf_name: str):
            """
            get the quickstatements for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            qs = paper.as_quickstatements()
            return PlainTextResponse(qs)

        @self.app.get("/Vol-{number:int}/{pdf_name}.grobid")
        async def paperGrobidXml(number: int, pdf_name: str):
            """
            get the grobid XML for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            xml = paper.getContentByPostfix(".tei.xml")
            return Response(content=xml, media_type="application/xml")

        @self.app.get("/Vol-{number:int}/{pdf_name}.cermine")
        async def paperCermineXml(number: int, pdf_name: str):
            """
            get the cermine XML for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            xml = paper.getContentByPostfix(".cermine.xml")
            return Response(content=xml, media_type="application/xml")

        @self.app.get("/Vol-{number:int}.smw")
        async def volumeSMW(number: int):
            """
            Get semantic media wiki markup of volume by given id
            """
            vol = self.getVolume(number)
            if vol:
                markup = vol.as_smw_markup()
            else:
                markup = f"{{{{Volume|number={number}}}}}"
            return PlainTextResponse(markup)

        @self.app.get("/Vol-{number:int}.json")
        async def volumeJson(number: int):
            """
            Get metadata of volume by given id
            """
            vol = self.getVolume(number)
            if vol:
                return vol.getMergedDict()
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/Vol-{number:int}")
        async def volumeHtmlWithPdf(number: int):
            """
            get html Response for the given volume by number
            displaying pdfs directly
            """
            return self.volumeHtml(number, ext=".pdf")

        @self.app.get("/Vol-{number:int}.html")
        async def volumeHtmlWithHtml(number: int):
            """
            get html Response for the given volume by number
            displaying pdfs embedded in html
            """
            return self.volumeHtml(number, ext=".html")

        @self.app.get("/")
        async def home():
            """
            Return the home
            """
            url = "https://github.com/ceurws/ceur-spt"
            response = RedirectResponse(url=url, status_code=302)
            return response

        @self.app.get("/volume/{number:int}", tags=["json"])
        async def volume_citation(number: int):
            """
            Get volume record
            """
            vol = self.getVolume(number)
            if vol:
                record = vol.getMergedDict()
                return record
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/volume/{number:int}/paper", tags=["json"])
        async def volume_citation_paper_by_number(number: int):
            """
            Get volume papers
            """
            vol = self.getVolume(number)
            if vol:
                paper_records = []
                for paper in vol.papers:
                    paper_records.append(paper.getMergedDict())
                return paper_records
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/volume/{number:int}/citation", tags=["citation"])
        async def volume_citation_citation(number: int):
            """
            Get volume citation
            """
            vol = self.getVolume(number)
            if vol:
                citation = BibTexConverter.convert_volume(vol)
                return PlainTextResponse(content=citation)
            else:
                return {"error": f"unknown volume number {number}"}

        # this route must be registered BEFORE the generic
        # "/volume/{number:int}/paper/{pdf_name:str}" route: routes are
        # matched in registration order and the generic pattern also matches
        # paths ending in ".yaml" which would shadow this handler
        @self.app.get("/volume/{number:int}/paper/{pdf_name:str}.yaml", tags=["yaml"])
        async def volume_paper_yaml(number: int, pdf_name: str):
            """
            Get paper record as yaml
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                paper_dict = paper.getMergedDict()
            else:
                paper_dict = {
                    "error": f"unknown volume number {number} or paper {pdf_name}"
                }
            yaml_content = yaml.dump(paper_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/volume/{number:int}/paper/{pdf_name:str}", tags=["json"])
        async def volume_citation_paper_by_name(number: int, pdf_name: str):
            """
            Get paper record
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                record = paper.getMergedDict()
                return record
            else:
                return {"error": f"unknown volume number {number} or paper {pdf_name}"}

        @self.app.get(
            "/volume/{number:int}/paper/{pdf_name:str}/citation", tags=["citation"]
        )
        async def volume_paper_citation(number: int, pdf_name: str):
            """
            Get paper citation
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                citation = BibTexConverter.convert_paper(paper)
                return PlainTextResponse(content=citation)
            else:
                return {"error": f"unknown volume number {number} or paper {pdf_name}"}

        @self.app.get("/Vol-{number:int}/{pdf_name}.yaml")
        async def paperYaml(number: int, pdf_name: str):
            """
            get the yaml response for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.getMergedDict()
            yaml_content = yaml.dump(paper_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/Vol-{number:int}.yaml")
        async def volumeYaml(number: int):
            """
            Get metadata of volume by given id as yaml
            """
            vol = self.getVolume(number)
            if vol:
                volume_dict = vol.getMergedDict()
            else:
                volume_dict = {"error": f"unknown volume number {number}"}
            yaml_content = yaml.dump(volume_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/volume/{number:int}/paper.yaml", tags=["yaml"])
        async def volume_papers_yaml(number: int):
            """
            Get volume papers as yaml
            """
            vol = self.getVolume(number)
            if vol:
                paper_records = [paper.getMergedDict() for paper in vol.papers]
            else:
                paper_records = {"error": f"unknown volume number {number}"}
            yaml_content = yaml.dump(paper_records)
            return Response(content=yaml_content, media_type="application/x-yaml")

    def volumeHtml(self, number: int, ext: str = ".pdf") -> HTMLResponse:
        """
        get html Response for the given volume by number

        Args:
            number(int): volume number
            ext(str): file extension

        Returns:
            HTMLResponse: the html page of the volume

        Raises:
            HTTPException: with status code 404 if the volume is not known
        """
        vol = self.getVolume(number)
        if not vol:
            # there is no volume instance to create a page from - calling a
            # method on the missing volume would fail with an AttributeError
            raise HTTPException(
                status_code=404, detail=f"volume Vol-{number} not found"
            )
        content = vol.getHtml(ext=ext, fixLinks=True)
        return HTMLResponse(content=content, status_code=200)

    def getVolume(self, number: int) -> Optional[Volume]:
        """
        get the volume for the given number

        Args:
            number(int): the number of the volume to fetch

        Returns:
            Volume: the volume or None if the volume number is not known
        """
        vol = self.vm.getVolume(number)
        return vol

    def getPaper(
        self, number: int, pdf_name: str, exceptionOnFail: bool = True
    ) -> Optional[Paper]:
        """
        get the paper for the given volume number and pdf_name

        Args:
            number(int): the number of the volume the paper is part of
            pdf_name(str): the pdf name of the paper
            exceptionOnFail(bool): if True raise an exception on failure

        Returns:
            Paper: the paper or None if the paper is not found
                (and exceptionOnFail is False)

        Raises:
            HTTPException: with status code 404 if the paper is not found
                and exceptionOnFail is True
        """
        paper = self.pm.getPaper(number, pdf_name)
        if paper is None and exceptionOnFail:
            raise HTTPException(
                status_code=404, detail=f"paper Vol-{number}/{pdf_name}.pdf not found"
            )
        return paper

`init(vm, pm, static_directory='static')`

constructor

Parameters:

Name	Description	Default
`vm(VolumeManager)`	the volume manager to use	required
`pm(PaperManager)`	the paper manager to use	required
`static_directory(str)`	the directory for static html files to use	`'static'`

Source code in ceurspt/webserver.py

    def __init__(
        self, vm: VolumeManager, pm: PaperManager, static_directory: str = "static"
    ):
        """
        constructor

        Args:
            vm(VolumeManager): the volume manager to use
            pm(PaperManager): the paper manager to use
            static_directory(str): the directory for static html files to use
        """
        self.app = FastAPI()
        # https://fastapi.tiangolo.com/tutorial/static-files/
        self.app.mount(
            "/static", StaticFiles(directory=static_directory), name="static"
        )
        self.vm = vm
        self.pm = pm

        @self.app.get("/index.html/{upper:int}/{lower:int}")
        async def index_html(upper: Optional[int], lower: Optional[int]):
            content = self.vm.index_html(upper=upper, lower=lower)
            return HTMLResponse(content)

        @self.app.get("/index.html")
        async def full_index_html():
            return await index_html(upper=None, lower=None)

        @self.app.get("/Vol-{number:int}/{pdf_name:str}.pdf")
        async def paperPdf(number: int, pdf_name: str):
            """
            get the PDF for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            pdf = paper.getPdf()
            return FileResponse(pdf)

        @self.app.get("/Vol-{number:int}/{pdf_name}.json")
        async def paperJson(number: int, pdf_name: str):
            """
            get the json response for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.getMergedDict()
            return paper_dict

        @self.app.get("/Vol-{number:int}/{pdf_name}.wbjson")
        async def paperWikibaseCliJson(number: int, pdf_name: str):
            """
            get the json response to the wikibase-cli for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.as_wb_dict()
            return paper_dict

        @self.app.get("/Vol-{number:int}/{pdf_name}/{qid}.wbcli")
        async def paperWikibaseCli(number: int, pdf_name, qid: str):
            """
            get the json response to the wikibase-cli for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            paper_cli_text = paper.as_wbi_cli_text(qid)
            return PlainTextResponse(paper_cli_text)

        @self.app.get("/Vol-{number:int}/{pdf_name}.html")
        async def paperHtml(number: int, pdf_name: str):
            """
            get the html response for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            content = paper.asHtml()
            return HTMLResponse(content=content)

        @self.app.get("/Vol-{number:int}/{pdf_name}.txt")
        async def paperText(number: int, pdf_name: str):
            """
            get the text for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            text = paper.getText()
            return PlainTextResponse(text)

        @self.app.get("/Vol-{number:int}/{pdf_name}.smw")
        async def paperSMW(number: int, pdf_name: str):
            """
            Get semantic media wiki markup of the given paper"""
            paper = self.getPaper(number, pdf_name)
            if paper:
                markup = paper.as_smw_markup()
            else:
                markup = f"""{{{{Paper
|id=Vol-{number}/{pdf_name}
|volume=Vol-{number}
}}}}"""
            return PlainTextResponse(markup)

        @self.app.get("/Vol-{number:int}/{pdf_name}.qs")
        async def paperQuickStatementns(number: int, pdf_name: str):
            """
            get the quickstatements for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            qs = paper.as_quickstatements()
            return PlainTextResponse(qs)

        @self.app.get("/Vol-{number:int}/{pdf_name}.grobid")
        async def paperGrobidXml(number: int, pdf_name: str):
            """
            get the grobid XML for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            xml = paper.getContentByPostfix(".tei.xml")
            return Response(content=xml, media_type="application/xml")

        @self.app.get("/Vol-{number:int}/{pdf_name}.cermine")
        async def paperCermineXml(number: int, pdf_name: str):
            """
            get the grobid XML for the given paper
            """
            paper = self.getPaper(number, pdf_name)
            xml = paper.getContentByPostfix(".cermine.xml")
            return Response(content=xml, media_type="application/xml")

        @self.app.get("/Vol-{number:int}.smw")
        async def volumeSMW(number: int):
            """
            Get semantic media wiki markup of volume by given id
            """
            vol = self.getVolume(number)
            if vol:
                markup = vol.as_smw_markup()
            else:
                markup = f"{{{{Volume|number={number}}}}}"
            return PlainTextResponse(markup)

        @self.app.get("/Vol-{number:int}.json")
        async def volumeJson(number: int):
            """
            Get metadata of volume by given id
            """
            vol = self.getVolume(number)
            if vol:
                return vol.getMergedDict()
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/Vol-{number:int}")
        async def volumeHtmlWithPdf(number: int):
            """
            get html Response for the given volume by number
            displaying pdfs directly
            """
            return self.volumeHtml(number, ext=".pdf")

        @self.app.get("/Vol-{number:int}.html")
        async def volumeHtmlWithHtml(number: int):
            """
            get html Response for the given volume by number
            displaying pdfs embedded in html
            """
            return self.volumeHtml(number, ext=".html")

        @self.app.get("/")
        async def home():
            """
            Return the home
            """
            url = "https://github.com/ceurws/ceur-spt"
            response = RedirectResponse(url=url, status_code=302)
            return response

        @self.app.get("/volume/{number:int}", tags=["json"])
        async def volume_citation(number: int):
            """
            Get volume record
            """
            vol = self.getVolume(number)
            if vol:
                record = vol.getMergedDict()
                return record
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/volume/{number:int}/paper", tags=["json"])
        async def volume_citation_paper_by_number(number: int):
            """
            Get volume papers
            """
            vol = self.getVolume(number)
            if vol:
                paper_records = []
                for paper in vol.papers:
                    paper_records.append(paper.getMergedDict())
                return paper_records
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/volume/{number:int}/citation", tags=["citation"])
        async def volume_citation_citation(number: int):
            """
            Get volume citation
            """
            vol = self.getVolume(number)
            if vol:
                citation = BibTexConverter.convert_volume(vol)
                return PlainTextResponse(content=citation)
            else:
                return {"error": f"unknown volume number {number}"}

        @self.app.get("/volume/{number:int}/paper/{pdf_name:str}", tags=["json"])
        async def volume_citation_paper_by_name(number: int, pdf_name: str):
            """
            Get paper citation
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                record = paper.getMergedDict()
                return record
            else:
                return {"error": f"unknown volume number {number} or paper {pdf_name}"}

        @self.app.get(
            "/volume/{number:int}/paper/{pdf_name:str}/citation", tags=["citation"]
        )
        async def volume_paper_citation(number: int, pdf_name: str):
            """
            Get paper citation
            """
            paper = self.getPaper(number, pdf_name)
            if paper:
                citation = BibTexConverter.convert_paper(paper)
                return PlainTextResponse(content=citation)
            else:
                return {"error": f"unknown volume number {number} or paper {pdf_name}"}

        @self.app.get("/Vol-{number:int}/{pdf_name}.yaml")
        async def paperYaml(number: int, pdf_name: str):
            paper = self.getPaper(number, pdf_name)
            paper_dict = paper.getMergedDict()
            yaml_content = yaml.dump(paper_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/Vol-{number:int}.yaml")
        async def volumeYaml(number: int):
            vol = self.getVolume(number)
            if vol:
                volume_dict = vol.getMergedDict()
            else:
                volume_dict = {"error": f"unknown volume number {number}"}
            yaml_content = yaml.dump(volume_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/volume/{number:int}/paper.yaml", tags=["yaml"])
        async def volume_papers_yaml(number: int):
            vol = self.getVolume(number)
            if vol:
                paper_records = [paper.getMergedDict() for paper in vol.papers]
            else:
                paper_records = {"error": f"unknown volume number {number}"}
            yaml_content = yaml.dump(paper_records)
            return Response(content=yaml_content, media_type="application/x-yaml")

        @self.app.get("/volume/{number:int}/paper/{pdf_name:str}.yaml", tags=["yaml"])
        async def volume_paper_yaml(number: int, pdf_name: str):
            paper = self.getPaper(number, pdf_name)
            if paper:
                paper_dict = paper.getMergedDict()
            else:
                paper_dict = {
                    "error": f"unknown volume number {number} or paper {pdf_name}"
                }
            yaml_content = yaml.dump(paper_dict)
            return Response(content=yaml_content, media_type="application/x-yaml")

`getPaper(number, pdf_name, exceptionOnFail=True)`

get the paper for the given volume number and pdf_name

Parameters:

Name	Description	Default
`number(int)`	the number of the volume the paper is part of	required
`pdf_name(str)`	the pdf name of the paper	required
`exceptionOnFail(bool)`	if True raise an exception on failure	`True`

Returns:

Name	Type	Description
`Paper`	`Paper`	the paper or None if the paper is not found

Source code in ceurspt/webserver.py

def getPaper(
    self, number: int, pdf_name: str, exceptionOnFail: bool = True
) -> Paper:
    """
    Look up a paper via the paper manager.

    Args:
        number(int): the number of the volume the paper is part of
        pdf_name(str): the pdf name of the paper
        exceptionOnFail(bool): if True raise an exception on failure

    Returns:
        Paper: the paper or None if the paper is not found

    Raises:
        HTTPException: with status code 404 if the paper is not found
            and exceptionOnFail is True
    """
    found_paper = self.pm.getPaper(number, pdf_name)
    # success or tolerated failure: hand the lookup result back as is
    if found_paper is not None or not exceptionOnFail:
        return found_paper
    not_found_msg = f"paper Vol-{number}/{pdf_name}.pdf not found"
    raise HTTPException(status_code=404, detail=not_found_msg)

`getVolume(number)`

get the volume for the given number

Parameters:

Name	Type	Description	Default
`number(int)`		the number of the volume to fetch	required

Returns:

Name	Type	Description
`Volume`	`Volume`	the volume or None if the volume number is not known

Source code in ceurspt/webserver.py

def getVolume(self, number: int) -> Volume:
    """
    Look up a volume via the volume manager.

    Args:
        number(int): the number of the volume to fetch

    Returns:
        Volume: the volume or None if the volume number is not known
    """
    # plain delegation to the volume manager
    return self.vm.getVolume(number)

`volumeHtml(number, ext='.pdf')`

get html Response for the given volume by number

Parameters:

Name	Description	Default
`number`	volume number	required
`ext`	file extension	`'.pdf'`

Source code in ceurspt/webserver.py

def volumeHtml(self, number: int, ext: str = ".pdf") -> HTMLResponse:
    """
    get html Response for the given volume by number

    Args:
        number: volume number
        ext: file extension

    Returns:
        HTMLResponse: the volume's html page, or the empty volume page
            when the looked-up volume object is falsy but not None

    Raises:
        HTTPException: with status code 404 if getVolume returns None
            for the given number
    """
    vol = self.getVolume(number)
    if vol is None:
        # There is no volume object to ask for an empty page; calling
        # get_empty_volume_page on None would raise AttributeError.
        raise HTTPException(
            status_code=404, detail=f"volume Vol-{number} not found"
        )
    if vol:
        content = vol.getHtml(ext=ext, fixLinks=True)
    else:
        content = vol.get_empty_volume_page()
    return HTMLResponse(content=content, status_code=200)

pyCEURspt API Documentation

bibtex

BibTexConverter

convert_paper(paper) classmethod

convert_volume(volume) classmethod

InProceedingsEntry dataclass

to_bibtex_record(crossref=None)

ProceedingsEntry dataclass

from_volume(volume) classmethod

ceurws

JsonCacheManager

__init__(base_url='http://cvb.bitplan.com')

json_path(lod_name)

load_lod(lod_name)

store(lod_name, lod)

Paper dataclass

asHtml()

as_quickstatements()

as_smw_markup()

as_wb_dict()

as_wbi_cli_text(qid)

getAuthorBar()

getAuthorIndex(name, authors)

getAuthors()

getBasePath()

getContentByPostfix(postfix)

getContentPathByPostfix(postfix)

getIconBar(soup)

getMergedDict()

getPdf()

getText()

next(inc=1)

paperLinkParts(inc=0)

paperScrollLinks()

prev()

PaperManager

__init__(base_url)

getPaper(number, pdf_name)

getPapers(vm, verbose=False)

get_volume_papers(number)

Scholar dataclass

Volume

addPaper(paper)

add_volume_navigation(soup)

as_smw_markup()

create_icon_bar(soup, icon_list, class_name='icon_list') classmethod

create_icon_list(soup, icon_list) classmethod

fix_element_tag(element, tag='href', ext='.pdf')

getHtml(ext='.pdf', fixLinks=True)

getIconBar(soup)

getMergedDict()

get_empty_volume_page(content_html=None)

volLink(number, inc=0) classmethod

volLinkParts(number, inc=0) classmethod

volLink_soup_tag(soup, number, inc=0) classmethod

VolumeManager

__init__(base_path, base_url)

getVolume(number)

getVolumes(verbose=False)

head_table_html()

index_html(upper=None, lower=None)

ceurws_base

Boolean

Date

DateOrDatetime

Datetime

Double

Float

Integer

Ncname

Nodeidentifier

Objectidentifier

Paper dataclass

Session dataclass

String

Time

Uri

Uriorcurie

Volume dataclass

dataclass_util

`bibtex`

`BibTexConverter`

`convert_paper(paper)` `classmethod`

`convert_volume(volume)` `classmethod`

`InProceedingsEntry` `dataclass`

`to_bibtex_record(crossref=None)`

`ProceedingsEntry` `dataclass`

`from_volume(volume)` `classmethod`

`ceurws`

`JsonCacheManager`

`__init__(base_url='http://cvb.bitplan.com')`

`json_path(lod_name)`

`load_lod(lod_name)`

`store(lod_name, lod)`

`Paper` `dataclass`

`asHtml()`

`as_quickstatements()`

`as_smw_markup()`

`as_wb_dict()`

`as_wbi_cli_text(qid)`

`getAuthorBar()`

`getAuthorIndex(name, authors)`

`getAuthors()`

`getBasePath()`

`getContentByPostfix(postfix)`

`getContentPathByPostfix(postfix)`

`getIconBar(soup)`

`getMergedDict()`

`getPdf()`

`getText()`

`next(inc=1)`

`paperLinkParts(inc=0)`

`paperScrollLinks()`

`prev()`

`PaperManager`

`__init__(base_url)`

`getPaper(number, pdf_name)`

`getPapers(vm, verbose=False)`

`get_volume_papers(number)`

`Scholar` `dataclass`

`Volume`

`addPaper(paper)`

`add_volume_navigation(soup)`

`as_smw_markup()`

`create_icon_bar(soup, icon_list, class_name='icon_list')` `classmethod`

`create_icon_list(soup, icon_list)` `classmethod`

`fix_element_tag(element, tag='href', ext='.pdf')`

`getHtml(ext='.pdf', fixLinks=True)`

`getIconBar(soup)`

`getMergedDict()`

`get_empty_volume_page(content_html=None)`

`volLink(number, inc=0)` `classmethod`

`volLinkParts(number, inc=0)` `classmethod`

`volLink_soup_tag(soup, number, inc=0)` `classmethod`

`VolumeManager`

`__init__(base_path, base_url)`

`getVolume(number)`

`getVolumes(verbose=False)`

`head_table_html()`

`index_html(upper=None, lower=None)`

`ceurws_base`

`Boolean`

`Date`

`DateOrDatetime`

`Datetime`

`Double`

`Float`

`Integer`

`Ncname`

`Nodeidentifier`

`Objectidentifier`

`Paper` `dataclass`

`Session` `dataclass`

`String`

`Time`

`Uri`

`Uriorcurie`

`Volume` `dataclass`

`dataclass_util`

`DataClassUtil`