<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/publishing/1.2/JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="other" dtd-version="1.2" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="pmc">F1000Research</journal-id>
            <journal-title-group>
                <journal-title>F1000Research</journal-title>
            </journal-title-group>
            <issn pub-type="epub">2046-1402</issn>
            <publisher>
                <publisher-name>F1000 Research Limited</publisher-name>
                <publisher-loc>London, UK</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="doi">10.12688/f1000research.54159.1</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Opinion Article</subject>
                </subj-group>
                <subj-group>
                    <subject>Articles</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Perspectives on automated composition of workflows in the life sciences</article-title>
                <fn-group content-type="pub-status">
                    <fn>
                        <p>[version 1; peer review: 2 approved]</p>
                    </fn>
                </fn-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Lamprecht</surname>
                        <given-names>Anna-Lena</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-1953-5606</uri>
                    <xref ref-type="corresp" rid="c1">a</xref>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Palmblad</surname>
                        <given-names>Magnus</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Funding Acquisition</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Visualization</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-5865-8994</uri>
                    <xref ref-type="corresp" rid="c2">b</xref>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Ison</surname>
                        <given-names>Jon</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Software</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="corresp" rid="c3">c</xref>
                    <xref ref-type="aff" rid="a3">3</xref>
                </contrib>
                <contrib contrib-type="author" corresp="yes">
                    <name>
                        <surname>Schw&#x00e4;mmle</surname>
                        <given-names>Veit</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Conceptualization</role>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Methodology</role>
                    <role content-type="http://credit.niso.org/">Project Administration</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Validation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Original Draft Preparation</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9708-6722</uri>
                    <xref ref-type="corresp" rid="c4">d</xref>
                    <xref ref-type="aff" rid="a4">4</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Al Manir</surname>
                        <given-names>Mohammad Sadnan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a5">5</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Altintas</surname>
                        <given-names>Ilkay</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a6">6</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Baker</surname>
                        <given-names>Christopher J. O.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a7">7</xref>
                    <xref ref-type="aff" rid="a8">8</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Ben Hadj Amor</surname>
                        <given-names>Ammar</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a9">9</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Capella-Gutierrez</surname>
                        <given-names>Salvador</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-0309-604X</uri>
                    <xref ref-type="aff" rid="a10">10</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Charonyktakis</surname>
                        <given-names>Paulos</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a11">11</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Crusoe</surname>
                        <given-names>Michael R.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2961-9670</uri>
                    <xref ref-type="aff" rid="a12">12</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Gil</surname>
                        <given-names>Yolanda</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a13">13</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Goble</surname>
                        <given-names>Carole</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a14">14</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Griffin</surname>
                        <given-names>Timothy J.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-6801-2559</uri>
                    <xref ref-type="aff" rid="a15">15</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Groth</surname>
                        <given-names>Paul</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-0183-6910</uri>
                    <xref ref-type="aff" rid="a16">16</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Ienasescu</surname>
                        <given-names>Hans</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a17">17</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Jagtap</surname>
                        <given-names>Pratik</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0003-0984-0973</uri>
                    <xref ref-type="aff" rid="a15">15</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Kala&#x0161;</surname>
                        <given-names>Mat&#x00fa;&#x0161;</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-1509-4981</uri>
                    <xref ref-type="aff" rid="a18">18</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Kasalica</surname>
                        <given-names>Vedran</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a1">1</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Khanteymoori</surname>
                        <given-names>Alireza</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-6811-9196</uri>
                    <xref ref-type="aff" rid="a19">19</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Kuhn</surname>
                        <given-names>Tobias</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a12">12</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Mei</surname>
                        <given-names>Hailiang</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a20">20</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>M&#x00e9;nager</surname>
                        <given-names>Herv&#x00e9;</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a21">21</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>M&#x00f6;ller</surname>
                        <given-names>Steffen</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a22">22</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Richardson</surname>
                        <given-names>Robin A.</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-9984-2720</uri>
                    <xref ref-type="aff" rid="a23">23</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Robert</surname>
                        <given-names>Vincent</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a9">9</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Soiland-Reyes</surname>
                        <given-names>Stian</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0001-9842-9718</uri>
                    <xref ref-type="aff" rid="a14">14</xref>
                    <xref ref-type="aff" rid="a24">24</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Stevens</surname>
                        <given-names>Robert</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a14">14</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Szaniszlo</surname>
                        <given-names>Szoke</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a9">9</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Verberne</surname>
                        <given-names>Suzan</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a25">25</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Verhoeven</surname>
                        <given-names>Aswin</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <xref ref-type="aff" rid="a2">2</xref>
                </contrib>
                <contrib contrib-type="author" corresp="no">
                    <name>
                        <surname>Wolstencroft</surname>
                        <given-names>Katherine</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/">Investigation</role>
                    <role content-type="http://credit.niso.org/">Resources</role>
                    <role content-type="http://credit.niso.org/">Writing &#x2013; Review &amp; Editing</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-1279-5133</uri>
                    <xref ref-type="aff" rid="a25">25</xref>
                </contrib>
                <aff id="a1">
                    <label>1</label>Utrecht University, 3584 CS Utrecht, The Netherlands</aff>
                <aff id="a2">
                    <label>2</label>Leiden University Medical Center, 2333 ZA, Leiden, The Netherlands</aff>
                <aff id="a3">
                    <label>3</label>French Institute of Bioinformatics, 91057 &#x00c9;vry, France</aff>
                <aff id="a4">
                    <label>4</label>University of Southern Denmark, 5230 Odense M, Denmark</aff>
                <aff id="a5">
                    <label>5</label>University of Virginia, Charlottesville, VA, 22903, USA</aff>
                <aff id="a6">
                    <label>6</label>University of California San Diego, La Jolla, CA, 92093, USA</aff>
                <aff id="a7">
                    <label>7</label>University of New Brunswick, Saint John, E2L 4L5, Canada</aff>
                <aff id="a8">
                    <label>8</label>IPSNP Computing Inc., Saint John, E2L 4S6, Canada</aff>
                <aff id="a9">
                    <label>9</label>Westerdijk Institute, 3584 CT, Utrecht, The Netherlands</aff>
                <aff id="a10">
                    <label>10</label>Barcelona Supercomputing Center, 08034, Barcelona, Spain</aff>
                <aff id="a11">
                    <label>11</label>Gnosis Data Analysis PC, GR-700 13 Heraklion, Greece</aff>
                <aff id="a12">
                    <label>12</label>VU University Amsterdam, 1081 HV Amsterdam, The Netherlands</aff>
                <aff id="a13">
                    <label>13</label>University of Southern California, Marina Del Rey, CA, 90292, USA</aff>
                <aff id="a14">
                    <label>14</label>Department of Computer Science, University of Manchester, Manchester, M13 9PL, UK</aff>
                <aff id="a15">
                    <label>15</label>Department of Biochemistry, Molecular Biology and Biophysics, University of Minnesota, Minneapolis, MN, 55455, USA</aff>
                <aff id="a16">
                    <label>16</label>University of Amsterdam, 1090 GH Amsterdam, The Netherlands</aff>
                <aff id="a17">
                    <label>17</label>Technical University of Denmark, 2800 Kongens Lyngby, Denmark</aff>
                <aff id="a18">
                    <label>18</label>University of Bergen, 5020 Bergen, Norway</aff>
                <aff id="a19">
                    <label>19</label>Bioinformatics Group, University of Freiburg, 79110 Freiburg, Germany</aff>
                <aff id="a20">
                    <label>20</label>Sequencing Analysis Support Core, Leiden University Medical Center, 2333 ZC Leiden, The Netherlands</aff>
                <aff id="a21">
                    <label>21</label>Institut Pasteur, 75015 Paris, France</aff>
                <aff id="a22">
                    <label>22</label>IBIMA, Rostock University Medical Center, 18057 Rostock, Germany</aff>
                <aff id="a23">
                    <label>23</label>Netherlands eScience Center, 1098 XG Amsterdam, The Netherlands</aff>
                <aff id="a24">
                    <label>24</label>Informatics Institute, University of Amsterdam, 1090 GH Amsterdam, The Netherlands</aff>
                <aff id="a25">
                    <label>25</label>Leiden Institute of Advanced Computer Science, Leiden University, 2333 BE Leiden, The Netherlands</aff>
            </contrib-group>
            <author-notes>
                <corresp id="c1">
                    <label>a</label>
                    <email xlink:href="mailto:a.l.lamprecht@uu.nl">a.l.lamprecht@uu.nl</email>
                </corresp>
                <corresp id="c2">
                    <label>b</label>
                    <email xlink:href="mailto:n.m.palmblad@lumc.nl">n.m.palmblad@lumc.nl</email>
                </corresp>
                <corresp id="c3">
                    <label>c</label>
                    <email xlink:href="mailto:jon.c.ison@gmail.com">jon.c.ison@gmail.com</email>
                </corresp>
                <corresp id="c4">
                    <label>d</label>
                    <email xlink:href="mailto:veits@bmb.sdu.dk">veits@bmb.sdu.dk</email>
                </corresp>
                <fn fn-type="conflict">
                    <p>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>7</day>
                <month>9</month>
                <year>2021</year>
            </pub-date>
            <pub-date pub-type="collection">
                <year>2021</year>
            </pub-date>
            <volume>10</volume>
            <elocation-id>897</elocation-id>
            <history>
                <date date-type="accepted">
                    <day>27</day>
                    <month>8</month>
                    <year>2021</year>
                </date>
            </history>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2021 Lamprecht AL et al.</copyright-statement>
                <copyright-year>2021</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access article distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <self-uri content-type="pdf" xlink:href="https://f1000research.com/articles/10-897/pdf"/>
            <abstract>
                <p>Scientific data analyses often combine several computational tools in automated pipelines, or workflows. Thousands of such workflows have been used in the life sciences, though their composition has remained a cumbersome manual process due to a lack of standards for annotation, assembly, and implementation. Recent technological advances have brought the long-standing vision of automated workflow composition back into focus.</p>
                <p>This article summarizes a recent Lorentz Center workshop dedicated to automated composition of workflows in the life sciences. We survey previous initiatives to automate the composition process, and discuss the current state of the art and future perspectives. We start by drawing the &#x201c;big picture&#x201d; of the scientific workflow development life cycle, before surveying and discussing current methods, technologies and practices for semantic domain modelling, automation in workflow development, and workflow assessment. Finally, we derive a roadmap of individual and community-based actions to work toward the vision of automated workflow development in the forthcoming years.</p>
                <p>A central outcome of the workshop is a general description of the workflow life cycle in six stages: 1) scientific question or hypothesis, 2) conceptual workflow, 3) abstract workflow, 4) concrete workflow, 5) production workflow, and 6) scientific results. The transitions between stages are facilitated by diverse tools and methods, usually incorporating domain knowledge in some form. Formal semantic domain modelling is hard and often a bottleneck for the application of semantic technologies. However, life science communities have made considerable progress here in recent years and are continuously improving, renewing interest in the application of semantic technologies for workflow exploration, composition and instantiation. Combined with systematic benchmarking with reference data and large-scale deployment of production-stage workflows, such technologies enable a more systematic process of workflow development than we know today. We believe that this can lead to more robust, reusable, and sustainable workflows in the future.</p>
            </abstract>
            <kwd-group kwd-group-type="author">
                <kwd>scientific workflows</kwd>
                <kwd>computational pipelines</kwd>
                <kwd>automated workflow composition</kwd>
                <kwd>semantic domain modelling</kwd>
                <kwd>workflow benchmarking</kwd>
                <kwd>bioinformatics</kwd>
                <kwd>life sciences</kwd>
            </kwd-group>
            <funding-group>
                <award-group id="fund-1" xlink:href="http://dx.doi.org/10.13039/501100000780">
                    <funding-source>European Commission Horizon 2020 Framework Programme</funding-source>
                    <award-id>H2020-INFRAEDI-02-2018 823830</award-id>
                    <award-id>H2020-INFRAEOSC-2018-2 824087</award-id>
                </award-group>
                <funding-statement>Stian Soiland-Reyes was supported by BioExcel-2 Centre of Excellence, funded by European Commission Horizon 2020 programme under European Commission contract H2020-INFRAEDI-02-2018 823830.
Carole Goble was supported by EOSC-Life, funded by European Commission Horizon 2020 programme under grant agreement H2020-INFRAEOSC-2018-2 824087.
We gratefully acknowledge the financial support from the Lorentz Center, ELIXIR, and the Leiden University Medical Center (LUMC) that made the workshop possible.
</funding-statement>
                <funding-statement>
                    <italic>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</italic>
                </funding-statement>
            </funding-group>
        </article-meta>
    </front>
    <body>
        <sec id="sec1" sec-type="intro">
            <title>Introduction</title>
            <p>
                <italic toggle="yes">Computational pipelines</italic>, commonly referred to as scientific 
                <italic toggle="yes">workflow</italic>s
                <xref ref-type="fn" rid="fn1">
                    <sup>*</sup>
                </xref>, play a key role in modern life science research.
                <sup>
                    <xref ref-type="bibr" rid="ref1">1</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref3">3</xref>
                </sup> Analyses must be tailored to highly complex biological data by successive application of different algorithms and routines to maximize biological insight. Hence, scientists regularly use sophisticated workflows, composed from several software tools and data resources, for tailored data analysis processes. The highly dynamic eScience software ecosystem, which continuously sees new tools emerging, new reference data being provided and computational infrastructure improving, provides the basis for new and innovative workflows. Once developed, workflows are rarely considered stable, but are regularly adapted and reimplemented to meet the latest state of the art.</p>
            <p>For more than two decades, dedicated scientific workflow management systems
                <sup>
                    <xref ref-type="bibr" rid="ref4">4</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref9">9</xref>
                </sup> have been developed to support researchers at the different stages of the workflow development life cycle.
                <sup>
                    <xref ref-type="bibr" rid="ref10">10</xref>
                </sup> There is a flourishing ecosystem around these systems, including software-oriented ontologies,
                <sup>
                    <xref ref-type="bibr" rid="ref11">11</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> tool registries with rich metadata and functional annotations,
                <sup>
                    <xref ref-type="bibr" rid="ref12">12</xref>,
                    <xref ref-type="bibr" rid="ref17">17</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> containerization technologies,
                <sup>
                    <xref ref-type="bibr" rid="ref20">20</xref>,
                    <xref ref-type="bibr" rid="ref21">21</xref>
                </sup> workflow management and execution frameworks,
                <sup>
                    <xref ref-type="bibr" rid="ref7">7</xref>,
                    <xref ref-type="bibr" rid="ref22">22</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref25">25</xref>
                </sup> workflow repositories,
                <sup>
                    <xref ref-type="bibr" rid="ref26">26</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref29">29</xref>
                </sup> workflow exchange formats,
                <sup>
                    <xref ref-type="bibr" rid="ref30">30</xref>
                </sup> and more.
                <sup>
                    <xref ref-type="bibr" rid="ref31">31</xref>
                </sup> Importantly, with the use of workflows in large scale data science and machine learning systems
                <sup>
                    <xref ref-type="bibr" rid="ref32">32</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref36">36</xref>
                </sup> there has been a large increase in the interest in composing and executing workflows at scale.
                <sup>
                    <xref ref-type="bibr" rid="ref37">37</xref>
                </sup> These developments bring the long-standing vision of 
                <italic toggle="yes">automated workflow composition</italic>
                <sup>
                    <xref ref-type="bibr" rid="ref38">38</xref>
                </sup> - the use of algorithms to perform the often tedious, time-consuming, limited and error-prone workflow development process - within reach.</p>
            <p>Among biologists there is a latent fear of having chosen the wrong computational paths for the analysis of their data, which could cause problems during peer review and, in the worst case, misdirect the data interpretation and invalidate downstream experiments. While human expert knowledge is an indispensable factor for validating and curating computational workflows, their automated assembly can significantly reduce the effort of getting from novel ideas to production and mainstream application, and at the same time help to increase scientific quality, reliability, and robustness. In fact, the benefits of (partially) automated workflow development are manifold and include:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Minimal technicalities in software composition.</italic> Manual workflow construction can be a tedious process. It requires the workflow developer to get familiar with the individual tools, sort out the compatibility of their input/output data formats, and connect them correctly to perform the intended process. An automated composer would not only save valuable research time, but also reduce errors.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Exhaustive exploration of data-analytical possibilities.</italic> Given the abundance of bioinformatics tools available today, it is impossible for a human to consider all possible combinations that could be relevant for their problem. Indeed, scientists often resort to the tools and workflows with which they are familiar, at the risk of missing better suited or more effective pipelines for their problem. Assisted or even automated workflow composition would systematically and comprehensively explore the workflows that are possible with the available tools, and could also rank the possible workflows based on specific user requirements, such as runtime, compute requirements, underlying database usage, etc. This would enable new scientific findings by discovering well or better performing workflows that researchers would not have thought of themselves.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Generating ensembles of workflows.</italic> When using workflows to test biological hypotheses, automated workflow composition enables us to generate ensembles of orthogonal workflows combining different tools and services seizing on different aspects of the data (for example, algorithms that concentrate on different subsets of the raw data). This idea has been proposed by Gil 
                            <italic toggle="yes">et al</italic>.
                            <sup>
                                <xref ref-type="bibr" rid="ref39">39</xref>
                            </sup> and is not epistemologically novel. As Hempel summarized over half a century ago, &#x201c;The confirmation of a hypothesis depends not only on the quantity of the favorable evidence available, but also on its variety: the greater the variety, the stronger the resulting support&#x201d;.
                            <sup>
                                <xref ref-type="bibr" rid="ref40">40</xref>
                            </sup> As a single, linear, workflow is typically unable to collect 
                            <italic toggle="yes">all</italic> available evidence and parallelization is not always an option, workflow ensembles can provide additional confidence in rejecting null hypotheses.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Repairing workflows by tool substitution.</italic> Within a strictly and semantically well-defined context, alternative, semantically equivalent tools or services may be fully automatically substituted when the default is deprecated or unavailable. In a less well-defined setting, the workflow developer might still be semantically guided towards possible alternatives and receive suggestions for sensible replacement tools. Ideally, the resulting workflows would also be tested automatically, to check if they produce the same or similar output as the old workflow on available benchmarking data. Workflows that can be automatically repaired in this way are inherently more robust and viable.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Optimizing workflow output.</italic> Workflow topology, components as well as parameters can be optimized in an integrated workflow composition and benchmarking framework. This can be used, for example, to maximize output, e.g. identified proteins in a proteomics experiment, or minimize some computational resource, e.g. memory or CPU time. Specific properties of a data set might influence such optimization, adapting the methods not only to the data type but also to the data itself.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>
                            <italic toggle="yes">Ensuring the methodological quality of workflows.</italic> Automated composition can ensure that data is correctly used within components (e.g. training and test data are properly used in machine learning). Likewise, it can prevent errors in parameter setting as well as combinations of components.</p>
                    </list-item>
                </list>
            </p>
            <p>In this article we report on the state of the art of automated workflow development in the life sciences, discuss current and future challenges and develop perspectives for the coming years. The report is based on discussions during a Lorentz Center workshop (held at the Lorentz Center in Leiden, Netherlands, from 9-13 March 2020) dedicated to this topic
                <sup>
                    <xref ref-type="bibr" rid="ref41">41</xref>
                </sup> (workshop program available in 
                <italic toggle="yes">Extended data</italic>
                <sup>
                    <xref ref-type="bibr" rid="ref115">115</xref>
                </sup>), with the authors as participants. In the section 
                <xref ref-type="sec" rid="sec2">Workflow life cycle</xref> we outline a &#x201c;big picture&#x201d; of the scientific workflow development life cycle, before surveying and discussing current methods, technologies and practices for semantic domain modelling (section 
                <xref ref-type="sec" rid="sec3">Semantic domain modelling</xref>), automation in workflow development (section 
                <xref ref-type="sec" rid="sec6">Automation in workflow development</xref>), and workflow assessment (section 
                <xref ref-type="sec" rid="sec9">Workflow assessment</xref>). In the 
                <xref ref-type="sec" rid="sec13">Roadmap</xref> section, we derive a roadmap of individual and community-based actions to work toward the vision of automated workflow development in the forthcoming years. Finally, the 
                <xref ref-type="sec" rid="sec18">Conclusion</xref> section wraps up the discussion.</p>
        </sec>
        <sec id="sec2">
            <title>Workflow life cycle</title>
            <p>The development of scientific workflows is an involved, multistep, and often iterative process. The schematic process in 
                <xref ref-type="fig" rid="f1">Figure 1</xref> captures the &#x201c;big picture&#x201d; that emerged from the discussions at the Lorentz Center workshop. It extends earlier descriptions of the scientific workflow life cycle,
                <sup>
                    <xref ref-type="bibr" rid="ref42">42</xref>&#x2013;
                    <xref ref-type="bibr" rid="ref44">44</xref>
                </sup> and will provide guidance for the discussion of automation approaches in the remainder of this article. The life cycle distinguishes six principal stages:
                <list list-type="order">
                    <list-item>
                        <label>1.</label>
                        <p>The 
                            <italic toggle="yes">scientific question</italic> to answer, or the 
                            <italic toggle="yes">hypothesis</italic> to test. It guides the subsequent exploration of suitable analysis methods, as well as the choice of data, methods, tools and platforms, and the interpretation of results.</p>
                    </list-item>
                    <list-item>
                        <label>2.</label>
                        <p>The 
                            <italic toggle="yes">conceptual workflow</italic>, as a sketch of the methodical steps that the workflow should perform on data from a specific experiment type, from a domain-specific perspective. It is the result of exploring possible analysis methods for the scientific question/hypothesis and the data at hand. It can be formalized, for example as a Concept Map,
                            <sup>
                                <xref ref-type="bibr" rid="ref45">45</xref>,
                                <xref ref-type="bibr" rid="ref46">46</xref>
                            </sup> but often it will only take the form of a paper or mental sketch. Nevertheless, it is an important stage in the workflow development process.</p>
                    </list-item>
                    <list-item>
                        <label>3.</label>
                        <p>The 
                            <italic toggle="yes">abstract workflow</italic>, describing sequences of computational tools that implement the conceptual workflow. It is the result of composing individual tools into workflows, taking into account the compatibility of their input/output types and other kinds of static information. An abstract workflow is not yet (fully) configured, however, and thus not readily executable.</p>
                    </list-item>
                    <list-item>
                        <label>4.</label>
                        <p>The 
                            <italic toggle="yes">concrete workflow</italic>, as the fully implemented, fully configured and readily executable stage. It is the result of instantiating an abstract workflow with the relevant data and parameters.</p>
                    </list-item>
                    <list-item>
                        <label>5.</label>
                        <p>The 
                            <italic toggle="yes">production workflow</italic>, deployed and ready for (re)use by other parties. It is the result of benchmarking different variations of a workflow in order to arrive at a tested and robust version for wider use.</p>
                    </list-item>
                    <list-item>
                        <label>6.</label>
                        <p>Finally, the 
                            <italic toggle="yes">scientific results</italic> that emerge from executing the workflow with the research data. They are interpreted by the domain scientists, and ideally shared with others in a manner that promotes reproducibility and transparency. This often leads to new scientific questions or hypotheses, to be addressed by another workflow.</p>
                    </list-item>
                </list>
            </p>
            <fig fig-type="figure" id="f1" orientation="portrait" position="float">
                <label>Figure 1. </label>
                <caption>
                    <title>Scientific workflow life cycle.</title>
                </caption>
                <graphic id="gr1" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/57615/04941ead-f75c-4f88-9cdf-b45691a72994_figure1.gif"/>
            </fig>
            <p>In practice, these stages are often not so clearly distinguishable. They can be interleaved, skipped, and taken in a different order than the life cycle suggests. A non-exhaustive list of examples includes:
                <list list-type="bullet">
                    <list-item>
                        <label>&#x2022;</label>
                        <p>A workflow developer might not produce an (explicit) conceptual sketch of the workflow before starting to explore and compose tools, but rather do so in one go.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Trying to compose an abstract workflow might reveal that the research question/hypothesis and/or the conceptual workflow need to be refined.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Many popular workflow management systems, such as Galaxy,
                            <sup>
                                <xref ref-type="bibr" rid="ref7">7</xref>,
                                <xref ref-type="bibr" rid="ref57">57</xref>
                            </sup> handle both composition and instantiation simultaneously and combine abstract and concrete workflows in one formalism. Typically, they also allow for workflow execution for both benchmarking and production purposes, thus covering additional stages in the life cycle.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>A benchmarked workflow might be used to generate results, but is never actually deployed for reuse by others.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Existing workflows from repositories like myExperiment,
                            <sup>
                                <xref ref-type="bibr" rid="ref28">28</xref>,
                                <xref ref-type="bibr" rid="ref29">29</xref>
                            </sup> Dockstore
                            <sup>
                                <xref ref-type="bibr" rid="ref47">47</xref>
                            </sup> or the WorkflowHub
                            <sup>
                                <xref ref-type="bibr" rid="ref26">26</xref>
                            </sup> can be reused at different stages, allowing preceding stages in the principal life cycle to be either skipped or shortened.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Popular production workflows, such as those provided by the Bioinformatics Core Facility
                            <sup>
                                <xref ref-type="bibr" rid="ref48">48</xref>
                            </sup> are routinely used by researchers in a close execution -&gt; results -&gt; interpretation -&gt; execution sub-cycle.</p>
                    </list-item>
                    <list-item>
                        <label>&#x2022;</label>
                        <p>Specific data and study properties pre-determine workflow components by prior knowledge about tool performance.</p>
                    </list-item>
                </list>
            </p>
            <p>The figure also indicates the importance of literature, data, domain ontologies, tool registries and workflow repositories. They provide the basis for exploring, composing, implementing, running, evaluating, sharing, and reusing computational pipelines, and are thus central to the whole workflow life cycle. In fact, they are the enablers of many of the &#x201c;shortcuts&#x201d; outlined above.</p>
            <p>Finally, the figure distinguishes two principal roles in the workflow life cycle: 1) the workflow user, here represented by a wet-lab biologist, who has research questions and data for which they use computational tools and workflows to obtain results, and 2) the workflow developer, here represented by a technology-oriented bioinformatician, who has the skills to develop and provide computational workflows for their colleague&#x2019;s data analysis problems. While there are obviously individuals who perform both roles, there is an increasing specialization happening in the field of scientific workflows, with research software engineers skilled in workflow technologies emerging as a professional profile in its own right.
                <sup>
                    <xref ref-type="bibr" rid="ref49">49</xref>
                </sup>
            </p>
        </sec>
        <sec id="sec3">
            <title>Semantic domain modelling</title>
            <p>In the context of scientific workflow development, the semantic domain model is (formalized) knowledge about the technical entities within a domain. It includes domain ontologies as controlled vocabularies for annotating entities with metadata, and registries and repositories of annotated data, tools and workflows. For the purpose of automating (parts of) the workflow construction process, tools and their functional annotations are of particular importance. Possible connections of individual tools are in the first place determined based on the annotated input/output data types and formats.</p>
            <p>The eScience community, and especially the life science circles, were early adopters of semantic technologies. For example, driven by the myGrid project in the UK, the myGrid Ontology
                <sup>
                    <xref ref-type="bibr" rid="ref16">16</xref>
                </sup> was an early initiative of a software-oriented ontology designed to facilitate bioinformatics service discovery, and the BioCatalogue
                <sup>
                    <xref ref-type="bibr" rid="ref18">18</xref>,
                    <xref ref-type="bibr" rid="ref19">19</xref>
                </sup> was one of the first domain-specific web service registries, providing a curated collection of semantically annotated bioinformatics services. Around the same time in the same context, myExperiment
                <sup>
                    <xref ref-type="bibr" rid="ref28">28</xref>,
                    <xref ref-type="bibr" rid="ref29">29</xref>
                </sup> emerged as one of the first repositories for scientific workflows, allowing users to upload, describe, annotate and share their computational pipelines. As a successor to myExperiment, EOSC-Life has now established the FAIR Computational Workflow registry WorkflowHub.
                <sup>
                    <xref ref-type="bibr" rid="ref50">50</xref>
                </sup> Whereas myExperiment treated workflows as data objects, WorkflowHub recognises them as software objects with dependencies and other properties.</p>
            <p>Over the last decade, these early ideas, approaches and platforms have evolved further, and are now increasingly being adopted by the life science and wider eScience communities.</p>
            <sec id="sec4">
                <title>Examples of semantic domain models</title>
                <p>Three important, contemporary and active semantic domain modelling platforms are EDAM/bio.tools, OntoSoft and SADI. They support the production and dissemination of semantic software descriptions that help to make these tools more FAIR (Findable, Accessible, Interoperable and Reusable).
                    <sup>
                        <xref ref-type="bibr" rid="ref51">51</xref>&#x2013;
                        <xref ref-type="bibr" rid="ref53">53</xref>
                    </sup>
                </p>
                <p>
                    <italic toggle="yes">EDAM and bio.tools</italic>
                </p>
                <p>The EDAM ontology of bioinformatics terms
                    <sup>
                        <xref ref-type="bibr" rid="ref14">14</xref>
                    </sup> and the bio.tools registry
                    <sup>
                        <xref ref-type="bibr" rid="ref54">54</xref>,
                        <xref ref-type="bibr" rid="ref17">17</xref>
                    </sup> have become the primary resources for semantic software annotation in the European life sciences community. EDAM provides a controlled vocabulary for the annotation of computational tools with relevant bioinformatics topics, performed operations, as well as type and format of the input and output data. The bio.tools registry uses EDAM for the fine-grained semantic description of tools and their functionality according to a pragmatic model defined in the biotoolsSchema.
                    <sup>
                        <xref ref-type="bibr" rid="ref55">55</xref>
                    </sup> The annotations facilitate the discovery of individual tools, and the assessment of their (inter)operability such as their combination into workflows. The development of both EDAM and bio.tools is driven and supported by the broader community.</p>
                <p>bio.tools is part of the 
                    <ext-link ext-link-type="uri" xlink:href="https://elixir-europe.org/platforms/tools">ELIXIR Tools Platform</ext-link> and is becoming increasingly connected with its other services such as BioContainers,
                    <sup>
                        <xref ref-type="bibr" rid="ref56">56</xref>
                    </sup> Galaxy,
                    <sup>
                        <xref ref-type="bibr" rid="ref7">7</xref>,
                        <xref ref-type="bibr" rid="ref57">57</xref>
                    </sup> BioConda,
                    <sup>
                        <xref ref-type="bibr" rid="ref58">58</xref>
                    </sup> WorkflowHub
                    <sup>
                        <xref ref-type="bibr" rid="ref26">26</xref>
                    </sup> and OpenEBench,
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup> as well as external services like Debian Med.
                    <sup>
                        <xref ref-type="bibr" rid="ref60">60</xref>
                    </sup> This will form a centralised, transparent ecosystem of information about tools and services in the life sciences. Here, EDAM serves as a common language to connect and enrich extensive software dossiers.</p>
                <p>
                    <italic toggle="yes">OntoSoft</italic>
                </p>
                <p>The OntoSoft ontology
                    <sup>
                        <xref ref-type="bibr" rid="ref20">20</xref>,
                        <xref ref-type="bibr" rid="ref21">21</xref>
                    </sup> has been designed as an ontology for scientific software metadata. OntoSoft allows for the description of software. This includes understanding how to access and update that software, how to execute it, how to use it, and information on who supports the software. The OntoSoft ontology is the basis for the design of the user interface in the OntoSoft portal, the organization of the underlying knowledge base, and the integration with other software repositories. Although OntoSoft is currently focused on earth sciences applications, providing geoscientists in the NSF EarthCube project
                    <sup>
                        <xref ref-type="bibr" rid="ref61">61</xref>
                    </sup> with an intelligent system to share and reuse code, its principles are equally applicable in other domains.</p>
                <p>OntoSoft-VFF (Ontology for Software Version, Function and Functionality)
                    <sup>
                        <xref ref-type="bibr" rid="ref62">62</xref>
                    </sup> extends OntoSoft. It stores semantic software metadata needed to manage workflow evolution and updates, suitable to help scientists to find and select the right tools to implement given workflow steps, explore alternative tools to use in their workflows, and keep track of tool and workflow changes. Similarly, OntoSoft is the basis for OKG-Soft,
                    <sup>
                        <xref ref-type="bibr" rid="ref63">63</xref>
                    </sup> an open knowledge graph that describes scientific software in a machine-readable manner and supports the FAIR principles for software.</p>
                <p>
                    <italic toggle="yes">SADI registries</italic>
                </p>
                <p>SADI (Semantic Automated Discovery and Integration)
                    <sup>
                        <xref ref-type="bibr" rid="ref64">64</xref>
                    </sup> is a framework for creating Semantic Web Services and a design pattern for the formal description of the service interfaces. Services are described by an ontology that defines I/O class names, predicates and service names with a unique URL. The ontology specifies an explicit relationship (semantic predicate) describing the functionality of a service between the I/O, for example &#x201c;getDrugNamebyDocument&#x201d;.
                    <sup>
                        <xref ref-type="bibr" rid="ref65">65</xref>
                    </sup> The service descriptions are collected in a SADI registry. From there, SADI Services can be readily discovered and composed into workflows, as all services consume and generate RDF (syntactic interoperability) and thus the output of one SADI service can be directly consumed by any other SADI service. Through the provisioning of Semantic Web services on top of relational databases for semantic querying, SADI facilitates both data-as-a-service and algorithms-as-a-service. Recently Valet SADI
                    <sup>
                        <xref ref-type="bibr" rid="ref66">66</xref>
                    </sup> was developed as a service generator for assisting the technically involved authoring of SADI Web Services. Designed as middleware, SADI is not accessed directly, but through specialized query engines (see section 
                    <underline>SHARE &amp; HYDRA</underline>).</p>
            </sec>
            <sec id="sec5">
                <title>Discussion of semantic domain modelling</title>
                <p>Semantic domain modelling is hard.
                    <sup>
                        <xref ref-type="bibr" rid="ref67">67</xref>
                    </sup> Especially in highly collaborative community efforts like EDAM/bio.tools, OntoSoft and SADI, it is important to realize that the controlled vocabulary defined by the domain ontology constitutes a kind of social contract that all tool annotators must understand and respect. Using the same interpretations of the terms defined by the ontology is crucial for the meaningfulness and consistency of the domain model.</p>
                <p>To be useful for practical application, ontologies have to be designed for a clear purpose. In the context of workflow composition, it needs to be defined, for example, if the ontology is supposed to help the (manual) search for and/or the automated composition of computational tools, and if it targets the creation of informatically, bioinformatically and/or biologically valid workflows. Furthermore, the ontology needs to use an adequate level of detail, neither too simple nor too complex, to avoid overgeneralization as well as overfitting. These challenges are both technological and social, with the latter typically being harder to address. This was also reflected by the discussion of semantic domain modelling during the Lorentz workshop, with the use of EDAM and bio.tools as guiding examples.</p>
                <p>
                    <italic toggle="yes">Scope</italic>
                </p>
                <p>In the case of bio.tools, the EDAM ontology and the biotoolsSchema provide a technical basis and general direction for the annotation of bioinformatics tools in the registry. However, they leave room for interpretation, calling for clarification. What kinds of tools are in scope, and what exactly should be included in their annotation?</p>
                <p>
                    <italic toggle="yes">Content</italic>: The bio.tools Curators Guide
                    <sup>
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> defines the scope of relevant tools as &#x201c;application software with well-defined data processing functions (inputs, outputs and operations)&#x201d;. This clearly includes, for example, command-line tools for sequence alignment, or web services for database searches. For other workflow building blocks this is less clear. On the one hand, workflows often require the inclusion of &#x201c;shims&#x201d;,
                    <sup>
                        <xref ref-type="bibr" rid="ref68">68</xref>,
                        <xref ref-type="bibr" rid="ref69">69</xref>
                    </sup> small formatting or conversion tasks between the actual data processing steps. They are often not considered tools in their own right, but are indispensable for interoperability. Automated workflow composers as well as human workflow developers would hence benefit from their availability in the registry, and it would prevent a lot of reinvented wheels. While these are strong arguments for the inclusion of shims in bio.tools, there is also a certain risk of fragmentation and overloading the registry with trivial functionality that needs to be managed. On the other hand, workflows can also be considered tools that can be used in (other) workflows. From the perspective of a tools registry it is desirable to include them as &#x201c;black boxes&#x201d; providing certain functionality as a service. The inner workings can be visible in a workflow repository like the WorkflowHub, similar to the source code of other computational tools being available in a repository like 
                    <ext-link ext-link-type="uri" xlink:href="https://github.com/">GitHub</ext-link>.</p>
                <p>Similarly, clear guidelines are needed for meaningful annotation of tool suites and multifunctional tools. The bio.tools Curators Guide recommends registering tool suites as such (the biotoolsSchema foresees a tool type &#x201c;Suite&#x201d;), but to also add separate entries for the individual tools to capture their functionality clearly. An example is the 
                    <ext-link ext-link-type="uri" xlink:href="http://www.htslib.org/">
                        <underline>SAMtools</underline>
                    </ext-link> suite
                    <sup>
                        <xref ref-type="bibr" rid="ref70">70</xref>
                    </sup> and its members, such as 
                    <ext-link ext-link-type="uri" xlink:href="https://bio.tools/samtools_sort">samtools_sort</ext-link> and samtools_slice_bam. Multimodal tools with different functions should be annotated with multiple specific functions (as supported by the biotoolsSchema) rather than trying to cover all modes of operations in one generic annotation.</p>
                <p>
                    <italic toggle="yes">Annotation</italic>: The biotoolsSchema includes various kinds of attributes that can be used for tool annotation. The bio.tools Curators Guide
                    <sup>
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> provides guidance for tool annotators and describes which information must, should, should not, and must not be included. For automated workflow composition, the annotation of tool function (performed operation, data type and format of inputs and outputs, execution command) is essential. In practice annotators often face ambiguous situations, such as:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Tool inputs can be distinguished into payload data and configuration parameters. For example, a set of sequences to be aligned is payload data, and a gap penalty value is a parameter for an alignment tool. This distinction is however not always clear. For example, a substitution matrix used by the algorithm as a parameter but provided as an additional input file could be placed in either of the categories. Currently tool annotation in bio.tools mostly focuses on the annotation of payload input (for pragmatic reasons such as conciseness and limitation of complexity), but for full automated workflow construction information about the parameters to be configured would also be required.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>The biotoolsSchema readily distinguishes between the type (kind of content, domain perspective) and the format (representation, technical perspective) of input and output data. For example, an &#x201c;RNA sequence&#x201d; is a type, and &#x201c;FASTA&#x201d; is one of the possible formats in which it can be represented. This distinction is however not always easy to make. The IUPAC International Chemical Identifier (InChI),
                                <sup>
                                    <xref ref-type="bibr" rid="ref71">71</xref>
                                </sup> for example, is classified as a format in EDAM, but it could also be viewed as a type of data. The domain ontology should have clear criteria for the classification of such formats, and ideally make sure that all formats included are connected to at least one type of data (and vice versa) to enable meaningful tool annotation.</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Composite data formats contain different parts, where a specific tool might only use one or some of them. For example, RetroPath 2.0
                                <sup>
                                    <xref ref-type="bibr" rid="ref72">72</xref>
                                </sup> works with InChI identifiers that are available from one column of a CSV file. Currently it is not clear how these would be modeled in the ontology and annotated in the registry in the best way. Possible solutions might include a combination of reusing approaches such as the structured metadata for CSV and other tabular data
                                <sup>
                                    <xref ref-type="bibr" rid="ref73">73</xref>
                                </sup> and providing corresponding, ideally automatically generated, shim libraries.</p>
                        </list-item>
                    </list>
                </p>
                <p>Clearly, the domain ontology needs to provide a vocabulary that supports the required annotations and the desired level of granularity. As the needs change, the ontology also has to evolve. EDAM is indeed continually evolving based on input from the bioinformatics and, in particular, the bio.tools community. It is for example well developed for the proteomics domain, due to recent work on (automated) workflow composition and benchmarking.</p>
                <p>
                    <italic toggle="yes">Quality</italic>
                </p>
                <p>The quality of automatically composed workflows critically depends on the quality of the domain ontology and tool annotations (&#x201c;garbage in, garbage out&#x201d;).
                    <sup>
                        <xref ref-type="bibr" rid="ref74">74</xref>,
                        <xref ref-type="bibr" rid="ref75">75</xref>
                    </sup> Hence it is important that all tool annotations consistently adhere to the curation guidelines that capture the defined scope and annotation conventions. Consequently, if annotations are too vague or imprecise, automatic composition will likely generate incorrect or nonsensical workflows. If they are overly specific or narrow, possible workflows might be overlooked due to overfitting. This sounds simple, but is difficult in practice. It requires not only a thorough understanding of the curation guidelines and annotation conventions, but also expert-level knowledge in the application domain of the tools, and ideally practical experience of using them. Variations in the stringency of rules for the annotation being followed have direct effects on the interpretability of queries to the system.</p>
                <p>Note that the semantics of the annotation is typically limited to a positive tagging, that is, no negative statements as in &#x201c;not performing indexing&#x201d; can be expressed, and there are no set operations like intersections or exclusions. When also allowing negative statements, or exceptions to a universal quantifier, this easily leads to semantically incorrect workflows merely by omission. Thus, negative statements or set operations shift responsibilities for correctness to the maintainers of higher-level resources like ontologies. This poses a challenge to synchronize the development of catalogs and ontologies as their granularity in biological expressiveness needs to match the decision making of users for the applicability of scientific tools for a given problem. With a larger amount of scrutiny on the ontologies than on individual catalog entries, this may support stringency and help overall quality.</p>
                <p>The quality problem is aggravated when there is a need to annotate large numbers of tools (at the time of writing, for example, bio.tools comprises almost 19,000 entries). Ideas to scale up annotation rates include text-mining of annotation information from the tool&#x2019;s documentation pages, deriving annotations from other repositories (such as the Galaxy Toolshed
                    <sup>
                        <xref ref-type="bibr" rid="ref76">76</xref>
                    </sup>), and the automation of semantic description of tools and services via propagation from tried-and-tested workflows.
                    <sup>
                        <xref ref-type="bibr" rid="ref77">77</xref>
                    </sup> While enabling the inclusion of more tools in a shorter time, such automated approaches do not guarantee a consistent high-quality annotation of tools. Therefore, automatically annotated tools should be checked for a minimal level of curation quality before being made available for automated workflow composition.</p>
                <p>A community registry like bio.tools needs to find a balance between being open to contributions and curating entries for quality control. At the end of the day the (manual) curation work invested needs to reflect the actual usage, i.e. frequently used tools justify greater curation efforts. Curators need adequate training to become able to deliver high-quality annotations. While desirable, it is unrealistic to assume that this is feasible to provide to everybody who might (potentially) contribute. A pragmatic way out might be to mark curated entries to distinguish them from unchecked ones, for instance with a tag or badge denoting that the functional tool annotation was checked and the tool is suitable to be used by automated workflow composers. This could be combined with procedures to develop consensus annotations for widely used tools by a group of experts, which can then serve as landmark or even &#x201c;gold standard&#x201d; examples for tool annotation. Furthermore, technical monitoring of tools, continuously performed by e.g. OpenEBench,
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup> could provide up-to-date information about tool status and availability. In any case these mechanisms should be defined in a governance model, together with the processes for maintaining and updating the entries in the registry.</p>
                <p>Finally, it is not only tools that develop, but also their metadata in the registries. Inconsistencies can easily lead to errors or unsatisfactory performance of automatically composed workflows. Manual verification is likely to fall short, especially since registries take a passive stance towards tools&#x2019; updates. With good reference workflows and benchmarking data available, however, workflows could be tested automatically as a joint quality control for the tools and their semantic descriptions in the registry. One may anticipate that such an automated quality assurance even prepares for workflow optimization.</p>
                <p>
                    <italic toggle="yes">Incentives</italic>
                </p>
                <p>The success of registries like bio.tools depends on community contributions, which is predicated upon the motivation of tool users and developers. There are several incentives for potential contributors. For example, for tool developers, rich annotation of their software will increase its findability and comprehension, and thus its potential to be (re-)used individually or within a workflow. This in turn can improve the impact and eventual citations of the software. Improved tooling (intuitive user interface, annotation help, very well-defined metadata schemas) that integrates well with the tool development infrastructure can help to lower the threshold to tool registration and annotation. Registering tools in a community is also in line with recent practice guides like the &#x201c;Four simple recommendations to encourage best practices in research software&#x201d;
                    <sup>
                        <xref ref-type="bibr" rid="ref78">78</xref>
                    </sup> and the &#x201c;Five Recommendations for FAIR Research Software&#x201d;,
                    <sup>
                        <xref ref-type="bibr" rid="ref53">53</xref>
                    </sup> which are increasingly attracting the attention of organizations and project-funding agencies. Similarly, publishers might require the registration of tools in a community registry as a condition to accept papers that are describing or using the software, providing additional enforcement. A related problem is the incentives for updating registry entries when the respective tool has changed (updates, new versions, new features).</p>
                <p>Ideally, the problem would be solved through &#x201c;knowledge acquisition by stealth&#x201d;: sneaking in metadata curation steps in people&#x2019;s normal routines, and e.g. scrape them from available documentation or with smart tools that integrally capture the semantics from the start, so that people do not feel like having to do something extra. This is an appealing but complex long-term goal, however, which requires consideration of the entire lifecycle of tool and workflow development.</p>
            </sec>
        </sec>
        <sec id="sec6">
            <title>Automation in workflow development</title>
            <p>The possibilities for automation in the multi-stage workflow development process are manifold and range from assistance in specific phases to full automation. The most basic form of assistance, the possibility to search for available components with keywords or filter criteria, is a common feature of visual and interactive workflow management systems. More sophisticated is the assistance through context-dependent suggestions, which can take the form of guided workflow construction. An early example here is the ontology-driven assisted web service composition facilitated by BioMoby,
                <sup>
                    <xref ref-type="bibr" rid="ref79">79</xref>
                </sup> which was integrated in the Taverna workflow system to guide workflow construction.
                <sup>
                    <xref ref-type="bibr" rid="ref80">80</xref>
                </sup> Further down in the workflow life cycle, automated service configuration can assist the user to set parameters and get the workflow ready for execution. Automated service substitution aims to replace unavailable or otherwise deprecated tools by semantically equivalent but operating ones, to repair a workflow and make it executable again. Related, automated shim suggestion is intended to introduce mere technical steps (such as reformattings or format conversions) between the actual data analysis tools. One of the most intriguing forms of automation in workflow development is the automated anticipation, or exploration, of entire new workflows. Systems like Magallanes
                <sup>
                    <xref ref-type="bibr" rid="ref81">81</xref>,
                    <xref ref-type="bibr" rid="ref82">82</xref>
                </sup> and PROPHETS,
                <sup>
                    <xref ref-type="bibr" rid="ref83">83</xref>,
                    <xref ref-type="bibr" rid="ref84">84</xref>
                </sup> for example, have already demonstrated about a decade ago that AI planning and program synthesis techniques can be applied in pursuit of this goal.</p>
            <sec id="sec7">
                <title>Examples of approaches to automation in workflow development</title>
                <p>At the Lorentz workshop, the following four current and active approaches to automation in the workflow design process were presented and discussed in greater detail.</p>
                <p>
                    <italic toggle="yes">The tool recommender system in Galaxy</italic>
                </p>
                <p>Galaxy
                    <sup>
                        <xref ref-type="bibr" rid="ref7">7</xref>,
                        <xref ref-type="bibr" rid="ref57">57</xref>
                    </sup> is a popular web-based platform for high-throughput sequencing data and other big data analyses. Researchers can use it to share data and analyze them by running workflows. Workflows can be imported (shared workflows), extracted from history, or built manually. With more than 2,000 tools available in Galaxy, users need guidance during workflow construction. Therefore, a recommendation system has been developed to suggest possible next tools in a workflow under construction.
                    <sup>
                        <xref ref-type="bibr" rid="ref85">85</xref>
                    </sup> The recommender system uses an approach based on deep learning. The idea is to learn possible tool combinations from existing workflows, and use this knowledge to suggest tools for new workflows. The model is trained on tool sequences that are extracted from workflows. A Recurrent Neural Network with gated recurrent units (RNN-GRU)
                    <sup>
                        <xref ref-type="bibr" rid="ref86">86</xref>
                    </sup> is used, with tool usage as weights. The recommender system uses the trained model to predict possible next tools, ranking them by a score that is provided by the model and indicates the prevalence of the respective combination.</p>
                <p>
                    <italic toggle="yes">Workflow INstance Generation and Selection (WINGS)</italic>
                </p>
                <p>WINGS
                    <sup>
                        <xref ref-type="bibr" rid="ref6">6</xref>,
                        <xref ref-type="bibr" rid="ref39">39</xref>,
                        <xref ref-type="bibr" rid="ref87">87</xref>
                    </sup> is a semantic workflow system that assists scientists with the design of computational experiments. A unique feature of WINGS is its high-level semantic workflow representations that are automatically configured and customized into executable workflow instances. Therefore, WINGS employs workflow reasoning algorithms that use constraint-based planning. The constraints can reference both workflow constituents (steps, data, parameters) and metadata of input datasets, and are used to customize a workflow to a given dataset. For example, a constraint could require that the alignment step and the assembly step in a bioinformatics pipeline are done with the same reference genome. Such constraints capture domain expertise about workflows (as purpose-specific combinations of tools) that goes beyond what the metadata of the individual tools can express.</p>
                <p>Another interesting feature of WINGS with regard to automation is that it allows for the use of abstract (in terms of our 
                    <xref ref-type="fig" rid="f1">Figure 1</xref>: conceptual) steps in the workflow, which can be implemented by different tools or sub-workflows. For example, 
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/operation_3646">PeptideSearch</ext-link> is a method that is performed by the tools 
                    <ext-link ext-link-type="uri" xlink:href="https://www.thegpm.org/TANDEM">X! Tandem</ext-link>,
                    <sup>
                        <xref ref-type="bibr" rid="ref112">112</xref>
                    </sup> 
                    <ext-link ext-link-type="uri" xlink:href="http://bioconductor.org/packages/release/bioc/html/MSGFplus.html">MSGF+</ext-link>
                    <sup>
                        <xref ref-type="bibr" rid="ref113">113</xref>
                    </sup> and 
                    <ext-link ext-link-type="uri" xlink:href="https://lab.vanderbilt.edu/msrc-bioinformatics/myrimatch-source/">Myrimatch</ext-link>.
                    <sup>
                        <xref ref-type="bibr" rid="ref114">114</xref>
                    </sup> The implementations can flexibly be chosen and exchanged, making it easy to quickly generate and compare workflow variants, for example to assess the robustness of the method or to take part in benchmarking challenges.
                    <sup>
                        <xref ref-type="bibr" rid="ref88">88</xref>
                    </sup>
                </p>
                <p>
                    <italic toggle="yes">The Automated Pipeline Explorer (APE)</italic>
                </p>
                <p>APE
                    <sup>
                        <xref ref-type="bibr" rid="ref89">89</xref>
                    </sup> is a command-line tool and application programming interface (API) for the exploration of possible workflows in large collections of semantically annotated tools. Input for APE is a high-level workflow specification that captures the user&#x2019;s intents. It includes the available input data (type and format), the desired output data (type and format), and possibly additional constraints (such as tools to use or to avoid). For example, a proteomics workflow might be specified (using EDAM terms) as taking &#x201c;
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/data_0943">
                        <underline>Mass spectrum</underline>
                    </ext-link>&#x201d; in &#x201c;
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/format_3712">
                        <underline>Thermo RAW</underline>
                    </ext-link>&#x201d; format as input, producing an &#x201c;
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/data_1501">
                        <underline>Amino acid index</underline>
                    </ext-link>&#x201d; 
                    in any format (see 
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/format_1915">
                        <underline>here</underline>
                    </ext-link>), and using a &#x201c;
                    <ext-link ext-link-type="uri" xlink:href="http://edamontology.org/operation_3633">
                        <underline>Retention time prediction</underline>
                    </ext-link>&#x201d; operation in the analysis. When applied to bio.tools, APE finds several workflows that meet this specification.</p>
                <p>The exploration algorithm in APE is a variant of LTL-guided program synthesis, implemented as bounded search through iterative deepening.
                    <sup>
                        <xref ref-type="bibr" rid="ref90">90</xref>,
                        <xref ref-type="bibr" rid="ref91">91</xref>
                    </sup> The domain model (ontology and tool annotations as provided by EDAM and bio.tools) and workflow specification are encoded as a SAT instance (a propositional Boolean formula in conjunctive normal form), and given to a SAT solver to find a satisfying assignment of variables. The SAT solutions are translated back into actual workflows, which can be transformed further into, for example, executable shell scripts, CWL workflows or other representations.</p>
                <p>
                    <italic toggle="yes">SHARE &amp; HYDRA</italic>
                </p>
                <p>SHARE
                    <sup>
                        <xref ref-type="bibr" rid="ref92">92</xref>
                    </sup> and HYDRA
                    <sup>
                        <xref ref-type="bibr" rid="ref93">93</xref>&#x2013;
                        <xref ref-type="bibr" rid="ref95">95</xref>
                    </sup> are specialized query engines to work with SADI registries. They receive user input in the form of SPARQL queries and use the registry as a knowledge base for automated workflow composition, matching the query to thousands of services (Data as a Service (DaaS)/Application as a Service (AaaS)). Concretely, a SADI query engine maps triple patterns from the WHERE clause of a SPARQL query to indexed SADI properties in the registry. In particular it checks the I/O descriptions to ensure compatibility between services. In doing so it discovers SADI Web Services capable (when combined in a workflow) of generating the required triples. Finally the query engine plans and orchestrates a workflow with calls to RESTful web services, integrating service outputs locally in RDF.</p>
                <p>While SHARE supports the construction of the queries through a textual mechanism called SPARQL Assist, HYDRA also provides a keyword-based and graphical interface. Moreover, HYDRA performs reasoning on an input SPARQL query with respect to ontologies, leveraging the service registry to identify service calls that may help answer the query. Iteratively, data returned from service calls triggers new registry calls to identify further relevant service calls. Through this incremental workflow extension, answers are produced incrementally with the user in the loop. The process terminates when all service calls have been made and all available answers have been produced.</p>
            </sec>
            <sec id="sec8">
                <title>Discussion of approaches to automation in workflow development</title>
                <p>The four approaches sketched above introduce automation to different phases of the workflow development life cycle, thus complementing each other. In the following we discuss cross-cutting aspects of automation in different phases of the life cycle.</p>
                <p>
                    <italic toggle="yes">Target audience</italic>
                </p>
                <p>The different approaches clearly fit different user profiles and are intended to serve different target audiences. It seems useful to broadly distinguish between biologists as workflow users and bioinformaticians as workflow developers (obviously there are people who qualify for both roles). Many biologists simply want answers to their (biological) questions. They want to be able to find software solutions for their computational problems that they can trust, that have been tested and evaluated, that are reliable and that will run. They are not interested in the technical details of tools, workflows or even their construction processes. Despite the increasing integration of (bio) informatics education in life science curricula, this is not likely to change much. Hence this group of users is a target audience for production workflows as end results of the workflow development process. The development of workflows is in the hands of tech-savvy bioinformaticians. Within this large group, it seems that the &#x201c;workflow engineer&#x201d; emerges as its own professional profile. They compose, curate, evaluate and deploy workflows for specific bioinformatics problems, and make them ready for the actual end users.</p>
                <p>Of the automation approaches sketched above, HYDRA is the only one that directly and explicitly targets a workflow end user. This is made possible by the careful service curation that enables the direct execution of the workflows composed for the queries. The other approaches are better suited to support workflow developers in different phases of the construction process. APE is most suited to supporting workflow exploration and composition in an early phase, acting as a &#x201c;route planner&#x201d; that automatically explores and generates new possibilities of workflows for an abstractly described problem. The obtained &#x201c;recipes&#x201d; can then be developed further into concrete and executable workflows. The Galaxy tool recommender also targets the early, still exploratory phases of workflow construction, but based on a concrete workflow under construction. WINGS takes an abstract/template workflow as the basis, and then takes care of automatically instantiating it to obtain a concrete and executable workflow. Interestingly, to the best of our knowledge, the existing approaches do not cover automated workflow benchmarking yet, which will however be essential for bringing automatically created workflows to the production stage.</p>
                <p>
                    <italic toggle="yes">Applicability and trust</italic>
                </p>
                <p>All approaches have in common that their applicability depends on the quality of the underlying knowledge base or semantic domain model (cf. Section 
                    <xref ref-type="sec" rid="sec3">Semantic domain modelling</xref>). Generally, a somewhat lower quality seems to be tolerable for assisted workflow composition, as the developer can correct or discard suggestions based on their domain knowledge. This is the case, for example, when using a tool recommender system, like that in Galaxy, where the user can at any point decide whether or not to follow the recommendation. Semi-automated approaches like in APE and WINGS require higher-quality semantic annotations, but as the workflow developer still has the possibility to check and revise the workflow before execution, they can tolerate medium-quality annotations to some extent. Complete automation is possible for specific application areas or use cases with well-defined domain knowledge and high-quality annotations. As the required curation efforts are substantial, it is not realistic to achieve this in a generic framework. HYDRA instances, for example, are set up for well-defined application areas and invest in a rigorous semantic annotation of the provided Web Services. As a result, end users can use HYDRA to query for and directly execute workflows. Another good example of complete automation in a well-defined scope is Automated Machine Learning (AutoML),
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>
                    </sup> where machine learning models are generated automatically given a start and end point.</p>
                <p>Conversely, the higher the degree of automation, the more the user needs to decide whether to blindly trust the outcome, or conduct checks to verify its plausibility and correctness. This is again connected to the quality of the knowledge base/semantic domain model, but also has to do with the degree of involved &#x201c;blackbox&#x201d; behaviour. For example, a HYDRA workflow is not easily validated before execution (as it is directly executed during construction), but the recorded provenance data make it possible to inspect and assess the performed computations afterwards. In contrast, the Galaxy tool recommender leaves control over the selection of workflow steps to the user, but requires trust in the quality of its recommendations. As they are machine-learned from tool combinations in existing workflows, there is likely a bias towards frequently used and against less used and new tools. Further metrics and criteria to base recommendations on are of course possible (such as a functional similarity index, compatibility, citation index or novelty), but in any case they should be made transparent to the user and create awareness of possible biases.</p>
                <p>
                    <italic toggle="yes">Abstract Workflows</italic>
                </p>
                <p>Different levels of abstraction are key to differentiating the phases of the workflow development life cycle and assessing the potential for automation (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>). Indeed, automation in the development process usually means automated concretization. The process starts with a domain-level problem description as an initial idea, and then phase by phase boils it down to a concrete implementation, until a production workflow brings it back to the domain level and end user.</p>
                <p>The term 
                    <italic toggle="yes">abstract workflow</italic> has been used with different meanings.
                    <sup>
                        <xref ref-type="bibr" rid="ref38">38</xref>
                    </sup> In the executable Common Workflow Language (CWL),
                    <sup>
                        <xref ref-type="bibr" rid="ref30">30</xref>
                    </sup> for example, it is possible to define a workflow with step/tool implementations (e.g. Docker containers) or with abstract placeholder operations; in either case the workflow defines and connects all workflow steps and their parameters. In CWL an abstract workflow can thus be classified as containing one or more placeholder operations; note however that CWL engines may still permit partial workflow execution for the concrete steps.</p>
                <p>In CWL such abstract operations can still refer to a concrete tool that should be used when implementing the workflow for execution (omitting details such as command line), or to a class of possible tools (e.g. by using an EDAM operation term). In WorkflowHub, CWL is used as the canonical workflow description for workflows where possible, alongside the native workflow description: for example a Galaxy workflow for Climate analysis that is expressed in Abstract CWL.
                    <sup>
                        <xref ref-type="bibr" rid="ref96">96</xref>
                    </sup> In WINGS, an abstract workflow is even more generic and flexible, allowing not only for abstract operations, but also for missing steps that are filled in during workflow elaboration.</p>
                <p>Here we separate the notions of 
                    <italic toggle="yes">conceptual</italic> and 
                    <italic toggle="yes">abstract</italic> workflows (cf. Fig. 1): We define a 
                    <bold>conceptual workflow</bold> to be a sketch of a data analysis pipeline, similar to a concept map,
                    <sup>
                        <xref ref-type="bibr" rid="ref45">45</xref>
                    </sup>
                    <sup>,</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref46">46</xref>
                    </sup> that describes the process in domain-level but implementation-independent terms, for instance using operation and data terms from the EDAM ontology. Next to that, we define an 
                    <bold>abstract workflow</bold> to be a template that describes complete sequences of computational tools, but that is not yet fully configured and executable. Such conceptual and abstract workflows make it possible to focus on the workflow steps without complexities such as parameter settings, execution or data sets. They are also convenient for enabling search (both for users and automation) and comparisons with other workflows, thus providing a suitable exchange level and intersection point for different stakeholders and communities. They can be the target (e.g. the result of workflow exploration with APE) as well as the input (e.g. a starting template for workflow elaboration with WINGS) for automated composition approaches.</p>
                <p>Furthermore, abstract workflows can be obtained by automated abstraction from (collections of) concrete workflow instances.
                    <sup>
                        <xref ref-type="bibr" rid="ref97">97</xref>
                    </sup>
                    <sup>-</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref99">99</xref>
                    </sup> They are useful for better describing, understanding and evaluating workflows, and for preserving the essence of computational pipelines in automatically generated method sections even when the executable instances of the workflow decay.
                    <sup>
                        <xref ref-type="bibr" rid="ref98">98</xref>
                    </sup> They can therefore provide a way to document workflows in a FAIR way.
                    <sup>
                        <xref ref-type="bibr" rid="ref50">50</xref>
                    </sup>
                    <sup>-</sup>
                    <sup>
                        <xref ref-type="bibr" rid="ref52">52</xref>
                    </sup>
                </p>
            </sec>
        </sec>
        <sec id="sec9">
            <title>Workflow assessment</title>
            <p>The abundance of computational tools available in today&#x2019;s eScience ecosystem leads to an enormous number of possible workflows. With automated composition approaches these can be more easily accessed and generated. There is a problem of identifying and selecting the &#x201c;best&#x201d; (whatever that means in the concrete case and context) alternatives among various options at different stages of the workflow development process: Which methods should be chosen given the available data and the analysis goals? Which individual tools should be given preference over others? Which combinations of tools to favor over others? Which workflow candidates to select for implementation? Which workflows to bring into productive use?</p>
            <p>To enable automated composers to make informed choices, tools and workflows need to be compared with well-defined meaningful criteria. To structure the discussion of possible criteria and selection strategies, we follow the traditional distinction between static (based on information that is available without execution of the workflow) and dynamic (based on workflow execution) analysis.</p>
            <sec id="sec10">
                <title>Static analysis</title>
                <p>Static analysis is performed on the &#x201c;source code&#x201d; of the workflow (may be in a classical scripting language or a specific workflow formalism) without actually executing it, treating the individual tools as black-box building blocks that have particular (static) properties. As such, static analysis mostly concerns the early stages of the workflow development life cycle, where the workflow developer (human or machine) can use available information about individual tools and tool combinations to explore possible workflows and the tools to implement them. Arguably, static analysis is often less meaningful than the results obtained through actual execution, but provides a powerful way for pre-screening the capability of a workflow. In addition, in many cases it is simply not possible to implement and execute all possible workflows in order to choose between options. Hence it is important to leverage what can be said about tools and workflows without executing them. Which information can be used to compare and (pre-)select individual tools and entire workflows at this stage? What would we like to see in a workflow before we run it? What can (possibly) be provided by tool registries and workflow repositories?</p>
                <p>There are many qualities and merits to consider, but there seem to be three categories of information relevant at this stage, on which we elaborate in the following: technical parameters, domain-specific considerations, and community influence (summarized in 
                    <xref ref-type="table" rid="T1">Table 1</xref>).</p>
                <table-wrap id="T1" orientation="portrait" position="float">
                    <label>Table 1. </label>
                    <caption>
                        <title>Relevant information for static analysis of workflows.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Technical parameters</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Domain-specific considerations</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Community influence</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <p>
                                        <list list-type="bullet">
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Basic tool information (such as license, version, recent updates)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>(Theoretical) compatibility of tools based on their functional annotation</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>(Practical) compatibility of tools based on their use in existing workflows</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Tool statistics like number of runs, number of users, speed, reliability, etc.</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Service monitoring information about availability, uptime, downtime, runtime, etc.</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Number of shims (format converters) needed in the workflow</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Data format flexibility (generic vs. tailored)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Data-flow properties (such as live and dead variables) of the workflow</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Control-flow properties (such as cyclomatic complexity, parallelization potential)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Tool and workflow FAIRness metrics</p>
                                            </list-item>
                                        </list>
                                    </p>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <p>
                                        <list list-type="bullet">
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Subject-specific unique or essential features of tools that the workflow needs and relation to a typical concept map in the domain</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Establishment of tools (known quality metrics, well-understood configuration). Usage in commonly used workflows and known compatibilities by actual usage</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Novelty of tools (new functionalities, potential for novel results, adaptation to new data types)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Similarity to existing concrete or abstract workflows (see above), workflow motifs</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Type and format of produced results. Potential for direct comparison with output from other workflows</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Availability of common quality control, benchmarks and benchmarking data</p>
                                            </list-item>
                                        </list>
                                    </p>
                                </td>
                                <td align="left" colspan="1" rowspan="1" valign="top">
                                    <p>
                                        <list list-type="bullet">
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Reception in the domain literature (citations, altmetrics, praise and criticism)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Reputation, someone or something being &#x201c;famous&#x201d;</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Trends, currently popular technologies</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>User comments and ratings (reflecting, e.g., adequacy, understandability, usability)</p>
                                            </list-item>
                                            <list-item>
                                                <label>&#x2022;</label>
                                                <p>Trust in developers and/or providers</p>
                                            </list-item>
                                        </list>
                                    </p>
                                </td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>
                    <italic toggle="yes">Technical Parameters</italic>
                </p>
                <p>Technical parameters of individual tools and their combinations are relevant to their operation in the context of a composite workflow. These include a whole range of properties, as indicated in 
                    <xref ref-type="table" rid="T1">Table 1</xref>. It is worth noting here that several of these properties (for static analysis at design time of the workflow) are in fact based on prior dynamic analysis of individual tools or other workflows on different levels of abstraction. This underscores the relevance of systematic dynamic analysis of scientific tools and workflows, discussed further below. For use in (automated) workflow composition, we assume the availability of such information in a tool registry like bio.tools, as additional metadata in the tool annotations. Ideally, such metadata would also be collected &#x201c;by stealth&#x201d; through the major community platforms, and provided to tool registries in a standard format. Similarly, an archive of historical workflow traces would be useful, which could provide representative data about prior use of tools and their combinations.</p>
                <p>
                    <italic toggle="yes">Domain-specific considerations</italic>
                </p>
                <p>While technical parameters are important to consider for operational workflows, they need to be complemented with domain-specific considerations to obtain scientifically meaningful results. Examples of domain-specific considerations in (automated) workflow composition are given in 
                    <xref ref-type="table" rid="T1">Table 1</xref>.</p>
                <p>
                    <italic toggle="yes">Community influence</italic>
                </p>
                <p>Finally, community dynamics also influence which tools and workflows are considered preferable. Examples are given in 
                    <xref ref-type="table" rid="T1">Table 1</xref>. While these influences are probably stronger for human workflow developers, automated composers also rely on community-provided information and are thus not agnostic to the corresponding biases.</p>
            </sec>
            <sec id="sec11">
                <title>Dynamic analysis</title>
                <p>Dynamic analysis refers to assessment based on execution, and naturally mostly concerns workflows at the later stages of the life cycle, when they are configured, executable and can be applied to actual data and produce results. We simply distinguish between plain executability, basic validation and systematic benchmarking in the following. Note that the focus of the discussion during the workshop was on benchmarking.</p>
                <p>
                    <italic toggle="yes">Executability</italic>
                </p>
                <p>Executability is probably the most basic and at the same time the most important property of a computational tool or workflow. Only execution will show if the workflow actually works, if the individual tools are compatible in practice (and not only on the annotation level), and if all components have been configured correctly. In practice, it is not uncommon that this executability is applied as the only criterion: If it works and produces results at all, it is considered good enough. If it does not work, one can either try to fix it (requiring understanding why it fails) or discard it. With automated composition and workflow repositories on the rise, which give easier access to alternative workflows for the same problem, the latter is becoming an increasingly viable option.</p>
                <p>
                    <italic toggle="yes">Validation</italic>
                </p>
                <p>Between mere executability and systematic benchmarking there is an area of workflow assessment that we here call validation. It is about critically assessing the behaviour of the workflow. Is the workflow doing what it is supposed to do? Can the results be true? Note that this notion of validation is related to, but not the same as testing, which is a separate issue for scientific software.
                    <sup>
                        <xref ref-type="bibr" rid="ref100">100</xref>&#x2013;
                        <xref ref-type="bibr" rid="ref102">102</xref>
                    </sup> In scientific practice, validation is often done by the workflow developer through execution of (parts of) workflows and inspection of results to see if they look as expected. Ideally, a &#x201c;testing set of mind&#x201d; would be taken and, for example, the workflow be tried with data that is outside of the supported range to see if/how it crashes or gives incorrect results. Such assessment can be cumbersome and challenging as it requires knowledge of both the tool functions and the applied data.</p>
                <p>In more formal terms, we may classify these concerns into validation, verification, sensitivity analysis and uncertainty quantification (UQ) of a workflow (or workflow instance), with the canonical use of these terms being found in reference.
                    <sup>
                        <xref ref-type="bibr" rid="ref103">103</xref>
                    </sup> These terms, and the distinction between them, are found more often in discussions of computational models used in physics and engineering,
                    <sup>
                        <xref ref-type="bibr" rid="ref104">104</xref>,
                        <xref ref-type="bibr" rid="ref105">105</xref>
                    </sup> but the concepts are relevant to (and may be adapted to) the general scientific workflows covered in this article.</p>
                <p>In simple terms, validation asks if what we want to do is correct; verification asks if our implementation is doing what we think it is doing; and uncertainty quantification asks about how sensitive our results are, for example, to uncertainty in the initial inputs. More concretely, validation refers to the accuracy of the theoretical approach we are trying to implement (the mathematical model or analysis procedure) and is measured by agreement with 'reality', e.g. comparing predictions with experimental results. Verification is a much broader term, encompassing various assessments of the correctness of the implementation itself - is execution of my workflow or code reproducing the theoretical approach I think it is? Finally, uncertainty quantification is an enormous field in its own right, and there are many levels at which it may be introduced to a workflow, from modification of individual steps (most intrusive) through to repeatedly sampling the entire workflow as a 'black box' (least intrusive, e.g. see reference
                    <sup>
                        <xref ref-type="bibr" rid="ref106">106</xref>
                    </sup>).
                </p>
                <p>While only a subset of the above may be relevant to any given workflow, a scientific workflow in general will be subject to all three concerns. Adoption of automated workflow composition tools in the wider scientific community will likely come with demands for rigorous checks of correctness, and a range of definitions thereof.</p>
                <p>
                    <italic toggle="yes">Benchmarking</italic>
                </p>
                <p>When done systematically, the dynamic analysis of computational tools and workflows is also known as benchmarking.
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup> In the scientific workflow development life cycle (
                    <xref ref-type="fig" rid="f1">Figure 1</xref>) benchmarking helps to determine which workflow versions or instances will be put into production for large-scale use by third parties. It assumes that the workflows are executable and validated. Generally, benchmarking can be performed with regard to scientific (e.g. analysis quality), technical (e.g. runtime performance, robustness) and usability (e.g. ease of reuse) aspects. Ideally, benchmarking uses publicly available (gold, silver, or artificial/synthetic) reference datasets. Benchmarking can be performed in a single lab (often done for benchmarking on specific aspects) or by a community of researchers (advantageous to avoid bias, often done for common and shared challenges). Due to their composite nature, workflows are generally more fragile than individual tools, which is also relevant for their benchmarking. For instance, benchmarking can help to identify problematic tools or non-interoperable tool combinations.</p>
                <p>OpenEBench
                    <sup>
                        <xref ref-type="bibr" rid="ref59">59</xref>
                    </sup> provides a platform for community-based benchmarking of bioinformatics resources. Its scientific benchmarking component provides a virtual research environment in which individual researchers or scientific communities can share data, tools, workflows and metrics for their benchmarking challenges. The virtual research environment supports the execution of automated metrics generation workflows, and the comparison of different resources&#x2019; benchmarking results. In addition, OpenEBench has a technical monitoring component, which automatically checks basic technical properties of the registered resources, and updates the OpenEBench entry on a daily basis. Information captured here includes, for example, the availability of documentation, uptime/access time of remote resources, and the number of citations on corresponding publications. Currently metrics for the FAIR software principles
                    <sup>
                        <xref ref-type="bibr" rid="ref52">52</xref>
                    </sup> are being developed, which will also be integrated in OpenEBench&#x2019;s technical monitoring component.</p>
            </sec>
            <sec id="sec12">
                <title>Discussion of workflow assessment</title>
                <p>Although we discussed static and dynamic workflow analysis separately, it is clear that in practice they are used together, often interleaved. Workflow developers would combine a first few tools, then execute to see what happens, and from there adapt and extend the workflow. Automated approaches to workflow development should follow this pattern, and assist the workflow developer in this incremental, checkpoint-rich style of development, rather than aiming for complete start-to-end automation. In the following we discuss three major, cross-cutting aspects of workflow assessment further: the question of what is &#x201c;best&#x201d;, the idea of a fitness function to measure to what extent a workflow meets its purpose, and a &#x201c;great bake-off&#x201d; perspective on automated composition and systematic benchmarking.</p>
                <p>
                    <italic toggle="yes">The &#x201c;best&#x201d;</italic>
                </p>
                <p>Somewhat didactically, we started off the discussion about workflow assessment with the aim of looking for the &#x201c;best&#x201d; possible workflows. However, obviously it is highly context-dependent and often unclear what this means. With the large number of candidates potentially generated by automated composition, it is often more sensible to cull nonsensical workflows rather than trying to find the very best. Doing this with the most coarse-grained criteria will ideally lead to a significantly reduced list of options that is amenable to more fine-grained analysis and evaluation. Nevertheless, the question of what kind of workflows to prefer remains context-specific. It seems advisable to explicate concrete use cases and personas to get a better understanding of relevant situations, perspectives, and requirements. Some spontaneous ideas from the workshop are summarized in 
                    <xref ref-type="fig" rid="f2">Figure 2</xref>.</p>
                <fig fig-type="figure" id="f2" orientation="portrait" position="float">
                    <label>Figure 2. </label>
                    <caption>
                        <title>Workflow personas and use cases.</title>
                        <p>Demands for configurability are ordered from low (top) to total (bottom). All participants need a publication-ready description of the provenance of their findings for perfect reproducibility. Not on the list is Rob the Routinier, who keeps doing his stuff just the way he did it for the last ten years.</p>
                    </caption>
                    <graphic id="gr2" orientation="portrait" position="float" xlink:href="https://f1000research-files.f1000.com/manuscripts/57615/04941ead-f75c-4f88-9cdf-b45691a72994_figure2.gif"/>
                </fig>
                <p>Obviously these use cases and personas need to be worked out much further and only cover a part of the possible scenarios, but they clearly show that the meaning of &#x201c;best&#x201d; workflow spans a wide range of interpretations.</p>
                <p>
                    <italic toggle="yes">Fitness function</italic>
                </p>
                <p>Another way to think about the problem of workflow assessment is to view it as designing a fitness function in an evolutionary computing approach or a reward function in reinforcement learning.
                    <sup>
                        <xref ref-type="bibr" rid="ref107">107</xref>
                    </sup> Thus, the goal is to try to learn or optimize a particular workflow given the required output. Such an optimization lens could be useful for applying advances in machine learning or evolutionary computing to workflow composition. Thus, the aim would be to devise functions to evaluate how &#x201c;fit for purpose&#x201d; workflows are, given a goal like, for example, a certain benchmark. Such a function should consider functional (biological) as well as performance (computational) metrics, such as:
                    <list list-type="bullet">
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Biological ground truth, what is known biologically</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Robustness to a non-specialist user, and to evolving technical requirements (execution architecture)</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Performance, reliability, reproducibility</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Tool compatibility, tool popularity</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>Ratings from users who have employed workflows that are presented as possibilities (akin to looking at the ratings of a product on 
                                <ext-link ext-link-type="uri" xlink:href="http://Amazon.com">Amazon.com</ext-link> when making a purchase)</p>
                        </list-item>
                        <list-item>
                            <label>&#x2022;</label>
                            <p>How unique is the combination of tools.</p>
                        </list-item>
                    </list>
                </p>
                <p>A critical challenge for adopting this approach is the translation of the rich information both about workflows and surrounding biological knowledge into structures amenable to these optimization frameworks.</p>
                <p>
                    <italic toggle="yes">Great Bake-Off</italic>
                </p>
                <p>We found it an appealing perspective to think about workflow assessment in the context of automated composition as a &#x2018;Great Bake-Off&#x2019;: carry out a pre-selection of automatically generated alternative workflows with low-effort assessment methods, and then let the remaining candidates compete against each other through rigorous benchmarking, in order to finally determine the workflow(s) to use in production. This process should include all areas of benchmarking (i.e., scientific, technical, and usability), though possibly with varying weighting depending on context. A similar strategy has for example successfully been taken by Automated Machine Learning (AutoML)
                    <sup>
                        <xref ref-type="bibr" rid="ref35">35</xref>
                    </sup> for the automated generation of machine learning models.</p>
                <p>Generally, technical benchmarking is comparatively easy and objective, while scientific and usability benchmarking are quite involved, less objective and more subject to bias. They require expert knowledge and ideally many different test data sets and metrics provided by different people. Critical assessments using unpublished data with ground truth may be a reliable means to get quality benchmarking information from the communities. This can also include the creation of synthetic benchmarking data sets in case no suitable real data is available. Synthetic data might furthermore deliberately contain noise or wrong items, to test if workflows are robust or able to detect unsuitable data quality.</p>
                <p>Obviously, for a &#x2018;Great Bake-Off&#x2019; there needs to be a decision or agreement on the metrics used to determine the &#x201c;best&#x201d;. For wide acceptance, they need to be informative and domain-specific, but also community-supported. These community approaches should be objectively executed without including the tool and workflow developers. Unfortunately, such endeavours are rare at present.</p>
            </sec>
        </sec>
        <sec id="sec13">
            <title>Roadmap</title>
            <p>The Lorentz workshop aimed at developing a common perspective on future directions for automated composition of workflows in the life sciences. Ideas were collected in the discussion during the whole week, but the last workshop day was devoted to formulating concrete action items for the coming years. Generally speaking, the overall goal of these actions is to bring the different individual pieces discussed in the workshop together in a (more) coherent framework. We deliberately focused on actions for the next five years, acknowledging that we, in our thinking, need to distinguish the short-term practical possibilities from long-term speculations and the things that might be achieved only over decades. In the following we outline such future directions for foundations, tooling and infrastructure, community and applications of automated workflow composition in the life sciences.</p>
            <sec id="sec14">
                <title>Foundations</title>
                <p>Achieving broad application of automated workflow composition depends on solid foundations, including a common understanding of its scope and purpose as well as community-approved definitions and standards that support these aims (see 
                    <xref ref-type="table" rid="T2">Table 2</xref>).</p>
                <table-wrap id="T2" orientation="portrait" position="float">
                    <label>Table 2. </label>
                    <caption>
                        <title>Future work on foundations.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Action</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Examples</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Clarification of usage scenarios</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Collect and explicate concrete user stories and scenarios, including personas (&#x201c;as a &lt;role&gt; I want to &lt;capability&gt; so I can &lt;do x&gt;&#x201d;). Elicit requirements, prioritize using the MoSCoW method.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Definition of lacking standards</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Universal identifier for workflows, IDs for code and tools. Format to formally represent parameter sets in a general way.
                                    <break/>Standardized hardware constraints of software (e.g. technical parameters, firmware).</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Development of lacking methods</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Systematic collection and analysis of tool usage data (for funding, sustainability, benchmarking). Alignment and similarity measures between workflows, together with methods for comparing abstract and concrete workflows.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Exploration of new ideas</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">&#x201c;Knowledge acquisition by stealth&#x201d; for scaling up tool annotations and provenance trace collection. The use of workflow provenance traces for heuristic improvements of automated composition. Methods for automated workflow benchmarking, possibly reusing approaches from machine learning (AutoML).</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The lack of clearly defined use cases was a recurring theme during the workshop discussions. While all approaches and methods make implicit assumptions about the usage scenarios and user groups they target, this is hardly spelled out explicitly. Their elucidation (using established methods from software requirements engineering) is a priority and will form a solid basis for future joint efforts. For example, a user who has some data and a desired endpoint, wondering how to possibly get there, might benefit from a &#x201c;PipelineSketcher&#x201d; that can automatically propose suitable sequences of (conceptual) operations. Another user might want to take an existing concrete workflow and get suggestions for new tools, which could be given by a &#x201c;RoboAdvisor&#x201d;.</p>
                <p>Another important area of foundational work is the definition and development of lacking standards and methods, such as universal workflow identifiers and methods for meaningful workflow comparison. Furthermore, several promising, but so far little developed ideas wait to be explored further. Examples include the use of workflow provenance traces as a knowledge base for heuristic improvements of automated composition, methods for automating the benchmarking of workflows, and the concept of &#x201c;knowledge acquisition by stealth&#x201d;.</p>
            </sec>
            <sec id="sec15">
                <title>Tooling and infrastructure</title>
                <p>Developing and maintaining effective tooling and infrastructure for automated composition of workflows is hard. The workshop discussions highlighted several challenges of the current software ecosystem, in particular related to missing functionality, insufficient compatibility and interoperability, usability and convenience. Accordingly, the list of actions on tooling and infrastructure grew very easily (see 
                    <xref ref-type="table" rid="T3">Table 3</xref>).</p>
                <table-wrap id="T3" orientation="portrait" position="float">
                    <label>Table 3. </label>
                    <caption>
                        <title>Future work on tooling and infrastructure.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Action</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Examples</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Provide missing functionality</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Enrich bio.tools entries with additional information, e.g. annotation quality, user ratings, automated composition readiness. Enrich bio.tools with additional functionality, e.g. for finding similar tools, collection of user experience information.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Increase compatibility and interoperability</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Support automated workflow composition in/to general workflow specification languages such as CWL. Exchange valid/benchmarkable workflows in common format (e.g. RO-Crate). Capture parameter settings as standardized items. Automatic conversion of data set metadata (data repositories) into correctly applied workflow parameters. Maintain dedicated libraries of helper services (shims). Improve information exchange between systems, e.g. OpenEBench, bio.tools, Conda and WorkflowHub.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Improve usability and convenience</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Improve the ease and regularity of updating ontologies such as EDAM. Integrate tool recommendation, workflow exploration and user feedback features in WfMS (e.g. Galaxy), workflow repositories and registries (e.g. WorkflowHub). Use registry and workflow engine usage data for training recommendation systems. Collect tool usage data (anonymous, public) and workflow usage data (anonymous, public). Create infrastructure for (automated) workflow integration testing (in silico generated data and community-maintained test data). Support open-source community health checks (e.g. 
                                    <ext-link ext-link-type="uri" xlink:href="https://cauldron.io/">Cauldron</ext-link>, 
                                    <ext-link ext-link-type="uri" xlink:href="https://chaoss.community/">CHAOSS</ext-link>, 
                                    <ext-link ext-link-type="uri" xlink:href="https://www.repostatus.org/">repostatus.org</ext-link>).</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>The concept and technology of nanopublications
                    <sup>
                        <xref ref-type="bibr" rid="ref108">108</xref>
                    </sup> could possibly have a role in contributing to solutions to the above points. Nanopublications are small snippets of provenance-aware semantic representations, which can among other things be used to represent workflows, workflow steps, and the data they consume and produce. An ecosystem of tools and services around nanopublications has recently emerged that allows for decentralized and robust interaction with such semantic representations, which could now be harnessed for automated workflow composition.</p>
                <p>Clearly, the field faces a trade-off between a limited workforce and many wishes, underscoring the need for well-defined use cases and prioritized requirements (see above) and community involvement (see below).</p>
            </sec>
            <sec id="sec16">
                <title>Community</title>
                <p>Community is known to be key, and not surprisingly several actions were proposed in relation to community building and involving the community in further development. Several future steps can benefit from existing communities and connect to ongoing initiatives, making them feasible in the medium term. 
                    <xref ref-type="table" rid="T4">Table 4</xref> shows some of the concrete actions proposed.</p>
                <table-wrap id="T4" orientation="portrait" position="float">
                    <label>Table 4. </label>
                    <caption>
                        <title>Future work on community.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Action</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Examples</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Community building</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Use hackathons to bring the community together, e.g. propose topics in established Hackathons (e.g. BioHackathon Japan, European Biohackathon 2020). Establish a regular dedicated hackathon on the theme of automated workflow composition. Identify opportunities to train researchers to use resources and participate in the community efforts. Identify &#x201c;hot topics&#x201d; and forums for community mobilisation, e.g. collecting abstract workflows for an instructive &#x201c;picture book of bioinformatics&#x201d;.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Community development</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Survey stakeholder needs, including industry, publishers (e.g. GigaScience), data repositories, frameworks (e.g. Bioconductor, Linux distributions) etc. Leverage ELIXIR to drive the technical &amp; political consolidation. Establish an ELIXIR Focus Group on automated workflow composition.</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec id="sec17">
                <title>Applications</title>
                <p>At the beginning of the workshop several workflow applications from the fields of genomics, proteomics, proteogenomics, metabolomics, metaomics, scientometrics and text mining were presented to set the scene. After all, such applications should drive the developments, and they will mercilessly put methods, tools and infrastructure to the test. Several applications of the available workflow composition frameworks were sketched by the participants, some more domain-oriented and some more tool-oriented, but all with the potential of creating valuable insights for further developments (see 
                    <xref ref-type="table" rid="T5">Table 5</xref>).</p>
                <table-wrap id="T5" orientation="portrait" position="float">
                    <label>Table 5. </label>
                    <caption>
                        <title>Future work on applications.</title>
                    </caption>
                    <table content-type="article-table" frame="hsides">
                        <thead>
                            <tr>
                                <th align="left" colspan="1" rowspan="1" valign="top">Action</th>
                                <th align="left" colspan="1" rowspan="1" valign="top">Examples</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Annotation of tools</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Map command lines to individual tool functions. Organize available and possible shims. Annotate possible format transformations.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Automated composition of workflows</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Compare the benefits of alternative methods for automated composition (exploration, recommendation, elaboration) on concrete examples. (Try to) reproduce a workflow found in a paper using literature mining and automated workflow composition.</td>
                            </tr>
                            <tr>
                                <td align="left" colspan="1" rowspan="1" valign="top">Benchmarking of workflows</td>
                                <td align="left" colspan="1" rowspan="1" valign="top">Explore the value of automated workflow composition in combination with systematic benchmarking (&#x201c;Great Bake-Off&#x201d;). Work towards a fully benchmarked set of &gt;10 automatically composed proteomics workflows as a demonstrator. Collect data sets with ground truths and benchmark metrics in the omics domains.</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>Currently the coverage in ontologies and maturity of tool annotations vary considerably between life science domains, with genomics and more recently proteomics having received more attention. As success stories from adjacent fields are so important in encouraging joint efforts and widening adoption, we see future efforts focused on providing high-quality tool curations with concomitant ontology updates in specific fields, such as metagenomics, metaproteomics, metabolomics, epidemiology or biomedical imaging.</p>
            </sec>
        </sec>
        <sec id="sec18" sec-type="conclusion">
            <title>Conclusion</title>
            <p>In this report we have summarized the salient points from five days of intense scientific discourse ranging from fine-grained technical details to very broad thematic topics. Naturally, not everything that was discussed in the workshop could be included here. Despite similar ideas and efforts having struggled to find widespread application in the past, the attendees left the workshop with renewed confidence and optimism that we are at least considerably closer now, having clearly identified what development of community standards, ontologies and annotations is still needed to achieve broad adoption of automated workflow composition techniques across the life sciences.</p>
            <p>In the time between the workshop and finalizing this report, several things have happened. For example, bio.tools received a number of new features, and continues to grow. The WorkflowHub has been released and is now in productive use. Along with this, the Bioschemas Computational Workflow Profile,
                <sup>
                    <xref ref-type="bibr" rid="ref109">109</xref>
                </sup> a 
                <ext-link ext-link-type="uri" xlink:href="http://schema.org">schema.org</ext-link>-based specification for describing a computational workflow, has been defined. It is used by the WorkflowHub to mark up its entries and as the description of a workflow in the Workflow-RO-Crate,
                <sup>
                    <xref ref-type="bibr" rid="ref110">110</xref>
                </sup> the interchange packaging format which is a specialisation of the RO-Crate packaging format,
                <sup>
                    <xref ref-type="bibr" rid="ref111">111</xref>
                </sup> also based on 
                <ext-link ext-link-type="uri" xlink:href="http://schema.org">schema.org</ext-link>. This enables workflows and associated components to be exchanged between the WorkflowHub, workflow management systems like Galaxy, Snakemake and Nextflow and their repositories, and workflow utilities like OpenEBench and LifeMonitor. Given its 
                <ext-link ext-link-type="uri" xlink:href="http://schema.org">schema.org</ext-link> web markup basis, workflows marked up using the profile are readily accessible to search engines. Furthermore, case studies have been started, project proposals written, and further papers published.</p>
            <p>While some of these developments were in some form anticipated at the workshop, others emerged from ongoing developments and urgent needs. Perhaps this is representative of a field that strives to push and challenge the frontiers of life science infrastructure. After all, the value of automated workflow composition lies in the unexpected.</p>
        </sec>
        <sec id="sec19">
            <title>Data availability</title>
            <sec id="sec20">
                <title>Underlying data</title>
                <p>No underlying data are associated with this article.</p>
            </sec>
            <sec id="sec21">
                <title>Extended data</title>
                <p>Open Science Framework: Lorentz Center Workshop: Automated Workflow Composition in the Life Sciences. 
                    <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/A5EJ7">https://doi.org/10.17605/OSF.IO/A5EJ7</ext-link>.
                    <sup>
                        <xref ref-type="bibr" rid="ref115">115</xref>
                    </sup>
                </p>
                <p>This project contains the following extended data:
                    <list list-type="bullet">
                        <list-item>
                            <label>-</label>
                            <p>Executive Summary.pdf (short post-workshop summary)</p>
                        </list-item>
                        <list-item>
                            <label>-</label>
                            <p>Workflow Poster.jpg (workshop poster)</p>
                        </list-item>
                        <list-item>
                            <label>-</label>
                            <p>Workshop Program.pdf (workshop agenda)</p>
                        </list-item>
                    </list>
                </p>
                <p>Data are available under the terms of the 
                    <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International license</ext-link> (CC-BY 4.0).</p>
            </sec>
        </sec>
    </body>
    <back>
        <ack>
            <title>Acknowledgments</title>
            <p>In early March 2020, just in time before the Covid-19 pandemic disrupted life as we knew it, 36 researchers from 10 different countries and specializations ranging from biomedicine to computer science met at the Lorentz Center in Leiden (Netherlands) for a weeklong workshop on automated composition of workflows in the life sciences. They worked towards a common level of understanding and joint research agenda that is summarized by this paper. We compliment all participants for their valuable contributions during the workshop keynotes, technical talks, breakout discussions and hackathons.</p>
            <p>In particular, we thank Jennifer Harrow, Rohola Hosseini, Rajaram Kaliyaperumal, Mateusz Kuzak, Melchior du Lac, David L&#x00e4;hnemann, Miriam Paya Milans and Marco Roos for their input to the discussions and collaborative notes that laid the foundations for this manuscript.</p>
            <p>Special thanks go to the Lorentz Center staff who went above and beyond their duties to make the workshop a success.</p>
            <p>The figures in the illustrations were created using 
                <ext-link ext-link-type="uri" xlink:href="http://BioRender.com">BioRender.com</ext-link>.</p>
        </ack>
        <ref-list>
            <title>References</title>
            <ref id="ref1">
                <label>1</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Perkel</surname>
                            <given-names>JM</given-names>
                        </name>
</person-group>:
                    <article-title>That&#x2019;s the way we flow. Computational pipelines turn raw data into reproducible scientific knowledge.</article-title>
                    <source>

                        <italic toggle="yes">Nature.</italic>
</source>
                    <year>2019</year>;<volume>573</volume>:<fpage>149</fpage>&#x2013;<lpage>150</lpage>.
                    <pub-id pub-id-type="doi">10.1038/d41586-019-02619-z</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Atkinson</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gesing</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Montagnat</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Scientific workflows: Past, present and future.</article-title>
                    <source>

                        <italic toggle="yes">Future Gener. Comput. Syst.</italic>
</source>
                    <year>2017</year>;<volume>75</volume>:<fpage>216</fpage>&#x2013;<lpage>227</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.future.2017.05.041</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Taylor</surname>
                            <given-names>IJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Deelman</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gannon</surname>
                            <given-names>DB</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <source>

                        <italic toggle="yes">Workflows for e-Science: Scientific Workflows for Grids.</italic>
</source>
                    <publisher-loc>London</publisher-loc>:
                    <publisher-name>Springer-Verlag</publisher-name>;<year>2007</year>.</mixed-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hull</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wolstencroft</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stevens</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Taverna: a tool for building and running workflows of services.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2006</year>;<volume>34</volume>:<fpage>W729</fpage>&#x2013;<lpage>W732</lpage>.
                    <pub-id pub-id-type="doi">10.1093/nar/gkl320</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Oinn</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Addis</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferris</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Taverna: a tool for the composition and enactment of bioinformatics workflows.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2004</year>;<volume>20</volume>:<fpage>3045</fpage>&#x2013;<lpage>3054</lpage>.
                    <pub-id pub-id-type="pmid">15201187</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/bth361</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Intelligent Workflow Systems and Provenance-Aware Software.</article-title>
                    <source>

                        <italic toggle="yes">Int. Congr. Environ. Model. Softw.</italic>
</source>
                    <year>2014</year>.</mixed-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Afgan</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Batut</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2018</year>;<volume>46</volume>:<fpage>W537</fpage>&#x2013;<lpage>W544</lpage>.
                    <pub-id pub-id-type="pmid">29790989</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gky379</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6030816</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lud&#x00e4;scher</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Altintas</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Berkley</surname>
                            <given-names>C</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Scientific workflow management and the Kepler system.</article-title>
                    <source>

                        <italic toggle="yes">Concurr. Comput. Pract. Exp.</italic>
</source>
                    <year>2006</year>;<volume>18</volume>:<fpage>1039</fpage>&#x2013;<lpage>1065</lpage>.
                    <pub-id pub-id-type="doi">10.1002/cpe.994</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Deelman</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferreira da Silva</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vahi</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Pegasus workflow management system: Translational computer science in practice.</article-title>
                    <source>

                        <italic toggle="yes">J. Comput. Sci.</italic>
</source>
                    <year>2020</year>;<fpage>101200</fpage>.
                    <pub-id pub-id-type="doi">10.1016/j.jocs.2020.101200</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <mixed-citation publication-type="web">
                    <source>

                        <italic toggle="yes">Existing Workflow systems.</italic>
</source>Last accessed 2020/02/04.
                    <ext-link ext-link-type="uri" xlink:href="https://s.apache.org/existing-workflow-systems">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Essawy</surname>
                            <given-names>BT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goodall</surname>
                            <given-names>JL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Evaluation of the OntoSoft Ontology for describing metadata for legacy hydrologic modeling software.</article-title>
                    <source>

                        <italic toggle="yes">Environ. Model. Softw.</italic>
</source>
                    <year>2017</year>;<volume>92</volume>:<fpage>317</fpage>&#x2013;<lpage>329</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.envsoft.2017.01.024</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mishra</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>OntoSoft: A distributed semantic registry for scientific software</chapter-title>. In:
                    <source>

                        <italic toggle="yes">2016 IEEE 12th International Conference on e-Science (e-Science).</italic>
</source>
                    <publisher-loc>Baltimore, MD, USA</publisher-loc>:
                    <publisher-name>IEEE</publisher-name>;<year>2016</year>; pp.<fpage>331</fpage>&#x2013;<lpage>336</lpage>.
                    <pub-id pub-id-type="doi">10.1109/eScience.2016.7870916</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ratnakar</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <chapter-title>OntoSoft: Capturing Scientific Software Metadata</chapter-title>. In:
                    <source>

                        <italic toggle="yes">Proceedings of the 8th International Conference on Knowledge Capture - K-CAP 2015.</italic>
</source>
                    <publisher-loc>Palisades, NY, USA</publisher-loc>:
                    <publisher-name>ACM Press</publisher-name>;<year>2015</year>; pp.<fpage>1</fpage>&#x2013;<lpage>4</lpage>.
                    <pub-id pub-id-type="doi">10.1145/2815833.2816955</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ison</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kala&#x0161;</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jonassen</surname>
                            <given-names>I</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>EDAM: an ontology of bioinformatics operations, types of data and identifiers, topics and formats.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2013</year>;<volume>29</volume>:<fpage>1325</fpage>&#x2013;<lpage>1332</lpage>.
                    <pub-id pub-id-type="pmid">23479348</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/btt113</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3654706</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <mixed-citation publication-type="web">
                    <collab>PROV-O</collab>:
                    <article-title>The PROV Ontology.</article-title> Last accessed 2019/08/26.
                    <ext-link ext-link-type="uri" xlink:href="https://www.w3.org/TR/2013/REC-prov-o-20130430/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wolstencroft</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alper</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hull</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The myGrid ontology: bioinformatics service discovery.</article-title>
                    <source>

                        <italic toggle="yes">Int. J. Bioinforma. Res. Appl.</italic>
</source>
                    <year>2007</year>;<volume>3</volume>:<fpage>303</fpage>&#x2013;<lpage>325</lpage>.
                    <pub-id pub-id-type="pmid">18048194</pub-id>
                    <pub-id pub-id-type="doi">10.1504/IJBRA.2007.015005</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ison</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rapacki</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>M&#x00e9;nager</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Tools and data services registry: a community effort to document bioinformatics resources.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2016</year>;<volume>44</volume>:<fpage>D38</fpage>&#x2013;<lpage>D47</lpage>.
                    <pub-id pub-id-type="doi">10.1093/nar/gkv1116</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bhagat</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tanoh</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nzuobontane</surname>
                            <given-names>E</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>BioCatalogue: a universal catalogue of web services for the life sciences.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2010</year>;<volume>38</volume>:<fpage>W689</fpage>&#x2013;<lpage>W694</lpage>.
                    <pub-id pub-id-type="pmid">20484378</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkq394</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2896129</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goble</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Belhajjame</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tanoh</surname>
                            <given-names>F</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>BioCatalogue: A Curated Web Service Registry For The Life Science Community.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Preced.</italic>
</source>
                    <year>2009</year>;<fpage>1</fpage>&#x2013;<lpage>1</lpage>.
                    <pub-id pub-id-type="doi">10.1038/npre.2009.3132.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hung</surname>
                            <given-names>L-H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hu</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Meiss</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Building Containerized Workflows Using the BioDepot-Workflow-Builder.</article-title>
                    <source>

                        <italic toggle="yes">Cell Syst.</italic>
</source>
                    <year>2019</year>.
                    <pub-id pub-id-type="pmid">31521606</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.cels.2019.08.007</pub-id>
                    <pub-id pub-id-type="pmcid">PMC6883158</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Cito</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ferme</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gall</surname>
                            <given-names>HC</given-names>
                        </name>
</person-group>:
                    <chapter-title>Using Docker Containers to Improve Reproducibility in Software and Web Engineering Research</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Bozzon</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cudr&#x00e9;-Mauroux</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pautasso</surname>
                            <given-names>C</given-names>
                        </name>
</person-group>: (eds.)
                    <source>

                        <italic toggle="yes">Web Engineering.</italic>
</source>
                    <publisher-name>Springer International Publishing</publisher-name>;<year>2016</year>; pp.<fpage>609</fpage>&#x2013;<lpage>612</lpage>.</mixed-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Di Tommaso</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chatzou</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Floden</surname>
                            <given-names>EW</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Nextflow enables reproducible computational workflows.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Biotechnol.</italic>
</source>
                    <year>2017</year>;<volume>35</volume>:<fpage>316</fpage>&#x2013;<lpage>319</lpage>.
                    <pub-id pub-id-type="pmid">28398311</pub-id>
                    <pub-id pub-id-type="doi">10.1038/nbt.3820</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Voss</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gentry</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Van der Auwera</surname>
                            <given-names>G</given-names>
                        </name>
</person-group>:
                    <article-title>Full-stack genomics pipelining with GATK4 + WDL + Cromwell.</article-title>
                    <source>

                        <italic toggle="yes">F1000Res.</italic>
</source>
                    <year>2017</year>;<volume>6</volume>.
                    <pub-id pub-id-type="doi">10.7490/f1000research.1114631.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wolstencroft</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Haines</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fellows</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Taverna workflow suite: designing and executing workflows of Web Services on the desktop, web or in the cloud.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2013</year>;<volume>41</volume>:<fpage>W557</fpage>&#x2013;<lpage>W561</lpage>.
                    <pub-id pub-id-type="pmid">23640334</pub-id>
                    <pub-id pub-id-type="doi">10.1093/nar/gkt328</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3692062</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>K&#x00f6;ster</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rahmann</surname>
                            <given-names>S</given-names>
                        </name>
</person-group>:
                    <article-title>Snakemake&#x2014;a scalable bioinformatics workflow engine.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2012</year>;<volume>28</volume>:<fpage>2520</fpage>&#x2013;<lpage>2522</lpage>.
                    <pub-id pub-id-type="pmid">22908215</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/bts480</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <mixed-citation publication-type="web">
                    <collab>The WorkflowHub</collab>. Last accessed 2020/10/25.
                    <ext-link ext-link-type="uri" xlink:href="https://workflowhub.eu/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref27">
                <label>27</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ferreira da Silva</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pottier</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Coleman</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>WorkflowHub: Community Framework for Enabling Scientific Workflow Research and Development -- Technical Report.</article-title>
                    <source>

                        <italic toggle="yes">arXiv:2009.00250 [cs].</italic>
</source>
                    <year>2020</year>.</mixed-citation>
            </ref>
            <ref id="ref28">
                <label>28</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goble</surname>
                            <given-names>CA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bhagat</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Aleksejevs</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>myExperiment: a repository and social network for the sharing of bioinformatics workflows.</article-title>
                    <source>

                        <italic toggle="yes">Nucleic Acids Res.</italic>
</source>
                    <year>2010</year>;<volume>38</volume>:<fpage>W677</fpage>&#x2013;<lpage>W682</lpage>.
                    <pub-id pub-id-type="doi">10.1093/nar/gkq429</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref29">
                <label>29</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>De Roure</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goble</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stevens</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>The design and realisation of the myExperiment Virtual Research Environment for social sharing of workflows.</article-title>
                    <source>

                        <italic toggle="yes">Future Gener. Comput. Syst.</italic>
</source>
                    <year>2009</year>;<volume>25</volume>:<fpage>561</fpage>&#x2013;<lpage>567</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.future.2008.06.010</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref30">
                <label>30</label>
                <mixed-citation publication-type="web">
                    <article-title>Common Workflow Language Specifications, v1.2.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://www.commonwl.org/v1.2/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref31">
                <label>31</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>M&#x00f6;ller</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Prescott</surname>
                            <given-names>SW</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wirzenius</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Robust Cross-Platform Workflows: How Technical and Scientific Communities Collaborate to Develop, Test and Share Best Practices for Data Analysis.</article-title>
                    <source>

                        <italic toggle="yes">Data Sci. Eng.</italic>
</source>
                    <year>2017</year>;<volume>2</volume>:<fpage>232</fpage>&#x2013;<lpage>244</lpage>.
                    <pub-id pub-id-type="doi">10.1007/s41019-017-0050-4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref32">
                <label>32</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Z&#x00f6;ller</surname>
                            <given-names>M-A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Huber</surname>
                            <given-names>MF</given-names>
                        </name>
</person-group>:
                    <article-title>Benchmark and Survey of Automated Machine Learning Frameworks.</article-title>
                    <source>

                        <italic toggle="yes">J. Artif. Intell. Res.</italic>
</source>
                    <year>2021</year>;<volume>70</volume>:<fpage>409</fpage>&#x2013;<lpage>472</lpage>.
                    <pub-id pub-id-type="doi">10.1613/jair.1.11854</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref33">
                <label>33</label>
                <mixed-citation publication-type="confproc">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chen</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chow</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Davidson</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>:
                    <article-title>Developments in MLflow: A System to Accelerate the Machine Learning Lifecycle.</article-title>
                    <source>

                        <italic toggle="yes">In: Proceedings of the Fourth International Workshop on Data Management for End-to-End Machine Learning.</italic>
</source>
                    <publisher-loc>New York, NY, USA</publisher-loc>;
                    <publisher-name>Association for Computing Machinery</publisher-name>;<year>2020</year>; pp.<fpage>1</fpage>&#x2013;<lpage>4</lpage>.
                    <pub-id pub-id-type="doi">10.1145/3399579.3399867</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref34">
                <label>34</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Waring</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lindvall</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Umeton</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Automated machine learning: Review of the state-of-the-art and opportunities for healthcare.</article-title>
                    <source>

                        <italic toggle="yes">Artif. Intell. Med.</italic>
</source>
                    <year>2020</year>;<volume>104</volume>:<fpage>101822</fpage>.
                    <pub-id pub-id-type="pmid">32499001</pub-id>
                    <pub-id pub-id-type="doi">10.1016/j.artmed.2020.101822</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref35">
                <label>35</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hutter</surname>
                            <given-names>F</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kotthoff</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vanschoren</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <source><italic toggle="yes">Automated Machine Learning: Methods, Systems, Challenges.</italic></source>
                    <publisher-name>Springer International Publishing</publisher-name>;<year>2019</year>.
                    <pub-id pub-id-type="doi">10.1007/978-3-030-05318-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref36">
                <label>36</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yao</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ratnakar</surname>
                            <given-names>V</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>P4ML: A Phased Performance-Based Pipeline Planner for Automated Machine Learning.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://www.semanticscholar.org/paper/P4ML%3A-A-Phased-Performance-Based-Pipeline-Planner-Gil-Yao/bf1939b6ed77d8e05b4bc6490a7674a316af2637">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref37">
                <label>37</label>
                <mixed-citation publication-type="other">
                    <source>

                        <italic toggle="yes">Home.</italic>
</source> Last accessed 2021/03/01.
                    <ext-link ext-link-type="uri" xlink:href="https://airflow.apache.org/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref38">
                <label>38</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <chapter-title>Workflow Composition: Semantic Representations for Flexible Automation</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Taylor</surname>
                            <given-names>IJ</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Deelman</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gannon</surname>
                            <given-names>DB</given-names>
                        </name>

                        <etal/>
</person-group>(eds.):
                    <source>

                        <italic toggle="yes">Workflows for e-Science: Scientific Workflows for Grids.</italic>
</source>
                    <publisher-loc>London</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2007</year>; pp.<fpage>244</fpage>&#x2013;<lpage>257</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-1-84628-757-2_16</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref39">
                <label>39</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ratnakar</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Wings: Intelligent Workflow-Based Design of Computational Experiments.</article-title>
                    <source>

                        <italic toggle="yes">IEEE Intell. Syst.</italic>
</source>
                    <year>2011</year>;<volume>26</volume>:<fpage>62</fpage>&#x2013;<lpage>72</lpage>.
                    <pub-id pub-id-type="doi">10.1109/MIS.2010.9</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref40">
                <label>40</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hempel</surname>
                            <given-names>CG</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Philosophy of Natural Science.</italic>
</source>
                    <publisher-loc>Englewood Cliffs, N.J.</publisher-loc>:
                    <publisher-name>Prentice-Hall</publisher-name>;<year>1966</year>.</mixed-citation>
            </ref>
            <ref id="ref41">
                <label>41</label>
                <mixed-citation publication-type="web">
                    <article-title>Center for Scientific Workshops in All Disciplines - Automated Workflow Composition in the Life Sciences.</article-title> Last accessed 2020/05/05.
                    <ext-link ext-link-type="uri" xlink:href="https://www.lorentzcenter.nl/automated-workflow-composition-in-the-life-sciences.html">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref42">
                <label>42</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lud&#x00e4;scher</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Weske</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McPhillips</surname>
                            <given-names>T</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Scientific Workflows: Business as Usual?</chapter-title> In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Dayal</surname>
                            <given-names>U</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Eder</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Koehler</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>(eds.):
                    <source>

                        <italic toggle="yes">Business Process Management.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2009</year>; pp.<fpage>31</fpage>&#x2013;<lpage>47</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-642-03848-8_4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref43">
                <label>43</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Tan</surname>
                            <given-names>W</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Missier</surname>
                            <given-names>P</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Madduri</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Building Scientific Workflow with Taverna and BPEL: A Comparative Study in caGrid</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Feuerlicht</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lamersdorf</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>(eds.):
                    <source>

                        <italic toggle="yes">Service-Oriented Computing &#x2013; ICSOC 2008 Workshops.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2009</year>; pp.<fpage>118</fpage>&#x2013;<lpage>129</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-642-01247-1_11</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref44">
                <label>44</label>
                <mixed-citation publication-type="confproc">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Deelman</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group>:
                    <article-title>Managing Large-Scale Scientific Workflows in Distributed Environments: Experiences and Challenges.</article-title>
                    <source>

                        <italic toggle="yes">In: 2006 Second IEEE International Conference on e-Science and Grid Computing (e-Science&#x2019;06).</italic>
</source>
                    <year>2006</year>; pp.<fpage>144</fpage>&#x2013;<lpage>144</lpage>.
                    <pub-id pub-id-type="doi">10.1109/E-SCIENCE.2006.261077</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref45">
                <label>45</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Beard</surname>
                            <given-names>N</given-names>
                        </name>
</person-group>:
                    <article-title>Concept Maps in TeSS.</article-title>
                    <publisher-name>ELIXIR All Hands Meeting</publisher-name>;<year>2019</year>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.slideshare.net/NiallBeard/concept-maps-in-tess">Reference Source</ext-link>.</mixed-citation>
            </ref>
            <ref id="ref46">
                <label>46</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Novak</surname>
                            <given-names>J</given-names>
                        </name>
</person-group>:
                    <article-title>The Theory Underlying Concept Maps and How To Construct Them.</article-title>
                    <ext-link ext-link-type="uri" xlink:href="https://web.stanford.edu/dept/SUSE/projects/ireport/articles/concept_maps/The%20Theory%20Underlying%20Concept%20Maps.pdf">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref47">
                <label>47</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>O&#x2019;Connor</surname>
                            <given-names>BD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yuen</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chung</surname>
                            <given-names>V</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The Dockstore: enabling modular, community-focused sharing of Docker-based genomics tools and workflows.</article-title>
                    <source>

                        <italic toggle="yes">F1000Res.</italic>
</source>
                    <year>2017</year>;<volume>6</volume>:<fpage>52</fpage>.
                    <pub-id pub-id-type="doi">10.12688/f1000research.10137.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref48">
                <label>48</label>
                <mixed-citation publication-type="web">
                    <collab>IMB Mainz</collab>:
                    <article-title>Bioinformatics Core Facility.</article-title> Last accessed 2021/01/07.
                    <ext-link ext-link-type="uri" xlink:href="https://www.imb.de/core-facilities/bioinformatics">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref49">
                <label>49</label>
                <mixed-citation publication-type="web">
                    <collab>Research Software Engineers</collab>. Last accessed 2021/03/01.
                    <ext-link ext-link-type="uri" xlink:href="https://researchsoftware.org/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref50">
                <label>50</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goble</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cohen-Boulakia</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Soiland-Reyes</surname>
                            <given-names>S</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>FAIR Computational Workflows.</article-title>
                    <source>

                        <italic toggle="yes">Data Intell.</italic>
</source>
                    <year>2020</year>;<volume>2</volume>:<fpage>108</fpage>&#x2013;<lpage>121</lpage>.
                    <pub-id pub-id-type="doi">10.1162/dint_a_00033</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref51">
                <label>51</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>MD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dumontier</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Aalbersberg</surname>
                            <given-names>IjJ</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>The FAIR Guiding Principles for scientific data management and stewardship.</article-title> Last accessed 2018/10/10.
                    <ext-link ext-link-type="uri" xlink:href="https://www.nature.com/articles/sdata201618">Reference Source</ext-link>
                    <pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref52">
                <label>52</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A-L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garcia</surname>
                            <given-names>L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kuzak</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Towards FAIR principles for research software.</article-title>
                    <source>

                        <italic toggle="yes">Data Sci.</italic>
</source>
                    <year>2020</year>;<volume>3</volume>(<issue>1</issue>):<fpage>37</fpage>&#x2013;<lpage>59</lpage>.
                    <pub-id pub-id-type="doi">10.3233/DS-190026</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref53">
                <label>53</label>
                <mixed-citation publication-type="web">
                    <collab>FAIR Research Software</collab>. Last accessed 2020/08/28.
                    <ext-link ext-link-type="uri" xlink:href="https://fair-software.nl/home">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref54">
                <label>54</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ison</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>M&#x00e9;nager</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brancotte</surname>
                            <given-names>B</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Community curation of bioinformatics software and data resources.</article-title>
                    <source>

                        <italic toggle="yes">Brief. Bioinform.</italic>
</source>
                    <year>2019</year>.
                    <pub-id pub-id-type="pmid">31624831</pub-id>
                    <pub-id pub-id-type="doi">10.1093/bib/bbz075</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7947956</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref55">
                <label>55</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Ison</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ienasescu</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Rydza</surname>
                            <given-names>E</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>biotoolsSchema: a formalized schema for bioinformatics software description.</article-title>
                    <source>

                        <italic toggle="yes">GigaScience.</italic>
</source>
                    <year>2021</year>;<volume>10</volume>:<elocation-id>giaa157</elocation-id>.
                    <pub-id pub-id-type="pmid">33506265</pub-id>
                    <pub-id pub-id-type="doi">10.1093/gigascience/giaa157</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7842104</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref56">
                <label>56</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bai</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bandla</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Guo</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>BioContainers Registry: Searching Bioinformatics and Proteomics Tools, Packages, and Containers.</article-title>
                    <source>

                        <italic toggle="yes">J. Proteome Res.</italic>
</source>
                    <year>2021</year>.
                    <pub-id pub-id-type="pmid">33625229</pub-id>
                    <pub-id pub-id-type="doi">10.1021/acs.jproteome.0c00904</pub-id>
                    <pub-id pub-id-type="pmcid">PMC7611561</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref57">
                <label>57</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Goecks</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Nekrutenko</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Taylor</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences.</article-title>
                    <source>

                        <italic toggle="yes">Genome Biol.</italic>
</source>
                    <year>2010</year>;<volume>11</volume>:<fpage>R86</fpage>.
                    <pub-id pub-id-type="pmid">20738864</pub-id>
                    <pub-id pub-id-type="doi">10.1186/gb-2010-11-8-r86</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2945788</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref58">
                <label>58</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gr&#x00fc;ning</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Dale</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sj&#x00f6;din</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Bioconda: sustainable and comprehensive software distribution for the life sciences.</article-title>
                    <source>

                        <italic toggle="yes">Nat. Methods.</italic>
</source>
                    <year>2018</year>;<volume>15</volume>:<fpage>475</fpage>&#x2013;<lpage>476</lpage>.
                    <pub-id pub-id-type="pmid">29967506</pub-id>
                    <pub-id pub-id-type="doi">10.1038/s41592-018-0046-7</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref59">
                <label>59</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Capella-Guti&#x00e9;rrez</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>de la Iglesia</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Haas</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Lessons Learned: Recommendations for Establishing Critical Periodic Scientific Benchmarking.</article-title>
                    <source>

                        <italic toggle="yes">bioRxiv.</italic>
</source>
                    <year>2017</year>.
                    <pub-id pub-id-type="doi">10.1101/181677</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref60">
                <label>60</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>M&#x00f6;ller</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Krabbenh&#x00f6;ft</surname>
                            <given-names>HN</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Tille</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Community-driven computational biology with Debian Linux.</article-title>
                    <source>

                        <italic toggle="yes">BMC Bioinformatics.</italic>
</source>
                    <year>2010</year>;<volume>11</volume>:<fpage>S5</fpage>.
                    <pub-id pub-id-type="pmid">21210984</pub-id>
                    <pub-id pub-id-type="doi">10.1186/1471-2105-11-S12-S5</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3040531</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref61">
                <label>61</label>
                <mixed-citation publication-type="web">
                    <collab>Homepage|EarthCube</collab>. Last accessed 2020/05/08.
                    <ext-link ext-link-type="uri" xlink:href="http://www.earthcube.org/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref62">
                <label>62</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Carvalho</surname>
                            <given-names>LAMC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Medeiros</surname>
                            <given-names>CB</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Semantic Software Metadata for Workflow Exploration and Evolution.</article-title>
                    <source>

                        <italic toggle="yes">In: 2018 IEEE 14th International Conference on e-Science (e-Science).</italic>
</source>
                    <year>2018</year>; pp.<fpage>431</fpage>&#x2013;<lpage>441</lpage>.
                    <pub-id pub-id-type="doi">10.1109/eScience.2018.00132</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref63">
                <label>63</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Osorio</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khider</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>OKG-Soft: An Open Knowledge Graph with Machine Readable Scientific Software Metadata.</article-title>
                    <source>

                        <italic toggle="yes">In: IEEE eScience 2019.</italic>
</source>
                    <year>2019</year>.</mixed-citation>
            </ref>
            <ref id="ref64">
                <label>64</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>MD</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Vandervalk</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McCarthy</surname>
                            <given-names>L</given-names>
                        </name>
</person-group>:
                    <article-title>The Semantic Automated Discovery and Integration (SADI) Web service Design-Pattern, API and Reference Implementation.</article-title>
                    <source>

                        <italic toggle="yes">J. Biomed. Semant.</italic>
</source>
                    <year>2011</year>;<volume>2</volume>:<fpage>8</fpage>.
                    <pub-id pub-id-type="pmid">22024447</pub-id>
                    <pub-id pub-id-type="doi">10.1186/2041-1480-2-8</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3212890</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref65">
                <label>65</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Bukhari</surname>
                            <given-names>AC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Klein</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>CJO</given-names>
                        </name>
</person-group>:
                    <chapter-title>Towards Interoperable BioNLP Semantic Web Services Using the SADI Framework</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>CJO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Butler</surname>
                            <given-names>G</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jurisica</surname>
                            <given-names>I</given-names>
                        </name>
</person-group>(eds.)
                    <source>

                        <italic toggle="yes">Data Integration in the Life Sciences.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2013</year>; pp.<fpage>69</fpage>&#x2013;<lpage>80</lpage>.
                    <pub-id pub-id-type="doi">10.1007/978-3-642-39437-9_6</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref66">
                <label>66</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Al Manir</surname>
                            <given-names>MS</given-names>
                        </name>
</person-group>:
                    <article-title>Generating SADI semantic web services from declarative descriptions.</article-title>
                    <year>2019</year>;
                    <ext-link ext-link-type="uri" xlink:href="https://unbscholar.lib.unb.ca/islandora/object/unbscholar%3A9798/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref67">
                <label>67</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Khider</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Artificial Intelligence for Modeling Complex Systems: Taming the Complexity of Expert Models to Improve Decision Making.</article-title>
                    <source>

                        <italic toggle="yes">ACM Trans. Interact. Intell. Syst. to appear.</italic>
</source>
                    <pub-id pub-id-type="doi">10.1145/3453172</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref68">
                <label>68</label>
                <mixed-citation publication-type="web">
                    <collab>Bioinformatics shims</collab>. Last accessed 2020/09/03.
                    <ext-link ext-link-type="uri" xlink:href="http://www.cs.man.ac.uk/~hulld/shims.html">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref69">
                <label>69</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Hull</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Stevens</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lord</surname>
                            <given-names>P</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Treating shimantic web syndrome with ontologies.</article-title>
                    <source>

                        <italic toggle="yes">Proc. First Adv. Knowl. Technol. Workshop Semantic Web Serv. AKT-SWS04 KMi.</italic>
</source>
                    <year>2004</year>.</mixed-citation>
            </ref>
            <ref id="ref70">
                <label>70</label>
                <mixed-citation publication-type="web">
                    <source>

                        <italic toggle="yes">Samtools</italic>
</source>. Last accessed 2020/09/03.
                    <ext-link ext-link-type="uri" xlink:href="http://www.htslib.org/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref71">
                <label>71</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Heller</surname>
                            <given-names>SR</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McNaught</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pletnev</surname>
                            <given-names>I</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>InChI, the IUPAC International Chemical Identifier.</article-title>
                    <source>

                        <italic toggle="yes">J. Cheminformatics.</italic>
</source>
                    <year>2015</year>;<volume>7</volume>:<fpage>23</fpage>.
                    <pub-id pub-id-type="doi">10.1186/s13321-015-0068-4</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref72">
                <label>72</label>
                <mixed-citation publication-type="other">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Duigou</surname>
                            <given-names>T</given-names>
                        </name>
</person-group>:
                    <article-title>RetroPath2.0 - a retrosynthesis workflow with tutorial and example data</article-title>.
                    <ext-link ext-link-type="uri" xlink:href="https://www.myexperiment.org/workflows/4987.html">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref73">
                <label>73</label>
                <mixed-citation publication-type="web">
                    <article-title>CSV on the Web: A Primer.</article-title> Last accessed 2020/09/03.
                    <ext-link ext-link-type="uri" xlink:href="https://www.w3.org/TR/tabular-data-primer/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref74">
                <label>74</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kasalica</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schw&#x00e4;mmle</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Palmblad</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>APE in the Wild: Automated Exploration of Proteomics Workflows in the bio.tools Registry.</article-title>
                    <source>

                        <italic toggle="yes">J. Proteome Res. to appear.</italic>
</source>
                    <year>2021</year>.
                    <pub-id pub-id-type="doi">10.1021/acs.jproteome.0c00983</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref75">
                <label>75</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>M&#x00f6;ller</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Schroeder</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Apweiler</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Consistent integration of non-reliable heterogeneous information resources applied to the annotation of transmembrane proteins.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Chem.</italic>
</source>
                    <year>2001</year>;<volume>26</volume>:<fpage>41</fpage>&#x2013;<lpage>49</lpage>.
                    <pub-id pub-id-type="pmid">11765850</pub-id>
                    <pub-id pub-id-type="doi">10.1016/s0097-8485(01)00098-5</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref76">
                <label>76</label>
                <mixed-citation publication-type="web">
                    <collab>Galaxy Tool Shed</collab>. Last accessed 2020/09/03.
                    <ext-link ext-link-type="uri" xlink:href="https://toolshed.g2.bx.psu.edu/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref77">
                <label>77</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Belhajjame</surname>
                            <given-names>K</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Embury</surname>
                            <given-names>SM</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Paton</surname>
                            <given-names>NW</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Automatic Annotation of Web Services Based on Workflow Definitions</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Cruz</surname>
                            <given-names>I</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Decker</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Allemang</surname>
                            <given-names>D</given-names>
                        </name>

                        <etal/>
</person-group>(eds.)
                    <source>

                        <italic toggle="yes">The Semantic Web - ISWC 2006.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2006</year>; pp.<fpage>116</fpage>&#x2013;<lpage>129</lpage>.
                    <pub-id pub-id-type="doi">10.1007/11926078_9</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref78">
                <label>78</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Jim&#x00e9;nez</surname>
                            <given-names>RC</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kuzak</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Alhamdoosh</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Four simple recommendations to encourage best practices in research software.</article-title>
                    <source>

                        <italic toggle="yes">F1000Res.</italic>
</source>
                    <year>2017</year>;<volume>6</volume>.
                    <pub-id pub-id-type="doi">10.12688/f1000research.11407.1</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref79">
                <label>79</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>DiBernardo</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Pottinger</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>M</given-names>
                        </name>
</person-group>:
                    <article-title>Semi-automatic web service composition for the life sciences using the BioMoby semantic web framework.</article-title>
                    <source>

                        <italic toggle="yes">J. Biomed. Inform.</italic>
</source>
                    <year>2008</year>;<volume>41</volume>:<fpage>837</fpage>&#x2013;<lpage>847</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.jbi.2008.02.005</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref80">
                <label>80</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Withers</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kawas</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McCarthy</surname>
                            <given-names>L</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <chapter-title>Semantically-Guided Workflow Construction in Taverna: The SADI and BioMoby Plug-Ins</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Margaria</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Steffen</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>(eds.)
                    <source>

                        <italic toggle="yes">Leveraging Applications of Formal Methods, Verification, and Validation.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2010</year>; pp.<fpage>301</fpage>&#x2013;<lpage>312</lpage>.</mixed-citation>
            </ref>
            <ref id="ref81">
                <label>81</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>R&#x00ed;os</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Karlsson</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Trelles</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <article-title>Magallanes: a web services discovery and automatic workflow composition tool.</article-title>
                    <source>

                        <italic toggle="yes">BMC Bioinformatics.</italic>
</source>
                    <year>2009</year>;<volume>10</volume>:<fpage>334</fpage>.
                    <pub-id pub-id-type="pmid">19832968</pub-id>
                    <pub-id pub-id-type="doi">10.1186/1471-2105-10-334</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2771019</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref82">
                <label>82</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Karlsson</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Trelles</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <chapter-title>jORCA and Magallanes Sailing Together towards Integration of Web Services</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>Freitas</surname>
                            <given-names>AT</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Navarro</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>(eds.)
                    <source>

                        <italic toggle="yes">Bioinformatics for Personalized Medicine.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2012</year>; pp.<fpage>94</fpage>&#x2013;<lpage>101</lpage>.</mixed-citation>
            </ref>
            <ref id="ref83">
                <label>83</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Naujokat</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A-L</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Steffen</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <chapter-title>Loose Programming with PROPHETS</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>de Lara</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Zisman</surname>
                            <given-names>A</given-names>
                        </name>
</person-group>(eds.)
                    <source>

                        <italic toggle="yes">Fundamental Approaches to Software Engineering.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2012</year>; pp.<fpage>94</fpage>&#x2013;<lpage>98</lpage>.</mixed-citation>
            </ref>
            <ref id="ref84">
                <label>84</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A-L</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">User-Level Workflow Design: A Bioinformatics Perspective.</italic>
</source>
                    <publisher-loc>Berlin, Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2013</year>.</mixed-citation>
            </ref>
            <ref id="ref85">
                <label>85</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kumar</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gr&#x00fc;ning</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Backofen</surname>
                            <given-names>R</given-names>
                        </name>
</person-group>:
                    <article-title>Tool recommender system in Galaxy using deep learning.</article-title>
                    <source>

                        <italic toggle="yes">bioRxiv.</italic>
</source>
                    <year>2019</year>;<fpage>838599</fpage>.
                    <pub-id pub-id-type="doi">10.1101/838599</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref86">
                <label>86</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Chung</surname>
                            <given-names>J</given-names>
                        </name>

                        <name name-style="western">
                            <surname>G&#x00fc;l&#x00e7;ehre</surname>
                            <given-names>&#x00c7;</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Cho</surname>
                            <given-names>K</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling.</article-title>
                    <source>

                        <italic toggle="yes">ArXiv.</italic>
</source>
                    <year>2014</year>.</mixed-citation>
            </ref>
            <ref id="ref87">
                <label>87</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gonz&#x00e1;lez-Calero</surname>
                            <given-names>PA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A semantic framework for automatic generation of computational workflows using distributed data and component catalogues.</article-title>
                    <source>

                        <italic toggle="yes">J. Exp. Theor. Artif. Intell.</italic>
</source>
                    <year>2011</year>;<volume>23</volume>:<fpage>389</fpage>&#x2013;<lpage>467</lpage>.
                    <pub-id pub-id-type="doi">10.1080/0952813X.2010.490962</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref88">
                <label>88</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Srivastava</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Adusumilli</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Boyce</surname>
                            <given-names>H</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Semantic workflows for benchmark challenges: Enhancing comparability, reusability and reproducibility.</article-title>
                    <source>

                        <italic toggle="yes">PSB.</italic>
</source>
                    <year>2019</year>.
                    <pub-id pub-id-type="doi">10.1142/9789813279827_0019</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref89">
                <label>89</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kasalica</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A-L</given-names>
                        </name>
</person-group>:
                    <article-title>APE: A Command-Line Tool and API for Automated Workflow Composition.</article-title>
                    <source>

                        <italic toggle="yes">In: Proceedings of the International Conference on Computational Science (ICCS 2020).</italic>
</source>
                    <year>2020</year>.
                    <pub-id pub-id-type="doi">10.1007/978-3-030-50436-6_34</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref90">
                <label>90</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kasalica</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A-L</given-names>
                        </name>
</person-group>:
                    <article-title>Workflow Discovery with Semantic Constraints: The SAT-Based Implementation of APE.</article-title>
                    <source>

                        <italic toggle="yes">Electron. Commun. EASST.</italic>
</source>
                    <year>2020</year>;<volume>78</volume>.
                    <pub-id pub-id-type="doi">10.14279/tuj.eceasst.78.1092</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref91">
                <label>91</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Steffen</surname>
                            <given-names>B</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Margaria</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Freitag</surname>
                            <given-names>B</given-names>
                        </name>
</person-group>:
                    <article-title>Module Configuration by Minimal Model Construction.</article-title>
                    <source>

                        <italic toggle="yes">Universit&#x00e4;t Passau.</italic>
</source>
                    <year>1993</year>.</mixed-citation>
            </ref>
            <ref id="ref92">
                <label>92</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Vandervalk</surname>
                            <given-names>BP</given-names>
                        </name>

                        <name name-style="western">
                            <surname>McCarthy</surname>
                            <given-names>EL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Wilkinson</surname>
                            <given-names>MD</given-names>
                        </name>
</person-group>:
                    <chapter-title>SHARE: A Semantic Web Query Engine for Bioinformatics</chapter-title>. In:
                    <person-group person-group-type="editor">

                        <name name-style="western">
                            <surname>G&#x00f3;mez-P&#x00e9;rez</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Yu</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ding</surname>
                            <given-names>Y</given-names>
                        </name>
</person-group> (eds.)
                    <source>

                        <italic toggle="yes">The Semantic Web.</italic>
</source>
                    <publisher-loc>Berlin Heidelberg</publisher-loc>:
                    <publisher-name>Springer</publisher-name>;<year>2009</year>; pp.<fpage>367</fpage>&#x2013;<lpage>369</lpage>.</mixed-citation>
            </ref>
            <ref id="ref93">
                <label>93</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Baker</surname>
                            <given-names>CJO</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Manir</surname>
                            <given-names>MSA</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Brenas</surname>
                            <given-names>JH</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Applied Ontologies for Global Health Surveillance and Pandemic Intelligence.</article-title>
                    <source>

                        <italic toggle="yes">medRxiv.</italic>
</source>
                    <year>2020</year>;<fpage>2020.10.17.20214460</fpage>.
                    <pub-id pub-id-type="doi">10.1101/2020.10.17.20214460</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref94">
                <label>94</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Riazanov</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Klein</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Shaban-Nejad</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Semantic querying of relational data for clinical intelligence: a semantic web services-based approach.</article-title>
                    <source>

                        <italic toggle="yes">J. Biomed. Semant.</italic>
</source>
                    <year>2013</year>;<volume>4</volume>:<fpage>9</fpage>.
                    <pub-id pub-id-type="pmid">23497556</pub-id>
                    <pub-id pub-id-type="doi">10.1186/2041-1480-4-9</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3698140</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref95">
                <label>95</label>
                <mixed-citation publication-type="web">
                    <collab>HYDRA|IPSNP Computing Inc</collab>. Last accessed 2021/06/07.
                    <ext-link ext-link-type="uri" xlink:href="http://ipsnp.com/hydra/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref96">
                <label>96</label>
                <mixed-citation publication-type="web">
                    <article-title>Copernicus Essential Climate Variable - select and plot.</article-title> Last accessed 2021/02/24.
                    <ext-link ext-link-type="uri" xlink:href="https://workflowhub.eu/workflows/46">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref97">
                <label>97</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Knoblock</surname>
                            <given-names>M</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <source>

                        <italic toggle="yes">Improving Publication and Reproducibility of Computational Experiments through Workflow Abstractions.</italic>
</source>
                    <publisher-name>K-CAP Workshops</publisher-name>;<year>2017</year>.</mixed-citation>
            </ref>
            <ref id="ref98">
                <label>98</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>
</person-group>:
                    <article-title>Towards Automating Data Narratives.</article-title>
                    <source>

                        <italic toggle="yes">In: Proceedings of the 22nd International Conference on Intelligent User Interfaces.</italic>
</source>
                    <publisher-loc>New York, NY, USA</publisher-loc>:
                    <publisher-name>Association for Computing Machinery</publisher-name>;<year>2017</year>; pp.<fpage>565</fpage>&#x2013;<lpage>576</lpage>.
                    <pub-id pub-id-type="doi">10.1145/3025171.3025193</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref99">
                <label>99</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Garijo</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Gil</surname>
                            <given-names>Y</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Corcho</surname>
                            <given-names>O</given-names>
                        </name>
</person-group>:
                    <article-title>Abstract, link, publish, exploit: An end to end framework for workflow sharing.</article-title>
                    <source>

                        <italic toggle="yes">Future Gener. Comput. Syst.</italic>
</source>
                    <year>2017</year>;<volume>75</volume>:<fpage>271</fpage>&#x2013;<lpage>283</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.future.2017.01.008</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref100">
                <label>100</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kanewala</surname>
                            <given-names>U</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Bieman</surname>
                            <given-names>JM</given-names>
                        </name>
</person-group>:
                    <article-title>Testing scientific software: A systematic literature review.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Softw. Technol.</italic>
</source>
                    <year>2014</year>;<volume>56</volume>:<fpage>1219</fpage>&#x2013;<lpage>1232</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.infsof.2014.05.006</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref101">
                <label>101</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Heaton</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Carver</surname>
                            <given-names>JC</given-names>
                        </name>
</person-group>:
                    <article-title>Claims about the use of software engineering practices in science: A systematic literature review.</article-title>
                    <source>

                        <italic toggle="yes">Inf. Softw. Technol.</italic>
</source>
                    <year>2015</year>;<volume>67</volume>:<fpage>207</fpage>&#x2013;<lpage>219</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.infsof.2015.07.011</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref102">
                <label>102</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Johanson</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hasselbring</surname>
                            <given-names>W</given-names>
                        </name>
</person-group>:
                    <article-title>Software Engineering for Computational Science: Past, Present, Future.</article-title>
                    <source>

                        <italic toggle="yes">Comput. Sci. Eng.</italic>
</source>
                    <year>2018</year>;<volume>20</volume>:<fpage>90</fpage>&#x2013;<lpage>109</lpage>.
                    <pub-id pub-id-type="doi">10.1109/MCSE.2018.021651343</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref103">
                <label>103</label>
                <mixed-citation publication-type="book">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Oberkampf</surname>
                            <given-names>WL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Roy</surname>
                            <given-names>CJ</given-names>
                        </name>
</person-group>:
                    <source>

                        <italic toggle="yes">Verification and Validation in Scientific Computing.</italic>
</source>
                    <publisher-loc>Cambridge</publisher-loc>:
                    <publisher-name>Cambridge University Press</publisher-name>;<year>2010</year>.
                    <pub-id pub-id-type="doi">10.1017/CBO9780511760396</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref104">
                <label>104</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Groen</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Arabnejad</surname>
                            <given-names>H</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Jancauskas</surname>
                            <given-names>V</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>VECMAtk: a scalable verification, validation and uncertainty quantification toolkit for scientific simulations.</article-title>
                    <source>

                        <italic toggle="yes">Philos. Trans. R. Soc. Math. Phys. Eng. Sci.</italic>
</source>
                    <year>2021</year>;<volume>379</volume>:<fpage>20200221</fpage>.
                    <pub-id pub-id-type="doi">10.1098/rsta.2020.0221</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref105">
                <label>105</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Coveney</surname>
                            <given-names>PV</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Groen</surname>
                            <given-names>D</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Hoekstra</surname>
                            <given-names>AG</given-names>
                        </name>
</person-group>:
                    <article-title>Reliability and reproducibility in computational science: implementing validation, verification and uncertainty quantification in silico.</article-title>
                    <source>

                        <italic toggle="yes">Philos. Trans. R. Soc. Math. Phys. Eng. Sci.</italic>
</source>
                    <year>2021</year>;<volume>379</volume>:<fpage>20200409</fpage>.
                    <pub-id pub-id-type="doi">10.1098/rsta.2020.0409</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref106">
                <label>106</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Zimo&#x0144;</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Elisseev</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sawko</surname>
                            <given-names>R</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Uncertainty quantification-as-a-service.</article-title>
                    <source>

                        <italic toggle="yes">In: Proceedings of the 28th Annual International Conference on Computer Science and Software Engineering.</italic>
</source>
                    <publisher-loc>USA</publisher-loc>:
                    <publisher-name>IBM Corp.</publisher-name>;<year>2018</year>; pp.<fpage>331</fpage>&#x2013;<lpage>337</lpage>.</mixed-citation>
            </ref>
            <ref id="ref107">
                <label>107</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Drugan</surname>
                            <given-names>MM</given-names>
                        </name>
</person-group>:
                    <article-title>Reinforcement learning versus evolutionary computation: A survey on hybrid algorithms.</article-title>
                    <source>

                        <italic toggle="yes">Swarm Evol. Comput.</italic>
</source>
                    <year>2019</year>;<volume>44</volume>:<fpage>228</fpage>&#x2013;<lpage>246</lpage>.
                    <pub-id pub-id-type="doi">10.1016/j.swevo.2018.03.011</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref108">
                <label>108</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Kuhn</surname>
                            <given-names>T</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Mero&#x00f1;o-Pe&#x00f1;uela</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Malic</surname>
                            <given-names>A</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Nanopublications: A Growing Resource of Provenance-Centric Scientific Linked Data.</article-title>
                    <source>

                        <italic toggle="yes">In: 2018 IEEE 14th International Conference on e-Science (e-Science).</italic>
</source>
                    <year>2018</year>; pp.<fpage>83</fpage>&#x2013;<lpage>92</lpage>.
                    <pub-id pub-id-type="doi">10.1109/eScience.2018.00024</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref109">
                <label>109</label>
                <mixed-citation publication-type="web">
                    <article-title>Bioschemas - 1.0 Release (09 March 2021).</article-title> Last accessed 2021/09/01.
                    <ext-link ext-link-type="uri" xlink:href="https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref110">
                <label>110</label>
                <mixed-citation publication-type="web">
                    <collab>Workflow RO-Crate (DRAFT)</collab>. Last accessed 2021/03/08.
                    <ext-link ext-link-type="uri" xlink:href="https://about.workflowhub.eu/Workflow-RO-Crate/">Reference Source</ext-link>
                </mixed-citation>
            </ref>
            <ref id="ref111">
                <label>111</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>&#x00d3; Carrag&#x00e1;in</surname>
                            <given-names>E</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Goble</surname>
                            <given-names>C</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Sefton</surname>
                            <given-names>P</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>A lightweight approach to research object data packaging.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics Open Source Conference (BOSC).</italic>
</source>
                    <publisher-loc>Basel, Switzerland</publisher-loc>:
                    <publisher-name>ISMB/ECCB 2019</publisher-name>;<year>2019</year>.
                    <pub-id pub-id-type="doi">10.5281/zenodo.3250687</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref112">
                <label>112</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Craig</surname>
                            <given-names>R</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Beavis</surname>
                            <given-names>RC</given-names>
                        </name>
</person-group>:
                    <article-title>TANDEM: matching proteins with tandem mass spectra.</article-title>
                    <source>

                        <italic toggle="yes">Bioinformatics.</italic>
</source>
                    <year>2004</year> <month>June</month> <day>12</day>;<volume>20</volume>(<issue>9</issue>):<fpage>1466</fpage>&#x2013;<lpage>1467</lpage>.
                    <pub-id pub-id-type="doi">10.1093/bioinformatics/bth092</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref113">
                <label>113</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Granholm</surname>
                            <given-names>V</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>S</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Navarro</surname>
                            <given-names>JCF</given-names>
                        </name>

                        <etal/>
</person-group>:
                    <article-title>Fast and accurate database searches with MS-GF+Percolator.</article-title>
                    <source>

                        <italic toggle="yes">J Proteome Res.</italic>
</source>
                    <year>2014</year>;<volume>13</volume>(<issue>2</issue>),<fpage>890</fpage>&#x2013;<lpage>897</lpage>.
                    <pub-id pub-id-type="doi">10.1021/pr400937n</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref114">
                <label>114</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

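                        <name name-style="western">
                            <surname>Tabb</surname>
                            <given-names>DL</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Fernando</surname>
                            <given-names>CG</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Chambers</surname>
                            <given-names>MC</given-names>
                        </name>
</person-group>:
                    <article-title>MyriMatch: highly accurate tandem mass spectral peptide identification by multivariate hypergeometric analysis.</article-title>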
                    <source>

                        <italic toggle="yes">J Proteome Res.</italic>
</source>
                    <year>2007</year>;<volume>6</volume>(<issue>2</issue>):<fpage>654</fpage>&#x2013;<lpage>661</lpage>.
                    <pub-id pub-id-type="doi">10.1021/pr0604054</pub-id>
                </mixed-citation>
            </ref>
            <ref id="ref115">
                <label>115</label>
                <mixed-citation publication-type="journal">
                    <person-group person-group-type="author">

                        <name name-style="western">
                            <surname>Lamprecht</surname>
                            <given-names>A</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Palmblad</surname>
                            <given-names>M</given-names>
                        </name>

                        <name name-style="western">
                            <surname>Ison</surname>
                            <given-names>J</given-names>
                        </name>

                        <etal/>
</person-group>: Lorentz Center Workshop:
                    <publisher-name>Automated Workflow Composition in the Life Sciences</publisher-name>;<year>2021</year>, <month>August</month> <day>16</day>.
                    <pub-id pub-id-type="doi">10.17605/OSF.IO/A5EJ7</pub-id>
                </mixed-citation>
            </ref>
        </ref-list>
        <fn-group content-type="footnotes">
            <fn id="fn1">
                <label>*</label>
                <p>
We use the terms &#x201c;pipeline&#x201d; and &#x201c;workflow&#x201d; interchangeably here. Another common, more differentiating view is that pipelines are purely computational and as such a subset of the more general notion of workflows, which can also involve a human element.</p>
            </fn>
        </fn-group>
    </back>
    <sub-article article-type="reviewer-report" id="report96797">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.57615.r96797</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Ferreira da Silva</surname>
                        <given-names>Rafael</given-names>
                    </name>
                    <xref ref-type="aff" rid="r96797a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-1720-0928</uri>
                </contrib>
                <aff id="r96797a1">
                    <label>1</label>Information Sciences Institute, University of Southern California, Marina del Rey, CA, USA</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>5</day>
                <month>11</month>
                <year>2021</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2021 Ferreira da Silva R</copyright-statement>
                <copyright-year>2021</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport96797" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.54159.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>This paper presents a summary of the discussions and findings from a Lorentz Center workshop organized in March 2020, with the focus on automated workflow development in the life sciences.</p>
            <p> </p>
            <p> Overall, the paper is well written and presents a community view on the current state-of-the-art, challenges, and perspectives for research efforts. Another key contribution of the paper is the development of a schematic process of the scientific workflow life cycle, which is comprised of six principal stages, each with their associated artifacts. Although a survey of state-of-the-art technologies is not the goal of the paper, it still provides a reasonable amount of background information.</p>
            <p> </p>
            <p> On the downside, the paper could better articulate which properties are inherent to each of the aforementioned stages, and organize the discussions around them.</p>
            <p> </p>
            <p> Specific comments: 
                <list list-type="bullet">
                    <list-item>
                        <p>I would recommend authors to identify common properties for the set of tools described in the paper. The current text alludes to individual tool functionalities but does not clearly discuss their common ground / specificities.</p>
                    </list-item>
                    <list-item>
                        <p>It is not clear to me how "workflow benchmarks" are defined. At some moments I see them as "reference workflows" for validating the workflow structure and data, while at other sections I see them as performance drivers. It might be good to provide an actual definition of the term, or even separate them (as they seem to measure different properties).</p>
                    </list-item>
                    <list-item>
                        <p>The proposed future work (foundations, tooling and infrastructure, community, and applications) could leverage several ongoing efforts from the scientific workflows community such as in [1, 2, 3].</p>
                    </list-item>
                </list>
            </p>
            <p>Is the topic of the opinion article discussed accurately in the context of the current literature?</p>
            <p>Yes</p>
            <p>Are arguments sufficiently supported by evidence from the published literature?</p>
            <p>Yes</p>
            <p>Are all factual statements correct and adequately supported by citations?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn balanced and justified on the basis of the presented arguments?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>scientific workflows, distributed computing, modeling and simulation of distributed systems, parallel computing</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
    </sub-article>
    <sub-article article-type="reviewer-report" id="report94267">
        <front-stub>
            <article-id pub-id-type="doi">10.5256/f1000research.57615.r94267</article-id>
            <title-group>
                <article-title>Reviewer response for version 1</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <name>
                        <surname>Alper</surname>
                        <given-names>Pinar</given-names>
                    </name>
                    <xref ref-type="aff" rid="r94267a1">1</xref>
                    <role>Referee</role>
                    <uri content-type="orcid">https://orcid.org/0000-0002-2224-0780</uri>
                </contrib>
                <aff id="r94267a1">
                    <label>1</label>Luxembourg Centre for Systems Biomedicine, University of Luxembourg, Belvaux, Luxembourg</aff>
            </contrib-group>
            <author-notes>
                <fn fn-type="conflict">
                    <p>
                        <bold>Competing interests: </bold>No competing interests were disclosed.</p>
                </fn>
            </author-notes>
            <pub-date pub-type="epub">
                <day>19</day>
                <month>10</month>
                <year>2021</year>
            </pub-date>
            <permissions>
                <copyright-statement>Copyright: &#x00a9; 2021 Alper P</copyright-statement>
                <copyright-year>2021</copyright-year>
                <license xlink:href="https://creativecommons.org/licenses/by/4.0/">
                    <license-p>This is an open access peer review report distributed under the terms of the Creative Commons Attribution Licence, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <related-article ext-link-type="doi" id="relatedArticleReport94267" related-article-type="peer-reviewed-article" xlink:href="10.12688/f1000research.54159.1"/>
            <custom-meta-group>
                <custom-meta>
                    <meta-name>recommendation</meta-name>
                    <meta-value>approve</meta-value>
                </custom-meta>
            </custom-meta-group>
        </front-stub>
        <body>
            <p>The paper is the final report from a 2020 workshop on automated workflow development in life sciences.</p>
            <p> </p>
            <p> This is overall a well written paper, which, scope-wise, is framed by the discussions of the workshop rather than being an exhaustive state-of-the-art paper on semantic workflow development techniques. The paper is lengthy and covers many topics in a partly explored research space in scientific workflows; it requires multiple reads to have an overall viewpoint.</p>
            <p> </p>
            <p> 
                <bold>The contributions of the paper are the following:</bold> 
                <list list-type="bullet">
                    <list-item>
                        <p>A scientific workflow lifecycle outlining the various activities and artefacts in stages of the lifecycle. I think this is the strongest contribution of the paper. It acts as a map for authors to place various past and future research.</p>
                    </list-item>
                    <list-item>
                        <p>A survey of current technologies and practices that directly or indirectly support workflow composition. This is not an exhaustive survey reporting research and techniques in workflow composition; instead, it focuses on what is currently working and available to life science researchers.</p>
                    </list-item>
                    <list-item>
                        <p>A set of future directions on research and technology development to enable automated workflow composition across the lifecycle.</p>
                    </list-item>
                </list> The paper reports on a stimulating topic and can act as a kick starter for the next era of research on the intersection of semantic technologies and scientific workflows.&#x00a0;</p>
            <p> </p>
            <p> 
                <bold>Suggestions for improvements:</bold> 
                <list list-type="bullet">
                    <list-item>
                        <p>The paper does not utilise section numbers which makes reading difficult. If the journal format allows I&#x2019;d recommend the use of numbering.</p>
                    </list-item>
                    <list-item>
                        <p>Abstract reads&#x00a0;&#x201c;Recent technological advances have returned &#x2026;&#x201d; What are those? Give one example in the abstract.</p>
                    </list-item>
                    <list-item>
                        <p>In the introduction you do not mention explicitly the bioinformatician but say&#x00a0;&#x201c;To biologists there is a latent fear to have chosen &#x2026;&#x201d; and later mention &#x201c;human expert knowledge&#x201d; which I guess refers to the bioinformatician.&#x00a0;So I gather automated composition will ultimately assist the bioinformatician. Without such clarification the paragraph reads as if biologists due to their &#x201c;latent fear of making a mistake&#x201d; will&#x00a0;consult &#x201c;automated assembly&#x201d;. I believe that is an overly optimistic statement without any reference to any surveys or observations on biologists&#x2019; and bioinformaticians&#x2019; expectations or experience with workflows. During peer-review scientists are held <italic>accountable</italic> for their analytical methods and in this paragraph it is not justified with any references&#x00a0;how an automated decision made by composition software increases trustworthiness of the method.</p>
                    </list-item>
                    <list-item>
                        <p>The introduction states the &#x201c;developments bring the long standing vision of automated composition &#x2026; within reach&#x201d;. Here,&#x00a0;1-2 sentences&#x00a0;that summarise how much progress&#x00a0;has been made on this vision to date would be helpful (a very brief summary of page 11 essentially).</p>
                    </list-item>
                    <list-item>
                        <p>The paper often cites multiple papers to refer to the same system or approach. Taverna, Galaxy, myExperiment, Biocatalogue, Ontosoft, Magallanes, Prophets are all cited with refs to multiple papers. While this is useful to have an overview of all papers, it would help the reader if you cited the definitive or the more relevant papers for these systems. If both citations are needed then footnotes will be helpful on what differences can be found in each particular paper for one system. Tightening up the citations could also help create space for citations of past workflow systems which have addressed assisted workflow composition e.g.&#x00a0;Vistrails or perhaps references to any useful reviews (if any exist) on the topic.</p>
                    </list-item>
                    <list-item>
                        <p>Static analysis of scientific workflows* (an area that has received limited attention), is closely tied to data and control flow constructs in workflow languages and provenance. Static analysis (re: technical parameters) can tell whether resulting provenance traces from a workflow would provide granular traceability or not. This may as well be another technical parameter aiding automated composition.</p>
                    </list-item>
                    <list-item>
                        <p>Static analysis over workflows re: domain-specific characteristics has so far been done to understand, compare and contrast various scientific domains&#x00a0;and their use of workflows **. Findings from such studies may help to refine &#x201c;domain-specific considerations&#x201d;.</p>
                    </list-item>
                </list> </p>
            <p> * Alper P, Belhajjame K, Goble C: Static analysis of Taverna workflows to predict provenance patterns.&#x00a0;
                <italic>Future Generation Computer Systems</italic>. 2017;&#x00a0;
                <bold>75</bold>: 310-329</p>
            <p> </p>
            <p> ** Garijo D, Alper P, Belhajjame K, Corcho O, et al.: Common motifs in scientific workflows: An empirical analysis.&#x00a0;
                <italic>Future Generation Computer Systems</italic>. 2014;&#x00a0;
                <bold>36</bold>: 338-351</p>
            <p>Is the topic of the opinion article discussed accurately in the context of the current literature?</p>
            <p>Yes</p>
            <p>Are arguments sufficiently supported by evidence from the published literature?</p>
            <p>Yes</p>
            <p>Are all factual statements correct and adequately supported by citations?</p>
            <p>Yes</p>
            <p>Are the conclusions drawn balanced and justified on the basis of the presented arguments?</p>
            <p>Yes</p>
            <p>Reviewer Expertise:</p>
            <p>scientific workflows, provenance, data protection</p>
            <p>I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard.</p>
        </body>
        <back>
            <ref-list>
                <title>References</title>
                <ref id="rep-ref-94267-1">
                    <label>1</label>
                    <mixed-citation publication-type="journal">
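                        <person-group person-group-type="author">
                            <name name-style="western">
                                <surname>Alper</surname>
                                <given-names>P</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Belhajjame</surname>
                                <given-names>K</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Goble</surname>
                                <given-names>C</given-names>
                            </name>
                        </person-group>: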
                        <article-title>Static analysis of Taverna workflows to predict provenance patterns</article-title>.
                        <source>
                            <italic>Future Generation Computer Systems</italic>
                        </source>.<year>2017</year>;<volume>75</volume>:
                        <fpage>310</fpage>-<lpage>329</lpage>.
                        <pub-id pub-id-type="doi">10.1016/j.future.2017.01.004</pub-id>
                    </mixed-citation>
                </ref>
                <ref id="rep-ref-94267-2">
                    <label>2</label>
                    <mixed-citation publication-type="journal">
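                        <person-group person-group-type="author">
                            <name name-style="western">
                                <surname>Garijo</surname>
                                <given-names>D</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Alper</surname>
                                <given-names>P</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Belhajjame</surname>
                                <given-names>K</given-names>
                            </name>
                            <name name-style="western">
                                <surname>Corcho</surname>
                                <given-names>O</given-names>
                            </name>
                            <etal/>
                        </person-group>: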
                        <article-title>Common motifs in scientific workflows: An empirical analysis</article-title>.
                        <source>
                            <italic>Future Generation Computer Systems</italic>
                        </source>.<year>2014</year>;<volume>36</volume>:
                        <fpage>338</fpage>-<lpage>351</lpage>.
                        <pub-id pub-id-type="doi">10.1016/j.future.2013.09.018</pub-id>
                    </mixed-citation>
                </ref>
            </ref-list>
        </back>
    </sub-article>
</article>
