Difference between revisions of "Template:Groningen/Flux Based Analysis"

 
(72 intermediate revisions by 3 users not shown)
Line 9: Line 9:
 
<script src="https://code.jquery.com/jquery-3.2.1.min.js" integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4=" crossorigin="anonymous"></script>
 
<script src="https://code.jquery.com/jquery-3.2.1.min.js" integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4=" crossorigin="anonymous"></script>
 
<script type="text/javascript" src="https://2018.igem.org/Template:Groningen/Javascript&action=raw&ctype=text/javascript"></script>
 
<script type="text/javascript" src="https://2018.igem.org/Template:Groningen/Javascript&action=raw&ctype=text/javascript"></script>
 +
<style>
 +
 +
* {box-sizing: border-box;}
 +
 +
.img-zoom-container {
 +
  position: relative;
 +
}
 +
 +
.img-zoom-lens {
 +
  position: absolute;
 +
  border: 1px solid #d4d4d4;
 +
  /*set the size of the lens:*/
 +
  width: 40px;
 +
  height: 40px;
 +
}
 +
 +
.img-zoom-result {
 +
  border: 1px solid #d4d4d4;
 +
  /*set the size of the result div:*/
 +
  width: 450px;
 +
  height: 450px;
 +
  float: right;
 +
}
 +
 +
</style>
 +
<script>
 +
function imageZoom(imgID, resultID) {
 +
  var img, lens, result, cx, cy;
 +
  img = document.getElementById(imgID);
 +
  result = document.getElementById(resultID);
 +
  /*create lens:*/
 +
  lens = document.createElement("DIV");
 +
  lens.setAttribute("class", "img-zoom-lens");
 +
  /*insert lens:*/
 +
  img.parentElement.insertBefore(lens, img);
 +
  /*calculate the ratio between result DIV and lens:*/
 +
  cx = result.offsetWidth / lens.offsetWidth;
 +
  cy = result.offsetHeight / lens.offsetHeight;
 +
  /*set background properties for the result DIV:*/
 +
  result.style.backgroundImage = "url('" + img.src + "')";
 +
  result.style.backgroundSize = (img.width * cx) + "px " + (img.height * cy) + "px";
 +
  /*execute a function when someone moves the cursor over the image, or the lens:*/
 +
  lens.addEventListener("mousemove", moveLens);
 +
  img.addEventListener("mousemove", moveLens);
 +
  /*and also for touch screens:*/
 +
  lens.addEventListener("touchmove", moveLens);
 +
  img.addEventListener("touchmove", moveLens);
 +
  function moveLens(e) {
 +
    var pos, x, y;
 +
    /*prevent any other actions that may occur when moving over the image:*/
 +
    e.preventDefault();
 +
    /*get the cursor's x and y positions:*/
 +
    pos = getCursorPos(e);
 +
    /*calculate the position of the lens:*/
 +
    x = pos.x - (lens.offsetWidth / 2);
 +
    y = pos.y - (lens.offsetHeight / 2);
 +
    /*prevent the lens from being positioned outside the image:*/
 +
    if (x > img.width - lens.offsetWidth) {x = img.width - lens.offsetWidth;}
 +
    if (x < 0) {x = 0;}
 +
    if (y > img.height - lens.offsetHeight) {y = img.height - lens.offsetHeight;}
 +
    if (y < 0) {y = 0;}
 +
    /*set the position of the lens:*/
 +
    lens.style.left = x + "px";
 +
    lens.style.top = y + "px";
 +
    /*display what the lens "sees":*/
 +
    result.style.backgroundPosition = "-" + (x * cx) + "px -" + (y * cy) + "px";
 +
  }
 +
  function getCursorPos(e) {
 +
    var a, x = 0, y = 0;
 +
    e = e || window.event;
 +
    /*get the x and y positions of the image:*/
 +
    a = img.getBoundingClientRect();
 +
    /*calculate the cursor's x and y coordinates, relative to the image:*/
 +
    x = e.pageX - a.left;
 +
    y = e.pageY - a.top;
 +
    /*consider any page scrolling:*/
 +
    x = x - window.pageXOffset;
 +
    y = y - window.pageYOffset;
 +
    return {x : x, y : y};
 +
  }
 +
}
 +
</script>
 
</head>
 
</head>
 
<body>
 
<body>
Line 15: Line 97:
  
 
     <div class="igem_2018_team_column_wrapper">
 
     <div class="igem_2018_team_column_wrapper">
         <div class="column">
+
         <div class="column" style="float:none !important;">
     <h1>Flux Based Analysis</h1>
+
     <img src="https://static.igem.org/mediawiki/2018/1/15/T--Groningen--banner_optimizing.png" class="responsive-img">
 
     <h4>Introduction</h4>
 
     <h4>Introduction</h4>
     <p>The increase in availability of genome scale databases of microorganisms has given rise to the possibility of creating genome scale reaction networks that include corresponding metabolites and relevant genes[1]. One method to study reaction fluxes in such a wide network is called flux based analysis or FBA. As part of our project we used FBA on a genome scale model of S. Cerevisiae in order to achieve the following two goals:</p>
+
     <p>The increase in availability of genome scale databases of microorganisms has given rise to the possibility of creating genome scale reaction networks that include corresponding metabolites and relevant genes[1]. One of the methods to study metabolic fluxes through such a wide network is called flux based analysis or FBA. As part of our project we used FBA on a genome scale model (GEM) of S. cerevisiae in order to achieve the following two goals:</p>
     <p>Goal 1:  study the relation between cell growth and styrene production <br>Goal 2:  visualize interventions that could lead to a higher styrene production </p>
+
     <b><p>Goal 1:  Study the relation between cell growth and styrene production <br>Goal 2:  Find and visualize interventions that lead to a higher styrene production </p></b>
 
     <p>An overview of the different methods and software packages that were used to achieve these goals can be seen in figure 1. </p>
 
     <p>An overview of the different methods and software packages that were used to achieve these goals can be seen in figure 1. </p>
     <figure><img src="https://static.igem.org/mediawiki/2018/b/b7/T--Groningen--Flux-based-fig1.png" width="100%"><figcaption><i>Overview</i></figcaption></figure>  
+
     <figure><img src="https://static.igem.org/mediawiki/2018/7/73/T--Groningen--modoverviewnieuw.png" width="100%"><figcaption><i>Figure 1: overview depicting various methods used in our study</i></figcaption></figure>  
 
     <h4>Theory</h4>  
 
     <h4>Theory</h4>  
     <p>Let us consider a small network consisting of the five reactions (r1 to r5), with mock metabolites A, B ,C and D. </p>  
+
     <p>Let us consider a small network consisting of the five reactions (r1 to r5), with mock metabolites A, B ,C and D (figure 2). </p>  
     <figure><img src="https://static.igem.org/mediawiki/2018/8/87/T--Groningen--Flux-based-fig2.png" width="100%"></figure>   
+
     <figure><img src="https://static.igem.org/mediawiki/2018/7/7f/T--Groningen--mock_network.png" width="70%"><figcaption><i>Figure 2: example of a simple metabolic network</i></figcaption></figure>   
     <p>The relative amount of a particular metabolite taken part in a reaction is represented by its stoichiometric coefficient, so for instance metabolite B has a stoichiometric coefficient of 2 in reaction r2. The whole network of reactions that represents the metabolism of can be represented in a stoichiometric matrix S of m rows and r columns, where m are the amount of metabolites and r the amount of reactions in the network. The matrix will then contain on the i,jth entry the stoichiometric coefficient of metabolite i in reaction j. The stoichiometric coefficient will be negative when the metabolite is a substrate (on the left side of the reaction) and positive when a metabolite is a product (on the right side of the reaction). So our example network can thus be represented in a matrix of S of 3 rows and 5 columns as can be seen in figure 3. To illustrate let’s consider position 1,1 in the matrix, since the first column represents reaction r1 and the first row the metabolite A, we put in the stoichiometric coefficient of A in r1 which is 1.</p>
+
     <p>The relative amount of a particular metabolite participating in a reaction is represented by its stoichiometric coefficient, for instance metabolite B has a stoichiometric coefficient of 2 in reaction r2. The whole network of metabolic reactions can be represented in a stoichiometric matrix S of m rows and r columns, where m is the number of metabolites and r is the number of reactions in the network. The matrix then contains on the i,jth entry the stoichiometric coefficient of metabolite i in reaction j. The stoichiometric coefficient is negative when the metabolite is a substrate (traditionally written on the left side of the reaction) and positive when a metabolite is a product (on the right side of the reaction).Therefore our example network can thus be represented as a matrix S of 4 rows and 5 columns as can be seen in figure 3. To illustrate let us consider position 1,1 in the matrix, since the first column represents reaction r1 and the first row the metabolite A, we put in the stoichiometric coefficient of A in r1 which is 1. The complete matrix S for the mock network will look as follows:</p>
     <p>S=</p>
+
      
    <table><tr><th></th><th>r1</th><th>r2</th><th>r3</th><th>r4</th><th>r5</th></tr><tr><td>A</td><td>1</td><td>-3</td><td></td><td></td><td></td></tr><tr><td>B</td><td></td><td>2</td><td>-2</td><td>-1</td><td></td></tr><tr><td>C</td><td></td><td></td><td>1</td><td></td><td>-1</td></tr><tr><td>D</td><td></td><td></td><td></td><td>1</td><td></td></tr></table>
+
<figure><img src="https://static.igem.org/mediawiki/2018/b/b8/T--Groningen--stochtable.png"width="50%"><figcaption><i>Figure 3: stochiometric matrix describing the network in figure 2</i></figcaption></figure>
    <p>The flux or reaction rate through each reaction can be represented in a vector v of length r, where r are the amount of reactions in the network and every position in the vector represents the flux through a reaction.  If the network is at steady state the following holds:</p><figure><img src="https://static.igem.org/mediawiki/2018/4/40/T--Groningen--Flux-based-sxv.png"></figure>
+
 
     <p>Where S is the stoichiometric matrix and v is the reaction rate vector. Since what we want to know is the flux through each reaction, given a stoichiometric matrix S that represents the whole metabolic network of our cell we have to solve the above equation for v. There usually is no single v that satisfy the above equation, but rather a whole solution space.  In order to narrow down this solution space, we maximize a biologically plausible objective such as biomass growth and select only those flux values that together can optimize this objective. </p>
+
<br>
 +
    <p>The flux or reaction rate through each reaction can be represented as a vector v of length r, where r is the amount of reactions in the network and every position in the vector represents the flux through a single reaction.  If the network is at a steady state (all systems variables are constant) the following holds:</p>
 +
 
 +
<figure><img src="https://static.igem.org/mediawiki/2018/8/81/T--Groningen--steadystateeq.png" width="20%"></figure>  
 +
   
 +
     <p>Where S is the stoichiometric matrix and v is the reaction rate vector. Since what we want to know is the flux through each reaction, given a stoichiometric matrix S that represents the whole metabolic network of our cell we have to solve the above equation for v. In a genome-scale model there are usually fewer equations than variables making the system underdetermined. This means that usually there is more than one  v that satisfy the above equation, creating a whole solution space.  In order make this solution space smaller, we choose a physiologically relevant objective function, such as a maximum biomass growth and optimize network fluxes to satisfy this criterion. </p>
 
     <h4>The model</h4>
 
     <h4>The model</h4>
     <p>The model we used to represent the whole metabolic network of our cell is based on YeastGem7.6:[2] “the current consensus genome-scale metabolic model of Saccharomyces cerevisiae” This model contains three main types of information: 2241 metabolites,  3520 reactions and  1019 genes. The relation between the metabolites and reactions is represented by a stoichiometric matrix, as described above. The interactions between reaction and genes are described by Boolean relationships, meaning that genes can turn reactions on or off.  For the first goal we used a version of this model that is further constrained based on the GECKO method: ecYeast7[3]. </p>
+
     <p>TIn our study we used to publicly available YeastGem8: “the current consensus genome-scale metabolic model of Saccharomyces cerevisiae”.[2] This model contains three main types of information: 2241 metabolites,  3520 reactions and  1019 genes. The relation between the metabolites and reactions is represented by a stoichiometric matrix, as described above. The interactions between genes and reactions are described by Boolean relationships, meaning that genes can turn reactions on or off.  To accomplish our first goal we used a version of the YeastGem model that is further constrained based on the GECKO method: ecYeast7[3]. </p>
     <p>The GECKO method enhances a model by adding enzymes as part of its reactions in order to ensure that fluxes are limited based on the abundance of the enzymes abundance and their turnover numbers. Since the maximum flux through an enzyme catalysed reaction is equal to the turnover number times its abundance the following holds (for enzymes which catalyse only a single reaction)</p>
+
     <p>The GECKO method enhances a model by adding enzymes as part of its reactions. This ensures that fluxes are limited based on the enzymes abundance and their turnover numbers. Since the maximum flux through an enzyme catalysed reaction is equal to the turnover number times its abundance the following holds (for enzymes which catalyse only a single reaction)</p>
     <figure><img src="https://static.igem.org/mediawiki/2018/5/5c/T--Groningen--Flux-based-vkcat.png"></figure>
+
     <figure><img src="https://static.igem.org/mediawiki/2018/3/39/T--Groningen--eceq.png"width=30%></figure>
     <p>Where v is the flux through a reaction Kcat is the enzyme’s turnover number and [E] its concentration. More concretely this means that we have to add the enzymes as metabolites to our model and for every reaction include the enzyme as a substrate with 1/Kcat as its stoichiometric coefficient. (The situation will be slightly more complex for enzymes that calculate multiple reactions, or reaction that can be catalysed by multiple enzymes).</p>
+
     <p>Where v is the flux through a reaction Kcat is the enzyme’s turnover number and [E] its concentration. In practice this means that enzymes have to be included in the model as metabolites with a 1/Kcat stoichiometric coefficient in the reactions they catalyse. </p>
     <p>Now we still to include the enzyme abundance in our model. Since we don’t have the proteomic data to limit each enzyme directly based on its abundance we make use of the single pool assumption. We include a pseudo metabolite “protein pool” representing the total amount of enzymes in the cell and then for each enzyme a reaction in which it is drawn from the protein pool with its molecular weight as a coefficient. Using this method the following reactions were added to the model:</p>
+
     <p>Now we have to include the enzyme abundance in our model. Since we do not have the proteomic data to limit each enzyme directly based on its abundance we make use of the single pool assumption. We include a pseudo metabolite “protein pool” representing the total amount of enzymes in the cell and then for each enzyme a reaction in which the enzyme is drawn from the protein pool with its molecular weight as a coefficient. Using this method the following reactions were added to the model:</p>
     <figure><img src="https://static.igem.org/mediawiki/2018/9/9e/T--Groningen--Flux-based-formulas.png"><figcaption><i>TCA: trans-cinnamic acid, PAL2: Phenylalanine ammonia-lyase 2, FDC1: Ferulic acid decarboxylase 1, ppool: pseudo metabolite representing the total amount of enzymes</i></figcaption></figure>
+
     <figure><img src="https://static.igem.org/mediawiki/2018/a/aa/T--Groningen--modequations.png"width="50%"><figcaption><i>TCA: trans-cinnamic acid, PAL2: Phenylalanine ammonia-lyase 2, FDC1: Ferulic acid decarboxylase 1, ppool: pseudo metabolite representing the total amount of enzymes</i></figcaption></figure>
     <p><i>Table 1. Enzymes</i></p><table><tr><th>Enzyme</th><th>Kcat (/s)</th><th>Weight(kDa)</th><th>uniprot</th><th>Gene</th></tr><tr><td>Ferulic acid decarboxylase 1</td><td>4.6<sup>4</sup></td><td>56.164</td><td>Q03034</td><td>FDC1 (YDR539W)</td></tr><tr><td>Phenylalanine ammonia-lyase 2</td><td>3.2<sup>5</sup></td><td>77.86</td><td>P45724</td><td>PAL2 (YNL294C)</td></tr></table>
+
     <p><i>Table 1. Enzymes</i></p><table><tr><th>Enzyme</th><th>Kcat (/s)</th><th>Weight(kDa)</th><th>uniprot</th><th>Gene</th></tr><tr><td>Ferulic acid decarboxylase 1</td><td>4.6 [4]</td><td>56.164</td><td>Q03034</td><td>FDC1 (YDR539W)</td></tr><tr><td>Phenylalanine ammonia-lyase 2</td><td>3.2 [5]></td><td>77.860</td><td>P45724</td><td>PAL2 (YNL294C)</td></tr></table>
 
     <h4>Goal 1</h4>
 
     <h4>Goal 1</h4>
     <p>The first thing we wanted to look at using our model was the relation between styrene production and cell growth. This is a crucial thing to know, because if producing styrene completely stops cell growth we will have to consider using an inducible system; we would have to design our yeast cell so that it first grows and then activate some switch that activates styrene production and stops biomass growth.  On the other hand, if styrene production doesn’t hinder cell growth too much we can just have our cells produce styrene production while they are acquiring biomass.</p>
+
     <p>First, we studied the relationship between styrene production and cell growth using our updated model. If cell growth is heavily affected by the styrene production, an inducible expression system should be considered. To accomplish it we would have to design our yeast cell so that first the population grows and then induce a switch that activates styrene production and slows down/stops biomass growth.  On the other hand, if styrene production does not hinder cell growth significantly our cells can produce styrene continuously through the cell cycle (growth and reproduction).</p>
     <p>In order to do flux based analyses we made use of the COBRA toolbox, a package for MATLAB that implements the COnstraint- Based Reconstruction and Analysis (COBRA) method[6] The COBRA toolbox contains various tools usable for modelling and analysing genome-scale networks.
+
     <p>In order to perform flux based analysis we made used functions included in theCOBRA toolbox, a package for MATLAB that implements the COnstraint- Based Reconstruction and Analysis (COBRA) method [6] The COBRA toolbox contains various tools valuable for modelling and analysing genome-scale networks. In order to solve optimization problems IBM ILOG CPLEX Optimizer (v 12.8.0) was used. </p>
<br>First we want to see the maximum growth rate and the maximum output flux of styrene. We therefore run a flux based analysis on the model optimizing for biomass growth and another analysis optimizing for styrene production: </p>
+
<p>In the beginning we wanted to see the maximum theoretical growth rate and the maximum theoretical production flux of styrene. Therefore, we run a flux based analysis twice on the model, using first biomass growth as objective function, and secondly optimizing for styrene production: </p>
<p><i>Table 2. FBA</i></p><table><tr><th>Input flux of glucose (goal = biomass)</th><th>1</th></tr><tr><td>Max biomass growth</td><td>0.097</td></tr><tr><td>Input flux of glucose (goal = styrene)</td><td>0.359</td></tr><tr><td>Max output flux of styrene</td><td>0.00213</td></tr></table>
+
<p><i>Table 2: FBA results </i></p><table><tr><td>Input flux of glucose (goal = biomass)</td><td>1 mmol gDWh<sup>-1</sup></td></tr><tr><td>Max biomass growth</td><td>0.097 mmol gDWh<sup>-1</sup></td></tr><tr><td>Input flux of glucose (goal = styrene)</td><td>0.359 mmol gDWh<sup>-1</sup></td></tr><tr><td>Max output flux of styrene</td><td>0.00213 mmol gDWh<sup>-1</sup></td></tr></table>
<p>This shows that without interventions we can produce about one mole of styrene per 167 moles of glucose meaning that 0.35% of the mass of glucose can be turned into styrene.  This means that we if we would grow the yeast cells in a medium of 20 g glucose/L, and all glucose is taken up by the cell, the final concentration of styrene will reach at its highest a concentration of 70 mg/L. This is well within toxicity levels[7] (Mckenna et al. found that growth was only minimally disrupted with styrene levels of 200 mg/L). However if we increase styrene yield with over expressions/knockouts we will have to look at minimising toxicity, for instance with a system that continuously takes styrene out of the medium</p>
+
<p>The analysis showed us that without additional interventions we maximally produce about one mole of styrene per 167 moles of glucose meaning that 0.35% of the mass of glucose can be turned into styrene.  This means that we if we would grow the yeast cells in a medium of 20 g glucose/L, and all glucose is taken up by the cell, the final concentration of styrene will reach at its highest a concentration of <b>70 mg/L</b>. This is well within toxicity levels[7] (Mckenna et al. found that growth was only minimally disrupted with styrene levels of 200 mg/L). However if we increase styrene yield with over expressions/knockouts we will have to look at minimising toxicity, for instance with a system that continuously takes styrene out of the medium </p>
<p>Before we will look at increasing the yield we will look at how styrene production influences cell growth. In order to do this we will run Flux Based Analysis, optimizing for biomass while iteratively constraining the lower bound of the styrene sink reaction from 0 flux to its max flux of 0.00213. The result of this is plotted In figure 3.</p>
+
<p>Next we tested how styrene production influences cell growth. To this end we ran FBA, optimizing for biomass while iteratively constraining the lower bound of the styrene sink reaction from 0 flux to its max flux of 0.00213 mmol gDWh-1. The result of this analysis is seen in the figure 4. The script used for analysis is available on <a href="https://github.com/Ezmagon/igem_modeling/tree/master/igem_GECKO">Github</a> </p>
<figure><img src="https://static.igem.org/mediawiki/2018/c/c4/T--Groningen--Flux-based-fig3.png" width="100%"></figure>
+
<figure><img src="https://static.igem.org/mediawiki/2018/2/2f/T--Groningen--rplotstyr.png" width="60%"><figcaption><i>Figure 4</i></figcaption></figure>
<p>As can be seen in the figure the cells can still grow almost completely optimally when the flux to styrene is at 65% of the max theoretical flux. This we can use as an argument to not create an inducible system, but rather let the cells produce styrene while they are growing, as styrene production up to a reasonable level barely hinders cell growth. Furthermore it shows that we must slightly careful with the expression levels of our enzymes as if there is too much flux going to producing styrene cell growth is hindered.</p>
+
<p>As seen in the figure the cells can continue to grow at almost optimal rate when the flux through the styrene pathway is at 65% of its theoretical maximum. We can therefore argue that there is no need to create an inducible system in the cells, but rather let the cells produce styrene continuously, since styrene production (up to a reasonable level) barely hinders cell growth. However, it shows that we should be careful with the chosen promoters for gene expression to not overburden the system with too high expression which could hinder the growth (and as a consequence also styrene production).</p>
 
<h4>OptForce</h4>
 
<h4>OptForce</h4>
<p>Next we will look at how we can increase styrene production of our cells. For this we use an algorithm called OptForce[8], which has been employed successfully to find interventions leading to overproduction of a target metabolite. OptForce compares the flux patterns observed in an initial wildtype strain and a mutant strain which overproduces the chemical at the target yield. In order to compare the two strains it makes use of Flux Variance Analysis (FVA). As noted before the steady state equation yields a solution space, meaning that there is a whole range of possible fluxes for which the system is at steady state. With FVA the reactions in the network are iteratively minimized and maximised subject to the constraints. This yield a lower and upper bound for every reaction: a flux range for which the reaction can still satisfy all constraints.</p><p>The key of OptForce is identifying so called MUST sets. The flux ranges for every reaction is first calculated for both the wildtype strain which is constrained to grow at a certain level and the mutant strain which is constrained to produce a target chemical at a certain level. Next it will compare the flux ranges of both strains for every reaction. If the flux ranges has some overlap (figure 4a), the target production is possible without having to intervene in this reaction. However if the flux range for a reaction of the wildtype strain are completely to the left (Figure 4b) or to the right (Figure 4c) of the mutant strain the we have directly or indirectly change the flux of this reaction in order to be able to achieve the overproduction target. Reaction fluxes that must increase are referred to as MUST<sup>U</sup> sets while reaction fluxes that must decrease are referred to as MUST<sup>L</sup> sets. Furthermore we also look at sums of two reaction fluxes and compare the resulting flux range between the wildtype and mutant type. Now non-overlapping flux ranges imply that one of the two reaction flux must increase or decrease. These second order sets are referred to as respectively MUST<sup>UU</sup> and MUST<sup>LL</sup> sets. </p>
+
<p>Next we looked at how the styrene production can be optimized further in the cells. We used an algorithm called OptForce8, which has been employed successfully to find interventions leading to overproduction of a target metabolite. OptForce compares the flux patterns observed in an initial wildtype strain and a mutant strain which overproduces the chemical at the target yield. In order to compare the two strains it makes use of the Flux Variance Analysis (FVA). As noted before the steady state equation usually results in a solution space, meaning that there is a whole range of possible fluxes that satisfy the steady state criterion. With FVA the reactions in the network are iteratively minimized and maximised subject to the constraints. This creates a lower and upper bound for every reaction: a flux range for which the reaction can still satisfy all constraints.</p>
<figure><img src="https://static.igem.org/mediawiki/2018/2/2c/T--Groningen--Flux-based-fig4.png" width="100%"><figcaption><i>Figure 4. MUSTsets (wildtype flux range for a reaction in green, mutant-type flux range in orange)</i></figcaption></figure>
+
 
<p>Finally after identifying the MUST sets the OptForce algorithm looks at how these can be imparted on the wildtype strain with the minimum amount of interventions, or in other words it finds the minimum amount of upregulations or knockouts which will make the wildtype produce the target chemical at the same rate as the mutant strain. The set of interventions is referred to as the FORCE set.</p>
+
<p>The key of OptForce is identification of so called MUST sets (sets of fluxes that <i>must</i> change in order to reach the target optimization). The flux ranges for every reaction is first calculated for both the wildtype strain which is constrained to grow at a certain level and the mutant strain which is constrained to produce a target chemical at a certain level. Next it compares the flux ranges of both strains for every reaction. If the flux ranges have some overlap (figure 5a), the target production is possible without having to intervene in this reaction. However if the flux range for a reaction of the wildtype strain are completely to the left (Figure 5b) or to the right (Figure 5c) of the mutant strain then we should directly or indirectly change the flux of this reaction in order to be able to achieve the overproduction target. Reaction fluxes that must increase are referred to as MUSTU sets while reaction fluxes that must decrease are referred to as MUSTL sets. Furthermore we also look at the sums of two reaction fluxes and compare the resulting flux range between the wildtype and mutant type. Now non-overlapping flux ranges imply that one of the two reaction flux must increase or decrease. These second order sets are referred to as respectively MUSTUU and MUSTLL sets. </p>
 +
 
 +
 
 +
<figure><img src="https://static.igem.org/mediawiki/2018/2/2c/T--Groningen--Flux-based-fig4.png" width="100%"><figcaption><i>Figure 5: MUSTsets (wildtype flux range for a reaction in green, mutant-type flux range in orange)</i></figcaption></figure>
 +
<p>Finally after identifying the MUST sets the OptForce algorithm looks at how these can be imparted on the wildtype strain with the minimum amount of interventions. In other words it finds the minimum amount of upregulations or knockouts which will make the wildtype produce the target chemical at the same rate as the mutant strain. This set of interventions that must actively be <i>forced</i> through genetic manipulations is referred to as the FORCE set. </p>
 
<h4>Goal 2</h4>
 
<h4>Goal 2</h4>
<p>First OptForce was run using the yeastgem7.6 as model. As it is suggested to exclude reactions in a linear pathway leading up to the target chemical we will use try to find interventions leading to an increased phenylalanine production. The wildtype had its growth constrained to 0.07 and the mutant type had phenylalanine production constrained to 0.5, both close to their maximum values. In order to solve the optimization problem IBM ILOG CPLEX Optimizer  (v 12.8.0) was used. </p>
+
<p>The OptForce algorithm was used with YeastGem8 as a wildtype model. As it is suggested to exclude reactions in a linear pathway leading up to the target chemical we will try to find interventions leading to an increased phenylalanine production, precursor of styrene. The wildtype model had its growth constrained to 0.07 mmol gDWh <sup>-1</sup> and the mutant type had phenylalanine production constrained to 0.5 mmol gDWh<sup>-1</sup>, both close to their maximum values. In order to solve the optimization problem IBM ILOG CPLEX Optimizer  (v 12.8.0) was used. </p>
<p>Next we wanted to visualize the found MUST sets; the candidates for downregulation/knockouts or upregulations. In order to do this we made use of Paint4Net (included in the Cobra Toolbox). Paint4Net is able to automatically generate a graph layout of (part of) the metabolism of a COBRA model, including data such as flux rate and direction, and the possibility to show detailed information about reactions and metabolites.[9] </p>
+
 
<p>As the model consists of over 3500 reactions visualizing the whole map at once will be rather messy, so instead we choose to visualize the cells metabolism per subsystem. Here we will show the map of: Phenylalanine, tyrosine and tryptophan biosynthesis (KEGG pathway sce00400) and Phenylalanine metabolism (KEGG pathway sce00360). This part seems most relevant to our analysis and allows us to compare interventions found by the model with well-established interventions from literature. (Do note that our method allows one to visualize any subsystem)</p>
+
<p>As finding the second order MUST sets is computationally heavy we use the cluster (see introduction page) to run the algorithm. The complete output in the form of the first and second order MUST sets and FORCE sets together with the algorithm that was run to find them can be found on our <a href="https://github.com/Ezmagon/igem_modeling/tree/master/igem_optforce">Github page</a>. Note that we excluded transport, exchange and ATP related reactions as we do not want those to end up in the FORCE sets</p>
<p>Once the Paint4net map was created the MUST sets had to be visualised. For this a simple MATLAB script was written  that goes over all nodes and edges in the network and makes them red if they correspond do a reaction in a MUST<sub>L</sub> or MUST<sub>LL</sub> set (are a candidate for downregulation/knockout) , green if they belong to a MUST<sub>U</sub> or MUST<sub>UU</sub> set  (are a candidate for upregulation) and blue if they belong to no MUST set (should be leaved unchanged). Furthermore to make a clearer visualization the script changes the labels of the reaction nodes to display the corresponding genes and the metabolite of interest is made orange. The algorithm (visualization.m) is available on our Github. The result of applying this to the map of the subsystem described above can be seen in figure 5.</p>
+
<p>Next we visualized the found MUST sets; the candidates for downregulation/knockouts or upregulations, using Paint4Net (included in the Cobra Toolbox). Paint4Net is able to automatically generate a graph layout of (part of) the metabolism of a COBRA model, including data such as flux rate and direction, and the possibility to show detailed information about reactions and metabolites.[9] </p>
<figure></figure><!--HIER MOET DAT ZOOMBARE PLAATJE KOMEN -->
+
<p>As the model consists of over 3500 reactions visualizing the whole map at once would not be informative, chose to visualize the cellulars metabolism per subsystem. Here we will show the map of: Phenylalanine, tyrosine and tryptophan biosynthesis (KEGG pathway sce00400) and Phenylalanine metabolism (KEGG pathway sce00360). This part seems most relevant to our analysis and allows us to compare interventions found by the model with well-established interventions from literature. (Do note that our method allows one to visualize any subsystem).</p>
<p>Next we wanted to verify our model by comparing the computed interventions with literature. McKenna et al. showed that Styrene yield can be increased by deletion of ARO10.[7] The reaction catalysed by ARO10 (phenylpyruvate decarboxylase) is indeed marked in the graph as a possible candidate for downregulation (included in a MUST<sub>L</sub> set) . Looking at the graph we notice that our model shows the genes ARO1, ARO2 and ARO7, responsible for the shikimate pathway in S. Cerevisiae as possibly candidates for upregulation. This is in accordance with earlier research showing that Styrene production in Escherichia Coli can be increased by overexpressing the two upstream shikimate pathway genes, aroF and pheA. [10]</p>
+
<p>Once the Paint4net map was created the MUST sets had to be visualised. For this a simple MATLAB script was written  that goes Once the Paint4net map was created the MUST sets were visualised. For this a simple MATLAB script was written  that goes over all nodes and edges in the network and makes them red if they correspond to a reaction in MUSTL or MUSTLL sets (candidatse for downregulation/knockout) , green if they belong to a MUSTU or MUSTUU set  (are a candidate for upregulation) and blue if they belong to no MUST set (should be left unchanged). Furthermore to make a clearer visualization the script changes the labels of the reaction nodes to display the corresponding genes and the metabolite of interest is coloured in orange. The algorithm <a href="https://github.com/Ezmagon/igem_modeling/blob/master/igem_optforce/visualization.m"">visualization.m</a> is available on our Github. The result of applying this to the map of the subsystem described above can be seen in figure 6. </p>
<p>To summarize we first have shown that cell growth and styrene production up to a certain level are not mutually exclusive ,using an genome scale model with extra enzyme based constraints based on the GECKO method, leading us to design cells that will do both simultaneously. Furthermore we used to OptForce algorithm to find candidate interventions for increased styrene yield. Finally we wrote a script that can visualize these interventions on a graph drawn by the Paint4Net method, allowing us to gain more insight in how we can increase styrene production in our cells. </p>
+
<br>
 +
<b> mouse over the image!</b>
 +
<br>
 +
<figure></figure><!--HIER STAAT DAT ZOOMBARE PLAATJE MAKKER -->
 +
 
 +
<div class="img-zoom-container">
 +
<img  border="5" id="myimage" src="https://static.igem.org/mediawiki/2018/0/0b/T--Groningen--metmap3.png" width="450px" height="450px" >
 +
 +
<div id="myresult" class="img-zoom-result"  ></div>
 +
 +
<script>
 +
 +
imageZoom("myimage", "myresult");
 +
</script>
 +
</div>
 +
<br>
 +
<p>Furthermore we verified our model predictions by comparing the computed interventions with literature. McKenna et al. showed that Styrene yield can be increased by deletion of ARO10.[7] The reaction catalysed by ARO10 (phenylpyruvate decarboxylase) is indeed marked in the graph as a possible candidate for downregulation (included in a MUST¬L set) . Looking at the graph we noticed that our model additionally predicts the genes ARO1, ARO2 and ARO7, that are part of `the shikimate pathway in S. cerevisiae as possible candidates for upregulation. This is in accordance with earlier research showing that styrene production in E. Coli can be increased by overexpressing the two upstream shikimate pathway genes, aroF and pheA. [10]</p>
 +
 
 +
<p>Next we look at the found force sets:</p><b>
 +
set1: PHA2 (upregulation)    ARO10    (Knockout) <br>
 +
set2: PHA2 (upregulation)    ARO10    (downregulation)<br>
 +
set3: ARO7 (upregulation)    FOX2    (knockout)<br>
 +
set4: ARO1 (upregulation)    IFA38    (knockout) <br></b>
 +
 
 +
<p>The first two FORCE set include either a knockout or the downregulation of ARO10, which is as noted before a well-established way to increase Styrene yield yeast 7  Furthermore these sets both increase the upregulation of PHA2, which encodes Putative prephenate dehydratase involved in the catalysis of phenylpyruvate a precursor to phenylalanine. The other two FORCE sets include the upregulation of ARO7 and ARO1, which are as noted part of the shikimate pathway. They also include respectively FOX2 knockout, encoding an enzyme involved in Fatty acid Oxidation and IFA38 knockout evolved in  pathway fatty acid biosynthesis. These reactions are bit less intuitive, and further experimental analysis should be done to see if those interventions would actually increase phenylalanine production and are not just the results of an under/over constrained model. 
 +
</p>
 +
 
 +
 
 +
<p>To summarize we have first shown, using a genome scale model with enzyme based constraints based on the GECKO method, that cell growth and styrene production (up to a certain level) are not mutually competitive, leading us to design cells that will express necessary enzymes constitutively. Furthermore we used the OptForce algorithm to find interventions that increase phenylalanine yield. Finally we wrote a script that can visualize these interventions on a graph drawn by the Paint4Net method, allowing us to gain more insight in how we can increase styrene production in our cells.   </p>
  
 
      
 
      

Latest revision as of 22:19, 17 October 2018

Introduction

The increase in availability of genome scale databases of microorganisms has given rise to the possibility of creating genome scale reaction networks that include corresponding metabolites and relevant genes[1]. One of the methods to study metabolic fluxes through such a wide network is called flux based analysis or FBA. As part of our project we used FBA on a genome scale model (GEM) of S. cerevisiae in order to achieve the following two goals:

Goal 1: Study the relation between cell growth and styrene production
Goal 2: Find and visualize interventions that lead to a higher styrene production

An overview of the different methods and software packages that were used to achieve these goals can be seen in figure 1.

Figure 1: overview depicting various methods used in our study

Theory

Let us consider a small network consisting of the five reactions (r1 to r5), with mock metabolites A, B ,C and D (figure 2).

Figure 2: example of a simple metabolic network

The relative amount of a particular metabolite participating in a reaction is represented by its stoichiometric coefficient, for instance metabolite B has a stoichiometric coefficient of 2 in reaction r2. The whole network of metabolic reactions can be represented in a stoichiometric matrix S of m rows and r columns, where m is the number of metabolites and r is the number of reactions in the network. The matrix then contains on the i,jth entry the stoichiometric coefficient of metabolite i in reaction j. The stoichiometric coefficient is negative when the metabolite is a substrate (traditionally written on the left side of the reaction) and positive when a metabolite is a product (on the right side of the reaction).Therefore our example network can thus be represented as a matrix S of 4 rows and 5 columns as can be seen in figure 3. To illustrate let us consider position 1,1 in the matrix, since the first column represents reaction r1 and the first row the metabolite A, we put in the stoichiometric coefficient of A in r1 which is 1. The complete matrix S for the mock network will look as follows:

Figure 3: stochiometric matrix describing the network in figure 2

The flux or reaction rate through each reaction can be represented as a vector v of length r, where r is the amount of reactions in the network and every position in the vector represents the flux through a single reaction. If the network is at a steady state (all systems variables are constant) the following holds:

Where S is the stoichiometric matrix and v is the reaction rate vector. Since what we want to know is the flux through each reaction, given a stoichiometric matrix S that represents the whole metabolic network of our cell we have to solve the above equation for v. In a genome-scale model there are usually fewer equations than variables making the system underdetermined. This means that usually there is more than one v that satisfy the above equation, creating a whole solution space. In order make this solution space smaller, we choose a physiologically relevant objective function, such as a maximum biomass growth and optimize network fluxes to satisfy this criterion.

The model

TIn our study we used to publicly available YeastGem8: “the current consensus genome-scale metabolic model of Saccharomyces cerevisiae”.[2] This model contains three main types of information: 2241 metabolites, 3520 reactions and 1019 genes. The relation between the metabolites and reactions is represented by a stoichiometric matrix, as described above. The interactions between genes and reactions are described by Boolean relationships, meaning that genes can turn reactions on or off. To accomplish our first goal we used a version of the YeastGem model that is further constrained based on the GECKO method: ecYeast7[3].

The GECKO method enhances a model by adding enzymes as part of its reactions. This ensures that fluxes are limited based on the enzymes abundance and their turnover numbers. Since the maximum flux through an enzyme catalysed reaction is equal to the turnover number times its abundance the following holds (for enzymes which catalyse only a single reaction)

Where v is the flux through a reaction Kcat is the enzyme’s turnover number and [E] its concentration. In practice this means that enzymes have to be included in the model as metabolites with a 1/Kcat stoichiometric coefficient in the reactions they catalyse.

Now we have to include the enzyme abundance in our model. Since we do not have the proteomic data to limit each enzyme directly based on its abundance we make use of the single pool assumption. We include a pseudo metabolite “protein pool” representing the total amount of enzymes in the cell and then for each enzyme a reaction in which the enzyme is drawn from the protein pool with its molecular weight as a coefficient. Using this method the following reactions were added to the model:

TCA: trans-cinnamic acid, PAL2: Phenylalanine ammonia-lyase 2, FDC1: Ferulic acid decarboxylase 1, ppool: pseudo metabolite representing the total amount of enzymes

Table 1. Enzymes

EnzymeKcat (/s)Weight(kDa)uniprotGene
Ferulic acid decarboxylase 14.6 [4]56.164Q03034FDC1 (YDR539W)
Phenylalanine ammonia-lyase 23.2 [5]>77.860P45724PAL2 (YNL294C)

Goal 1

First, we studied the relationship between styrene production and cell growth using our updated model. If cell growth is heavily affected by the styrene production, an inducible expression system should be considered. To accomplish it we would have to design our yeast cell so that first the population grows and then induce a switch that activates styrene production and slows down/stops biomass growth. On the other hand, if styrene production does not hinder cell growth significantly our cells can produce styrene continuously through the cell cycle (growth and reproduction).

In order to perform flux based analysis we made used functions included in theCOBRA toolbox, a package for MATLAB that implements the COnstraint- Based Reconstruction and Analysis (COBRA) method [6] The COBRA toolbox contains various tools valuable for modelling and analysing genome-scale networks. In order to solve optimization problems IBM ILOG CPLEX Optimizer (v 12.8.0) was used.

In the beginning we wanted to see the maximum theoretical growth rate and the maximum theoretical production flux of styrene. Therefore, we run a flux based analysis twice on the model, using first biomass growth as objective function, and secondly optimizing for styrene production:

Table 2: FBA results

Input flux of glucose (goal = biomass)1 mmol gDWh-1
Max biomass growth0.097 mmol gDWh-1
Input flux of glucose (goal = styrene)0.359 mmol gDWh-1
Max output flux of styrene0.00213 mmol gDWh-1

The analysis showed us that without additional interventions we maximally produce about one mole of styrene per 167 moles of glucose meaning that 0.35% of the mass of glucose can be turned into styrene. This means that we if we would grow the yeast cells in a medium of 20 g glucose/L, and all glucose is taken up by the cell, the final concentration of styrene will reach at its highest a concentration of 70 mg/L. This is well within toxicity levels[7] (Mckenna et al. found that growth was only minimally disrupted with styrene levels of 200 mg/L). However if we increase styrene yield with over expressions/knockouts we will have to look at minimising toxicity, for instance with a system that continuously takes styrene out of the medium

Next we tested how styrene production influences cell growth. To this end we ran FBA, optimizing for biomass while iteratively constraining the lower bound of the styrene sink reaction from 0 flux to its max flux of 0.00213 mmol gDWh-1. The result of this analysis is seen in the figure 4. The script used for analysis is available on Github

Figure 4

As seen in the figure the cells can continue to grow at almost optimal rate when the flux through the styrene pathway is at 65% of its theoretical maximum. We can therefore argue that there is no need to create an inducible system in the cells, but rather let the cells produce styrene continuously, since styrene production (up to a reasonable level) barely hinders cell growth. However, it shows that we should be careful with the chosen promoters for gene expression to not overburden the system with too high expression which could hinder the growth (and as a consequence also styrene production).

OptForce

Next we looked at how the styrene production can be optimized further in the cells. We used an algorithm called OptForce8, which has been employed successfully to find interventions leading to overproduction of a target metabolite. OptForce compares the flux patterns observed in an initial wildtype strain and a mutant strain which overproduces the chemical at the target yield. In order to compare the two strains it makes use of the Flux Variance Analysis (FVA). As noted before the steady state equation usually results in a solution space, meaning that there is a whole range of possible fluxes that satisfy the steady state criterion. With FVA the reactions in the network are iteratively minimized and maximised subject to the constraints. This creates a lower and upper bound for every reaction: a flux range for which the reaction can still satisfy all constraints.

The key of OptForce is identification of so called MUST sets (sets of fluxes that must change in order to reach the target optimization). The flux ranges for every reaction is first calculated for both the wildtype strain which is constrained to grow at a certain level and the mutant strain which is constrained to produce a target chemical at a certain level. Next it compares the flux ranges of both strains for every reaction. If the flux ranges have some overlap (figure 5a), the target production is possible without having to intervene in this reaction. However if the flux range for a reaction of the wildtype strain are completely to the left (Figure 5b) or to the right (Figure 5c) of the mutant strain then we should directly or indirectly change the flux of this reaction in order to be able to achieve the overproduction target. Reaction fluxes that must increase are referred to as MUSTU sets while reaction fluxes that must decrease are referred to as MUSTL sets. Furthermore we also look at the sums of two reaction fluxes and compare the resulting flux range between the wildtype and mutant type. Now non-overlapping flux ranges imply that one of the two reaction flux must increase or decrease. These second order sets are referred to as respectively MUSTUU and MUSTLL sets.

Figure 5: MUSTsets (wildtype flux range for a reaction in green, mutant-type flux range in orange)

Finally after identifying the MUST sets the OptForce algorithm looks at how these can be imparted on the wildtype strain with the minimum amount of interventions. In other words it finds the minimum amount of upregulations or knockouts which will make the wildtype produce the target chemical at the same rate as the mutant strain. This set of interventions that must actively be forced through genetic manipulations is referred to as the FORCE set.

Goal 2

The OptForce algorithm was used with YeastGem8 as a wildtype model. As it is suggested to exclude reactions in a linear pathway leading up to the target chemical we will try to find interventions leading to an increased phenylalanine production, precursor of styrene. The wildtype model had its growth constrained to 0.07 mmol gDWh -1 and the mutant type had phenylalanine production constrained to 0.5 mmol gDWh-1, both close to their maximum values. In order to solve the optimization problem IBM ILOG CPLEX Optimizer (v 12.8.0) was used.

As finding the second order MUST sets is computationally heavy we use the cluster (see introduction page) to run the algorithm. The complete output in the form of the first and second order MUST sets and FORCE sets together with the algorithm that was run to find them can be found on our Github page. Note that we excluded transport, exchange and ATP related reactions as we do not want those to end up in the FORCE sets

Next we visualized the found MUST sets; the candidates for downregulation/knockouts or upregulations, using Paint4Net (included in the Cobra Toolbox). Paint4Net is able to automatically generate a graph layout of (part of) the metabolism of a COBRA model, including data such as flux rate and direction, and the possibility to show detailed information about reactions and metabolites.[9]

As the model consists of over 3500 reactions visualizing the whole map at once would not be informative, chose to visualize the cellulars metabolism per subsystem. Here we will show the map of: Phenylalanine, tyrosine and tryptophan biosynthesis (KEGG pathway sce00400) and Phenylalanine metabolism (KEGG pathway sce00360). This part seems most relevant to our analysis and allows us to compare interventions found by the model with well-established interventions from literature. (Do note that our method allows one to visualize any subsystem).

Once the Paint4net map was created the MUST sets had to be visualised. For this a simple MATLAB script was written that goes Once the Paint4net map was created the MUST sets were visualised. For this a simple MATLAB script was written that goes over all nodes and edges in the network and makes them red if they correspond to a reaction in MUSTL or MUSTLL sets (candidatse for downregulation/knockout) , green if they belong to a MUSTU or MUSTUU set (are a candidate for upregulation) and blue if they belong to no MUST set (should be left unchanged). Furthermore to make a clearer visualization the script changes the labels of the reaction nodes to display the corresponding genes and the metabolite of interest is coloured in orange. The algorithm visualization.m is available on our Github. The result of applying this to the map of the subsystem described above can be seen in figure 6.


mouse over the image!

Furthermore we verified our model predictions by comparing the computed interventions with literature. McKenna et al. showed that Styrene yield can be increased by deletion of ARO10.[7] The reaction catalysed by ARO10 (phenylpyruvate decarboxylase) is indeed marked in the graph as a possible candidate for downregulation (included in a MUST¬L set) . Looking at the graph we noticed that our model additionally predicts the genes ARO1, ARO2 and ARO7, that are part of `the shikimate pathway in S. cerevisiae as possible candidates for upregulation. This is in accordance with earlier research showing that styrene production in E. Coli can be increased by overexpressing the two upstream shikimate pathway genes, aroF and pheA. [10]

Next we look at the found force sets:

set1: PHA2 (upregulation) ARO10 (Knockout)
set2: PHA2 (upregulation) ARO10 (downregulation)
set3: ARO7 (upregulation) FOX2 (knockout)
set4: ARO1 (upregulation) IFA38 (knockout)

The first two FORCE set include either a knockout or the downregulation of ARO10, which is as noted before a well-established way to increase Styrene yield yeast 7 Furthermore these sets both increase the upregulation of PHA2, which encodes Putative prephenate dehydratase involved in the catalysis of phenylpyruvate a precursor to phenylalanine. The other two FORCE sets include the upregulation of ARO7 and ARO1, which are as noted part of the shikimate pathway. They also include respectively FOX2 knockout, encoding an enzyme involved in Fatty acid Oxidation and IFA38 knockout evolved in pathway fatty acid biosynthesis. These reactions are bit less intuitive, and further experimental analysis should be done to see if those interventions would actually increase phenylalanine production and are not just the results of an under/over constrained model.

To summarize we have first shown, using a genome scale model with enzyme based constraints based on the GECKO method, that cell growth and styrene production (up to a certain level) are not mutually competitive, leading us to design cells that will express necessary enzymes constitutively. Furthermore we used the OptForce algorithm to find interventions that increase phenylalanine yield. Finally we wrote a script that can visualize these interventions on a graph drawn by the Paint4Net method, allowing us to gain more insight in how we can increase styrene production in our cells.

References

[1] Smallboe K, Simeonidis E. Flux balance analysis: A geometric perspective. 2009. doi:10.1016/j.jtbi.2009.01.027

[2] Aung HW, Henry SA, Walker LP. Revising the Representation of Fatty Acid, Glycerolipid, and Glycerophospholipid Metabolism in the Consensus Model of Yeast Metabolism. Ind Biotechnol (New Rochelle N Y). 2013;9(4):215-228. doi:10.1089/ind.2013.0013

[3] Sánchez BJ, Zhang C, Nilsson A, Kerkhoven EJ, Nielsen J, Lahtvee P. Improving the phenotype predictions of a yeast genome-scale metabolic model by incorporating enzymatic constraints. 2017:1-16. doi:10.15252/msb.20167411

[4] Bateman A, Martin MJ, O’Donovan C, et al. UniProt: the universal protein knowledgebase. Nucleic Acids Res. 2017;45(D1):D158-D169. doi:10.1093/nar/gkw1099

[5] Cochrane FC, Davin LB, Lewis NG. The Arabidopsis phenylalanine ammonia lyase gene family: kinetic characterization of the four PAL isoforms. Phytochemistry. 2004;65(11):1557-1564. doi:10.1016/j.phytochem.2004.05.006

[6] Heirendt L, Arreckx S, Pfau T, et al. Creation and analysis of biochemical constraint-based models: the COBRA Toolbox v3.0. October 2017. http://arxiv.org/abs/1710.04038. Accessed October 6, 2018.

[7] Mckenna R, Thompson B, Pugh S, Nielsen DR. Rational and Combinatorial Approaches to Engineering Styrene Production by Saccharomyces Cerevisiae.; 2011. doi:10.1186/2046-1682-4-13

[8] Ranganathan S, Suthers PF, Maranas CD. OptForce: An Optimization Procedure for Identifying All Genetic Manipulations Leading to Targeted Overproductions. Price ND, ed. PLoS Comput Biol. 2010;6(4):e1000744. doi:10.1371/journal.pcbi.1000744

[9] Kostromins A, Stalidzans E. Paint4Net: COBRA Toolbox extension for visualization of stoichiometric models of metabolism. Biosystems. 2012;109(2):233-239. doi:10.1016/J.BIOSYSTEMS.2012.03.002

[10] Liu C, Men X, Chen H, et al. A systematic optimization of styrene biosynthesis in Escherichia coli BL21(DE3). Biotechnol Biofuels. 2018;11. doi:10.1186/s13068-018-1017-z