rebuild
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.1" data-chapter-title="Goroutines" data-filepath="ch8/ch8-01.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.1" data-chapter-title="Goroutines" data-filepath="ch8/ch8-01.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.2" data-chapter-title="示例: 併髮的Clock服務" data-filepath="ch8/ch8-02.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.2" data-chapter-title="示例: 併髮的Clock服務" data-filepath="ch8/ch8-02.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.3" data-chapter-title="示例: 併髮的Echo服務" data-filepath="ch8/ch8-03.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.3" data-chapter-title="示例: 併髮的Echo服務" data-filepath="ch8/ch8-03.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.4" data-chapter-title="Channels" data-filepath="ch8/ch8-04.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.4" data-chapter-title="Channels" data-filepath="ch8/ch8-04.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.5" data-chapter-title="併行的循環" data-filepath="ch8/ch8-05.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.5" data-chapter-title="併行的循環" data-filepath="ch8/ch8-05.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
136  ch8/ch8-06.html
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.6" data-chapter-title="示例: 併髮的Web爬蟲" data-filepath="ch8/ch8-06.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.6" data-chapter-title="示例: 併髮的Web爬蟲" data-filepath="ch8/ch8-06.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -2060,7 +2060,139 @@
<section class="normal" id="section-">

<h2 id="86-示例-併髮的web爬蟲">8.6. Example: A Concurrent Web Crawler</h2>
<p>In Section 5.6 we built a simple web crawler that explored the link graph of a web site in breadth-first order. In this section, we'll make the crawler concurrent, so that independent fetches can perform their I/O in parallel and make the most of the network. The crawl function is the same as the one in gopl.io/ch5/findlinks3.</p>
<pre><code class="lang-go">gopl.io/ch8/crawl1
|
||||
<span class="hljs-keyword">func</span> crawl(url <span class="hljs-typename">string</span>) []<span class="hljs-typename">string</span> {
|
||||
fmt.Println(url)
|
||||
list, err := links.Extract(url)
|
||||
<span class="hljs-keyword">if</span> err != <span class="hljs-constant">nil</span> {
|
||||
log.Print(err)
|
||||
}
|
||||
<span class="hljs-keyword">return</span> list
|
||||
}
|
||||
</code></pre>
|
||||
<p>The main function resembles breadthFirst from Section 5.6. As before, a worklist is a queue of items to process, each item being a list of URLs to crawl, but this time we use a channel instead of a slice to represent the queue. Each call to crawl occurs in its own goroutine and sends the links it discovers back to the worklist.</p>
<pre><code class="lang-go"><span class="hljs-keyword">func</span> main() {
|
||||
worklist := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> []<span class="hljs-typename">string</span>)
|
||||
|
||||
<span class="hljs-comment">// Start with the command-line arguments.</span>
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() { worklist <- os.Args[<span class="hljs-number">1</span>:] }()
|
||||
|
||||
<span class="hljs-comment">// Crawl the web concurrently.</span>
|
||||
seen := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">map</span>[<span class="hljs-typename">string</span>]<span class="hljs-typename">bool</span>)
|
||||
<span class="hljs-keyword">for</span> list := <span class="hljs-keyword">range</span> worklist {
|
||||
<span class="hljs-keyword">for</span> _, link := <span class="hljs-keyword">range</span> list {
|
||||
<span class="hljs-keyword">if</span> !seen[link] {
|
||||
seen[link] = <span class="hljs-constant">true</span>
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>(link <span class="hljs-typename">string</span>) {
|
||||
worklist <- crawl(link)
|
||||
}(link)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>Notice that the crawl goroutine takes link as an explicit parameter to avoid the problem of loop-variable capture described in Section 5.6.1. Also notice that the initial command-line arguments are sent to the worklist from their own goroutine to avoid a deadlock: otherwise both the main goroutine and a crawler goroutine would attempt to send on the channel while neither is ready to receive. An alternative solution would be to use a buffered channel; we won't go into it further here.</p>
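<p>As a minimal sketch of that buffered-channel alternative (our variation, not code from the book): giving worklist a capacity of one lets main perform the initial send itself, because a send on a channel with free buffer space does not block.</p>
<pre><code class="lang-go">// Hypothetical variant: a one-slot buffer removes the need for the
// helper goroutine that performs the initial send.
worklist := make(chan []string, 1)
worklist <- os.Args[1:] // does not block: the buffer has a free slot
// ...the crawl loop that follows is unchanged...
</code></pre>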
<p>The crawler is now highly concurrent and prints a storm of URLs, but it has two problems. The first shows up as error messages in the log after the program has been running for a while:</p>
<pre><code>$ go build gopl.io/ch8/crawl1
$ ./crawl1 http://gopl.io/
http://gopl.io/
https://golang.org/help/
https://golang.org/doc/
https://golang.org/blog/
...
2015/07/15 18:22:12 Get ...: dial tcp: lookup blog.golang.org: no such host
2015/07/15 18:22:12 Get ...: dial tcp 23.21.222.120:443: socket:
too many open files
...
</code></pre><p>The initial error message is a surprising report of a DNS lookup failure for a domain that is perfectly reliable. The subsequent error message reveals the cause: the program created so many network connections at once that it exceeded the per-process limit on the number of open files, causing operations such as DNS lookups and calls to net.Dial to start failing.</p>
<p>The program is too parallel. Unlimited parallelism is rarely a good idea, since there is always a limiting factor in the system: the number of CPU cores for compute-bound workloads, the number of spindles and heads for local disk I/O, the bandwidth of the network for downloads, or the serving capacity of the web service itself. The solution is to limit the number of parallel uses of the resource to match the level of parallelism that is available. A simple way to do that in our example is to ensure that no more than n calls to links.Extract are active at once, where n is comfortably less than the file descriptor limit, say 20, just as a doorman at a crowded nightclub admits a new guest only when some other guest leaves.</p>
<p>We can limit parallelism using a buffered channel of capacity n to model a concurrency primitive called a counting semaphore. Conceptually, each of the n vacant slots in the channel buffer represents a token entitling the holder to proceed. Sending a value into the channel acquires a token, and receiving a value from the channel releases a token, creating a new vacant slot. This ensures that at most n sends can occur without an intervening receive. (Although it might be more intuitive to treat filled slots in the channel buffer as tokens, using vacant slots avoids the need to fill the buffer after creating the channel.) Since the channel element type is not important, we use struct{}, which has size zero.</p>
<p>Let's rewrite the crawl function so that the call to links.Extract is bracketed by operations to acquire and release a token, ensuring that at most 20 calls to it are active at any one time. It's good practice to keep the semaphore operations as close as possible to the I/O operation they regulate.</p>
<pre><code class="lang-go">gopl.io/ch8/crawl2
|
||||
<span class="hljs-comment">// tokens is a counting semaphore used to</span>
|
||||
<span class="hljs-comment">// enforce a limit of 20 concurrent requests.</span>
|
||||
<span class="hljs-keyword">var</span> tokens = <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-keyword">struct</span>{}, <span class="hljs-number">20</span>)
|
||||
|
||||
<span class="hljs-keyword">func</span> crawl(url <span class="hljs-typename">string</span>) []<span class="hljs-typename">string</span> {
|
||||
fmt.Println(url)
|
||||
tokens <- <span class="hljs-keyword">struct</span>{}{} <span class="hljs-comment">// acquire a token</span>
|
||||
list, err := links.Extract(url)
|
||||
<-tokens <span class="hljs-comment">// release the token</span>
|
||||
<span class="hljs-keyword">if</span> err != <span class="hljs-constant">nil</span> {
|
||||
log.Print(err)
|
||||
}
|
||||
<span class="hljs-keyword">return</span> list
|
||||
}
|
||||
</code></pre>
|
||||
<p>The second problem is that the program never terminates, even when it has discovered all the links reachable from the initial URLs. (Of course, you're unlikely to notice this problem unless you choose the initial URLs carefully or implement the depth-limiting feature of Exercise 8.6.) For the program to terminate, we need to break out of the main loop when the worklist is empty and no crawl goroutines are active.</p>
<pre><code class="lang-go"><span class="hljs-keyword">func</span> main() {
|
||||
worklist := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> []<span class="hljs-typename">string</span>)
|
||||
<span class="hljs-keyword">var</span> n <span class="hljs-typename">int</span> <span class="hljs-comment">// number of pending sends to worklist</span>
|
||||
|
||||
<span class="hljs-comment">// Start with the command-line arguments.</span>
|
||||
n++
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() { worklist <- os.Args[<span class="hljs-number">1</span>:] }()
|
||||
|
||||
|
||||
<span class="hljs-comment">// Crawl the web concurrently.</span>
|
||||
seen := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">map</span>[<span class="hljs-typename">string</span>]<span class="hljs-typename">bool</span>)
|
||||
|
||||
<span class="hljs-keyword">for</span> ; n > <span class="hljs-number">0</span>; n-- {
|
||||
list := <-worklist
|
||||
<span class="hljs-keyword">for</span> _, link := <span class="hljs-keyword">range</span> list {
|
||||
<span class="hljs-keyword">if</span> !seen[link] {
|
||||
seen[link] = <span class="hljs-constant">true</span>
|
||||
n++
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>(link <span class="hljs-typename">string</span>) {
|
||||
worklist <- crawl(link)
|
||||
}(link)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>In this version, the counter n keeps track of the number of sends to the worklist that are yet to occur. Each time we know that an item will be sent to the worklist, we increment n: once before sending the initial command-line arguments, and again each time we start a crawler goroutine. The main loop terminates when n falls to zero, since there is no more work to be done.</p>
<p>Now the concurrent crawler runs about twenty times faster than the breadth-first crawler of Section 5.6, without errors, and terminates correctly when its task is done.</p>
<p>The program below shows an alternative way to avoid excessive concurrency. This version uses the original crawl function without a counting semaphore, but calls it from one of 20 long-lived crawler goroutines, ensuring that at most 20 HTTP requests are active at a time.</p>
<pre><code class="lang-go"><span class="hljs-keyword">func</span> main() {
|
||||
worklist := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> []<span class="hljs-typename">string</span>) <span class="hljs-comment">// lists of URLs, may have duplicates</span>
|
||||
unseenLinks := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-typename">string</span>) <span class="hljs-comment">// de-duplicated URLs</span>
|
||||
|
||||
<span class="hljs-comment">// Add command-line arguments to worklist.</span>
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() { worklist <- os.Args[<span class="hljs-number">1</span>:] }()
|
||||
|
||||
<span class="hljs-comment">// Create 20 crawler goroutines to fetch each unseen link.</span>
|
||||
<span class="hljs-keyword">for</span> i := <span class="hljs-number">0</span>; i < <span class="hljs-number">20</span>; i++ {
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() {
|
||||
<span class="hljs-keyword">for</span> link := <span class="hljs-keyword">range</span> unseenLinks {
|
||||
foundLinks := crawl(link)
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() { worklist <- foundLinks }()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
<span class="hljs-comment">// The main goroutine de-duplicates worklist items</span>
|
||||
<span class="hljs-comment">// and sends the unseen ones to the crawlers.</span>
|
||||
seen := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">map</span>[<span class="hljs-typename">string</span>]<span class="hljs-typename">bool</span>)
|
||||
<span class="hljs-keyword">for</span> list := <span class="hljs-keyword">range</span> worklist {
|
||||
<span class="hljs-keyword">for</span> _, link := <span class="hljs-keyword">range</span> list {
|
||||
<span class="hljs-keyword">if</span> !seen[link] {
|
||||
seen[link] = <span class="hljs-constant">true</span>
|
||||
unseenLinks <- link
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>The crawler goroutines are all fed by the same channel, unseenLinks. The main goroutine is responsible for de-duplicating items it receives from the worklist, and for sending each unseen one over the unseenLinks channel to a crawler goroutine.</p>
<p>The seen map is confined within the main goroutine; that is, it can be accessed only from that goroutine. Like other forms of information hiding, confinement helps us reason about the correctness of a program. For example, local variables cannot be mentioned by name from outside the function in which they are declared; variables that do not escape (§2.3.4) from a function cannot be accessed from outside that function; and encapsulated fields of an object cannot be accessed except by the methods of that object. In all cases, information hiding helps to prevent unintended interactions between parts of a program.</p>
<p>Links found by crawl are sent to the worklist from a dedicated goroutine to avoid deadlock. To save space, we have not addressed the problem of termination in this example.</p>
<p>Exercise 8.6: Add depth-limiting to the concurrent crawler. That is, if the user sets -depth=3, then only URLs reachable by at most three links will be fetched.</p>
<p>Exercise 8.7: Write a concurrent program that creates a local mirror of a web site, fetching each reachable page and writing it to a directory on the local disk. Only pages within the original domain (for instance, golang.org) should be fetched (translator's note: external links need not be followed). URLs within mirrored pages should be altered as needed so that they refer to the mirrored page, not the original.</p>
<p>Translator's note — further reading:
<a href="http://marcio.io/2015/07/handling-1-million-requests-per-minute-with-golang/" target="_blank">http://marcio.io/2015/07/handling-1-million-requests-per-minute-with-golang/</a></p>
</section>
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.7" data-chapter-title="基於select的多路復用" data-filepath="ch8/ch8-07.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.7" data-chapter-title="基於select的多路復用" data-filepath="ch8/ch8-07.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
157  ch8/ch8-08.html
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.8" data-chapter-title="示例: 併髮的字典遍歷" data-filepath="ch8/ch8-08.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.8" data-chapter-title="示例: 併髮的字典遍歷" data-filepath="ch8/ch8-08.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -2060,7 +2060,160 @@
<section class="normal" id="section-">

<h2 id="88-示例-併髮的字典遍歷">8.8. Example: Concurrent Directory Traversal</h2>
<p>In this section we'll build a program that reports the disk usage of one or more directories specified on the command line, like the Unix du command. Most of its work is done by the walkDir function below, which enumerates the entries of the directory dir using the dirents helper function.</p>
<pre><code class="lang-go">gopl.io/ch8/du1
|
||||
<span class="hljs-comment">// walkDir recursively walks the file tree rooted at dir</span>
|
||||
<span class="hljs-comment">// and sends the size of each found file on fileSizes.</span>
|
||||
<span class="hljs-keyword">func</span> walkDir(dir <span class="hljs-typename">string</span>, fileSizes <span class="hljs-keyword">chan</span><- <span class="hljs-typename">int64</span>) {
|
||||
<span class="hljs-keyword">for</span> _, entry := <span class="hljs-keyword">range</span> dirents(dir) {
|
||||
<span class="hljs-keyword">if</span> entry.IsDir() {
|
||||
subdir := filepath.Join(dir, entry.Name())
|
||||
walkDir(subdir, fileSizes)
|
||||
} <span class="hljs-keyword">else</span> {
|
||||
fileSizes <- entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<span class="hljs-comment">// dirents returns the entries of directory dir.</span>
|
||||
<span class="hljs-keyword">func</span> dirents(dir <span class="hljs-typename">string</span>) []os.FileInfo {
|
||||
entries, err := ioutil.ReadDir(dir)
|
||||
<span class="hljs-keyword">if</span> err != <span class="hljs-constant">nil</span> {
|
||||
fmt.Fprintf(os.Stderr, <span class="hljs-string">"du1: %v\n"</span>, err)
|
||||
<span class="hljs-keyword">return</span> <span class="hljs-constant">nil</span>
|
||||
}
|
||||
<span class="hljs-keyword">return</span> entries
|
||||
}
|
||||
</code></pre>
|
||||
<p>The ioutil.ReadDir function returns a slice of os.FileInfo, the same information that a call to os.Stat returns for a single file. For each subdirectory, walkDir recursively calls itself, and for each file, walkDir sends a message on the fileSizes channel. The message is the size of the file in bytes.</p>
<p>The main function, shown below, uses two goroutines. The background goroutine calls walkDir for each directory specified on the command line and finally closes the fileSizes channel. The main goroutine computes the sum of the file sizes it receives from the channel and finally prints the total.</p>
<pre><code class="lang-go"><span class="hljs-keyword">package</span> main
|
||||
|
||||
<span class="hljs-keyword">import</span> (
|
||||
<span class="hljs-string">"flag"</span>
|
||||
<span class="hljs-string">"fmt"</span>
|
||||
<span class="hljs-string">"io/ioutil"</span>
|
||||
<span class="hljs-string">"os"</span>
|
||||
<span class="hljs-string">"path/filepath"</span>
|
||||
)
|
||||
|
||||
<span class="hljs-keyword">func</span> main() {
|
||||
<span class="hljs-comment">// Determine the initial directories.</span>
|
||||
flag.Parse()
|
||||
roots := flag.Args()
|
||||
<span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(roots) == <span class="hljs-number">0</span> {
|
||||
roots = []<span class="hljs-typename">string</span>{<span class="hljs-string">"."</span>}
|
||||
}
|
||||
|
||||
<span class="hljs-comment">// Traverse the file tree.</span>
|
||||
fileSizes := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-typename">int64</span>)
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() {
|
||||
<span class="hljs-keyword">for</span> _, root := <span class="hljs-keyword">range</span> roots {
|
||||
walkDir(root, fileSizes)
|
||||
}
|
||||
<span class="hljs-built_in">close</span>(fileSizes)
|
||||
}()
|
||||
|
||||
<span class="hljs-comment">// Print the results.</span>
|
||||
<span class="hljs-keyword">var</span> nfiles, nbytes <span class="hljs-typename">int64</span>
|
||||
<span class="hljs-keyword">for</span> size := <span class="hljs-keyword">range</span> fileSizes {
|
||||
nfiles++
|
||||
nbytes += size
|
||||
}
|
||||
printDiskUsage(nfiles, nbytes)
|
||||
}
|
||||
|
||||
<span class="hljs-keyword">func</span> printDiskUsage(nfiles, nbytes <span class="hljs-typename">int64</span>) {
|
||||
fmt.Printf(<span class="hljs-string">"%d files %.1f GB\n"</span>, nfiles, <span class="hljs-typename">float64</span>(nbytes)/<span class="hljs-number">1e9</span>)
|
||||
}
|
||||
</code></pre>
|
||||
<p>This program pauses for a long while before printing its result:</p>
<pre><code>$ go build gopl.io/ch8/du1
$ ./du1 $HOME /usr /bin /etc
213201 files 62.7 GB
</code></pre><p>The program would be nicer if it kept us informed of its progress. However, simply moving the printDiskUsage call into the loop would cause it to print thousands of lines of output.</p>
<p>The variant of du below prints the totals periodically, but only if the -v flag is specified, since not all users will want to see progress messages. The background goroutine that loops over roots is unchanged. The main goroutine now uses a ticker to generate events every 500ms, and a select statement to wait for either a file size message, in which case it updates the totals, or a tick event, in which case it prints the current totals. If the -v flag is not specified, the tick channel remains nil, and its case in the select statement is effectively disabled.</p>
<pre><code class="lang-go">gopl.io/ch8/du2
|
||||
<span class="hljs-keyword">var</span> verbose = flag.Bool(<span class="hljs-string">"v"</span>, <span class="hljs-constant">false</span>, <span class="hljs-string">"show verbose progress messages"</span>)
|
||||
|
||||
<span class="hljs-keyword">func</span> main() {
|
||||
<span class="hljs-comment">// ...start background goroutine...</span>
|
||||
|
||||
<span class="hljs-comment">// Print the results periodically.</span>
|
||||
<span class="hljs-keyword">var</span> tick <-<span class="hljs-keyword">chan</span> time.Time
|
||||
<span class="hljs-keyword">if</span> *verbose {
|
||||
tick = time.Tick(<span class="hljs-number">500</span> * time.Millisecond)
|
||||
}
|
||||
<span class="hljs-keyword">var</span> nfiles, nbytes <span class="hljs-typename">int64</span>
|
||||
loop:
|
||||
<span class="hljs-keyword">for</span> {
|
||||
<span class="hljs-keyword">select</span> {
|
||||
<span class="hljs-keyword">case</span> size, ok := <-fileSizes:
|
||||
<span class="hljs-keyword">if</span> !ok {
|
||||
<span class="hljs-keyword">break</span> loop <span class="hljs-comment">// fileSizes was closed</span>
|
||||
}
|
||||
nfiles++
|
||||
nbytes += size
|
||||
<span class="hljs-keyword">case</span> <-tick:
|
||||
printDiskUsage(nfiles, nbytes)
|
||||
}
|
||||
}
|
||||
printDiskUsage(nfiles, nbytes) <span class="hljs-comment">// final totals</span>
|
||||
}
|
||||
</code></pre>
|
||||
<p>Since the program no longer uses a range loop, the first select case must explicitly test whether the fileSizes channel has been closed, using the two-result form of receive operation. If the channel has been closed, the program breaks out of the loop. The labeled break statement breaks out of both the select and the for loop; an unlabeled break would break out of only the select, causing the loop to begin the next iteration.</p>
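<p>As a standalone illustration (ours, not the book's), the label names the enclosing for loop, so a single break leaves both the select and the loop:</p>
<pre><code class="lang-go">package main

import "fmt"

func main() {
    ch := make(chan int)
    close(ch) // a receive on a closed channel yields the zero value immediately
loop:
    for {
        select {
        case v, ok := <-ch:
            if !ok {
                break loop // leaves the for loop, not just the select
            }
            fmt.Println(v)
        }
    }
    fmt.Println("done") // reached as soon as ch is seen to be closed
}
</code></pre>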
<p>The program now gives us a leisurely stream of updates:</p>
<pre><code>$ go build gopl.io/ch8/du2
$ ./du2 -v $HOME /usr /bin /etc
28608 files 8.3 GB
54147 files 10.3 GB
93591 files 15.1 GB
127169 files 52.9 GB
175931 files 62.2 GB
213201 files 62.7 GB
</code></pre><p>However, it still takes too long to finish. There's no reason why all the calls to walkDir can't be done concurrently, thereby exploiting parallelism in the disk system. The third version of du, below, creates a new goroutine for each call to walkDir. It uses a sync.WaitGroup (§8.5) to count the number of calls to walkDir that are still active, and a closer goroutine to close the fileSizes channel when the counter drops to zero.</p>
<pre><code class="lang-go">gopl.io/ch8/du3
|
||||
<span class="hljs-keyword">func</span> main() {
|
||||
<span class="hljs-comment">// ...determine roots...</span>
|
||||
<span class="hljs-comment">// Traverse each root of the file tree in parallel.</span>
|
||||
fileSizes := <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-typename">int64</span>)
|
||||
<span class="hljs-keyword">var</span> n sync.WaitGroup
|
||||
<span class="hljs-keyword">for</span> _, root := <span class="hljs-keyword">range</span> roots {
|
||||
n.Add(<span class="hljs-number">1</span>)
|
||||
<span class="hljs-keyword">go</span> walkDir(root, &n, fileSizes)
|
||||
}
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() {
|
||||
n.Wait()
|
||||
<span class="hljs-built_in">close</span>(fileSizes)
|
||||
}()
|
||||
<span class="hljs-comment">// ...select loop...</span>
|
||||
}
|
||||
|
||||
<span class="hljs-keyword">func</span> walkDir(dir <span class="hljs-typename">string</span>, n *sync.WaitGroup, fileSizes <span class="hljs-keyword">chan</span><- <span class="hljs-typename">int64</span>) {
|
||||
<span class="hljs-keyword">defer</span> n.Done()
|
||||
<span class="hljs-keyword">for</span> _, entry := <span class="hljs-keyword">range</span> dirents(dir) {
|
||||
<span class="hljs-keyword">if</span> entry.IsDir() {
|
||||
n.Add(<span class="hljs-number">1</span>)
|
||||
subdir := filepath.Join(dir, entry.Name())
|
||||
<span class="hljs-keyword">go</span> walkDir(subdir, n, fileSizes)
|
||||
} <span class="hljs-keyword">else</span> {
|
||||
fileSizes <- entry.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>Since this program creates many thousands of goroutines at its peak, we have to change dirents to use a counting semaphore to prevent it from opening too many files at once, just as we did for the web crawler in Section 8.6:</p>
<pre><code class="lang-go"><span class="hljs-comment">// sema is a counting semaphore for limiting concurrency in dirents.</span>
|
||||
<span class="hljs-keyword">var</span> sema = <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-keyword">struct</span>{}, <span class="hljs-number">20</span>)
|
||||
|
||||
<span class="hljs-comment">// dirents returns the entries of directory dir.</span>
|
||||
<span class="hljs-keyword">func</span> dirents(dir <span class="hljs-typename">string</span>) []os.FileInfo {
|
||||
sema <- <span class="hljs-keyword">struct</span>{}{} <span class="hljs-comment">// acquire token</span>
|
||||
<span class="hljs-keyword">defer</span> <span class="hljs-keyword">func</span>() { <-sema }() <span class="hljs-comment">// release token</span>
|
||||
<span class="hljs-comment">// ...</span>
|
||||
</code></pre>
|
||||
<p>This version runs several times faster than the previous one, though there is a lot of variability from system to system.</p>
<p>Exercise 8.9: Write a version of du that computes and periodically displays separate totals for each of the root directories.</p>
</section>
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.9" data-chapter-title="併髮的退齣" data-filepath="ch8/ch8-09.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.9" data-chapter-title="併髮的退齣" data-filepath="ch8/ch8-09.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -2060,7 +2060,71 @@
<section class="normal" id="section-">

<h2 id="89-併髮的退齣">8.9. Cancellation</h2>
<p>Sometimes we need to instruct a goroutine to stop what it is doing, for example, in a web server performing a computation on behalf of a client that has disconnected.</p>
<p>There is no way for one goroutine to terminate another directly, since that would leave all its shared variables in undefined states. In the rocket launch program of Section 8.7 we sent a single value on a channel named abort, which the countdown goroutine interpreted as a request to stop itself. But what if we need to cancel two goroutines, or an arbitrary number?</p>
<p>One possibility would be to send as many events on the abort channel as there are goroutines to cancel. If some of the goroutines have already terminated themselves, however, our count will be too large and our sends will get stuck. On the other hand, if those goroutines have themselves spawned other goroutines, our count will be too small, and some goroutines will remain unaware of the cancellation. In general, it's hard to know how many goroutines are working on our behalf at any given moment. Moreover, when a goroutine receives a value from the abort channel, it consumes that value, so other goroutines won't see it. For cancellation, what we need is a reliable mechanism to broadcast an event over a channel so that many goroutines can see it as it occurs and can later see that it has occurred.</p>
<p>Recall that after a channel has been closed and drained of all sent values, subsequent receive operations proceed immediately, yielding zero values. We can exploit this to create a broadcast mechanism: don't send a value on the channel, close it.</p>
<p>We can add cancellation to the du program from the previous section with a few simple changes. First, we create a cancellation channel on which no values are ever sent, but whose closure indicates that it is time for the program to stop what it is doing. We also define a utility function, cancelled, that checks or polls the cancellation state at the instant it is called.</p>
<pre><code class="lang-go">gopl.io/ch8/du4
|
||||
<span class="hljs-keyword">var</span> done = <span class="hljs-built_in">make</span>(<span class="hljs-keyword">chan</span> <span class="hljs-keyword">struct</span>{})
|
||||
|
||||
<span class="hljs-keyword">func</span> cancelled() <span class="hljs-typename">bool</span> {
|
||||
<span class="hljs-keyword">select</span> {
|
||||
<span class="hljs-keyword">case</span> <-done:
|
||||
<span class="hljs-keyword">return</span> <span class="hljs-constant">true</span>
|
||||
<span class="hljs-keyword">default</span>:
|
||||
<span class="hljs-keyword">return</span> <span class="hljs-constant">false</span>
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>Next, we create a goroutine that reads from the standard input, which is typically connected to the terminal. As soon as any input is read (for instance, the user presses the return key), this goroutine broadcasts the cancellation by closing the done channel.</p>
<pre><code class="lang-go"><span class="hljs-comment">// Cancel traversal when input is detected.</span>
|
||||
<span class="hljs-keyword">go</span> <span class="hljs-keyword">func</span>() {
|
||||
os.Stdin.Read(<span class="hljs-built_in">make</span>([]<span class="hljs-typename">byte</span>, <span class="hljs-number">1</span>)) <span class="hljs-comment">// read a single byte</span>
|
||||
<span class="hljs-built_in">close</span>(done)
|
||||
}()
|
||||
</code></pre>
|
||||
<p>Now we need to make our goroutines respond to the cancellation. In the main goroutine, we add a third case to the select statement that tries to receive from the done channel. The function returns if this case is ever selected, but before it returns it must first drain the fileSizes channel, discarding all values until the channel is closed. It does this to ensure that any active calls to walkDir can run to completion without getting stuck sending to fileSizes.</p>
<pre><code class="lang-go"><span class="hljs-keyword">for</span> {
|
||||
<span class="hljs-keyword">select</span> {
|
||||
<span class="hljs-keyword">case</span> <-done:
|
||||
<span class="hljs-comment">// Drain fileSizes to allow existing goroutines to finish.</span>
|
||||
<span class="hljs-keyword">for</span> <span class="hljs-keyword">range</span> fileSizes {
|
||||
<span class="hljs-comment">// Do nothing.</span>
|
||||
}
|
||||
<span class="hljs-keyword">return</span>
|
||||
<span class="hljs-keyword">case</span> size, ok := <-fileSizes:
|
||||
<span class="hljs-comment">// ...</span>
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>The walkDir goroutine polls the cancellation status when it begins, and returns without doing anything if the status has been set. This turns all goroutines created after cancellation into no-ops:</p>
<pre><code class="lang-go"><span class="hljs-keyword">func</span> walkDir(dir <span class="hljs-typename">string</span>, n *sync.WaitGroup, fileSizes <span class="hljs-keyword">chan</span><- <span class="hljs-typename">int64</span>) {
|
||||
<span class="hljs-keyword">defer</span> n.Done()
|
||||
<span class="hljs-keyword">if</span> cancelled() {
|
||||
<span class="hljs-keyword">return</span>
|
||||
}
|
||||
<span class="hljs-keyword">for</span> _, entry := <span class="hljs-keyword">range</span> dirents(dir) {
|
||||
<span class="hljs-comment">// ...</span>
|
||||
}
|
||||
}
|
||||
</code></pre>
|
||||
<p>It might be profitable to poll the cancellation status again within walkDir's loop, to avoid creating goroutines after the cancellation event. Cancellation involves a trade-off: a quicker response often requires more intrusive changes to program logic. Ensuring that no expensive operations ever occur after the cancellation event may require updating many places in your code, but often most of the benefit can be obtained by checking for cancellation in a few important places.</p>
<p>A little profiling of this program revealed that its bottleneck was the acquisition of a semaphore token in dirents. The select below makes this operation cancellable and reduces the cancellation latency of the program from hundreds of milliseconds to tens:</p>
<pre><code class="lang-go"><span class="hljs-keyword">func</span> dirents(dir <span class="hljs-typename">string</span>) []os.FileInfo {
|
||||
<span class="hljs-keyword">select</span> {
|
||||
<span class="hljs-keyword">case</span> sema <- <span class="hljs-keyword">struct</span>{}{}: <span class="hljs-comment">// acquire token</span>
|
||||
<span class="hljs-keyword">case</span> <-done:
|
||||
<span class="hljs-keyword">return</span> <span class="hljs-constant">nil</span> <span class="hljs-comment">// cancelled</span>
|
||||
}
|
||||
<span class="hljs-keyword">defer</span> <span class="hljs-keyword">func</span>() { <-sema }() <span class="hljs-comment">// release token</span>
|
||||
<span class="hljs-comment">// ...read directory...</span>
|
||||
}
|
||||
</code></pre>
|
||||
<p>Now, when cancellation occurs, all the background goroutines quickly stop and the main function returns. Of course, when main returns, the program exits, so it can be hard to tell a main function that cleans up after itself from one that does not. There's a handy trick we can use during testing: if, instead of returning from main in the event of cancellation, we execute a call to panic, the runtime will dump the stack of every goroutine in the program. If the main goroutine is the only one left, then it has cleaned up after itself. But if other goroutines remain, they may not have been properly cancelled, or perhaps they have been cancelled but the cancellation takes time; a little investigation may be worthwhile. The panic dump often contains sufficient information to distinguish these cases.</p>
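<p>A standalone sketch of that trick (ours, not the book's code): a goroutine that was never cancelled shows up in the panic traceback, provided the runtime is asked to dump all goroutines.</p>
<pre><code class="lang-go">package main

import "time"

func main() {
    // Stand-in for background work that was not properly cancelled.
    go func() {
        select {} // blocks forever
    }()

    time.Sleep(100 * time.Millisecond) // let the goroutine start

    // Run with GOTRACEBACK=all so the dump lists every goroutine's stack
    // (older Go releases printed all goroutines by default).
    panic("cancelled: dumping goroutine stacks")
}
</code></pre>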
<p>Exercise 8.10: HTTP requests may be cancelled by closing the optional Cancel channel in the http.Request struct. Modify the web crawler of Section 8.6 to support cancellation.</p>
<p>Hint: the http.Get convenience function does not give you an opportunity to customize a Request. Instead, create a request with http.NewRequest, set its Cancel field, then perform the request by calling http.DefaultClient.Do(req).</p>
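<p>A minimal sketch of the hint, assuming a done channel like the one above (the package and helper name fetch are ours, not the book's; on modern Go, http.NewRequestWithContext is the preferred mechanism, but the Cancel field matches the book's era):</p>
<pre><code class="lang-go">package crawl

import "net/http"

// fetch issues a GET request that is abandoned when done is closed.
func fetch(url string, done <-chan struct{}) (*http.Response, error) {
    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return nil, err
    }
    req.Cancel = done // closing done cancels the in-flight request
    return http.DefaultClient.Do(req)
}
</code></pre>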
<p>Exercise 8.11: Following the approach of mirroredQuery in Section 8.4.4, implement a variant of fetch that requests several URLs concurrently. As soon as the first response arrives, cancel the other requests.</p>
</section>
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8.10" data-chapter-title="示例: 聊天服務" data-filepath="ch8/ch8-10.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8.10" data-chapter-title="示例: 聊天服務" data-filepath="ch8/ch8-10.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">
@@ -48,7 +48,7 @@
 <body>


-<div class="book" data-level="8" data-chapter-title="Goroutines和Channels" data-filepath="ch8/ch8.md" data-basepath=".." data-revision="Wed Dec 09 2015 15:54:13 GMT+0800 (中国标准时间)">
+<div class="book" data-level="8" data-chapter-title="Goroutines和Channels" data-filepath="ch8/ch8.md" data-basepath=".." data-revision="Mon Dec 14 2015 11:30:54 GMT+0800 (中国标准时间)">


 <div class="book-summary">