本文共 2362 字,大约阅读时间需要 7 分钟。
涉及 Go 的 net/http 包的使用、文件读写、log 日志、正则匹配、goroutine 并发以及协程同步(sync.WaitGroup)。
// Command main is a small concurrent crawler: it requests a fixed series of
// HTML pages, finds the first image URL on each page with a regular
// expression, and saves that image under the IMG directory. Progress is
// printed to stdout and also appended to down.log.
package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"sync"
	"time"
)

const (
	// pageCount is the number of pages to crawl; one goroutine is started per page.
	pageCount = 20

	// userAgent is sent with every request so the crawler looks like a desktop browser.
	userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"

	// imgDir is the directory downloaded images are written to.
	imgDir = "./IMG"

	// fileStampLayout is the timestamp layout used in image file names. It
	// deliberately avoids spaces and colons, which are not valid in file
	// names on every platform.
	fileStampLayout = "2006-01-02_15-04-05"
)

// wg lets main wait for every page goroutine; each GetImgUrl call releases it once.
var wg sync.WaitGroup

// httpClient is shared by all requests so connections can be reused; the
// timeout keeps a stalled server from hanging the program forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// imgURLRe matches the image URLs embedded in a page. It is compiled once at
// start-up instead of on every call, and the dot before "jpg" is escaped so
// it only matches a literal '.'.
var imgURLRe = regexp.MustCompile(`http://~~不显示网址~~ /uploads/tu/[0-9]{6}/[0-9]{4}/[0-9a-zA-Z]{10}\.jpg`)

// main redirects the standard logger to down.log, starts one goroutine per
// page, waits for all of them, and prints the total elapsed time.
func main() {
	logFile, err := os.OpenFile("down.log", os.O_APPEND|os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		log.Fatal(err)
	}
	defer logFile.Close()
	log.SetOutput(logFile)

	// Record the start time so the total run time can be reported.
	start := time.Now()

	// Start exactly pageCount goroutines. The counter is incremented right
	// before each "go" statement so the number of Add calls can never drift
	// from the number of goroutines (and therefore Done calls).
	for i := 0; i < pageCount; i++ {
		url := fmt.Sprintf("https://~~不显示网址~~ /20140421192446_%d.htm", i+2)
		fmt.Println(url)
		log.Printf("GET %s\n", url)
		wg.Add(1)
		go GetImgUrl(url, i)
	}

	// The main goroutine blocks here until every page goroutine has finished.
	wg.Wait()
	fmt.Println("程序用时:", time.Since(start))
}

// fetch performs a GET request for url with the crawler's User-Agent and
// returns the complete response body. It returns an error when the request
// cannot be built or sent, when the status is not 200 OK, or when the body
// cannot be read. The response status is printed and logged.
func fetch(url string) ([]byte, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		// Without this check a malformed URL yields a nil request and the
		// Header.Add call below would panic.
		return nil, fmt.Errorf("building request for %q: %w", url, err)
	}
	req.Header.Add("User-Agent", userAgent)

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("requesting %q: %w", url, err)
	}
	defer resp.Body.Close()

	fmt.Println(url, resp.Status)
	log.Println(url, resp.Status)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("requesting %q: unexpected status %s", url, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading body of %q: %w", url, err)
	}
	return body, nil
}

// GetImgUrl downloads the page at url, looks for image URLs in it and, when
// at least one is found, downloads the first one via GetImg. i is the page
// index and becomes part of the saved file name.
//
// It calls wg.Done exactly once when it returns, so the caller must have
// called wg.Add(1) beforehand.
func GetImgUrl(url string, i int) {
	defer wg.Done()

	body, err := fetch(url)
	if err != nil {
		fmt.Println("http get error ", err)
		log.Println("http get error ", err)
		return
	}

	matches := imgURLRe.FindAll(body, -1)
	fmt.Println("url:", url, "match:", len(matches))
	if len(matches) == 0 {
		fmt.Println("url:", url, "null", len(matches))
		return
	}

	// Only the first image on each page is downloaded.
	GetImg(string(matches[0]), i)
}

// GetImg downloads the image at url and writes it to
// IMG/<timestamp>_<i>.jpg, creating the IMG directory if needed. Failures
// are printed and logged; nothing is returned.
func GetImg(url string, i int) {
	body, err := fetch(url)
	if err != nil {
		fmt.Println("http get error ", err)
		log.Println("http get error ", err)
		return
	}

	// Make sure the target directory exists; WriteFile does not create it.
	if err := os.MkdirAll(imgDir, 0755); err != nil {
		fmt.Println("os.MkdirAll error", err)
		log.Println("os.MkdirAll error", err)
		return
	}

	stamp := time.Now().Format(fileStampLayout)
	filename := filepath.Join(imgDir, fmt.Sprintf("%s_%d.jpg", stamp, i))
	if err := ioutil.WriteFile(filename, body, 0666); err != nil {
		fmt.Println("ioutil.WriteFile error", err)
		log.Println("ioutil.WriteFile error", err)
		return
	}
	fmt.Println(url, " ok")
}
转载地址:http://tynxz.baihongyu.com/