Black Lives Matter. Support the Equal Justice Initiative.

Source file src/internal/poll/fd_unix.go

Documentation: internal/poll

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris
     6  // +build aix darwin dragonfly freebsd js,wasm linux netbsd openbsd solaris
     7  
     8  package poll
     9  
    10  import (
    11  	"io"
    12  	"sync/atomic"
    13  	"syscall"
    14  )
    15  
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	// destroy resets it to -1 after the descriptor has been closed.
	Sysfd int

	// I/O poller.
	pd pollDesc

	// Writev cache.
	iovecs *[]syscall.Iovec

	// Semaphore signaled when file is closed.
	// destroy releases it; Close acquires it when the FD is not in
	// blocking mode.
	csema uint32

	// Non-zero if this file has been set to blocking mode.
	// Set by Init when the FD is not pollable or the poller could not
	// be initialized, and by SetBlocking.
	isBlocking uint32

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// Stream descriptors have each read and write capped at maxRW bytes.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when the network name is "file".
	isFile bool
}
    48  
    49  // Init initializes the FD. The Sysfd field should already be set.
    50  // This can be called multiple times on a single FD.
    51  // The net argument is a network name from the net package (e.g., "tcp"),
    52  // or "file".
    53  // Set pollable to true if fd should be managed by runtime netpoll.
    54  func (fd *FD) Init(net string, pollable bool) error {
    55  	// We don't actually care about the various network types.
    56  	if net == "file" {
    57  		fd.isFile = true
    58  	}
    59  	if !pollable {
    60  		fd.isBlocking = 1
    61  		return nil
    62  	}
    63  	err := fd.pd.init(fd)
    64  	if err != nil {
    65  		// If we could not initialize the runtime poller,
    66  		// assume we are using blocking mode.
    67  		fd.isBlocking = 1
    68  	}
    69  	return err
    70  }
    71  
// destroy closes the file descriptor. This is called when there are
// no remaining references.
// It returns the error reported by CloseFunc.
func (fd *FD) destroy() error {
	// Poller may want to unregister fd in readiness notification mechanism,
	// so this must be executed before CloseFunc.
	fd.pd.close()

	// We don't use ignoringEINTR here because POSIX does not define
	// whether the descriptor is closed if close returns EINTR.
	// If the descriptor is indeed closed, using a loop would race
	// with some other goroutine opening a new descriptor.
	// (The Linux kernel guarantees that it is closed on an EINTR error.)
	err := CloseFunc(fd.Sysfd)

	// Invalidate the stored descriptor, then signal csema so that a
	// Close call waiting on it can proceed.
	fd.Sysfd = -1
	runtime_Semrelease(&fd.csema)
	return err
}
    90  
// Close closes the FD. The underlying file descriptor is closed by the
// destroy method when there are no remaining references.
// If increfAndClose reports failure, Close returns errClosing(fd.isFile)
// without doing anything else; otherwise it returns the result of decref.
func (fd *FD) Close() error {
	if !fd.fdmu.increfAndClose() {
		return errClosing(fd.isFile)
	}

	// Unblock any I/O.  Once it all unblocks and returns,
	// so that it cannot be referring to fd.Sysfd anymore,
	// the final decref will close fd.Sysfd. This should happen
	// fairly quickly, since all the I/O is non-blocking, and any
	// attempts to block in the pollDesc will return errClosing(fd.isFile).
	fd.pd.evict()

	// The call to decref will call destroy if there are no other
	// references.
	err := fd.decref()

	// Wait until the descriptor is closed. If this was the only
	// reference, it is already closed. Only wait if the file has
	// not been set to blocking mode, as otherwise any current I/O
	// may be blocking, and that would block the Close.
	// No need for an atomic read of isBlocking, increfAndClose means
	// we have exclusive access to fd.
	if fd.isBlocking == 0 {
		runtime_Semacquire(&fd.csema)
	}

	return err
}
   121  
   122  // SetBlocking puts the file into blocking mode.
   123  func (fd *FD) SetBlocking() error {
   124  	if err := fd.incref(); err != nil {
   125  		return err
   126  	}
   127  	defer fd.decref()
   128  	// Atomic store so that concurrent calls to SetBlocking
   129  	// do not cause a race condition. isBlocking only ever goes
   130  	// from 0 to 1 so there is no real race here.
   131  	atomic.StoreUint32(&fd.isBlocking, 1)
   132  	return syscall.SetNonblock(fd.Sysfd, false)
   133  }
   134  
// maxRW is the largest number of bytes passed to a single read or write
// system call on a stream descriptor.
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
   141  
   142  // Read implements io.Reader.
   143  func (fd *FD) Read(p []byte) (int, error) {
   144  	if err := fd.readLock(); err != nil {
   145  		return 0, err
   146  	}
   147  	defer fd.readUnlock()
   148  	if len(p) == 0 {
   149  		// If the caller wanted a zero byte read, return immediately
   150  		// without trying (but after acquiring the readLock).
   151  		// Otherwise syscall.Read returns 0, nil which looks like
   152  		// io.EOF.
   153  		// TODO(bradfitz): make it wait for readability? (Issue 15735)
   154  		return 0, nil
   155  	}
   156  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   157  		return 0, err
   158  	}
   159  	if fd.IsStream && len(p) > maxRW {
   160  		p = p[:maxRW]
   161  	}
   162  	for {
   163  		n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
   164  		if err != nil {
   165  			n = 0
   166  			if err == syscall.EAGAIN && fd.pd.pollable() {
   167  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   168  					continue
   169  				}
   170  			}
   171  		}
   172  		err = fd.eofError(n, err)
   173  		return n, err
   174  	}
   175  }
   176  
   177  // Pread wraps the pread system call.
   178  func (fd *FD) Pread(p []byte, off int64) (int, error) {
   179  	// Call incref, not readLock, because since pread specifies the
   180  	// offset it is independent from other reads.
   181  	// Similarly, using the poller doesn't make sense for pread.
   182  	if err := fd.incref(); err != nil {
   183  		return 0, err
   184  	}
   185  	if fd.IsStream && len(p) > maxRW {
   186  		p = p[:maxRW]
   187  	}
   188  	var (
   189  		n   int
   190  		err error
   191  	)
   192  	for {
   193  		n, err = syscall.Pread(fd.Sysfd, p, off)
   194  		if err != syscall.EINTR {
   195  			break
   196  		}
   197  	}
   198  	if err != nil {
   199  		n = 0
   200  	}
   201  	fd.decref()
   202  	err = fd.eofError(n, err)
   203  	return n, err
   204  }
   205  
   206  // ReadFrom wraps the recvfrom network call.
   207  func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
   208  	if err := fd.readLock(); err != nil {
   209  		return 0, nil, err
   210  	}
   211  	defer fd.readUnlock()
   212  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   213  		return 0, nil, err
   214  	}
   215  	for {
   216  		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
   217  		if err != nil {
   218  			if err == syscall.EINTR {
   219  				continue
   220  			}
   221  			n = 0
   222  			if err == syscall.EAGAIN && fd.pd.pollable() {
   223  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   224  					continue
   225  				}
   226  			}
   227  		}
   228  		err = fd.eofError(n, err)
   229  		return n, sa, err
   230  	}
   231  }
   232  
   233  // ReadMsg wraps the recvmsg network call.
   234  func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
   235  	if err := fd.readLock(); err != nil {
   236  		return 0, 0, 0, nil, err
   237  	}
   238  	defer fd.readUnlock()
   239  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   240  		return 0, 0, 0, nil, err
   241  	}
   242  	for {
   243  		n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
   244  		if err != nil {
   245  			if err == syscall.EINTR {
   246  				continue
   247  			}
   248  			// TODO(dfc) should n and oobn be set to 0
   249  			if err == syscall.EAGAIN && fd.pd.pollable() {
   250  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   251  					continue
   252  				}
   253  			}
   254  		}
   255  		err = fd.eofError(n, err)
   256  		return n, oobn, sysflags, sa, err
   257  	}
   258  }
   259  
   260  // Write implements io.Writer.
   261  func (fd *FD) Write(p []byte) (int, error) {
   262  	if err := fd.writeLock(); err != nil {
   263  		return 0, err
   264  	}
   265  	defer fd.writeUnlock()
   266  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   267  		return 0, err
   268  	}
   269  	var nn int
   270  	for {
   271  		max := len(p)
   272  		if fd.IsStream && max-nn > maxRW {
   273  			max = nn + maxRW
   274  		}
   275  		n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
   276  		if n > 0 {
   277  			nn += n
   278  		}
   279  		if nn == len(p) {
   280  			return nn, err
   281  		}
   282  		if err == syscall.EAGAIN && fd.pd.pollable() {
   283  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   284  				continue
   285  			}
   286  		}
   287  		if err != nil {
   288  			return nn, err
   289  		}
   290  		if n == 0 {
   291  			return nn, io.ErrUnexpectedEOF
   292  		}
   293  	}
   294  }
   295  
   296  // Pwrite wraps the pwrite system call.
   297  func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
   298  	// Call incref, not writeLock, because since pwrite specifies the
   299  	// offset it is independent from other writes.
   300  	// Similarly, using the poller doesn't make sense for pwrite.
   301  	if err := fd.incref(); err != nil {
   302  		return 0, err
   303  	}
   304  	defer fd.decref()
   305  	var nn int
   306  	for {
   307  		max := len(p)
   308  		if fd.IsStream && max-nn > maxRW {
   309  			max = nn + maxRW
   310  		}
   311  		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
   312  		if err == syscall.EINTR {
   313  			continue
   314  		}
   315  		if n > 0 {
   316  			nn += n
   317  		}
   318  		if nn == len(p) {
   319  			return nn, err
   320  		}
   321  		if err != nil {
   322  			return nn, err
   323  		}
   324  		if n == 0 {
   325  			return nn, io.ErrUnexpectedEOF
   326  		}
   327  	}
   328  }
   329  
   330  // WriteTo wraps the sendto network call.
   331  func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
   332  	if err := fd.writeLock(); err != nil {
   333  		return 0, err
   334  	}
   335  	defer fd.writeUnlock()
   336  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   337  		return 0, err
   338  	}
   339  	for {
   340  		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
   341  		if err == syscall.EINTR {
   342  			continue
   343  		}
   344  		if err == syscall.EAGAIN && fd.pd.pollable() {
   345  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   346  				continue
   347  			}
   348  		}
   349  		if err != nil {
   350  			return 0, err
   351  		}
   352  		return len(p), nil
   353  	}
   354  }
   355  
   356  // WriteMsg wraps the sendmsg network call.
   357  func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
   358  	if err := fd.writeLock(); err != nil {
   359  		return 0, 0, err
   360  	}
   361  	defer fd.writeUnlock()
   362  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   363  		return 0, 0, err
   364  	}
   365  	for {
   366  		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
   367  		if err == syscall.EINTR {
   368  			continue
   369  		}
   370  		if err == syscall.EAGAIN && fd.pd.pollable() {
   371  			if err = fd.pd.waitWrite(fd.isFile); err == nil {
   372  				continue
   373  			}
   374  		}
   375  		if err != nil {
   376  			return n, 0, err
   377  		}
   378  		return n, len(oob), err
   379  	}
   380  }
   381  
   382  // Accept wraps the accept network call.
   383  func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
   384  	if err := fd.readLock(); err != nil {
   385  		return -1, nil, "", err
   386  	}
   387  	defer fd.readUnlock()
   388  
   389  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   390  		return -1, nil, "", err
   391  	}
   392  	for {
   393  		s, rsa, errcall, err := accept(fd.Sysfd)
   394  		if err == nil {
   395  			return s, rsa, "", err
   396  		}
   397  		switch err {
   398  		case syscall.EINTR:
   399  			continue
   400  		case syscall.EAGAIN:
   401  			if fd.pd.pollable() {
   402  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   403  					continue
   404  				}
   405  			}
   406  		case syscall.ECONNABORTED:
   407  			// This means that a socket on the listen
   408  			// queue was closed before we Accept()ed it;
   409  			// it's a silly error, so try again.
   410  			continue
   411  		}
   412  		return -1, nil, errcall, err
   413  	}
   414  }
   415  
   416  // Seek wraps syscall.Seek.
   417  func (fd *FD) Seek(offset int64, whence int) (int64, error) {
   418  	if err := fd.incref(); err != nil {
   419  		return 0, err
   420  	}
   421  	defer fd.decref()
   422  	return syscall.Seek(fd.Sysfd, offset, whence)
   423  }
   424  
   425  // ReadDirent wraps syscall.ReadDirent.
   426  // We treat this like an ordinary system call rather than a call
   427  // that tries to fill the buffer.
   428  func (fd *FD) ReadDirent(buf []byte) (int, error) {
   429  	if err := fd.incref(); err != nil {
   430  		return 0, err
   431  	}
   432  	defer fd.decref()
   433  	for {
   434  		n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
   435  		if err != nil {
   436  			n = 0
   437  			if err == syscall.EAGAIN && fd.pd.pollable() {
   438  				if err = fd.pd.waitRead(fd.isFile); err == nil {
   439  					continue
   440  				}
   441  			}
   442  		}
   443  		// Do not call eofError; caller does not expect to see io.EOF.
   444  		return n, err
   445  	}
   446  }
   447  
   448  // Fchmod wraps syscall.Fchmod.
   449  func (fd *FD) Fchmod(mode uint32) error {
   450  	if err := fd.incref(); err != nil {
   451  		return err
   452  	}
   453  	defer fd.decref()
   454  	return ignoringEINTR(func() error {
   455  		return syscall.Fchmod(fd.Sysfd, mode)
   456  	})
   457  }
   458  
   459  // Fchdir wraps syscall.Fchdir.
   460  func (fd *FD) Fchdir() error {
   461  	if err := fd.incref(); err != nil {
   462  		return err
   463  	}
   464  	defer fd.decref()
   465  	return syscall.Fchdir(fd.Sysfd)
   466  }
   467  
   468  // Fstat wraps syscall.Fstat
   469  func (fd *FD) Fstat(s *syscall.Stat_t) error {
   470  	if err := fd.incref(); err != nil {
   471  		return err
   472  	}
   473  	defer fd.decref()
   474  	return ignoringEINTR(func() error {
   475  		return syscall.Fstat(fd.Sysfd, s)
   476  	})
   477  }
   478  
   479  // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
   480  // If the kernel doesn't support it, this is set to 0.
   481  var tryDupCloexec = int32(1)
   482  
   483  // DupCloseOnExec dups fd and marks it close-on-exec.
   484  func DupCloseOnExec(fd int) (int, string, error) {
   485  	if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 {
   486  		r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
   487  		if e1 == nil {
   488  			return r0, "", nil
   489  		}
   490  		switch e1.(syscall.Errno) {
   491  		case syscall.EINVAL, syscall.ENOSYS:
   492  			// Old kernel, or js/wasm (which returns
   493  			// ENOSYS). Fall back to the portable way from
   494  			// now on.
   495  			atomic.StoreInt32(&tryDupCloexec, 0)
   496  		default:
   497  			return -1, "fcntl", e1
   498  		}
   499  	}
   500  	return dupCloseOnExecOld(fd)
   501  }
   502  
   503  // dupCloseOnExecOld is the traditional way to dup an fd and
   504  // set its O_CLOEXEC bit, using two system calls.
   505  func dupCloseOnExecOld(fd int) (int, string, error) {
   506  	syscall.ForkLock.RLock()
   507  	defer syscall.ForkLock.RUnlock()
   508  	newfd, err := syscall.Dup(fd)
   509  	if err != nil {
   510  		return -1, "dup", err
   511  	}
   512  	syscall.CloseOnExec(newfd)
   513  	return newfd, "", nil
   514  }
   515  
   516  // Dup duplicates the file descriptor.
   517  func (fd *FD) Dup() (int, string, error) {
   518  	if err := fd.incref(); err != nil {
   519  		return -1, "", err
   520  	}
   521  	defer fd.decref()
   522  	return DupCloseOnExec(fd.Sysfd)
   523  }
   524  
   525  // On Unix variants only, expose the IO event for the net code.
   526  
// WaitWrite waits until data can be written to fd.
// It returns the result of the poller's waitWrite.
func (fd *FD) WaitWrite() error {
	return fd.pd.waitWrite(fd.isFile)
}
   531  
   532  // WriteOnce is for testing only. It makes a single write call.
   533  func (fd *FD) WriteOnce(p []byte) (int, error) {
   534  	if err := fd.writeLock(); err != nil {
   535  		return 0, err
   536  	}
   537  	defer fd.writeUnlock()
   538  	return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
   539  }
   540  
   541  // RawRead invokes the user-defined function f for a read operation.
   542  func (fd *FD) RawRead(f func(uintptr) bool) error {
   543  	if err := fd.readLock(); err != nil {
   544  		return err
   545  	}
   546  	defer fd.readUnlock()
   547  	if err := fd.pd.prepareRead(fd.isFile); err != nil {
   548  		return err
   549  	}
   550  	for {
   551  		if f(uintptr(fd.Sysfd)) {
   552  			return nil
   553  		}
   554  		if err := fd.pd.waitRead(fd.isFile); err != nil {
   555  			return err
   556  		}
   557  	}
   558  }
   559  
   560  // RawWrite invokes the user-defined function f for a write operation.
   561  func (fd *FD) RawWrite(f func(uintptr) bool) error {
   562  	if err := fd.writeLock(); err != nil {
   563  		return err
   564  	}
   565  	defer fd.writeUnlock()
   566  	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
   567  		return err
   568  	}
   569  	for {
   570  		if f(uintptr(fd.Sysfd)) {
   571  			return nil
   572  		}
   573  		if err := fd.pd.waitWrite(fd.isFile); err != nil {
   574  			return err
   575  		}
   576  	}
   577  }
   578  
   579  // ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
   580  func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
   581  	for {
   582  		n, err := fn(fd, p)
   583  		if err != syscall.EINTR {
   584  			return n, err
   585  		}
   586  	}
   587  }
   588  

View as plain text