1 /*
2  * This library is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU Lesser General Public
4  * License as published by the Free Software Foundation;
5  * version 2.1 of the License.
6  *
7  * This library is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10  * Lesser General Public License for more details.
11  *
12  * You should have received a copy of the GNU Lesser General Public
13  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
14  *
 * Split out from xc_linux_osdep.c:
16  *
17  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
18  */
19 
20 #include <alloca.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <unistd.h>
24 #include <string.h>
25 
26 #include <sys/mman.h>
27 #include <sys/ioctl.h>
28 
29 #include "private.h"
30 
/* Round _x up to the next multiple of 2^_w; the result is an unsigned long. */
#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
32 
/*
 * Where the included headers do not provide O_CLOEXEC, define it as 0 so
 * that OR-ing it into the open() flags below has no effect.
 */
#ifndef O_CLOEXEC
#define O_CLOEXEC 0
#endif
36 
/*
 * Open the privcmd device node and store the descriptor in the handle.
 *
 * /dev/xen/privcmd is tried first.  /proc/xen/privcmd is tried only when
 * that open fails with ENOENT, ENXIO or ENODEV.
 *
 * After a successful open, the errno reported for an unimplemented ioctl
 * is probed and recorded in fmem->unimpl_errno, and errno is reset to 0.
 *
 * Returns 0 on success (fmem->fd is set), -1 on failure.
 */
int osdep_xenforeignmemory_open(xenforeignmemory_handle *fmem)
{
    const int open_flags = O_RDWR|O_CLOEXEC;
    int privcmd_fd;

    /* The /dev node is the newer interface, so it is preferred. */
    privcmd_fd = open("/dev/xen/privcmd", open_flags);

    if ( privcmd_fd == -1 )
    {
        switch ( errno )
        {
        case ENOENT:
        case ENXIO:
        case ENODEV:
            /* Node is missing or unusable: try the /proc location. */
            privcmd_fd = open("/proc/xen/privcmd", open_flags);
            break;

        default:
            break;
        }
    }

    if ( privcmd_fd == -1 )
    {
        PERROR("Could not obtain handle on privileged command interface");
        return -1;
    }

    /*
     * Older privcmd drivers answer unknown ioctls with EINVAL rather than
     * the conventional ENOTTY, so issue a deliberately unimplemented ioctl
     * and remember whichever errno comes back.
     */
    if ( ioctl(privcmd_fd, IOCTL_PRIVCMD_UNIMPLEMENTED, NULL) < 0 )
    {
        fmem->unimpl_errno = errno;
        errno = 0;

        fmem->fd = privcmd_fd;
        return 0;
    }

    /* The probe ioctl unexpectedly succeeded: refuse to use this device. */
    xtl_log(fmem->logger, XTL_ERROR, -1, "xenforeignmemory",
            "privcmd ioctl should not be implemented");
    close(privcmd_fd);
    return -1;
}
77 
/*
 * Close the descriptor held in the handle.
 *
 * Returns 0 without doing anything when fmem->fd is -1, otherwise the
 * result of close().
 */
int osdep_xenforeignmemory_close(xenforeignmemory_handle *fmem)
{
    return ( fmem->fd == -1 ) ? 0 : close(fmem->fd);
}
85 
/*
 * Map a single frame with IOCTL_PRIVCMD_MMAPBATCH, retrying for as long as
 * the ioctl fails with ENOENT.
 *
 * Before every attempt (including the first) the PRIVCMD_MMAPBATCH_PAGED_ERROR
 * bits in *mfn are toggled and the thread sleeps for 100us.
 *
 * Returns the result of the last ioctl; errno is left as set by that call.
 */
static int map_foreign_batch_single(int fd, uint32_t dom,
                                    xen_pfn_t *mfn, unsigned long addr)
{
    privcmd_mmapbatch_t req = {
        .num = 1,
        .dom = dom,
        .addr = addr,
        .arr = mfn,
    };
    int ret;

    for ( ; ; )
    {
        *mfn ^= PRIVCMD_MMAPBATCH_PAGED_ERROR;
        usleep(100);

        ret = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &req);

        /* Stop on success or on any failure other than ENOENT. */
        if ( ret >= 0 || errno != ENOENT )
            break;
    }

    return ret;
}
107 
108 /*
109  * Retry mmap of all paged gfns in batches
110  * retuns < 0 on fatal error
111  * returns 0 if all gfns left paging state
112  * returns > 0 if some gfns are still in paging state
113  *
114  * Walk all gfns and try to assemble blocks of gfns in paging state.
115  * This will keep the request ring full and avoids delays.
116  */
retry_paged(int fd,uint32_t dom,void * addr,const xen_pfn_t * arr,int * err,size_t num)117 static int retry_paged(int fd, uint32_t dom, void *addr,
118                        const xen_pfn_t *arr, int *err, size_t num)
119 {
120     privcmd_mmapbatch_v2_t ioctlx;
121     int rc, paged = 0;
122     size_t i = 0;
123 
124     do
125     {
126         /* Skip gfns not in paging state */
127         if ( err[i] != -ENOENT )
128         {
129             i++;
130             continue;
131         }
132 
133         paged++;
134 
135         /* At least one gfn is still in paging state */
136         ioctlx.num = 1;
137         ioctlx.dom = dom;
138         ioctlx.addr = (unsigned long)addr + (i<<PAGE_SHIFT);
139         ioctlx.arr = arr + i;
140         ioctlx.err = err + i;
141 
142         /* Assemble a batch of requests */
143         while ( ++i < num )
144         {
145             if ( err[i] != -ENOENT )
146                 break;
147             ioctlx.num++;
148         }
149 
150         /* Send request and abort on fatal error */
151         rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);
152         if ( rc < 0 && errno != ENOENT )
153             goto out;
154 
155     } while ( i < num );
156 
157     rc = paged;
158 out:
159     return rc;
160 }
161 
/*
 * Map num frames of domain dom into this process.
 *
 * A region of num pages is first reserved by mmap()ing the privcmd
 * descriptor, and is then populated with IOCTL_PRIVCMD_MMAPBATCH_V2:
 *
 *  - If that ioctl fails with ENOENT, the entries marked -ENOENT in err[]
 *    are retried via retry_paged() every 100us until none remain or a
 *    fatal error occurs.
 *  - If it fails with EINVAL, the older IOCTL_PRIVCMD_MMAPBATCH is used
 *    instead, on a private writable copy of arr[].  err[] is then filled
 *    in here by comparing the copy with arr[] after the call.
 *
 * addr, prot and flags are passed to mmap() (MAP_SHARED is always added).
 * arr[] holds the num frame numbers and err[] receives per-frame status.
 *
 * Returns the mapped address, or NULL on failure.  On failure the region
 * is unmapped again and errno holds the error of the failing call; it is
 * preserved across logging and cleanup on every failure path.
 */
void *osdep_xenforeignmemory_map(xenforeignmemory_handle *fmem,
                                 uint32_t dom, void *addr,
                                 int prot, int flags, size_t num,
                                 const xen_pfn_t arr[/*num*/], int err[/*num*/])
{
    int fd = fmem->fd;
    privcmd_mmapbatch_v2_t ioctlx;
    size_t i;
    int rc;

    addr = mmap(addr, num << PAGE_SHIFT, prot, flags | MAP_SHARED,
                fd, 0);
    if ( addr == MAP_FAILED )
    {
        /* Logging may modify errno; hand the mmap() error to the caller. */
        int saved_errno = errno;

        PERROR("mmap failed");
        errno = saved_errno;
        return NULL;
    }

    ioctlx.num = num;
    ioctlx.dom = dom;
    ioctlx.addr = (unsigned long)addr;
    ioctlx.arr = arr;
    ioctlx.err = err;

    rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &ioctlx);

    /* Command was recognized, some gfn in arr are in paging state */
    if ( rc < 0 && errno == ENOENT )
    {
        do {
            usleep(100);
            rc = retry_paged(fd, dom, addr, arr, err, num);
        } while ( rc > 0 );
    }
    /* Command was not recognized, use fall back */
    else if ( rc < 0 && errno == EINVAL && (int)num > 0 )
    {
        /*
         * IOCTL_PRIVCMD_MMAPBATCH_V2 is not supported - fall back to
         * IOCTL_PRIVCMD_MMAPBATCH.
         */
        privcmd_mmapbatch_t batch;
        xen_pfn_t *pfn;
        /* size_t: an unsigned int would truncate for very large num. */
        size_t pfn_arr_size = ROUNDUP((num * sizeof(*pfn)), PAGE_SHIFT);

        /* Small copies live on the stack, larger ones in an anonymous map. */
        if ( pfn_arr_size <= PAGE_SIZE )
            pfn = alloca(num * sizeof(*pfn));
        else
        {
            pfn = mmap(NULL, pfn_arr_size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANON | MAP_POPULATE, -1, 0);
            if ( pfn == MAP_FAILED )
            {
                int saved_errno = errno;

                PERROR("mmap of pfn array failed");
                (void)munmap(addr, num << PAGE_SHIFT);
                errno = saved_errno;
                return NULL;
            }
        }

        /* The ioctl writes into its array, so work on a copy of arr[]. */
        memcpy(pfn, arr, num * sizeof(*arr));

        batch.num = num;
        batch.dom = dom;
        batch.addr = (unsigned long)addr;
        batch.arr = pfn;

        rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &batch);

        rc = rc < 0 ? -errno : 0;

        /*
         * Derive err[] from the bits in which each returned entry differs
         * from the requested one.  "continue" moves on to the next frame;
         * leaving the switch via "break" aborts the walk with rc holding a
         * fatal error.
         */
        for ( i = 0; i < num; ++i )
        {
            switch ( pfn[i] ^ arr[i] )
            {
            case 0:
                /* Entry untouched: -ENOENT only concerns other frames. */
                err[i] = rc != -ENOENT ? rc : 0;
                continue;
            default:
                err[i] = -EINVAL;
                continue;
            case PRIVCMD_MMAPBATCH_PAGED_ERROR:
                if ( rc != -ENOENT )
                {
                    err[i] = rc ?: -EINVAL;
                    continue;
                }
                /* Frame is paged out: retry it alone until it is mapped. */
                rc = map_foreign_batch_single(fd, dom, pfn + i,
                        (unsigned long)addr + (i<<PAGE_SHIFT));
                if ( rc < 0 )
                {
                    rc = -errno;
                    break;
                }
                /* Retry succeeded: report it rather than leave err[i] stale. */
                err[i] = 0;
                rc = -ENOENT;
                continue;
            }
            break;
        }

        if ( pfn_arr_size > PAGE_SIZE )
            munmap(pfn, pfn_arr_size);

        /* -ENOENT with every frame visited means all retries succeeded. */
        if ( rc == -ENOENT && i == num )
            rc = 0;
        else if ( rc )
        {
            errno = -rc;
            rc = -1;
        }
    }

    if ( rc < 0 )
    {
        int saved_errno = errno;

        PERROR("ioctl failed");
        (void)munmap(addr, num << PAGE_SHIFT);
        errno = saved_errno;
        return NULL;
    }

    return addr;
}
285 
/*
 * Unmap a region of num pages starting at addr.
 *
 * fmem is not used.  Returns the result of munmap().
 */
int osdep_xenforeignmemory_unmap(xenforeignmemory_handle *fmem,
                                 void *addr, size_t num)
{
    size_t len = num << PAGE_SHIFT;

    return munmap(addr, len);
}
291 
/*
 * Issue IOCTL_PRIVCMD_RESTRICT on the handle's descriptor, passing a
 * pointer to domid.
 *
 * Returns the result of the ioctl.
 */
int osdep_xenforeignmemory_restrict(xenforeignmemory_handle *fmem,
                                    domid_t domid)
{
    int rc = ioctl(fmem->fd, IOCTL_PRIVCMD_RESTRICT, &domid);

    return rc;
}
297 
/*
 * Unmap the nr_frames pages at fres->addr.
 *
 * A NULL fres is accepted and treated as success.  fmem is not used.
 * Returns 0 for a NULL fres, otherwise the result of munmap().
 */
int osdep_xenforeignmemory_unmap_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    if ( !fres )
        return 0;

    return munmap(fres->addr, fres->nr_frames << PAGE_SHIFT);
}
303 
/*
 * Map the resource described by fres into this process.
 *
 * A region of fres->nr_frames pages is reserved by mmap()ing the privcmd
 * descriptor (the result is stored in fres->addr) and is then populated
 * with IOCTL_PRIVCMD_MMAP_RESOURCE.
 *
 * Returns 0 on success and -1 on failure with errno set.  If the ioctl
 * fails with the errno recorded in fmem->unimpl_errno, or with EOPNOTSUPP,
 * errno is reported as EOPNOTSUPP and nothing is logged; any other ioctl
 * failure is logged and its errno is passed through unchanged.  When the
 * ioctl fails, the region is unmapped again before returning.
 */
int osdep_xenforeignmemory_map_resource(
    xenforeignmemory_handle *fmem, xenforeignmemory_resource_handle *fres)
{
    privcmd_mmap_resource_t mr = {
        .dom = fres->domid,
        .type = fres->type,
        .id = fres->id,
        .idx = fres->frame,
        .num = fres->nr_frames,
    };
    int rc;

    fres->addr = mmap(fres->addr, fres->nr_frames << PAGE_SHIFT,
                      fres->prot, fres->flags | MAP_SHARED, fmem->fd, 0);
    if ( fres->addr == MAP_FAILED )
        return -1;

    mr.addr = (uintptr_t)fres->addr;

    rc = ioctl(fmem->fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &mr);
    if ( rc )
    {
        /*
         * Capture errno before logging or cleanup can modify it, as the
         * failure path of osdep_xenforeignmemory_map() does.
         */
        int saved_errno = errno;

        if ( saved_errno != fmem->unimpl_errno && saved_errno != EOPNOTSUPP )
            PERROR("ioctl failed");
        else
            saved_errno = EOPNOTSUPP;

        (void)osdep_xenforeignmemory_unmap_resource(fmem, fres);
        errno = saved_errno;

        return -1;
    }

    return 0;
}
342 
343 /*
344  * Local variables:
345  * mode: C
346  * c-file-style: "BSD"
347  * c-basic-offset: 4
348  * tab-width: 4
349  * indent-tabs-mode: nil
350  * End:
351  */
352